Thursday, October 31, 2013

Licensing...

Unless otherwise specified, any code I've written within this blog can be used (at your own risk!) however you'd like; If you're going to re-publish any (derivatives) of it though, I would appreciate a link back to the blog, and maybe a heads-up comment over here as well so I can see how you're making use of it!

Thanks,

Jason

Wednesday, March 2, 2011

C Calltrees in Bash, revisited

I've revised my "C Calltrees in Bash" tool, primarily to make it more efficient at discovering the call associations between many functions. ("call graph" (i.e. "relate N functions") used to be based on an O(N^2) application of "call_tree" (i.e. "relate 2 functions"); I realized there was a simple way to make "call_graph" O(N) and eliminate the need for "call_tree" at the same time.)

Passing args vs. piping to the shell functions is handled more consistently now.

There's a new "upndown" wrapper to generate a graph of all of a function's callers AND callees...

And there's an experimental method for generating a subgraph of all C functions that refer to a particular variable. (Not particularly useful for a variable with a name like "i".) For a certain class of globals, it might be a little useful.

Here 'tis:

#!/bin/bash

echo calltree.sh

#use cscope to build reference files (./cscope.out by default, use set_graphdb to override name or location)
set_graphdb() { export GRAPHDB=$1; }
unset_graphdb() { unset GRAPHDB; }
build_graphdb() { cscope -bkRu ${GRAPHDB:+-f $GRAPHDB} && echo Created ${GRAPHDB:-cscope.out}...; }

# cscope queries
lsyms() { cscope ${GRAPHDB:+-f $GRAPHDB} -L0 $1 | grep -v "<global>" | grep "="; }
fdefine() { cscope ${GRAPHDB:+-f $GRAPHDB} -L1 $1; }
callees() { cscope ${GRAPHDB:+-f $GRAPHDB} -L2 $1; }
callers() { cscope ${GRAPHDB:+-f $GRAPHDB} -L3 $1; }

# show which functions refer to a set of symbols
filter_syms() { local sym cscope_line
while read -a sym; do
lsyms $sym | while read -a cscope_line; do
printf "${cscope_line[1]}\n"
done
done
}

# given a set of function names, find out how they're related
filter_edges() { local sym cscope_line
while read -a sym; do
fdefine $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "${cscope_line[1]}\t[href=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*fdefine*/\n"
done
callees $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "$sym->${cscope_line[1]}\t[label=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*callee*/\n"
done
callers $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "${cscope_line[1]}->$sym\t[label=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*caller*/\n"
done
done
}

# dump args one-per-line
largs() { for a; do echo $a; done; }
toargs() { local symbol
while read -a symbol; do
printf "%s " $symbol
done
echo
}

# present list of symbols to filter_syms properly
refs() { local tfile=/tmp/refs.$RANDOM
cat ${1:+<(largs $@)} > $tfile
filter_syms $tfile <$tfile | sort -u
rm $tfile
}

# present list of function names to filter_edges properly
edges() { local tfile=/tmp/edges.$RANDOM
cat ${1:+<(largs $@)} > $tfile
filter_edges $tfile <$tfile
rm $tfile
}

# append unknown symbol names out of lines of cscope output
filter_cscope_lines() { local cscope_line
while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-/dev/null} || echo ${cscope_line[1]}
done
}

# given a set of function names piped in, help spit out all their callers or callees that aren't already in the set
descend() { local symbol
while read -a symbol; do
$1 $symbol | filter_cscope_lines $2
done
}

# discover functions upstream of initial set
all_callers() { local tfile=/tmp/all_callers.$RANDOM
cat ${1:+<(largs $@)} > $tfile
descend callers $tfile <$tfile >>$tfile
cat $tfile; rm $tfile
}

# discover functions downstream of initial set
all_callees() { local tfile=/tmp/all_callees.$RANDOM
cat ${1:+<(largs $@)} > $tfile
descend callees $tfile <$tfile >>$tfile
cat $tfile; rm $tfile
}

# all the ways to get from (a,b,...z) to (a,b,...z), i.e. intersect all_callers and all_callees of initial set
call_graph() { local tfile=/tmp/subgraph.$RANDOM; local args=/tmp/subgraph_args.$RANDOM
cat ${1:+<(largs $@)} > $args
cat $args | all_callers | sort -u > $tfile
comm -12 $tfile <(cat $args | all_callees | sort -u)
rm $tfile $args
}

# all functions downstream of callers of argument
all_callerees() { callers $1 | filter_cscope_lines | all_callees; }

# odd experimental set of calls that might help spot potential memory leaks
call_leaks() { local tfile=/tmp/graph_filter.$RANDOM
all_callerees $1 | sort -u > $tfile
comm -2 $tfile <(all_callers $2 | sort -u)
rm $tfile
}

# wrap dot-format node and edge info with dot-format whole-graph description
graph() { printf "digraph iftree {\ngraph [rankdir=LR, ratio=compress, concentrate=true];\nnode [shape=record, style=filled]\nedge [color="navy"];\n"; cat | sort -u; printf "}\n"; }

# filter out unwanted (as specified in “~/calltree.deny”) and/or unnecessary edges
graph_filter() { local tfile=/tmp/graph_filter.$RANDOM
cat > $tfile
grep fdefine $tfile
grep $1 $tfile | grep -vf ~/calltree.deny | cut -f1,3
rm $tfile
}

# how to invoke zgrviewer as a viewer
zgrviewer() { ~/bin/zgrviewer -Pdot $@; }
# how to invoke xfig as a viewer
figviewer() { xfig <(dot -Tfig $@); }
# how to create and view a png image
pngviewer() { dot -Tpng $@ -o /tmp/ct.png && gqview -t /tmp/ct.png; }

# specify a viewer
ctviewer() { pngviewer $@; }

# add color to specified nodes
colornodes() { (cat; for x in $@; do echo "$x [color=red]"; done;) }

# generate dot files
_upstream() { all_callers $1 | edges | graph_filter ${2:-caller} | colornodes $1 | graph; }
_downstream() { all_callees $1 | edges | graph_filter ${2:-callee} | colornodes $1 | graph; }
_upndown() { (all_callers $1; all_callees $1) | edges | graph_filter ${2:-callee} | colornodes $1 | graph; }
_relate() { call_graph $@ | edges | graph_filter callee | colornodes $@ | graph; }
_leaks() { call_leaks $1 $2 | edges | graph_filter ${3:-callee} | colornodes $1 $2 | graph; }

# generate dot files and invoke ctviewer
upstream() { _upstream $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
downstream() { _downstream $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
upndown() { _upndown $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
relate() { _relate $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
leaks() { _leaks $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }

# dot file conversions
dot2png() { dot -s36 -Tpng -o $1; }
dot2jpg() { dot -Tjpg -o $1; }
dot2html() { dot -Tpng -o $1.png -Tcmapx -o $1.map; (echo "<IMG SRC="$1.png" USEMAP="#iftree" />"; cat $1.map) > $1.html; }

(Note to self: Code formatted via kdevelop, "File" -> "export to html")

Thursday, September 30, 2010

RAII in C, too

I've been using a macro to encapsulate exactly what is going on in this "RAII in C" link, which abstracts out the more boilerplate aspects of writing code like this and leaving behind something far more clean and direct:

(Lazy, i.e. non-compiling, near-C code...)

// lispy macros
#define CAR(first,rest...) first
#define CDR(first,rest...) rest

// TRY macro easing the implementation of the RAII paradigm in C
// This implementation just scratches the surface...
#define TRY(_cond,_status,_exitpoint,_msg...)
{
if (TRY_trace) printf(#_cond ": " CAR(_msg) "\n",CDR(_msg));
if (_cond)
{
status=(_status);
printf("Failed" CAR(_msg) ", status=%d\n",CDR(_msg),status);
goto _exitpoint;
}
}

where:

"cond" is any expression that returns anything that can be interpreted as a boolean (i.e. "if (cond)..."),
"status" identifies the error if "cond" evaluates to non-zero/true,
"exitpoint" refers to a code label which will be goto'ed if "cond" is non-zero/true, and
"msg" documents what "cond" is supposed to do.

Example usage:

int foo(int bufsize,char *filename)
{
int status=0;
char *buf;
FILE *file;

TRY(!(buf=malloc(bufsize)), -1, done, "allocating %d byte buf",bufsize);
TRY(!(file=fopen(filename,"r")), -1, deallocate_buf, "opening file %s",filename);
TRY(status=bar(buf,file), status, close_file, "bar'ing buf[%d], file %s",bufsize,filename);

// could "goto done" here if not releasing resources immediately.

close_file:
fclose(file);
deallocate_buf:
free(buf);
done:
return status;
}

Niceties:
* RAII-like* paradigm, in a very compact and concise form
* Focus on what's important/unique, forget about boilerplate
* Every important line's purpose is auto-documented, as a side-effect of design

But most importantly, every important line of code is wrapped in a macro(!). An entire application can be altered, instrumented, re-interpreted, or otherwise arbitrarily processed simply by changing the definition of TRY. Trivial examples would be changing how the messages are printed/logged, or only "tracing" TRY calls to a certain depth. A less-trivial example would be implementing a debugging feature that allowed you single-step through TRY-instrumented code on-demand.

*I've never called this RAII myself, although it does make dealing with resource allocation/release consistently very easy. It's more of a general control-flow method I use; a controlled way of using goto in a way that makes nested, vulnerable-to-failure resource allocations easy to control and clean up.

Tuesday, July 8, 2008

Bash: C Call Trees and Graphs


One day I had a need. (Or was it a desire? I can't remember off the top of my head.) My need was to find all the pathways through a large set of code between one function call and another, i.e. how many ways can this code get from a() to b()?

I googled around for a while and found some call-graphing tools out there, but none did exactly what I wanted, so I wrote my own.

As with many of the other similar call-graphing tools, a lot of the grunt work can be performed with existing tools, such as cscope for determining the callers and callees of functions, and graphviz for generating graph layouts and preparing them for display. (I recently heard of Egypt which uses gcc to generate an easier-to-parse intermediate representation of code, and the ubiquitous graphviz for the back-end visualization... While AFAIK it doesn't cull the graph down to a()->b(), its use of gcc is probably much quicker than my use of cscope. I will probably convert my implementation from cscope->bash->graphviz to gcc->X->graphviz at some point in the future, while I have an "X" in mind, that's a story for another entry...)

My solution, whipped out over a weekend or so, simply teases information out of a cscope database and presents the results to graphviz for display using a dozen or so fairly simple bash functions. I thought it would take a lot more work and cleverness to generate the results I wanted, but the solution's brute force nature leaves some performance on the table, which I intend to remedy when I have time.

My solution hammers on the filesystem with a lot of pipes and temporary files to shuffle intermediate results around. The most interesting part about it is probably the interplay between all_callees(), descend(), and filter_cscope_lines(). These combine to find out all the functions that are "downstream" of an initial set, i.e. all the functions the initial set calls, and the functions these call, and so on all the way down to the tips of this tree. (There is an equivalent mechanism for finding "upstream" functions.) It does this by:
  1. Stashing the initial function(s) (specified as an argument or piped in) in a temporary "working set" file,
  2. Iterating through each function name in the working set, calling cscope on each to find out what functions it calls,
  3. Appending newly discovered downstream functions (that don't already exist in the working set) to the working set.
As descend iterates through the working set, as long as new downstream functions are being found, they get appended to the working set where descend will eventually reach them. Descend will know when it's found all the downstream tips when it simply reaches the end of the working set.

On to usage:

First, you need to build a cscope database. A simple bash wrapper ("build_graphdb") for cscope takes care of most situations; simply running it in a directory will trigger cscope to parse source files it's natively aware of in that directory and it's subdirectories and build a database there named "cscope.out". If you want to instrument code in a read-only directory or need to change the name for any other reason, simply pick a new name via "set_graphdb ", and the calltree code will use that instead.

Once a cscope database has been built, you can show:
  1. all functions "downstream" of some function X, i.e. a tree of all functions called by X, ("downstream")
  2. all functions "upstream" of some function X, i.e. all the ways X could be called, ("upstream")
  3. all code paths that lead from function X to function Y. ("subgraph")
  4. all code paths between an arbitrary set of functions A, B, C, [...] Z ("relate")
The functions themselves:
These produce raw dot-format files:
  1. _downstream
  2. _upstream
  3. _subgraph
  4. _relate
These will generate, process and (maybe) display the dot-format files using graphviz tools to interpret the dot
files, and generate various vector- or image-file formats that can be displayed by yet other programs (I prefer
zgrviewer, but had issues last time I tried it, so have settled for xfig lately):
  1. downstream
  2. upstream
  3. subgraph
  4. relate
As I mentioned, these will generate and display graphs in xfig format. You can easily substitute your own "viewer" by creating a variation of the bash function "figviewer" below, and having the generic "ctviewer" (i.e. "calltree viewer") call your variation instead. One variant, "zgrviewer", is already set up to invoke zgrviewer, and "dot2jpg", "dot2png" and "dot2html" turn dot-format files into .jpg, .png and .html files respectively as well.

Here's I generated the graph above:

bash-3.2$ . calltree.sh
bash-3.2$ cd jli
bash-3.2$ build_graphdb
bash-3.2$ _subgraph jli_parse malloc > graph.dot
bash-3.2$ cat graph.dot | dot2png graph.png

If I want to view the images directly rather than generate an image file of it, I would simply replace the last two lines with "subgraph jli_parse malloc" to invoke the viewer referenced by the "ctviewer" shell function.

There are some extra vestigial features within the shell code that I was experimenting with:
  • A quick and dirty attempt to generate graphs that would highlight possible memory leaks, and
  • A more complex set of transformations that would generate an image file and an html link mask, the idea being to gain some more interactivity by letting me click on a graph in a browser and update it with something useful via a simple netcat-and-bash-based http server... I made a some good progress before abandoning the effort due to not really having a clear use/goal for the capability.
So without further ado, here is the bash script which implements call tree/graphs: simply paste it into a bash shell to use it (you'll also need to install at least cscope and graphviz to generate dot files, and maybe xfig or zgrviewer to view the graphs.):


#!/bin/bash

echo calltree.sh

#use cscope to build reference files (./cscope.out by default, use set_graphdb to override name or location)
set_graphdb() { export GRAPHDB=$1; }
unset_graphdb() { unset GRAPHDB; }
build_graphdb() { cscope -bkRu ${GRAPHDB:+-f $GRAPHDB} && echo Created ${GRAPHDB:-cscope.out}...; }

# cscope queries
fdefine() { cscope ${GRAPHDB:+-f $GRAPHDB} -L1 $1; }
callees() { cscope ${GRAPHDB:+-f $GRAPHDB} -L2 $1; }
callers() { cscope ${GRAPHDB:+-f $GRAPHDB} -L3 $1; }

# given a set of function names, find out how they're related
filter_edges() { local sym cscope_line
while read -a sym; do
fdefine $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "${cscope_line[1]}\t[href=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*fdefine*/\n"
done
callees $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "$sym->${cscope_line[1]}\t[label=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*callee*/\n"
done
callers $sym | while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-<(echo)} &&
printf "${cscope_line[1]}->$sym\t[label=\"${cscope_line[0]}:${cscope_line[2]}\"]\t/*caller*/\n"
done
done
}

# present list of function names to filter_edges properly
edges() { local tfile=/tmp/edges.$RANDOM
cat > $tfile
filter_edges $tfile <$tfile
rm $tfile
}

# append unknown symbol names out of lines of cscope output
filter_cscope_lines() { local cscope_line
while read -a cscope_line; do
grep -wq ${cscope_line[1]} ${1:-/dev/null} || echo ${cscope_line[1]}
done
}

# given a set of function names piped in, help spit out all their callers or callees that aren't already in the set
descend() { local symbol
while read -a symbol; do
$1 $symbol | filter_cscope_lines $2
done
}

# discover functions upstream of initial set
all_callers() { local tfile=/tmp/all_callers.$RANDOM
cat ${1:+<(echo $1)} > $tfile
descend callers $tfile <$tfile >>$tfile
cat $tfile; rm $tfile
}

# discover functions downstream of initial set
all_callees() { local tfile=/tmp/all_callees.$RANDOM
cat ${1:+<(echo $1)} > $tfile
descend callees $tfile <$tfile >>$tfile
cat $tfile; rm $tfile
}

# intersection of all_callees(a) and all_callers(b)
call_tree() { local tfile=/tmp/graph_filter.$RANDOM
all_callees $1 | sort -u > $tfile
comm -12 $tfile <(all_callers $2 | sort -u);
rm $tfile
}

# all functions downstream of callers of argument
all_callerees() { callers $1 | filter_cscope_lines | all_callees; }

# odd experimental set of calls that might help spot potential memory leaks
call_leaks() { local tfile=/tmp/graph_filter.$RANDOM
all_callerees $1 | sort -u > $tfile
comm -2 $tfile <(all_callers $2 | sort -u)
rm $tfile
}

# all the ways to get from (a,b,...z) to (a,b,...z)
call_graph() { for a; do for b; do if [ $a != $b ]; then call_tree $a $b; fi; done; done; }

# wrap dot-format node and edge info with dot-format whole-graph description
graph() { printf "digraph iftree {\ngraph [rankdir=LR, concentrate=true];\nnode [shape=record];\nedge [];\n"; cat | sort -u; printf "}\n"; }

# filter out unwanted (as specified in “~/calltree.deny”) and/or unnecessary edges
graph_filter() { local tfile=/tmp/graph_filter.$RANDOM
cat > $tfile
grep fdefine $tfile
grep $1 $tfile | grep -vf ~/calltree.deny | cut -f1,3
rm $tfile
}

# how to invoke zgrviewer as a viewer
zgrviewer() { ~/bin/zgrviewer -Pdot $*; }
# how to invoke xfig as a viewer
figviewer() { xfig <(dot -Tfig $*); }

# specify a viewer
ctviewer() { figviewer $*; }

# add color to specified nodes
colornodes() { (cat; for x in $@; do echo "$x [color=red]"; done;) }

# generate dot files
_upstream() { all_callers $1 | edges | graph_filter ${2:-caller} | colornodes $1 | graph; }
_downstream() { all_callees $1 | edges | graph_filter ${2:-callee} | colornodes $1 | graph; }
_subgraph() { call_tree $1 $2 | edges | graph_filter ${3:-callee} | colornodes $1 $2 | graph; }
_relate() { call_graph $@ | edges | graph_filter callee | colornodes $@ | graph; }
_leaks() { call_leaks $1 $2 | edges | graph_filter ${3:-callee} | colornodes $1 $2 | graph; }

# generate dot files and invoke ctviewer
upstream() { _upstream $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
downstream() { _downstream $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
subgraph() { _subgraph $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
relate() { _relate $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }
leaks() { _leaks $@ > /tmp/tfile; ctviewer /tmp/tfile; rm -f /tmp/tfile; }

# dot file conversions
dot2png() { dot -Tpng -o $1; }
dot2jpg() { dot -Tjpg -o $1; }
dot2html() { dot -Tpng -o $1.png -Tcmapx -o $1.map; (echo "<IMG SRC="$1.png" USEMAP="#iftree" />"; cat $1.map) > $1.html; }