gxl2dot.c   [plain text]


/*
    This software may only be used by you under license from AT&T Corp.
    ("AT&T").  A copy of AT&T's Source Code Agreement is available at
    AT&T's Internet website having the URL:
    <http://www.research.att.com/sw/tools/graphviz/license/source.html>
    If you received this software without first entering into a license
    with AT&T, you have an infringing copy of this software and cannot use
    it without violating AT&T's intellectual property rights.
*/
#pragma prototyped

#include    <convert.h>
#include    "aghdr.h"
#include    "agxbuf.h"
#ifdef HAVE_LIBEXPAT
#include    <expat.h>

#ifndef XML_STATUS_ERROR
#define XML_STATUS_ERROR 0
#endif

#define STACK_DEPTH	32
#define BUFSIZE		20000
#define SMALLBUF	1000
#define NAMEBUF		100

#define GXL_ATTR	"_gxl_"
#define GXL_ID      "_gxl_id"
#define GXL_ROLE	"_gxl_role"
#define GXL_HYPER	"_gxl_hypergraph"
#define GXL_FROM    "_gxl_fromorder"
#define GXL_TO      "_gxl_toorder"
#define GXL_TYPE    "_gxl_type"
#define GXL_COMP    "_gxl_composite_"
#define GXL_LOC     "_gxl_locator_"

#define	TAG_NONE	-1
#define	TAG_GRAPH	0
#define TAG_NODE	1
#define TAG_EDGE	2

typedef struct slist slist;
struct slist {
	slist* next;
	char   buf[1];
};

#define NEW(t)      (t*)malloc(sizeof(t))
#define N_NEW(n,t)  (t*)malloc((n)*sizeof(t))
/* Round x up to next multiple of y, which is a power of 2 */
#define ROUND2(x,y) (((x) + ((y)-1)) & ~((y)-1))

static void
pushString (slist** stk, const char* s)
{
	int    sz = ROUND2(sizeof(slist)+strlen(s),sizeof(void*));
	slist* sp = (slist*)N_NEW(sz,char);
	strcpy (sp->buf, s);
	sp->next = *stk;
	*stk = sp;
}

static void
popString (slist** stk)
{
	slist* sp = *stk;
	if (!sp) {
		fprintf (stderr, "PANIC: gxl2dot: empty element stack\n");
		exit(1);
	}
	*stk = sp->next;
	free (sp);
}

static char*
topString (slist* stk)
{
	if (!stk) {
		fprintf (stderr, "PANIC: gxl2dot: empty element stack\n");
		exit(1);
	}
	return stk->buf;
}

static void
freeString (slist* stk)
{
	slist* sp;

	while (stk) {
		sp = stk->next;
		free (stk);
		stk = sp;
	}
}

typedef struct userdata {
	agxbuf xml_attr_name;
	agxbuf xml_attr_value;
	agxbuf composite_buffer;
	slist* elements;
	int listen;
	int closedElementType;
	int globalAttrType;
	int compositeReadState;
	int edgeinverted;
	Dt_t* nameMap;
} userdata_t;

static Agraph_t*    root;           /* root graph */
static int			Current_class;  /* Current element type */
static Agraph_t*    G;              /* Current graph */
static Agnode_t*    N;              /* Set if Current_class == TAG_NODE */
static Agedge_t*    E;              /* Set if Current_class == TAG_EDGE */

static int			GSP;
static Agraph_t		*Gstack[STACK_DEPTH];

typedef struct {
  Dtlink_t   link;
  char*      name;
  char*      unique_name;
} namev_t;

static namev_t*
make_nitem (Dt_t* d, namev_t* objp, Dtdisc_t* disc)
{
	namev_t*  np = NEW(namev_t);
	np->name = objp->name;
	np->unique_name = 0;
	return np;
}

static void
free_nitem (Dt_t* d, namev_t* np, Dtdisc_t* disc)
{
    free (np);
}

static Dtdisc_t nameDisc = {
    offsetof(namev_t,name),
    -1,
    offsetof(namev_t,link),
    (Dtmake_f)make_nitem,
    (Dtfree_f)free_nitem,
    NIL(Dtcompar_f),
    NIL(Dthash_f),
    NIL(Dtmemory_f),
    NIL(Dtevent_f)
};

static userdata_t* 
genUserdata() 
{	
	userdata_t* user = NEW(userdata_t);
	agxbinit (&(user->xml_attr_name), NAMEBUF, 0);
	agxbinit (&(user->xml_attr_value), SMALLBUF, 0);
	agxbinit (&(user->composite_buffer), SMALLBUF, 0);
	user->listen = FALSE;
	user->elements = 0;
	user->closedElementType = TAG_NONE;
	user->globalAttrType = TAG_NONE;
	user->compositeReadState = FALSE;
	user->edgeinverted = FALSE;
	user->nameMap = dtopen (&nameDisc, Dtoset);
	return user;
}

static void 
freeUserdata(userdata_t* ud) 
{
	dtclose(ud->nameMap);
	agxbfree (&(ud->xml_attr_name));
	agxbfree (&(ud->xml_attr_value));
	agxbfree (&(ud->composite_buffer));
	freeString(ud->elements);
	free(ud);
}

static void
addToMap(Dt_t* map, char* name, char* uniqueName)
{
	namev_t  obj;
	namev_t* objp;

    obj.name = name;
	objp = dtinsert (map, &obj);
	assert (objp->unique_name == 0);
	objp->unique_name = uniqueName;
}

static char*
mapLookup(Dt_t* nm, char* name)
{
	namev_t* objp = dtmatch (nm, name);
	if (objp) return objp->unique_name;
	else return 0;
}

static int 
isAnonGraph(char *name)
{
	if (*name++ != '%') return 0;
	while (isdigit(*name)) name++;  /* skip over digits */
	return (*name == '\0');
}

static void 
push_subg(Agraph_t *g)
{
	if (GSP == STACK_DEPTH) {
		fprintf (stderr, "gxl2dot: Too many (> %d) nestings of subgraphs\n",
			STACK_DEPTH);
		exit(1);
	}
	else if (GSP == 0)
		root = g;
	G = Gstack[GSP++] = g;
}

static Agraph_t*
pop_subg(void)
{
	Agraph_t		*g;
	if (GSP == 0) {
		fprintf(stderr,"gxl2dot: Gstack underflow in graph parser\n"); exit(1);
	}
	g = Gstack[--GSP];					
	if (GSP > 0) G = Gstack[GSP - 1];	
	return g;
}

static Agnode_t*
bind_node(const char *name)
{
	N = agnode(G,(char*)name,1);
	return N;
}

static Agedge_t*
bind_edge(const char *tail, const char *head)
{
	Agnode_t *tailNode, *headNode;
    char*     key = 0;

	tailNode = agnode(G, (char*)tail,1);
	headNode = agnode(G, (char*)head,1);
	E = agedge(tailNode, headNode,key,1);
	return E;
}

static int 
get_xml_attr(char *attrname, const char **atts)
{
	int count = 0;
	while(atts[count] != NULL) {
		if(strcmp(attrname, atts[count]) == 0) {
			return count + 1;
		}
		count += 2;
	}
	return -1;
}

static void
setName (Dt_t* names, Agobj_t* n, char* value)
{
	Agsym_t* ap;
	char*    oldName;

	ap = agattr(root, AGTYPE(n), GXL_ID, "");
	agxset(n,ap,agnameof(n));
	oldName = agxget(n,ap); /* set/get gives us new copy */
	addToMap(names, oldName, value);
	agrename(n, value);
}

static char*       defval = "";

static void
setNodeAttr (Agnode_t* np, char *name, char *value, userdata_t* ud)
{
	Agsym_t*    ap;

	if(strcmp(name, "name") == 0) {
		setName (ud->nameMap, (Agobj_t*)np, value);
	} 
	else {
		ap = agattr(root,AGNODE,name,0);
		if (!ap)
			ap = agattr(root,AGNODE,name,defval);
		agxset(np,ap,value);
	}
}

#define NODELBL "node:"
#define NLBLLEN (sizeof(NODELBL)-1)
#define EDGELBL "edge:"
#define ELBLLEN (sizeof(EDGELBL)-1)

/* setGlobalNodeAttr:
 * Set global node attribute.
 * The names must always begin with "node:".
 */
static void
setGlobalNodeAttr (Agraph_t* g, char *name, char *value, userdata_t* ud)
{
	if (strncmp(name,NODELBL,NLBLLEN))
		fprintf (stderr, "Warning: global node attribute %s in graph %s does not begin with the prefix %s\n", name, agnameof(g), NODELBL);
	else
		name += NLBLLEN;
	if ((g != root) && !agattr(root,AGNODE,name,0))
		agattr(root,AGNODE,name,defval);
	agattr(G, AGNODE, name, value);
}

static void
setEdgeAttr (Agedge_t* ep, char *name, char *value, userdata_t* ud)
{
	Agsym_t*    ap;
	char*       attrname;

	if(strcmp(name, "headport") == 0) {
		if(ud->edgeinverted) attrname = "tailport";
		else attrname = "headport";
		ap = agattr(root,AGEDGE,attrname,0);
		if (!ap)
			ap = agattr(root,AGEDGE,attrname,defval);
		agxset(ep,ap,value);
	} else if(strcmp(name, "tailport") == 0) {
		if(ud->edgeinverted) attrname = "headport";
		else attrname = "tailport";
		ap = agattr(root,AGEDGE,attrname,0);
		if (!ap)
			ap = agattr(root,AGEDGE,attrname,defval);
		agxset(ep,ap,value);
	} else {
		ap = agattr(root,AGEDGE,name,0);
		if (!ap)
			ap = agattr(root,AGEDGE,name,defval);
		agxset(ep,ap,value);
	}
}

/* setGlobalEdgeAttr:
 * Set global edge attribute.
 * The names always begin with "edge:".
 */
static void
setGlobalEdgeAttr (Agraph_t* g, char *name, char *value, userdata_t* ud)
{
	if (strncmp(name,EDGELBL,ELBLLEN))
		fprintf (stderr, "Warning: global edge attribute %s in graph %s does not begin with the prefix %s\n", name, agnameof(g), EDGELBL);
	else
		name += ELBLLEN;
	if ((g != root) && !agattr(root,AGEDGE,name,0))
		agattr(root,AGEDGE,name,defval);
	agattr(g, AGEDGE, name, value);
}

static void
setGraphAttr (Agraph_t* g, char *name, char *value, userdata_t* ud)
{
	Agsym_t*    ap;

	if ((g == root) && !strcmp(name,"strict") && !strcmp(value,"true")){
		g->desc.strict = 1;
	}
	else if (strcmp(name, "name") == 0)
		setName (ud->nameMap, (Agobj_t *)g, value);
	else {
		ap = agattr(root,AGRAPH,name,0);
		if (ap) 
			agxset(g,ap,value);
		else if (g == root)
			agattr(root,AGRAPH,name,value);
		else {
			ap = agattr(root,AGRAPH,name,defval);
			agxset(g,ap,value);
		}
	}
}

static void
setAttr (char *name, char *value, userdata_t* ud)
{
	switch (Current_class) {
	case TAG_GRAPH :
		setGraphAttr (G, name, value, ud);
		break;
	case TAG_NODE :
		setNodeAttr (N, name, value, ud);
		break;
	case TAG_EDGE :
		setEdgeAttr (E, name, value, ud);
		break;
	}
}

/*------------- expat handlers ----------------------------------*/

static void
startElementHandler(void *userData, const char *name, const char **atts)
{
	int pos;
	userdata_t* ud = (userdata_t*)userData;
	Agraph_t *g = NULL;

	if(strcmp(name, "gxl") == 0) {
		// do nothing
	} else if(strcmp(name, "graph") == 0) {
		const char *edgeMode="";
		const char *id;
		char  buf[NAMEBUF];  /* holds % + number */

		Current_class = TAG_GRAPH;
		if(ud->closedElementType == TAG_GRAPH) {
			fprintf(stderr, "Warning: Node contains more than one graph.\n");
		}
		id = atts[get_xml_attr("id", atts)];
		pos = get_xml_attr("edgemode", atts);
		if(pos > 0) {
			edgeMode = atts[pos];
		}

		if(GSP == 0) {
			if(strcmp(edgeMode, "directed") == 0) {
				g = agopen((char*)id,Agdirected,&AgDefaultDisc);			
			} else if(strcmp(edgeMode, "undirected") == 0) {
				g = agopen((char*)id,Agundirected,&AgDefaultDisc);			
			}
			else {
				fprintf(stderr, "Warning: graph has no edgemode attribute");
				fprintf(stderr, " - assume directed\n");
				g = agopen((char*)id,Agdirected,&AgDefaultDisc);			
			}
			push_subg(g);
		} else {
			Agraph_t*  subg;
			if(isAnonGraph((char*)id)) {
				static int		anon_id = 1;
				sprintf(buf,"%%%d", anon_id++);
				id = buf;
			}
			subg = agsubg(G,(char*)id,1); 
			push_subg(subg);
		}

		pos = get_xml_attr("role", atts);
		if(pos > 0) {
			setGraphAttr(G, GXL_ROLE, (char*)atts[pos], ud);
		}

		pos = get_xml_attr("hypergraph", atts);
		if(pos > 0) {
			setGraphAttr (G, GXL_HYPER, (char*)atts[pos], ud);
		}

		pushString (&ud->elements, id);
	} else if(strcmp(name, "node") == 0) {
		Current_class = TAG_NODE;
		pos = get_xml_attr("id", atts);
		if(pos > 0) {
			const char *attrname;
			attrname = atts[pos];

			bind_node(attrname);		

			pushString (&ud->elements, attrname);
		}

	} else if(strcmp(name, "edge") == 0) {
		const char *head="", *tail="";
		char *tname;
		Agnode_t *t;

		Current_class = TAG_EDGE;
		pos = get_xml_attr("from", atts);
		if(pos > 0) 
			tail = atts[pos];
		pos = get_xml_attr("to", atts);
		if(pos > 0) 
			head = atts[pos];

		tname = mapLookup(ud->nameMap, (char*)tail);
		if (tname) tail = tname;

		tname = mapLookup(ud->nameMap, (char*)head);
		if (tname) head = tname;

		bind_edge(tail, head);

		t = AGTAIL(E);
		tname = agnameof(t);

		if(strcmp(tname, tail) == 0) {
			ud->edgeinverted = FALSE;
		} else if(strcmp(tname, head) == 0){
			ud->edgeinverted = TRUE;
		}

		pos = get_xml_attr("fromorder", atts);
		if(pos > 0) {
			setEdgeAttr(E, GXL_FROM, (char*)atts[pos], ud);
		}

		pos = get_xml_attr("toorder", atts);
		if(pos > 0) {
			setEdgeAttr(E, GXL_TO, (char*)atts[pos], ud);
		}

		pos = get_xml_attr("id", atts);
		if(pos > 0) {
			setEdgeAttr(E, GXL_ID, (char*)atts[pos], ud);
		}
	} else if(strcmp(name, "attr") == 0) {
		const char *attrname = atts[get_xml_attr("name", atts)];

		agxbput (&ud->xml_attr_name, (char*)attrname);
		pos = get_xml_attr("kind", atts);

		if(pos > 0) {
			if(strcmp("node", atts[pos]) == 0) 
				ud->globalAttrType = TAG_NODE;
			else if(strcmp("edge", atts[pos]) == 0) 
				ud->globalAttrType = TAG_EDGE;
			else if(strcmp("graph", atts[pos]) == 0) 
				ud->globalAttrType = TAG_GRAPH;
		} else {
			ud->globalAttrType = TAG_NONE;
		}

	} else if(strcmp(name, "string") == 0
				|| strcmp(name, "bool") == 0
				|| strcmp(name, "int") == 0
				|| strcmp(name, "float") == 0) {

		ud->listen = TRUE;
		if(ud->compositeReadState) {
			agxbputc (&ud->composite_buffer, '<');
			agxbput (&ud->composite_buffer, (char*)name);
			agxbputc (&ud->composite_buffer, '>');
		}
	} else if(strcmp(name, "rel") == 0
				|| strcmp(name, "relend") == 0) {
		fprintf(stderr, "%s element is ignored by DOT\n", name);
	} else if(strcmp(name, "type") == 0) {
		pos = get_xml_attr("xlink:href", atts);
		if(pos > 0) {
			setAttr(GXL_TYPE, (char*)atts[pos], ud);
		}		
	} else if(strcmp(name, "locator") == 0) {
		pos = get_xml_attr("xlink:href", atts);
		if(pos > 0) {
			const char *href = atts[pos];
			agxbput (&ud->xml_attr_value, GXL_LOC);
			agxbput (&ud->xml_attr_value, (char*)href);
		}				
	} else if(strcmp(name, "seq") == 0
				|| strcmp(name, "set") == 0
				|| strcmp(name, "bag") == 0
				|| strcmp(name, "tup") == 0 
				|| strcmp(name, "enum") == 0 ) {
			
		ud->compositeReadState = TRUE;		
		agxbputc (&ud->composite_buffer, '<');
		agxbput (&ud->composite_buffer, (char*)name);
		agxbputc (&ud->composite_buffer, '>');
	} else {
		// must be some extension
		fprintf(stderr, "Unknown node %s; DOT does not support extensions.\n", name);
	}
}

static void
endElementHandler(void *userData, const char *name)
{
	userdata_t* ud = (userdata_t*)userData;

	if(strcmp(name, "graph") == 0) {
		pop_subg();
		popString (&ud->elements);
		ud->closedElementType = TAG_GRAPH;
	} else if(strcmp(name, "node") == 0) {
		char* ele_name = topString (ud->elements);
		if(ud->closedElementType == TAG_GRAPH) {
			Agnode_t *node = agnode(root, ele_name, 0);
			agdelete(root, node);
		}
		popString (&ud->elements);
		Current_class = TAG_GRAPH;
		N = 0;
		ud->closedElementType = TAG_NODE;
	} else if(strcmp(name, "edge") == 0) {
		Current_class = TAG_GRAPH;
		E = 0;
		ud->closedElementType = TAG_EDGE;
		ud->edgeinverted = FALSE;
	} else if(strcmp(name, "attr") == 0) {
		char* name;
		char* value;
		char buf[SMALLBUF] = GXL_COMP;
		char* dynbuf = 0;

		ud->closedElementType = TAG_NONE;
		if(ud->compositeReadState) {
			int len = sizeof(GXL_COMP)+agxblen(&ud->xml_attr_name);	
			if (len <= SMALLBUF) {
				name = buf;
			}
			else {
				name = dynbuf = N_NEW(len, char);
				strcpy(name,GXL_COMP);
			}
			strcpy (name+sizeof(GXL_COMP)-1, agxbuse(&ud->xml_attr_name));
			value = agxbuse(&ud->composite_buffer);
			agxbclear(&ud->xml_attr_value);
			ud->compositeReadState = FALSE;
		}
		else {
			name = agxbuse(&ud->xml_attr_name);
			value = agxbuse(&ud->xml_attr_value);
		}

		switch(ud->globalAttrType) {
			case TAG_NONE:
				setAttr (name, value, ud);
				break;
			case TAG_NODE:
				setGlobalNodeAttr (G, name, value, ud);
				break;
			case TAG_EDGE:
				setGlobalEdgeAttr (G, name, value, ud);
				break;
			case TAG_GRAPH:
				setGraphAttr (G, name, value, ud);
				break;
		}
		if (dynbuf) free (dynbuf);
		ud->globalAttrType = TAG_NONE;
	} else if(strcmp(name, "string") == 0
				|| strcmp(name, "bool") == 0
				|| strcmp(name, "int") == 0
				|| strcmp(name, "float") == 0) {
		ud->listen = FALSE;
		if(ud->compositeReadState) {
			agxbputc (&ud->composite_buffer, '<');
			agxbputc (&ud->composite_buffer, '/');
			agxbput (&ud->composite_buffer, (char*)name);
			agxbputc (&ud->composite_buffer, '>');
		}
	} else if(strcmp(name, "seq") == 0
				|| strcmp(name, "set") == 0
				|| strcmp(name, "bag") == 0
				|| strcmp(name, "tup") == 0 
				|| strcmp(name, "enum") == 0 ) {
			agxbputc (&ud->composite_buffer, '<');
			agxbputc (&ud->composite_buffer, '/');
			agxbput (&ud->composite_buffer, (char*)name);
			agxbputc (&ud->composite_buffer, '>');
	}
}

static void
characterDataHandler(void *userData, const char *s, int length)
{
	userdata_t* ud = (userdata_t*)userData;
	
	if(!ud->listen) 
		return;

	if(ud->compositeReadState) {
		agxbput_n(&ud->composite_buffer, (char*)s, length);					
		return;
	}

	agxbput_n(&ud->xml_attr_value, (char*)s, length);					
}

Agraph_t* 
gxl_to_dot(FILE* gxlFile) 
{
	char buf[BUFSIZE];
	int done;
	userdata_t* udata = genUserdata();
	XML_Parser parser = XML_ParserCreate(NULL);

	XML_SetUserData(parser, udata);
	XML_SetElementHandler(parser, startElementHandler, endElementHandler);
	XML_SetCharacterDataHandler(parser, characterDataHandler);

	Current_class = TAG_GRAPH;
    root = 0;
	do {
		size_t len = fread(buf, 1, sizeof(buf), gxlFile);
		if (len == 0) break;
		done = len < sizeof(buf);
		if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
			fprintf(stderr,
		          "%s at line %d\n",
		          XML_ErrorString(XML_GetErrorCode(parser)),
		          XML_GetCurrentLineNumber(parser));	
			exit(1);
	    }
	  } while (!done);
	XML_ParserFree(parser);
	freeUserdata(udata);	
	
	return root;
}

#endif