# Source code for examples.elementtree.adjacency_list
"""
Illustrates an explicit way to persist an XML document expressed using
ElementTree.

Each DOM node is stored in an individual table row, with attributes
represented in a separate table.  The nodes are associated in a hierarchy
using an adjacency list structure.  A query function is introduced which can
search for nodes along any path with a given structure of attributes,
basically a (very narrow) subset of xpath.

This example explicitly marshals/unmarshals the ElementTree document into
mapped entities which have their own tables.  Compare to pickle_type.py which
uses PickleType to accomplish the same task.  Note that the usage of both
styles of persistence is identical, as is the structure of the main Document
class.

"""

# PART I - Imports/Configuration
import os
import re
from xml.etree import ElementTree

from sqlalchemy import and_
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import ForeignKey
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy import Table
from sqlalchemy import Unicode
from sqlalchemy.orm import aliased
from sqlalchemy.orm import lazyload
from sqlalchemy.orm import mapper
from sqlalchemy.orm import registry
from sqlalchemy.orm import relationship
from sqlalchemy.orm import Session


# in-memory SQLite engine shared by the whole example.
e = create_engine("sqlite://")

# the registry owns the MetaData collection that every Table below joins.
mapper_registry = registry()

# PART II - Table Metadata

# stores a top level record of an XML document.  "element_id" refers to the
# row in "elements" that holds the document's root node.
documents = Table(
    "documents",
    mapper_registry.metadata,
    Column("document_id", Integer, primary_key=True),
    Column("filename", String(30), unique=True),
    Column("element_id", Integer, ForeignKey("elements.element_id")),
)

# stores XML nodes in an adjacency list model.  This corresponds to
# Element and SubElement objects.  "parent_id" is the self-referential
# foreign key that forms the hierarchy.
elements = Table(
    "elements",
    mapper_registry.metadata,
    Column("element_id", Integer, primary_key=True),
    Column("parent_id", Integer, ForeignKey("elements.element_id")),
    Column("tag", Unicode(30), nullable=False),
    Column("text", Unicode),
    Column("tail", Unicode),
)

# stores attributes.
# This corresponds to the dictionary of attributes
# stored by an Element or SubElement.
attributes = Table(
    "attributes",
    mapper_registry.metadata,
    Column(
        "element_id",
        Integer,
        ForeignKey("elements.element_id"),
        primary_key=True,
    ),
    Column("name", Unicode(100), nullable=False, primary_key=True),
    Column("value", Unicode(255)),
)

# emit CREATE TABLE for every table registered on the shared MetaData.
mapper_registry.metadata.create_all(e)

# PART III - Model

# our document class.  contains a string name,
# and the ElementTree root element.


class Document:
    """A named XML document.

    :param name: stored as ``filename``.
    :param element: the parsed ElementTree document, stored as ``element``.
    """

    def __init__(self, name, element):
        self.filename = name
        self.element = element


# PART IV - Persistence Mapping

# Node class.  a non-public class which will represent the DB-persisted
# Element/SubElement object.  We cannot create mappers for ElementTree elements
# directly because they are at the very least not new-style classes, and also
# may be backed by native implementations. so here we construct an adapter.


class _Node:
    """Persisted stand-in for one Element/SubElement; populated by the mapper."""


# Attribute class.  also internal, this will represent the key/value attributes
# stored for a particular Node.


class _Attribute:
    """One name/value attribute pair belonging to a ``_Node``."""

    def __init__(self, name, value):
        self.name = name
        self.value = value


# setup mappers.  the mappings are registered on the same registry whose
# MetaData holds the tables (registry.map_imperatively() replaces the legacy
# standalone mapper() function).
#
# Document eagerly loads its single root _Node: the foreign key lives on
# "documents", so "_root" is a scalar many-to-one, not a collection.
mapper_registry.map_imperatively(
    Document,
    documents,
    properties={"_root": relationship(_Node, lazy="joined", cascade="all")},
)

mapper_registry.map_imperatively(
    _Node,
    elements,
    properties={
        # self-referential one-to-many via elements.parent_id
        "children": relationship(_Node, cascade="all"),
        # eagerly load attributes
        "attributes": relationship(
            _Attribute, lazy="joined", cascade="all, delete-orphan"
        ),
    },
)

mapper_registry.map_imperatively(_Attribute, attributes)

# define marshalling functions that convert from _Node/_Attribute to/from
# ElementTree objects.
# this will set the ElementTree element as
# "document._element", and assign the root _Node object to the "_root" mapped
# attribute.


class ElementTreeMarshal:
    """Descriptor converting between an ElementTree document and ``_Node`` rows.

    Reading builds (and caches on ``document._element``) an ElementTree from
    the persisted ``document._root`` hierarchy; writing converts an
    ElementTree into a ``_Node`` hierarchy assigned to ``document._root``.
    """

    def __get__(self, document, owner):
        """Return the ElementTree for ``document``, building it on first use.

        Class-level access (``document`` is None) returns the descriptor.
        """
        if document is None:
            return self

        # already built, or assigned via __set__
        if hasattr(document, "_element"):
            return document._element

        def traverse(node, parent=None):
            # recreate this node, attaching it to its parent when there is one
            if parent is not None:
                elem = ElementTree.SubElement(parent, node.tag)
            else:
                elem = ElementTree.Element(node.tag)
            elem.text = node.text
            elem.tail = node.tail
            for attr in node.attributes:
                elem.attrib[attr.name] = attr.value
            for child in node.children:
                traverse(child, parent=elem)
            return elem

        document._element = ElementTree.ElementTree(traverse(document._root))
        return document._element

    def __set__(self, document, element):
        """Convert ``element`` (an ElementTree) into ``_Node`` objects.

        The root node is assigned to ``document._root`` and the original
        ElementTree is cached as ``document._element``.
        """

        def traverse(node):
            n = _Node()
            n.tag = str(node.tag)
            # keep a missing text as None rather than the string "None"
            n.text = str(node.text) if node.text is not None else None
            n.tail = str(node.tail) if node.tail else None
            # iterating an Element yields its direct children
            n.children = [traverse(n2) for n2 in node]
            n.attributes = [
                _Attribute(str(k), str(v)) for k, v in node.attrib.items()
            ]
            return n

        document._root = traverse(element.getroot())
        document._element = element

    def __delete__(self, document):
        """Drop the cached ElementTree and detach the persisted root node."""
        # the cache is only present once the element was read or assigned
        if hasattr(document, "_element"):
            del document._element
        # "_root" refers to a single node (see __get__/__set__), so it is
        # cleared with None rather than an empty list
        document._root = None


# override Document's "element" attribute with the marshaller.
Document.element = ElementTreeMarshal()

# PART V - Basic Persistence Example

line = "\n--------------------------------------------------------"

# save to DB
session = Session(e)

# get ElementTree documents
for file in ("test.xml", "test2.xml", "test3.xml"):
    filename = os.path.join(os.path.dirname(__file__), file)
    doc = ElementTree.parse(filename)
    session.add(Document(file, doc))

print("\nSaving three documents...", line)
session.commit()
print("Done.")

print("\nFull text of document 'test.xml':", line)
document = session.query(Document).filter_by(filename="test.xml").first()

ElementTree.dump(document.element)

# PART VI - Searching for Paths

# manually search for a document which contains
# "/somefile/header/field1:hi"
root = aliased(_Node)
child_node = aliased(_Node)
grandchild_node = aliased(_Node)

d = (
    session.query(Document)
    .join(Document._root.of_type(root))
    .filter(root.tag == "somefile")
    .join(root.children.of_type(child_node))
    .filter(child_node.tag == "header")
    .join(child_node.children.of_type(grandchild_node))
    .filter(
        and_(grandchild_node.tag == "field1", grandchild_node.text == "hi")
    )
    .one()
)
ElementTree.dump(d.element)

# generalize the above approach into an extremely impoverished xpath function:

# one path step: "/tag", "/tag[@attr]" or "/tag[@attr=value]".  the value
# stops at the first "]" so that predicates on later steps are not swallowed.
_PATH_TOKEN_RE = re.compile(r"/([\w_]+)(?:\[@([\w_]+)(?:=([^\]]*))?\])?")


def find_document(path, compareto):
    """Return the Documents having a node at ``path`` whose text is ``compareto``.

    :param path: slash-separated steps starting at the root, e.g.
        ``"/somefile/header/field2[@attr=foo]"``.  A step may carry one
        ``[@name]`` or ``[@name=value]`` attribute predicate.
    :param compareto: text the node matched by the last step must equal.
    :return: list of matching ``Document`` objects.
    :raises ValueError: if ``path`` contains no recognizable step.
    """
    query = session.query(Document)
    attribute = Document._root
    target_node = None

    for match in _PATH_TOKEN_RE.finditer(path):
        token, attrname, attrvalue = match.group(1, 2, 3)

        # each step joins one level deeper along a fresh alias of _Node
        target_node = aliased(_Node)
        query = query.join(attribute.of_type(target_node)).filter(
            target_node.tag == token
        )
        attribute = target_node.children

        if attrname:
            attribute_entity = aliased(_Attribute)
            query = query.join(
                target_node.attributes.of_type(attribute_entity)
            ).filter(attribute_entity.name == attrname)
            if attrvalue:
                query = query.filter(attribute_entity.value == attrvalue)

    if target_node is None:
        raise ValueError("no path steps found in %r" % (path,))

    # the root is not needed to report the match, so skip the joined eager load
    return (
        query.options(lazyload(Document._root))
        .filter(target_node.text == compareto)
        .all()
    )


for path, compareto in (
    ("/somefile/header/field1", "hi"),
    ("/somefile/field1", "hi"),
    ("/somefile/header/field2", "there"),
    ("/somefile/header/field2[@attr=foo]", "there"),
):
    print("\nDocuments containing '%s=%s':" % (path, compareto), line)
    print([d.filename for d in find_document(path, compareto)])