Package RDFClosure :: Package serializers :: Module PrettyXMLSerializer
[hide private]
[frames | no frames]

Source Code for Module RDFClosure.serializers.PrettyXMLSerializer

  1  """ 
  2   
  3  Serializer for RDF/XML. Note that this is a slightly modified version of RDFLib's PrettyXMLSerializer module; the 
  4  original version had some bugs (mainly in handling collections) hence this local version. 
  5  """ 
  6   
  7   
  8  from rdflib import RDF, RDFS 
  9   
 10  from rdflib        import URIRef, Literal, BNode 
 11  from rdflib.util  import first, uniq, more_than 
 12  from rdflib.Graph import Seq 
 13   
 14  from rdflib.syntax.serializers  import Serializer 
 15  from RDFClosure.serializers.XMLWriter import XMLWriter 
 16   
 17  XMLLANG = "http://www.w3.org/XML/1998/namespacelang" 
 18   
 19   
 20  # TODO: 
def fix(val):
    """Return *val* without a leading ``_:`` prefix.

    Blank node identifiers are written into ``rdf:nodeID`` attributes, and
    the ``_:`` prefix is not valid inside an NCName, so it is stripped here.
    Values that do not start with ``_:`` are returned unchanged.
    """
    if val.startswith("_:"):
        return val[2:]
    return val
27
class PrettyXMLSerializer(Serializer):
    """RDF/XML serializer that nests resources inside their referrers.

    Compared with a flat serialization, this class tries to write a resource
    inline at the place it is used as an object (down to ``max_depth`` levels
    of nesting), emits ``rdf:parseType="Collection"`` for RDF lists whose
    members are all non-literals, and writes ``rdf:Seq``/``rdf:Alt``/
    ``rdf:Bag`` containers with ``rdf:li`` children.
    """

    def __init__(self, store, max_depth=8):
        """Prepare the serializer and pre-compute the RDF list information.

        :param store: the graph to serialize (handed to the base class).
        :param max_depth: deepest nesting level at which an object resource
            is still written inline; deeper objects are written as
            references instead.
        """
        super(PrettyXMLSerializer, self).__init__(store)
        # The constructor argument used to be dropped silently because
        # serialize() overwrote it with a hard-coded 8; it is now honoured.
        self.max_depth = max_depth

        # list_heads: every list node (subject of rdf:first) whose remaining
        # items contain no Literal -- the RDF/XML Collection syntax cannot
        # express literal members.
        # list_really_heads: the subset of list_heads that is not the
        # rdf:rest of some other node, i.e. true starting points of a list.
        self.list_heads = []
        self.list_really_heads = []
        for node in self.store.subjects(RDF.first, None):
            items = list(store.items(node))
            if any(isinstance(item, Literal) for item in items):
                continue
            self.list_heads.append(node)
            if not more_than(self.store.subjects(RDF.rest, node), 0):
                self.list_really_heads.append(node)

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write the whole graph to *stream* as RDF/XML.

        :param stream: output stream handed to ``XMLWriter``.
        :param base: stored as ``self.base`` for the duration of the run.
        :param encoding: encoding handed to ``XMLWriter``.
        :param args: accepted for interface compatibility; not used here.
        """
        self.__serialized = {}

        # Internal list nodes must never be written as stand-alone subjects,
        # so they are marked as already done before anything is emitted.
        for node in self.list_heads:
            if node not in self.list_really_heads:
                self.__serialized[node] = 1

        store = self.store
        self.base = base

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)

        writer.push(RDF.RDF)

        # Predicates, and types (which can appear as XML element names), are
        # run through compute_qname() purely for its side effect: an
        # artificial namespace is created when the URI has no usable one.
        for predicate in uniq(store.predicates()):
            nm.compute_qname(predicate)
        for _subject, rdf_type in uniq(store.subject_objects(RDF.type)):
            nm.compute_qname(rdf_type)

        writer.namespaces(sorted(self.store.namespaces()))

        # First pass: subjects that cannot be written inline, i.e. those
        # never used as an object, or that reference themselves.
        for subject in store.subjects():
            is_referenced = (None, None, subject) in store
            if not is_referenced or (subject, None, subject) in store:
                self.subject(subject, 1)

        # Second pass: anything that has not been reached yet.
        for subject in store.subjects():
            self.subject(subject, 1)

        writer.pop(RDF.RDF)

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None

    def subject(self, subject, depth=1):
        """Write *subject* and all its properties, unless already written.

        :param subject: the resource to emit as an XML element.
        :param depth: current nesting level; 1 means directly under the
            ``rdf:RDF`` root.
        """
        store = self.store
        writer = self.writer
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        # Use the first rdf:type as element name when it has a qname;
        # otherwise fall back to a plain rdf:Description.
        rdf_type = first(store.objects(subject, RDF.type))
        try:
            self.nm.qname(rdf_type)
        except Exception:
            rdf_type = None
        element = rdf_type or RDF.Description
        writer.push(element)

        if isinstance(subject, BNode):
            def subj_as_obj_more_than(ceil):
                return more_than(store.triples((None, None, subject)), ceil)
            # A blank node only needs an rdf:nodeID when something else has
            # to point at it: any reference for a top-level node, or more
            # than one reference for a node that is already nested.
            if (depth == 1 and subj_as_obj_more_than(0)) or subj_as_obj_more_than(1):
                writer.attribute(RDF.nodeID, fix(subject))
        else:
            writer.attribute(RDF.about, self.relativize(subject))

        for predicate, obj in store.predicate_objects(subject):
            # The type used as element name is not repeated as a property.
            if not (predicate == RDF.type and obj == rdf_type):
                self.predicate(predicate, obj, depth + 1)
        writer.pop(element)

    def predicate(self, predicate, object, depth=1):
        """Write one property element for the pair (*predicate*, *object*).

        The parameter name ``object`` shadows the builtin; it is kept because
        it is part of the method's existing signature.

        :param predicate: the property URI, used as the element name.
        :param object: the value; a Literal, a list head, a container or any
            other resource.
        :param depth: current nesting level.
        """
        writer = self.writer
        store = self.store

        def default_case():
            # Nest the object while allowed, otherwise just refer to it.
            if depth <= self.max_depth:
                self.subject(object, depth + 1)
            elif isinstance(object, BNode):
                writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))

        writer.push(predicate)

        if isinstance(object, Literal):
            self._write_literal(object)
        elif object in self.list_heads:
            items = list(store.items(object))
            # NOTE(review): the indentation of this branch was lost in the
            # listing this code was recovered from; the fallback is paired
            # with the "no literal items" test, as the adjacent comment in
            # the original suggests -- confirm against the upstream file.
            if any(isinstance(item, Literal) for item in items):
                default_case()
            else:
                # This is a kosher list that could be handled with the
                # Collection parse type trick.
                self.__serialized[object] = 1
                if object in self.list_really_heads:
                    self._write_collection(items)
        elif object in self.__serialized or (object, None, None) not in store:
            self._write_reference(object, RDF.resource)
        else:
            container_type = self._container_type(object)
            if container_type is None:
                default_case()
            else:
                self.__serialized[object] = 1
                writer.push(container_type)
                for item in Seq(store, object):
                    self.predicate(RDF.li, item, depth + 1)
                writer.pop(container_type)

        writer.pop(predicate)

    def _write_literal(self, literal):
        """Write language/datatype attributes and the text of *literal*."""
        writer = self.writer
        to_escape = True
        if literal.language:
            writer.attribute(XMLLANG, literal.language)
        if literal.datatype:
            if ("%s" % literal.datatype) == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
                # XML literals are passed to the writer with escaping off.
                writer.attribute(RDF.parseType, "Literal")
                to_escape = False
            else:
                writer.attribute(RDF.datatype, literal.datatype)
        writer.text(literal, to_escape)

    def _write_collection(self, items):
        """Write *items* as the children of a parseType="Collection" element."""
        writer = self.writer
        writer.attribute(RDF.parseType, "Collection")
        for item in items:
            if item in self.__serialized:
                # Already written somewhere else: only refer to it.
                writer.push(RDF.Description)
                self._write_reference(item, RDF.about)
                writer.pop(RDF.Description)
            else:
                self.subject(item)
                self.__serialized[item] = 1

    def _write_reference(self, node, uri_attribute):
        """Write the attribute that points at *node* on the current element.

        Blank nodes get ``rdf:nodeID`` (only when they are the object of at
        least one triple); other resources get *uri_attribute* with their
        relativized URI.
        """
        if isinstance(node, BNode):
            if more_than(self.store.triples((None, None, node)), 0):
                self.writer.attribute(RDF.nodeID, fix(node))
        else:
            self.writer.attribute(uri_attribute, self.relativize(node))

    def _container_type(self, node):
        """Return rdf:Seq, rdf:Alt or rdf:Bag if *node* is typed so, else None."""
        for candidate in (RDF.Seq, RDF.Alt, RDF.Bag):
            if (node, RDF.type, candidate) in self.store:
                return candidate
        return None