Package RDFClosure :: Module RestrictedDatatype
[hide private]
[frames] | no frames]

Source Code for Module RDFClosure.RestrictedDatatype

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  """ 
  4  Module to handle datatype restrictions, i.e., data ranges. 
  5    
  6  The module implements the following aspects of datatype restrictions: 
  7   
  8   - a new datatype is created run-time and added to the allowed and accepted datatypes; literals are checked whether they abide to the restrictions 
  9   - the new datatype is defined to be a 'subClass' of the restricted datatype 
 10   - literals of the restricted datatype and that abide to the restrictions defined by the facets are also assigned to be of the new type 
 11    
 12  The last item is important to handle the following structures:: 
 13   ex:RE a owl:Restriction ; 
 14          owl:onProperty ex:p ; 
 15          owl:someValuesFrom [ 
 16                  a rdfs:Datatype ; 
 17                  owl:onDatatype xsd:string ; 
 18                  owl:withRestrictions ( 
 19                          [ xsd:minLength "3"^^xsd:integer ] 
 20                          [ xsd:maxLength "6"^^xsd:integer ] 
 21                  ) 
 22          ] 
 23   . 
 24   ex:q ex:p "abcd"^^xsd:string. 
 25  In the case above the system can then infer that C{ex:q} is also of type C{ex:RE}. 
 26   
 27  Datatype restrictions are used by the L{OWL RL Extensions<OWLRLExtras.OWLRL_Extension>} extension class. 
 28   
 29  The implementation is not 100% complete. Some things that an ideal implementation should do are not done yet like: 
 30   
 31   - checking whether a facet is of a datatype that is allowed for that facet 
 32   - handling of non-literals in the facets (ie, if the resource is defined to be of type literal, but whose value 
 33   is defined via a separate 'owl:sameAs' somewhere else) 
 34   
 35  @requires: U{RDFLib<http://rdflib.net>}, 2.2.2. and higher 
 36  @license: This software is available for use under the U{W3C Software License<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>} 
 37  @organization: U{World Wide Web Consortium<http://www.w3.org>} 
 38  @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} 
 39   
 40  """ 
 41   
 42  """ 
 43  $Id: RestrictedDatatype.py,v 1.3 2009/09/18 15:08:50 ivan Exp $ $Date: 2009/09/18 15:08:50 $ 
 44  """ 
 45   
 46  __author__  = 'Ivan Herman' 
 47  __contact__ = 'Ivan Herman, ivan@w3.org' 
 48  __license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231' 
 49   
 50  import re 
 51   
 52  from OWL                        import * 
 53  from OWL                        import OWLNS as ns_owl 
 54  from rdflib.RDFS        import Datatype 
 55  from rdflib.RDF         import type 
 56  from rdflib.RDF         import RDFNS as ns_rdf 
 57   
 58  from rdflib.Literal     import Literal as rdflibLiteral 
 59  from rdflib.Literal     import _XSD_NS as ns_xsd 
 60   
 61  from DatatypeHandling import AltXSDToPYTHON 
 62   
 63  #: Constant for datatypes using min, max (inclusive and exclusive): 
 64  MIN_MAX                                 = 0 
 65  #: Constant for datatypes using length, minLength, and maxLength (and nothing else) 
 66  LENGTH                                  = 1 
 67  #: Constant for datatypes using length, minLength, maxLength, and pattern 
 68  LENGTH_AND_PATTERN              = 2 
 69  #: Constat for datatypes using length, minLength, maxLength, pattern, and lang range 
 70  LENGTH_PATTERN_LRANGE   = 3 
 71   
 72  #: Dictionary of all the datatypes, keyed by category 
 73  Datatypes_per_facets = { 
 74          MIN_MAX                                 : [ ns_owl["rational"], ns_xsd["decimal"], ns_xsd["integer"], 
 75                                                                  ns_xsd["nonNegativeInteger"], ns_xsd["nonPositiveInteger"], 
 76                                                                  ns_xsd["positiveInteger"], ns_xsd["negativeInteger"], 
 77                                                                  ns_xsd["long"], ns_xsd["short"], ns_xsd["byte"], 
 78                                                                  ns_xsd["unsignedLong"], ns_xsd["unsignedInt"], ns_xsd["unsignedShort"], ns_xsd["unsignedByte"], 
 79                                                                  ns_xsd["double"], ns_xsd["float"], 
 80                                                                  ns_xsd["dateTime"], ns_xsd["dateTimeStamp"], ns_xsd["time"], ns_xsd["date"] 
 81                                                          ], 
 82          LENGTH                                  : [ ns_xsd["hexBinary"], ns_xsd["base64Binary"] ], 
 83          LENGTH_AND_PATTERN              : [ ns_xsd["anyURI"], ns_xsd["string"], ns_xsd["NMTOKEN"], ns_xsd["Name"], ns_xsd["NCName"], 
 84                                                                  ns_xsd["language"], ns_xsd["normalizedString"] 
 85                                                          ], 
 86          LENGTH_PATTERN_LRANGE   : [ ns_rdf["plainLiteral"] ] 
 87  } 
 88   
 89  #: a simple list containing C{all} datatypes that may have a facet 
 90  facetable_datatypes = reduce(lambda x,y: x+y, Datatypes_per_facets.values()) 
 91   
 92  ####################################################################################################### 
 93   
def _lit_to_value(dt, v):
    """
    This method is used to convert a string to a value with facet checking. RDF Literals are converted to
    Python values using this method; if there is a problem, an exception is raised (and caught higher
    up to generate an error message).

    The method is the equivalent of all the methods in the L{DatatypeHandling} module, and is registered
    to the system run time, as new restricted datatypes are discovered.

    (Technically, the registration is done via a C{lambda v: _lit_to_value(self,v)} setting from within a
    L{RestrictedDatatype} instance)
    @param dt: faceted datatype
    @type dt: L{RestrictedDatatype}
    @param v: literal to be converted and checked
    @return: the Python value produced by C{dt.converter}
    @raise ValueError: invalid literal value
    """
    # This may raise an exception of its own...
    value = dt.converter(v)

    # The facets are only enforced when the base type belongs to one of the
    # facet categories.
    for members in Datatypes_per_facets.values():
        if dt.base_type in members and not dt.checkValue(value):
            # The message must name 'dt': there is no 'self' in this
            # module-level function.
            raise ValueError("Literal value %s does not fit the faceted datatype %s" % (v, dt))
    # got here, everything should be fine
    return value
122
def _lang_range_check(range, lang):
    """
    Extended filtering of a language tag against a language range, following
    point 3.3.2 of U{RFC 4647<http://www.rfc-editor.org/rfc/rfc4647.txt>}.
    Needed to handle the C{rdf:PlainLiteral} datatype.

    Both arguments are stripped, lower-cased and split on C{'-'} before the comparison.
    @param range: language range
    @param lang: language tag
    @rtype: boolean
    """
    wanted_tags = range.strip().lower().split('-')
    actual_tags = lang.strip().lower().split('-')

    # The primary subtags have to agree, unless the range starts with a wildcard
    primary = wanted_tags[0]
    if primary != '*' and primary != actual_tags[0]:
        return False

    range_pos = 1
    tag_pos = 1
    range_len = len(wanted_tags)
    tag_len = len(actual_tags)
    while range_pos < range_len:
        wanted = wanted_tags[range_pos]
        if wanted == '*':
            # a wildcard in the range is skipped without consuming a tag item
            range_pos += 1
        elif tag_pos >= tag_len:
            # the range still requires something, but the tag is exhausted
            return False
        elif wanted == actual_tags[tag_pos]:
            range_pos += 1
            tag_pos += 1
        elif len(actual_tags[tag_pos]) == 1:
            # a one-character tag item stops the search for a later match
            return False
        else:
            # skip this tag item and try the next one against the same range item
            tag_pos += 1
    return True

#######################################################################################################
def extract_faceted_datatypes(core, graph):
    """
    Extractions of restricted (ie, faceted) datatypes from the graph.

    Every subject typed as a datatype is examined. A L{RestrictedDatatype} is built only when the subject
    has exactly one C{onDatatype} value, that value is one of the L{facetable_datatypes}, and there is
    exactly one C{withRestrictions} list. Problems met on the way are reported through C{core.add_error}
    and the offending datatype (or facet) is skipped; nothing is raised to the caller.
    @param core: the core closure instance that is being handled
    @type core: L{Closure.Core}
    @param graph: RDFLib graph
    @return: array of L{RestrictedDatatype} instances
    """
    retval = []
    for dtype in graph.subjects(type, Datatype):
        try:
            base_types = list(graph.objects(dtype, onDatatype))
            if not base_types:
                continue
            if len(base_types) > 1:
                raise Exception("Several base datatype for the same restriction %s" % dtype)
            base_type = base_types[0]
            if base_type not in facetable_datatypes:
                continue

            rlists = list(graph.objects(dtype, withRestrictions))
            if len(rlists) > 1:
                raise Exception("More than one facet lists for the same restriction %s" % dtype)
            if not rlists:
                continue

            final_facets = []
            for r in graph.items(rlists[0]):
                for (facet, lit) in graph.predicate_objects(r):
                    # Facet values that are not literals are ignored
                    if not isinstance(lit, rdflibLiteral):
                        continue
                    # The python value of the literal should be extracted. The conversion
                    # may fail; that is reported and only this facet is dropped.
                    try:
                        if lit.datatype is None or lit.datatype == ns_xsd["string"]:
                            final_facets.append((facet, str(lit)))
                        else:
                            final_facets.append((facet, AltXSDToPYTHON[lit.datatype](str(lit))))
                    except Exception as msg:
                        core.add_error(msg)

            # We do have everything we need:
            retval.append(RestrictedDatatype(dtype, base_type, final_facets))
        except Exception as msg:
            # Covers the two explicit raises above as well as a failing
            # RestrictedDatatype constructor: report and move on to the next datatype.
            core.add_error(msg)
    return retval
216 217
class RestrictedDatatype:
    """
    Implementation of a datatype with facets, ie, datatype with restrictions.

    @ivar datatype : the URI for this datatype
    @ivar base_type : URI of the datatype that is restricted
    @ivar converter : method to convert a literal of the base type to a Python value (drawn from L{DatatypeHandling.AltXSDToPYTHON})
    @ivar minExclusive : value for the C{xsd:minExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minInclusive : value for the C{xsd:minInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxExclusive : value for the C{xsd:maxExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxInclusive : value for the C{xsd:maxInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minLength : value for the C{xsd:minLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxLength : value for the C{xsd:maxLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar length : value for the C{xsd:length} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar pattern : array of patterns for the C{xsd:pattern} facet, initialized to C{[]} and set to the right value if a facet is around
    @ivar langRange : array of language ranges for the C{rdf:langRange} facet, initialized to C{[]} and set to the right value if a facet is around
    @ivar check_methods : list of class methods that are relevant for the given C{base_type}
    @ivar toPython : function to convert a Literal of the specified type to a Python value. Is defined by C{lambda v : _lit_to_value(self, v)}, see L{_lit_to_value}
    """

    def __init__(self, type_uri, base_type, facets):
        """
        @param type_uri: URI of the datatype being defined
        @param base_type: URI of the base datatype, ie, the one being restricted
        @param facets : array of C{(facetURI, value)} pairs
        @raise Exception: if C{base_type} has no converter in L{DatatypeHandling.AltXSDToPYTHON}
        """
        self.datatype = type_uri
        self.base_type = base_type
        if self.base_type not in AltXSDToPYTHON:
            raise Exception("No facet is implemented for datatype %s" % self.base_type)
        self.converter = AltXSDToPYTHON[self.base_type]

        self.minExclusive = None
        self.maxExclusive = None
        self.minInclusive = None
        self.maxInclusive = None
        self.length = None
        self.maxLength = None
        self.minLength = None
        self.pattern = []
        self.langRange = []
        # When a min/max facet occurs several times the tightest bound is kept:
        # the largest lower bound and the smallest upper bound.
        for (facet, value) in facets:
            if facet == ns_xsd["minInclusive"] and (self.minInclusive is None or self.minInclusive < value):
                self.minInclusive = value
            elif facet == ns_xsd["minExclusive"] and (self.minExclusive is None or self.minExclusive < value):
                self.minExclusive = value
            elif facet == ns_xsd["maxInclusive"] and (self.maxInclusive is None or value < self.maxInclusive):
                self.maxInclusive = value
            elif facet == ns_xsd["maxExclusive"] and (self.maxExclusive is None or value < self.maxExclusive):
                self.maxExclusive = value
            elif facet == ns_rdf["langRange"]:
                self.langRange.append(value)
            elif facet == ns_xsd["length"]:
                self.length = value
            elif facet == ns_xsd["maxLength"] and (self.maxLength is None or value < self.maxLength):
                self.maxLength = value
            # minLength has to be compared with the current minLength (not with
            # maxLength), otherwise a minLength >= maxLength is silently lost.
            elif facet == ns_xsd["minLength"] and (self.minLength is None or self.minLength < value):
                self.minLength = value
            elif facet == ns_xsd["pattern"]:
                self.pattern.append(re.compile(value))

        # Choose the methods that are relevant for this datatype, based on the base type
        length_checks = [
            RestrictedDatatype._check_min_length,
            RestrictedDatatype._check_max_length,
            RestrictedDatatype._check_length,
        ]
        facet_to_method = {
            MIN_MAX: [
                RestrictedDatatype._check_max_exclusive, RestrictedDatatype._check_min_exclusive,
                RestrictedDatatype._check_max_inclusive, RestrictedDatatype._check_min_inclusive,
            ],
            LENGTH: length_checks,
            LENGTH_AND_PATTERN: length_checks + [RestrictedDatatype._check_pattern],
            # This category is defined as length, pattern *and* language range,
            # so the pattern check belongs here as well.
            LENGTH_PATTERN_LRANGE: length_checks + [
                RestrictedDatatype._check_pattern, RestrictedDatatype._check_lang_range,
            ],
        }
        self.check_methods = []
        for cat in Datatypes_per_facets:
            if self.base_type in Datatypes_per_facets[cat]:
                self.check_methods = facet_to_method[cat]
                break
        self.toPython = lambda v: _lit_to_value(self, v)

    def checkValue(self, value):
        """
        Check whether the (python) value abides to the constraints defined by the current facets.
        @param value: the value to be checked
        @rtype: boolean
        """
        # check_methods holds functions taken from the class, hence the explicit self
        return all(method(self, value) for method in self.check_methods)

    def _text_of(self, value):
        """
        Return the string form of an RDFLib literal; any other value is returned unchanged.
        @param value: the value to be measured or matched
        """
        if isinstance(value, rdflibLiteral):
            return str(value)
        return value

    def _check_min_exclusive(self, value):
        """
        Check the (python) value against min exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minExclusive is None:
            return True
        return self.minExclusive < value

    def _check_min_inclusive(self, value):
        """
        Check the (python) value against min inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minInclusive is None:
            return True
        return self.minInclusive <= value

    def _check_max_exclusive(self, value):
        """
        Check the (python) value against max exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxExclusive is None:
            return True
        return value < self.maxExclusive

    def _check_max_inclusive(self, value):
        """
        Check the (python) value against max inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxInclusive is None:
            return True
        return value <= self.maxInclusive

    def _check_min_length(self, value):
        """
        Check the (python) value against minimum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minLength is None:
            return True
        return self.minLength <= len(self._text_of(value))

    def _check_max_length(self, value):
        """
        Check the (python) value against maximum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxLength is None:
            return True
        return len(self._text_of(value)) <= self.maxLength

    def _check_length(self, value):
        """
        Check the (python) value against exact length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.length is None:
            return True
        return self.length == len(self._text_of(value))

    def _check_pattern(self, value):
        """
        Check the (python) value against array of regular expressions; all of them have to match.
        @param value: the value to be checked
        @rtype: boolean
        """
        if not self.pattern:
            return True
        val = self._text_of(value)
        # NOTE(review): re.match only anchors at the start of the string; confirm
        # whether text trailing after the match should make the value invalid.
        for p in self.pattern:
            if p.match(val) is None:
                return False
        return True

    def _check_lang_range(self, value):
        """
        Check the (python) value against array of language ranges; all of them have to match.
        Values that are not RDFLib literals are always rejected.
        @param value: the value to be checked
        @rtype: boolean
        """
        if not isinstance(value, rdflibLiteral):
            return False
        lang = value.language
        for r in self.langRange:
            # A literal without a language tag cannot match a language range
            # (and must not be handed over to the string-based matcher).
            if lang is None or not _lang_range_check(r, lang):
                return False
        return True
426