1
2
3 """
Module to handle datatype restrictions, ie, data ranges.

The module implements the following aspects of datatype restrictions:

 - a new datatype is created at run time and added to the allowed and accepted datatypes; literals are checked whether they abide by the restrictions
 - the new datatype is defined to be a 'subClass' of the restricted datatype
 - literals of the restricted datatype that abide by the restrictions defined by the facets are also assigned to be of the new type

The last item is important to handle the following structures::
 ex:RE a owl:Restriction ;
     owl:onProperty ex:p ;
     owl:someValuesFrom [
         a rdfs:Datatype ;
         owl:onDatatype xsd:string ;
         owl:withRestrictions (
             [ xsd:minLength "3"^^xsd:integer ]
             [ xsd:maxLength "6"^^xsd:integer ]
         )
     ]
 .
 ex:q ex:p "abcd"^^xsd:string.
In the case above the system can then infer that C{ex:q} is also of type C{ex:RE}.

Datatype restrictions are used by the L{OWL RL Extensions<OWLRLExtras.OWLRL_Extension>} extension class.

The implementation is not 100% complete. Some things that an ideal implementation should do are not done yet, like:

 - checking whether a facet is of a datatype that is allowed for that facet
 - handling of non-literals in the facets (ie, if the resource is defined to be of type literal, but whose value
   is defined via a separate 'owl:sameAs' somewhere else)

@requires: U{RDFLib<http://rdflib.net>}, 2.2.2 and higher
@license: This software is available for use under the U{W3C Software License<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}

40 """
41
42 """
43 $Id: RestrictedDatatype.py,v 1.3 2009/09/18 15:08:50 ivan Exp $ $Date: 2009/09/18 15:08:50 $
44 """
45
46 __author__ = 'Ivan Herman'
47 __contact__ = 'Ivan Herman, ivan@w3.org'
48 __license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
49
50 import re
51
52 from OWL import *
53 from OWL import OWLNS as ns_owl
54 from rdflib.RDFS import Datatype
55 from rdflib.RDF import type
56 from rdflib.RDF import RDFNS as ns_rdf
57
58 from rdflib.Literal import Literal as rdflibLiteral
59 from rdflib.Literal import _XSD_NS as ns_xsd
60
61 from DatatypeHandling import AltXSDToPYTHON
62
63
# Facet categories. Each constant is a key into Datatypes_per_facets and
# identifies the family of facet checks that applies to a base datatype.

# Datatypes restricted through min/max (inclusive or exclusive) bounds
MIN_MAX = 0

# Datatypes restricted through length, minLength and maxLength only
LENGTH = 1

# Datatypes restricted through the length facets and through patterns
LENGTH_AND_PATTERN = 2

# Datatypes restricted through the length facets, patterns and language ranges
LENGTH_PATTERN_LRANGE = 3

# Facet category -> list of the datatype URIs belonging to that category
Datatypes_per_facets = {
    MIN_MAX: [
        ns_owl["rational"], ns_xsd["decimal"], ns_xsd["integer"],
        ns_xsd["nonNegativeInteger"], ns_xsd["nonPositiveInteger"],
        ns_xsd["positiveInteger"], ns_xsd["negativeInteger"],
        ns_xsd["long"], ns_xsd["short"], ns_xsd["byte"],
        ns_xsd["unsignedLong"], ns_xsd["unsignedInt"], ns_xsd["unsignedShort"], ns_xsd["unsignedByte"],
        ns_xsd["double"], ns_xsd["float"],
        ns_xsd["dateTime"], ns_xsd["dateTimeStamp"], ns_xsd["time"], ns_xsd["date"],
    ],
    LENGTH: [ns_xsd["hexBinary"], ns_xsd["base64Binary"]],
    LENGTH_AND_PATTERN: [
        ns_xsd["anyURI"], ns_xsd["string"], ns_xsd["NMTOKEN"], ns_xsd["Name"], ns_xsd["NCName"],
        ns_xsd["language"], ns_xsd["normalizedString"],
    ],
    LENGTH_PATTERN_LRANGE: [ns_rdf["plainLiteral"]],
}

# Flat list of every datatype that may be restricted through facets.
# A comprehension is used instead of reduce(): it avoids the repeated list
# concatenation and does not rely on reduce being a builtin.
facetable_datatypes = [dt for dts in Datatypes_per_facets.values() for dt in dts]
91
92
93
def _lit_to_value(dt, v):
    """
    Convert a string to a value with facet checking. RDF Literals are converted to
    Python values using this method; if there is a problem, an exception is raised (and caught higher
    up to generate an error message).

    The method is the equivalent of all the methods in the L{DatatypeHandling} module, and is registered
    to the system at run time, as new restricted datatypes are discovered.

    (Technically, the registration is done via a C{lambda v: _lit_to_value(self,v)} setting from within a
    L{RestrictedDatatype} instance)
    @param dt: faceted datatype
    @type dt: L{RestrictedDatatype}
    @param v: literal to be converted and checked
    @return: the Python value produced by the converter of the base datatype
    @raise ValueError: invalid literal value
    """
    # The conversion itself may raise an exception if the lexical form does
    # not fit the base datatype; that is left to propagate to the caller.
    value = dt.converter(v)

    # checkValue() runs the facet checks selected for the base type of dt.
    # When no facet category applies, check_methods is empty and checkValue()
    # returns True, so no separate category lookup is needed here.
    if not dt.checkValue(value):
        # The message used to format the undefined name 'self', which turned
        # every facet violation into a NameError instead of a ValueError.
        raise ValueError("Literal value %s does not fit the faceted datatype %s" % (v, dt.datatype))

    return value
122
def _lang_range_check(range, lang):
    """
    Implementation of the extended filtering algorithm, as defined in point 3.3.2,
    of U{RFC 4647<http://www.rfc-editor.org/rfc/rfc4647.txt>}, on matching language ranges and language tags.
    Needed to handle the C{rdf:PlainLiteral} datatype.
    @param range: language range
    @param lang: language tag; C{None} (no language tag at all) never matches
    @rtype: boolean
    """
    def _match(r, l):
        """Matching of a range and language item: either range is a wildcard or the two are equal
        @param r: language range item
        @param l: language tag item
        @rtype: boolean
        """
        return r == '*' or r == l

    # A missing language tag used to crash on lang.strip(); there is no tag
    # that could match the range, so the answer is simply False.
    if lang is None:
        return False

    # Matching is case insensitive and done subtag by subtag
    range_tags = range.strip().lower().split('-')
    lang_tags = lang.strip().lower().split('-')

    # The first subtags must match (or the range must start with a wildcard)
    if not _match(range_tags[0], lang_tags[0]):
        return False

    r_idx = 1
    l_idx = 1
    while r_idx < len(range_tags):
        wanted = range_tags[r_idx]
        if wanted == '*':
            # A wildcard inside the range matches any number of subtags
            r_idx += 1
        elif l_idx >= len(lang_tags):
            # The range still asks for a subtag, but the tag is exhausted
            return False
        elif _match(wanted, lang_tags[l_idx]):
            r_idx += 1
            l_idx += 1
        elif len(lang_tags[l_idx]) == 1:
            # Singleton subtags (e.g. 'x') may not be skipped over
            return False
        else:
            # Skip the non-matching subtag of the language tag
            l_idx += 1
    return True
162
163
164
# NOTE(review): the original 'def' line of this function was lost; the name
# 'extract_faceted_datatypes' is reconstructed and the parameters follow the
# docstring -- confirm against the callers before merging.
def extract_faceted_datatypes(core, graph):
    """
    Extractions of restricted (ie, faceted) datatypes from the graph.

    Every resource typed as a datatype is inspected; if it restricts exactly one facetable base
    datatype through exactly one list of facets, a L{RestrictedDatatype} is created for it.
    Problems are reported through C{core.add_error} and the offending datatype is skipped.
    @param core: the core closure instance that is being handled
    @type core: L{Closure.Core}
    @param graph: RDFLib graph
    @return: array of L{RestrictedDatatype} instances
    """
    retval = []
    for dtype in graph.subjects(type, Datatype):
        try:
            base_types = list(graph.objects(dtype, onDatatype))
            if not base_types:
                # Not a restriction of another datatype
                continue
            if len(base_types) > 1:
                raise Exception("Several base datatype for the same restriction %s" % dtype)
            base_type = base_types[0]
            if base_type not in facetable_datatypes:
                continue

            rlists = list(graph.objects(dtype, withRestrictions))
            if len(rlists) > 1:
                raise Exception("More than one facet lists for the same restriction %s" % dtype)
            if not rlists:
                continue

            final_facets = []
            for r in graph.items(rlists[0]):
                for (facet, lit) in graph.predicate_objects(r):
                    if not isinstance(lit, rdflibLiteral):
                        # Non-literal facet values are not handled
                        continue
                    # Extract the Python value of the facet literal; a failed
                    # conversion is reported and only that facet is dropped.
                    try:
                        if lit.datatype is None or lit.datatype == ns_xsd["string"]:
                            final_facets.append((facet, str(lit)))
                        else:
                            final_facets.append((facet, AltXSDToPYTHON[lit.datatype](str(lit))))
                    except Exception as msg:
                        core.add_error(msg)

            # Everything needed is available: create the new datatype
            retval.append(RestrictedDatatype(dtype, base_type, final_facets))
        except Exception as msg:
            # Report the problem and go on with the next datatype
            core.add_error(msg)
    return retval
216
217
class RestrictedDatatype:
    """
    Implementation of a datatype with facets, ie, datatype with restrictions.

    @ivar datatype : the URI for this datatype
    @ivar base_type : URI of the datatype that is restricted
    @ivar converter : method to convert a literal of the base type to a Python value (drawn from L{DatatypeHandling.AltXSDToPYTHON})
    @ivar minExclusive : value for the C{xsd:minExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minInclusive : value for the C{xsd:minInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxExclusive : value for the C{xsd:maxExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxInclusive : value for the C{xsd:maxInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minLength : value for the C{xsd:minLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxLength : value for the C{xsd:maxLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar length : value for the C{xsd:length} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar pattern : array of patterns for the C{xsd:pattern} facet, initialized to C{[]} and set to the right value if a facet is around
    @ivar langRange : array of language ranges for the C{rdf:langRange} facet, initialized to C{[]} and set to the right value if a facet is around
    @ivar check_methods : list of class methods that are relevant for the given C{base_type}
    @ivar toPython : function to convert a Literal of the specified type to a Python value. Is defined by C{lambda v : _lit_to_value(self, v)}, see L{_lit_to_value}
    """

    def __init__(self, type_uri, base_type, facets):
        """
        @param type_uri: URI of the datatype being defined
        @param base_type: URI of the base datatype, ie, the one being restricted
        @param facets : array of C{(facetURI, value)} pairs
        @raise Exception: if there is no converter for the base datatype
        """
        self.datatype = type_uri
        self.base_type = base_type
        if self.base_type not in AltXSDToPYTHON:
            raise Exception("No facet is implemented for datatype %s" % self.base_type)
        self.converter = AltXSDToPYTHON[self.base_type]

        self.minExclusive = None
        self.maxExclusive = None
        self.minInclusive = None
        self.maxInclusive = None
        self.length = None
        self.maxLength = None
        self.minLength = None
        self.pattern = []
        self.langRange = []

        # When a facet is repeated, the most restrictive value wins:
        # the largest lower bound and the smallest upper bound.
        for (facet, value) in facets:
            if facet == ns_xsd["minInclusive"]:
                if self.minInclusive is None or self.minInclusive < value:
                    self.minInclusive = value
            elif facet == ns_xsd["minExclusive"]:
                if self.minExclusive is None or self.minExclusive < value:
                    self.minExclusive = value
            elif facet == ns_xsd["maxInclusive"]:
                if self.maxInclusive is None or value < self.maxInclusive:
                    self.maxInclusive = value
            elif facet == ns_xsd["maxExclusive"]:
                if self.maxExclusive is None or value < self.maxExclusive:
                    self.maxExclusive = value
            elif facet == ns_rdf["langRange"]:
                self.langRange.append(value)
            elif facet == ns_xsd["length"]:
                self.length = value
            elif facet == ns_xsd["maxLength"]:
                if self.maxLength is None or value < self.maxLength:
                    self.maxLength = value
            elif facet == ns_xsd["minLength"]:
                # This guard used to test maxLength (copy-paste slip), so a
                # minLength could be dropped or replaced depending on an
                # unrelated facet. Keep the largest minimum instead.
                if self.minLength is None or self.minLength < value:
                    self.minLength = value
            elif facet == ns_xsd["pattern"]:
                self.pattern.append(re.compile(value))

        # Checks that are relevant for each facet category
        length_checks = [RestrictedDatatype._check_min_length, RestrictedDatatype._check_max_length,
                         RestrictedDatatype._check_length]
        facet_to_method = {
            MIN_MAX: [RestrictedDatatype._check_max_exclusive, RestrictedDatatype._check_min_exclusive,
                      RestrictedDatatype._check_max_inclusive, RestrictedDatatype._check_min_inclusive],
            LENGTH: length_checks,
            LENGTH_AND_PATTERN: length_checks + [RestrictedDatatype._check_pattern],
            # The pattern check was missing from this category although the
            # category covers length, pattern and language range facets.
            LENGTH_PATTERN_LRANGE: length_checks + [RestrictedDatatype._check_pattern,
                                                    RestrictedDatatype._check_lang_range],
        }
        self.check_methods = []
        for cat in Datatypes_per_facets:
            if self.base_type in Datatypes_per_facets[cat]:
                self.check_methods = facet_to_method[cat]
                break
        self.toPython = lambda v: _lit_to_value(self, v)

    def checkValue(self, value):
        """
        Check whether the (python) value abides by the constraints defined by the current facets.
        @param value: the value to be checked
        @rtype: boolean
        """
        for method in self.check_methods:
            if not method(self, value):
                return False
        return True

    def _text_of(self, value):
        """
        Return the string to be measured or matched: the string form of an RDFLib Literal,
        any other value unchanged.
        @param value: the value to be checked
        """
        if isinstance(value, rdflibLiteral):
            return str(value)
        return value

    def _check_min_exclusive(self, value):
        """
        Check the (python) value against min exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minExclusive is None:
            return True
        # The original referred to the undefined name 'sel' here
        return self.minExclusive < value

    def _check_min_inclusive(self, value):
        """
        Check the (python) value against min inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minInclusive is None:
            return True
        return self.minInclusive <= value

    def _check_max_exclusive(self, value):
        """
        Check the (python) value against max exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxExclusive is None:
            return True
        return value < self.maxExclusive

    def _check_max_inclusive(self, value):
        """
        Check the (python) value against max inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxInclusive is None:
            return True
        return value <= self.maxInclusive

    def _check_min_length(self, value):
        """
        Check the (python) value against minimum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minLength is None:
            return True
        return self.minLength <= len(self._text_of(value))

    def _check_max_length(self, value):
        """
        Check the (python) value against maximum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxLength is None:
            return True
        return self.maxLength >= len(self._text_of(value))

    def _check_length(self, value):
        """
        Check the (python) value against exact length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.length is None:
            return True
        return self.length == len(self._text_of(value))

    def _check_pattern(self, value):
        """
        Check the (python) value against array of regular expressions; all of them must match.
        @param value: the value to be checked
        @rtype: boolean
        """
        val = self._text_of(value)
        # NOTE(review): match() anchors the pattern at the start of the string
        # only -- confirm whether a whole-string match is intended.
        for p in self.pattern:
            if p.match(val) is None:
                return False
        return True

    def _check_lang_range(self, value):
        """
        Check the (python) value against array of language ranges; all of them must match.
        Values that are not RDFLib Literals are always rejected.
        @param value: the value to be checked
        @rtype: boolean
        """
        if not isinstance(value, rdflibLiteral):
            return False
        lang = value.language
        if lang is None:
            # Presumably a literal without a language tag (TODO confirm): it
            # cannot match any range, so it passes only if there are none.
            return not self.langRange
        for r in self.langRange:
            if not _lang_range_check(r, lang):
                return False
        return True
426