Package RDFClosure :: Package parsers :: Package n3p :: Module uripath
[hide private]
[frames | no frames]

Source Code for Module RDFClosure.parsers.n3p.uripath

  1  #!/bin/env python 
  2  """ 
  3  Uniform Resource Identifier (URI) path manipulation, 
  4  above the access layer 
  5   
  6  The name of this module and the functions are somewhat 
  7  arbitrary; they hark to other parts of the python 
  8  library; e.g. uripath.join() is somewhat like os.path.join(). 
  9   
 10  REFERENCES 
 11   
 12    Uniform Resource Identifiers (URI): Generic Syntax 
 13    http://www.ietf.org/rfc/rfc2396.txt 
 14   
 15    The Web Model: Information hiding and URI syntax (Jan 98) 
 16    http://www.w3.org/DesignIssues/Model.html 
 17   
 18    URI API design [was: URI Test Suite] Dan Connolly (Sun, Aug 12 2001) 
 19    http://lists.w3.org/Archives/Public/uri/2001Aug/0021.html 
 20   
 21  """ 
 22   
 23  __version__ = "$Id: uripath.py,v 1.1 2009/07/29 12:49:35 ivan Exp $" 
 24   
 25  from string import find, rfind, index 
 26   
 27   
def splitFrag(uriref):
    """Split a URI reference between the fragment and the rest.

    Punctuation is thrown away: the '#' itself appears in neither part.
    The split is made at the last '#' in uriref.

    Returns a (rest, fragment) tuple. fragment is None when uriref
    contains no '#'.

    e.g.

    >>> splitFrag("abc#def")
    ('abc', 'def')

    >>> splitFrag("abcdef")
    ('abcdef', None)

    """
    # Use the str method rather than string.rfind(): the function form
    # was removed from the string module in Python 3.
    i = uriref.rfind("#")
    if i >= 0:
        return uriref[:i], uriref[i + 1:]
    return uriref, None
46
def splitFragP(uriref, punct=0):
    """Split a URI reference before the fragment.

    Punctuation is kept: the '#' stays at the start of the fragment part.
    The split is made at the last '#' in uriref.

    punct is accepted for backward compatibility but is not used.

    Returns a (rest, fragment) tuple. fragment is '' when uriref
    contains no '#'.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    # Use the str method rather than string.rfind(): the function form
    # was removed from the string module in Python 3.
    i = uriref.rfind("#")
    if i >= 0:
        return uriref[:i], uriref[i:]
    return uriref, ''
65 66
def join(here, there):
    """Join an absolute URI and a URI reference.

    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    here is assumed to be absolute and must not contain a fragment
    (an AssertionError is raised otherwise).
    there is a URI reference.

    Returns the resolved URI. If there is itself absolute (a colon
    appears before any '/', '?' or '#') it is returned unchanged.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    A colon inside a query or fragment does not make the reference
    absolute:

    >>> join('http://example/x', '#a:b')
    'http://example/x#a:b'

    Raise ValueError if there is relative but here has no slash after
    the colon of its scheme, i.e. no hierarchical path.

    >>> join('mid:foo@example', '../foo')
    Traceback (most recent call last):
        ...
    ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.


    We grok IRIs

    >>> len(u'Andr\\xe9')
    5

    >>> join('http://example.org/', u'#Andr\\xe9') == u'http://example.org/#Andr\\xe9'
    True
    """

    assert here.find("#") < 0, \
        "Base may not contain hash: '%s'" % here  # caller must splitFrag (why?)

    # join(base, 'foo:/') -- absolute
    # A scheme can only occur before the first '/', '?' or '#'. A colon
    # found later (inside a path, query or fragment) is ordinary data and
    # must not short-circuit the resolution.
    colonl = there.find(':')
    if colonl >= 0:
        stops = [p for p in (there.find('/'), there.find('?'), there.find('#'))
                 if p >= 0]
        if not stops or colonl < min(stops):
            return there

    bcolonl = here.find(':')
    assert bcolonl >= 0, "Base uri '%s' is not absolute" % here  # else it's not absolute

    # join('mid:foo@example', '../foo') bzzt
    if here[bcolonl + 1:bcolonl + 2] != '/':
        raise ValueError("Base <%s> has no slash after colon - with relative '%s'."
                         % (here, there))

    # bpath is the index of the slash that starts the path of here.
    if here[bcolonl + 1:bcolonl + 3] == '//':
        bpath = here.find('/', bcolonl + 3)
    else:
        bpath = bcolonl + 1

    # join('http://xyz', 'foo')
    if bpath < 0:
        bpath = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:bcolonl + 1] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:bpath] + there

    slashr = here.rfind('/')

    # Split there just before its last '#', keeping the '#' with the
    # fragment (the same split splitFragP performs).
    hashpos = there.rfind('#')
    if hashpos >= 0:
        path, frag = there[:hashpos], there[hashpos:]
    else:
        path, frag = there, ''
    if not path:
        # Fragment-only (or empty) reference: it refers to here itself.
        return here + frag

    # Consume leading './' and '../' segments of the relative path,
    # trimming one directory off here for each '../' but never climbing
    # above the start of the base path.
    while 1:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            i = here.rfind('/', bpath, slashr)
            if i >= 0:
                here = here[:i + 1]
                slashr = i
        else:
            break

    return here[:slashr + 1] + path + frag
import re
import string

# Matches a common prefix that consists of a scheme, an optional
# //authority and at most one path segment -- i.e. base and uri share
# nothing beyond the root of the path.
commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')


def refTo(base, uri):
    """Figure out a relative URI reference from base to uri.

    base is an absolute URI, or None/empty meaning "no base".
    uri is the absolute URI to be expressed relative to base.

    Returns a URI reference string; when no useful relative form exists
    uri is returned unchanged.

    >>> refTo('http://example/x/y/z', 'http://example/x/abc')
    '../abc'

    >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
    'q/r#s'

    >>> refTo(None, 'http://ex/x/y')
    'http://ex/x/y'

    >>> refTo('http://ex/x/y', 'http://ex/x/y')
    ''

    Note the relationship between refTo and join:
    join(x, refTo(x, y)) == y
    which points out certain strings which cannot be URIs. e.g.
    >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
    False

    So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
    >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
    True

    This one checks that it uses a root-relative one where that is
    all they share.  Now uses root-relative where no path is shared.
    This is a matter of taste but tends to give more resilience IMHO
    -- and shorter paths

    Note that base may be None, meaning no base.  In some situations, there
    just ain't a base. Slife. In these cases, refTo returns the absolute value.
    The axiom abs(,rel(b,x))=x still holds.
    This saves people having to set the base to "bogus:".

    >>> refTo('http://ex/x/y/z', 'http://ex/r')
    '/r'

    """

    # assert base # don't mask bugs -danc # not a bug. -tim
    if not base:
        return uri
    if base == uri:
        return ""

    # Find how many leading characters base and uri have in common.
    i = 0
    limit = min(len(uri), len(base))
    while i < limit and uri[i] == base[i]:
        i += 1
    # i points to the end of the shorter one or to the first difference

    if commonHost.match(base[:i]):
        # Only the scheme/host (plus a partial first segment) is shared:
        # prefer a root-relative reference such as '/r'.
        k = uri.find("//")
        if k < 0:
            k = -2  # no host
        root = uri.find("/", k + 2)
        if (uri[root + 1:root + 2] != "/"
                and base[root + 1:root + 2] != "/"
                and uri[:root] == base[:root]):
            return uri[root:]

    if uri[i:i + 1] == "#" and len(base) == i:
        return uri[i:]  # fragment of base

    # Back up to just after the last slash of the common prefix.
    while i > 0 and uri[i - 1] != '/':
        i = i - 1

    if i < 3:
        return uri  # No way.
    # str methods are used instead of string.find()/string.count(),
    # which no longer exist in Python 3.
    if base.find("//", i - 2) > 0 or uri.find("//", i - 2) > 0:
        return uri  # An unshared "//"
    if base.find(":", i) > 0:
        return uri  # An unshared ":"
    # Each remaining '/' in base is one directory level to climb out of.
    n = base.count("/", i)
    if n == 0 and i < len(uri) and uri[i] == '#':
        return "./" + uri[i:]
    elif n == 0 and i == len(uri):
        return "./"
    else:
        return ("../" * n) + uri[i:]
233 234 import os
def base():
    """Return the base URI for this process - the Web equivalent of cwd.

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity.

    """
    # A host-qualified form would be:
    #   "file://" + hostname + os.getcwd() + "/"
    cwd = _fixslash(os.getcwd())
    return "".join(("file:", cwd, "/"))
246 247
248 -def _fixslash(str):
249 """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" 250 s = str 251 for i in range(len(s)): 252 if s[i] == "\\": s = s[:i] + "/" + s[i+1:] 253 if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present 254 return s
255 256 257 import unittest 258
class Tests(unittest.TestCase):
    """Unit tests for refTo, join and splitFrag."""

    def testPaths(self):
        """Check refTo on (base, uri, expected) and that join inverts it."""
        cases = (("foo:xyz", "bar:abc", "bar:abc"),
                 ('http://example/x/y/z', 'http://example/x/abc', '../abc'),
                 ('http://example2/x/y/z', 'http://example/x/abc', 'http://example/x/abc'),
                 ('http://ex/x/y/z', 'http://ex/x/r', '../r'),
                 # ('http://ex/x/y/z', 'http://ex/r', '../../r'),    # DanC had this.
                 ('http://ex/x/y', 'http://ex/x/q/r', 'q/r'),
                 ('http://ex/x/y', 'http://ex/x/q/r#s', 'q/r#s'),
                 ('http://ex/x/y', 'http://ex/x/q/r#s/t', 'q/r#s/t'),
                 ('http://ex/x/y', 'ftp://ex/x/q/r', 'ftp://ex/x/q/r'),
                 ('http://ex/x/y', 'http://ex/x/y', ''),
                 ('http://ex/x/y/', 'http://ex/x/y/', ''),
                 ('http://ex/x/y/pdq', 'http://ex/x/y/pdq', ''),
                 ('http://ex/x/y/', 'http://ex/x/y/z/', 'z/'),
                 ('file:/swap/test/animal.rdf', 'file:/swap/test/animal.rdf#Animal', '#Animal'),
                 ('file:/e/x/y/z', 'file:/e/x/abc', '../abc'),
                 ('file:/example2/x/y/z', 'file:/example/x/abc', '/example/x/abc'),   # TBL
                 ('file:/ex/x/y/z', 'file:/ex/x/r', '../r'),
                 ('file:/ex/x/y/z', 'file:/r', '/r'),        # I prefer this. - tbl
                 ('file:/ex/x/y', 'file:/ex/x/q/r', 'q/r'),
                 ('file:/ex/x/y', 'file:/ex/x/q/r#s', 'q/r#s'),
                 ('file:/ex/x/y', 'file:/ex/x/q/r#', 'q/r#'),
                 ('file:/ex/x/y', 'file:/ex/x/q/r#s/t', 'q/r#s/t'),
                 ('file:/ex/x/y', 'ftp://ex/x/q/r', 'ftp://ex/x/q/r'),
                 ('file:/ex/x/y', 'file:/ex/x/y', ''),
                 ('file:/ex/x/y/', 'file:/ex/x/y/', ''),
                 ('file:/ex/x/y/pdq', 'file:/ex/x/y/pdq', ''),
                 ('file:/ex/x/y/', 'file:/ex/x/y/z/', 'z/'),
                 ('file:/devel/WWW/2000/10/swap/test/reluri-1.n3',
                  'file://meetings.example.com/cal#m1', 'file://meetings.example.com/cal#m1'),
                 ('file:/home/connolly/w3ccvs/WWW/2000/10/swap/test/reluri-1.n3', 'file://meetings.example.com/cal#m1', 'file://meetings.example.com/cal#m1'),
                 ('file:/some/dir/foo', 'file:/some/dir/#blort', './#blort'),
                 ('file:/some/dir/foo', 'file:/some/dir/#', './#'),

                 # From Graham Klyne Thu, 20 Feb 2003 18:08:17 +0000
                 ("http://example/x/y%2Fz", "http://example/x/abc", "abc"),
                 ("http://example/x/y/z", "http://example/x%2Fabc", "/x%2Fabc"),
                 ("http://example/x/y%2Fz", "http://example/x%2Fabc", "/x%2Fabc"),
                 ("http://example/x%2Fy/z", "http://example/x%2Fy/abc", "abc"),
                 # Ryan Lee
                 ("http://example/x/abc.efg", "http://example/x/", "./")
                 )

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and was removed from unittest in Python 3.12.
        for inp1, inp2, exp in cases:
            self.assertEqual(refTo(inp1, inp2), exp)
            self.assertEqual(join(inp1, exp), inp2)

    def testSplit(self):
        """Check splitFrag on (input, expected rest, expected fragment)."""
        cases = (
            ("abc#def", "abc", "def"),
            ("abc", "abc", None),
            ("#def", "", "def"),
            ("", "", None),
            ("abc#de:f", "abc", "de:f"),
            ("abc#de?f", "abc", "de?f"),
            ("abc#de/f", "abc", "de/f"),
            )
        for inp, exp1, exp2 in cases:
            self.assertEqual(splitFrag(inp), (exp1, exp2))

    def testRFCCases(self):
        """Check join against the reference-resolution examples of RFC 2396."""

        base = 'http://a/b/c/d;p?q'

        # C.1.  Normal Examples

        normalExamples = (
            (base, 'g:h', 'g:h'),
            (base, 'g', 'http://a/b/c/g'),
            (base, './g', 'http://a/b/c/g'),
            (base, 'g/', 'http://a/b/c/g/'),
            (base, '/g', 'http://a/g'),
            (base, '//g', 'http://g'),
            (base, '?y', 'http://a/b/c/?y'),  #@@wow... really?
            (base, 'g?y', 'http://a/b/c/g?y'),
            (base, '#s', 'http://a/b/c/d;p?q#s'),  #@@ was: (current document)#s
            (base, 'g#s', 'http://a/b/c/g#s'),
            (base, 'g?y#s', 'http://a/b/c/g?y#s'),
            (base, ';x', 'http://a/b/c/;x'),
            (base, 'g;x', 'http://a/b/c/g;x'),
            (base, 'g;x?y#s', 'http://a/b/c/g;x?y#s'),
            (base, '.', 'http://a/b/c/'),
            (base, './', 'http://a/b/c/'),
            (base, '..', 'http://a/b/'),
            (base, '../', 'http://a/b/'),
            (base, '../g', 'http://a/b/g'),
            (base, '../..', 'http://a/'),
            (base, '../../', 'http://a/'),
            (base, '../../g', 'http://a/g')
            )

        otherExamples = (
            (base, '', base),
            (base, '../../../g', 'http://a/g'),  #@@disagree with RFC2396
            (base, '../../../../g', 'http://a/g'),  #@@disagree with RFC2396
            (base, '/./g', 'http://a/./g'),
            (base, '/../g', 'http://a/../g'),
            (base, 'g.', 'http://a/b/c/g.'),
            (base, '.g', 'http://a/b/c/.g'),
            (base, 'g..', 'http://a/b/c/g..'),
            (base, '..g', 'http://a/b/c/..g'),

            (base, './../g', 'http://a/b/g'),
            (base, './g/.', 'http://a/b/c/g/.'),  #@@hmmm...
            (base, 'g/./h', 'http://a/b/c/g/./h'),  #@@hmm...
            (base, 'g/../h', 'http://a/b/c/g/../h'),
            (base, 'g;x=1/./y', 'http://a/b/c/g;x=1/./y'),  #@@hmmm...
            (base, 'g;x=1/../y', 'http://a/b/c/g;x=1/../y'),  #@@hmmm...

            (base, 'g?y/./x', 'http://a/b/c/g?y/./x'),
            (base, 'g?y/../x', 'http://a/b/c/g?y/../x'),
            (base, 'g#s/./x', 'http://a/b/c/g#s/./x'),
            (base, 'g#s/../x', 'http://a/b/c/g#s/../x')
            )

        for b, inp, exp in normalExamples + otherExamples:
            # An expected value of None marks a case that must raise.
            if exp is None:
                self.assertRaises(ValueError, join, b, inp)
            else:
                self.assertEqual(join(b, inp), exp)
381
def _test():
    """Run this module's doctests, then its unittest test cases."""
    import doctest
    import sys
    # Look the module up under its actual name instead of importing a
    # hard-coded top-level 'uripath', which is not importable by that
    # name when the module is loaded as part of a package.
    doctest.testmod(sys.modules[__name__])
    unittest.main()
# When executed as a script, run the module's self-tests.
if __name__ == '__main__':
    _test()


# $Log: uripath.py,v $
# Revision 1.1 2009/07/29 12:49:35 ivan
# *** empty log message ***
#
# Revision 1.16 2004/03/21 04:24:35 timbl
# (See doc/changes.html)
# on xml output, nodeID was incorrectly spelled.
# update.py provides cwm's --patch option.
# diff.py as independent progrem generates patch files for cwm --patch
#
# Revision 1.15 2004/01/28 22:22:10 connolly
# tested that IRIs work in uripath.join()
#
# Revision 1.14 2003/10/20 17:31:55 timbl
# Added @keyword support.
# (eventually got python+expat to wrok on fink, with patch)
# Trig functions are in, thanks to Karl, with some changes, but NOT in regeression.n3
# see test/math/test-trigo.n3 for now.
#
# Revision 1.13 2003/07/03 21:04:39 timbl
# New string function to compare strings normalizing case and whitespace string:containsRoughly
#
# Revision 1.12 2003/04/03 22:35:12 ryanlee
# fixed previous fix, added test case
#
# Revision 1.11 2003/04/03 22:06:54 ryanlee
# small fix in if, line 217
#
# Revision 1.10 2003/02/24 15:06:38 connolly
# some more tests from Graham
#
# Revision 1.9 2002/12/25 20:01:32 timbl
# some --flatten tests fail. --why fails. Formulae must be closed to be referenced in a add()
#
# Revision 1.8 2002/11/24 03:12:02 timbl
# base can be None in uripath:refTo
#
# Revision 1.7 2002/09/04 05:03:07 connolly
# convertet unittests to use python doctest and unittest modules; cleaned up docstrings a bit
#
# Revision 1.6 2002/09/04 04:07:50 connolly
# fixed uripath.refTo
#
# Revision 1.5 2002/08/23 04:36:15 connolly
# fixed refTo case: file:/some/dir/foo -> file:/some/dir/#blort
#
# Revision 1.4 2002/08/07 14:32:21 timbl
# uripath changes. passes 51 general tests and 25 loopback tests
#
# Revision 1.3 2002/08/06 01:36:09 connolly
# cleanup: diagnostic interface, relative/absolute uri handling
#
# Revision 1.2 2002/03/15 23:53:02 connolly
# handle no-auth case
#
# Revision 1.1 2002/02/19 22:52:42 connolly
# renamed uritools.py to uripath.py
#
# Revision 1.2 2002/02/18 07:33:51 connolly
# pathTo seems to work
#