# -*- test-case-name: openid.test.test_xri -*-
"""Utility functions for handling XRIs.

The helpers here classify identifiers as XRI or URI, convert XRIs to their
IRI-normal and URI-normal forms, and extract the root authority of an XRI.

The module runs unchanged on Python 2 (2.6 or later) and Python 3.

@see: XRI Syntax v2.0 at the U{OASIS XRI Technical Committee}
"""

import re

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already handles every code point

# Global context symbols (plus the cross-reference opener) that may begin
# an XRI authority.
XRI_AUTHORITIES = ['!', '=', '@', '+', '$', '(']

_XRI_SCHEME = 'xri://'

# Code point ranges from RFC 3987, section 2.2: C{ucschar} and C{iprivate}.
# Both bounds of every range are inclusive.  The basic-multilingual-plane
# entries are always present; ranges above U+FFFF are only added when the
# interpreter can represent such characters as a single code point.
UCSCHAR = [
    (0xA0, 0xD7FF),
    (0xF900, 0xFDCF),
    (0xFDF0, 0xFFEF),
]

IPRIVATE = [
    (0xE000, 0xF8FF),
]

try:
    _unichr(0x10000)
except ValueError:
    # Narrow Python build: supplementary planes cannot be expressed as
    # single characters, so only the BMP ranges above are usable.
    pass
else:
    UCSCHAR.extend([
        (0x10000, 0x1FFFD),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
        (0x40000, 0x4FFFD),
        (0x50000, 0x5FFFD),
        (0x60000, 0x6FFFD),
        (0x70000, 0x7FFFD),
        (0x80000, 0x8FFFD),
        (0x90000, 0x9FFFD),
        (0xA0000, 0xAFFFD),
        (0xB0000, 0xBFFFD),
        (0xC0000, 0xCFFFD),
        (0xD0000, 0xDFFFD),
        (0xE1000, 0xEFFFD),
    ])
    IPRIVATE.extend([
        (0xF0000, 0xFFFFD),
        (0x100000, 0x10FFFD),
    ])

# Character class matching any single character that must be
# percent-escaped when an IRI is mapped to a URI.
_escapeme_re = re.compile(u'[%s]' % (u''.join(
    [u'%s-%s' % (_unichr(low), _unichr(high))
     for (low, high) in UCSCHAR + IPRIVATE]),))

# Non-greedy match of one parenthesised cross-reference.
_xref_re = re.compile(r'\((.*?)\)')


def identifierScheme(identifier):
    """Determine if this identifier is an XRI or URI.

    An identifier is treated as an XRI when it starts with C{xri://} or
    when its first character is one of L{XRI_AUTHORITIES}.  Everything
    else, including the empty string, is treated as a URI.

    @param identifier: the identifier to classify
    @type identifier: str or unicode

    @returns: C{"XRI"} or C{"URI"}
    """
    if identifier.startswith(_XRI_SCHEME):
        return "XRI"
    # Slicing (rather than indexing) keeps the empty string safe.
    if identifier[:1] in XRI_AUTHORITIES:
        return "XRI"
    return "URI"


def toIRINormal(xri):
    """Transform an XRI to IRI-normal form.

    The C{xri://} scheme is prepended when missing, then the result is
    passed through L{escapeForIRI}.

    @param xri: an XRI, with or without the C{xri://} prefix
    @returns: the IRI-normal form of C{xri}
    """
    return escapeForIRI(XRI(xri))


def _escape_xref(xref_match):
    """Escape things that need to be escaped if they're in a cross-reference.

    @param xref_match: a match object produced by C{_xref_re}
    @returns: the matched text with C{/}, C{?} and C{#} percent-encoded
    """
    xref = xref_match.group()
    xref = xref.replace('/', '%2F')
    xref = xref.replace('?', '%3F')
    xref = xref.replace('#', '%23')
    return xref


def escapeForIRI(xri):
    """Escape things that need to be escaped when transforming to an IRI.

    @param xri: the XRI text to escape
    @returns: C{xri} with every C{%} encoded as C{%25} and with C{/},
        C{?} and C{#} encoded inside parenthesised cross-references
    """
    # Percent signs must be handled first; otherwise the escapes that
    # _escape_xref introduces would themselves be escaped again.
    xri = xri.replace('%', '%25')
    xri = _xref_re.sub(_escape_xref, xri)
    return xri


def toURINormal(xri):
    """Transform an XRI to URI normal form.

    @param xri: an XRI, with or without the C{xri://} prefix
    @returns: the IRI-normal form of C{xri} with the characters matched
        by C{_escapeme_re} percent-escaped
    """
    return iriToURI(toIRINormal(xri))


def _percentEscapeUnicode(char_match):
    """Percent-encode the UTF-8 octets of a matched character.

    @param char_match: a match object produced by C{_escapeme_re}
    @returns: one C{%XX} triplet (upper-case hex) per UTF-8 octet
    """
    # Iterating a bytearray yields integers on both Python 2 and Python 3,
    # whereas iterating the encoded string yields 1-char strings on
    # Python 2 but integers on Python 3.
    octets = bytearray(char_match.group().encode('utf-8'))
    return ''.join(['%%%02X' % (octet,) for octet in octets])


def iriToURI(iri):
    """Transform an IRI to a URI by escaping unicode.

    @param iri: the IRI to convert
    @returns: C{iri} with every character in the L{UCSCHAR} and
        L{IPRIVATE} ranges replaced by its percent-encoded UTF-8 octets
    """
    # According to RFC 3987, section 3.1, "Mapping of IRIs to URIs"
    return _escapeme_re.sub(_percentEscapeUnicode, iri)


def providerIsAuthoritative(providerID, canonicalID):
    """Is this provider ID authoritative for this XRI?

    The provider is authoritative when it equals the canonical ID with
    its last C{!}-delimited subsegment removed.  A canonical ID that
    contains no C{!} has no parent, so no provider is authoritative for
    it and C{False} is returned.

    @param providerID: the provider ID to check
    @param canonicalID: the canonical ID being claimed

    @returntype: bool
    """
    lastbang = canonicalID.rfind('!')
    if lastbang == -1:
        return False
    parent = canonicalID[:lastbang]
    return parent == providerID


def rootAuthority(xri):
    """Return the root authority for an XRI.

    Example::

        rootAuthority("xri://@example") == "xri://@"

    An input whose authority is empty yields C{"xri://"}.

    @type xri: unicode

    @returntype: unicode

    @raises ValueError: if the authority opens a cross-reference with
        C{(} that is never closed
    """
    if xri.startswith(_XRI_SCHEME):
        xri = xri[len(_XRI_SCHEME):]

    authority = xri.split('/', 1)[0]
    # Slice instead of indexing so an empty authority does not raise.
    first = authority[:1]

    if first == '(':
        # Cross-reference.  The closing paren is looked up in the whole
        # XRI rather than in the text before the first "/", because a
        # cross-reference may itself contain slashes, for example
        # "(http://example.com)/path".
        # XXX: This is incorrect if someone nests cross-references so there
        #   is another close-paren in there.  Hopefully nobody does that
        #   before we have a real xriparse function.  Hopefully nobody does
        #   that *ever*.
        close = xri.find(')')
        if close == -1:
            raise ValueError(
                'unterminated cross-reference in XRI: %r' % (xri,))
        root = xri[:close + 1]
    elif first in XRI_AUTHORITIES:
        # Other XRI reference.
        root = first
    else:
        # IRI reference.  XXX: Can IRI authorities have segments?
        # The root is whatever precedes the first "!" or "*" delimiter.
        root = authority.split('!', 1)[0].split('*', 1)[0]

    return XRI(root)


def XRI(xri):
    """An XRI object allowing comparison of XRI.

    Ideally, this would do full normalization and provide comparison
    operators as per XRI Syntax.  Right now, it just does a bit of
    canonicalization by ensuring the xri scheme is present.

    @param xri: an xri string
    @type xri: unicode

    @returns: C{xri}, prefixed with C{xri://} if it was not already
    """
    if not xri.startswith(_XRI_SCHEME):
        xri = _XRI_SCHEME + xri
    return xri