Update issue 120

author: eikeon <devnull@localhost> 2010-02-13 01:27:55 +0000
committer: eikeon <devnull@localhost> 2010-02-13 01:27:55 +0000
commit: 02b60fa0002fd2efd8a245265f0a8fce1122affd (patch)
tree: a993977327f23bd83c8e2eeb495b5df8bfd135eb /rdflib/namespace.py
parent: d8166002dd19346ffffae7cc2194c4c197c9f1b5 (diff)
download: rdflib-02b60fa0002fd2efd8a245265f0a8fce1122affd.tar.gz
1 files changed, 83 insertions, 1 deletions
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 8ee0bee6..148f46e9 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -10,7 +10,6 @@ from urlparse import urljoin, urldefrag
 from urllib import pathname2url
 
 from rdflib.term import URIRef, Variable, _XSD_PFX
-from rdflib.syntax.xml_names import split_uri
 
 
 class Namespace(URIRef):
@@ -241,3 +240,86 @@ class NamespaceManager(object):
             if uri and uri[-1]=="#" and result[-1]!="#":
                 result = "%s#" % result
         return URIRef(result)
+
+# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
+#
+# * Name start characters must have one of the categories Ll, Lu, Lo,
+#   Lt, Nl.
+#
+# * Name characters other than Name-start characters must have one of
+#   the categories Mc, Me, Mn, Lm, or Nd.
+#
+# * Characters in the compatibility area (i.e. with character code
+#   greater than #xF900 and less than #xFFFE) are not allowed in XML
+#   names.
+#
+# * Characters which have a font or compatibility decomposition
+#   (i.e. those with a "compatibility formatting tag" in field 5 of the
+#   database -- marked by field 5 beginning with a "<") are not allowed.
+#
+# * The following characters are treated as name-start characters rather
+#   than name characters, because the property file classifies them as
+#   Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
+#
+# * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode
+#   2.0, section 5.14).
+#
+# * Character #x00B7 is classified as an extender, because the property
+#   list so identifies it.
+#
+# * Character #x0387 is added as a name character, because #x00B7 is its
+#   canonical equivalent.
+#
+# * Characters ':' and '_' are allowed as name-start characters.
+#
+# * Characters '-' and '.' are allowed as name characters.
+
+from unicodedata import category, decomposition
+
+NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]  # categories accepted for the first character of a name
+NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]  # categories accepted for any name character
+ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_"]  # accepted as name characters regardless of category
+
+# http://www.w3.org/TR/REC-xml-names/#NT-NCName
+#  [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus
+#      the ":" */
+#  [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar
+#      | Extender
+
+def is_ncname(name):
+    """Return 1 if *name* passes the NCName check, else 0 (also for "").
+
+    The first character must be "_" or fall in NAME_START_CATEGORIES;
+    each later one must fall in NAME_CATEGORIES or be listed in
+    ALLOWED_NAME_CHARS.  The compatibility-area and decomposition
+    exclusions of the XML spec are not enforced here.
+    """
+    if not name:
+        return 0  # name[0] used to raise IndexError on empty input
+    if name[0] != "_" and category(name[0]) not in NAME_START_CATEGORIES:
+        return 0
+    for c in name[1:]:
+        if category(c) not in NAME_CATEGORIES and c not in ALLOWED_NAME_CHARS:
+            return 0
+    return 1
+
+XMLNS = "http://www.w3.org/XML/1998/namespace"  # split_uri() special-cases URIs that start with this
+
+def split_uri(uri):
+    """Split *uri* into a ``(namespace, local_name)`` tuple.
+
+    The local name starts at the first name-start character (or "_")
+    after the last character that cannot appear in an XML name.  URIs
+    under XMLNS split right after it; Exception is raised if no split.
+    """
+    if uri.startswith(XMLNS):
+        return (XMLNS, uri[len(XMLNS):])  # slice: split() lost text after a repeat
+    size = len(uri)
+    for k in range(size - 1, -1, -1):
+        if category(uri[k]) in NAME_CATEGORIES or uri[k] in ALLOWED_NAME_CHARS:
+            continue
+        for j in range(k + 1, size):  # tail only; negative j used to wrap to 0
+            if uri[j] == "_" or category(uri[j]) in NAME_START_CATEGORIES:
+                return (uri[:j], uri[j:])
+        break
+    raise Exception("Can't split '%s'" % uri)
author	eikeon <devnull@localhost>	2010-02-13 01:27:55 +0000
committer	eikeon <devnull@localhost>	2010-02-13 01:27:55 +0000
commit	02b60fa0002fd2efd8a245265f0a8fce1122affd (patch)
tree	a993977327f23bd83c8e2eeb495b5df8bfd135eb /rdflib/namespace.py
parent	d8166002dd19346ffffae7cc2194c4c197c9f1b5 (diff)
download	rdflib-02b60fa0002fd2efd8a245265f0a8fce1122affd.tar.gz