summary refs log tree commit diff
path: root/Demo/scripts
diff options
context:
space:
mode:
author Skip Montanaro <skip@pobox.com> 2003-01-01 20:26:47 +0000
committer Skip Montanaro <skip@pobox.com> 2003-01-01 20:26:47 +0000
commit e6689c7cbabf7764e84672ec298160be881a75d5 (patch)
tree 0f2486737b67dba01e470d80391882de1d226095 /Demo/scripts
parent 5be9e01bbfaff1c3720409d4e5952357def29c4f (diff)
download cpython-e6689c7cbabf7764e84672ec298160be881a75d5.tar.gz
Search for Unicode character names using regular expressions.
Diffstat (limited to 'Demo/scripts')
-rw-r--r-- Demo/scripts/find-uname.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/Demo/scripts/find-uname.py b/Demo/scripts/find-uname.py
new file mode 100644
index 0000000000..b76b9f0fe8
--- /dev/null
+++ b/Demo/scripts/find-uname.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+"""
+For each argument on the command line, look for it in the set of all Unicode
+names. Arguments are treated as case-insensitive regular expressions, e.g.:
+
+ % find-uname 'small letter a$' 'horizontal line'
+ *** small letter a$ matches ***
+ LATIN SMALL LETTER A (97)
+ COMBINING LATIN SMALL LETTER A (867)
+ CYRILLIC SMALL LETTER A (1072)
+ PARENTHESIZED LATIN SMALL LETTER A (9372)
+ CIRCLED LATIN SMALL LETTER A (9424)
+ FULLWIDTH LATIN SMALL LETTER A (65345)
+ *** horizontal line matches ***
+ HORIZONTAL LINE EXTENSION (9135)
+"""
+
+import unicodedata
+import sys
+import re
+
+def main(args):
+    """Print the Unicode character names matched by each pattern in args.
+
+    Each item of args is compiled as a case-insensitive regular expression
+    and searched for in the name of every code point from 0 through
+    sys.maxunicode.  For a pattern with at least one hit, a
+    "*** <pattern> matches ***" header is printed, followed by one
+    "NAME (decimal code point)" line per match, in code point order.
+    Patterns without hits print nothing.  An invalid pattern propagates
+    re.error from re.compile.
+    """
+    if not args:
+        # Nothing to search for; skip the scan of the whole code space.
+        return
+    try:
+        to_char = unichr    # Python 2: chr() only covers 0..255
+    except NameError:
+        to_char = chr       # Python 3: unichr() is gone, chr() is Unicode
+    unicode_names = []
+    for ix in range(sys.maxunicode + 1):
+        # Passing a default makes name() return None for unnamed code
+        # points rather than raising ValueError for each of them.
+        name = unicodedata.name(to_char(ix), None)
+        if name is not None:
+            unicode_names.append((ix, name))
+    for arg in args:
+        pat = re.compile(arg, re.I)
+        matches = [(ix, name) for (ix, name) in unicode_names
+                   if pat.search(name) is not None]
+        if matches:
+            # Single pre-formatted argument: prints identically whether
+            # print is a statement (Python 2) or a function (Python 3).
+            print("*** %s matches ***" % (arg,))
+            for (ix, name) in matches:
+                print("%s (%d)" % (name, ix))
+
+if __name__ == "__main__":
+    # Script entry point: every command-line argument after the program
+    # name is handed to main() as a search pattern.
+    patterns = sys.argv[1:]
+    main(patterns)