diff options
author | Skip Montanaro <skip@pobox.com> | 2003-01-01 20:26:47 +0000 |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2003-01-01 20:26:47 +0000 |
commit | e6689c7cbabf7764e84672ec298160be881a75d5 (patch) | |
tree | 0f2486737b67dba01e470d80391882de1d226095 /Demo/scripts | |
parent | 5be9e01bbfaff1c3720409d4e5952357def29c4f (diff) | |
download | cpython-e6689c7cbabf7764e84672ec298160be881a75d5.tar.gz |
Search for Unicode character names using regular expressions.
Diffstat (limited to 'Demo/scripts')
-rw-r--r-- | Demo/scripts/find-uname.py | 40 |
1 files changed, 40 insertions, 0 deletions
#!/usr/bin/env python

"""
For each argument on the command line, look for it in the set of all Unicode
names.  Arguments are treated as case-insensitive regular expressions, e.g.:

    % find-uname 'small letter a$' 'horizontal line'
    *** small letter a$ matches ***
    LATIN SMALL LETTER A (97)
    COMBINING LATIN SMALL LETTER A (867)
    CYRILLIC SMALL LETTER A (1072)
    PARENTHESIZED LATIN SMALL LETTER A (9372)
    CIRCLED LATIN SMALL LETTER A (9424)
    FULLWIDTH LATIN SMALL LETTER A (65345)
    *** horizontal line matches ***
    HORIZONTAL LINE EXTENSION (9135)
"""

import unicodedata
import sys
import re

# Python 2 spells the "code point -> one-character unicode string" builtin
# ``unichr``; Python 3 folded it into ``chr``.  Bind whichever exists so the
# rest of the script runs unchanged on both.
try:
    _chr = unichr
except NameError:
    _chr = chr


def _named_characters():
    """Return a list of (code point, name) pairs for every named character.

    Scans every code point from 0 through ``sys.maxunicode`` in ascending
    order and keeps the ones ``unicodedata`` has a name for.
    """
    named = []
    for codepoint in range(sys.maxunicode + 1):
        # Passing a default makes unicodedata.name() return it for unnamed
        # characters instead of raising ValueError, which avoids an
        # exception round-trip for the (large) unnamed part of the range.
        name = unicodedata.name(_chr(codepoint), None)
        if name is not None:
            named.append((codepoint, name))
    return named


def main(args):
    """Print the Unicode character names matching each pattern in *args*.

    args -- sequence of regular expression strings; each one is compiled
            with re.I and searched for (not anchored) in every name.

    For each pattern with at least one match, a "*** <pattern> matches ***"
    header is printed, followed by one "NAME (codepoint)" line per matching
    character in ascending code point order.  Patterns with no matches
    print nothing.

    An invalid pattern raises re.error from re.compile(); it is not caught
    here.
    """
    if not args:
        # Nothing to search for, so skip the full code point scan.
        return

    unicode_names = _named_characters()
    for arg in args:
        pat = re.compile(arg, re.I)
        matches = [(codepoint, name) for (codepoint, name) in unicode_names
                   if pat.search(name) is not None]
        if matches:
            # A single parenthesised argument behaves the same as a
            # Python 2 print statement and a Python 3 print() call.
            print("*** %s matches ***" % (arg,))
            for (codepoint, name) in matches:
                print("%s (%d)" % (name, codepoint))


if __name__ == "__main__":
    main(sys.argv[1:])