From 814514e1bd7b7585af45ab28864484a04e45d1dd Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Tue, 22 Oct 2013 21:32:25 -0400 Subject: Rename pip.vendor.* to pip._vendor.* to ensure it's obviously private --- pip/_vendor/Makefile | 12 + pip/_vendor/README.rst | 24 + pip/_vendor/__init__.py | 8 + pip/_vendor/colorama/__init__.py | 7 + pip/_vendor/colorama/ansi.py | 50 + pip/_vendor/colorama/ansitowin32.py | 189 ++ pip/_vendor/colorama/initialise.py | 56 + pip/_vendor/colorama/win32.py | 134 + pip/_vendor/colorama/winterm.py | 120 + pip/_vendor/distlib/__init__.py | 22 + pip/_vendor/distlib/_backport/__init__.py | 6 + pip/_vendor/distlib/_backport/misc.py | 41 + pip/_vendor/distlib/_backport/shutil.py | 761 +++++ pip/_vendor/distlib/_backport/sysconfig.cfg | 84 + pip/_vendor/distlib/_backport/sysconfig.py | 787 +++++ pip/_vendor/distlib/_backport/tarfile.py | 2607 ++++++++++++++++ pip/_vendor/distlib/compat.py | 1061 +++++++ pip/_vendor/distlib/database.py | 1301 ++++++++ pip/_vendor/distlib/index.py | 478 +++ pip/_vendor/distlib/locators.py | 1184 +++++++ pip/_vendor/distlib/manifest.py | 364 +++ pip/_vendor/distlib/markers.py | 190 ++ pip/_vendor/distlib/metadata.py | 1013 ++++++ pip/_vendor/distlib/resources.py | 343 ++ pip/_vendor/distlib/scripts.py | 312 ++ pip/_vendor/distlib/t32.exe | Bin 0 -> 54272 bytes pip/_vendor/distlib/t64.exe | Bin 0 -> 55296 bytes pip/_vendor/distlib/util.py | 1517 +++++++++ pip/_vendor/distlib/version.py | 692 ++++ pip/_vendor/distlib/w32.exe | Bin 0 -> 53760 bytes pip/_vendor/distlib/w64.exe | Bin 0 -> 54784 bytes pip/_vendor/distlib/wheel.py | 722 +++++ pip/_vendor/html5lib/LICENSE | 20 + pip/_vendor/html5lib/__init__.py | 23 + pip/_vendor/html5lib/constants.py | 3086 ++++++++++++++++++ pip/_vendor/html5lib/filters/__init__.py | 0 pip/_vendor/html5lib/filters/_base.py | 12 + .../html5lib/filters/alphabeticalattributes.py | 20 + .../html5lib/filters/inject_meta_charset.py | 65 + pip/_vendor/html5lib/filters/lint.py | 93 + pip/_vendor/html5lib/filters/optionaltags.py | 205 ++ pip/_vendor/html5lib/filters/sanitizer.py | 12 + pip/_vendor/html5lib/filters/whitespace.py | 38 + pip/_vendor/html5lib/html5parser.py | 2725 ++++++++++++++++ pip/_vendor/html5lib/ihatexml.py | 285 ++ pip/_vendor/html5lib/inputstream.py | 881 ++++++ pip/_vendor/html5lib/sanitizer.py | 271 ++ pip/_vendor/html5lib/serializer/__init__.py | 16 + pip/_vendor/html5lib/serializer/htmlserializer.py | 309 ++ pip/_vendor/html5lib/tokenizer.py | 1731 ++++++++++ pip/_vendor/html5lib/treebuilders/__init__.py | 76 + pip/_vendor/html5lib/treebuilders/_base.py | 377 +++ pip/_vendor/html5lib/treebuilders/dom.py | 290 ++ pip/_vendor/html5lib/treebuilders/etree.py | 337 ++ pip/_vendor/html5lib/treebuilders/etree_lxml.py | 369 +++ pip/_vendor/html5lib/treewalkers/__init__.py | 57 + pip/_vendor/html5lib/treewalkers/_base.py | 196 ++ pip/_vendor/html5lib/treewalkers/dom.py | 46 + pip/_vendor/html5lib/treewalkers/etree.py | 131 + pip/_vendor/html5lib/treewalkers/genshistream.py | 69 + pip/_vendor/html5lib/treewalkers/lxmletree.py | 208 ++ pip/_vendor/html5lib/treewalkers/pulldom.py | 63 + pip/_vendor/html5lib/trie/__init__.py | 12 + pip/_vendor/html5lib/trie/_base.py | 37 + pip/_vendor/html5lib/trie/datrie.py | 44 + pip/_vendor/html5lib/trie/py.py | 67 + pip/_vendor/html5lib/utils.py | 78 + pip/_vendor/re-vendor.py | 34 + pip/_vendor/requests/__init__.py | 77 + pip/_vendor/requests/adapters.py | 369 +++ pip/_vendor/requests/api.py | 120 + pip/_vendor/requests/auth.py | 180 ++ pip/_vendor/requests/cacert.pem | 3290 ++++++++++++++++++++ pip/_vendor/requests/certs.py | 24 + pip/_vendor/requests/compat.py | 115 + pip/_vendor/requests/cookies.py | 406 +++ pip/_vendor/requests/exceptions.py | 59 + pip/_vendor/requests/hooks.py | 45 + pip/_vendor/requests/models.py | 730 +++++ pip/_vendor/requests/packages/__init__.py | 3 + pip/_vendor/requests/packages/charade/__init__.py | 32 + pip/_vendor/requests/packages/charade/big5freq.py | 925 ++++++ .../requests/packages/charade/big5prober.py | 42 + .../requests/packages/charade/chardistribution.py | 231 ++ .../packages/charade/charsetgroupprober.py | 106 + .../requests/packages/charade/charsetprober.py | 62 + .../packages/charade/codingstatemachine.py | 61 + pip/_vendor/requests/packages/charade/compat.py | 34 + pip/_vendor/requests/packages/charade/constants.py | 39 + .../requests/packages/charade/cp949prober.py | 44 + pip/_vendor/requests/packages/charade/escprober.py | 86 + pip/_vendor/requests/packages/charade/escsm.py | 242 ++ .../requests/packages/charade/eucjpprober.py | 90 + pip/_vendor/requests/packages/charade/euckrfreq.py | 596 ++++ .../requests/packages/charade/euckrprober.py | 42 + pip/_vendor/requests/packages/charade/euctwfreq.py | 428 +++ .../requests/packages/charade/euctwprober.py | 41 + .../requests/packages/charade/gb2312freq.py | 472 +++ .../requests/packages/charade/gb2312prober.py | 41 + .../requests/packages/charade/hebrewprober.py | 283 ++ pip/_vendor/requests/packages/charade/jisfreq.py | 569 ++++ pip/_vendor/requests/packages/charade/jpcntx.py | 219 ++ .../packages/charade/langbulgarianmodel.py | 229 ++ .../requests/packages/charade/langcyrillicmodel.py | 329 ++ .../requests/packages/charade/langgreekmodel.py | 225 ++ .../requests/packages/charade/langhebrewmodel.py | 201 ++ .../packages/charade/langhungarianmodel.py | 225 ++ .../requests/packages/charade/langthaimodel.py | 200 ++ .../requests/packages/charade/latin1prober.py | 139 + .../requests/packages/charade/mbcharsetprober.py | 86 + .../requests/packages/charade/mbcsgroupprober.py | 54 + pip/_vendor/requests/packages/charade/mbcssm.py | 575 ++++ .../requests/packages/charade/sbcharsetprober.py | 120 + .../requests/packages/charade/sbcsgroupprober.py | 69 + .../requests/packages/charade/sjisprober.py | 91 + .../requests/packages/charade/universaldetector.py | 172 + .../requests/packages/charade/utf8prober.py | 76 + pip/_vendor/requests/packages/urllib3/__init__.py | 58 + .../requests/packages/urllib3/_collections.py | 94 + .../requests/packages/urllib3/connectionpool.py | 745 +++++ .../requests/packages/urllib3/contrib/__init__.py | 0 .../requests/packages/urllib3/contrib/ntlmpool.py | 120 + .../requests/packages/urllib3/contrib/pyopenssl.py | 344 ++ .../requests/packages/urllib3/exceptions.py | 121 + pip/_vendor/requests/packages/urllib3/fields.py | 177 ++ pip/_vendor/requests/packages/urllib3/filepost.py | 101 + .../requests/packages/urllib3/packages/__init__.py | 4 + .../packages/urllib3/packages/ordered_dict.py | 260 ++ .../requests/packages/urllib3/packages/six.py | 385 +++ .../packages/ssl_match_hostname/__init__.py | 98 + .../requests/packages/urllib3/poolmanager.py | 259 ++ pip/_vendor/requests/packages/urllib3/request.py | 142 + pip/_vendor/requests/packages/urllib3/response.py | 301 ++ pip/_vendor/requests/packages/urllib3/util.py | 626 ++++ pip/_vendor/requests/sessions.py | 527 ++++ pip/_vendor/requests/status_codes.py | 88 + pip/_vendor/requests/structures.py | 128 + pip/_vendor/requests/utils.py | 571 ++++ pip/_vendor/six.py | 404 +++ pip/_vendor/vendor.txt | 5 + pip/download.py | 12 +- pip/index.py | 4 +- pip/log.py | 2 +- pip/util.py | 2 +- pip/vendor/Makefile | 12 - pip/vendor/README.rst | 24 - pip/vendor/__init__.py | 8 - pip/vendor/colorama/__init__.py | 7 - pip/vendor/colorama/ansi.py | 50 - pip/vendor/colorama/ansitowin32.py | 189 -- pip/vendor/colorama/initialise.py | 56 - pip/vendor/colorama/win32.py | 134 - pip/vendor/colorama/winterm.py | 120 - pip/vendor/distlib/__init__.py | 22 - pip/vendor/distlib/_backport/__init__.py | 6 - pip/vendor/distlib/_backport/misc.py | 41 - pip/vendor/distlib/_backport/shutil.py | 761 ----- pip/vendor/distlib/_backport/sysconfig.cfg | 84 - pip/vendor/distlib/_backport/sysconfig.py | 787 ----- pip/vendor/distlib/_backport/tarfile.py | 2607 ---------------- pip/vendor/distlib/compat.py | 1061 ------- pip/vendor/distlib/database.py | 1301 -------- pip/vendor/distlib/index.py | 478 --- pip/vendor/distlib/locators.py | 1184 ------- pip/vendor/distlib/manifest.py | 364 --- pip/vendor/distlib/markers.py | 190 -- pip/vendor/distlib/metadata.py | 1013 ------ pip/vendor/distlib/resources.py | 343 -- pip/vendor/distlib/scripts.py | 312 -- pip/vendor/distlib/t32.exe | Bin 54272 -> 0 bytes pip/vendor/distlib/t64.exe | Bin 55296 -> 0 bytes pip/vendor/distlib/util.py | 1517 --------- pip/vendor/distlib/version.py | 692 ---- pip/vendor/distlib/w32.exe | Bin 53760 -> 0 bytes pip/vendor/distlib/w64.exe | Bin 54784 -> 0 bytes pip/vendor/distlib/wheel.py | 722 ----- pip/vendor/html5lib/LICENSE | 20 - pip/vendor/html5lib/__init__.py | 23 - pip/vendor/html5lib/constants.py | 3086 ------------------ pip/vendor/html5lib/filters/__init__.py | 0 pip/vendor/html5lib/filters/_base.py | 12 - .../html5lib/filters/alphabeticalattributes.py | 20 - pip/vendor/html5lib/filters/inject_meta_charset.py | 65 - pip/vendor/html5lib/filters/lint.py | 93 - pip/vendor/html5lib/filters/optionaltags.py | 205 -- pip/vendor/html5lib/filters/sanitizer.py | 12 - pip/vendor/html5lib/filters/whitespace.py | 38 - pip/vendor/html5lib/html5parser.py | 2725 ---------------- pip/vendor/html5lib/ihatexml.py | 285 -- pip/vendor/html5lib/inputstream.py | 881 ------ pip/vendor/html5lib/sanitizer.py | 271 -- pip/vendor/html5lib/serializer/__init__.py | 16 - pip/vendor/html5lib/serializer/htmlserializer.py | 309 -- pip/vendor/html5lib/tokenizer.py | 1731 ---------- pip/vendor/html5lib/treebuilders/__init__.py | 76 - pip/vendor/html5lib/treebuilders/_base.py | 377 --- pip/vendor/html5lib/treebuilders/dom.py | 290 -- pip/vendor/html5lib/treebuilders/etree.py | 337 -- pip/vendor/html5lib/treebuilders/etree_lxml.py | 369 --- pip/vendor/html5lib/treewalkers/__init__.py | 57 - pip/vendor/html5lib/treewalkers/_base.py | 196 -- pip/vendor/html5lib/treewalkers/dom.py | 46 - pip/vendor/html5lib/treewalkers/etree.py | 131 - pip/vendor/html5lib/treewalkers/genshistream.py | 69 - pip/vendor/html5lib/treewalkers/lxmletree.py | 208 -- pip/vendor/html5lib/treewalkers/pulldom.py | 63 - pip/vendor/html5lib/trie/__init__.py | 12 - pip/vendor/html5lib/trie/_base.py | 37 - pip/vendor/html5lib/trie/datrie.py | 44 - pip/vendor/html5lib/trie/py.py | 67 - pip/vendor/html5lib/utils.py | 78 - pip/vendor/re-vendor.py | 34 - pip/vendor/requests/__init__.py | 77 - pip/vendor/requests/adapters.py | 369 --- pip/vendor/requests/api.py | 120 - pip/vendor/requests/auth.py | 180 -- pip/vendor/requests/cacert.pem | 3290 -------------------- pip/vendor/requests/certs.py | 24 - pip/vendor/requests/compat.py | 115 - pip/vendor/requests/cookies.py | 406 --- pip/vendor/requests/exceptions.py | 59 - pip/vendor/requests/hooks.py | 45 - pip/vendor/requests/models.py | 730 ----- pip/vendor/requests/packages/__init__.py | 3 - pip/vendor/requests/packages/charade/__init__.py | 32 - pip/vendor/requests/packages/charade/big5freq.py | 925 ------ pip/vendor/requests/packages/charade/big5prober.py | 42 - .../requests/packages/charade/chardistribution.py | 231 -- .../packages/charade/charsetgroupprober.py | 106 - .../requests/packages/charade/charsetprober.py | 62 - .../packages/charade/codingstatemachine.py | 61 - pip/vendor/requests/packages/charade/compat.py | 34 - pip/vendor/requests/packages/charade/constants.py | 39 - .../requests/packages/charade/cp949prober.py | 44 - pip/vendor/requests/packages/charade/escprober.py | 86 - pip/vendor/requests/packages/charade/escsm.py | 242 -- .../requests/packages/charade/eucjpprober.py | 90 - pip/vendor/requests/packages/charade/euckrfreq.py | 596 ---- .../requests/packages/charade/euckrprober.py | 42 - pip/vendor/requests/packages/charade/euctwfreq.py | 428 --- .../requests/packages/charade/euctwprober.py | 41 - pip/vendor/requests/packages/charade/gb2312freq.py | 472 --- .../requests/packages/charade/gb2312prober.py | 41 - .../requests/packages/charade/hebrewprober.py | 283 -- pip/vendor/requests/packages/charade/jisfreq.py | 569 ---- pip/vendor/requests/packages/charade/jpcntx.py | 219 -- .../packages/charade/langbulgarianmodel.py | 229 -- .../requests/packages/charade/langcyrillicmodel.py | 329 -- .../requests/packages/charade/langgreekmodel.py | 225 -- .../requests/packages/charade/langhebrewmodel.py | 201 -- .../packages/charade/langhungarianmodel.py | 225 -- .../requests/packages/charade/langthaimodel.py | 200 -- .../requests/packages/charade/latin1prober.py | 139 - .../requests/packages/charade/mbcharsetprober.py | 86 - .../requests/packages/charade/mbcsgroupprober.py | 54 - pip/vendor/requests/packages/charade/mbcssm.py | 575 ---- .../requests/packages/charade/sbcharsetprober.py | 120 - .../requests/packages/charade/sbcsgroupprober.py | 69 - pip/vendor/requests/packages/charade/sjisprober.py | 91 - .../requests/packages/charade/universaldetector.py | 172 - pip/vendor/requests/packages/charade/utf8prober.py | 76 - pip/vendor/requests/packages/urllib3/__init__.py | 58 - .../requests/packages/urllib3/_collections.py | 94 - .../requests/packages/urllib3/connectionpool.py | 745 ----- .../requests/packages/urllib3/contrib/__init__.py | 0 .../requests/packages/urllib3/contrib/ntlmpool.py | 120 - .../requests/packages/urllib3/contrib/pyopenssl.py | 344 -- pip/vendor/requests/packages/urllib3/exceptions.py | 121 - pip/vendor/requests/packages/urllib3/fields.py | 177 -- pip/vendor/requests/packages/urllib3/filepost.py | 101 - .../requests/packages/urllib3/packages/__init__.py | 4 - .../packages/urllib3/packages/ordered_dict.py | 260 -- .../requests/packages/urllib3/packages/six.py | 385 --- .../packages/ssl_match_hostname/__init__.py | 98 - .../requests/packages/urllib3/poolmanager.py | 259 -- pip/vendor/requests/packages/urllib3/request.py | 142 - pip/vendor/requests/packages/urllib3/response.py | 301 -- pip/vendor/requests/packages/urllib3/util.py | 626 ---- pip/vendor/requests/sessions.py | 527 ---- pip/vendor/requests/status_codes.py | 88 - pip/vendor/requests/structures.py | 128 - pip/vendor/requests/utils.py | 571 ---- pip/vendor/six.py | 404 --- pip/vendor/vendor.txt | 5 - 284 files changed, 44830 insertions(+), 44830 deletions(-) create mode 100644 pip/_vendor/Makefile create mode 100644 pip/_vendor/README.rst create mode 100644 pip/_vendor/__init__.py create mode 100644 pip/_vendor/colorama/__init__.py create mode 100644 pip/_vendor/colorama/ansi.py create mode 100644 pip/_vendor/colorama/ansitowin32.py create mode 100644 pip/_vendor/colorama/initialise.py create mode 100644 pip/_vendor/colorama/win32.py create mode 100644 pip/_vendor/colorama/winterm.py create mode 100644 pip/_vendor/distlib/__init__.py create mode 100644 pip/_vendor/distlib/_backport/__init__.py create mode 100644 pip/_vendor/distlib/_backport/misc.py create mode 100644 pip/_vendor/distlib/_backport/shutil.py create mode 100644 pip/_vendor/distlib/_backport/sysconfig.cfg create mode 100644 pip/_vendor/distlib/_backport/sysconfig.py create mode 100644 pip/_vendor/distlib/_backport/tarfile.py create mode 100644 pip/_vendor/distlib/compat.py create mode 100644 pip/_vendor/distlib/database.py create mode 100644 pip/_vendor/distlib/index.py create mode 100644 pip/_vendor/distlib/locators.py create mode 100644 pip/_vendor/distlib/manifest.py create mode 100644 pip/_vendor/distlib/markers.py create mode 100644 pip/_vendor/distlib/metadata.py create mode 100644 pip/_vendor/distlib/resources.py create mode 100644 pip/_vendor/distlib/scripts.py create mode 100644 pip/_vendor/distlib/t32.exe create mode 100644 pip/_vendor/distlib/t64.exe create mode 100644 pip/_vendor/distlib/util.py create mode 100644 pip/_vendor/distlib/version.py create mode 100644 pip/_vendor/distlib/w32.exe create mode 100644 pip/_vendor/distlib/w64.exe create mode 100644 pip/_vendor/distlib/wheel.py create mode 100644 pip/_vendor/html5lib/LICENSE create mode 100644 pip/_vendor/html5lib/__init__.py create mode 100644 pip/_vendor/html5lib/constants.py create mode 100644 pip/_vendor/html5lib/filters/__init__.py create mode 100644 pip/_vendor/html5lib/filters/_base.py create mode 100644 pip/_vendor/html5lib/filters/alphabeticalattributes.py create mode 100644 pip/_vendor/html5lib/filters/inject_meta_charset.py create mode 100644 pip/_vendor/html5lib/filters/lint.py create mode 100644 pip/_vendor/html5lib/filters/optionaltags.py create mode 100644 pip/_vendor/html5lib/filters/sanitizer.py create mode 100644 pip/_vendor/html5lib/filters/whitespace.py create mode 100644 pip/_vendor/html5lib/html5parser.py create mode 100644 pip/_vendor/html5lib/ihatexml.py create mode 100644 pip/_vendor/html5lib/inputstream.py create mode 100644 pip/_vendor/html5lib/sanitizer.py create mode 100644 pip/_vendor/html5lib/serializer/__init__.py create mode 100644 pip/_vendor/html5lib/serializer/htmlserializer.py create mode 100644 pip/_vendor/html5lib/tokenizer.py create mode 100644 pip/_vendor/html5lib/treebuilders/__init__.py create mode 100644 pip/_vendor/html5lib/treebuilders/_base.py create mode 100644 pip/_vendor/html5lib/treebuilders/dom.py create mode 100644 pip/_vendor/html5lib/treebuilders/etree.py create mode 100644 pip/_vendor/html5lib/treebuilders/etree_lxml.py create mode 100644 pip/_vendor/html5lib/treewalkers/__init__.py create mode 100644 pip/_vendor/html5lib/treewalkers/_base.py create mode 100644 pip/_vendor/html5lib/treewalkers/dom.py create mode 100644 pip/_vendor/html5lib/treewalkers/etree.py create mode 100644 pip/_vendor/html5lib/treewalkers/genshistream.py create mode 100644 pip/_vendor/html5lib/treewalkers/lxmletree.py create mode 100644 pip/_vendor/html5lib/treewalkers/pulldom.py create mode 100644 pip/_vendor/html5lib/trie/__init__.py create mode 100644 pip/_vendor/html5lib/trie/_base.py create mode 100644 pip/_vendor/html5lib/trie/datrie.py create mode 100644 pip/_vendor/html5lib/trie/py.py create mode 100644 pip/_vendor/html5lib/utils.py create mode 100644 pip/_vendor/re-vendor.py create mode 100644 pip/_vendor/requests/__init__.py create mode 100644 pip/_vendor/requests/adapters.py create mode 100644 pip/_vendor/requests/api.py create mode 100644 pip/_vendor/requests/auth.py create mode 100644 pip/_vendor/requests/cacert.pem create mode 100644 pip/_vendor/requests/certs.py create mode 100644 pip/_vendor/requests/compat.py create mode 100644 pip/_vendor/requests/cookies.py create mode 100644 pip/_vendor/requests/exceptions.py create mode 100644 pip/_vendor/requests/hooks.py create mode 100644 pip/_vendor/requests/models.py create mode 100644 pip/_vendor/requests/packages/__init__.py create mode 100644 pip/_vendor/requests/packages/charade/__init__.py create mode 100644 pip/_vendor/requests/packages/charade/big5freq.py create mode 100644 pip/_vendor/requests/packages/charade/big5prober.py create mode 100644 pip/_vendor/requests/packages/charade/chardistribution.py create mode 100644 pip/_vendor/requests/packages/charade/charsetgroupprober.py create mode 100644 pip/_vendor/requests/packages/charade/charsetprober.py create mode 100644 pip/_vendor/requests/packages/charade/codingstatemachine.py create mode 100644 pip/_vendor/requests/packages/charade/compat.py create mode 100644 pip/_vendor/requests/packages/charade/constants.py create mode 100644 pip/_vendor/requests/packages/charade/cp949prober.py create mode 100644 pip/_vendor/requests/packages/charade/escprober.py create mode 100644 pip/_vendor/requests/packages/charade/escsm.py create mode 100644 pip/_vendor/requests/packages/charade/eucjpprober.py create mode 100644 pip/_vendor/requests/packages/charade/euckrfreq.py create mode 100644 pip/_vendor/requests/packages/charade/euckrprober.py create mode 100644 pip/_vendor/requests/packages/charade/euctwfreq.py create mode 100644 pip/_vendor/requests/packages/charade/euctwprober.py create mode 100644 pip/_vendor/requests/packages/charade/gb2312freq.py create mode 100644 pip/_vendor/requests/packages/charade/gb2312prober.py create mode 100644 pip/_vendor/requests/packages/charade/hebrewprober.py create mode 100644 pip/_vendor/requests/packages/charade/jisfreq.py create mode 100644 pip/_vendor/requests/packages/charade/jpcntx.py create mode 100644 pip/_vendor/requests/packages/charade/langbulgarianmodel.py create mode 100644 pip/_vendor/requests/packages/charade/langcyrillicmodel.py create mode 100644 pip/_vendor/requests/packages/charade/langgreekmodel.py create mode 100644 pip/_vendor/requests/packages/charade/langhebrewmodel.py create mode 100644 pip/_vendor/requests/packages/charade/langhungarianmodel.py create mode 100644 pip/_vendor/requests/packages/charade/langthaimodel.py create mode 100644 pip/_vendor/requests/packages/charade/latin1prober.py create mode 100644 pip/_vendor/requests/packages/charade/mbcharsetprober.py create mode 100644 pip/_vendor/requests/packages/charade/mbcsgroupprober.py create mode 100644 pip/_vendor/requests/packages/charade/mbcssm.py create mode 100644 pip/_vendor/requests/packages/charade/sbcharsetprober.py create mode 100644 pip/_vendor/requests/packages/charade/sbcsgroupprober.py create mode 100644 pip/_vendor/requests/packages/charade/sjisprober.py create mode 100644 pip/_vendor/requests/packages/charade/universaldetector.py create mode 100644 pip/_vendor/requests/packages/charade/utf8prober.py create mode 100644 pip/_vendor/requests/packages/urllib3/__init__.py create mode 100644 pip/_vendor/requests/packages/urllib3/_collections.py create mode 100644 pip/_vendor/requests/packages/urllib3/connectionpool.py create mode 100644 pip/_vendor/requests/packages/urllib3/contrib/__init__.py create mode 100644 pip/_vendor/requests/packages/urllib3/contrib/ntlmpool.py create mode 100644 pip/_vendor/requests/packages/urllib3/contrib/pyopenssl.py create mode 100644 pip/_vendor/requests/packages/urllib3/exceptions.py create mode 100644 pip/_vendor/requests/packages/urllib3/fields.py create mode 100644 pip/_vendor/requests/packages/urllib3/filepost.py create mode 100644 pip/_vendor/requests/packages/urllib3/packages/__init__.py create mode 100644 pip/_vendor/requests/packages/urllib3/packages/ordered_dict.py create mode 100644 pip/_vendor/requests/packages/urllib3/packages/six.py create mode 100644 pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py create mode 100644 pip/_vendor/requests/packages/urllib3/poolmanager.py create mode 100644 pip/_vendor/requests/packages/urllib3/request.py create mode 100644 pip/_vendor/requests/packages/urllib3/response.py create mode 100644 pip/_vendor/requests/packages/urllib3/util.py create mode 100644 pip/_vendor/requests/sessions.py create mode 100644 pip/_vendor/requests/status_codes.py create mode 100644 pip/_vendor/requests/structures.py create mode 100644 pip/_vendor/requests/utils.py create mode 100644 pip/_vendor/six.py create mode 100644 pip/_vendor/vendor.txt delete mode 100644 pip/vendor/Makefile delete mode 100644 pip/vendor/README.rst delete mode 100644 pip/vendor/__init__.py delete mode 100644 pip/vendor/colorama/__init__.py delete mode 100644 pip/vendor/colorama/ansi.py delete mode 100644 pip/vendor/colorama/ansitowin32.py delete mode 100644 pip/vendor/colorama/initialise.py delete mode 100644 pip/vendor/colorama/win32.py delete mode 100644 pip/vendor/colorama/winterm.py delete mode 100644 pip/vendor/distlib/__init__.py delete mode 100644 pip/vendor/distlib/_backport/__init__.py delete mode 100644 pip/vendor/distlib/_backport/misc.py delete mode 100644 pip/vendor/distlib/_backport/shutil.py delete mode 100644 pip/vendor/distlib/_backport/sysconfig.cfg delete mode 100644 pip/vendor/distlib/_backport/sysconfig.py delete mode 100644 pip/vendor/distlib/_backport/tarfile.py delete mode 100644 pip/vendor/distlib/compat.py delete mode 100644 pip/vendor/distlib/database.py delete mode 100644 pip/vendor/distlib/index.py delete mode 100644 pip/vendor/distlib/locators.py delete mode 100644 pip/vendor/distlib/manifest.py delete mode 100644 pip/vendor/distlib/markers.py delete mode 100644 pip/vendor/distlib/metadata.py delete mode 100644 pip/vendor/distlib/resources.py delete mode 100644 pip/vendor/distlib/scripts.py delete mode 100644 pip/vendor/distlib/t32.exe delete mode 100644 pip/vendor/distlib/t64.exe delete mode 100644 pip/vendor/distlib/util.py delete mode 100644 pip/vendor/distlib/version.py delete mode 100644 pip/vendor/distlib/w32.exe delete mode 100644 pip/vendor/distlib/w64.exe delete mode 100644 pip/vendor/distlib/wheel.py delete mode 100644 pip/vendor/html5lib/LICENSE delete mode 100644 pip/vendor/html5lib/__init__.py delete mode 100644 pip/vendor/html5lib/constants.py delete mode 100644 pip/vendor/html5lib/filters/__init__.py delete mode 100644 pip/vendor/html5lib/filters/_base.py delete mode 100644 pip/vendor/html5lib/filters/alphabeticalattributes.py delete mode 100644 pip/vendor/html5lib/filters/inject_meta_charset.py delete mode 100644 pip/vendor/html5lib/filters/lint.py delete mode 100644 pip/vendor/html5lib/filters/optionaltags.py delete mode 100644 pip/vendor/html5lib/filters/sanitizer.py delete mode 100644 pip/vendor/html5lib/filters/whitespace.py delete mode 100644 pip/vendor/html5lib/html5parser.py delete mode 100644 pip/vendor/html5lib/ihatexml.py delete mode 100644 pip/vendor/html5lib/inputstream.py delete mode 100644 pip/vendor/html5lib/sanitizer.py delete mode 100644 pip/vendor/html5lib/serializer/__init__.py delete mode 100644 pip/vendor/html5lib/serializer/htmlserializer.py delete mode 100644 pip/vendor/html5lib/tokenizer.py delete mode 100644 pip/vendor/html5lib/treebuilders/__init__.py delete mode 100644 pip/vendor/html5lib/treebuilders/_base.py delete mode 100644 pip/vendor/html5lib/treebuilders/dom.py delete mode 100644 pip/vendor/html5lib/treebuilders/etree.py delete mode 100644 pip/vendor/html5lib/treebuilders/etree_lxml.py delete mode 100644 pip/vendor/html5lib/treewalkers/__init__.py delete mode 100644 pip/vendor/html5lib/treewalkers/_base.py delete mode 100644 pip/vendor/html5lib/treewalkers/dom.py delete mode 100644 pip/vendor/html5lib/treewalkers/etree.py delete mode 100644 pip/vendor/html5lib/treewalkers/genshistream.py delete mode 100644 pip/vendor/html5lib/treewalkers/lxmletree.py delete mode 100644 pip/vendor/html5lib/treewalkers/pulldom.py delete mode 100644 pip/vendor/html5lib/trie/__init__.py delete mode 100644 pip/vendor/html5lib/trie/_base.py delete mode 100644 pip/vendor/html5lib/trie/datrie.py delete mode 100644 pip/vendor/html5lib/trie/py.py delete mode 100644 pip/vendor/html5lib/utils.py delete mode 100644 pip/vendor/re-vendor.py delete mode 100644 pip/vendor/requests/__init__.py delete mode 100644 pip/vendor/requests/adapters.py delete mode 100644 pip/vendor/requests/api.py delete mode 100644 pip/vendor/requests/auth.py delete mode 100644 pip/vendor/requests/cacert.pem delete mode 100644 pip/vendor/requests/certs.py delete mode 100644 pip/vendor/requests/compat.py delete mode 100644 pip/vendor/requests/cookies.py delete mode 100644 pip/vendor/requests/exceptions.py delete mode 100644 pip/vendor/requests/hooks.py delete mode 100644 pip/vendor/requests/models.py delete mode 100644 pip/vendor/requests/packages/__init__.py delete mode 100644 pip/vendor/requests/packages/charade/__init__.py delete mode 100644 pip/vendor/requests/packages/charade/big5freq.py delete mode 100644 pip/vendor/requests/packages/charade/big5prober.py delete mode 100644 pip/vendor/requests/packages/charade/chardistribution.py delete mode 100644 pip/vendor/requests/packages/charade/charsetgroupprober.py delete mode 100644 pip/vendor/requests/packages/charade/charsetprober.py delete mode 100644 pip/vendor/requests/packages/charade/codingstatemachine.py delete mode 100644 pip/vendor/requests/packages/charade/compat.py delete mode 100644 pip/vendor/requests/packages/charade/constants.py delete mode 100644 pip/vendor/requests/packages/charade/cp949prober.py delete mode 100644 pip/vendor/requests/packages/charade/escprober.py delete mode 100644 pip/vendor/requests/packages/charade/escsm.py delete mode 100644 pip/vendor/requests/packages/charade/eucjpprober.py delete mode 100644 pip/vendor/requests/packages/charade/euckrfreq.py delete mode 100644 pip/vendor/requests/packages/charade/euckrprober.py delete mode 100644 pip/vendor/requests/packages/charade/euctwfreq.py delete mode 100644 pip/vendor/requests/packages/charade/euctwprober.py delete mode 100644 pip/vendor/requests/packages/charade/gb2312freq.py delete mode 100644 pip/vendor/requests/packages/charade/gb2312prober.py delete mode 100644 pip/vendor/requests/packages/charade/hebrewprober.py delete mode 100644 pip/vendor/requests/packages/charade/jisfreq.py delete mode 100644 pip/vendor/requests/packages/charade/jpcntx.py delete mode 100644 pip/vendor/requests/packages/charade/langbulgarianmodel.py delete mode 100644 pip/vendor/requests/packages/charade/langcyrillicmodel.py delete mode 100644 pip/vendor/requests/packages/charade/langgreekmodel.py delete mode 100644 pip/vendor/requests/packages/charade/langhebrewmodel.py delete mode 100644 pip/vendor/requests/packages/charade/langhungarianmodel.py delete mode 100644 pip/vendor/requests/packages/charade/langthaimodel.py delete mode 100644 pip/vendor/requests/packages/charade/latin1prober.py delete mode 100644 pip/vendor/requests/packages/charade/mbcharsetprober.py delete mode 100644 pip/vendor/requests/packages/charade/mbcsgroupprober.py delete mode 100644 pip/vendor/requests/packages/charade/mbcssm.py delete mode 100644 pip/vendor/requests/packages/charade/sbcharsetprober.py delete mode 100644 pip/vendor/requests/packages/charade/sbcsgroupprober.py delete mode 100644 pip/vendor/requests/packages/charade/sjisprober.py delete mode 100644 pip/vendor/requests/packages/charade/universaldetector.py delete mode 100644 pip/vendor/requests/packages/charade/utf8prober.py delete mode 100644 pip/vendor/requests/packages/urllib3/__init__.py delete mode 100644 pip/vendor/requests/packages/urllib3/_collections.py delete mode 100644 pip/vendor/requests/packages/urllib3/connectionpool.py delete mode 100644 pip/vendor/requests/packages/urllib3/contrib/__init__.py delete mode 100644 pip/vendor/requests/packages/urllib3/contrib/ntlmpool.py delete mode 100644 pip/vendor/requests/packages/urllib3/contrib/pyopenssl.py delete mode 100644 pip/vendor/requests/packages/urllib3/exceptions.py delete mode 100644 pip/vendor/requests/packages/urllib3/fields.py delete mode 100644 pip/vendor/requests/packages/urllib3/filepost.py delete mode 100644 pip/vendor/requests/packages/urllib3/packages/__init__.py delete mode 100644 pip/vendor/requests/packages/urllib3/packages/ordered_dict.py delete mode 100644 pip/vendor/requests/packages/urllib3/packages/six.py delete mode 100644 pip/vendor/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py delete mode 100644 pip/vendor/requests/packages/urllib3/poolmanager.py delete mode 100644 pip/vendor/requests/packages/urllib3/request.py delete mode 100644 pip/vendor/requests/packages/urllib3/response.py delete mode 100644 pip/vendor/requests/packages/urllib3/util.py delete mode 100644 pip/vendor/requests/sessions.py delete mode 100644 pip/vendor/requests/status_codes.py delete mode 100644 pip/vendor/requests/structures.py delete mode 100644 pip/vendor/requests/utils.py delete mode 100644 pip/vendor/six.py delete mode 100644 pip/vendor/vendor.txt (limited to 'pip') diff --git a/pip/_vendor/Makefile b/pip/_vendor/Makefile new file mode 100644 index 000000000..416249aca --- /dev/null +++ b/pip/_vendor/Makefile @@ -0,0 +1,12 @@ +all: clean vendor + +clean: + @# Delete vendored items + find . -maxdepth 1 -mindepth 1 -type d -exec rm -rf {} \; + +vendor: + @# Install vendored libraries + pip install -t . -r vendor.txt + + @# Cleanup .egg-info directories + rm -rf *.egg-info diff --git a/pip/_vendor/README.rst b/pip/_vendor/README.rst new file mode 100644 index 000000000..324edd2f8 --- /dev/null +++ b/pip/_vendor/README.rst @@ -0,0 +1,24 @@ +Policy +====== + +Vendored libraries should not be modified except as required to actually +successfully vendor them. + + +Modifications +============= + +* html5lib has been modified to import six from pip._vendor + + +Note to Downstream Distributors +=============================== + +Libraries are vendored/bundled inside of this directory in order to prevent +end users from needing to manually install packages if they accidently remove +something that pip depends on. + +All bundled packages exist in the ``pip._vendor`` namespace, and the versions +(fetched from PyPI) that we use are located in vendor.txt. If you remove +``pip._vendor.*`` you'll also need to update the import statements that import +these packages. diff --git a/pip/_vendor/__init__.py b/pip/_vendor/__init__.py new file mode 100644 index 000000000..f233ca0d3 --- /dev/null +++ b/pip/_vendor/__init__.py @@ -0,0 +1,8 @@ +""" +pip._vendor is for vendoring dependencies of pip to prevent needing pip to +depend on something external. + +Files inside of pip._vendor should be considered immutable and should only be +updated to versions from upstream. +""" +from __future__ import absolute_import diff --git a/pip/_vendor/colorama/__init__.py b/pip/_vendor/colorama/__init__.py new file mode 100644 index 000000000..2d127fa8e --- /dev/null +++ b/pip/_vendor/colorama/__init__.py @@ -0,0 +1,7 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from .initialise import init, deinit, reinit +from .ansi import Fore, Back, Style +from .ansitowin32 import AnsiToWin32 + +VERSION = '0.2.7' + diff --git a/pip/_vendor/colorama/ansi.py b/pip/_vendor/colorama/ansi.py new file mode 100644 index 000000000..fef40571d --- /dev/null +++ b/pip/_vendor/colorama/ansi.py @@ -0,0 +1,50 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +''' +This module generates ANSI character codes to printing colors to terminals. +See: http://en.wikipedia.org/wiki/ANSI_escape_code +''' + +CSI = '\033[' + +def code_to_chars(code): + return CSI + str(code) + 'm' + +class AnsiCodes(object): + def __init__(self, codes): + for name in dir(codes): + if not name.startswith('_'): + value = getattr(codes, name) + setattr(self, name, code_to_chars(value)) + +class AnsiFore: + BLACK = 30 + RED = 31 + GREEN = 32 + YELLOW = 33 + BLUE = 34 + MAGENTA = 35 + CYAN = 36 + WHITE = 37 + RESET = 39 + +class AnsiBack: + BLACK = 40 + RED = 41 + GREEN = 42 + YELLOW = 43 + BLUE = 44 + MAGENTA = 45 + CYAN = 46 + WHITE = 47 + RESET = 49 + +class AnsiStyle: + BRIGHT = 1 + DIM = 2 + NORMAL = 22 + RESET_ALL = 0 + +Fore = AnsiCodes( AnsiFore ) +Back = AnsiCodes( AnsiBack ) +Style = AnsiCodes( AnsiStyle ) + diff --git a/pip/_vendor/colorama/ansitowin32.py b/pip/_vendor/colorama/ansitowin32.py new file mode 100644 index 000000000..d29060187 --- /dev/null +++ b/pip/_vendor/colorama/ansitowin32.py @@ -0,0 +1,189 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import re +import sys + +from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style +from .winterm import WinTerm, WinColor, WinStyle +from .win32 import windll + + +if windll is not None: + winterm = WinTerm() + + +def is_a_tty(stream): + return hasattr(stream, 'isatty') and stream.isatty() + + +class StreamWrapper(object): + ''' + Wraps a stream (such as stdout), acting as a transparent proxy for all + attribute access apart from method 'write()', which is delegated to our + Converter instance. + ''' + def __init__(self, wrapped, converter): + # double-underscore everything to prevent clashes with names of + # attributes on the wrapped stream object. + self.__wrapped = wrapped + self.__convertor = converter + + def __getattr__(self, name): + return getattr(self.__wrapped, name) + + def write(self, text): + self.__convertor.write(text) + + +class AnsiToWin32(object): + ''' + Implements a 'write()' method which, on Windows, will strip ANSI character + sequences from the text, and if outputting to a tty, will convert them into + win32 function calls. + ''' + ANSI_RE = re.compile('\033\[((?:\d|;)*)([a-zA-Z])') + + def __init__(self, wrapped, convert=None, strip=None, autoreset=False): + # The wrapped stream (normally sys.stdout or sys.stderr) + self.wrapped = wrapped + + # should we reset colors to defaults after every .write() + self.autoreset = autoreset + + # create the proxy wrapping our output stream + self.stream = StreamWrapper(wrapped, self) + + on_windows = sys.platform.startswith('win') + + # should we strip ANSI sequences from our output? + if strip is None: + strip = on_windows + self.strip = strip + + # should we should convert ANSI sequences into win32 calls? + if convert is None: + convert = on_windows and is_a_tty(wrapped) + self.convert = convert + + # dict of ansi codes to win32 functions and parameters + self.win32_calls = self.get_win32_calls() + + # are we wrapping stderr? + self.on_stderr = self.wrapped is sys.stderr + + + def should_wrap(self): + ''' + True if this class is actually needed. If false, then the output + stream will not be affected, nor will win32 calls be issued, so + wrapping stdout is not actually required. This will generally be + False on non-Windows platforms, unless optional functionality like + autoreset has been requested using kwargs to init() + ''' + return self.convert or self.strip or self.autoreset + + + def get_win32_calls(self): + if self.convert and winterm: + return { + AnsiStyle.RESET_ALL: (winterm.reset_all, ), + AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT), + AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL), + AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL), + AnsiFore.BLACK: (winterm.fore, WinColor.BLACK), + AnsiFore.RED: (winterm.fore, WinColor.RED), + AnsiFore.GREEN: (winterm.fore, WinColor.GREEN), + AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW), + AnsiFore.BLUE: (winterm.fore, WinColor.BLUE), + AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA), + AnsiFore.CYAN: (winterm.fore, WinColor.CYAN), + AnsiFore.WHITE: (winterm.fore, WinColor.GREY), + AnsiFore.RESET: (winterm.fore, ), + AnsiBack.BLACK: (winterm.back, WinColor.BLACK), + AnsiBack.RED: (winterm.back, WinColor.RED), + AnsiBack.GREEN: (winterm.back, WinColor.GREEN), + AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW), + AnsiBack.BLUE: (winterm.back, WinColor.BLUE), + AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA), + AnsiBack.CYAN: (winterm.back, WinColor.CYAN), + AnsiBack.WHITE: (winterm.back, WinColor.GREY), + AnsiBack.RESET: (winterm.back, ), + } + + + def write(self, text): + if self.strip or self.convert: + self.write_and_convert(text) + else: + self.wrapped.write(text) + self.wrapped.flush() + if self.autoreset: + self.reset_all() + + + def reset_all(self): + if self.convert: + self.call_win32('m', (0,)) + elif is_a_tty(self.wrapped): + self.wrapped.write(Style.RESET_ALL) + + + def write_and_convert(self, text): + ''' + Write the given text to our wrapped stream, stripping any ANSI + sequences from the text, and optionally converting them into win32 + calls. + ''' + cursor = 0 + for match in self.ANSI_RE.finditer(text): + start, end = match.span() + self.write_plain_text(text, cursor, start) + self.convert_ansi(*match.groups()) + cursor = end + self.write_plain_text(text, cursor, len(text)) + + + def write_plain_text(self, text, start, end): + if start < end: + self.wrapped.write(text[start:end]) + self.wrapped.flush() + + + def convert_ansi(self, paramstring, command): + if self.convert: + params = self.extract_params(paramstring) + self.call_win32(command, params) + + + def extract_params(self, paramstring): + def split(paramstring): + for p in paramstring.split(';'): + if p != '': + yield int(p) + return tuple(split(paramstring)) + + + def call_win32(self, command, params): + if params == []: + params = [0] + if command == 'm': + for param in params: + if param in self.win32_calls: + func_args = self.win32_calls[param] + func = func_args[0] + args = func_args[1:] + kwargs = dict(on_stderr=self.on_stderr) + func(*args, **kwargs) + elif command in ('H', 'f'): # set cursor position + func = winterm.set_cursor_position + func(params, on_stderr=self.on_stderr) + elif command in ('J'): + func = winterm.erase_data + func(params, on_stderr=self.on_stderr) + elif command == 'A': + if params == () or params == None: + num_rows = 1 + else: + num_rows = params[0] + func = winterm.cursor_up + func(num_rows, on_stderr=self.on_stderr) + diff --git a/pip/_vendor/colorama/initialise.py b/pip/_vendor/colorama/initialise.py new file mode 100644 index 000000000..7c8e77611 --- /dev/null +++ b/pip/_vendor/colorama/initialise.py @@ -0,0 +1,56 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import atexit +import sys + +from .ansitowin32 import AnsiToWin32 + + +orig_stdout = sys.stdout +orig_stderr = sys.stderr + +wrapped_stdout = sys.stdout +wrapped_stderr = sys.stderr + +atexit_done = False + + +def reset_all(): + AnsiToWin32(orig_stdout).reset_all() + + +def init(autoreset=False, convert=None, strip=None, wrap=True): + + if not wrap and any([autoreset, convert, strip]): + raise ValueError('wrap=False conflicts with any other arg=True') + + global wrapped_stdout, wrapped_stderr + sys.stdout = wrapped_stdout = \ + wrap_stream(orig_stdout, convert, strip, autoreset, wrap) + sys.stderr = wrapped_stderr = \ + wrap_stream(orig_stderr, convert, strip, autoreset, wrap) + + global atexit_done + if not atexit_done: + atexit.register(reset_all) + atexit_done = True + + +def deinit(): + sys.stdout = orig_stdout + sys.stderr = orig_stderr + + +def reinit(): + sys.stdout = wrapped_stdout + sys.stderr = wrapped_stdout + + +def wrap_stream(stream, convert, strip, autoreset, wrap): + if wrap: + wrapper = AnsiToWin32(stream, + convert=convert, strip=strip, autoreset=autoreset) + if wrapper.should_wrap(): + stream = wrapper.stream + return stream + + diff --git a/pip/_vendor/colorama/win32.py b/pip/_vendor/colorama/win32.py new file mode 100644 index 000000000..f4024f95e --- /dev/null +++ b/pip/_vendor/colorama/win32.py @@ -0,0 +1,134 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. + +# from winbase.h +STDOUT = -11 +STDERR = -12 + +try: + from ctypes import windll + from ctypes import wintypes +except ImportError: + windll = None + SetConsoleTextAttribute = lambda *_: None +else: + from ctypes import ( + byref, Structure, c_char, c_short, c_uint32, c_ushort, POINTER + ) + + class CONSOLE_SCREEN_BUFFER_INFO(Structure): + """struct in wincon.h.""" + _fields_ = [ + ("dwSize", wintypes._COORD), + ("dwCursorPosition", wintypes._COORD), + ("wAttributes", wintypes.WORD), + ("srWindow", wintypes.SMALL_RECT), + ("dwMaximumWindowSize", wintypes._COORD), + ] + def __str__(self): + return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % ( + self.dwSize.Y, self.dwSize.X + , self.dwCursorPosition.Y, self.dwCursorPosition.X + , self.wAttributes + , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right + , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X + ) + + _GetStdHandle = windll.kernel32.GetStdHandle + _GetStdHandle.argtypes = [ + wintypes.DWORD, + ] + _GetStdHandle.restype = wintypes.HANDLE + + _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo + _GetConsoleScreenBufferInfo.argtypes = [ + wintypes.HANDLE, + POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + _GetConsoleScreenBufferInfo.restype = wintypes.BOOL + + _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute + _SetConsoleTextAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + ] + _SetConsoleTextAttribute.restype = wintypes.BOOL + + _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition + _SetConsoleCursorPosition.argtypes = [ + wintypes.HANDLE, + wintypes._COORD, + ] + _SetConsoleCursorPosition.restype = wintypes.BOOL + + _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA + _FillConsoleOutputCharacterA.argtypes = [ + wintypes.HANDLE, + c_char, + wintypes.DWORD, + wintypes._COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputCharacterA.restype = wintypes.BOOL + + _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute + _FillConsoleOutputAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + wintypes.DWORD, + wintypes._COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputAttribute.restype = wintypes.BOOL + + handles = { + STDOUT: _GetStdHandle(STDOUT), + STDERR: _GetStdHandle(STDERR), + } + + def GetConsoleScreenBufferInfo(stream_id=STDOUT): + handle = handles[stream_id] + csbi = CONSOLE_SCREEN_BUFFER_INFO() + success = _GetConsoleScreenBufferInfo( + handle, byref(csbi)) + return csbi + + def SetConsoleTextAttribute(stream_id, attrs): + handle = handles[stream_id] + return _SetConsoleTextAttribute(handle, attrs) + + def SetConsoleCursorPosition(stream_id, position): + position = wintypes._COORD(*position) + # If the position is out of range, do nothing. + if position.Y <= 0 or position.X <= 0: + return + # Adjust for Windows' SetConsoleCursorPosition: + # 1. being 0-based, while ANSI is 1-based. + # 2. expecting (x,y), while ANSI uses (y,x). + adjusted_position = wintypes._COORD(position.Y - 1, position.X - 1) + # Adjust for viewport's scroll position + sr = GetConsoleScreenBufferInfo(STDOUT).srWindow + adjusted_position.Y += sr.Top + adjusted_position.X += sr.Left + # Resume normal processing + handle = handles[stream_id] + return _SetConsoleCursorPosition(handle, adjusted_position) + + def FillConsoleOutputCharacter(stream_id, char, length, start): + handle = handles[stream_id] + char = c_char(char) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + success = _FillConsoleOutputCharacterA( + handle, char, length, start, byref(num_written)) + return num_written.value + + def FillConsoleOutputAttribute(stream_id, attr, length, start): + ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' + handle = handles[stream_id] + attribute = wintypes.WORD(attr) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + return _FillConsoleOutputAttribute( + handle, attribute, length, start, byref(num_written)) diff --git a/pip/_vendor/colorama/winterm.py b/pip/_vendor/colorama/winterm.py new file mode 100644 index 000000000..4a711fde9 --- /dev/null +++ b/pip/_vendor/colorama/winterm.py @@ -0,0 +1,120 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from . import win32 + + +# from wincon.h +class WinColor(object): + BLACK = 0 + BLUE = 1 + GREEN = 2 + CYAN = 3 + RED = 4 + MAGENTA = 5 + YELLOW = 6 + GREY = 7 + +# from wincon.h +class WinStyle(object): + NORMAL = 0x00 # dim text, dim background + BRIGHT = 0x08 # bright text, dim background + + +class WinTerm(object): + + def __init__(self): + self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes + self.set_attrs(self._default) + self._default_fore = self._fore + self._default_back = self._back + self._default_style = self._style + + def get_attrs(self): + return self._fore + self._back * 16 + self._style + + def set_attrs(self, value): + self._fore = value & 7 + self._back = (value >> 4) & 7 + self._style = value & WinStyle.BRIGHT + + def reset_all(self, on_stderr=None): + self.set_attrs(self._default) + self.set_console(attrs=self._default) + + def fore(self, fore=None, on_stderr=False): + if fore is None: + fore = self._default_fore + self._fore = fore + self.set_console(on_stderr=on_stderr) + + def back(self, back=None, on_stderr=False): + if back is None: + back = self._default_back + self._back = back + self.set_console(on_stderr=on_stderr) + + def style(self, style=None, on_stderr=False): + if style is None: + style = self._default_style + self._style = style + self.set_console(on_stderr=on_stderr) + + def set_console(self, attrs=None, on_stderr=False): + if attrs is None: + attrs = self.get_attrs() + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleTextAttribute(handle, attrs) + + def get_position(self, handle): + position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition + # Because Windows coordinates are 0-based, + # and win32.SetConsoleCursorPosition expects 1-based. + position.X += 1 + position.Y += 1 + return position + + def set_cursor_position(self, position=None, on_stderr=False): + if position is None: + #I'm not currently tracking the position, so there is no default. + #position = self.get_position() + return + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleCursorPosition(handle, position) + + def cursor_up(self, num_rows=0, on_stderr=False): + if num_rows == 0: + return + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + position = self.get_position(handle) + adjusted_position = (position.Y - num_rows, position.X) + self.set_cursor_position(adjusted_position, on_stderr) + + def erase_data(self, mode=0, on_stderr=False): + # 0 (or None) should clear from the cursor to the end of the screen. + # 1 should clear from the cursor to the beginning of the screen. + # 2 should clear the entire screen. (And maybe move cursor to (1,1)?) + # + # At the moment, I only support mode 2. From looking at the API, it + # should be possible to calculate a different number of bytes to clear, + # and to do so relative to the cursor position. + if mode[0] not in (2,): + return + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + # here's where we'll home the cursor + coord_screen = win32.COORD(0,0) + csbi = win32.GetConsoleScreenBufferInfo(handle) + # get the number of character cells in the current buffer + dw_con_size = csbi.dwSize.X * csbi.dwSize.Y + # fill the entire screen with blanks + win32.FillConsoleOutputCharacter(handle, ' ', dw_con_size, coord_screen) + # now set the buffer's attributes accordingly + win32.FillConsoleOutputAttribute(handle, self.get_attrs(), dw_con_size, coord_screen ); + # put the cursor at (0, 0) + win32.SetConsoleCursorPosition(handle, (coord_screen.X, coord_screen.Y)) diff --git a/pip/_vendor/distlib/__init__.py b/pip/_vendor/distlib/__init__.py new file mode 100644 index 000000000..08cf98d8f --- /dev/null +++ b/pip/_vendor/distlib/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +import logging + +__version__ = '0.1.3' + +class DistlibException(Exception): + pass + +try: + from logging import NullHandler +except ImportError: # pragma: no cover + class NullHandler(logging.Handler): + def handle(self, record): pass + def emit(self, record): pass + +logger = logging.getLogger(__name__) +logger.addHandler(NullHandler()) diff --git a/pip/_vendor/distlib/_backport/__init__.py b/pip/_vendor/distlib/_backport/__init__.py new file mode 100644 index 000000000..f7dbf4c9a --- /dev/null +++ b/pip/_vendor/distlib/_backport/__init__.py @@ -0,0 +1,6 @@ +"""Modules copied from Python 3 standard libraries, for internal use only. + +Individual classes and functions are found in d2._backport.misc. Intended +usage is to always import things missing from 3.1 from that module: the +built-in/stdlib objects will be used if found. +""" diff --git a/pip/_vendor/distlib/_backport/misc.py b/pip/_vendor/distlib/_backport/misc.py new file mode 100644 index 000000000..cfb318d34 --- /dev/null +++ b/pip/_vendor/distlib/_backport/misc.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Backports for individual classes and functions.""" + +import os +import sys + +__all__ = ['cache_from_source', 'callable', 'fsencode'] + + +try: + from imp import cache_from_source +except ImportError: + def cache_from_source(py_file, debug=__debug__): + ext = debug and 'c' or 'o' + return py_file + ext + + +try: + callable = callable +except NameError: + from collections import Callable + + def callable(obj): + return isinstance(obj, Callable) + + +try: + fsencode = os.fsencode +except AttributeError: + def fsencode(filename): + if isinstance(filename, bytes): + return filename + elif isinstance(filename, str): + return filename.encode(sys.getfilesystemencoding()) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) diff --git a/pip/_vendor/distlib/_backport/shutil.py b/pip/_vendor/distlib/_backport/shutil.py new file mode 100644 index 000000000..9e2e234d4 --- /dev/null +++ b/pip/_vendor/distlib/_backport/shutil.py @@ -0,0 +1,761 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Utility functions for copying and archiving files and directory trees. + +XXX The functions here don't copy the resource fork or other metadata on Mac. + +""" + +import os +import sys +import stat +from os.path import abspath +import fnmatch +import collections +import errno +from . import tarfile + +try: + import bz2 + _BZ2_SUPPORTED = True +except ImportError: + _BZ2_SUPPORTED = False + +try: + from pwd import getpwnam +except ImportError: + getpwnam = None + +try: + from grp import getgrnam +except ImportError: + getgrnam = None + +__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", + "copytree", "move", "rmtree", "Error", "SpecialFileError", + "ExecError", "make_archive", "get_archive_formats", + "register_archive_format", "unregister_archive_format", + "get_unpack_formats", "register_unpack_format", + "unregister_unpack_format", "unpack_archive", "ignore_patterns"] + +class Error(EnvironmentError): + pass + +class SpecialFileError(EnvironmentError): + """Raised when trying to do a kind of operation (e.g. copying) which is + not supported on a special file (e.g. a named pipe)""" + +class ExecError(EnvironmentError): + """Raised when a command could not be executed""" + +class ReadError(EnvironmentError): + """Raised when an archive cannot be read""" + +class RegistryError(Exception): + """Raised when a registery operation with the archiving + and unpacking registeries fails""" + + +try: + WindowsError +except NameError: + WindowsError = None + +def copyfileobj(fsrc, fdst, length=16*1024): + """copy data from file-like object fsrc to file-like object fdst""" + while 1: + buf = fsrc.read(length) + if not buf: + break + fdst.write(buf) + +def _samefile(src, dst): + # Macintosh, Unix. + if hasattr(os.path, 'samefile'): + try: + return os.path.samefile(src, dst) + except OSError: + return False + + # All other platforms: check for same pathname. + return (os.path.normcase(os.path.abspath(src)) == + os.path.normcase(os.path.abspath(dst))) + +def copyfile(src, dst): + """Copy data from src to dst""" + if _samefile(src, dst): + raise Error("`%s` and `%s` are the same file" % (src, dst)) + + for fn in [src, dst]: + try: + st = os.stat(fn) + except OSError: + # File most likely does not exist + pass + else: + # XXX What about other special files? (sockets, devices...) + if stat.S_ISFIFO(st.st_mode): + raise SpecialFileError("`%s` is a named pipe" % fn) + + with open(src, 'rb') as fsrc: + with open(dst, 'wb') as fdst: + copyfileobj(fsrc, fdst) + +def copymode(src, dst): + """Copy mode bits from src to dst""" + if hasattr(os, 'chmod'): + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + os.chmod(dst, mode) + +def copystat(src, dst): + """Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + if hasattr(os, 'utime'): + os.utime(dst, (st.st_atime, st.st_mtime)) + if hasattr(os, 'chmod'): + os.chmod(dst, mode) + if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): + try: + os.chflags(dst, st.st_flags) + except OSError as why: + if (not hasattr(errno, 'EOPNOTSUPP') or + why.errno != errno.EOPNOTSUPP): + raise + +def copy(src, dst): + """Copy data and mode bits ("cp src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copymode(src, dst) + +def copy2(src, dst): + """Copy data and all stat info ("cp -p src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copystat(src, dst) + +def ignore_patterns(*patterns): + """Function that can be used as copytree() ignore parameter. + + Patterns is a sequence of glob-style patterns + that are used to exclude files""" + def _ignore_patterns(path, names): + ignored_names = [] + for pattern in patterns: + ignored_names.extend(fnmatch.filter(names, pattern)) + return set(ignored_names) + return _ignore_patterns + +def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, + ignore_dangling_symlinks=False): + """Recursively copy a directory tree. + + The destination directory must not already exist. + If exception(s) occur, an Error is raised with a list of reasons. + + If the optional symlinks flag is true, symbolic links in the + source tree result in symbolic links in the destination tree; if + it is false, the contents of the files pointed to by symbolic + links are copied. If the file pointed by the symlink doesn't + exist, an exception will be added in the list of errors raised in + an Error exception at the end of the copy process. + + You can set the optional ignore_dangling_symlinks flag to true if you + want to silence this exception. Notice that this has no effect on + platforms that don't support os.symlink. + + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + + callable(src, names) -> ignored_names + + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + + The optional copy_function argument is a callable that will be used + to copy each file. It will be called with the source path and the + destination path as arguments. By default, copy2() is used, but any + function that supports the same signature (like copy()) can be used. + + """ + names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + for name in names: + if name in ignored_names: + continue + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + try: + if os.path.islink(srcname): + linkto = os.readlink(srcname) + if symlinks: + os.symlink(linkto, dstname) + else: + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occurs. copy2 will raise an error + copy_function(srcname, dstname) + elif os.path.isdir(srcname): + copytree(srcname, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcname, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + except EnvironmentError as why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError as why: + if WindowsError is not None and isinstance(why, WindowsError): + # Copying file access times may fail on Windows + pass + else: + errors.extend((src, dst, str(why))) + if errors: + raise Error(errors) + +def rmtree(path, ignore_errors=False, onerror=None): + """Recursively delete a directory tree. + + If ignore_errors is set, errors are ignored; otherwise, if onerror + is set, it is called to handle the error with arguments (func, + path, exc_info) where func is os.listdir, os.remove, or os.rmdir; + path is the argument to that function that caused it to fail; and + exc_info is a tuple returned by sys.exc_info(). If ignore_errors + is false and onerror is None, an exception is raised. + + """ + if ignore_errors: + def onerror(*args): + pass + elif onerror is None: + def onerror(*args): + raise + try: + if os.path.islink(path): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError: + onerror(os.path.islink, path, sys.exc_info()) + # can't continue even if onerror hook returns + return + names = [] + try: + names = os.listdir(path) + except os.error: + onerror(os.listdir, path, sys.exc_info()) + for name in names: + fullname = os.path.join(path, name) + try: + mode = os.lstat(fullname).st_mode + except os.error: + mode = 0 + if stat.S_ISDIR(mode): + rmtree(fullname, ignore_errors, onerror) + else: + try: + os.remove(fullname) + except os.error: + onerror(os.remove, fullname, sys.exc_info()) + try: + os.rmdir(path) + except os.error: + onerror(os.rmdir, path, sys.exc_info()) + + +def _basename(path): + # A basename() variant which first strips the trailing slash, if present. + # Thus we always get the last component of the path, even for directories. + return os.path.basename(path.rstrip(os.path.sep)) + +def move(src, dst): + """Recursively move a file or directory to another location. This is + similar to the Unix "mv" command. + + If the destination is a directory or a symlink to a directory, the source + is moved inside the directory. The destination path must not already + exist. + + If the destination already exists but is not a directory, it may be + overwritten depending on os.rename() semantics. + + If the destination is on our current filesystem, then rename() is used. + Otherwise, src is copied to the destination and then removed. + A lot more could be done here... A look at a mv.c shows a lot of + the issues this implementation glosses over. + + """ + real_dst = dst + if os.path.isdir(dst): + if _samefile(src, dst): + # We might be on a case insensitive filesystem, + # perform the rename anyway. + os.rename(src, dst) + return + + real_dst = os.path.join(dst, _basename(src)) + if os.path.exists(real_dst): + raise Error("Destination path '%s' already exists" % real_dst) + try: + os.rename(src, real_dst) + except OSError: + if os.path.isdir(src): + if _destinsrc(src, dst): + raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst)) + copytree(src, real_dst, symlinks=True) + rmtree(src) + else: + copy2(src, real_dst) + os.unlink(src) + +def _destinsrc(src, dst): + src = abspath(src) + dst = abspath(dst) + if not src.endswith(os.path.sep): + src += os.path.sep + if not dst.endswith(os.path.sep): + dst += os.path.sep + return dst.startswith(src) + +def _get_gid(name): + """Returns a gid, given a group name.""" + if getgrnam is None or name is None: + return None + try: + result = getgrnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _get_uid(name): + """Returns an uid, given a user name.""" + if getpwnam is None or name is None: + return None + try: + result = getpwnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, + owner=None, group=None, logger=None): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "bzip2", or None. + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_name' + ".tar", possibly plus + the appropriate compression extension (".gz", or ".bz2"). + + Returns the output filename. + """ + tar_compression = {'gzip': 'gz', None: ''} + compress_ext = {'gzip': '.gz'} + + if _BZ2_SUPPORTED: + tar_compression['bzip2'] = 'bz2' + compress_ext['bzip2'] = '.bz2' + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext: + raise ValueError("bad value for 'compress', or compression format not " + "supported : {0}".format(compress)) + + archive_name = base_name + '.tar' + compress_ext.get(compress, '') + archive_dir = os.path.dirname(archive_name) + + if not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # creating the tarball + if logger is not None: + logger.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + return archive_name + +def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): + # XXX see if we want to keep an external call here + if verbose: + zipoptions = "-r" + else: + zipoptions = "-rq" + from distutils.errors import DistutilsExecError + from distutils.spawn import spawn + try: + spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) + except DistutilsExecError: + # XXX really should distinguish between "couldn't find + # external 'zip' command" and "zip failed". + raise ExecError("unable to create zip file '%s': " + "could neither import the 'zipfile' module nor " + "find a standalone zip utility") % zip_filename + +def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): + """Create a zip file from all the files under 'base_dir'. + + The output zip file will be named 'base_name' + ".zip". Uses either the + "zipfile" Python module (if available) or the InfoZIP "zip" utility + (if installed and found on the default search path). If neither tool is + available, raises ExecError. Returns the name of the output zip + file. + """ + zip_filename = base_name + ".zip" + archive_dir = os.path.dirname(base_name) + + if not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # If zipfile module is not available, try spawning an external 'zip' + # command. + try: + import zipfile + except ImportError: + zipfile = None + + if zipfile is None: + _call_external_zip(base_dir, zip_filename, verbose, dry_run) + else: + if logger is not None: + logger.info("creating '%s' and adding '%s' to it", + zip_filename, base_dir) + + if not dry_run: + zip = zipfile.ZipFile(zip_filename, "w", + compression=zipfile.ZIP_DEFLATED) + + for dirpath, dirnames, filenames in os.walk(base_dir): + for name in filenames: + path = os.path.normpath(os.path.join(dirpath, name)) + if os.path.isfile(path): + zip.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + zip.close() + + return zip_filename + +_ARCHIVE_FORMATS = { + 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (_make_zipfile, [], "ZIP file"), + } + +if _BZ2_SUPPORTED: + _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], + "bzip2'ed tar-file") + +def get_archive_formats(): + """Returns a list of supported formats for archiving and unarchiving. + + Each element of the returned sequence is a tuple (name, description) + """ + formats = [(name, registry[2]) for name, registry in + _ARCHIVE_FORMATS.items()] + formats.sort() + return formats + +def register_archive_format(name, function, extra_args=None, description=''): + """Registers an archive format. + + name is the name of the format. function is the callable that will be + used to create archives. If provided, extra_args is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_archive_formats() function. + """ + if extra_args is None: + extra_args = [] + if not isinstance(function, collections.Callable): + raise TypeError('The %s object is not callable' % function) + if not isinstance(extra_args, (tuple, list)): + raise TypeError('extra_args needs to be a sequence') + for element in extra_args: + if not isinstance(element, (tuple, list)) or len(element) !=2: + raise TypeError('extra_args elements are : (arg_name, value)') + + _ARCHIVE_FORMATS[name] = (function, extra_args, description) + +def unregister_archive_format(name): + del _ARCHIVE_FORMATS[name] + +def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, + dry_run=0, owner=None, group=None, logger=None): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "bztar" + or "gztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. + """ + save_cwd = os.getcwd() + if root_dir is not None: + if logger is not None: + logger.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run, 'logger': logger} + + try: + format_info = _ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError("unknown archive format '%s'" % format) + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + if logger is not None: + logger.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename + + +def get_unpack_formats(): + """Returns a list of supported formats for unpacking. + + Each element of the returned sequence is a tuple + (name, extensions, description) + """ + formats = [(name, info[0], info[3]) for name, info in + _UNPACK_FORMATS.items()] + formats.sort() + return formats + +def _check_unpack_options(extensions, function, extra_args): + """Checks what gets registered as an unpacker.""" + # first make sure no other unpacker is registered for this extension + existing_extensions = {} + for name, info in _UNPACK_FORMATS.items(): + for ext in info[0]: + existing_extensions[ext] = name + + for extension in extensions: + if extension in existing_extensions: + msg = '%s is already registered for "%s"' + raise RegistryError(msg % (extension, + existing_extensions[extension])) + + if not isinstance(function, collections.Callable): + raise TypeError('The registered function must be a callable') + + +def register_unpack_format(name, extensions, function, extra_args=None, + description=''): + """Registers an unpack format. + + `name` is the name of the format. `extensions` is a list of extensions + corresponding to the format. + + `function` is the callable that will be + used to unpack archives. The callable will receive archives to unpack. + If it's unable to handle an archive, it needs to raise a ReadError + exception. + + If provided, `extra_args` is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_unpack_formats() function. + """ + if extra_args is None: + extra_args = [] + _check_unpack_options(extensions, function, extra_args) + _UNPACK_FORMATS[name] = extensions, function, extra_args, description + +def unregister_unpack_format(name): + """Removes the pack format from the registery.""" + del _UNPACK_FORMATS[name] + +def _ensure_directory(path): + """Ensure that the parent directory of `path` exists""" + dirname = os.path.dirname(path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + +def _unpack_zipfile(filename, extract_dir): + """Unpack zip `filename` to `extract_dir` + """ + try: + import zipfile + except ImportError: + raise ReadError('zlib not supported, cannot unpack this archive.') + + if not zipfile.is_zipfile(filename): + raise ReadError("%s is not a zip file" % filename) + + zip = zipfile.ZipFile(filename) + try: + for info in zip.infolist(): + name = info.filename + + # don't extract absolute paths or ones with .. in them + if name.startswith('/') or '..' in name: + continue + + target = os.path.join(extract_dir, *name.split('/')) + if not target: + continue + + _ensure_directory(target) + if not name.endswith('/'): + # file + data = zip.read(info.filename) + f = open(target, 'wb') + try: + f.write(data) + finally: + f.close() + del data + finally: + zip.close() + +def _unpack_tarfile(filename, extract_dir): + """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` + """ + try: + tarobj = tarfile.open(filename) + except tarfile.TarError: + raise ReadError( + "%s is not a compressed or uncompressed tar file" % filename) + try: + tarobj.extractall(extract_dir) + finally: + tarobj.close() + +_UNPACK_FORMATS = { + 'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), + 'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), + 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") + } + +if _BZ2_SUPPORTED: + _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [], + "bzip2'ed tar-file") + +def _find_unpack_format(filename): + for name, info in _UNPACK_FORMATS.items(): + for extension in info[0]: + if filename.endswith(extension): + return name + return None + +def unpack_archive(filename, extract_dir=None, format=None): + """Unpack an archive. + + `filename` is the name of the archive. + + `extract_dir` is the name of the target directory, where the archive + is unpacked. If not provided, the current working directory is used. + + `format` is the archive format: one of "zip", "tar", or "gztar". Or any + other registered format. If not provided, unpack_archive will use the + filename extension and see if an unpacker was registered for that + extension. + + In case none is found, a ValueError is raised. + """ + if extract_dir is None: + extract_dir = os.getcwd() + + if format is not None: + try: + format_info = _UNPACK_FORMATS[format] + except KeyError: + raise ValueError("Unknown unpack format '{0}'".format(format)) + + func = format_info[1] + func(filename, extract_dir, **dict(format_info[2])) + else: + # we need to look at the registered unpackers supported extensions + format = _find_unpack_format(filename) + if format is None: + raise ReadError("Unknown archive format '{0}'".format(filename)) + + func = _UNPACK_FORMATS[format][1] + kwargs = dict(_UNPACK_FORMATS[format][2]) + func(filename, extract_dir, **kwargs) diff --git a/pip/_vendor/distlib/_backport/sysconfig.cfg b/pip/_vendor/distlib/_backport/sysconfig.cfg new file mode 100644 index 000000000..1746bd01c --- /dev/null +++ b/pip/_vendor/distlib/_backport/sysconfig.cfg @@ -0,0 +1,84 @@ +[posix_prefix] +# Configuration directories. Some of these come straight out of the +# configure script. They are for implementing the other variables, not to +# be used directly in [resource_locations]. +confdir = /etc +datadir = /usr/share +libdir = /usr/lib +statedir = /var +# User resource directory +local = ~/.local/{distribution.name} + +stdlib = {base}/lib/python{py_version_short} +platstdlib = {platbase}/lib/python{py_version_short} +purelib = {base}/lib/python{py_version_short}/site-packages +platlib = {platbase}/lib/python{py_version_short}/site-packages +include = {base}/include/python{py_version_short}{abiflags} +platinclude = {platbase}/include/python{py_version_short}{abiflags} +data = {base} + +[posix_home] +stdlib = {base}/lib/python +platstdlib = {base}/lib/python +purelib = {base}/lib/python +platlib = {base}/lib/python +include = {base}/include/python +platinclude = {base}/include/python +scripts = {base}/bin +data = {base} + +[nt] +stdlib = {base}/Lib +platstdlib = {base}/Lib +purelib = {base}/Lib/site-packages +platlib = {base}/Lib/site-packages +include = {base}/Include +platinclude = {base}/Include +scripts = {base}/Scripts +data = {base} + +[os2] +stdlib = {base}/Lib +platstdlib = {base}/Lib +purelib = {base}/Lib/site-packages +platlib = {base}/Lib/site-packages +include = {base}/Include +platinclude = {base}/Include +scripts = {base}/Scripts +data = {base} + +[os2_home] +stdlib = {userbase}/lib/python{py_version_short} +platstdlib = {userbase}/lib/python{py_version_short} +purelib = {userbase}/lib/python{py_version_short}/site-packages +platlib = {userbase}/lib/python{py_version_short}/site-packages +include = {userbase}/include/python{py_version_short} +scripts = {userbase}/bin +data = {userbase} + +[nt_user] +stdlib = {userbase}/Python{py_version_nodot} +platstdlib = {userbase}/Python{py_version_nodot} +purelib = {userbase}/Python{py_version_nodot}/site-packages +platlib = {userbase}/Python{py_version_nodot}/site-packages +include = {userbase}/Python{py_version_nodot}/Include +scripts = {userbase}/Scripts +data = {userbase} + +[posix_user] +stdlib = {userbase}/lib/python{py_version_short} +platstdlib = {userbase}/lib/python{py_version_short} +purelib = {userbase}/lib/python{py_version_short}/site-packages +platlib = {userbase}/lib/python{py_version_short}/site-packages +include = {userbase}/include/python{py_version_short} +scripts = {userbase}/bin +data = {userbase} + +[osx_framework_user] +stdlib = {userbase}/lib/python +platstdlib = {userbase}/lib/python +purelib = {userbase}/lib/python/site-packages +platlib = {userbase}/lib/python/site-packages +include = {userbase}/include +scripts = {userbase}/bin +data = {userbase} diff --git a/pip/_vendor/distlib/_backport/sysconfig.py b/pip/_vendor/distlib/_backport/sysconfig.py new file mode 100644 index 000000000..34e2ef197 --- /dev/null +++ b/pip/_vendor/distlib/_backport/sysconfig.py @@ -0,0 +1,787 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Access to Python's configuration information.""" + +import codecs +import os +import re +import sys +from os.path import pardir, realpath +try: + import configparser +except ImportError: + import ConfigParser as configparser + + +__all__ = [ + 'get_config_h_filename', + 'get_config_var', + 'get_config_vars', + 'get_makefile_filename', + 'get_path', + 'get_path_names', + 'get_paths', + 'get_platform', + 'get_python_version', + 'get_scheme_names', + 'parse_config_h', +] + + +def _safe_realpath(path): + try: + return realpath(path) + except OSError: + return path + + +if sys.executable: + _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) +else: + # sys.executable can be empty if argv[0] has been changed and Python is + # unable to retrieve the real program name + _PROJECT_BASE = _safe_realpath(os.getcwd()) + +if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) +# PC/VS7.1 +if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) +# PC/AMD64 +if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) + + +def is_python_build(): + for fn in ("Setup.dist", "Setup.local"): + if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): + return True + return False + +_PYTHON_BUILD = is_python_build() + +_cfg_read = False + +def _ensure_cfg_read(): + global _cfg_read + if not _cfg_read: + from distlib.resources import finder + _finder = finder('distlib._backport') + _cfgfile = _finder.find('sysconfig.cfg') + assert _cfgfile, 'sysconfig.cfg exists' + with _cfgfile.as_stream() as s: + _SCHEMES.readfp(s) + if _PYTHON_BUILD: + for scheme in ('posix_prefix', 'posix_home'): + _SCHEMES.set(scheme, 'include', '{srcdir}/Include') + _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.') + + _cfg_read = True + + +_SCHEMES = configparser.RawConfigParser() +_VAR_REPL = re.compile(r'\{([^{]*?)\}') + +def _expand_globals(config): + _ensure_cfg_read() + if config.has_section('globals'): + globals = config.items('globals') + else: + globals = tuple() + + sections = config.sections() + for section in sections: + if section == 'globals': + continue + for option, value in globals: + if config.has_option(section, option): + continue + config.set(section, option, value) + config.remove_section('globals') + + # now expanding local variables defined in the cfg file + # + for section in config.sections(): + variables = dict(config.items(section)) + + def _replacer(matchobj): + name = matchobj.group(1) + if name in variables: + return variables[name] + return matchobj.group(0) + + for option, value in config.items(section): + config.set(section, option, _VAR_REPL.sub(_replacer, value)) + +#_expand_globals(_SCHEMES) + + # FIXME don't rely on sys.version here, its format is an implementation detail + # of CPython, use sys.version_info or sys.hexversion +_PY_VERSION = sys.version.split()[0] +_PY_VERSION_SHORT = sys.version[:3] +_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2] +_PREFIX = os.path.normpath(sys.prefix) +_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) +_CONFIG_VARS = None +_USER_BASE = None + + +def _subst_vars(path, local_vars): + """In the string `path`, replace tokens like {some.thing} with the + corresponding value from the map `local_vars`. + + If there is no corresponding value, leave the token unchanged. + """ + def _replacer(matchobj): + name = matchobj.group(1) + if name in local_vars: + return local_vars[name] + elif name in os.environ: + return os.environ[name] + return matchobj.group(0) + return _VAR_REPL.sub(_replacer, path) + + +def _extend_dict(target_dict, other_dict): + target_keys = target_dict.keys() + for key, value in other_dict.items(): + if key in target_keys: + continue + target_dict[key] = value + + +def _expand_vars(scheme, vars): + res = {} + if vars is None: + vars = {} + _extend_dict(vars, get_config_vars()) + + for key, value in _SCHEMES.items(scheme): + if os.name in ('posix', 'nt'): + value = os.path.expanduser(value) + res[key] = os.path.normpath(_subst_vars(value, vars)) + return res + + +def format_value(value, vars): + def _replacer(matchobj): + name = matchobj.group(1) + if name in vars: + return vars[name] + return matchobj.group(0) + return _VAR_REPL.sub(_replacer, value) + + +def _get_default_scheme(): + if os.name == 'posix': + # the default scheme for posix is posix_prefix + return 'posix_prefix' + return os.name + + +def _getuserbase(): + env_base = os.environ.get("PYTHONUSERBASE", None) + + def joinuser(*args): + return os.path.expanduser(os.path.join(*args)) + + # what about 'os2emx', 'riscos' ? + if os.name == "nt": + base = os.environ.get("APPDATA") or "~" + if env_base: + return env_base + else: + return joinuser(base, "Python") + + if sys.platform == "darwin": + framework = get_config_var("PYTHONFRAMEWORK") + if framework: + if env_base: + return env_base + else: + return joinuser("~", "Library", framework, "%d.%d" % + sys.version_info[:2]) + + if env_base: + return env_base + else: + return joinuser("~", ".local") + + +def _parse_makefile(filename, vars=None): + """Parse a Makefile-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. + """ + # Regexes needed for parsing Makefile (and similar syntaxes, + # like old-style Setup files). + _variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") + _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") + _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") + + if vars is None: + vars = {} + done = {} + notdone = {} + + with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f: + lines = f.readlines() + + for line in lines: + if line.startswith('#') or line.strip() == '': + continue + m = _variable_rx.match(line) + if m: + n, v = m.group(1, 2) + v = v.strip() + # `$$' is a literal `$' in make + tmpv = v.replace('$$', '') + + if "$" in tmpv: + notdone[n] = v + else: + try: + v = int(v) + except ValueError: + # insert literal `$' + done[n] = v.replace('$$', '$') + else: + done[n] = v + + # do variable interpolation here + variables = list(notdone.keys()) + + # Variables with a 'PY_' prefix in the makefile. These need to + # be made available without that prefix through sysconfig. + # Special care is needed to ensure that variable expansion works, even + # if the expansion uses the name without a prefix. + renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') + + while len(variables) > 0: + for name in tuple(variables): + value = notdone[name] + m = _findvar1_rx.search(value) or _findvar2_rx.search(value) + if m is not None: + n = m.group(1) + found = True + if n in done: + item = str(done[n]) + elif n in notdone: + # get it on a subsequent round + found = False + elif n in os.environ: + # do it like make: fall back to environment + item = os.environ[n] + + elif n in renamed_variables: + if (name.startswith('PY_') and + name[3:] in renamed_variables): + item = "" + + elif 'PY_' + n in notdone: + found = False + + else: + item = str(done['PY_' + n]) + + else: + done[n] = item = "" + + if found: + after = value[m.end():] + value = value[:m.start()] + item + after + if "$" in after: + notdone[name] = value + else: + try: + value = int(value) + except ValueError: + done[name] = value.strip() + else: + done[name] = value + variables.remove(name) + + if (name.startswith('PY_') and + name[3:] in renamed_variables): + + name = name[3:] + if name not in done: + done[name] = value + + else: + # bogus variable reference (e.g. "prefix=$/opt/python"); + # just drop it since we can't deal + done[name] = value + variables.remove(name) + + # strip spurious spaces + for k, v in done.items(): + if isinstance(v, str): + done[k] = v.strip() + + # save the results in the global dictionary + vars.update(done) + return vars + + +def get_makefile_filename(): + """Return the path of the Makefile.""" + if _PYTHON_BUILD: + return os.path.join(_PROJECT_BASE, "Makefile") + if hasattr(sys, 'abiflags'): + config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags) + else: + config_dir_name = 'config' + return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') + + +def _init_posix(vars): + """Initialize the module as appropriate for POSIX systems.""" + # load the installed Makefile: + makefile = get_makefile_filename() + try: + _parse_makefile(makefile, vars) + except IOError as e: + msg = "invalid Python installation: unable to open %s" % makefile + if hasattr(e, "strerror"): + msg = msg + " (%s)" % e.strerror + raise IOError(msg) + # load the installed pyconfig.h: + config_h = get_config_h_filename() + try: + with open(config_h) as f: + parse_config_h(f, vars) + except IOError as e: + msg = "invalid Python installation: unable to open %s" % config_h + if hasattr(e, "strerror"): + msg = msg + " (%s)" % e.strerror + raise IOError(msg) + # On AIX, there are wrong paths to the linker scripts in the Makefile + # -- these paths are relative to the Python source, but when installed + # the scripts are in another directory. + if _PYTHON_BUILD: + vars['LDSHARED'] = vars['BLDSHARED'] + + +def _init_non_posix(vars): + """Initialize the module as appropriate for NT""" + # set basic install directories + vars['LIBDEST'] = get_path('stdlib') + vars['BINLIBDEST'] = get_path('platstdlib') + vars['INCLUDEPY'] = get_path('include') + vars['SO'] = '.pyd' + vars['EXE'] = '.exe' + vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT + vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) + +# +# public APIs +# + + +def parse_config_h(fp, vars=None): + """Parse a config.h-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. + """ + if vars is None: + vars = {} + define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") + undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") + + while True: + line = fp.readline() + if not line: + break + m = define_rx.match(line) + if m: + n, v = m.group(1, 2) + try: + v = int(v) + except ValueError: + pass + vars[n] = v + else: + m = undef_rx.match(line) + if m: + vars[m.group(1)] = 0 + return vars + + +def get_config_h_filename(): + """Return the path of pyconfig.h.""" + if _PYTHON_BUILD: + if os.name == "nt": + inc_dir = os.path.join(_PROJECT_BASE, "PC") + else: + inc_dir = _PROJECT_BASE + else: + inc_dir = get_path('platinclude') + return os.path.join(inc_dir, 'pyconfig.h') + + +def get_scheme_names(): + """Return a tuple containing the schemes names.""" + return tuple(sorted(_SCHEMES.sections())) + + +def get_path_names(): + """Return a tuple containing the paths names.""" + # xxx see if we want a static list + return _SCHEMES.options('posix_prefix') + + +def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): + """Return a mapping containing an install scheme. + + ``scheme`` is the install scheme name. If not provided, it will + return the default scheme for the current platform. + """ + _ensure_cfg_read() + if expand: + return _expand_vars(scheme, vars) + else: + return dict(_SCHEMES.items(scheme)) + + +def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): + """Return a path corresponding to the scheme. + + ``scheme`` is the install scheme name. + """ + return get_paths(scheme, vars, expand)[name] + + +def get_config_vars(*args): + """With no arguments, return a dictionary of all configuration + variables relevant for the current platform. + + On Unix, this means every variable defined in Python's installed Makefile; + On Windows and Mac OS it's a much smaller set. + + With arguments, return a list of values that result from looking up + each argument in the configuration variable dictionary. + """ + global _CONFIG_VARS + if _CONFIG_VARS is None: + _CONFIG_VARS = {} + # Normalized versions of prefix and exec_prefix are handy to have; + # in fact, these are the standard versions used most places in the + # distutils2 module. + _CONFIG_VARS['prefix'] = _PREFIX + _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX + _CONFIG_VARS['py_version'] = _PY_VERSION + _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT + _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] + _CONFIG_VARS['base'] = _PREFIX + _CONFIG_VARS['platbase'] = _EXEC_PREFIX + _CONFIG_VARS['projectbase'] = _PROJECT_BASE + try: + _CONFIG_VARS['abiflags'] = sys.abiflags + except AttributeError: + # sys.abiflags may not be defined on all platforms. + _CONFIG_VARS['abiflags'] = '' + + if os.name in ('nt', 'os2'): + _init_non_posix(_CONFIG_VARS) + if os.name == 'posix': + _init_posix(_CONFIG_VARS) + # Setting 'userbase' is done below the call to the + # init function to enable using 'get_config_var' in + # the init-function. + if sys.version >= '2.6': + _CONFIG_VARS['userbase'] = _getuserbase() + + if 'srcdir' not in _CONFIG_VARS: + _CONFIG_VARS['srcdir'] = _PROJECT_BASE + else: + _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir']) + + # Convert srcdir into an absolute path if it appears necessary. + # Normally it is relative to the build directory. However, during + # testing, for example, we might be running a non-installed python + # from a different directory. + if _PYTHON_BUILD and os.name == "posix": + base = _PROJECT_BASE + try: + cwd = os.getcwd() + except OSError: + cwd = None + if (not os.path.isabs(_CONFIG_VARS['srcdir']) and + base != cwd): + # srcdir is relative and we are not in the same directory + # as the executable. Assume executable is in the build + # directory and make srcdir absolute. + srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) + _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) + + if sys.platform == 'darwin': + kernel_version = os.uname()[2] # Kernel version (8.4.3) + major_version = int(kernel_version.split('.')[0]) + + if major_version < 8: + # On Mac OS X before 10.4, check if -arch and -isysroot + # are in CFLAGS or LDFLAGS and remove them if they are. + # This is needed when building extensions on a 10.3 system + # using a universal build of python. + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + flags = _CONFIG_VARS[key] + flags = re.sub('-arch\s+\w+\s', ' ', flags) + flags = re.sub('-isysroot [^ \t]*', ' ', flags) + _CONFIG_VARS[key] = flags + else: + # Allow the user to override the architecture flags using + # an environment variable. + # NOTE: This name was introduced by Apple in OSX 10.5 and + # is used by several scripting languages distributed with + # that OS release. + if 'ARCHFLAGS' in os.environ: + arch = os.environ['ARCHFLAGS'] + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + + flags = _CONFIG_VARS[key] + flags = re.sub('-arch\s+\w+\s', ' ', flags) + flags = flags + ' ' + arch + _CONFIG_VARS[key] = flags + + # If we're on OSX 10.5 or later and the user tries to + # compiles an extension using an SDK that is not present + # on the current machine it is better to not use an SDK + # than to fail. + # + # The major usecase for this is users using a Python.org + # binary installer on OSX 10.6: that installer uses + # the 10.4u SDK, but that SDK is not installed by default + # when you install Xcode. + # + CFLAGS = _CONFIG_VARS.get('CFLAGS', '') + m = re.search('-isysroot\s+(\S+)', CFLAGS) + if m is not None: + sdk = m.group(1) + if not os.path.exists(sdk): + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + + flags = _CONFIG_VARS[key] + flags = re.sub('-isysroot\s+\S+(\s|$)', ' ', flags) + _CONFIG_VARS[key] = flags + + if args: + vals = [] + for name in args: + vals.append(_CONFIG_VARS.get(name)) + return vals + else: + return _CONFIG_VARS + + +def get_config_var(name): + """Return the value of a single variable using the dictionary returned by + 'get_config_vars()'. + + Equivalent to get_config_vars().get(name) + """ + return get_config_vars().get(name) + + +def get_platform(): + """Return a string that identifies the current platform. + + This is used mainly to distinguish platform-specific build directories and + platform-specific built distributions. Typically includes the OS name + and version and the architecture (as supplied by 'os.uname()'), + although the exact information included depends on the OS; eg. for IRIX + the architecture isn't particularly important (IRIX only runs on SGI + hardware), but for Linux the kernel version isn't particularly + important. + + Examples of returned values: + linux-i586 + linux-alpha (?) + solaris-2.6-sun4u + irix-5.3 + irix64-6.2 + + Windows will return one of: + win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc) + win-ia64 (64bit Windows on Itanium) + win32 (all others - specifically, sys.platform is returned) + + For other non-POSIX platforms, currently just returns 'sys.platform'. + """ + if os.name == 'nt': + # sniff sys.version for architecture. + prefix = " bit (" + i = sys.version.find(prefix) + if i == -1: + return sys.platform + j = sys.version.find(")", i) + look = sys.version[i+len(prefix):j].lower() + if look == 'amd64': + return 'win-amd64' + if look == 'itanium': + return 'win-ia64' + return sys.platform + + if os.name != "posix" or not hasattr(os, 'uname'): + # XXX what about the architecture? NT is Intel or Alpha, + # Mac OS is M68k or PPC, etc. + return sys.platform + + # Try to distinguish various flavours of Unix + osname, host, release, version, machine = os.uname() + + # Convert the OS name to lowercase, remove '/' characters + # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") + osname = osname.lower().replace('/', '') + machine = machine.replace(' ', '_') + machine = machine.replace('/', '-') + + if osname[:5] == "linux": + # At least on Linux/Intel, 'machine' is the processor -- + # i386, etc. + # XXX what about Alpha, SPARC, etc? + return "%s-%s" % (osname, machine) + elif osname[:5] == "sunos": + if release[0] >= "5": # SunOS 5 == Solaris 2 + osname = "solaris" + release = "%d.%s" % (int(release[0]) - 3, release[2:]) + # fall through to standard osname-release-machine representation + elif osname[:4] == "irix": # could be "irix64"! + return "%s-%s" % (osname, release) + elif osname[:3] == "aix": + return "%s-%s.%s" % (osname, version, release) + elif osname[:6] == "cygwin": + osname = "cygwin" + rel_re = re.compile(r'[\d.]+') + m = rel_re.match(release) + if m: + release = m.group() + elif osname[:6] == "darwin": + # + # For our purposes, we'll assume that the system version from + # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set + # to. This makes the compatibility story a bit more sane because the + # machine is going to compile and link as if it were + # MACOSX_DEPLOYMENT_TARGET. + cfgvars = get_config_vars() + macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET') + + if True: + # Always calculate the release of the running machine, + # needed to determine if we can build fat binaries or not. + + macrelease = macver + # Get the system version. Reading this plist is a documented + # way to get the system version (see the documentation for + # the Gestalt Manager) + try: + f = open('/System/Library/CoreServices/SystemVersion.plist') + except IOError: + # We're on a plain darwin box, fall back to the default + # behaviour. + pass + else: + try: + m = re.search(r'ProductUserVisibleVersion\s*' + r'(.*?)', f.read()) + finally: + f.close() + if m is not None: + macrelease = '.'.join(m.group(1).split('.')[:2]) + # else: fall back to the default behaviour + + if not macver: + macver = macrelease + + if macver: + release = macver + osname = "macosx" + + if ((macrelease + '.') >= '10.4.' and + '-arch' in get_config_vars().get('CFLAGS', '').strip()): + # The universal build will build fat binaries, but not on + # systems before 10.4 + # + # Try to detect 4-way universal builds, those have machine-type + # 'universal' instead of 'fat'. + + machine = 'fat' + cflags = get_config_vars().get('CFLAGS') + + archs = re.findall('-arch\s+(\S+)', cflags) + archs = tuple(sorted(set(archs))) + + if len(archs) == 1: + machine = archs[0] + elif archs == ('i386', 'ppc'): + machine = 'fat' + elif archs == ('i386', 'x86_64'): + machine = 'intel' + elif archs == ('i386', 'ppc', 'x86_64'): + machine = 'fat3' + elif archs == ('ppc64', 'x86_64'): + machine = 'fat64' + elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'): + machine = 'universal' + else: + raise ValueError( + "Don't know machine value for archs=%r" % (archs,)) + + elif machine == 'i386': + # On OSX the machine type returned by uname is always the + # 32-bit variant, even if the executable architecture is + # the 64-bit variant + if sys.maxsize >= 2**32: + machine = 'x86_64' + + elif machine in ('PowerPC', 'Power_Macintosh'): + # Pick a sane name for the PPC architecture. + # See 'i386' case + if sys.maxsize >= 2**32: + machine = 'ppc64' + else: + machine = 'ppc' + + return "%s-%s-%s" % (osname, release, machine) + + +def get_python_version(): + return _PY_VERSION_SHORT + + +def _print_dict(title, data): + for index, (key, value) in enumerate(sorted(data.items())): + if index == 0: + print('%s: ' % (title)) + print('\t%s = "%s"' % (key, value)) + + +def _main(): + """Display all information sysconfig detains.""" + print('Platform: "%s"' % get_platform()) + print('Python version: "%s"' % get_python_version()) + print('Current installation scheme: "%s"' % _get_default_scheme()) + print() + _print_dict('Paths', get_paths()) + print() + _print_dict('Variables', get_config_vars()) + + +if __name__ == '__main__': + _main() diff --git a/pip/_vendor/distlib/_backport/tarfile.py b/pip/_vendor/distlib/_backport/tarfile.py new file mode 100644 index 000000000..0580fb795 --- /dev/null +++ b/pip/_vendor/distlib/_backport/tarfile.py @@ -0,0 +1,2607 @@ +#------------------------------------------------------------------- +# tarfile.py +#------------------------------------------------------------------- +# Copyright (C) 2002 Lars Gustaebel +# All rights reserved. +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from __future__ import print_function + +"""Read from and write to tar format archives. +""" + +__version__ = "$Revision$" + +version = "0.9.0" +__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" +__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" +__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" +__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." + +#--------- +# Imports +#--------- +import sys +import os +import stat +import errno +import time +import struct +import copy +import re + +try: + import grp, pwd +except ImportError: + grp = pwd = None + +# os.symlink on Windows prior to 6.0 raises NotImplementedError +symlink_exception = (AttributeError, NotImplementedError) +try: + # WindowsError (1314) will be raised if the caller does not hold the + # SeCreateSymbolicLinkPrivilege privilege + symlink_exception += (WindowsError,) +except NameError: + pass + +# from tarfile import * +__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] + +if sys.version_info[0] < 3: + import __builtin__ as builtins +else: + import builtins + +_open = builtins.open # Since 'open' is TarFile.open + +#--------------------------------------------------------- +# tar constants +#--------------------------------------------------------- +NUL = b"\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records +GNU_MAGIC = b"ustar \0" # magic gnu tar string +POSIX_MAGIC = b"ustar\x0000" # magic posix tar string + +LENGTH_NAME = 100 # maximum length of a filename +LENGTH_LINK = 100 # maximum length of a linkname +LENGTH_PREFIX = 155 # maximum length of the prefix field + +REGTYPE = b"0" # regular file +AREGTYPE = b"\0" # regular file +LNKTYPE = b"1" # link (inside tarfile) +SYMTYPE = b"2" # symbolic link +CHRTYPE = b"3" # character special device +BLKTYPE = b"4" # block special device +DIRTYPE = b"5" # directory +FIFOTYPE = b"6" # fifo special device +CONTTYPE = b"7" # contiguous file + +GNUTYPE_LONGNAME = b"L" # GNU tar longname +GNUTYPE_LONGLINK = b"K" # GNU tar longlink +GNUTYPE_SPARSE = b"S" # GNU tar sparse file + +XHDTYPE = b"x" # POSIX.1-2001 extended header +XGLTYPE = b"g" # POSIX.1-2001 global header +SOLARIS_XHDTYPE = b"X" # Solaris extended header + +USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format +GNU_FORMAT = 1 # GNU tar format +PAX_FORMAT = 2 # POSIX.1-2001 (pax) format +DEFAULT_FORMAT = GNU_FORMAT + +#--------------------------------------------------------- +# tarfile constants +#--------------------------------------------------------- +# File types that tarfile supports: +SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, + SYMTYPE, DIRTYPE, FIFOTYPE, + CONTTYPE, CHRTYPE, BLKTYPE, + GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# File types that will be treated as a regular file. +REGULAR_TYPES = (REGTYPE, AREGTYPE, + CONTTYPE, GNUTYPE_SPARSE) + +# File types that are part of the GNU tar format. +GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# Fields from a pax header that override a TarInfo attribute. +PAX_FIELDS = ("path", "linkpath", "size", "mtime", + "uid", "gid", "uname", "gname") + +# Fields from a pax header that are affected by hdrcharset. +PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname")) + +# Fields in a pax header that are numbers, all other fields +# are treated as strings. +PAX_NUMBER_FIELDS = { + "atime": float, + "ctime": float, + "mtime": float, + "uid": int, + "gid": int, + "size": int +} + +#--------------------------------------------------------- +# Bits used in the mode field, values in octal. +#--------------------------------------------------------- +S_IFLNK = 0o120000 # symbolic link +S_IFREG = 0o100000 # regular file +S_IFBLK = 0o060000 # block device +S_IFDIR = 0o040000 # directory +S_IFCHR = 0o020000 # character device +S_IFIFO = 0o010000 # fifo + +TSUID = 0o4000 # set UID on execution +TSGID = 0o2000 # set GID on execution +TSVTX = 0o1000 # reserved + +TUREAD = 0o400 # read by owner +TUWRITE = 0o200 # write by owner +TUEXEC = 0o100 # execute/search by owner +TGREAD = 0o040 # read by group +TGWRITE = 0o020 # write by group +TGEXEC = 0o010 # execute/search by group +TOREAD = 0o004 # read by other +TOWRITE = 0o002 # write by other +TOEXEC = 0o001 # execute/search by other + +#--------------------------------------------------------- +# initialization +#--------------------------------------------------------- +if os.name in ("nt", "ce"): + ENCODING = "utf-8" +else: + ENCODING = sys.getfilesystemencoding() + +#--------------------------------------------------------- +# Some useful functions +#--------------------------------------------------------- + +def stn(s, length, encoding, errors): + """Convert a string to a null-terminated bytes object. + """ + s = s.encode(encoding, errors) + return s[:length] + (length - len(s)) * NUL + +def nts(s, encoding, errors): + """Convert a null-terminated bytes object to a string. + """ + p = s.find(b"\0") + if p != -1: + s = s[:p] + return s.decode(encoding, errors) + +def nti(s): + """Convert a number field to a python number. + """ + # There are two possible encodings for a number field, see + # itn() below. + if s[0] != chr(0o200): + try: + n = int(nts(s, "ascii", "strict") or "0", 8) + except ValueError: + raise InvalidHeaderError("invalid header") + else: + n = 0 + for i in range(len(s) - 1): + n <<= 8 + n += ord(s[i + 1]) + return n + +def itn(n, digits=8, format=DEFAULT_FORMAT): + """Convert a python number to a number field. + """ + # POSIX 1003.1-1988 requires numbers to be encoded as a string of + # octal digits followed by a null-byte, this allows values up to + # (8**(digits-1))-1. GNU tar allows storing numbers greater than + # that if necessary. A leading 0o200 byte indicates this particular + # encoding, the following digits-1 bytes are a big-endian + # representation. This allows values up to (256**(digits-1))-1. + if 0 <= n < 8 ** (digits - 1): + s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL + else: + if format != GNU_FORMAT or n >= 256 ** (digits - 1): + raise ValueError("overflow in number field") + + if n < 0: + # XXX We mimic GNU tar's behaviour with negative numbers, + # this could raise OverflowError. + n = struct.unpack("L", struct.pack("l", n))[0] + + s = bytearray() + for i in range(digits - 1): + s.insert(0, n & 0o377) + n >>= 8 + s.insert(0, 0o200) + return s + +def calc_chksums(buf): + """Calculate the checksum for a member's header by summing up all + characters except for the chksum field which is treated as if + it was filled with spaces. According to the GNU tar sources, + some tars (Sun and NeXT) calculate chksum with signed char, + which will be different if there are chars in the buffer with + the high bit set. So we calculate two checksums, unsigned and + signed. + """ + unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) + signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) + return unsigned_chksum, signed_chksum + +def copyfileobj(src, dst, length=None): + """Copy length bytes from fileobj src to fileobj dst. + If length is None, copy the entire content. + """ + if length == 0: + return + if length is None: + while True: + buf = src.read(16*1024) + if not buf: + break + dst.write(buf) + return + + BUFSIZE = 16 * 1024 + blocks, remainder = divmod(length, BUFSIZE) + for b in range(blocks): + buf = src.read(BUFSIZE) + if len(buf) < BUFSIZE: + raise IOError("end of file reached") + dst.write(buf) + + if remainder != 0: + buf = src.read(remainder) + if len(buf) < remainder: + raise IOError("end of file reached") + dst.write(buf) + return + +filemode_table = ( + ((S_IFLNK, "l"), + (S_IFREG, "-"), + (S_IFBLK, "b"), + (S_IFDIR, "d"), + (S_IFCHR, "c"), + (S_IFIFO, "p")), + + ((TUREAD, "r"),), + ((TUWRITE, "w"),), + ((TUEXEC|TSUID, "s"), + (TSUID, "S"), + (TUEXEC, "x")), + + ((TGREAD, "r"),), + ((TGWRITE, "w"),), + ((TGEXEC|TSGID, "s"), + (TSGID, "S"), + (TGEXEC, "x")), + + ((TOREAD, "r"),), + ((TOWRITE, "w"),), + ((TOEXEC|TSVTX, "t"), + (TSVTX, "T"), + (TOEXEC, "x")) +) + +def filemode(mode): + """Convert a file's mode to a string of the form + -rwxrwxrwx. + Used by TarFile.list() + """ + perm = [] + for table in filemode_table: + for bit, char in table: + if mode & bit == bit: + perm.append(char) + break + else: + perm.append("-") + return "".join(perm) + +class TarError(Exception): + """Base exception.""" + pass +class ExtractError(TarError): + """General exception for extract errors.""" + pass +class ReadError(TarError): + """Exception for unreadble tar archives.""" + pass +class CompressionError(TarError): + """Exception for unavailable compression methods.""" + pass +class StreamError(TarError): + """Exception for unsupported operations on stream-like TarFiles.""" + pass +class HeaderError(TarError): + """Base exception for header errors.""" + pass +class EmptyHeaderError(HeaderError): + """Exception for empty headers.""" + pass +class TruncatedHeaderError(HeaderError): + """Exception for truncated headers.""" + pass +class EOFHeaderError(HeaderError): + """Exception for end of file headers.""" + pass +class InvalidHeaderError(HeaderError): + """Exception for invalid headers.""" + pass +class SubsequentHeaderError(HeaderError): + """Exception for missing and invalid extended headers.""" + pass + +#--------------------------- +# internal stream interface +#--------------------------- +class _LowLevelFile(object): + """Low-level file object. Supports reading and writing. + It is used instead of a regular file object for streaming + access. + """ + + def __init__(self, name, mode): + mode = { + "r": os.O_RDONLY, + "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + }[mode] + if hasattr(os, "O_BINARY"): + mode |= os.O_BINARY + self.fd = os.open(name, mode, 0o666) + + def close(self): + os.close(self.fd) + + def read(self, size): + return os.read(self.fd, size) + + def write(self, s): + os.write(self.fd, s) + +class _Stream(object): + """Class that serves as an adapter between TarFile and + a stream-like object. The stream-like object only + needs to have a read() or write() method and is accessed + blockwise. Use of gzip or bzip2 compression is possible. + A stream-like object could be for example: sys.stdin, + sys.stdout, a socket, a tape device etc. + + _Stream is intended to be used only internally. + """ + + def __init__(self, name, mode, comptype, fileobj, bufsize): + """Construct a _Stream object. + """ + self._extfileobj = True + if fileobj is None: + fileobj = _LowLevelFile(name, mode) + self._extfileobj = False + + if comptype == '*': + # Enable transparent compression detection for the + # stream interface + fileobj = _StreamProxy(fileobj) + comptype = fileobj.getcomptype() + + self.name = name or "" + self.mode = mode + self.comptype = comptype + self.fileobj = fileobj + self.bufsize = bufsize + self.buf = b"" + self.pos = 0 + self.closed = False + + try: + if comptype == "gz": + try: + import zlib + except ImportError: + raise CompressionError("zlib module is not available") + self.zlib = zlib + self.crc = zlib.crc32(b"") + if mode == "r": + self._init_read_gz() + else: + self._init_write_gz() + + if comptype == "bz2": + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + if mode == "r": + self.dbuf = b"" + self.cmp = bz2.BZ2Decompressor() + else: + self.cmp = bz2.BZ2Compressor() + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise + + def __del__(self): + if hasattr(self, "closed") and not self.closed: + self.close() + + def _init_write_gz(self): + """Initialize for writing with gzip compression. + """ + self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) + timestamp = struct.pack(" self.bufsize: + self.fileobj.write(self.buf[:self.bufsize]) + self.buf = self.buf[self.bufsize:] + + def close(self): + """Close the _Stream object. No operation should be + done on it afterwards. + """ + if self.closed: + return + + if self.mode == "w" and self.comptype != "tar": + self.buf += self.cmp.flush() + + if self.mode == "w" and self.buf: + self.fileobj.write(self.buf) + self.buf = b"" + if self.comptype == "gz": + # The native zlib crc is an unsigned 32-bit integer, but + # the Python wrapper implicitly casts that to a signed C + # long. So, on a 32-bit box self.crc may "look negative", + # while the same crc on a 64-bit box may "look positive". + # To avoid irksome warnings from the `struct` module, force + # it to look positive on all boxes. + self.fileobj.write(struct.pack("= 0: + blocks, remainder = divmod(pos - self.pos, self.bufsize) + for i in range(blocks): + self.read(self.bufsize) + self.read(remainder) + else: + raise StreamError("seeking backwards is not allowed") + return self.pos + + def read(self, size=None): + """Return the next size number of bytes from the stream. + If size is not defined, return all bytes of the stream + up to EOF. + """ + if size is None: + t = [] + while True: + buf = self._read(self.bufsize) + if not buf: + break + t.append(buf) + buf = "".join(t) + else: + buf = self._read(size) + self.pos += len(buf) + return buf + + def _read(self, size): + """Return size bytes from the stream. + """ + if self.comptype == "tar": + return self.__read(size) + + c = len(self.dbuf) + while c < size: + buf = self.__read(self.bufsize) + if not buf: + break + try: + buf = self.cmp.decompress(buf) + except IOError: + raise ReadError("invalid compressed data") + self.dbuf += buf + c += len(buf) + buf = self.dbuf[:size] + self.dbuf = self.dbuf[size:] + return buf + + def __read(self, size): + """Return size bytes from stream. If internal buffer is empty, + read another block from the stream. + """ + c = len(self.buf) + while c < size: + buf = self.fileobj.read(self.bufsize) + if not buf: + break + self.buf += buf + c += len(buf) + buf = self.buf[:size] + self.buf = self.buf[size:] + return buf +# class _Stream + +class _StreamProxy(object): + """Small proxy class that enables transparent compression + detection for the Stream interface (mode 'r|*'). + """ + + def __init__(self, fileobj): + self.fileobj = fileobj + self.buf = self.fileobj.read(BLOCKSIZE) + + def read(self, size): + self.read = self.fileobj.read + return self.buf + + def getcomptype(self): + if self.buf.startswith(b"\037\213\010"): + return "gz" + if self.buf.startswith(b"BZh91"): + return "bz2" + return "tar" + + def close(self): + self.fileobj.close() +# class StreamProxy + +class _BZ2Proxy(object): + """Small proxy class that enables external file object + support for "r:bz2" and "w:bz2" modes. This is actually + a workaround for a limitation in bz2 module's BZ2File + class which (unlike gzip.GzipFile) has no support for + a file object argument. + """ + + blocksize = 16 * 1024 + + def __init__(self, fileobj, mode): + self.fileobj = fileobj + self.mode = mode + self.name = getattr(self.fileobj, "name", None) + self.init() + + def init(self): + import bz2 + self.pos = 0 + if self.mode == "r": + self.bz2obj = bz2.BZ2Decompressor() + self.fileobj.seek(0) + self.buf = b"" + else: + self.bz2obj = bz2.BZ2Compressor() + + def read(self, size): + x = len(self.buf) + while x < size: + raw = self.fileobj.read(self.blocksize) + if not raw: + break + data = self.bz2obj.decompress(raw) + self.buf += data + x += len(data) + + buf = self.buf[:size] + self.buf = self.buf[size:] + self.pos += len(buf) + return buf + + def seek(self, pos): + if pos < self.pos: + self.init() + self.read(pos - self.pos) + + def tell(self): + return self.pos + + def write(self, data): + self.pos += len(data) + raw = self.bz2obj.compress(data) + self.fileobj.write(raw) + + def close(self): + if self.mode == "w": + raw = self.bz2obj.flush() + self.fileobj.write(raw) +# class _BZ2Proxy + +#------------------------ +# Extraction file object +#------------------------ +class _FileInFile(object): + """A thin wrapper around an existing file object that + provides a part of its data as an individual file + object. + """ + + def __init__(self, fileobj, offset, size, blockinfo=None): + self.fileobj = fileobj + self.offset = offset + self.size = size + self.position = 0 + + if blockinfo is None: + blockinfo = [(0, size)] + + # Construct a map with data and zero blocks. + self.map_index = 0 + self.map = [] + lastpos = 0 + realpos = self.offset + for offset, size in blockinfo: + if offset > lastpos: + self.map.append((False, lastpos, offset, None)) + self.map.append((True, offset, offset + size, realpos)) + realpos += size + lastpos = offset + size + if lastpos < self.size: + self.map.append((False, lastpos, self.size, None)) + + def seekable(self): + if not hasattr(self.fileobj, "seekable"): + # XXX gzip.GzipFile and bz2.BZ2File + return True + return self.fileobj.seekable() + + def tell(self): + """Return the current file position. + """ + return self.position + + def seek(self, position): + """Seek to a position in the file. + """ + self.position = position + + def read(self, size=None): + """Read data from the file. + """ + if size is None: + size = self.size - self.position + else: + size = min(size, self.size - self.position) + + buf = b"" + while size > 0: + while True: + data, start, stop, offset = self.map[self.map_index] + if start <= self.position < stop: + break + else: + self.map_index += 1 + if self.map_index == len(self.map): + self.map_index = 0 + length = min(size, stop - self.position) + if data: + self.fileobj.seek(offset + (self.position - start)) + buf += self.fileobj.read(length) + else: + buf += NUL * length + size -= length + self.position += length + return buf +#class _FileInFile + + +class ExFileObject(object): + """File-like object for reading an archive member. + Is returned by TarFile.extractfile(). + """ + blocksize = 1024 + + def __init__(self, tarfile, tarinfo): + self.fileobj = _FileInFile(tarfile.fileobj, + tarinfo.offset_data, + tarinfo.size, + tarinfo.sparse) + self.name = tarinfo.name + self.mode = "r" + self.closed = False + self.size = tarinfo.size + + self.position = 0 + self.buffer = b"" + + def readable(self): + return True + + def writable(self): + return False + + def seekable(self): + return self.fileobj.seekable() + + def read(self, size=None): + """Read at most size bytes from the file. If size is not + present or None, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + buf = b"" + if self.buffer: + if size is None: + buf = self.buffer + self.buffer = b"" + else: + buf = self.buffer[:size] + self.buffer = self.buffer[size:] + + if size is None: + buf += self.fileobj.read() + else: + buf += self.fileobj.read(size - len(buf)) + + self.position += len(buf) + return buf + + # XXX TextIOWrapper uses the read1() method. + read1 = read + + def readline(self, size=-1): + """Read one entire line from the file. If size is present + and non-negative, return a string with at most that + size, which may be an incomplete line. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + pos = self.buffer.find(b"\n") + 1 + if pos == 0: + # no newline found. + while True: + buf = self.fileobj.read(self.blocksize) + self.buffer += buf + if not buf or b"\n" in buf: + pos = self.buffer.find(b"\n") + 1 + if pos == 0: + # no newline found. + pos = len(self.buffer) + break + + if size != -1: + pos = min(size, pos) + + buf = self.buffer[:pos] + self.buffer = self.buffer[pos:] + self.position += len(buf) + return buf + + def readlines(self): + """Return a list with all remaining lines. + """ + result = [] + while True: + line = self.readline() + if not line: break + result.append(line) + return result + + def tell(self): + """Return the current file position. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + return self.position + + def seek(self, pos, whence=os.SEEK_SET): + """Seek to a position in the file. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if whence == os.SEEK_SET: + self.position = min(max(pos, 0), self.size) + elif whence == os.SEEK_CUR: + if pos < 0: + self.position = max(self.position + pos, 0) + else: + self.position = min(self.position + pos, self.size) + elif whence == os.SEEK_END: + self.position = max(min(self.size + pos, self.size), 0) + else: + raise ValueError("Invalid argument") + + self.buffer = b"" + self.fileobj.seek(self.position) + + def close(self): + """Close the file object. + """ + self.closed = True + + def __iter__(self): + """Get an iterator over the file's lines. + """ + while True: + line = self.readline() + if not line: + break + yield line +#class ExFileObject + +#------------------ +# Exported Classes +#------------------ +class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. + TarInfo objects are returned by TarFile.getmember(), + TarFile.getmembers() and TarFile.gettarinfo() and are + usually created internally. + """ + + __slots__ = ("name", "mode", "uid", "gid", "size", "mtime", + "chksum", "type", "linkname", "uname", "gname", + "devmajor", "devminor", + "offset", "offset_data", "pax_headers", "sparse", + "tarfile", "_sparse_structs", "_link_target") + + def __init__(self, name=""): + """Construct a TarInfo object. name is the optional name + of the member. + """ + self.name = name # member name + self.mode = 0o644 # file permissions + self.uid = 0 # user id + self.gid = 0 # group id + self.size = 0 # file size + self.mtime = 0 # modification time + self.chksum = 0 # header checksum + self.type = REGTYPE # member type + self.linkname = "" # link name + self.uname = "" # user name + self.gname = "" # group name + self.devmajor = 0 # device major number + self.devminor = 0 # device minor number + + self.offset = 0 # the tar header starts here + self.offset_data = 0 # the file's data starts here + + self.sparse = None # sparse member information + self.pax_headers = {} # pax header information + + # In pax headers the "name" and "linkname" field are called + # "path" and "linkpath". + def _getpath(self): + return self.name + def _setpath(self, name): + self.name = name + path = property(_getpath, _setpath) + + def _getlinkpath(self): + return self.linkname + def _setlinkpath(self, linkname): + self.linkname = linkname + linkpath = property(_getlinkpath, _setlinkpath) + + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + + def get_info(self): + """Return the TarInfo's attributes as a dictionary. + """ + info = { + "name": self.name, + "mode": self.mode & 0o7777, + "uid": self.uid, + "gid": self.gid, + "size": self.size, + "mtime": self.mtime, + "chksum": self.chksum, + "type": self.type, + "linkname": self.linkname, + "uname": self.uname, + "gname": self.gname, + "devmajor": self.devmajor, + "devminor": self.devminor + } + + if info["type"] == DIRTYPE and not info["name"].endswith("/"): + info["name"] += "/" + + return info + + def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): + """Return a tar header as a string of 512 byte blocks. + """ + info = self.get_info() + + if format == USTAR_FORMAT: + return self.create_ustar_header(info, encoding, errors) + elif format == GNU_FORMAT: + return self.create_gnu_header(info, encoding, errors) + elif format == PAX_FORMAT: + return self.create_pax_header(info, encoding) + else: + raise ValueError("invalid format") + + def create_ustar_header(self, info, encoding, errors): + """Return the object as a ustar header block. + """ + info["magic"] = POSIX_MAGIC + + if len(info["linkname"]) > LENGTH_LINK: + raise ValueError("linkname is too long") + + if len(info["name"]) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"]) + + return self._create_header(info, USTAR_FORMAT, encoding, errors) + + def create_gnu_header(self, info, encoding, errors): + """Return the object as a GNU header block sequence. + """ + info["magic"] = GNU_MAGIC + + buf = b"" + if len(info["linkname"]) > LENGTH_LINK: + buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) + + if len(info["name"]) > LENGTH_NAME: + buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) + + return buf + self._create_header(info, GNU_FORMAT, encoding, errors) + + def create_pax_header(self, info, encoding): + """Return the object as a ustar header block. If it cannot be + represented this way, prepend a pax extended header sequence + with supplement information. + """ + info["magic"] = POSIX_MAGIC + pax_headers = self.pax_headers.copy() + + # Test string fields for values that exceed the field length or cannot + # be represented in ASCII encoding. + for name, hname, length in ( + ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), + ("uname", "uname", 32), ("gname", "gname", 32)): + + if hname in pax_headers: + # The pax header has priority. + continue + + # Try to encode the string as ASCII. + try: + info[name].encode("ascii", "strict") + except UnicodeEncodeError: + pax_headers[hname] = info[name] + continue + + if len(info[name]) > length: + pax_headers[hname] = info[name] + + # Test number fields for values that exceed the field limit or values + # that like to be stored as float. + for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): + if name in pax_headers: + # The pax header has priority. Avoid overflow. + info[name] = 0 + continue + + val = info[name] + if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): + pax_headers[name] = str(val) + info[name] = 0 + + # Create a pax extended header if necessary. + if pax_headers: + buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) + else: + buf = b"" + + return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") + + @classmethod + def create_pax_global_header(cls, pax_headers): + """Return the object as a pax global header block sequence. + """ + return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") + + def _posix_split_name(self, name): + """Split a name longer than 100 chars into a prefix + and a name part. + """ + prefix = name[:LENGTH_PREFIX + 1] + while prefix and prefix[-1] != "/": + prefix = prefix[:-1] + + name = name[len(prefix):] + prefix = prefix[:-1] + + if not prefix or len(name) > LENGTH_NAME: + raise ValueError("name is too long") + return prefix, name + + @staticmethod + def _create_header(info, format, encoding, errors): + """Return a header block. info is a dictionary with file + information, format must be one of the *_FORMAT constants. + """ + parts = [ + stn(info.get("name", ""), 100, encoding, errors), + itn(info.get("mode", 0) & 0o7777, 8, format), + itn(info.get("uid", 0), 8, format), + itn(info.get("gid", 0), 8, format), + itn(info.get("size", 0), 12, format), + itn(info.get("mtime", 0), 12, format), + b" ", # checksum field + info.get("type", REGTYPE), + stn(info.get("linkname", ""), 100, encoding, errors), + info.get("magic", POSIX_MAGIC), + stn(info.get("uname", ""), 32, encoding, errors), + stn(info.get("gname", ""), 32, encoding, errors), + itn(info.get("devmajor", 0), 8, format), + itn(info.get("devminor", 0), 8, format), + stn(info.get("prefix", ""), 155, encoding, errors) + ] + + buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) + chksum = calc_chksums(buf[-BLOCKSIZE:])[0] + buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:] + return buf + + @staticmethod + def _create_payload(payload): + """Return the string payload filled with zero bytes + up to the next 512 byte border. + """ + blocks, remainder = divmod(len(payload), BLOCKSIZE) + if remainder > 0: + payload += (BLOCKSIZE - remainder) * NUL + return payload + + @classmethod + def _create_gnu_long_header(cls, name, type, encoding, errors): + """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence + for name. + """ + name = name.encode(encoding, errors) + NUL + + info = {} + info["name"] = "././@LongLink" + info["type"] = type + info["size"] = len(name) + info["magic"] = GNU_MAGIC + + # create extended header + name blocks. + return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ + cls._create_payload(name) + + @classmethod + def _create_pax_generic_header(cls, pax_headers, type, encoding): + """Return a POSIX.1-2008 extended or global header sequence + that contains a list of keyword, value pairs. The values + must be strings. + """ + # Check if one of the fields contains surrogate characters and thereby + # forces hdrcharset=BINARY, see _proc_pax() for more information. + binary = False + for keyword, value in pax_headers.items(): + try: + value.encode("utf8", "strict") + except UnicodeEncodeError: + binary = True + break + + records = b"" + if binary: + # Put the hdrcharset field at the beginning of the header. + records += b"21 hdrcharset=BINARY\n" + + for keyword, value in pax_headers.items(): + keyword = keyword.encode("utf8") + if binary: + # Try to restore the original byte representation of `value'. + # Needless to say, that the encoding must match the string. + value = value.encode(encoding, "surrogateescape") + else: + value = value.encode("utf8") + + l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' + n = p = 0 + while True: + n = l + len(str(p)) + if n == p: + break + p = n + records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" + + # We use a hardcoded "././@PaxHeader" name like star does + # instead of the one that POSIX recommends. + info = {} + info["name"] = "././@PaxHeader" + info["type"] = type + info["size"] = len(records) + info["magic"] = POSIX_MAGIC + + # Create pax header + record blocks. + return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ + cls._create_payload(records) + + @classmethod + def frombuf(cls, buf, encoding, errors): + """Construct a TarInfo object from a 512 byte bytes object. + """ + if len(buf) == 0: + raise EmptyHeaderError("empty header") + if len(buf) != BLOCKSIZE: + raise TruncatedHeaderError("truncated header") + if buf.count(NUL) == BLOCKSIZE: + raise EOFHeaderError("end of file header") + + chksum = nti(buf[148:156]) + if chksum not in calc_chksums(buf): + raise InvalidHeaderError("bad checksum") + + obj = cls() + obj.name = nts(buf[0:100], encoding, errors) + obj.mode = nti(buf[100:108]) + obj.uid = nti(buf[108:116]) + obj.gid = nti(buf[116:124]) + obj.size = nti(buf[124:136]) + obj.mtime = nti(buf[136:148]) + obj.chksum = chksum + obj.type = buf[156:157] + obj.linkname = nts(buf[157:257], encoding, errors) + obj.uname = nts(buf[265:297], encoding, errors) + obj.gname = nts(buf[297:329], encoding, errors) + obj.devmajor = nti(buf[329:337]) + obj.devminor = nti(buf[337:345]) + prefix = nts(buf[345:500], encoding, errors) + + # Old V7 tar format represents a directory as a regular + # file with a trailing slash. + if obj.type == AREGTYPE and obj.name.endswith("/"): + obj.type = DIRTYPE + + # The old GNU sparse format occupies some of the unused + # space in the buffer for up to 4 sparse structures. + # Save the them for later processing in _proc_sparse(). + if obj.type == GNUTYPE_SPARSE: + pos = 386 + structs = [] + for i in range(4): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + structs.append((offset, numbytes)) + pos += 24 + isextended = bool(buf[482]) + origsize = nti(buf[483:495]) + obj._sparse_structs = (structs, isextended, origsize) + + # Remove redundant slashes from directories. + if obj.isdir(): + obj.name = obj.name.rstrip("/") + + # Reconstruct a ustar longname. + if prefix and obj.type not in GNU_TYPES: + obj.name = prefix + "/" + obj.name + return obj + + @classmethod + def fromtarfile(cls, tarfile): + """Return the next TarInfo object from TarFile object + tarfile. + """ + buf = tarfile.fileobj.read(BLOCKSIZE) + obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) + obj.offset = tarfile.fileobj.tell() - BLOCKSIZE + return obj._proc_member(tarfile) + + #-------------------------------------------------------------------------- + # The following are methods that are called depending on the type of a + # member. The entry point is _proc_member() which can be overridden in a + # subclass to add custom _proc_*() methods. A _proc_*() method MUST + # implement the following + # operations: + # 1. Set self.offset_data to the position where the data blocks begin, + # if there is data that follows. + # 2. Set tarfile.offset to the position where the next member's header will + # begin. + # 3. Return self or another valid TarInfo object. + def _proc_member(self, tarfile): + """Choose the right processing method depending on + the type and call it. + """ + if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): + return self._proc_gnulong(tarfile) + elif self.type == GNUTYPE_SPARSE: + return self._proc_sparse(tarfile) + elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): + return self._proc_pax(tarfile) + else: + return self._proc_builtin(tarfile) + + def _proc_builtin(self, tarfile): + """Process a builtin type or an unknown type which + will be treated as a regular file. + """ + self.offset_data = tarfile.fileobj.tell() + offset = self.offset_data + if self.isreg() or self.type not in SUPPORTED_TYPES: + # Skip the following data blocks. + offset += self._block(self.size) + tarfile.offset = offset + + # Patch the TarInfo object with saved global + # header information. + self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) + + return self + + def _proc_gnulong(self, tarfile): + """Process the blocks that hold a GNU longname + or longlink member. + """ + buf = tarfile.fileobj.read(self._block(self.size)) + + # Fetch the next header and process it. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + # Patch the TarInfo object from the next header with + # the longname information. + next.offset = self.offset + if self.type == GNUTYPE_LONGNAME: + next.name = nts(buf, tarfile.encoding, tarfile.errors) + elif self.type == GNUTYPE_LONGLINK: + next.linkname = nts(buf, tarfile.encoding, tarfile.errors) + + return next + + def _proc_sparse(self, tarfile): + """Process a GNU sparse header plus extra headers. + """ + # We already collected some sparse structures in frombuf(). + structs, isextended, origsize = self._sparse_structs + del self._sparse_structs + + # Collect sparse structures from extended header blocks. + while isextended: + buf = tarfile.fileobj.read(BLOCKSIZE) + pos = 0 + for i in range(21): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + if offset and numbytes: + structs.append((offset, numbytes)) + pos += 24 + isextended = bool(buf[504]) + self.sparse = structs + + self.offset_data = tarfile.fileobj.tell() + tarfile.offset = self.offset_data + self._block(self.size) + self.size = origsize + return self + + def _proc_pax(self, tarfile): + """Process an extended or global header as described in + POSIX.1-2008. + """ + # Read the header information. + buf = tarfile.fileobj.read(self._block(self.size)) + + # A pax header stores supplemental information for either + # the following file (extended) or all following files + # (global). + if self.type == XGLTYPE: + pax_headers = tarfile.pax_headers + else: + pax_headers = tarfile.pax_headers.copy() + + # Check if the pax header contains a hdrcharset field. This tells us + # the encoding of the path, linkpath, uname and gname fields. Normally, + # these fields are UTF-8 encoded but since POSIX.1-2008 tar + # implementations are allowed to store them as raw binary strings if + # the translation to UTF-8 fails. + match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) + if match is not None: + pax_headers["hdrcharset"] = match.group(1).decode("utf8") + + # For the time being, we don't care about anything other than "BINARY". + # The only other value that is currently allowed by the standard is + # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. + hdrcharset = pax_headers.get("hdrcharset") + if hdrcharset == "BINARY": + encoding = tarfile.encoding + else: + encoding = "utf8" + + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and + # the newline. keyword and value are both UTF-8 encoded strings. + regex = re.compile(br"(\d+) ([^=]+)=") + pos = 0 + while True: + match = regex.match(buf, pos) + if not match: + break + + length, keyword = match.groups() + length = int(length) + value = buf[match.end(2) + 1:match.start(1) + length - 1] + + # Normally, we could just use "utf8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot + # translate to UTF-8 as raw strings (unfortunately without a + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. + keyword = self._decode_pax_field(keyword, "utf8", "utf8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: + value = self._decode_pax_field(value, encoding, tarfile.encoding, + tarfile.errors) + else: + value = self._decode_pax_field(value, "utf8", "utf8", + tarfile.errors) + + pax_headers[keyword] = value + pos += length + + # Fetch the next header. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + # Process GNU sparse information. + if "GNU.sparse.map" in pax_headers: + # GNU extended sparse format version 0.1. + self._proc_gnusparse_01(next, pax_headers) + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. + self._proc_gnusparse_00(next, pax_headers, buf) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. + self._proc_gnusparse_10(next, pax_headers, tarfile) + + if self.type in (XHDTYPE, SOLARIS_XHDTYPE): + # Patch the TarInfo object with the extended header info. + next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) + next.offset = self.offset + + if "size" in pax_headers: + # If the extended header replaces the size field, + # we need to recalculate the offset where the next + # header starts. + offset = next.offset_data + if next.isreg() or next.type not in SUPPORTED_TYPES: + offset += next._block(next.size) + tarfile.offset = offset + + return next + + def _proc_gnusparse_00(self, next, pax_headers, buf): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] + for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): + offsets.append(int(match.group(1))) + numbytes = [] + for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): + numbytes.append(int(match.group(1))) + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): + """Process a GNU tar extended sparse header, version 0.1. + """ + sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] + next.sparse = list(zip(sparse[::2], sparse[1::2])) + + def _proc_gnusparse_10(self, next, pax_headers, tarfile): + """Process a GNU tar extended sparse header, version 1.0. + """ + fields = None + sparse = [] + buf = tarfile.fileobj.read(BLOCKSIZE) + fields, buf = buf.split(b"\n", 1) + fields = int(fields) + while len(sparse) < fields * 2: + if b"\n" not in buf: + buf += tarfile.fileobj.read(BLOCKSIZE) + number, buf = buf.split(b"\n", 1) + sparse.append(int(number)) + next.offset_data = tarfile.fileobj.tell() + next.sparse = list(zip(sparse[::2], sparse[1::2])) + + def _apply_pax_info(self, pax_headers, encoding, errors): + """Replace fields with supplemental information from a previous + pax extended or global header. + """ + for keyword, value in pax_headers.items(): + if keyword == "GNU.sparse.name": + setattr(self, "path", value) + elif keyword == "GNU.sparse.size": + setattr(self, "size", int(value)) + elif keyword == "GNU.sparse.realsize": + setattr(self, "size", int(value)) + elif keyword in PAX_FIELDS: + if keyword in PAX_NUMBER_FIELDS: + try: + value = PAX_NUMBER_FIELDS[keyword](value) + except ValueError: + value = 0 + if keyword == "path": + value = value.rstrip("/") + setattr(self, keyword, value) + + self.pax_headers = pax_headers.copy() + + def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): + """Decode a single field from a pax record. + """ + try: + return value.decode(encoding, "strict") + except UnicodeDecodeError: + return value.decode(fallback_encoding, fallback_errors) + + def _block(self, count): + """Round up a byte count by BLOCKSIZE and return it, + e.g. _block(834) => 1024. + """ + blocks, remainder = divmod(count, BLOCKSIZE) + if remainder: + blocks += 1 + return blocks * BLOCKSIZE + + def isreg(self): + return self.type in REGULAR_TYPES + def isfile(self): + return self.isreg() + def isdir(self): + return self.type == DIRTYPE + def issym(self): + return self.type == SYMTYPE + def islnk(self): + return self.type == LNKTYPE + def ischr(self): + return self.type == CHRTYPE + def isblk(self): + return self.type == BLKTYPE + def isfifo(self): + return self.type == FIFOTYPE + def issparse(self): + return self.sparse is not None + def isdev(self): + return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) +# class TarInfo + +class TarFile(object): + """The TarFile Class provides an interface to tar archives. + """ + + debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) + + dereference = False # If true, add content of linked file to the + # tar file, else the link. + + ignore_zeros = False # If true, skips empty or invalid blocks and + # continues processing. + + errorlevel = 1 # If 0, fatal errors only appear in debug + # messages (if debug >= 0). If > 0, errors + # are passed to the caller as exceptions. + + format = DEFAULT_FORMAT # The format to use when creating an archive. + + encoding = ENCODING # Encoding for 8-bit character strings. + + errors = None # Error handler for unicode conversion. + + tarinfo = TarInfo # The default TarInfo class to use. + + fileobject = ExFileObject # The default ExFileObject class to use. + + def __init__(self, name=None, mode="r", fileobj=None, format=None, + tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, + errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): + """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to + read from an existing archive, 'a' to append data to an existing + file or 'w' to create a new file overwriting an existing one. `mode' + defaults to 'r'. + If `fileobj' is given, it is used for reading or writing data. If it + can be determined, `mode' is overridden by `fileobj's mode. + `fileobj' is not closed, when TarFile is closed. + """ + if len(mode) > 1 or mode not in "raw": + raise ValueError("mode must be 'r', 'a' or 'w'") + self.mode = mode + self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] + + if not fileobj: + if self.mode == "a" and not os.path.exists(name): + # Create nonexistent files in append mode. + self.mode = "w" + self._mode = "wb" + fileobj = bltn_open(name, self._mode) + self._extfileobj = False + else: + if name is None and hasattr(fileobj, "name"): + name = fileobj.name + if hasattr(fileobj, "mode"): + self._mode = fileobj.mode + self._extfileobj = True + self.name = os.path.abspath(name) if name else None + self.fileobj = fileobj + + # Init attributes. + if format is not None: + self.format = format + if tarinfo is not None: + self.tarinfo = tarinfo + if dereference is not None: + self.dereference = dereference + if ignore_zeros is not None: + self.ignore_zeros = ignore_zeros + if encoding is not None: + self.encoding = encoding + self.errors = errors + + if pax_headers is not None and self.format == PAX_FORMAT: + self.pax_headers = pax_headers + else: + self.pax_headers = {} + + if debug is not None: + self.debug = debug + if errorlevel is not None: + self.errorlevel = errorlevel + + # Init datastructures. + self.closed = False + self.members = [] # list of members as TarInfo objects + self._loaded = False # flag if all members have been read + self.offset = self.fileobj.tell() + # current position in the archive file + self.inodes = {} # dictionary caching the inodes of + # archive members already added + + try: + if self.mode == "r": + self.firstmember = None + self.firstmember = self.next() + + if self.mode == "a": + # Move to the end of the archive, + # before the first empty block. + while True: + self.fileobj.seek(self.offset) + try: + tarinfo = self.tarinfo.fromtarfile(self) + self.members.append(tarinfo) + except EOFHeaderError: + self.fileobj.seek(self.offset) + break + except HeaderError as e: + raise ReadError(str(e)) + + if self.mode in "aw": + self._loaded = True + + if self.pax_headers: + buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) + self.fileobj.write(buf) + self.offset += len(buf) + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise + + #-------------------------------------------------------------------------- + # Below are the classmethods which act as alternate constructors to the + # TarFile class. The open() method is the only one that is needed for + # public use; it is the "super"-constructor and is able to select an + # adequate "sub"-constructor for a particular compression using the mapping + # from OPEN_METH. + # + # This concept allows one to subclass TarFile without losing the comfort of + # the super-constructor. A sub-constructor is registered and made available + # by adding it to the mapping in OPEN_METH. + + @classmethod + def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): + """Open a tar archive for reading, writing or appending. Return + an appropriate TarFile class. + + mode: + 'r' or 'r:*' open for reading with transparent compression + 'r:' open for reading exclusively uncompressed + 'r:gz' open for reading with gzip compression + 'r:bz2' open for reading with bzip2 compression + 'a' or 'a:' open for appending, creating the file if necessary + 'w' or 'w:' open for writing without compression + 'w:gz' open for writing with gzip compression + 'w:bz2' open for writing with bzip2 compression + + 'r|*' open a stream of tar blocks with transparent compression + 'r|' open an uncompressed stream of tar blocks for reading + 'r|gz' open a gzip compressed stream of tar blocks + 'r|bz2' open a bzip2 compressed stream of tar blocks + 'w|' open an uncompressed stream for writing + 'w|gz' open a gzip compressed stream for writing + 'w|bz2' open a bzip2 compressed stream for writing + """ + + if not name and not fileobj: + raise ValueError("nothing to open") + + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + if fileobj is not None: + saved_pos = fileobj.tell() + try: + return func(name, "r", fileobj, **kwargs) + except (ReadError, CompressionError) as e: + if fileobj is not None: + fileobj.seek(saved_pos) + continue + raise ReadError("file could not be opened successfully") + + elif ":" in mode: + filemode, comptype = mode.split(":", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + # Select the *open() function according to + # given compression. + if comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + else: + raise CompressionError("unknown compression type %r" % comptype) + return func(name, filemode, fileobj, **kwargs) + + elif "|" in mode: + filemode, comptype = mode.split("|", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + if filemode not in "rw": + raise ValueError("mode must be 'r' or 'w'") + + stream = _Stream(name, filemode, comptype, fileobj, bufsize) + try: + t = cls(name, filemode, stream, **kwargs) + except: + stream.close() + raise + t._extfileobj = False + return t + + elif mode in "aw": + return cls.taropen(name, mode, fileobj, **kwargs) + + raise ValueError("undiscernible mode") + + @classmethod + def taropen(cls, name, mode="r", fileobj=None, **kwargs): + """Open uncompressed tar archive name for reading or writing. + """ + if len(mode) > 1 or mode not in "raw": + raise ValueError("mode must be 'r', 'a' or 'w'") + return cls(name, mode, fileobj, **kwargs) + + @classmethod + def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open gzip compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if len(mode) > 1 or mode not in "rw": + raise ValueError("mode must be 'r' or 'w'") + + try: + import gzip + gzip.GzipFile + except (ImportError, AttributeError): + raise CompressionError("gzip module is not available") + + extfileobj = fileobj is not None + try: + fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) + t = cls.taropen(name, mode, fileobj, **kwargs) + except IOError: + if not extfileobj and fileobj is not None: + fileobj.close() + if fileobj is None: + raise + raise ReadError("not a gzip file") + except: + if not extfileobj and fileobj is not None: + fileobj.close() + raise + t._extfileobj = extfileobj + return t + + @classmethod + def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open bzip2 compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if len(mode) > 1 or mode not in "rw": + raise ValueError("mode must be 'r' or 'w'.") + + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + + if fileobj is not None: + fileobj = _BZ2Proxy(fileobj, mode) + else: + fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except (IOError, EOFError): + fileobj.close() + raise ReadError("not a bzip2 file") + t._extfileobj = False + return t + + # All *open() methods are registered here. + OPEN_METH = { + "tar": "taropen", # uncompressed tar + "gz": "gzopen", # gzip compressed tar + "bz2": "bz2open" # bzip2 compressed tar + } + + #-------------------------------------------------------------------------- + # The public methods which TarFile provides: + + def close(self): + """Close the TarFile. In write-mode, two finishing zero blocks are + appended to the archive. + """ + if self.closed: + return + + if self.mode in "aw": + self.fileobj.write(NUL * (BLOCKSIZE * 2)) + self.offset += (BLOCKSIZE * 2) + # fill up the end with zero-blocks + # (like option -b20 for tar does) + blocks, remainder = divmod(self.offset, RECORDSIZE) + if remainder > 0: + self.fileobj.write(NUL * (RECORDSIZE - remainder)) + + if not self._extfileobj: + self.fileobj.close() + self.closed = True + + def getmember(self, name): + """Return a TarInfo object for member `name'. If `name' can not be + found in the archive, KeyError is raised. If a member occurs more + than once in the archive, its last occurrence is assumed to be the + most up-to-date version. + """ + tarinfo = self._getmember(name) + if tarinfo is None: + raise KeyError("filename %r not found" % name) + return tarinfo + + def getmembers(self): + """Return the members of the archive as a list of TarInfo objects. The + list has the same order as the members in the archive. + """ + self._check() + if not self._loaded: # if we want to obtain a list of + self._load() # all members, we first have to + # scan the whole archive. + return self.members + + def getnames(self): + """Return the members of the archive as a list of their names. It has + the same order as the list returned by getmembers(). + """ + return [tarinfo.name for tarinfo in self.getmembers()] + + def gettarinfo(self, name=None, arcname=None, fileobj=None): + """Create a TarInfo object for either the file `name' or the file + object `fileobj' (using os.fstat on its file descriptor). You can + modify some of the TarInfo's attributes before you add it using + addfile(). If given, `arcname' specifies an alternative name for the + file in the archive. + """ + self._check("aw") + + # When fileobj is given, replace name by + # fileobj's real name. + if fileobj is not None: + name = fileobj.name + + # Building the name of the member in the archive. + # Backward slashes are converted to forward slashes, + # Absolute paths are turned to relative paths. + if arcname is None: + arcname = name + drv, arcname = os.path.splitdrive(arcname) + arcname = arcname.replace(os.sep, "/") + arcname = arcname.lstrip("/") + + # Now, fill the TarInfo object with + # information specific for the file. + tarinfo = self.tarinfo() + tarinfo.tarfile = self + + # Use os.stat or os.lstat, depending on platform + # and if symlinks shall be resolved. + if fileobj is None: + if hasattr(os, "lstat") and not self.dereference: + statres = os.lstat(name) + else: + statres = os.stat(name) + else: + statres = os.fstat(fileobj.fileno()) + linkname = "" + + stmd = statres.st_mode + if stat.S_ISREG(stmd): + inode = (statres.st_ino, statres.st_dev) + if not self.dereference and statres.st_nlink > 1 and \ + inode in self.inodes and arcname != self.inodes[inode]: + # Is it a hardlink to an already + # archived file? + type = LNKTYPE + linkname = self.inodes[inode] + else: + # The inode is added only if its valid. + # For win32 it is always 0. + type = REGTYPE + if inode[0]: + self.inodes[inode] = arcname + elif stat.S_ISDIR(stmd): + type = DIRTYPE + elif stat.S_ISFIFO(stmd): + type = FIFOTYPE + elif stat.S_ISLNK(stmd): + type = SYMTYPE + linkname = os.readlink(name) + elif stat.S_ISCHR(stmd): + type = CHRTYPE + elif stat.S_ISBLK(stmd): + type = BLKTYPE + else: + return None + + # Fill the TarInfo object with all + # information we can get. + tarinfo.name = arcname + tarinfo.mode = stmd + tarinfo.uid = statres.st_uid + tarinfo.gid = statres.st_gid + if type == REGTYPE: + tarinfo.size = statres.st_size + else: + tarinfo.size = 0 + tarinfo.mtime = statres.st_mtime + tarinfo.type = type + tarinfo.linkname = linkname + if pwd: + try: + tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] + except KeyError: + pass + if grp: + try: + tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] + except KeyError: + pass + + if type in (CHRTYPE, BLKTYPE): + if hasattr(os, "major") and hasattr(os, "minor"): + tarinfo.devmajor = os.major(statres.st_rdev) + tarinfo.devminor = os.minor(statres.st_rdev) + return tarinfo + + def list(self, verbose=True): + """Print a table of contents to sys.stdout. If `verbose' is False, only + the names of the members are printed. If it is True, an `ls -l'-like + output is produced. + """ + self._check() + + for tarinfo in self: + if verbose: + print(filemode(tarinfo.mode), end=' ') + print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid), end=' ') + if tarinfo.ischr() or tarinfo.isblk(): + print("%10s" % ("%d,%d" \ + % (tarinfo.devmajor, tarinfo.devminor)), end=' ') + else: + print("%10d" % tarinfo.size, end=' ') + print("%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6], end=' ') + + print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') + + if verbose: + if tarinfo.issym(): + print("->", tarinfo.linkname, end=' ') + if tarinfo.islnk(): + print("link to", tarinfo.linkname, end=' ') + print() + + def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): + """Add the file `name' to the archive. `name' may be any type of file + (directory, fifo, symbolic link, etc.). If given, `arcname' + specifies an alternative name for the file in the archive. + Directories are added recursively by default. This can be avoided by + setting `recursive' to False. `exclude' is a function that should + return True for each filename to be excluded. `filter' is a function + that expects a TarInfo object argument and returns the changed + TarInfo object, if it returns None the TarInfo object will be + excluded from the archive. + """ + self._check("aw") + + if arcname is None: + arcname = name + + # Exclude pathnames. + if exclude is not None: + import warnings + warnings.warn("use the filter argument instead", + DeprecationWarning, 2) + if exclude(name): + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Skip if somebody tries to archive the archive... + if self.name is not None and os.path.abspath(name) == self.name: + self._dbg(2, "tarfile: Skipped %r" % name) + return + + self._dbg(1, name) + + # Create a TarInfo object from the file. + tarinfo = self.gettarinfo(name, arcname) + + if tarinfo is None: + self._dbg(1, "tarfile: Unsupported type %r" % name) + return + + # Change or exclude the TarInfo object. + if filter is not None: + tarinfo = filter(tarinfo) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Append the tar header and data to the archive. + if tarinfo.isreg(): + f = bltn_open(name, "rb") + self.addfile(tarinfo, f) + f.close() + + elif tarinfo.isdir(): + self.addfile(tarinfo) + if recursive: + for f in os.listdir(name): + self.add(os.path.join(name, f), os.path.join(arcname, f), + recursive, exclude, filter=filter) + + else: + self.addfile(tarinfo) + + def addfile(self, tarinfo, fileobj=None): + """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is + given, tarinfo.size bytes are read from it and added to the archive. + You can create TarInfo objects using gettarinfo(). + On Windows platforms, `fileobj' should always be opened with mode + 'rb' to avoid irritation about the file size. + """ + self._check("aw") + + tarinfo = copy.copy(tarinfo) + + buf = tarinfo.tobuf(self.format, self.encoding, self.errors) + self.fileobj.write(buf) + self.offset += len(buf) + + # If there's data to follow, append it. + if fileobj is not None: + copyfileobj(fileobj, self.fileobj, tarinfo.size) + blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) + if remainder > 0: + self.fileobj.write(NUL * (BLOCKSIZE - remainder)) + blocks += 1 + self.offset += blocks * BLOCKSIZE + + self.members.append(tarinfo) + + def extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directories with a safe mode. + directories.append(tarinfo) + tarinfo = copy.copy(tarinfo) + tarinfo.mode = 0o700 + # Do not set_attrs directories, as we will do that further down + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + + # Reverse sort directories. + directories.sort(key=lambda a: a.name) + directories.reverse() + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + dirpath = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, dirpath) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extract(self, member, path="", set_attrs=True): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a TarInfo object. You can + specify a different directory using `path'. File attributes (owner, + mtime, mode) are set unless `set_attrs' is False. + """ + self._check("r") + + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + + # Prepare the link target for makelink(). + if tarinfo.islnk(): + tarinfo._link_target = os.path.join(path, tarinfo.linkname) + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs) + except EnvironmentError as e: + if self.errorlevel > 0: + raise + else: + if e.filename is None: + self._dbg(1, "tarfile: %s" % e.strerror) + else: + self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + except ExtractError as e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extractfile(self, member): + """Extract a member from the archive as a file object. `member' may be + a filename or a TarInfo object. If `member' is a regular file, a + file-like object is returned. If `member' is a link, a file-like + object is constructed from the link's target. If `member' is none of + the above, None is returned. + The file-like object is read-only and provides the following + methods: read(), readline(), readlines(), seek() and tell() + """ + self._check("r") + + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + + if tarinfo.isreg(): + return self.fileobject(self, tarinfo) + + elif tarinfo.type not in SUPPORTED_TYPES: + # If a member's type is unknown, it is treated as a + # regular file. + return self.fileobject(self, tarinfo) + + elif tarinfo.islnk() or tarinfo.issym(): + if isinstance(self.fileobj, _Stream): + # A small but ugly workaround for the case that someone tries + # to extract a (sym)link as a file-object from a non-seekable + # stream of tar blocks. + raise StreamError("cannot extract (sym)link as file object") + else: + # A (sym)link's file object is its target's file object. + return self.extractfile(self._find_link_target(tarinfo)) + else: + # If there's no data associated with the member (directory, chrdev, + # blkdev, etc.), return None instead of a file object. + return None + + def _extract_member(self, tarinfo, targetpath, set_attrs=True): + """Extract the TarInfo object tarinfo to a physical + file called targetpath. + """ + # Fetch the TarInfo object for the given name + # and build the destination pathname, replacing + # forward slashes to platform specific separators. + targetpath = targetpath.rstrip("/") + targetpath = targetpath.replace("/", os.sep) + + # Create all upper directories. + upperdirs = os.path.dirname(targetpath) + if upperdirs and not os.path.exists(upperdirs): + # Create directories that are not part of the archive with + # default permissions. + os.makedirs(upperdirs) + + if tarinfo.islnk() or tarinfo.issym(): + self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) + else: + self._dbg(1, tarinfo.name) + + if tarinfo.isreg(): + self.makefile(tarinfo, targetpath) + elif tarinfo.isdir(): + self.makedir(tarinfo, targetpath) + elif tarinfo.isfifo(): + self.makefifo(tarinfo, targetpath) + elif tarinfo.ischr() or tarinfo.isblk(): + self.makedev(tarinfo, targetpath) + elif tarinfo.islnk() or tarinfo.issym(): + self.makelink(tarinfo, targetpath) + elif tarinfo.type not in SUPPORTED_TYPES: + self.makeunknown(tarinfo, targetpath) + else: + self.makefile(tarinfo, targetpath) + + if set_attrs: + self.chown(tarinfo, targetpath) + if not tarinfo.issym(): + self.chmod(tarinfo, targetpath) + self.utime(tarinfo, targetpath) + + #-------------------------------------------------------------------------- + # Below are the different file methods. They are called via + # _extract_member() when extract() is called. They can be replaced in a + # subclass to implement other functionality. + + def makedir(self, tarinfo, targetpath): + """Make a directory called targetpath. + """ + try: + # Use a safe mode for the directory, the real mode is set + # later in _extract_member(). + os.mkdir(targetpath, 0o700) + except EnvironmentError as e: + if e.errno != errno.EEXIST: + raise + + def makefile(self, tarinfo, targetpath): + """Make a file called targetpath. + """ + source = self.fileobj + source.seek(tarinfo.offset_data) + target = bltn_open(targetpath, "wb") + if tarinfo.sparse is not None: + for offset, size in tarinfo.sparse: + target.seek(offset) + copyfileobj(source, target, size) + else: + copyfileobj(source, target, tarinfo.size) + target.seek(tarinfo.size) + target.truncate() + target.close() + + def makeunknown(self, tarinfo, targetpath): + """Make a file from a TarInfo object with an unknown type + at targetpath. + """ + self.makefile(tarinfo, targetpath) + self._dbg(1, "tarfile: Unknown file type %r, " \ + "extracted as regular file." % tarinfo.type) + + def makefifo(self, tarinfo, targetpath): + """Make a fifo called targetpath. + """ + if hasattr(os, "mkfifo"): + os.mkfifo(targetpath) + else: + raise ExtractError("fifo not supported by system") + + def makedev(self, tarinfo, targetpath): + """Make a character or block device called targetpath. + """ + if not hasattr(os, "mknod") or not hasattr(os, "makedev"): + raise ExtractError("special devices not supported by system") + + mode = tarinfo.mode + if tarinfo.isblk(): + mode |= stat.S_IFBLK + else: + mode |= stat.S_IFCHR + + os.mknod(targetpath, mode, + os.makedev(tarinfo.devmajor, tarinfo.devminor)) + + def makelink(self, tarinfo, targetpath): + """Make a (symbolic) link called targetpath. If it cannot be created + (platform limitation), we try to make a copy of the referenced file + instead of a link. + """ + try: + # For systems that support symbolic and hard links. + if tarinfo.issym(): + os.symlink(tarinfo.linkname, targetpath) + else: + # See extract(). + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) + else: + self._extract_member(self._find_link_target(tarinfo), + targetpath) + except symlink_exception: + if tarinfo.issym(): + linkpath = os.path.join(os.path.dirname(tarinfo.name), + tarinfo.linkname) + else: + linkpath = tarinfo.linkname + else: + try: + self._extract_member(self._find_link_target(tarinfo), + targetpath) + except KeyError: + raise ExtractError("unable to resolve link inside archive") + + def chown(self, tarinfo, targetpath): + """Set owner of targetpath according to tarinfo. + """ + if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: + # We have to be root to do so. + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid + try: + if tarinfo.issym() and hasattr(os, "lchown"): + os.lchown(targetpath, u, g) + else: + if sys.platform != "os2emx": + os.chown(targetpath, u, g) + except EnvironmentError as e: + raise ExtractError("could not change owner") + + def chmod(self, tarinfo, targetpath): + """Set file permissions of targetpath according to tarinfo. + """ + if hasattr(os, 'chmod'): + try: + os.chmod(targetpath, tarinfo.mode) + except EnvironmentError as e: + raise ExtractError("could not change mode") + + def utime(self, tarinfo, targetpath): + """Set modification time of targetpath according to tarinfo. + """ + if not hasattr(os, 'utime'): + return + try: + os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) + except EnvironmentError as e: + raise ExtractError("could not change modification time") + + #-------------------------------------------------------------------------- + def next(self): + """Return the next member of the archive as a TarInfo object, when + TarFile is opened for reading. Return None if there is no more + available. + """ + self._check("ra") + if self.firstmember is not None: + m = self.firstmember + self.firstmember = None + return m + + # Read the next block. + self.fileobj.seek(self.offset) + tarinfo = None + while True: + try: + tarinfo = self.tarinfo.fromtarfile(self) + except EOFHeaderError as e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + except InvalidHeaderError as e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + elif self.offset == 0: + raise ReadError(str(e)) + except EmptyHeaderError: + if self.offset == 0: + raise ReadError("empty file") + except TruncatedHeaderError as e: + if self.offset == 0: + raise ReadError(str(e)) + except SubsequentHeaderError as e: + raise ReadError(str(e)) + break + + if tarinfo is not None: + self.members.append(tarinfo) + else: + self._loaded = True + + return tarinfo + + #-------------------------------------------------------------------------- + # Little helper methods: + + def _getmember(self, name, tarinfo=None, normalize=False): + """Find an archive member by name from bottom to top. + If tarinfo is given, it is used as the starting point. + """ + # Ensure that all members have been loaded. + members = self.getmembers() + + # Limit the member search list up to tarinfo. + if tarinfo is not None: + members = members[:members.index(tarinfo)] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): + if normalize: + member_name = os.path.normpath(member.name) + else: + member_name = member.name + + if name == member_name: + return member + + def _load(self): + """Read through the entire archive file and look for readable + members. + """ + while True: + tarinfo = self.next() + if tarinfo is None: + break + self._loaded = True + + def _check(self, mode=None): + """Check if TarFile is still open, and if the operation's mode + corresponds to TarFile's mode. + """ + if self.closed: + raise IOError("%s is closed" % self.__class__.__name__) + if mode is not None and self.mode not in mode: + raise IOError("bad operation for mode %r" % self.mode) + + def _find_link_target(self, tarinfo): + """Find the target member of a symlink or hardlink member in the + archive. + """ + if tarinfo.issym(): + # Always search the entire archive. + linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname + limit = None + else: + # Search the archive before the link, because a hard link is + # just a reference to an already archived file. + linkname = tarinfo.linkname + limit = tarinfo + + member = self._getmember(linkname, tarinfo=limit, normalize=True) + if member is None: + raise KeyError("linkname %r not found" % linkname) + return member + + def __iter__(self): + """Provide an iterator object. + """ + if self._loaded: + return iter(self.members) + else: + return TarIter(self) + + def _dbg(self, level, msg): + """Write debugging output to sys.stderr. + """ + if level <= self.debug: + print(msg, file=sys.stderr) + + def __enter__(self): + self._check() + return self + + def __exit__(self, type, value, traceback): + if type is None: + self.close() + else: + # An exception occurred. We must not call close() because + # it would try to write end-of-archive blocks and padding. + if not self._extfileobj: + self.fileobj.close() + self.closed = True +# class TarFile + +class TarIter(object): + """Iterator Class. + + for tarinfo in TarFile(...): + suite... + """ + + def __init__(self, tarfile): + """Construct a TarIter object. + """ + self.tarfile = tarfile + self.index = 0 + def __iter__(self): + """Return iterator object. + """ + return self + + def __next__(self): + """Return the next item using TarFile's next() method. + When all members have been read, set TarFile as _loaded. + """ + # Fix for SF #1100429: Under rare circumstances it can + # happen that getmembers() is called during iteration, + # which will cause TarIter to stop prematurely. + if not self.tarfile._loaded: + tarinfo = self.tarfile.next() + if not tarinfo: + self.tarfile._loaded = True + raise StopIteration + else: + try: + tarinfo = self.tarfile.members[self.index] + except IndexError: + raise StopIteration + self.index += 1 + return tarinfo + + next = __next__ # for Python 2.x + +#-------------------- +# exported functions +#-------------------- +def is_tarfile(name): + """Return True if name points to a tar archive that we + are able to handle, else return False. + """ + try: + t = open(name) + t.close() + return True + except TarError: + return False + +bltn_open = open +open = TarFile.open diff --git a/pip/_vendor/distlib/compat.py b/pip/_vendor/distlib/compat.py new file mode 100644 index 000000000..f639a081b --- /dev/null +++ b/pip/_vendor/distlib/compat.py @@ -0,0 +1,1061 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from __future__ import absolute_import + +import os +import re +import sys + +if sys.version_info[0] < 3: + from StringIO import StringIO + string_types = basestring, + text_type = unicode + from types import FileType as file_type + import __builtin__ as builtins + import ConfigParser as configparser + from ._backport import shutil + from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit + from urllib import (urlretrieve, quote as _quote, unquote, url2pathname, + pathname2url, ContentTooShortError, splittype) + + def quote(s): + if isinstance(s, unicode): + s = s.encode('utf-8') + return _quote(s) + + import urllib2 + from urllib2 import (Request, urlopen, URLError, HTTPError, + HTTPBasicAuthHandler, HTTPPasswordMgr, + HTTPSHandler, HTTPHandler, HTTPRedirectHandler, + build_opener) + import httplib + import xmlrpclib + import Queue as queue + from HTMLParser import HTMLParser + import htmlentitydefs + raw_input = raw_input + from itertools import ifilter as filter + from itertools import ifilterfalse as filterfalse + + _userprog = None + def splituser(host): + """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" + global _userprog + if _userprog is None: + import re + _userprog = re.compile('^(.*)@(.*)$') + + match = _userprog.match(host) + if match: return match.group(1, 2) + return None, host + +else: + from io import StringIO + string_types = str, + text_type = str + from io import TextIOWrapper as file_type + import builtins + import configparser + import shutil + from urllib.parse import (urlparse, urlunparse, urljoin, splituser, quote, + unquote, urlsplit, urlunsplit, splittype) + from urllib.request import (urlopen, urlretrieve, Request, url2pathname, + pathname2url, + HTTPBasicAuthHandler, HTTPPasswordMgr, + HTTPSHandler, HTTPHandler, HTTPRedirectHandler, + build_opener) + from urllib.error import HTTPError, URLError, ContentTooShortError + import http.client as httplib + import urllib.request as urllib2 + import xmlrpc.client as xmlrpclib + import queue + from html.parser import HTMLParser + import html.entities as htmlentitydefs + raw_input = input + from itertools import filterfalse + filter = filter + +try: + from ssl import match_hostname, CertificateError +except ImportError: + class CertificateError(ValueError): + pass + + + def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty + # dotless fragment. + pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. + frag = re.escape(frag) + pats.append(frag.replace(r'\*', '[^.]*')) + return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + + + def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules + are mostly followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") + + +try: + from types import SimpleNamespace as Container +except ImportError: + class Container(object): + """ + A generic container for when multiple values need to be returned + """ + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +try: + from shutil import which +except ImportError: + # Implementation from Python 3.3 + def which(cmd, mode=os.F_OK | os.X_OK, path=None): + """Given a command, mode, and a PATH string, return the path which + conforms to the given mode on the PATH, or None if there is no such + file. + + `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result + of os.environ.get("PATH"), or can be overridden with a custom search + path. + + """ + # Check that a given file can be accessed with the correct mode. + # Additionally check that `file` is not a directory, as on Windows + # directories pass the os.access check. + def _access_check(fn, mode): + return (os.path.exists(fn) and os.access(fn, mode) + and not os.path.isdir(fn)) + + # If we're given a path with a directory part, look it up directly rather + # than referring to PATH directories. This includes checking relative to the + # current directory, e.g. ./script + if os.path.dirname(cmd): + if _access_check(cmd, mode): + return cmd + return None + + if path is None: + path = os.environ.get("PATH", os.defpath) + if not path: + return None + path = path.split(os.pathsep) + + if sys.platform == "win32": + # The current directory takes precedence on Windows. + if not os.curdir in path: + path.insert(0, os.curdir) + + # PATHEXT is necessary to check on Windows. + pathext = os.environ.get("PATHEXT", "").split(os.pathsep) + # See if the given file matches any of the expected path extensions. + # This will allow us to short circuit when given "python.exe". + # If it does match, only test that one, otherwise we have to try + # others. + if any(cmd.lower().endswith(ext.lower()) for ext in pathext): + files = [cmd] + else: + files = [cmd + ext for ext in pathext] + else: + # On other platforms you don't have things like PATHEXT to tell you + # what file suffixes are executable, so just pass on cmd as-is. + files = [cmd] + + seen = set() + for dir in path: + normdir = os.path.normcase(dir) + if not normdir in seen: + seen.add(normdir) + for thefile in files: + name = os.path.join(dir, thefile) + if _access_check(name, mode): + return name + return None + + +# ZipFile is a context manager in 2.7, but not in 2.6 + +from zipfile import ZipFile as BaseZipFile + +if hasattr(BaseZipFile, '__enter__'): + ZipFile = BaseZipFile +else: + from zipfile import ZipExtFile as BaseZipExtFile + + class ZipExtFile(BaseZipExtFile): + def __init__(self, base): + self.__dict__.update(base.__dict__) + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + # return None, so if an exception occurred, it will propagate + + class ZipFile(BaseZipFile): + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + # return None, so if an exception occurred, it will propagate + + def open(self, *args, **kwargs): + base = BaseZipFile.open(self, *args, **kwargs) + return ZipExtFile(base) + +try: + from platform import python_implementation +except ImportError: # pragma: no cover + def python_implementation(): + """Return a string identifying the Python implementation.""" + if 'PyPy' in sys.version: + return 'PyPy' + if os.name == 'java': + return 'Jython' + if sys.version.startswith('IronPython'): + return 'IronPython' + return 'CPython' + +try: + import sysconfig +except ImportError: # pragma: no cover + from ._backport import sysconfig + +try: + callable = callable +except NameError: # pragma: no cover + from collections import Callable + + def callable(obj): + return isinstance(obj, Callable) + + +try: + fsencode = os.fsencode + fsdecode = os.fsdecode +except AttributeError: # pragma: no cover + _fsencoding = sys.getfilesystemencoding() + if _fsencoding == 'mbcs': + _fserrors = 'strict' + else: + _fserrors = 'surrogateescape' + + def fsencode(filename): + if isinstance(filename, bytes): + return filename + elif isinstance(filename, text_type): + return filename.encode(_fsencoding, _fserrors) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) + + def fsdecode(filename): + if isinstance(filename, text_type): + return filename + elif isinstance(filename, bytes): + return filename.decode(_fsencoding, _fserrors) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) + +try: + from tokenize import detect_encoding +except ImportError: # pragma: no cover + from codecs import BOM_UTF8, lookup + import re + + cookie_re = re.compile("coding[:=]\s*([-\w.]+)") + + def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + + def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argment, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, + but disagree, a SyntaxError will be raised. If the encoding cookie is an + invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. + """ + try: + filename = readline.__self__.name + except AttributeError: + filename = None + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return b'' + + def find_cookie(line): + try: + # Decode as UTF-8. Either the line is an encoding declaration, + # in which case it should be pure ASCII, or it must be UTF-8 + # per default encoding. + line_string = line.decode('utf-8') + except UnicodeDecodeError: + msg = "invalid or missing encoding declaration" + if filename is not None: + msg = '{} for {!r}'.format(msg, filename) + raise SyntaxError(msg) + + matches = cookie_re.findall(line_string) + if not matches: + return None + encoding = _get_normal_name(matches[0]) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + if filename is None: + msg = "unknown encoding: " + encoding + else: + msg = "unknown encoding for {!r}: {}".format(filename, + encoding) + raise SyntaxError(msg) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + if filename is None: + msg = 'encoding problem: utf-8' + else: + msg = 'encoding problem for {!r}: utf-8'.format(filename) + raise SyntaxError(msg) + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +# For converting & <-> & etc. +try: + from html import escape +except ImportError: + from cgi import escape +unescape = HTMLParser().unescape + +try: + from collections import ChainMap +except ImportError: # pragma: no cover + from collections import MutableMapping + + try: + from reprlib import recursive_repr as _recursive_repr + except ImportError: + def _recursive_repr(fillvalue='...'): + ''' + Decorator to make a repr function return fillvalue for a recursive + call + ''' + + def decorating_function(user_function): + repr_running = set() + + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + # Can't use functools.wraps() here because of bootstrap issues + wrapper.__module__ = getattr(user_function, '__module__') + wrapper.__doc__ = getattr(user_function, '__doc__') + wrapper.__name__ = getattr(user_function, '__name__') + wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + return wrapper + + return decorating_function + + class ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. + + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + def __bool__(self): + return any(self.maps) + + @_recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' + return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def new_child(self): # like Django's Context.push() + 'New ChainMap with a new dict followed by all previous maps.' + return self.__class__({}, *self.maps) + + @property + def parents(self): # like Django's Context.pop() + 'New ChainMap from maps[1:].' + return self.__class__(*self.maps[1:]) + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' + try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' + self.maps[0].clear() + +try: + from imp import cache_from_source +except ImportError: # pragma: no cover + def cache_from_source(path, debug_override=None): + assert path.endswith('.py') + if debug_override is None: + debug_override = __debug__ + if debug_override: + suffix = 'c' + else: + suffix = 'o' + return path + suffix + +try: + from collections import OrderedDict +except ImportError: # pragma: no cover +## {{{ http://code.activestate.com/recipes/576693/ (r9) +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. + try: + from thread import get_ident as _get_ident + except ImportError: + from dummy_thread import get_ident as _get_ident + + try: + from _abcoll import KeysView, ValuesView, ItemsView + except ImportError: + pass + + + class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running=None): + 'od.__repr__() <==> repr(od)' + if not _repr_running: _repr_running = {} + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + +try: + from logging.config import BaseConfigurator, valid_ident +except ImportError: # pragma: no cover + IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) + + + def valid_ident(s): + m = IDENTIFIER.match(s) + if not m: + raise ValueError('Not a valid Python identifier: %r' % s) + return True + + + # The ConvertingXXX classes are wrappers around standard Python containers, + # and they serve to convert any suitable values in the container. The + # conversion converts base dicts, lists and tuples to their wrapped + # equivalents, whereas strings which match a conversion format are converted + # appropriately. + # + # Each wrapper should have a configurator attribute holding the actual + # configurator to use for conversion. + + class ConvertingDict(dict): + """A converting dictionary wrapper.""" + + def __getitem__(self, key): + value = dict.__getitem__(self, key) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def get(self, key, default=None): + value = dict.get(self, key, default) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def pop(self, key, default=None): + value = dict.pop(self, key, default) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + class ConvertingList(list): + """A converting list wrapper.""" + def __getitem__(self, key): + value = list.__getitem__(self, key) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def pop(self, idx=-1): + value = list.pop(self, idx) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + return result + + class ConvertingTuple(tuple): + """A converting tuple wrapper.""" + def __getitem__(self, key): + value = tuple.__getitem__(self, key) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + class BaseConfigurator(object): + """ + The configurator base class which defines some useful defaults. + """ + + CONVERT_PATTERN = re.compile(r'^(?P[a-z]+)://(?P.*)$') + + WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') + DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') + INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') + DIGIT_PATTERN = re.compile(r'^\d+$') + + value_converters = { + 'ext' : 'ext_convert', + 'cfg' : 'cfg_convert', + } + + # We might want to use a different one, e.g. importlib + importer = staticmethod(__import__) + + def __init__(self, config): + self.config = ConvertingDict(config) + self.config.configurator = self + + def resolve(self, s): + """ + Resolve strings to objects using standard import and attribute + syntax. + """ + name = s.split('.') + used = name.pop(0) + try: + found = self.importer(used) + for frag in name: + used += '.' + frag + try: + found = getattr(found, frag) + except AttributeError: + self.importer(used) + found = getattr(found, frag) + return found + except ImportError: + e, tb = sys.exc_info()[1:] + v = ValueError('Cannot resolve %r: %s' % (s, e)) + v.__cause__, v.__traceback__ = e, tb + raise v + + def ext_convert(self, value): + """Default converter for the ext:// protocol.""" + return self.resolve(value) + + def cfg_convert(self, value): + """Default converter for the cfg:// protocol.""" + rest = value + m = self.WORD_PATTERN.match(rest) + if m is None: + raise ValueError("Unable to convert %r" % value) + else: + rest = rest[m.end():] + d = self.config[m.groups()[0]] + #print d, rest + while rest: + m = self.DOT_PATTERN.match(rest) + if m: + d = d[m.groups()[0]] + else: + m = self.INDEX_PATTERN.match(rest) + if m: + idx = m.groups()[0] + if not self.DIGIT_PATTERN.match(idx): + d = d[idx] + else: + try: + n = int(idx) # try as number first (most likely) + d = d[n] + except TypeError: + d = d[idx] + if m: + rest = rest[m.end():] + else: + raise ValueError('Unable to convert ' + '%r at %r' % (value, rest)) + #rest should be empty + return d + + def convert(self, value): + """ + Convert values to an appropriate type. dicts, lists and tuples are + replaced by their converting alternatives. Strings are checked to + see if they have a conversion format and are converted if they do. + """ + if not isinstance(value, ConvertingDict) and isinstance(value, dict): + value = ConvertingDict(value) + value.configurator = self + elif not isinstance(value, ConvertingList) and isinstance(value, list): + value = ConvertingList(value) + value.configurator = self + elif not isinstance(value, ConvertingTuple) and\ + isinstance(value, tuple): + value = ConvertingTuple(value) + value.configurator = self + elif isinstance(value, string_types): + m = self.CONVERT_PATTERN.match(value) + if m: + d = m.groupdict() + prefix = d['prefix'] + converter = self.value_converters.get(prefix, None) + if converter: + suffix = d['suffix'] + converter = getattr(self, converter) + value = converter(suffix) + return value + + def configure_custom(self, config): + """Configure an object with a user-supplied factory.""" + c = config.pop('()') + if not callable(c): + c = self.resolve(c) + props = config.pop('.', None) + # Check for valid identifiers + kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) + result = c(**kwargs) + if props: + for name, value in props.items(): + setattr(result, name, value) + return result + + def as_tuple(self, value): + """Utility function which converts lists to tuples.""" + if isinstance(value, list): + value = tuple(value) + return value diff --git a/pip/_vendor/distlib/database.py b/pip/_vendor/distlib/database.py new file mode 100644 index 000000000..0f013ec54 --- /dev/null +++ b/pip/_vendor/distlib/database.py @@ -0,0 +1,1301 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""PEP 376 implementation.""" + +from __future__ import unicode_literals + +import base64 +import codecs +import contextlib +import hashlib +import logging +import os +import posixpath +import sys +import zipimport + +from . import DistlibException, resources +from .compat import StringIO +from .version import get_scheme, UnsupportedVersionError +from .metadata import Metadata, METADATA_FILENAME +from .util import (parse_requirement, cached_property, parse_name_and_version, + read_exports, write_exports, CSVReader, CSVWriter) + + +__all__ = ['Distribution', 'BaseInstalledDistribution', + 'InstalledDistribution', 'EggInfoDistribution', + 'DistributionPath'] + + +logger = logging.getLogger(__name__) + +EXPORTS_FILENAME = 'pydist-exports.json' +COMMANDS_FILENAME = 'pydist-commands.json' + +DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED', + 'RESOURCES', EXPORTS_FILENAME, 'SHARED') + +DISTINFO_EXT = '.dist-info' + + +class _Cache(object): + """ + A simple cache mapping names and .dist-info paths to distributions + """ + def __init__(self): + """ + Initialise an instance. There is normally one for each DistributionPath. + """ + self.name = {} + self.path = {} + self.generated = False + + def clear(self): + """ + Clear the cache, setting it to its initial state. + """ + self.name.clear() + self.path.clear() + self.generated = False + + def add(self, dist): + """ + Add a distribution to the cache. + :param dist: The distribution to add. + """ + if dist.path not in self.path: + self.path[dist.path] = dist + self.name.setdefault(dist.key, []).append(dist) + + +class DistributionPath(object): + """ + Represents a set of distributions installed on a path (typically sys.path). + """ + def __init__(self, path=None, include_egg=False): + """ + Create an instance from a path, optionally including legacy (distutils/ + setuptools/distribute) distributions. + :param path: The path to use, as a list of directories. If not specified, + sys.path is used. + :param include_egg: If True, this instance will look for and return legacy + distributions as well as those based on PEP 376. + """ + if path is None: + path = sys.path + self.path = path + self._include_dist = True + self._include_egg = include_egg + + self._cache = _Cache() + self._cache_egg = _Cache() + self._cache_enabled = True + self._scheme = get_scheme('default') + + def _get_cache_enabled(self): + return self._cache_enabled + + def _set_cache_enabled(self, value): + self._cache_enabled = value + + cache_enabled = property(_get_cache_enabled, _set_cache_enabled) + + def clear_cache(self): + """ + Clears the internal cache. + """ + self._cache.clear() + self._cache_egg.clear() + + + def _yield_distributions(self): + """ + Yield .dist-info and/or .egg(-info) distributions. + """ + # We need to check if we've seen some resources already, because on + # some Linux systems (e.g. some Debian/Ubuntu variants) there are + # symlinks which alias other files in the environment. + seen = set() + for path in self.path: + finder = resources.finder_for_path(path) + if finder is None: + continue + r = finder.find('') + if not r or not r.is_container: + continue + rset = sorted(r.resources) + for entry in rset: + r = finder.find(entry) + if not r or r.path in seen: + continue + if self._include_dist and entry.endswith(DISTINFO_EXT): + metadata_path = posixpath.join(entry, METADATA_FILENAME) + pydist = finder.find(metadata_path) + if not pydist: + continue + + metadata = Metadata(fileobj=pydist.as_stream(), + scheme='legacy') + logger.debug('Found %s', r.path) + seen.add(r.path) + yield new_dist_class(r.path, metadata=metadata, + env=self) + elif self._include_egg and entry.endswith(('.egg-info', + '.egg')): + logger.debug('Found %s', r.path) + seen.add(r.path) + yield old_dist_class(r.path, self) + + def _generate_cache(self): + """ + Scan the path for distributions and populate the cache with + those that are found. + """ + gen_dist = not self._cache.generated + gen_egg = self._include_egg and not self._cache_egg.generated + if gen_dist or gen_egg: + for dist in self._yield_distributions(): + if isinstance(dist, InstalledDistribution): + self._cache.add(dist) + else: + self._cache_egg.add(dist) + + if gen_dist: + self._cache.generated = True + if gen_egg: + self._cache_egg.generated = True + + @classmethod + def distinfo_dirname(cls, name, version): + """ + The *name* and *version* parameters are converted into their + filename-escaped form, i.e. any ``'-'`` characters are replaced + with ``'_'`` other than the one in ``'dist-info'`` and the one + separating the name from the version number. + + :parameter name: is converted to a standard distribution name by replacing + any runs of non- alphanumeric characters with a single + ``'-'``. + :type name: string + :parameter version: is converted to a standard version string. Spaces + become dots, and all other non-alphanumeric characters + (except dots) become dashes, with runs of multiple + dashes condensed to a single dash. + :type version: string + :returns: directory name + :rtype: string""" + name = name.replace('-', '_') + return '-'.join([name, version]) + DISTINFO_EXT + + def get_distributions(self): + """ + Provides an iterator that looks for distributions and returns + :class:`InstalledDistribution` or + :class:`EggInfoDistribution` instances for each one of them. + + :rtype: iterator of :class:`InstalledDistribution` and + :class:`EggInfoDistribution` instances + """ + if not self._cache_enabled: + for dist in self._yield_distributions(): + yield dist + else: + self._generate_cache() + + for dist in self._cache.path.values(): + yield dist + + if self._include_egg: + for dist in self._cache_egg.path.values(): + yield dist + + def get_distribution(self, name): + """ + Looks for a named distribution on the path. + + This function only returns the first result found, as no more than one + value is expected. If nothing is found, ``None`` is returned. + + :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution` + or ``None`` + """ + result = None + name = name.lower() + if not self._cache_enabled: + for dist in self._yield_distributions(): + if dist.key == name: + result = dist + break + else: + self._generate_cache() + + if name in self._cache.name: + result = self._cache.name[name][0] + elif self._include_egg and name in self._cache_egg.name: + result = self._cache_egg.name[name][0] + return result + + def provides_distribution(self, name, version=None): + """ + Iterates over all distributions to find which distributions provide *name*. + If a *version* is provided, it will be used to filter the results. + + This function only returns the first result found, since no more than + one values are expected. If the directory is not found, returns ``None``. + + :parameter version: a version specifier that indicates the version + required, conforming to the format in ``PEP-345`` + + :type name: string + :type version: string + """ + matcher = None + if not version is None: + try: + matcher = self._scheme.matcher('%s (%s)' % (name, version)) + except ValueError: + raise DistlibException('invalid name or version: %r, %r' % + (name, version)) + + for dist in self.get_distributions(): + provided = dist.provides + + for p in provided: + p_name, p_ver = parse_name_and_version(p) + if matcher is None: + if p_name == name: + yield dist + break + else: + if p_name == name and matcher.match(p_ver): + yield dist + break + + def get_file_path(self, name, relative_path): + """ + Return the path to a resource file. + """ + dist = self.get_distribution(name) + if dist is None: + raise LookupError('no distribution named %r found' % name) + return dist.get_resource_path(relative_path) + + def get_exported_entries(self, category, name=None): + """ + Return all of the exported entries in a particular category. + + :param category: The category to search for entries. + :param name: If specified, only entries with that name are returned. + """ + for dist in self.get_distributions(): + r = dist.exports + if category in r: + d = r[category] + if name is not None: + if name in d: + yield d[name] + else: + for v in d.values(): + yield v + + +class Distribution(object): + """ + A base class for distributions, whether installed or from indexes. + Either way, it must have some metadata, so that's all that's needed + for construction. + """ + + build_time_dependency = False + """ + Set to True if it's known to be only a build-time dependency (i.e. + not needed after installation). + """ + + requested = False + """A boolean that indicates whether the ``REQUESTED`` metadata file is + present (in other words, whether the package was installed by user + request or it was installed as a dependency).""" + + def __init__(self, metadata): + """ + Initialise an instance. + :param metadata: The instance of :class:`Metadata` describing this + distribution. + """ + self.metadata = metadata + self.name = metadata.name + self.key = self.name.lower() # for case-insensitive comparisons + self.version = metadata.version + self.locator = None + self.digest = None + self.extras = None # additional features requested + self.context = None # environment marker overrides + + @property + def source_url(self): + """ + The source archive download URL for this distribution. + """ + return self.metadata.source_url + + download_url = source_url # Backward compatibility + + @property + def name_and_version(self): + """ + A utility property which displays the name and version in parentheses. + """ + return '%s (%s)' % (self.name, self.version) + + @property + def provides(self): + """ + A set of distribution names and versions provided by this distribution. + :return: A set of "name (version)" strings. + """ + plist = self.metadata.provides + s = '%s (%s)' % (self.name, self.version) + if s not in plist: + plist.append(s) + return plist + + def _get_requirements(self, req_attr): + reqts = getattr(self.metadata, req_attr) + return set(self.metadata.get_requirements(reqts, extras=self.extras, + env=self.context)) + + @property + def run_requires(self): + return self._get_requirements('run_requires') + + @property + def meta_requires(self): + return self._get_requirements('meta_requires') + + @property + def build_requires(self): + return self._get_requirements('build_requires') + + @property + def test_requires(self): + return self._get_requirements('test_requires') + + @property + def dev_requires(self): + return self._get_requirements('dev_requires') + + def matches_requirement(self, req): + """ + Say if this instance matches (fulfills) a requirement. + :param req: The requirement to match. + :rtype req: str + :return: True if it matches, else False. + """ + # Requirement may contain extras - parse to lose those + # from what's passed to the matcher + r = parse_requirement(req) + scheme = get_scheme(self.metadata.scheme) + try: + matcher = scheme.matcher(r.requirement) + except UnsupportedVersionError: + # XXX compat-mode if cannot read the version + logger.warning('could not read version %r - using name only', + req) + name = req.split()[0] + matcher = scheme.matcher(name) + + name = matcher.key # case-insensitive + + result = False + for p in self.provides: + p_name, p_ver = parse_name_and_version(p) + if p_name != name: + continue + try: + result = matcher.match(p_ver) + break + except UnsupportedVersionError: + pass + return result + + def __repr__(self): + """ + Return a textual representation of this instance, + """ + if self.source_url: + suffix = ' [%s]' % self.source_url + else: + suffix = '' + return '' % (self.name, self.version, suffix) + + def __eq__(self, other): + """ + See if this distribution is the same as another. + :param other: The distribution to compare with. To be equal to one + another. distributions must have the same type, name, + version and source_url. + :return: True if it is the same, else False. + """ + if type(other) is not type(self): + result = False + else: + result = (self.name == other.name and + self.version == other.version and + self.source_url == other.source_url) + return result + + def __hash__(self): + """ + Compute hash in a way which matches the equality test. + """ + return hash(self.name) + hash(self.version) + hash(self.source_url) + + +class BaseInstalledDistribution(Distribution): + """ + This is the base class for installed distributions (whether PEP 376 or + legacy). + """ + + hasher = None + + def __init__(self, metadata, path, env=None): + """ + Initialise an instance. + :param metadata: An instance of :class:`Metadata` which describes the + distribution. This will normally have been initialised + from a metadata file in the ``path``. + :param path: The path of the ``.dist-info`` or ``.egg-info`` + directory for the distribution. + :param env: This is normally the :class:`DistributionPath` + instance where this distribution was found. + """ + super(BaseInstalledDistribution, self).__init__(metadata) + self.path = path + self.dist_path = env + + def get_hash(self, data, hasher=None): + """ + Get the hash of some data, using a particular hash algorithm, if + specified. + + :param data: The data to be hashed. + :type data: bytes + :param hasher: The name of a hash implementation, supported by hashlib, + or ``None``. Examples of valid values are ``'sha1'``, + ``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and + ``'sha512'``. If no hasher is specified, the ``hasher`` + attribute of the :class:`InstalledDistribution` instance + is used. If the hasher is determined to be ``None``, MD5 + is used as the hashing algorithm. + :returns: The hash of the data. If a hasher was explicitly specified, + the returned hash will be prefixed with the specified hasher + followed by '='. + :rtype: str + """ + if hasher is None: + hasher = self.hasher + if hasher is None: + hasher = hashlib.md5 + prefix = '' + else: + hasher = getattr(hashlib, hasher) + prefix = '%s=' % self.hasher + digest = hasher(data).digest() + digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii') + return '%s%s' % (prefix, digest) + + +class InstalledDistribution(BaseInstalledDistribution): + """ + Created with the *path* of the ``.dist-info`` directory provided to the + constructor. It reads the metadata contained in ``pydist.json`` when it is + instantiated., or uses a passed in Metadata instance (useful for when + dry-run mode is being used). + """ + + hasher = 'sha256' + + def __init__(self, path, metadata=None, env=None): + self.finder = finder = resources.finder_for_path(path) + if finder is None: + import pdb; pdb.set_trace () + if env and env._cache_enabled and path in env._cache.path: + metadata = env._cache.path[path].metadata + elif metadata is None: + r = finder.find(METADATA_FILENAME) + # Temporary - for legacy support + if r is None: + r = finder.find('METADATA') + if r is None: + raise ValueError('no %s found in %s' % (METADATA_FILENAME, + path)) + with contextlib.closing(r.as_stream()) as stream: + metadata = Metadata(fileobj=stream, scheme='legacy') + + super(InstalledDistribution, self).__init__(metadata, path, env) + + if env and env._cache_enabled: + env._cache.add(self) + + try: + r = finder.find('REQUESTED') + except AttributeError: + import pdb; pdb.set_trace () + self.requested = r is not None + + def __repr__(self): + return '' % ( + self.name, self.version, self.path) + + def __str__(self): + return "%s %s" % (self.name, self.version) + + def _get_records(self): + """ + Get the list of installed files for the distribution + :return: A list of tuples of path, hash and size. Note that hash and + size might be ``None`` for some entries. The path is exactly + as stored in the file (which is as in PEP 376). + """ + results = [] + r = self.get_distinfo_resource('RECORD') + with contextlib.closing(r.as_stream()) as stream: + with CSVReader(stream=stream) as record_reader: + # Base location is parent dir of .dist-info dir + #base_location = os.path.dirname(self.path) + #base_location = os.path.abspath(base_location) + for row in record_reader: + missing = [None for i in range(len(row), 3)] + path, checksum, size = row + missing + #if not os.path.isabs(path): + # path = path.replace('/', os.sep) + # path = os.path.join(base_location, path) + results.append((path, checksum, size)) + return results + + @cached_property + def exports(self): + """ + Return the information exported by this distribution. + :return: A dictionary of exports, mapping an export category to a dict + of :class:`ExportEntry` instances describing the individual + export entries, and keyed by name. + """ + result = {} + r = self.get_distinfo_resource(EXPORTS_FILENAME) + if r: + result = self.read_exports() + return result + + def read_exports(self): + """ + Read exports data from a file in .ini format. + + :return: A dictionary of exports, mapping an export category to a list + of :class:`ExportEntry` instances describing the individual + export entries. + """ + result = {} + r = self.get_distinfo_resource(EXPORTS_FILENAME) + if r: + with contextlib.closing(r.as_stream()) as stream: + result = read_exports(stream) + return result + + def write_exports(self, exports): + """ + Write a dictionary of exports to a file in .ini format. + :param exports: A dictionary of exports, mapping an export category to + a list of :class:`ExportEntry` instances describing the + individual export entries. + """ + rf = self.get_distinfo_file(EXPORTS_FILENAME) + with open(rf, 'w') as f: + write_exports(exports, f) + + def get_resource_path(self, relative_path): + """ + NOTE: This API may change in the future. + + Return the absolute path to a resource file with the given relative + path. + + :param relative_path: The path, relative to .dist-info, of the resource + of interest. + :return: The absolute path where the resource is to be found. + """ + r = self.get_distinfo_resource('RESOURCES') + with contextlib.closing(r.as_stream()) as stream: + with CSVReader(stream=stream) as resources_reader: + for relative, destination in resources_reader: + if relative == relative_path: + return destination + raise KeyError('no resource file with relative path %r ' + 'is installed' % relative_path) + + def list_installed_files(self): + """ + Iterates over the ``RECORD`` entries and returns a tuple + ``(path, hash, size)`` for each line. + + :returns: iterator of (path, hash, size) + """ + for result in self._get_records(): + yield result + + def write_installed_files(self, paths, prefix, dry_run=False): + """ + Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any + existing ``RECORD`` file is silently overwritten. + + prefix is used to determine when to write absolute paths. + """ + prefix = os.path.join(prefix, '') + base = os.path.dirname(self.path) + base_under_prefix = base.startswith(prefix) + base = os.path.join(base, '') + record_path = self.get_distinfo_file('RECORD') + logger.info('creating %s', record_path) + if dry_run: + return None + with CSVWriter(record_path) as writer: + for path in paths: + if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')): + # do not put size and hash, as in PEP-376 + hash_value = size = '' + else: + size = '%d' % os.path.getsize(path) + with open(path, 'rb') as fp: + hash_value = self.get_hash(fp.read()) + if path.startswith(base) or (base_under_prefix and + path.startswith(prefix)): + path = os.path.relpath(path, base) + writer.writerow((path, hash_value, size)) + + # add the RECORD file itself + if record_path.startswith(base): + record_path = os.path.relpath(record_path, base) + writer.writerow((record_path, '', '')) + return record_path + + def check_installed_files(self): + """ + Checks that the hashes and sizes of the files in ``RECORD`` are + matched by the files themselves. Returns a (possibly empty) list of + mismatches. Each entry in the mismatch list will be a tuple consisting + of the path, 'exists', 'size' or 'hash' according to what didn't match + (existence is checked first, then size, then hash), the expected + value and the actual value. + """ + mismatches = [] + base = os.path.dirname(self.path) + record_path = self.get_distinfo_file('RECORD') + for path, hash_value, size in self.list_installed_files(): + if not os.path.isabs(path): + path = os.path.join(base, path) + if path == record_path: + continue + if not os.path.exists(path): + mismatches.append((path, 'exists', True, False)) + elif os.path.isfile(path): + actual_size = str(os.path.getsize(path)) + if size and actual_size != size: + mismatches.append((path, 'size', size, actual_size)) + elif hash_value: + if '=' in hash_value: + hasher = hash_value.split('=', 1)[0] + else: + hasher = None + + with open(path, 'rb') as f: + actual_hash = self.get_hash(f.read(), hasher) + if actual_hash != hash_value: + mismatches.append((path, 'hash', hash_value, actual_hash)) + return mismatches + + @cached_property + def shared_locations(self): + """ + A dictionary of shared locations whose keys are in the set 'prefix', + 'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'. + The corresponding value is the absolute path of that category for + this distribution, and takes into account any paths selected by the + user at installation time (e.g. via command-line arguments). In the + case of the 'namespace' key, this would be a list of absolute paths + for the roots of namespace packages in this distribution. + + The first time this property is accessed, the relevant information is + read from the SHARED file in the .dist-info directory. + """ + result = {} + shared_path = os.path.join(self.path, 'SHARED') + if os.path.isfile(shared_path): + with codecs.open(shared_path, 'r', encoding='utf-8') as f: + lines = f.read().splitlines() + for line in lines: + key, value = line.split('=', 1) + if key == 'namespace': + result.setdefault(key, []).append(value) + else: + result[key] = value + return result + + def write_shared_locations(self, paths, dry_run=False): + """ + Write shared location information to the SHARED file in .dist-info. + :param paths: A dictionary as described in the documentation for + :meth:`shared_locations`. + :param dry_run: If True, the action is logged but no file is actually + written. + :return: The path of the file written to. + """ + shared_path = os.path.join(self.path, 'SHARED') + logger.info('creating %s', shared_path) + if dry_run: + return None + lines = [] + for key in ('prefix', 'lib', 'headers', 'scripts', 'data'): + path = paths[key] + if os.path.isdir(paths[key]): + lines.append('%s=%s' % (key, path)) + for ns in paths.get('namespace', ()): + lines.append('namespace=%s' % ns) + + with codecs.open(shared_path, 'w', encoding='utf-8') as f: + f.write('\n'.join(lines)) + return shared_path + + def get_distinfo_resource(self, path): + if path not in DIST_FILES: + raise DistlibException('invalid path for a dist-info file: ' + '%r at %r' % (path, self.path)) + finder = resources.finder_for_path(self.path) + if finder is None: + raise DistlibException('Unable to get a finder for %s' % self.path) + return finder.find(path) + + def get_distinfo_file(self, path): + """ + Returns a path located under the ``.dist-info`` directory. Returns a + string representing the path. + + :parameter path: a ``'/'``-separated path relative to the + ``.dist-info`` directory or an absolute path; + If *path* is an absolute path and doesn't start + with the ``.dist-info`` directory path, + a :class:`DistlibException` is raised + :type path: str + :rtype: str + """ + # Check if it is an absolute path # XXX use relpath, add tests + if path.find(os.sep) >= 0: + # it's an absolute path? + distinfo_dirname, path = path.split(os.sep)[-2:] + if distinfo_dirname != self.path.split(os.sep)[-1]: + raise DistlibException( + 'dist-info file %r does not belong to the %r %s ' + 'distribution' % (path, self.name, self.version)) + + # The file must be relative + if path not in DIST_FILES: + raise DistlibException('invalid path for a dist-info file: ' + '%r at %r' % (path, self.path)) + + return os.path.join(self.path, path) + + def list_distinfo_files(self): + """ + Iterates over the ``RECORD`` entries and returns paths for each line if + the path is pointing to a file located in the ``.dist-info`` directory + or one of its subdirectories. + + :returns: iterator of paths + """ + base = os.path.dirname(self.path) + for path, checksum, size in self._get_records(): + # XXX add separator or use real relpath algo + if not os.path.isabs(path): + path = os.path.join(base, path) + if path.startswith(self.path): + yield path + + def __eq__(self, other): + return (isinstance(other, InstalledDistribution) and + self.path == other.path) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + + +class EggInfoDistribution(BaseInstalledDistribution): + """Created with the *path* of the ``.egg-info`` directory or file provided + to the constructor. It reads the metadata contained in the file itself, or + if the given path happens to be a directory, the metadata is read from the + file ``PKG-INFO`` under that directory.""" + + requested = True # as we have no way of knowing, assume it was + shared_locations = {} + + def __init__(self, path, env=None): + def set_name_and_version(s, n, v): + s.name = n + s.key = n.lower() # for case-insensitive comparisons + s.version = v + + self.path = path + self.dist_path = env + if env and env._cache_enabled and path in env._cache_egg.path: + metadata = env._cache_egg.path[path].metadata + set_name_and_version(self, metadata.name, metadata.version) + else: + metadata = self._get_metadata(path) + + # Need to be set before caching + set_name_and_version(self, metadata.name, metadata.version) + + if env and env._cache_enabled: + env._cache_egg.add(self) + super(EggInfoDistribution, self).__init__(metadata, path, env) + + def _get_metadata(self, path): + requires = None + + def parse_requires_data(data): + """Create a list of dependencies from a requires.txt file. + + *data*: the contents of a setuptools-produced requires.txt file. + """ + reqs = [] + lines = data.splitlines() + for line in lines: + line = line.strip() + if line.startswith('['): + logger.warning('Unexpected line: quitting requirement scan: %r', + line) + break + r = parse_requirement(line) + if not r: + logger.warning('Not recognised as a requirement: %r', line) + continue + if r.extras: + logger.warning('extra requirements in requires.txt are ' + 'not supported') + if not r.constraints: + reqs.append(r.name) + else: + cons = ', '.join('%s%s' % c for c in r.constraints) + reqs.append('%s (%s)' % (r.name, cons)) + return reqs + + def parse_requires_path(req_path): + """Create a list of dependencies from a requires.txt file. + + *req_path*: the path to a setuptools-produced requires.txt file. + """ + + reqs = [] + try: + with codecs.open(req_path, 'r', 'utf-8') as fp: + reqs = parse_requires_data(fp.read()) + except IOError: + pass + return reqs + + if path.endswith('.egg'): + if os.path.isdir(path): + meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO') + metadata = Metadata(path=meta_path, scheme='legacy') + req_path = os.path.join(path, 'EGG-INFO', 'requires.txt') + requires = parse_requires_path(req_path) + else: + # FIXME handle the case where zipfile is not available + zipf = zipimport.zipimporter(path) + fileobj = StringIO( + zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) + metadata = Metadata(fileobj=fileobj, scheme='legacy') + try: + data = zipf.get_data('EGG-INFO/requires.txt') + requires = parse_requires_data(data.decode('utf-8')) + except IOError: + requires = None + elif path.endswith('.egg-info'): + if os.path.isdir(path): + path = os.path.join(path, 'PKG-INFO') + req_path = os.path.join(path, 'requires.txt') + requires = parse_requires_path(req_path) + metadata = Metadata(path=path, scheme='legacy') + else: + raise DistlibException('path must end with .egg-info or .egg, ' + 'got %r' % path) + + if requires: + metadata.add_requirements(requires) + return metadata + + def __repr__(self): + return '' % ( + self.name, self.version, self.path) + + def __str__(self): + return "%s %s" % (self.name, self.version) + + def check_installed_files(self): + """ + Checks that the hashes and sizes of the files in ``RECORD`` are + matched by the files themselves. Returns a (possibly empty) list of + mismatches. Each entry in the mismatch list will be a tuple consisting + of the path, 'exists', 'size' or 'hash' according to what didn't match + (existence is checked first, then size, then hash), the expected + value and the actual value. + """ + mismatches = [] + record_path = os.path.join(self.path, 'installed-files.txt') + if os.path.exists(record_path): + for path, _, _ in self.list_installed_files(): + if path == record_path: + continue + if not os.path.exists(path): + mismatches.append((path, 'exists', True, False)) + return mismatches + + def list_installed_files(self): + """ + Iterates over the ``installed-files.txt`` entries and returns a tuple + ``(path, hash, size)`` for each line. + + :returns: a list of (path, hash, size) + """ + + def _md5(path): + f = open(path, 'rb') + try: + content = f.read() + finally: + f.close() + return hashlib.md5(content).hexdigest() + + def _size(path): + return os.stat(path).st_size + + record_path = os.path.join(self.path, 'installed-files.txt') + result = [] + if os.path.exists(record_path): + with codecs.open(record_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + p = os.path.normpath(os.path.join(self.path, line)) + # "./" is present as a marker between installed files + # and installation metadata files + if not os.path.exists(p): + logger.warning('Non-existent file: %s', p) + if p.endswith(('.pyc', '.pyo')): + continue + #otherwise fall through and fail + if not os.path.isdir(p): + result.append((p, _md5(p), _size(p))) + result.append((record_path, None, None)) + return result + + def list_distinfo_files(self, absolute=False): + """ + Iterates over the ``installed-files.txt`` entries and returns paths for + each line if the path is pointing to a file located in the + ``.egg-info`` directory or one of its subdirectories. + + :parameter absolute: If *absolute* is ``True``, each returned path is + transformed into a local absolute path. Otherwise the + raw value from ``installed-files.txt`` is returned. + :type absolute: boolean + :returns: iterator of paths + """ + record_path = os.path.join(self.path, 'installed-files.txt') + skip = True + with codecs.open(record_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line == './': + skip = False + continue + if not skip: + p = os.path.normpath(os.path.join(self.path, line)) + if p.startswith(self.path): + if absolute: + yield p + else: + yield line + + def __eq__(self, other): + return (isinstance(other, EggInfoDistribution) and + self.path == other.path) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + +new_dist_class = InstalledDistribution +old_dist_class = EggInfoDistribution + + +class DependencyGraph(object): + """ + Represents a dependency graph between distributions. + + The dependency relationships are stored in an ``adjacency_list`` that maps + distributions to a list of ``(other, label)`` tuples where ``other`` + is a distribution and the edge is labeled with ``label`` (i.e. the version + specifier, if such was provided). Also, for more efficient traversal, for + every distribution ``x``, a list of predecessors is kept in + ``reverse_list[x]``. An edge from distribution ``a`` to + distribution ``b`` means that ``a`` depends on ``b``. If any missing + dependencies are found, they are stored in ``missing``, which is a + dictionary that maps distributions to a list of requirements that were not + provided by any other distributions. + """ + + def __init__(self): + self.adjacency_list = {} + self.reverse_list = {} + self.missing = {} + + def add_distribution(self, distribution): + """Add the *distribution* to the graph. + + :type distribution: :class:`distutils2.database.InstalledDistribution` + or :class:`distutils2.database.EggInfoDistribution` + """ + self.adjacency_list[distribution] = [] + self.reverse_list[distribution] = [] + #self.missing[distribution] = [] + + def add_edge(self, x, y, label=None): + """Add an edge from distribution *x* to distribution *y* with the given + *label*. + + :type x: :class:`distutils2.database.InstalledDistribution` or + :class:`distutils2.database.EggInfoDistribution` + :type y: :class:`distutils2.database.InstalledDistribution` or + :class:`distutils2.database.EggInfoDistribution` + :type label: ``str`` or ``None`` + """ + self.adjacency_list[x].append((y, label)) + # multiple edges are allowed, so be careful + if x not in self.reverse_list[y]: + self.reverse_list[y].append(x) + + def add_missing(self, distribution, requirement): + """ + Add a missing *requirement* for the given *distribution*. + + :type distribution: :class:`distutils2.database.InstalledDistribution` + or :class:`distutils2.database.EggInfoDistribution` + :type requirement: ``str`` + """ + logger.debug('%s missing %r', distribution, requirement) + self.missing.setdefault(distribution, []).append(requirement) + + def _repr_dist(self, dist): + return '%s %s' % (dist.name, dist.version) + + def repr_node(self, dist, level=1): + """Prints only a subgraph""" + output = [self._repr_dist(dist)] + for other, label in self.adjacency_list[dist]: + dist = self._repr_dist(other) + if label is not None: + dist = '%s [%s]' % (dist, label) + output.append(' ' * level + str(dist)) + suboutput = self.repr_node(other, level + 1) + subs = suboutput.split('\n') + output.extend(subs[1:]) + return '\n'.join(output) + + def to_dot(self, f, skip_disconnected=True): + """Writes a DOT output for the graph to the provided file *f*. + + If *skip_disconnected* is set to ``True``, then all distributions + that are not dependent on any other distribution are skipped. + + :type f: has to support ``file``-like operations + :type skip_disconnected: ``bool`` + """ + disconnected = [] + + f.write("digraph dependencies {\n") + for dist, adjs in self.adjacency_list.items(): + if len(adjs) == 0 and not skip_disconnected: + disconnected.append(dist) + for other, label in adjs: + if not label is None: + f.write('"%s" -> "%s" [label="%s"]\n' % + (dist.name, other.name, label)) + else: + f.write('"%s" -> "%s"\n' % (dist.name, other.name)) + if not skip_disconnected and len(disconnected) > 0: + f.write('subgraph disconnected {\n') + f.write('label = "Disconnected"\n') + f.write('bgcolor = red\n') + + for dist in disconnected: + f.write('"%s"' % dist.name) + f.write('\n') + f.write('}\n') + f.write('}\n') + + def topological_sort(self): + """ + Perform a topological sort of the graph. + :return: A tuple, the first element of which is a topologically sorted + list of distributions, and the second element of which is a + list of distributions that cannot be sorted because they have + circular dependencies and so form a cycle. + """ + result = [] + # Make a shallow copy of the adjacency list + alist = {} + for k, v in self.adjacency_list.items(): + alist[k] = v[:] + while True: + # See what we can remove in this run + to_remove = [] + for k, v in list(alist.items())[:]: + if not v: + to_remove.append(k) + del alist[k] + if not to_remove: + # What's left in alist (if anything) is a cycle. + break + # Remove from the adjacency list of others + for k, v in alist.items(): + alist[k] = [(d, r) for d, r in v if d not in to_remove] + logger.debug('Moving to result: %s', + ['%s (%s)' % (d.name, d.version) for d in to_remove]) + result.extend(to_remove) + return result, list(alist.keys()) + + def __repr__(self): + """Representation of the graph""" + output = [] + for dist, adjs in self.adjacency_list.items(): + output.append(self.repr_node(dist)) + return '\n'.join(output) + + +def make_graph(dists, scheme='default'): + """Makes a dependency graph from the given distributions. + + :parameter dists: a list of distributions + :type dists: list of :class:`distutils2.database.InstalledDistribution` and + :class:`distutils2.database.EggInfoDistribution` instances + :rtype: a :class:`DependencyGraph` instance + """ + scheme = get_scheme(scheme) + graph = DependencyGraph() + provided = {} # maps names to lists of (version, dist) tuples + + # first, build the graph and find out what's provided + for dist in dists: + graph.add_distribution(dist) + + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Add to provided: %s, %s, %s', name, version, dist) + provided.setdefault(name, []).append((version, dist)) + + # now make the edges + for dist in dists: + requires = (dist.run_requires | dist.meta_requires | + dist.build_requires | dist.dev_requires) + for req in requires: + try: + matcher = scheme.matcher(req) + except UnsupportedVersionError: + # XXX compat-mode if cannot read the version + logger.warning('could not read version %r - using name only', + req) + name = req.split()[0] + matcher = scheme.matcher(name) + + name = matcher.key # case-insensitive + + matched = False + if name in provided: + for version, provider in provided[name]: + try: + match = matcher.match(version) + except UnsupportedVersionError: + match = False + + if match: + graph.add_edge(dist, provider, req) + matched = True + break + if not matched: + graph.add_missing(dist, req) + return graph + + +def get_dependent_dists(dists, dist): + """Recursively generate a list of distributions from *dists* that are + dependent on *dist*. + + :param dists: a list of distributions + :param dist: a distribution, member of *dists* for which we are interested + """ + if dist not in dists: + raise DistlibException('given distribution %r is not a member ' + 'of the list' % dist.name) + graph = make_graph(dists) + + dep = [dist] # dependent distributions + todo = graph.reverse_list[dist] # list of nodes we should inspect + + while todo: + d = todo.pop() + dep.append(d) + for succ in graph.reverse_list[d]: + if succ not in dep: + todo.append(succ) + + dep.pop(0) # remove dist from dep, was there to prevent infinite loops + return dep + + +def get_required_dists(dists, dist): + """Recursively generate a list of distributions from *dists* that are + required by *dist*. + + :param dists: a list of distributions + :param dist: a distribution, member of *dists* for which we are interested + """ + if dist not in dists: + raise DistlibException('given distribution %r is not a member ' + 'of the list' % dist.name) + graph = make_graph(dists) + + req = [] # required distributions + todo = graph.adjacency_list[dist] # list of nodes we should inspect + + while todo: + d = todo.pop()[0] + req.append(d) + for pred in graph.adjacency_list[d]: + if pred not in req: + todo.append(pred) + + return req + + +def make_dist(name, version, **kwargs): + """ + A convenience method for making a dist given just a name and version. + """ + summary = kwargs.pop('summary', 'Placeholder for summary') + md = Metadata(**kwargs) + md.name = name + md.version = version + md.summary = summary or 'Plaeholder for summary' + return Distribution(md) diff --git a/pip/_vendor/distlib/index.py b/pip/_vendor/distlib/index.py new file mode 100644 index 000000000..fac88eb1a --- /dev/null +++ b/pip/_vendor/distlib/index.py @@ -0,0 +1,478 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +import hashlib +import logging +import os +import shutil +import subprocess +import tempfile +from threading import Thread + +from distlib import DistlibException +from distlib.compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr, + urlparse, build_opener) +from distlib.util import cached_property, zip_dir + +logger = logging.getLogger(__name__) + +DEFAULT_INDEX = 'http://pypi.python.org/pypi' +DEFAULT_REALM = 'pypi' + +class PackageIndex(object): + """ + This class represents a package index compatible with PyPI, the Python + Package Index. + """ + + boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$' + + def __init__(self, url=None): + """ + Initialise an instance. + + :param url: The URL of the index. If not specified, the URL for PyPI is + used. + """ + self.url = url or DEFAULT_INDEX + self.read_configuration() + scheme, netloc, path, params, query, frag = urlparse(self.url) + if params or query or frag or scheme not in ('http', 'https'): + raise DistlibException('invalid repository: %s' % self.url) + self.password_handler = None + self.ssl_verifier = None + self.gpg = None + self.gpg_home = None + with open(os.devnull, 'w') as sink: + for s in ('gpg2', 'gpg'): + try: + rc = subprocess.check_call([s, '--version'], stdout=sink, + stderr=sink) + if rc == 0: + self.gpg = s + break + except OSError: + pass + + def _get_pypirc_command(self): + """ + Get the distutils command for interacting with PyPI configurations. + :return: the command. + """ + from distutils.core import Distribution + from distutils.config import PyPIRCCommand + d = Distribution() + return PyPIRCCommand(d) + + def read_configuration(self): + """ + Read the PyPI access configuration as supported by distutils, getting + PyPI to do the acutal work. This populates ``username``, ``password``, + ``realm`` and ``url`` attributes from the configuration. + """ + # get distutils to do the work + c = self._get_pypirc_command() + c.repository = self.url + cfg = c._read_pypirc() + self.username = cfg.get('username') + self.password = cfg.get('password') + self.realm = cfg.get('realm', 'pypi') + self.url = cfg.get('repository', self.url) + + def save_configuration(self): + """ + Save the PyPI access configuration. You must have set ``username`` and + ``password`` attributes before calling this method. + + Again, distutils is used to do the actual work. + """ + self.check_credentials() + # get distutils to do the work + c = self._get_pypirc_command() + c._store_pypirc(self.username, self.password) + + def check_credentials(self): + """ + Check that ``username`` and ``password`` have been set, and raise an + exception if not. + """ + if self.username is None or self.password is None: + raise DistlibException('username and password must be set') + pm = HTTPPasswordMgr() + _, netloc, _, _, _, _ = urlparse(self.url) + pm.add_password(self.realm, netloc, self.username, self.password) + self.password_handler = HTTPBasicAuthHandler(pm) + + def register(self, metadata): + """ + Register a distribution on PyPI, using the provided metadata. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the distribution to be + registered. + :return: The HTTP response received from PyPI upon submission of the + request. + """ + self.check_credentials() + metadata.validate() + d = metadata.todict() + d[':action'] = 'verify' + request = self.encode_request(d.items(), []) + response = self.send_request(request) + d[':action'] = 'submit' + request = self.encode_request(d.items(), []) + return self.send_request(request) + + def _reader(self, name, stream, outbuf): + """ + Thread runner for reading lines of from a subprocess into a buffer. + + :param name: The logical name of the stream (used for logging only). + :param stream: The stream to read from. This will typically a pipe + connected to the output stream of a subprocess. + :param outbuf: The list to append the read lines to. + """ + while True: + s = stream.readline() + if not s: + break + s = s.decode('utf-8').rstrip() + outbuf.append(s) + logger.debug('%s: %s' % (name, s)) + stream.close() + + def get_sign_command(self, filename, signer, sign_password): + """ + Return a suitable command for signing a file. + + :param filename: The pathname to the file to be signed. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :return: The signing command as a list suitable to be + passed to :class:`subprocess.Popen`. + """ + cmd = [self.gpg, '--status-fd', '2', '--no-tty'] + if self.gpg_home: + cmd.extend(['--homedir', self.gpg_home]) + if sign_password is not None: + cmd.extend(['--batch', '--passphrase-fd', '0']) + td = tempfile.mkdtemp() + sf = os.path.join(td, os.path.basename(filename) + '.asc') + cmd.extend(['--detach-sign', '--armor', '--local-user', + signer, '--output', sf, filename]) + logger.debug('invoking: %s', ' '.join(cmd)) + return cmd, sf + + def run_command(self, cmd, input_data=None): + """ + Run a command in a child process , passing it any input data specified. + + :param cmd: The command to run. + :param input_data: If specified, this must be a byte string containing + data to be sent to the child process. + :return: A tuple consisting of the subprocess' exit code, a list of + lines read from the subprocess' ``stdout``, and a list of + lines read from the subprocess' ``stderr``. + """ + kwargs = { + 'stdout': subprocess.PIPE, + 'stderr': subprocess.PIPE, + } + if input_data is not None: + kwargs['stdin'] = subprocess.PIPE + stdout = [] + stderr = [] + p = subprocess.Popen(cmd, **kwargs) + # We don't use communicate() here because we may need to + # get clever with interacting with the command + t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout)) + t1.start() + t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr)) + t2.start() + if input_data is not None: + p.stdin.write(input_data) + p.stdin.close() + + p.wait() + t1.join() + t2.join() + return p.returncode, stdout, stderr + + def sign_file(self, filename, signer, sign_password): + """ + Sign a file. + + :param filename: The pathname to the file to be signed. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :return: The absolute pathname of the file where the signature is + stored. + """ + cmd, sig_file = self.get_sign_command(filename, signer, sign_password) + rc, stdout, stderr = self.run_command(cmd, + sign_password.encode('utf-8')) + if rc != 0: + raise DistlibException('sign command failed with error ' + 'code %s' % rc) + return sig_file + + def upload_file(self, metadata, filename, signer=None, sign_password=None, + filetype='sdist', pyversion='source'): + """ + Upload a release file to the index. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the file to be uploaded. + :param filename: The pathname of the file to be uploaded. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :param filetype: The type of the file being uploaded. This is the + distutils command which produced that file, e.g. + ``sdist`` or ``bdist_wheel``. + :param pyversion: The version of Python which the release relates + to. For code compatible with any Python, this would + be ``source``, otherwise it would be e.g. ``3.2``. + :return: The HTTP response received from PyPI upon submission of the + request. + """ + self.check_credentials() + if not os.path.exists(filename): + raise DistlibException('not found: %s' % filename) + metadata.validate() + d = metadata.todict() + sig_file = None + if signer: + if not self.gpg: + logger.warning('no signing program available - not signed') + else: + sig_file = self.sign_file(filename, signer, sign_password) + with open(filename, 'rb') as f: + file_data = f.read() + md5_digest = hashlib.md5(file_data).hexdigest() + sha256_digest = hashlib.sha256(file_data).hexdigest() + d.update({ + ':action': 'file_upload', + 'protcol_version': '1', + 'filetype': filetype, + 'pyversion': pyversion, + 'md5_digest': md5_digest, + 'sha256_digest': sha256_digest, + }) + files = [('content', os.path.basename(filename), file_data)] + if sig_file: + with open(sig_file, 'rb') as f: + sig_data = f.read() + files.append(('gpg_signature', os.path.basename(sig_file), + sig_data)) + shutil.rmtree(os.path.dirname(sig_file)) + logger.debug('files: %s', files) + request = self.encode_request(d.items(), files) + return self.send_request(request) + + def upload_documentation(self, metadata, doc_dir): + """ + Upload documentation to the index. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the documentation to be + uploaded. + :param doc_dir: The pathname of the directory which contains the + documentation. This should be the directory that + contains the ``index.html`` for the documentation. + :return: The HTTP response received from PyPI upon submission of the + request. + """ + self.check_credentials() + if not os.path.isdir(doc_dir): + raise DistlibException('not a directory: %r' % doc_dir) + fn = os.path.join(doc_dir, 'index.html') + if not os.path.exists(fn): + raise DistlibException('not found: %r' % fn) + metadata.validate() + name, version = metadata.name, metadata.version + zip_data = zip_dir(doc_dir).getvalue() + fields = [(':action', 'doc_upload'), + ('name', name), ('version', version)] + files = [('content', name, zip_data)] + request = self.encode_request(fields, files) + return self.send_request(request) + + def get_verify_command(self, signature_filename, data_filename): + """ + Return a suitable command for verifying a file. + + :param signature_filename: The pathname to the file containing the + signature. + :param data_filename: The pathname to the file containing the + signed data. + :return: The verifying command as a list suitable to be + passed to :class:`subprocess.Popen`. + """ + cmd = [self.gpg, '--status-fd', '2', '--no-tty'] + if self.gpg_home: + cmd.extend(['--homedir', self.gpg_home]) + cmd.extend(['--verify', signature_filename, data_filename]) + logger.debug('invoking: %s', ' '.join(cmd)) + return cmd + + def verify_signature(self, signature_filename, data_filename): + """ + Verify a signature for a file. + + :param signature_filename: The pathname to the file containing the + signature. + :param data_filename: The pathname to the file containing the + signed data. + :return: True if the signature was verified, else False. + """ + if not self.gpg: + raise DistlibException('verification unavailable because gpg ' + 'unavailable') + cmd = self.get_verify_command(signature_filename, data_filename) + rc, stdout, stderr = self.run_command(cmd) + if rc not in (0, 1): + raise DistlibException('verify command failed with error ' + 'code %s' % rc) + return rc == 0 + + def download_file(self, url, destfile, digest=None, reporthook=None): + """ + This is a convenience method for downloading a file from an URL. + Normally, this will be a file from the index, though currently + no check is made for this (i.e. a file can be downloaded from + anywhere). + + The method is just like the :func:`urlretrieve` function in the + standard library, except that it allows digest computation to be + done during download and checking that the downloaded data + matched any expected value. + + :param url: The URL of the file to be downloaded (assumed to be + available via an HTTP GET request). + :param destfile: The pathname where the downloaded file is to be + saved. + :param digest: If specified, this must be a (hasher, value) + tuple, where hasher is the algorithm used (e.g. + ``'md5'``) and ``value`` is the expected value. + :param reporthook: The same as for :func:`urlretrieve` in the + standard library. + """ + if digest is None: + digester = None + logger.debug('No digest specified') + else: + if isinstance(digest, (list, tuple)): + hasher, digest = digest + else: + hasher = 'md5' + digester = getattr(hashlib, hasher)() + logger.debug('Digest specified: %s' % digest) + # The following code is equivalent to urlretrieve. + # We need to do it this way so that we can compute the + # digest of the file as we go. + with open(destfile, 'wb') as dfp: + # addinfourl is not a context manager on 2.x + # so we have to use try/finally + sfp = self.send_request(Request(url)) + try: + headers = sfp.info() + blocksize = 8192 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + if reporthook: + reporthook(blocknum, blocksize, size) + while True: + block = sfp.read(blocksize) + if not block: + break + read += len(block) + dfp.write(block) + if digester: + digester.update(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, blocksize, size) + finally: + sfp.close() + + # check that we got the whole file, if we can + if size >= 0 and read < size: + raise DistlibException( + 'retrieval incomplete: got only %d out of %d bytes' + % (read, size)) + # if we have a digest, it must match. + if digester: + actual = digester.hexdigest() + if digest != actual: + raise DistlibException('%s digest mismatch for %s: expected ' + '%s, got %s' % (hasher, destfile, + digest, actual)) + logger.debug('Digest verified: %s', digest) + + def send_request(self, req): + """ + Send a standard library :class:`Request` to PyPI and return its + response. + + :param req: The request to send. + :return: The HTTP response from PyPI (a standard library HTTPResponse). + """ + handlers = [] + if self.password_handler: + handlers.append(self.password_handler) + if self.ssl_verifier: + handlers.append(self.ssl_verifier) + opener = build_opener(*handlers) + return opener.open(req) + + def encode_request(self, fields, files): + """ + Encode fields and files for posting to an HTTP server. + + :param fields: The fields to send as a list of (fieldname, value) + tuples. + :param files: The files to send as a list of (fieldname, filename, + file_bytes) tuple. + """ + # Adapted from packaging, which in turn was adapted from + # http://code.activestate.com/recipes/146306 + + parts = [] + boundary = self.boundary + for k, values in fields: + if not isinstance(values, (list, tuple)): + values = [values] + + for v in values: + parts.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"' % + k).encode('utf-8'), + b'', + v.encode('utf-8'))) + for key, filename, value in files: + parts.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"; filename="%s"' % + (key, filename)).encode('utf-8'), + b'', + value)) + + parts.extend((b'--' + boundary + b'--', b'')) + + body = b'\r\n'.join(parts) + ct = b'multipart/form-data; boundary=' + boundary + headers = { + 'Content-type': ct, + 'Content-length': str(len(body)) + } + return Request(self.url, body, headers) diff --git a/pip/_vendor/distlib/locators.py b/pip/_vendor/distlib/locators.py new file mode 100644 index 000000000..c8668986b --- /dev/null +++ b/pip/_vendor/distlib/locators.py @@ -0,0 +1,1184 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# + +import gzip +from io import BytesIO +import json +import logging +import os +import posixpath +import re +import threading +import zlib + +from . import DistlibException +from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url, + queue, quote, unescape, string_types, build_opener, + HTTPRedirectHandler as BaseRedirectHandler, + Request, HTTPError, URLError) +from .database import Distribution, DistributionPath, make_dist +from .metadata import Metadata +from .util import (cached_property, parse_credentials, ensure_slash, + split_filename, get_project_data, parse_requirement, + parse_name_and_version, ServerProxy) +from .version import get_scheme, UnsupportedVersionError +from .wheel import Wheel, is_compatible + +logger = logging.getLogger(__name__) + +HASHER_HASH = re.compile('^(\w+)=([a-f0-9]+)') +CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I) +HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml') +DEFAULT_INDEX = 'http://python.org/pypi' + +def get_all_distribution_names(url=None): + """ + Return all distribution names known by an index. + :param url: The URL of the index. + :return: A list of all known distribution names. + """ + if url is None: + url = DEFAULT_INDEX + client = ServerProxy(url, timeout=3.0) + return client.list_packages() + +class RedirectHandler(BaseRedirectHandler): + """ + A class to work around a bug in some Python 3.2.x releases. + """ + # There's a bug in the base version for some 3.2.x + # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header + # returns e.g. /abc, it bails because it says the scheme '' + # is bogus, when actually it should use the request's + # URL for the scheme. See Python issue #13696. + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + newurl = None + for key in ('location', 'uri'): + if key in headers: + newurl = headers[key] + break + if newurl is None: + return + urlparts = urlparse(newurl) + if urlparts.scheme == '': + newurl = urljoin(req.get_full_url(), newurl) + if hasattr(headers, 'replace_header'): + headers.replace_header(key, newurl) + else: + headers[key] = newurl + return BaseRedirectHandler.http_error_302(self, req, fp, code, msg, + headers) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + +class Locator(object): + """ + A base class for locators - things that locate distributions. + """ + source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz') + binary_extensions = ('.egg', '.exe', '.whl') + excluded_extensions = ('.pdf',) + + # A list of tags indicating which wheels you want to match. The default + # value of None matches against the tags compatible with the running + # Python. If you want to match other values, set wheel_tags on a locator + # instance to a list of tuples (pyver, abi, arch) which you want to match. + wheel_tags = None + + downloadable_extensions = source_extensions + ('.whl',) + + def __init__(self, scheme='default'): + """ + Initialise an instance. + :param scheme: Because locators look for most recent versions, they + need to know the version scheme to use. This specifies + the current PEP-recommended scheme - use ``'legacy'`` + if you need to support existing distributions on PyPI. + """ + self._cache = {} + self.scheme = scheme + # Because of bugs in some of the handlers on some of the platforms, + # we use our own opener rather than just using urlopen. + self.opener = build_opener(RedirectHandler()) + # If get_project() is called from locate(), the matcher instance + # is set from the requirement passed to locate(). See issue #18 for + # why this can be useful to know. + self.matcher = None + + def clear_cache(self): + self._cache.clear() + + def _get_scheme(self): + return self._scheme + + def _set_scheme(self, value): + self._scheme = value + + scheme = property(_get_scheme, _set_scheme) + + def _get_project(self, name): + """ + For a given project, get a dictionary mapping available versions to Distribution + instances. + + This should be implemented in subclasses. + + If called from a locate() request, self.matcher will be set to a + matcher for the requirement to satisfy, otherwise it will be None. + """ + raise NotImplementedError('Please implement in the subclass') + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Please implement in the subclass') + + def get_project(self, name): + """ + For a given project, get a dictionary mapping available versions to Distribution + instances. + + This calls _get_project to do all the work, and just implements a caching layer on top. + """ + if self._cache is None: + result = self._get_project(name) + elif name in self._cache: + result = self._cache[name] + else: + result = self._get_project(name) + self._cache[name] = result + return result + + def score_url(self, url): + """ + Give an url a score which can be used to choose preferred URLs + for a given project release. + """ + t = urlparse(url) + return (t.scheme != 'https', 'pypi.python.org' in t.netloc, + posixpath.basename(t.path)) + + def prefer_url(self, url1, url2): + """ + Choose one of two URLs where both are candidates for distribution + archives for the same version of a distribution (for example, + .tar.gz vs. zip). + + The current implement favours http:// URLs over https://, archives + from PyPI over those from other locations and then the archive name. + """ + result = url2 + if url1: + s1 = self.score_url(url1) + s2 = self.score_url(url2) + if s1 > s2: + result = url1 + if result != url2: + logger.debug('Not replacing %r with %r', url1, url2) + else: + logger.debug('Replacing %r with %r', url1, url2) + return result + + def split_filename(self, filename, project_name): + """ + Attempt to split a filename in project name, version and Python version. + """ + return split_filename(filename, project_name) + + def convert_url_to_download_info(self, url, project_name): + """ + See if a URL is a candidate for a download URL for a project (the URL + has typically been scraped from an HTML page). + + If it is, a dictionary is returned with keys "name", "version", + "filename" and "url"; otherwise, None is returned. + """ + def same_project(name1, name2): + name1, name2 = name1.lower(), name2.lower() + if name1 == name2: + result = True + else: + # distribute replaces '-' by '_' in project names, so it + # can tell where the version starts in a filename. + result = name1.replace('_', '-') == name2.replace('_', '-') + return result + + result = None + scheme, netloc, path, params, query, frag = urlparse(url) + if frag.lower().startswith('egg='): + logger.debug('%s: version hint in fragment: %r', + project_name, frag) + m = HASHER_HASH.match(frag) + if m: + algo, digest = m.groups() + else: + algo, digest = None, None + origpath = path + if path and path[-1] == '/': + path = path[:-1] + if path.endswith('.whl'): + try: + wheel = Wheel(path) + if is_compatible(wheel, self.wheel_tags): + if project_name is None: + include = True + else: + include = same_project(wheel.name, project_name) + if include: + result = { + 'name': wheel.name, + 'version': wheel.version, + 'filename': wheel.filename, + 'url': urlunparse((scheme, netloc, origpath, + params, query, '')), + 'python-version': ', '.join( + ['.'.join(list(v[2:])) for v in wheel.pyver]), + } + except Exception as e: + logger.warning('invalid path for wheel: %s', path) + elif path.endswith(self.downloadable_extensions): + path = filename = posixpath.basename(path) + for ext in self.downloadable_extensions: + if path.endswith(ext): + path = path[:-len(ext)] + t = self.split_filename(path, project_name) + if not t: + logger.debug('No match for project/version: %s', path) + else: + name, version, pyver = t + if not project_name or same_project(project_name, name): + result = { + 'name': name, + 'version': version, + 'filename': filename, + 'url': urlunparse((scheme, netloc, origpath, + params, query, '')), + #'packagetype': 'sdist', + } + if pyver: + result['python-version'] = pyver + break + if result and algo: + result['%s_digest' % algo] = digest + return result + + def _get_digest(self, info): + """ + Get a digest from a dictionary by looking at keys of the form + 'algo_digest'. + + Returns a 2-tuple (algo, digest) if found, else None. Currently + looks only for SHA256, then MD5. + """ + result = None + for algo in ('sha256', 'md5'): + key = '%s_digest' % algo + if key in info: + result = (algo, info[key]) + break + return result + + def _update_version_data(self, result, info): + """ + Update a result dictionary (the final result from _get_project) with a dictionary for a + specific version, whih typically holds information gleaned from a filename or URL for an + archive for the distribution. + """ + name = info.pop('name') + version = info.pop('version') + if version in result: + dist = result[version] + md = dist.metadata + else: + dist = make_dist(name, version, scheme=self.scheme) + md = dist.metadata + dist.digest = self._get_digest(info) + if md.source_url != info['url']: + md.source_url = self.prefer_url(md.source_url, info['url']) + dist.locator = self + result[version] = dist + + def locate(self, requirement, prereleases=False): + """ + Find the most recent distribution which matches the given + requirement. + + :param requirement: A requirement of the form 'foo (1.0)' or perhaps + 'foo (>= 1.0, < 2.0, != 1.3)' + :param prereleases: If ``True``, allow pre-release versions + to be located. Otherwise, pre-release versions + are not returned. + :return: A :class:`Distribution` instance, or ``None`` if no such + distribution could be located. + """ + result = None + r = parse_requirement(requirement) + if r is None: + raise DistlibException('Not a valid requirement: %r' % requirement) + scheme = get_scheme(self.scheme) + self.matcher = matcher = scheme.matcher(r.requirement) + logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__) + versions = self.get_project(r.name) + if versions: + # sometimes, versions are invalid + slist = [] + vcls = matcher.version_class + for k in versions: + try: + if not matcher.match(k): + logger.debug('%s did not match %r', matcher, k) + else: + if prereleases or not vcls(k).is_prerelease: + slist.append(k) + else: + logger.debug('skipping pre-release ' + 'version %s of %s', k, matcher.name) + except Exception: + logger.warning('error matching %s with %r', matcher, k) + pass # slist.append(k) + if len(slist) > 1: + slist = sorted(slist, key=scheme.key) + if slist: + logger.debug('sorted list: %s', slist) + result = versions[slist[-1]] + if result and r.extras: + result.extras = r.extras + self.matcher = None + return result + + +class PyPIRPCLocator(Locator): + """ + This locator uses XML-RPC to locate distributions. It therefore + cannot be used with simple mirrors (that only mirror file content). + """ + def __init__(self, url, **kwargs): + """ + Initialise an instance. + + :param url: The URL to use for XML-RPC. + :param kwargs: Passed to the superclass constructor. + """ + super(PyPIRPCLocator, self).__init__(**kwargs) + self.base_url = url + self.client = ServerProxy(url, timeout=3.0) + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + return set(self.client.list_packages()) + + def _get_project(self, name): + result = {} + versions = self.client.package_releases(name, True) + for v in versions: + urls = self.client.release_urls(name, v) + data = self.client.release_data(name, v) + metadata = Metadata(scheme=self.scheme) + metadata.name = data['name'] + metadata.version = data['version'] + metadata.license = data.get('license') + metadata.keywords = data.get('keywords', []) + metadata.summary = data.get('summary') + dist = Distribution(metadata) + if urls: + info = urls[0] + metadata.source_url = info['url'] + dist.digest = self._get_digest(info) + dist.locator = self + result[v] = dist + return result + +class PyPIJSONLocator(Locator): + """ + This locator uses PyPI's JSON interface. It's very limited in functionality + nad probably not worth using. + """ + def __init__(self, url, **kwargs): + super(PyPIJSONLocator, self).__init__(**kwargs) + self.base_url = ensure_slash(url) + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Not available from this locator') + + def _get_project(self, name): + result = {} + url = urljoin(self.base_url, '%s/json' % quote(name)) + try: + resp = self.opener.open(url) + data = resp.read().decode() # for now + d = json.loads(data) + md = Metadata(scheme=self.scheme) + data = d['info'] + md.name = data['name'] + md.version = data['version'] + md.license = data.get('license') + md.keywords = data.get('keywords', []) + md.summary = data.get('summary') + dist = Distribution(md) + urls = d['urls'] + if urls: + info = urls[0] + md.source_url = info['url'] + dist.digest = self._get_digest(info) + dist.locator = self + result[md.version] = dist + except Exception as e: + logger.exception('JSON fetch failed: %s', e) + return result + + +class Page(object): + """ + This class represents a scraped HTML page. + """ + # The following slightly hairy-looking regex just looks for the contents of + # an anchor link, which has an attribute "href" either immediately preceded + # or immediately followed by a "rel" attribute. The attribute values can be + # declared with double quotes, single quotes or no quotes - which leads to + # the length of the expression. + _href = re.compile(""" +(rel\s*=\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\s\n]*))\s+)? +href\s*=\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\s\n]*)) +(\s+rel\s*=\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\s\n]*)))? +""", re.I | re.S | re.X) + _base = re.compile(r"""]+)""", re.I | re.S) + + def __init__(self, data, url): + """ + Initialise an instance with the Unicode page contents and the URL they + came from. + """ + self.data = data + self.base_url = self.url = url + m = self._base.search(self.data) + if m: + self.base_url = m.group(1) + + _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) + + @cached_property + def links(self): + """ + Return the URLs of all the links on a page together with information + about their "rel" attribute, for determining which ones to treat as + downloads and which ones to queue for further scraping. + """ + def clean(url): + "Tidy up an URL." + scheme, netloc, path, params, query, frag = urlparse(url) + return urlunparse((scheme, netloc, quote(path), + params, query, frag)) + + result = set() + for match in self._href.finditer(self.data): + d = match.groupdict('') + rel = (d['rel1'] or d['rel2'] or d['rel3'] or + d['rel4'] or d['rel5'] or d['rel6']) + url = d['url1'] or d['url2'] or d['url3'] + url = urljoin(self.base_url, url) + url = unescape(url) + url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url) + result.add((url, rel)) + # We sort the result, hoping to bring the most recent versions + # to the front + result = sorted(result, key=lambda t: t[0], reverse=True) + return result + + +class SimpleScrapingLocator(Locator): + """ + A locator which scrapes HTML pages to locate downloads for a distribution. + This runs multiple threads to do the I/O; performance is at least as good + as pip's PackageFinder, which works in an analogous fashion. + """ + + # These are used to deal with various Content-Encoding schemes. + decoders = { + 'deflate': zlib.decompress, + 'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(d)).read(), + 'none': lambda b: b, + } + + def __init__(self, url, timeout=None, num_workers=10, **kwargs): + """ + Initialise an instance. + :param url: The root URL to use for scraping. + :param timeout: The timeout, in seconds, to be applied to requests. + This defaults to ``None`` (no timeout specified). + :param num_workers: The number of worker threads you want to do I/O, + This defaults to 10. + :param kwargs: Passed to the superclass. + """ + super(SimpleScrapingLocator, self).__init__(**kwargs) + self.base_url = ensure_slash(url) + self.timeout = timeout + self._page_cache = {} + self._seen = set() + self._to_fetch = queue.Queue() + self._bad_hosts = set() + self.skip_externals = False + self.num_workers = num_workers + self._lock = threading.RLock() + + def _prepare_threads(self): + """ + Threads are created only when get_project is called, and terminate + before it returns. They are there primarily to parallelise I/O (i.e. + fetching web pages). + """ + self._threads = [] + for i in range(self.num_workers): + t = threading.Thread(target=self._fetch) + t.setDaemon(True) + t.start() + self._threads.append(t) + + def _wait_threads(self): + """ + Tell all the threads to terminate (by sending a sentinel value) and + wait for them to do so. + """ + # Note that you need two loops, since you can't say which + # thread will get each sentinel + for t in self._threads: + self._to_fetch.put(None) # sentinel + for t in self._threads: + t.join() + self._threads = [] + + def _get_project(self, name): + self.result = result = {} + self.project_name = name + url = urljoin(self.base_url, '%s/' % quote(name)) + self._seen.clear() + self._page_cache.clear() + self._prepare_threads() + try: + logger.debug('Queueing %s', url) + self._to_fetch.put(url) + self._to_fetch.join() + finally: + self._wait_threads() + del self.result + return result + + platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|' + r'win(32|-amd64)|macosx-?\d+)\b', re.I) + + def _is_platform_dependent(self, url): + """ + Does an URL refer to a platform-specific download? + """ + return self.platform_dependent.search(url) + + def _process_download(self, url): + """ + See if an URL is a suitable download for a project. + + If it is, register information in the result dictionary (for + _get_project) about the specific version it's for. + + Note that the return value isn't actually used other than as a boolean + value. + """ + if self._is_platform_dependent(url): + info = None + else: + info = self.convert_url_to_download_info(url, self.project_name) + logger.debug('process_download: %s -> %s', url, info) + if info: + with self._lock: # needed because self.result is shared + self._update_version_data(self.result, info) + return info + + def _should_queue(self, link, referrer, rel): + """ + Determine whether a link URL from a referring page and with a + particular "rel" attribute should be queued for scraping. + """ + scheme, netloc, path, _, _, _ = urlparse(link) + if path.endswith(self.source_extensions + self.binary_extensions + + self.excluded_extensions): + result = False + elif self.skip_externals and not link.startswith(self.base_url): + result = False + elif not referrer.startswith(self.base_url): + result = False + elif rel not in ('homepage', 'download'): + result = False + elif scheme not in ('http', 'https', 'ftp'): + result = False + elif self._is_platform_dependent(link): + result = False + else: + host = netloc.split(':', 1)[0] + if host.lower() == 'localhost': + result = False + else: + result = True + logger.debug('should_queue: %s (%s) from %s -> %s', link, rel, + referrer, result) + return result + + def _fetch(self): + """ + Get a URL to fetch from the work queue, get the HTML page, examine its + links for download candidates and candidates for further scraping. + + This is a handy method to run in a thread. + """ + while True: + url = self._to_fetch.get() + try: + if url: + page = self.get_page(url) + if page is None: # e.g. after an error + continue + for link, rel in page.links: + if link not in self._seen: + self._seen.add(link) + if (not self._process_download(link) and + self._should_queue(link, url, rel)): + logger.debug('Queueing %s from %s', link, url) + self._to_fetch.put(link) + finally: + # always do this, to avoid hangs :-) + self._to_fetch.task_done() + if not url: + #logger.debug('Sentinel seen, quitting.') + break + + def get_page(self, url): + """ + Get the HTML for an URL, possibly from an in-memory cache. + + XXX TODO Note: this cache is never actually cleared. It's assumed that + the data won't get stale over the lifetime of a locator instance (not + necessarily true for the default_locator). + """ + # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api + scheme, netloc, path, _, _, _ = urlparse(url) + if scheme == 'file' and os.path.isdir(url2pathname(path)): + url = urljoin(ensure_slash(url), 'index.html') + + if url in self._page_cache: + result = self._page_cache[url] + logger.debug('Returning %s from cache: %s', url, result) + else: + host = netloc.split(':', 1)[0] + result = None + if host in self._bad_hosts: + logger.debug('Skipping %s due to bad host %s', url, host) + else: + req = Request(url, headers={'Accept-encoding': 'identity'}) + try: + logger.debug('Fetching %s', url) + resp = self.opener.open(req, timeout=self.timeout) + logger.debug('Fetched %s', url) + headers = resp.info() + content_type = headers.get('Content-Type', '') + if HTML_CONTENT_TYPE.match(content_type): + final_url = resp.geturl() + data = resp.read() + encoding = headers.get('Content-Encoding') + if encoding: + decoder = self.decoders[encoding] # fail if not found + data = decoder(data) + encoding = 'utf-8' + m = CHARSET.search(content_type) + if m: + encoding = m.group(1) + try: + data = data.decode(encoding) + except UnicodeError: + data = data.decode('latin-1') # fallback + result = Page(data, final_url) + self._page_cache[final_url] = result + except HTTPError as e: + if e.code != 404: + logger.exception('Fetch failed: %s: %s', url, e) + except URLError as e: + logger.exception('Fetch failed: %s: %s', url, e) + with self._lock: + self._bad_hosts.add(host) + except Exception as e: + logger.exception('Fetch failed: %s: %s', url, e) + finally: + self._page_cache[url] = result # even if None (failure) + return result + + _distname_re = re.compile(']*>([^<]+)<') + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + page = self.get_page(self.base_url) + if not page: + raise DistlibException('Unable to get %s' % self.base_url) + for match in self._distname_re.finditer(page.data): + result.add(match.group(1)) + return result + +class DirectoryLocator(Locator): + """ + This class locates distributions in a directory tree. + """ + + def __init__(self, path, **kwargs): + """ + Initialise an instance. + :param path: The root of the directory tree to search. + :param kwargs: Passed to the superclass constructor, + except for: + * recursive - if True (the default), subdirectories are + recursed into. If False, only the top-level directory + is searched, + """ + self.recursive = kwargs.pop('recursive', True) + super(DirectoryLocator, self).__init__(**kwargs) + path = os.path.abspath(path) + if not os.path.isdir(path): + raise DistlibException('Not a directory: %r' % path) + self.base_dir = path + + def should_include(self, filename, parent): + """ + Should a filename be considered as a candidate for a distribution + archive? As well as the filename, the directory which contains it + is provided, though not used by the current implementation. + """ + return filename.endswith(self.downloadable_extensions) + + def _get_project(self, name): + result = {} + for root, dirs, files in os.walk(self.base_dir): + for fn in files: + if self.should_include(fn, root): + fn = os.path.join(root, fn) + url = urlunparse(('file', '', + pathname2url(os.path.abspath(fn)), + '', '', '')) + info = self.convert_url_to_download_info(url, name) + if info: + self._update_version_data(result, info) + if not self.recursive: + break + return result + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + for root, dirs, files in os.walk(self.base_dir): + for fn in files: + if self.should_include(fn, root): + fn = os.path.join(root, fn) + url = urlunparse(('file', '', + pathname2url(os.path.abspath(fn)), + '', '', '')) + info = self.convert_url_to_download_info(url, None) + if info: + result.add(info['name']) + if not self.recursive: + break + return result + +class JSONLocator(Locator): + """ + This locator uses special extended metadata (not available on PyPI) and is + the basis of performant dependency resolution in distlib. Other locators + require archive downloads before dependencies can be determined! As you + might imagine, that can be slow. + """ + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Not available from this locator') + + def _get_project(self, name): + result = {} + data = get_project_data(name) + if data: + for info in data.get('files', []): + if info['ptype'] != 'sdist' or info['pyversion'] != 'source': + continue + # We don't store summary in project metadata as it makes + # the data bigger for no benefit during dependency + # resolution + dist = make_dist(data['name'], info['version'], + summary=data.get('summary', + 'Placeholder for summary'), + scheme=self.scheme) + md = dist.metadata + md.source_url = info['url'] + # TODO SHA256 digest + if 'digest' in info and info['digest']: + dist.digest = ('md5', info['digest']) + md.dependencies = info.get('requirements', {}) + dist.exports = info.get('exports', {}) + result[dist.version] = dist + return result + +class DistPathLocator(Locator): + """ + This locator finds installed distributions in a path. It can be useful for + adding to an :class:`AggregatingLocator`. + """ + def __init__(self, distpath, **kwargs): + """ + Initialise an instance. + + :param distpath: A :class:`DistributionPath` instance to search. + """ + super(DistPathLocator, self).__init__(**kwargs) + assert isinstance(distpath, DistributionPath) + self.distpath = distpath + + def _get_project(self, name): + dist = self.distpath.get_distribution(name) + if dist is None: + result = {} + else: + result = { dist.version: dist } + return result + + +class AggregatingLocator(Locator): + """ + This class allows you to chain and/or merge a list of locators. + """ + def __init__(self, *locators, **kwargs): + """ + Initialise an instance. + + :param locators: The list of locators to search. + :param kwargs: Passed to the superclass constructor, + except for: + * merge - if False (the default), the first successful + search from any of the locators is returned. If True, + the results from all locators are merged (this can be + slow). + """ + self.merge = kwargs.pop('merge', False) + self.locators = locators + super(AggregatingLocator, self).__init__(**kwargs) + + def clear_cache(self): + super(AggregatingLocator, self).clear_cache() + for locator in self.locators: + locator.clear_cache() + + def _set_scheme(self, value): + self._scheme = value + for locator in self.locators: + locator.scheme = value + + scheme = property(Locator.scheme.fget, _set_scheme) + + def _get_project(self, name): + result = {} + for locator in self.locators: + d = locator.get_project(name) + if d: + if self.merge: + result.update(d) + else: + # See issue #18. If any dists are found and we're looking + # for specific constraints, we only return something if + # a match is found. For example, if a DirectoryLocator + # returns just foo (1.0) while we're looking for + # foo (>= 2.0), we'll pretend there was nothing there so + # that subsequent locators can be queried. Otherwise we + # would just return foo (1.0) which would then lead to a + # failure to find foo (>= 2.0), because other locators + # weren't searched. Note that this only matters when + # merge=False. + if self.matcher is None: + found = True + else: + found = False + for k in d: + if self.matcher.match(k): + found = True + break + if found: + result = d + break + return result + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + for locator in self.locators: + try: + result |= locator.get_distribution_names() + except NotImplementedError: + pass + return result + + +# We use a legacy scheme simply because most of the dists on PyPI use legacy +# versions which don't conform to PEP 426 / PEP 440. +default_locator = AggregatingLocator( + JSONLocator(), + SimpleScrapingLocator('https://pypi.python.org/simple/', + timeout=3.0), + scheme='legacy') + +locate = default_locator.locate + +NAME_VERSION_RE = re.compile(r'(?P[\w-]+)\s*' + r'\(\s*(==\s*)?(?P[^)]+)\)$') + +class DependencyFinder(object): + """ + Locate dependencies for distributions. + """ + + def __init__(self, locator=None): + """ + Initialise an instance, using the specified locator + to locate distributions. + """ + self.locator = locator or default_locator + self.scheme = get_scheme(self.locator.scheme) + + def add_distribution(self, dist): + """ + Add a distribution to the finder. This will update internal information + about who provides what. + :param dist: The distribution to add. + """ + logger.debug('adding distribution %s', dist) + name = dist.key + self.dists_by_name[name] = dist + self.dists[(name, dist.version)] = dist + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Add to provided: %s, %s, %s', name, version, dist) + self.provided.setdefault(name, set()).add((version, dist)) + + def remove_distribution(self, dist): + """ + Remove a distribution from the finder. This will update internal + information about who provides what. + :param dist: The distribution to remove. + """ + logger.debug('removing distribution %s', dist) + name = dist.key + del self.dists_by_name[name] + del self.dists[(name, dist.version)] + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Remove from provided: %s, %s, %s', name, version, dist) + s = self.provided[name] + s.remove((version, dist)) + if not s: + del self.provided[name] + + def get_matcher(self, reqt): + """ + Get a version matcher for a requirement. + :param reqt: The requirement + :type reqt: str + :return: A version matcher (an instance of + :class:`distlib.version.Matcher`). + """ + try: + matcher = self.scheme.matcher(reqt) + except UnsupportedVersionError: + # XXX compat-mode if cannot read the version + name = reqt.split()[0] + matcher = self.scheme.matcher(name) + return matcher + + def find_providers(self, reqt): + """ + Find the distributions which can fulfill a requirement. + + :param reqt: The requirement. + :type reqt: str + :return: A set of distribution which can fulfill the requirement. + """ + matcher = self.get_matcher(reqt) + name = matcher.key # case-insensitive + result = set() + provided = self.provided + if name in provided: + for version, provider in provided[name]: + try: + match = matcher.match(version) + except UnsupportedVersionError: + match = False + + if match: + result.add(provider) + break + return result + + def try_to_replace(self, provider, other, problems): + """ + Attempt to replace one provider with another. This is typically used + when resolving dependencies from multiple sources, e.g. A requires + (B >= 1.0) while C requires (B >= 1.1). + + For successful replacement, ``provider`` must meet all the requirements + which ``other`` fulfills. + + :param provider: The provider we are trying to replace with. + :param other: The provider we're trying to replace. + :param problems: If False is returned, this will contain what + problems prevented replacement. This is currently + a tuple of the literal string 'cantreplace', + ``provider``, ``other`` and the set of requirements + that ``provider`` couldn't fulfill. + :return: True if we can replace ``other`` with ``provider``, else + False. + """ + rlist = self.reqts[other] + unmatched = set() + for s in rlist: + matcher = self.get_matcher(s) + if not matcher.match(provider.version): + unmatched.add(s) + if unmatched: + # can't replace other with provider + problems.add(('cantreplace', provider, other, unmatched)) + result = False + else: + # can replace other with provider + self.remove_distribution(other) + del self.reqts[other] + for s in rlist: + self.reqts.setdefault(provider, set()).add(s) + self.add_distribution(provider) + result = True + return result + + def find(self, requirement, meta_extras=None, prereleases=False): + """ + Find a distribution and all distributions it depends on. + + :param requirement: The requirement specifying the distribution to + find, or a Distribution instance. + :param meta_extras: A list of meta extras such as :test:, :build: and + so on. + :param prereleases: If ``True``, allow pre-release versions to be + returned - otherwise, don't return prereleases + unless they're all that's available. + + Return a set of :class:`Distribution` instances and a set of + problems. + + The distributions returned should be such that they have the + :attr:`required` attribute set to ``True`` if they were + from the ``requirement`` passed to ``find()``, and they have the + :attr:`build_time_dependency` attribute set to ``True`` unless they + are post-installation dependencies of the ``requirement``. + + The problems should be a tuple consisting of the string + ``'unsatisfied'`` and the requirement which couldn't be satisfied + by any distribution known to the locator. + """ + + self.provided = {} + self.dists = {} + self.dists_by_name = {} + self.reqts = {} + + meta_extras = set(meta_extras or []) + if ':*:' in meta_extras: + meta_extras.remove(':*:') + # :meta: and :run: are implicitly included + meta_extras |= set([':test:', ':build:', ':dev:']) + + if isinstance(requirement, Distribution): + dist = odist = requirement + logger.debug('passed %s as requirement', odist) + else: + dist = odist = self.locator.locate(requirement, + prereleases=prereleases) + if dist is None: + raise DistlibException('Unable to locate %r' % requirement) + logger.debug('located %s', odist) + dist.requested = True + problems = set() + todo = set([dist]) + install_dists = set([odist]) + while todo: + dist = todo.pop() + name = dist.key # case-insensitive + if name not in self.dists_by_name: + self.add_distribution(dist) + else: + #import pdb; pdb.set_trace() + other = self.dists_by_name[name] + if other != dist: + self.try_to_replace(dist, other, problems) + + ireqts = dist.run_requires | dist.meta_requires + sreqts = dist.build_requires + ereqts = set() + if dist in install_dists: + for key in ('test', 'build', 'dev'): + e = ':%s:' % key + if e in meta_extras: + ereqts |= getattr(dist, '%s_requires' % key) + all_reqts = ireqts | sreqts | ereqts + for r in all_reqts: + providers = self.find_providers(r) + if not providers: + logger.debug('No providers found for %r', r) + provider = self.locator.locate(r, prereleases=prereleases) + # If no provider is found and we didn't consider + # prereleases, consider them now. + if provider is None and not prereleases: + provider = self.locator.locate(r, prereleases=True) + if provider is None: + logger.debug('Cannot satisfy %r', r) + problems.add(('unsatisfied', r)) + else: + n, v = provider.key, provider.version + if (n, v) not in self.dists: + todo.add(provider) + providers.add(provider) + if r in ireqts and dist in install_dists: + install_dists.add(provider) + logger.debug('Adding %s to install_dists', + provider.name_and_version) + for p in providers: + name = p.key + if name not in self.dists_by_name: + self.reqts.setdefault(p, set()).add(r) + else: + other = self.dists_by_name[name] + if other != p: + # see if other can be replaced by p + self.try_to_replace(p, other, problems) + + dists = set(self.dists.values()) + for dist in dists: + dist.build_time_dependency = dist not in install_dists + if dist.build_time_dependency: + logger.debug('%s is a build-time dependency only.', + dist.name_and_version) + logger.debug('find done for %s', odist) + return dists, problems diff --git a/pip/_vendor/distlib/manifest.py b/pip/_vendor/distlib/manifest.py new file mode 100644 index 000000000..c6b98c59a --- /dev/null +++ b/pip/_vendor/distlib/manifest.py @@ -0,0 +1,364 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +""" +Class representing the list of files in a distribution. + +Equivalent to distutils.filelist, but fixes some problems. +""" +import fnmatch +import logging +import os +import re + +from . import DistlibException +from .compat import fsdecode +from .util import convert_path + + +__all__ = ['Manifest'] + +logger = logging.getLogger(__name__) + +# a \ followed by some spaces + EOL +_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M) +_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S) + + +class Manifest(object): + """A list of files built by on exploring the filesystem and filtered by + applying various patterns to what we find there. + """ + + def __init__(self, base=None): + """ + Initialise an instance. + + :param base: The base directory to explore under. + """ + self.base = os.path.abspath(os.path.normpath(base or os.getcwd())) + self.prefix = self.base + os.sep + self.allfiles = None + self.files = set() + + # + # Public API + # + + def findall(self): + """Find all files under the base and set ``allfiles`` to the absolute + pathnames of files found. + """ + from stat import S_ISREG, S_ISDIR, S_ISLNK + + self.allfiles = allfiles = [] + root = self.base + stack = [root] + pop = stack.pop + push = stack.append + + while stack: + root = pop() + names = os.listdir(root) + + for name in names: + fullname = os.path.join(root, name) + + # Avoid excess stat calls -- just one will do, thank you! + stat = os.stat(fullname) + mode = stat.st_mode + if S_ISREG(mode): + allfiles.append(fsdecode(fullname)) + elif S_ISDIR(mode) and not S_ISLNK(mode): + push(fullname) + + def add(self, item): + """ + Add a file to the manifest. + + :param item: The pathname to add. This can be relative to the base. + """ + if not item.startswith(self.prefix): + item = os.path.join(self.base, item) + self.files.add(os.path.normpath(item)) + + def add_many(self, items): + """ + Add a list of files to the manifest. + + :param items: The pathnames to add. These can be relative to the base. + """ + for item in items: + self.add(item) + + def sorted(self, wantdirs=False): + """ + Return sorted files in directory order + """ + + def add_dir(dirs, d): + dirs.add(d) + logger.debug('add_dir added %s', d) + if d != self.base: + parent, _ = os.path.split(d) + assert parent not in ('', '/') + add_dir(dirs, parent) + + result = set(self.files) # make a copy! + if wantdirs: + dirs = set() + for f in result: + add_dir(dirs, os.path.dirname(f)) + result |= dirs + return [os.path.join(*path_tuple) for path_tuple in + sorted(os.path.split(path) for path in result)] + + def clear(self): + """Clear all collected files.""" + self.files = set() + self.allfiles = [] + + def process_directive(self, directive): + """ + Process a directive which either adds some files from ``allfiles`` to + ``files``, or removes some files from ``files``. + + :param directive: The directive to process. This should be in a format + compatible with distutils ``MANIFEST.in`` files: + + http://docs.python.org/distutils/sourcedist.html#commands + """ + # Parse the line: split it up, make sure the right number of words + # is there, and return the relevant words. 'action' is always + # defined: it's the first word of the line. Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dirpattern). + action, patterns, thedir, dirpattern = self._parse_directive(directive) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + for pattern in patterns: + if not self._include_pattern(pattern, anchor=True): + logger.warning('no files found matching %r', pattern) + + elif action == 'exclude': + for pattern in patterns: + if not self._exclude_pattern(pattern, anchor=True): + logger.warning('no previously-included files ' + 'found matching %r', pattern) + + elif action == 'global-include': + for pattern in patterns: + if not self._include_pattern(pattern, anchor=False): + logger.warning('no files found matching %r ' + 'anywhere in distribution', pattern) + + elif action == 'global-exclude': + for pattern in patterns: + if not self._exclude_pattern(pattern, anchor=False): + logger.warning('no previously-included files ' + 'matching %r found anywhere in ' + 'distribution', pattern) + + elif action == 'recursive-include': + for pattern in patterns: + if not self._include_pattern(pattern, prefix=thedir): + logger.warning('no files found matching %r ' + 'under directory %r', pattern, thedir) + + elif action == 'recursive-exclude': + for pattern in patterns: + if not self._exclude_pattern(pattern, prefix=thedir): + logger.warning('no previously-included files ' + 'matching %r found under directory %r', + pattern, thedir) + + elif action == 'graft': + if not self._include_pattern(None, prefix=dirpattern): + logger.warning('no directories found matching %r', + dirpattern) + + elif action == 'prune': + if not self._exclude_pattern(None, prefix=dirpattern): + logger.warning('no previously-included directories found ' + 'matching %r', dirpattern) + else: # pragma: no cover + # This should never happen, as it should be caught in + # _parse_template_line + raise DistlibException( + 'invalid action %r' % action) + + # + # Private API + # + + def _parse_directive(self, directive): + """ + Validate a directive. + :param directive: The directive to validate. + :return: A tuple of action, patterns, thedir, dir_patterns + """ + words = directive.split() + if len(words) == 1 and words[0] not in ('include', 'exclude', + 'global-include', + 'global-exclude', + 'recursive-include', + 'recursive-exclude', + 'graft', 'prune'): + # no action given, let's use the default 'include' + words.insert(0, 'include') + + action = words[0] + patterns = thedir = dir_pattern = None + + if action in ('include', 'exclude', + 'global-include', 'global-exclude'): + if len(words) < 2: + raise DistlibException( + '%r expects ...' % action) + + patterns = [convert_path(word) for word in words[1:]] + + elif action in ('recursive-include', 'recursive-exclude'): + if len(words) < 3: + raise DistlibException( + '%r expects ...' % action) + + thedir = convert_path(words[1]) + patterns = [convert_path(word) for word in words[2:]] + + elif action in ('graft', 'prune'): + if len(words) != 2: + raise DistlibException( + '%r expects a single ' % action) + + dir_pattern = convert_path(words[1]) + + else: + raise DistlibException('unknown action %r' % action) + + return action, patterns, thedir, dir_pattern + + def _include_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Select strings (presumably filenames) from 'self.files' that + match 'pattern', a Unix-style wildcard (glob) pattern. + + Patterns are not quite the same as implemented by the 'fnmatch' + module: '*' and '?' match non-special characters, where "special" + is platform-dependent: slash on Unix; colon, slash, and backslash on + DOS/Windows; and colon on Mac OS. + + If 'anchor' is true (the default), then the pattern match is more + stringent: "*.py" will match "foo.py" but not "foo/bar.py". If + 'anchor' is false, both of these will match. + + If 'prefix' is supplied, then only filenames starting with 'prefix' + (itself a pattern) and ending with 'pattern', with anything in between + them, will match. 'anchor' is ignored in this case. + + If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and + 'pattern' is assumed to be either a string containing a regex or a + regex object -- no translation is done, the regex is just compiled + and used as-is. + + Selected strings will be added to self.files. + + Return True if files are found. + """ + # XXX docstring lying about what the special chars are? + found = False + pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) + + # delayed loading of allfiles list + if self.allfiles is None: + self.findall() + + for name in self.allfiles: + if pattern_re.search(name): + self.files.add(name) + found = True + return found + + def _exclude_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Remove strings (presumably filenames) from 'files' that match + 'pattern'. + + Other parameters are the same as for 'include_pattern()', above. + The list 'self.files' is modified in place. Return True if files are + found. + + This API is public to allow e.g. exclusion of SCM subdirs, e.g. when + packaging source distributions + """ + found = False + pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) + for f in list(self.files): + if pattern_re.search(f): + self.files.remove(f) + found = True + return found + + def _translate_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Translate a shell-like wildcard pattern to a compiled regular + expression. + + Return the compiled regex. If 'is_regex' true, + then 'pattern' is directly compiled to a regex (if it's a string) + or just returned as-is (assumes it's a regex object). + """ + if is_regex: + if isinstance(pattern, str): + return re.compile(pattern) + else: + return pattern + + if pattern: + pattern_re = self._glob_to_re(pattern) + else: + pattern_re = '' + + base = re.escape(os.path.join(self.base, '')) + if prefix is not None: + # ditch end of pattern character + empty_pattern = self._glob_to_re('') + prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)] + sep = os.sep + if os.sep == '\\': + sep = r'\\' + pattern_re = '^' + base + sep.join((prefix_re, + '.*' + pattern_re)) + else: # no prefix -- respect anchor flag + if anchor: + pattern_re = '^' + base + pattern_re + + return re.compile(pattern_re) + + def _glob_to_re(self, pattern): + """Translate a shell-like glob pattern to a regular expression. + + Return a string containing the regex. Differs from + 'fnmatch.translate()' in that '*' does not match "special characters" + (which are platform-specific). + """ + pattern_re = fnmatch.translate(pattern) + + # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which + # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, + # and by extension they shouldn't match such "special characters" under + # any OS. So change all non-escaped dots in the RE to match any + # character except the special characters (currently: just os.sep). + sep = os.sep + if os.sep == '\\': + # we're using a regex to manipulate a regex, so we need + # to escape the backslash twice + sep = r'\\\\' + escaped = r'\1[^%s]' % sep + pattern_re = re.sub(r'((? y, + 'gte': lambda x, y: x >= y, + 'in': lambda x, y: x in y, + 'lt': lambda x, y: x < y, + 'lte': lambda x, y: x <= y, + 'not': lambda x: not x, + 'noteq': lambda x, y: x != y, + 'notin': lambda x, y: x not in y, + } + + allowed_values = { + 'sys_platform': sys.platform, + 'python_version': '%s.%s' % sys.version_info[:2], + # parsing sys.platform is not reliable, but there is no other + # way to get e.g. 2.7.2+, and the PEP is defined with sys.version + 'python_full_version': sys.version.split(' ', 1)[0], + 'os_name': os.name, + 'platform_in_venv': str(in_venv()), + 'platform_release': platform.release(), + 'platform_version': platform.version(), + 'platform_machine': platform.machine(), + 'platform_python_implementation': python_implementation(), + } + + def __init__(self, context=None): + """ + Initialise an instance. + + :param context: If specified, names are looked up in this mapping. + """ + self.context = context or {} + self.source = None + + def get_fragment(self, offset): + """ + Get the part of the source which is causing a problem. + """ + fragment_len = 10 + s = '%r' % (self.source[offset:offset + fragment_len]) + if offset + fragment_len < len(self.source): + s += '...' + return s + + def get_handler(self, node_type): + """ + Get a handler for the specified AST node type. + """ + return getattr(self, 'do_%s' % node_type, None) + + def evaluate(self, node, filename=None): + """ + Evaluate a source string or node, using ``filename`` when + displaying errors. + """ + if isinstance(node, string_types): + self.source = node + kwargs = {'mode': 'eval'} + if filename: + kwargs['filename'] = filename + try: + node = ast.parse(node, **kwargs) + except SyntaxError as e: + s = self.get_fragment(e.offset) + raise SyntaxError('syntax error %s' % s) + node_type = node.__class__.__name__.lower() + handler = self.get_handler(node_type) + if handler is None: + if self.source is None: + s = '(source not available)' + else: + s = self.get_fragment(node.col_offset) + raise SyntaxError("don't know how to evaluate %r %s" % ( + node_type, s)) + return handler(node) + + def get_attr_key(self, node): + assert isinstance(node, ast.Attribute), 'attribute node expected' + return '%s.%s' % (node.value.id, node.attr) + + def do_attribute(self, node): + if not isinstance(node.value, ast.Name): + valid = False + else: + key = self.get_attr_key(node) + valid = key in self.context or key in self.allowed_values + if not valid: + raise SyntaxError('invalid expression: %s' % key) + if key in self.context: + result = self.context[key] + else: + result = self.allowed_values[key] + return result + + def do_boolop(self, node): + result = self.evaluate(node.values[0]) + is_or = node.op.__class__ is ast.Or + is_and = node.op.__class__ is ast.And + assert is_or or is_and + if (is_and and result) or (is_or and not result): + for n in node.values[1:]: + result = self.evaluate(n) + if (is_or and result) or (is_and and not result): + break + return result + + def do_compare(self, node): + def sanity_check(lhsnode, rhsnode): + valid = True + if isinstance(lhsnode, ast.Str) and isinstance(rhsnode, ast.Str): + valid = False + #elif (isinstance(lhsnode, ast.Attribute) + # and isinstance(rhsnode, ast.Attribute)): + # klhs = self.get_attr_key(lhsnode) + # krhs = self.get_attr_key(rhsnode) + # valid = klhs != krhs + if not valid: + s = self.get_fragment(node.col_offset) + raise SyntaxError('Invalid comparison: %s' % s) + + lhsnode = node.left + lhs = self.evaluate(lhsnode) + result = True + for op, rhsnode in zip(node.ops, node.comparators): + sanity_check(lhsnode, rhsnode) + op = op.__class__.__name__.lower() + if op not in self.operators: + raise SyntaxError('unsupported operation: %r' % op) + rhs = self.evaluate(rhsnode) + result = self.operators[op](lhs, rhs) + if not result: + break + lhs = rhs + lhsnode = rhsnode + return result + + def do_expression(self, node): + return self.evaluate(node.body) + + def do_name(self, node): + valid = False + if node.id in self.context: + valid = True + result = self.context[node.id] + elif node.id in self.allowed_values: + valid = True + result = self.allowed_values[node.id] + if not valid: + raise SyntaxError('invalid expression: %s' % node.id) + return result + + def do_str(self, node): + return node.s + + +def interpret(marker, execution_context=None): + """ + Interpret a marker and return a result depending on environment. + + :param marker: The marker to interpret. + :type marker: str + :param execution_context: The context used for name lookup. + :type execution_context: mapping + """ + return Evaluator(execution_context).evaluate(marker.strip()) diff --git a/pip/_vendor/distlib/metadata.py b/pip/_vendor/distlib/metadata.py new file mode 100644 index 000000000..a8f79071e --- /dev/null +++ b/pip/_vendor/distlib/metadata.py @@ -0,0 +1,1013 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Implementation of the Metadata for Python packages PEPs. + +Supports all metadata formats (1.0, 1.1, 1.2, and 2.0 experimental). +""" +from __future__ import unicode_literals + +import codecs +from email import message_from_file +import json +import logging +import re + + +from . import DistlibException, __version__ +from .compat import StringIO, string_types, text_type +from .markers import interpret +from .util import extract_by_key, get_extras +from .version import get_scheme, PEP426_VERSION_RE + +logger = logging.getLogger(__name__) + + +class MetadataMissingError(DistlibException): + """A required metadata is missing""" + + +class MetadataConflictError(DistlibException): + """Attempt to read or write metadata fields that are conflictual.""" + + +class MetadataUnrecognizedVersionError(DistlibException): + """Unknown metadata version number.""" + + +class MetadataInvalidError(DistlibException): + """A metadata value is invalid""" + +# public API of this module +__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION'] + +# Encoding used for the PKG-INFO files +PKG_INFO_ENCODING = 'utf-8' + +# preferred version. Hopefully will be changed +# to 1.2 once PEP 345 is supported everywhere +PKG_INFO_PREFERRED_VERSION = '1.1' + +_LINE_PREFIX = re.compile('\n \|') +_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'License') + +_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'License', 'Classifier', 'Download-URL', 'Obsoletes', + 'Provides', 'Requires') + +_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier', + 'Download-URL') + +_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'Maintainer', 'Maintainer-email', 'License', + 'Classifier', 'Download-URL', 'Obsoletes-Dist', + 'Project-URL', 'Provides-Dist', 'Requires-Dist', + 'Requires-Python', 'Requires-External') + +_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python', + 'Obsoletes-Dist', 'Requires-External', 'Maintainer', + 'Maintainer-email', 'Project-URL') + +_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'Maintainer', 'Maintainer-email', 'License', + 'Classifier', 'Download-URL', 'Obsoletes-Dist', + 'Project-URL', 'Provides-Dist', 'Requires-Dist', + 'Requires-Python', 'Requires-External', 'Private-Version', + 'Obsoleted-By', 'Setup-Requires-Dist', 'Extension', + 'Provides-Extra') + +_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By', + 'Setup-Requires-Dist', 'Extension') + +_ALL_FIELDS = set() +_ALL_FIELDS.update(_241_FIELDS) +_ALL_FIELDS.update(_314_FIELDS) +_ALL_FIELDS.update(_345_FIELDS) +_ALL_FIELDS.update(_426_FIELDS) + +EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''') + + +def _version2fieldlist(version): + if version == '1.0': + return _241_FIELDS + elif version == '1.1': + return _314_FIELDS + elif version == '1.2': + return _345_FIELDS + elif version == '2.0': + return _426_FIELDS + raise MetadataUnrecognizedVersionError(version) + + +def _best_version(fields): + """Detect the best version depending on the fields used.""" + def _has_marker(keys, markers): + for marker in markers: + if marker in keys: + return True + return False + + keys = [] + for key, value in fields.items(): + if value in ([], 'UNKNOWN', None): + continue + keys.append(key) + + possible_versions = ['1.0', '1.1', '1.2', '2.0'] + + # first let's try to see if a field is not part of one of the version + for key in keys: + if key not in _241_FIELDS and '1.0' in possible_versions: + possible_versions.remove('1.0') + if key not in _314_FIELDS and '1.1' in possible_versions: + possible_versions.remove('1.1') + if key not in _345_FIELDS and '1.2' in possible_versions: + possible_versions.remove('1.2') + if key not in _426_FIELDS and '2.0' in possible_versions: + possible_versions.remove('2.0') + + # possible_version contains qualified versions + if len(possible_versions) == 1: + return possible_versions[0] # found ! + elif len(possible_versions) == 0: + raise MetadataConflictError('Unknown metadata set') + + # let's see if one unique marker is found + is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS) + is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS) + is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) + if int(is_1_1) + int(is_1_2) + int(is_2_0) > 1: + raise MetadataConflictError('You used incompatible 1.1/1.2/2.0 fields') + + # we have the choice, 1.0, or 1.2, or 2.0 + # - 1.0 has a broken Summary field but works with all tools + # - 1.1 is to avoid + # - 1.2 fixes Summary but has little adoption + # - 2.0 adds more features and is very new + if not is_1_1 and not is_1_2 and not is_2_0: + # we couldn't find any specific marker + if PKG_INFO_PREFERRED_VERSION in possible_versions: + return PKG_INFO_PREFERRED_VERSION + if is_1_1: + return '1.1' + if is_1_2: + return '1.2' + + return '2.0' + +_ATTR2FIELD = { + 'metadata_version': 'Metadata-Version', + 'name': 'Name', + 'version': 'Version', + 'platform': 'Platform', + 'supported_platform': 'Supported-Platform', + 'summary': 'Summary', + 'description': 'Description', + 'keywords': 'Keywords', + 'home_page': 'Home-page', + 'author': 'Author', + 'author_email': 'Author-email', + 'maintainer': 'Maintainer', + 'maintainer_email': 'Maintainer-email', + 'license': 'License', + 'classifier': 'Classifier', + 'download_url': 'Download-URL', + 'obsoletes_dist': 'Obsoletes-Dist', + 'provides_dist': 'Provides-Dist', + 'requires_dist': 'Requires-Dist', + 'setup_requires_dist': 'Setup-Requires-Dist', + 'requires_python': 'Requires-Python', + 'requires_external': 'Requires-External', + 'requires': 'Requires', + 'provides': 'Provides', + 'obsoletes': 'Obsoletes', + 'project_url': 'Project-URL', + 'private_version': 'Private-Version', + 'obsoleted_by': 'Obsoleted-By', + 'extension': 'Extension', + 'provides_extra': 'Provides-Extra', +} + +_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist') +_VERSIONS_FIELDS = ('Requires-Python',) +_VERSION_FIELDS = ('Version',) +_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes', + 'Requires', 'Provides', 'Obsoletes-Dist', + 'Provides-Dist', 'Requires-Dist', 'Requires-External', + 'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist', + 'Provides-Extra', 'Extension') +_LISTTUPLEFIELDS = ('Project-URL',) + +_ELEMENTSFIELD = ('Keywords',) + +_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description') + +_MISSING = object() + +_FILESAFE = re.compile('[^A-Za-z0-9.]+') + + +def _get_name_and_version(name, version, for_filename=False): + """Return the distribution name with version. + + If for_filename is true, return a filename-escaped form.""" + if for_filename: + # For both name and version any runs of non-alphanumeric or '.' + # characters are replaced with a single '-'. Additionally any + # spaces in the version string become '.' + name = _FILESAFE.sub('-', name) + version = _FILESAFE.sub('-', version.replace(' ', '.')) + return '%s-%s' % (name, version) + + +class LegacyMetadata(object): + """The legacy metadata of a release. + + Supports versions 1.0, 1.1 and 1.2 (auto-detected). You can + instantiate the class with one of these arguments (or none): + - *path*, the path to a metadata file + - *fileobj* give a file-like object with metadata as content + - *mapping* is a dict-like object + - *scheme* is a version scheme name + """ + # TODO document the mapping API and UNKNOWN default key + + def __init__(self, path=None, fileobj=None, mapping=None, + scheme='default'): + if [path, fileobj, mapping].count(None) < 2: + raise TypeError('path, fileobj and mapping are exclusive') + self._fields = {} + self.requires_files = [] + self._dependencies = None + self.scheme = scheme + if path is not None: + self.read(path) + elif fileobj is not None: + self.read_file(fileobj) + elif mapping is not None: + self.update(mapping) + self.set_metadata_version() + + def set_metadata_version(self): + self._fields['Metadata-Version'] = _best_version(self._fields) + + def _write_field(self, fileobj, name, value): + fileobj.write('%s: %s\n' % (name, value)) + + def __getitem__(self, name): + return self.get(name) + + def __setitem__(self, name, value): + return self.set(name, value) + + def __delitem__(self, name): + field_name = self._convert_name(name) + try: + del self._fields[field_name] + except KeyError: + raise KeyError(name) + + def __contains__(self, name): + return (name in self._fields or + self._convert_name(name) in self._fields) + + def _convert_name(self, name): + if name in _ALL_FIELDS: + return name + name = name.replace('-', '_').lower() + return _ATTR2FIELD.get(name, name) + + def _default_value(self, name): + if name in _LISTFIELDS or name in _ELEMENTSFIELD: + return [] + return 'UNKNOWN' + + def _remove_line_prefix(self, value): + return _LINE_PREFIX.sub('\n', value) + + def __getattr__(self, name): + if name in _ATTR2FIELD: + return self[name] + raise AttributeError(name) + + # + # Public API + # + +# dependencies = property(_get_dependencies, _set_dependencies) + + def get_fullname(self, filesafe=False): + """Return the distribution name with version. + + If filesafe is true, return a filename-escaped form.""" + return _get_name_and_version(self['Name'], self['Version'], filesafe) + + def is_field(self, name): + """return True if name is a valid metadata key""" + name = self._convert_name(name) + return name in _ALL_FIELDS + + def is_multi_field(self, name): + name = self._convert_name(name) + return name in _LISTFIELDS + + def read(self, filepath): + """Read the metadata values from a file path.""" + fp = codecs.open(filepath, 'r', encoding='utf-8') + try: + self.read_file(fp) + finally: + fp.close() + + def read_file(self, fileob): + """Read the metadata values from a file object.""" + msg = message_from_file(fileob) + self._fields['Metadata-Version'] = msg['metadata-version'] + + # When reading, get all the fields we can + for field in _ALL_FIELDS: + if field not in msg: + continue + if field in _LISTFIELDS: + # we can have multiple lines + values = msg.get_all(field) + if field in _LISTTUPLEFIELDS and values is not None: + values = [tuple(value.split(',')) for value in values] + self.set(field, values) + else: + # single line + value = msg[field] + if value is not None and value != 'UNKNOWN': + self.set(field, value) + self.set_metadata_version() + + def write(self, filepath, skip_unknown=False): + """Write the metadata fields to filepath.""" + fp = codecs.open(filepath, 'w', encoding='utf-8') + try: + self.write_file(fp, skip_unknown) + finally: + fp.close() + + def write_file(self, fileobject, skip_unknown=False): + """Write the PKG-INFO format data to a file object.""" + self.set_metadata_version() + + for field in _version2fieldlist(self['Metadata-Version']): + values = self.get(field) + if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']): + continue + if field in _ELEMENTSFIELD: + self._write_field(fileobject, field, ','.join(values)) + continue + if field not in _LISTFIELDS: + if field == 'Description': + values = values.replace('\n', '\n |') + values = [values] + + if field in _LISTTUPLEFIELDS: + values = [','.join(value) for value in values] + + for value in values: + self._write_field(fileobject, field, value) + + def update(self, other=None, **kwargs): + """Set metadata values from the given iterable `other` and kwargs. + + Behavior is like `dict.update`: If `other` has a ``keys`` method, + they are looped over and ``self[key]`` is assigned ``other[key]``. + Else, ``other`` is an iterable of ``(key, value)`` iterables. + + Keys that don't match a metadata field or that have an empty value are + dropped. + """ + def _set(key, value): + if key in _ATTR2FIELD and value: + self.set(self._convert_name(key), value) + + if not other: + # other is None or empty container + pass + elif hasattr(other, 'keys'): + for k in other.keys(): + _set(k, other[k]) + else: + for k, v in other: + _set(k, v) + + if kwargs: + for k, v in kwargs.items(): + _set(k, v) + + def set(self, name, value): + """Control then set a metadata field.""" + name = self._convert_name(name) + + if ((name in _ELEMENTSFIELD or name == 'Platform') and + not isinstance(value, (list, tuple))): + if isinstance(value, string_types): + value = [v.strip() for v in value.split(',')] + else: + value = [] + elif (name in _LISTFIELDS and + not isinstance(value, (list, tuple))): + if isinstance(value, string_types): + value = [value] + else: + value = [] + + if logger.isEnabledFor(logging.WARNING): + project_name = self['Name'] + + scheme = get_scheme(self.scheme) + if name in _PREDICATE_FIELDS and value is not None: + for v in value: + # check that the values are valid + if not scheme.is_valid_matcher(v.split(';')[0]): + logger.warning( + '%r: %r is not valid (field %r)', + project_name, v, name) + # FIXME this rejects UNKNOWN, is that right? + elif name in _VERSIONS_FIELDS and value is not None: + if not scheme.is_valid_constraint_list(value): + logger.warning('%r: %r is not a valid version (field %r)', + project_name, value, name) + elif name in _VERSION_FIELDS and value is not None: + if not scheme.is_valid_version(value): + logger.warning('%r: %r is not a valid version (field %r)', + project_name, value, name) + + if name in _UNICODEFIELDS: + if name == 'Description': + value = self._remove_line_prefix(value) + + self._fields[name] = value + + def get(self, name, default=_MISSING): + """Get a metadata field.""" + name = self._convert_name(name) + if name not in self._fields: + if default is _MISSING: + default = self._default_value(name) + return default + if name in _UNICODEFIELDS: + value = self._fields[name] + return value + elif name in _LISTFIELDS: + value = self._fields[name] + if value is None: + return [] + res = [] + for val in value: + if name not in _LISTTUPLEFIELDS: + res.append(val) + else: + # That's for Project-URL + res.append((val[0], val[1])) + return res + + elif name in _ELEMENTSFIELD: + value = self._fields[name] + if isinstance(value, string_types): + return value.split(',') + return self._fields[name] + + def check(self, strict=False): + """Check if the metadata is compliant. If strict is True then raise if + no Name or Version are provided""" + self.set_metadata_version() + + # XXX should check the versions (if the file was loaded) + missing, warnings = [], [] + + for attr in ('Name', 'Version'): # required by PEP 345 + if attr not in self: + missing.append(attr) + + if strict and missing != []: + msg = 'missing required metadata: %s' % ', '.join(missing) + raise MetadataMissingError(msg) + + for attr in ('Home-page', 'Author'): + if attr not in self: + missing.append(attr) + + # checking metadata 1.2 (XXX needs to check 1.1, 1.0) + if self['Metadata-Version'] != '1.2': + return missing, warnings + + scheme = get_scheme(self.scheme) + + def are_valid_constraints(value): + for v in value: + if not scheme.is_valid_matcher(v.split(';')[0]): + return False + return True + + for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints), + (_VERSIONS_FIELDS, + scheme.is_valid_constraint_list), + (_VERSION_FIELDS, + scheme.is_valid_version)): + for field in fields: + value = self.get(field, None) + if value is not None and not controller(value): + warnings.append('Wrong value for %r: %s' % (field, value)) + + return missing, warnings + + def todict(self, skip_missing=False): + """Return fields as a dict. + + Field names will be converted to use the underscore-lowercase style + instead of hyphen-mixed case (i.e. home_page instead of Home-page). + """ + self.set_metadata_version() + + mapping_1_0 = ( + ('metadata_version', 'Metadata-Version'), + ('name', 'Name'), + ('version', 'Version'), + ('summary', 'Summary'), + ('home_page', 'Home-page'), + ('author', 'Author'), + ('author_email', 'Author-email'), + ('license', 'License'), + ('description', 'Description'), + ('keywords', 'Keywords'), + ('platform', 'Platform'), + ('classifier', 'Classifier'), + ('download_url', 'Download-URL'), + ) + + data = {} + for key, field_name in mapping_1_0: + if not skip_missing or field_name in self._fields: + data[key] = self[field_name] + + if self['Metadata-Version'] == '1.2': + mapping_1_2 = ( + ('requires_dist', 'Requires-Dist'), + ('requires_python', 'Requires-Python'), + ('requires_external', 'Requires-External'), + ('provides_dist', 'Provides-Dist'), + ('obsoletes_dist', 'Obsoletes-Dist'), + ('project_url', 'Project-URL'), + ('maintainer', 'Maintainer'), + ('maintainer_email', 'Maintainer-email'), + ) + for key, field_name in mapping_1_2: + if not skip_missing or field_name in self._fields: + if key != 'project_url': + data[key] = self[field_name] + else: + data[key] = [','.join(u) for u in self[field_name]] + + elif self['Metadata-Version'] == '1.1': + mapping_1_1 = ( + ('provides', 'Provides'), + ('requires', 'Requires'), + ('obsoletes', 'Obsoletes'), + ) + for key, field_name in mapping_1_1: + if not skip_missing or field_name in self._fields: + data[key] = self[field_name] + + return data + + def add_requirements(self, requirements): + if self['Metadata-Version'] == '1.1': + # we can't have 1.1 metadata *and* Setuptools requires + for field in ('Obsoletes', 'Requires', 'Provides'): + if field in self: + del self[field] + self['Requires-Dist'] += requirements + + # Mapping API + # TODO could add iter* variants + + def keys(self): + return list(_version2fieldlist(self['Metadata-Version'])) + + def __iter__(self): + for key in self.keys(): + yield key + + def values(self): + return [self[key] for key in self.keys()] + + def items(self): + return [(key, self[key]) for key in self.keys()] + + def __repr__(self): + return '<%s %s %s>' % (self.__class__.__name__, self.name, + self.version) + + +METADATA_FILENAME = 'pydist.json' + + +class Metadata(object): + """ + The metadata of a release. This implementation uses 2.0 (JSON) + metadata where possible. If not possible, it wraps a LegacyMetadata + instance which handles the key-value metadata format. + """ + + METADATA_VERSION_MATCHER = re.compile('^\d+(\.\d+)*$') + + NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I) + + VERSION_MATCHER = PEP426_VERSION_RE + + SUMMARY_MATCHER = re.compile('.{1,2047}') + + METADATA_VERSION = '2.0' + + GENERATOR = 'distlib (%s)' % __version__ + + MANDATORY_KEYS = { + 'name': (), + 'version': (), + 'summary': ('legacy',), + } + + INDEX_KEYS = 'name version license summary description' + + DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires ' + 'dev_requires provides meta_requires obsoleted_by ' + 'supports_environments') + + SYNTAX_VALIDATORS = { + 'metadata_version': (METADATA_VERSION_MATCHER, ()), + 'name': (NAME_MATCHER, ('legacy',)), + 'version': (VERSION_MATCHER, ('legacy',)), + 'summary': (SUMMARY_MATCHER, ('legacy',)), + } + + __slots__ = ('_legacy', '_data', 'scheme') + + def __init__(self, path=None, fileobj=None, mapping=None, + scheme='default'): + if [path, fileobj, mapping].count(None) < 2: + raise TypeError('path, fileobj and mapping are exclusive') + self._legacy = None + self._data = None + self.scheme = scheme + #import pdb; pdb.set_trace() + if mapping is not None: + try: + self._validate_mapping(mapping, scheme) + self._data = mapping + except MetadataUnrecognizedVersionError: + self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme) + self.validate() + else: + data = None + if path: + with open(path, 'rb') as f: + data = f.read() + elif fileobj: + data = fileobj.read() + if data is None: + # Initialised with no args - to be added + self._data = { + 'metadata_version': self.METADATA_VERSION, + 'generator': self.GENERATOR, + } + else: + if not isinstance(data, text_type): + data = data.decode('utf-8') + try: + self._data = json.loads(data) + self._validate_mapping(self._data, scheme) + except ValueError: + # Note: MetadataUnrecognizedVersionError does not + # inherit from ValueError (it's a DistlibException, + # which should not inherit from ValueError). + # The ValueError comes from the json.load - if that + # succeeds and we get a validation error, we want + # that to propagate + self._legacy = LegacyMetadata(fileobj=StringIO(data), + scheme=scheme) + self.validate() + + common_keys = set(('name', 'version', 'license', 'keywords', 'summary')) + + none_list = (None, list) + none_dict = (None, dict) + + mapped_keys = { + 'run_requires': ('Requires-Dist', list), + 'build_requires': ('Setup-Requires-Dist', list), + 'dev_requires': none_list, + 'test_requires': none_list, + 'meta_requires': none_list, + 'extras': ('Provides-Extra', list), + 'modules': none_list, + 'namespaces': none_list, + 'exports': none_dict, + 'commands': none_dict, + 'classifiers': ('Classifier', list), + 'source_url': ('Download-URL', None), + 'metadata_version': ('Metadata-Version', None), + } + + del none_list, none_dict + + def __getattribute__(self, key): + common = object.__getattribute__(self, 'common_keys') + mapped = object.__getattribute__(self, 'mapped_keys') + if key in mapped: + lk, maker = mapped[key] + if self._legacy: + if lk is None: + result = None if maker is None else maker() + else: + result = self._legacy.get(lk) + else: + value = None if maker is None else maker() + result = self._data.get(key, value) + elif key not in common: + result = object.__getattribute__(self, key) + elif self._legacy: + result = self._legacy.get(key) + else: + result = self._data.get(key) + return result + + def _validate_value(self, key, value, scheme=None): + if key in self.SYNTAX_VALIDATORS: + pattern, exclusions = self.SYNTAX_VALIDATORS[key] + if (scheme or self.scheme) not in exclusions: + m = pattern.match(value) + if not m: + raise MetadataInvalidError('%r is an invalid value for ' + 'the %r property' % (value, + key)) + + def __setattr__(self, key, value): + self._validate_value(key, value) + common = object.__getattribute__(self, 'common_keys') + mapped = object.__getattribute__(self, 'mapped_keys') + if key in mapped: + lk, _ = mapped[key] + if self._legacy: + if lk is None: + raise NotImplementedError + self._legacy[lk] = value + else: + self._data[key] = value + elif key not in common: + object.__setattr__(self, key, value) + else: + if key == 'keywords': + if isinstance(value, string_types): + value = value.strip() + if value: + value = value.split() + else: + value = [] + if self._legacy: + self._legacy[key] = value + else: + self._data[key] = value + + @property + def name_and_version(self): + return _get_name_and_version(self.name, self.version, True) + + @property + def provides(self): + if self._legacy: + result = self._legacy['Provides-Dist'] + else: + result = self._data.setdefault('provides', []) + s = '%s (%s)' % (self.name, self.version) + if s not in result: + result.append(s) + return result + + @provides.setter + def provides(self, value): + if self._legacy: + self._legacy['Provides-Dist'] = value + else: + self._data['provides'] = value + + def get_requirements(self, reqts, extras=None, env=None): + """ + Base method to get dependencies, given a set of extras + to satisfy and an optional environment context. + :param reqts: A list of sometimes-wanted dependencies, + perhaps dependent on extras and environment. + :param extras: A list of optional components being requested. + :param env: An optional environment for marker evaluation. + """ + if self._legacy: + result = reqts + else: + result = [] + extras = get_extras(extras or [], self.extras) + for d in reqts: + if 'extra' not in d and 'environment' not in d: + # unconditional + include = True + else: + if 'extra' not in d: + # Not extra-dependent - only environment-dependent + include = True + else: + include = d.get('extra') in extras + if include: + # Not excluded because of extras, check environment + marker = d.get('environment') + if marker: + include = interpret(marker, env) + if include: + result.extend(d['requires']) + for key in ('build', 'dev', 'test'): + e = ':%s:' % key + if e in extras: + extras.remove(e) + # A recursive call, but it should terminate since 'test' + # has been removed from the extras + reqts = self._data.get('%s_requires' % key, []) + result.extend(self.get_requirements(reqts, extras=extras, + env=env)) + return result + + @property + def dictionary(self): + if self._legacy: + return self._from_legacy() + return self._data + + @property + def dependencies(self): + if self._legacy: + raise NotImplementedError + else: + return extract_by_key(self._data, self.DEPENDENCY_KEYS) + + @dependencies.setter + def dependencies(self, value): + if self._legacy: + raise NotImplementedError + else: + self._data.update(value) + + def _validate_mapping(self, mapping, scheme): + if mapping.get('metadata_version') != self.METADATA_VERSION: + raise MetadataUnrecognizedVersionError() + missing = [] + for key, exclusions in self.MANDATORY_KEYS.items(): + if key not in mapping: + if scheme not in exclusions: + missing.append(key) + if missing: + msg = 'Missing metadata items: %s' % ', '.join(missing) + raise MetadataMissingError(msg) + for k, v in mapping.items(): + self._validate_value(k, v, scheme) + + def validate(self): + if self._legacy: + missing, warnings = self._legacy.check(True) + if missing or warnings: + logger.warning('Metadata: missing: %s, warnings: %s', + missing, warnings) + else: + self._validate_mapping(self._data, self.scheme) + + def todict(self): + if self._legacy: + return self._legacy.todict(True) + else: + result = extract_by_key(self._data, self.INDEX_KEYS) + return result + + def _from_legacy(self): + assert self._legacy and not self._data + result = { + 'metadata_version': self.METADATA_VERSION, + 'generator': self.GENERATOR, + } + lmd = self._legacy.todict(True) # skip missing ones + for k in ('name', 'version', 'license', 'summary', 'description', + 'classifier'): + if k in lmd: + if k == 'classifier': + nk = 'classifiers' + else: + nk = k + result[nk] = lmd[k] + kw = lmd.get('Keywords', []) + if kw == ['']: + kw = [] + result['keywords'] = kw + keys = (('requires_dist', 'run_requires'), + ('setup_requires_dist', 'build_requires')) + for ok, nk in keys: + if ok in lmd and lmd[ok]: + result[nk] = [{'requires': lmd[ok]}] + result['provides'] = self.provides + author = {} + maintainer = {} + return result + + LEGACY_MAPPING = { + 'name': 'Name', + 'version': 'Version', + 'license': 'License', + 'summary': 'Summary', + 'description': 'Description', + 'classifiers': 'Classifier', + } + + def _to_legacy(self): + def process_entries(entries): + reqts = set() + for e in entries: + extra = e.get('extra') + env = e.get('environment') + rlist = e['requires'] + for r in rlist: + if not env and not extra: + reqts.add(r) + else: + marker = '' + if extra: + marker = 'extra == "%s"' % extra + if env: + if marker: + marker = '(%s) and %s' % (env, marker) + else: + marker = env + reqts.add(';'.join((r, marker))) + return reqts + + assert self._data and not self._legacy + result = LegacyMetadata() + nmd = self._data + for nk, ok in self.LEGACY_MAPPING.items(): + if nk in nmd: + result[ok] = nmd[nk] + r1 = process_entries(self.run_requires + self.meta_requires) + r2 = process_entries(self.build_requires + self.dev_requires) + if self.extras: + result['Provides-Extra'] = sorted(self.extras) + result['Requires-Dist'] = sorted(r1) + result['Setup-Requires-Dist'] = sorted(r2) + # TODO: other fields such as contacts + return result + + def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True): + if [path, fileobj].count(None) != 1: + raise ValueError('Exactly one of path and fileobj is needed') + self.validate() + if legacy: + if self._legacy: + legacy_md = self._legacy + else: + legacy_md = self._to_legacy() + if path: + legacy_md.write(path, skip_unknown=skip_unknown) + else: + legacy_md.write_file(fileobj, skip_unknown=skip_unknown) + else: + if self._legacy: + d = self._from_legacy() + else: + d = self._data + if fileobj: + json.dump(d, fileobj, ensure_ascii=True, indent=2, + sort_keys=True) + else: + with codecs.open(path, 'w', 'utf-8') as f: + json.dump(d, f, ensure_ascii=True, indent=2, + sort_keys=True) + + def add_requirements(self, requirements): + if self._legacy: + self._legacy.add_requirements(requirements) + else: + self._data.setdefault('run_requires', []).extend(requirements) + + def __repr__(self): + name = self.name or '(no name)' + version = self.version or 'no version' + return '<%s %s %s (%s)>' % (self.__class__.__name__, + self.metadata_version, name, version) diff --git a/pip/_vendor/distlib/resources.py b/pip/_vendor/distlib/resources.py new file mode 100644 index 000000000..d58b25a92 --- /dev/null +++ b/pip/_vendor/distlib/resources.py @@ -0,0 +1,343 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from __future__ import unicode_literals + +import bisect +import io +import logging +import os +import pkgutil +import shutil +import sys +import types +import zipimport + +from . import DistlibException +from .util import cached_property, get_cache_base, path_to_cache_dir + +logger = logging.getLogger(__name__) + + +class Cache(object): + """ + A class implementing a cache for resources that need to live in the file system + e.g. shared libraries. + """ + + def __init__(self, base=None): + """ + Initialise an instance. + + :param base: The base directory where the cache should be located. If + not specified, this will be the ``resource-cache`` + directory under whatever :func:`get_cache_base` returns. + """ + if base is None: + base = os.path.join(get_cache_base(), 'resource-cache') + # we use 'isdir' instead of 'exists', because we want to + # fail if there's a file with that name + if not os.path.isdir(base): + os.makedirs(base) + self.base = os.path.abspath(os.path.normpath(base)) + + def prefix_to_dir(self, prefix): + """ + Converts a resource prefix to a directory name in the cache. + """ + return path_to_cache_dir(prefix) + + def is_stale(self, resource, path): + """ + Is the cache stale for the given resource? + + :param resource: The :class:`Resource` being cached. + :param path: The path of the resource in the cache. + :return: True if the cache is stale. + """ + # Cache invalidation is a hard problem :-) + return True + + def get(self, resource): + """ + Get a resource into the cache, + + :param resource: A :class:`Resource` instance. + :return: The pathname of the resource in the cache. + """ + prefix, path = resource.finder.get_cache_info(resource) + if prefix is None: + result = path + else: + result = os.path.join(self.base, self.prefix_to_dir(prefix), path) + dirname = os.path.dirname(result) + if not os.path.isdir(dirname): + os.makedirs(dirname) + if not os.path.exists(result): + stale = True + else: + stale = self.is_stale(resource, path) + if stale: + # write the bytes of the resource to the cache location + with open(result, 'wb') as f: + f.write(resource.bytes) + return result + + def clear(self): + """ + Clear the cache. + """ + not_removed = [] + for fn in os.listdir(self.base): + fn = os.path.join(self.base, fn) + try: + if os.path.islink(fn) or os.path.isfile(fn): + os.remove(fn) + elif os.path.isdir(fn): + shutil.rmtree(fn) + except Exception: + not_removed.append(fn) + return not_removed + +cache = Cache() + + +class ResourceBase(object): + def __init__(self, finder, name): + self.finder = finder + self.name = name + + +class Resource(ResourceBase): + """ + A class representing an in-package resource, such as a data file. This is + not normally instantiated by user code, but rather by a + :class:`ResourceFinder` which manages the resource. + """ + is_container = False # Backwards compatibility + + def as_stream(self): + """ + Get the resource as a stream. + + This is not a property to make it obvious that it returns a new stream + each time. + """ + return self.finder.get_stream(self) + + @cached_property + def file_path(self): + return cache.get(self) + + @cached_property + def bytes(self): + return self.finder.get_bytes(self) + + @cached_property + def size(self): + return self.finder.get_size(self) + + +class ResourceContainer(ResourceBase): + is_container = True # Backwards compatibility + + @cached_property + def resources(self): + return self.finder.get_resources(self) + + +class ResourceFinder(object): + """ + Resource finder for file system resources. + """ + def __init__(self, module): + self.module = module + self.loader = getattr(module, '__loader__', None) + self.base = os.path.dirname(getattr(module, '__file__', '')) + + def _make_path(self, resource_name): + parts = resource_name.split('/') + parts.insert(0, self.base) + return os.path.realpath(os.path.join(*parts)) + + def _find(self, path): + return os.path.exists(path) + + def get_cache_info(self, resource): + return None, resource.path + + def find(self, resource_name): + path = self._make_path(resource_name) + if not self._find(path): + result = None + else: + if self._is_directory(path): + result = ResourceContainer(self, resource_name) + else: + result = Resource(self, resource_name) + result.path = path + return result + + def get_stream(self, resource): + return open(resource.path, 'rb') + + def get_bytes(self, resource): + with open(resource.path, 'rb') as f: + return f.read() + + def get_size(self, resource): + return os.path.getsize(resource.path) + + def get_resources(self, resource): + def allowed(f): + return f != '__pycache__' and not f.endswith(('.pyc', '.pyo')) + return set([f for f in os.listdir(resource.path) if allowed(f)]) + + def is_container(self, resource): + return self._is_directory(resource.path) + + _is_directory = staticmethod(os.path.isdir) + + +class ZipResourceFinder(ResourceFinder): + """ + Resource finder for resources in .zip files. + """ + def __init__(self, module): + super(ZipResourceFinder, self).__init__(module) + archive = self.loader.archive + self.prefix_len = 1 + len(archive) + # PyPy doesn't have a _files attr on zipimporter, and you can't set one + if hasattr(self.loader, '_files'): + self._files = self.loader._files + else: + self._files = zipimport._zip_directory_cache[archive] + self.index = sorted(self._files) + + def _find(self, path): + path = path[self.prefix_len:] + if path in self._files: + result = True + else: + if path and path[-1] != os.sep: + path = path + os.sep + i = bisect.bisect(self.index, path) + try: + result = self.index[i].startswith(path) + except IndexError: + result = False + if not result: + logger.debug('_find failed: %r %r', path, self.loader.prefix) + else: + logger.debug('_find worked: %r %r', path, self.loader.prefix) + return result + + def get_cache_info(self, resource): + prefix = self.loader.archive + path = resource.path[1 + len(prefix):] + return prefix, path + + def get_bytes(self, resource): + return self.loader.get_data(resource.path) + + def get_stream(self, resource): + return io.BytesIO(self.get_bytes(resource)) + + def get_size(self, resource): + path = resource.path[self.prefix_len:] + return self._files[path][3] + + def get_resources(self, resource): + path = resource.path[self.prefix_len:] + if path and path[-1] != os.sep: + path += os.sep + plen = len(path) + result = set() + i = bisect.bisect(self.index, path) + while i < len(self.index): + if not self.index[i].startswith(path): + break + s = self.index[i][plen:] + result.add(s.split(os.sep, 1)[0]) # only immediate children + i += 1 + return result + + def _is_directory(self, path): + path = path[self.prefix_len:] + if path and path[-1] != os.sep: + path += os.sep + i = bisect.bisect(self.index, path) + try: + result = self.index[i].startswith(path) + except IndexError: + result = False + return result + +_finder_registry = { + type(None): ResourceFinder, + zipimport.zipimporter: ZipResourceFinder +} + +try: + import _frozen_importlib + _finder_registry[_frozen_importlib.SourceFileLoader] = ResourceFinder + _finder_registry[_frozen_importlib.FileFinder] = ResourceFinder +except (ImportError, AttributeError): + pass + + +def register_finder(loader, finder_maker): + _finder_registry[type(loader)] = finder_maker + +_finder_cache = {} + + +def finder(package): + """ + Return a resource finder for a package. + :param package: The name of the package. + :return: A :class:`ResourceFinder` instance for the package. + """ + if package in _finder_cache: + result = _finder_cache[package] + else: + if package not in sys.modules: + __import__(package) + module = sys.modules[package] + path = getattr(module, '__path__', None) + if path is None: + raise DistlibException('You cannot get a finder for a module, ' + 'only for a package') + loader = getattr(module, '__loader__', None) + finder_maker = _finder_registry.get(type(loader)) + if finder_maker is None: + raise DistlibException('Unable to locate finder for %r' % package) + result = finder_maker(module) + _finder_cache[package] = result + return result + + +_dummy_module = types.ModuleType(str('__dummy__')) + + +def finder_for_path(path): + """ + Return a resource finder for a path, which should represent a container. + + :param path: The path. + :return: A :class:`ResourceFinder` instance for the path. + """ + result = None + # calls any path hooks, gets importer into cache + pkgutil.get_importer(path) + loader = sys.path_importer_cache.get(path) + finder = _finder_registry.get(type(loader)) + if finder: + module = _dummy_module + module.__file__ = os.path.join(path, '') + module.__loader__ = loader + result = finder(module) + return result diff --git a/pip/_vendor/distlib/scripts.py b/pip/_vendor/distlib/scripts.py new file mode 100644 index 000000000..39537935a --- /dev/null +++ b/pip/_vendor/distlib/scripts.py @@ -0,0 +1,312 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from io import BytesIO +import logging +import os +import re +import struct +import sys + +from .compat import sysconfig, fsencode, detect_encoding, ZipFile +from .resources import finder +from .util import FileOperator, get_export_entry, convert_path, get_executable + +logger = logging.getLogger(__name__) + +_DEFAULT_MANIFEST = ''' + + + + + + + + + + + + +'''.strip() + +# check if Python is called on the first line with this expression +FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$') +SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*- +if __name__ == '__main__': + import sys, re + + def _resolve(module, func): + __import__(module) + mod = sys.modules[module] + parts = func.split('.') + result = getattr(mod, parts.pop(0)) + for p in parts: + result = getattr(result, p) + return result + + try: + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + + func = _resolve('%(module)s', '%(func)s') + rc = func() # None interpreted as 0 + except Exception as e: # only supporting Python >= 2.6 + sys.stderr.write('%%s\\n' %% e) + rc = 1 + sys.exit(rc) +''' + + +class ScriptMaker(object): + """ + A class to copy or create scripts from source scripts or callable + specifications. + """ + script_template = SCRIPT_TEMPLATE + + executable = None # for shebangs + + def __init__(self, source_dir, target_dir, add_launchers=True, + dry_run=False, fileop=None): + self.source_dir = source_dir + self.target_dir = target_dir + self.add_launchers = add_launchers + self.force = False + self.clobber = False + self.set_mode = False + self.variants = set(('', 'X.Y')) + self._fileop = fileop or FileOperator(dry_run) + + def _get_alternate_executable(self, executable, options): + if options.get('gui', False) and os.name == 'nt': + dn, fn = os.path.split(executable) + fn = fn.replace('python', 'pythonw') + executable = os.path.join(dn, fn) + return executable + + def _get_shebang(self, encoding, post_interp=b'', options=None): + if self.executable: + executable = self.executable + elif not sysconfig.is_python_build(): + executable = get_executable() + elif hasattr(sys, 'base_prefix') and sys.prefix != sys.base_prefix: + executable = os.path.join(sysconfig.get_path('scripts'), + 'python%s' % sysconfig.get_config_var('EXE')) + else: + executable = os.path.join( + sysconfig.get_config_var('BINDIR'), + 'python%s%s' % (sysconfig.get_config_var('VERSION'), + sysconfig.get_config_var('EXE'))) + if options: + executable = self._get_alternate_executable(executable, options) + + executable = fsencode(executable) + shebang = b'#!' + executable + post_interp + b'\n' + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be decodable from + # UTF-8. + try: + shebang.decode('utf-8') + except UnicodeDecodeError: + raise ValueError( + 'The shebang (%r) is not decodable from utf-8' % shebang) + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be decodable from + # the script encoding too. + if encoding != 'utf-8': + try: + shebang.decode(encoding) + except UnicodeDecodeError: + raise ValueError( + 'The shebang (%r) is not decodable ' + 'from the script encoding (%r)' % (shebang, encoding)) + return shebang + + def _get_script_text(self, entry): + return self.script_template % dict(module=entry.prefix, + func=entry.suffix) + + manifest = _DEFAULT_MANIFEST + + def get_manifest(self, exename): + base = os.path.basename(exename) + return self.manifest % base + + def _write_script(self, names, shebang, script_bytes, filenames, ext): + use_launcher = self.add_launchers and os.name == 'nt' + linesep = os.linesep.encode('utf-8') + if not use_launcher: + script_bytes = shebang + linesep + script_bytes + else: + if ext == 'py': + launcher = self._get_launcher('t') + else: + launcher = self._get_launcher('w') + stream = BytesIO() + with ZipFile(stream, 'w') as zf: + zf.writestr('__main__.py', script_bytes) + zip_data = stream.getvalue() + script_bytes = launcher + shebang + linesep + zip_data + for name in names: + outname = os.path.join(self.target_dir, name) + if use_launcher: + n, e = os.path.splitext(outname) + if e.startswith('.py'): + outname = n + outname = '%s.exe' % outname + try: + self._fileop.write_binary_file(outname, script_bytes) + except Exception: + # Failed writing an executable - it might be in use. + logger.warning('Failed to write executable - trying to ' + 'use .deleteme logic') + dfname = '%s.deleteme' % outname + if os.path.exists(dfname): + os.remove(dfname) # Not allowed to fail here + os.rename(outname, dfname) # nor here + self._fileop.write_binary_file(outname, script_bytes) + logger.debug('Able to replace executable using ' + '.deleteme logic') + try: + os.remove(dfname) + except Exception: + pass # still in use - ignore error + else: + if os.name == 'nt' and not outname.endswith('.' + ext): + outname = '%s.%s' % (outname, ext) + if os.path.exists(outname) and not self.clobber: + logger.warning('Skipping existing file %s', outname) + continue + self._fileop.write_binary_file(outname, script_bytes) + if self.set_mode: + self._fileop.set_executable_mode([outname]) + filenames.append(outname) + + def _make_script(self, entry, filenames, options=None): + shebang = self._get_shebang('utf-8', options=options) + script = self._get_script_text(entry).encode('utf-8') + name = entry.name + scriptnames = set() + if '' in self.variants: + scriptnames.add(name) + if 'X' in self.variants: + scriptnames.add('%s%s' % (name, sys.version[0])) + if 'X.Y' in self.variants: + scriptnames.add('%s-%s' % (name, sys.version[:3])) + if options and options.get('gui', False): + ext = 'pyw' + else: + ext = 'py' + self._write_script(scriptnames, shebang, script, filenames, ext) + + def _copy_script(self, script, filenames): + adjust = False + script = os.path.join(self.source_dir, convert_path(script)) + outname = os.path.join(self.target_dir, os.path.basename(script)) + if not self.force and not self._fileop.newer(script, outname): + logger.debug('not copying %s (up-to-date)', script) + return + + # Always open the file, but ignore failures in dry-run mode -- + # that way, we'll get accurate feedback if we can read the + # script. + try: + f = open(script, 'rb') + except IOError: + if not self.dry_run: + raise + f = None + else: + encoding, lines = detect_encoding(f.readline) + f.seek(0) + first_line = f.readline() + if not first_line: + logger.warning('%s: %s is an empty file (skipping)', + self.get_command_name(), script) + return + + match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n')) + if match: + adjust = True + post_interp = match.group(1) or b'' + + if not adjust: + if f: + f.close() + self._fileop.copy_file(script, outname) + if self.set_mode: + self._fileop.set_executable_mode([outname]) + filenames.append(outname) + else: + logger.info('copying and adjusting %s -> %s', script, + self.target_dir) + if not self._fileop.dry_run: + shebang = self._get_shebang(encoding, post_interp) + if b'pythonw' in first_line: + ext = 'pyw' + else: + ext = 'py' + n = os.path.basename(outname) + self._write_script([n], shebang, f.read(), filenames, ext) + if f: + f.close() + + @property + def dry_run(self): + return self._fileop.dry_run + + @dry_run.setter + def dry_run(self, value): + self._fileop.dry_run = value + + if os.name == 'nt': + # Executable launcher support. + # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/ + + def _get_launcher(self, kind): + if struct.calcsize('P') == 8: # 64-bit + bits = '64' + else: + bits = '32' + name = '%s%s.exe' % (kind, bits) + result = finder('distlib').find(name).bytes + return result + + # Public API follows + + def make(self, specification, options=None): + """ + Make a script. + + :param specification: The specification, which is either a valid export + entry specification (to make a script from a + callable) or a filename (to make a script by + copying from a source location). + :param options: A dictionary of options controlling script generation. + :return: A list of all absolute pathnames written to. + """ + filenames = [] + entry = get_export_entry(specification) + if entry is None: + self._copy_script(specification, filenames) + else: + self._make_script(entry, filenames, options=options) + return filenames + + def make_multiple(self, specifications, options=None): + """ + Take a list of specifications and make scripts from them, + :param specifications: A list of specifications. + :return: A list of all absolute pathnames written to, + """ + filenames = [] + for specification in specifications: + filenames.extend(self.make(specification, options)) + return filenames diff --git a/pip/_vendor/distlib/t32.exe b/pip/_vendor/distlib/t32.exe new file mode 100644 index 000000000..a92fdc746 Binary files /dev/null and b/pip/_vendor/distlib/t32.exe differ diff --git a/pip/_vendor/distlib/t64.exe b/pip/_vendor/distlib/t64.exe new file mode 100644 index 000000000..0dc55de1a Binary files /dev/null and b/pip/_vendor/distlib/t64.exe differ diff --git a/pip/_vendor/distlib/util.py b/pip/_vendor/distlib/util.py new file mode 100644 index 000000000..0ec178f8e --- /dev/null +++ b/pip/_vendor/distlib/util.py @@ -0,0 +1,1517 @@ +# +# Copyright (C) 2012-2013 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +import codecs +from collections import deque +import contextlib +import csv +from glob import iglob as std_iglob +import io +import json +import logging +import os +import py_compile +import re +import shutil +import socket +import ssl +import subprocess +import sys +import tarfile +import tempfile +import threading +import time + +from . import DistlibException +from .compat import (string_types, text_type, shutil, raw_input, StringIO, + cache_from_source, urlopen, httplib, xmlrpclib, splittype, + HTTPHandler, HTTPSHandler as BaseHTTPSHandler, + BaseConfigurator, valid_ident, Container, configparser, + URLError, match_hostname, CertificateError, ZipFile) + +logger = logging.getLogger(__name__) + +# +# Requirement parsing code for name + optional constraints + optional extras +# +# e.g. 'foo >= 1.2, < 2.0 [bar, baz]' +# +# The regex can seem a bit hairy, so we build it up out of smaller pieces +# which are manageable. +# + +COMMA = r'\s*,\s*' +COMMA_RE = re.compile(COMMA) + +IDENT = r'(\w|[.-])+' +EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')' +VERSPEC = IDENT + r'\*?' + +RELOP = '([<>=!~]=)|[<>]' + +# +# The first relop is optional - if absent, will be taken as '~=' +# +BARE_CONSTRAINTS = ('(' + RELOP + r')?\s*(' + VERSPEC + ')(' + COMMA + '(' + + RELOP + r')\s*(' + VERSPEC + '))*') + +DIRECT_REF = '(from\s+(?P.*))' + +# +# Either the bare constraints or the bare constraints in parentheses +# +CONSTRAINTS = (r'\(\s*(?P' + BARE_CONSTRAINTS + '|' + DIRECT_REF + + r')\s*\)|(?P' + BARE_CONSTRAINTS + '\s*)') + +EXTRA_LIST = EXTRA_IDENT + '(' + COMMA + EXTRA_IDENT + ')*' +EXTRAS = r'\[\s*(?P' + EXTRA_LIST + r')?\s*\]' +REQUIREMENT = ('(?P' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' + + CONSTRAINTS + ')?$') +REQUIREMENT_RE = re.compile(REQUIREMENT) + +# +# Used to scan through the constraints +# +RELOP_IDENT = '(?P' + RELOP + r')\s*(?P' + VERSPEC + ')' +RELOP_IDENT_RE = re.compile(RELOP_IDENT) + +def parse_requirement(s): + + def get_constraint(m): + d = m.groupdict() + return d['op'], d['vn'] + + result = None + m = REQUIREMENT_RE.match(s) + if m: + d = m.groupdict() + name = d['dn'] + cons = d['c1'] or d['c2'] + if not d['diref']: + url = None + else: + # direct reference + cons = None + url = d['diref'].strip() + if not cons: + cons = None + constr = '' + rs = d['dn'] + else: + if cons[0] not in '<>!=': + cons = '~=' + cons + iterator = RELOP_IDENT_RE.finditer(cons) + cons = [get_constraint(m) for m in iterator] + rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons])) + if not d['ex']: + extras = None + else: + extras = COMMA_RE.split(d['ex']) + result = Container(name=name, constraints=cons, extras=extras, + requirement=rs, source=s, url=url) + return result + + +def get_resources_dests(resources_root, rules): + """Find destinations for resources files""" + + def get_rel_path(base, path): + # normalizes and returns a lstripped-/-separated path + base = base.replace(os.path.sep, '/') + path = path.replace(os.path.sep, '/') + assert path.startswith(base) + return path[len(base):].lstrip('/') + + + destinations = {} + for base, suffix, dest in rules: + prefix = os.path.join(resources_root, base) + for abs_base in iglob(prefix): + abs_glob = os.path.join(abs_base, suffix) + for abs_path in iglob(abs_glob): + resource_file = get_rel_path(resources_root, abs_path) + if dest is None: # remove the entry if it was here + destinations.pop(resource_file, None) + else: + rel_path = get_rel_path(abs_base, abs_path) + rel_dest = dest.replace(os.path.sep, '/').rstrip('/') + destinations[resource_file] = rel_dest + '/' + rel_path + return destinations + + +def in_venv(): + if hasattr(sys, 'real_prefix'): + # virtualenv venvs + result = True + else: + # PEP 405 venvs + result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix) + return result + + +def get_executable(): + if sys.platform == 'darwin' and ('__VENV_LAUNCHER__' + in os.environ): + result = os.environ['__VENV_LAUNCHER__'] + else: + result = sys.executable + return result + + +def proceed(prompt, allowed_chars, error_prompt=None, default=None): + p = prompt + while True: + s = raw_input(p) + p = prompt + if not s and default: + s = default + if s: + c = s[0].lower() + if c in allowed_chars: + break + if error_prompt: + p = '%c: %s\n%s' % (c, error_prompt, prompt) + return c + + +def extract_by_key(d, keys): + if isinstance(keys, string_types): + keys = keys.split() + result = {} + for key in keys: + if key in d: + result[key] = d[key] + return result + +def read_exports(stream): + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getreader('utf-8')(stream) + # Try to load as JSON, falling back on legacy format + data = stream.read() + stream = StringIO(data) + try: + data = json.load(stream) + result = data['exports'] + for group, entries in result.items(): + for k, v in entries.items(): + s = '%s = %s' % (k, v) + entry = get_export_entry(s) + assert entry is not None + entries[k] = entry + return result + except Exception: + stream.seek(0, 0) + cp = configparser.ConfigParser() + if hasattr(cp, 'read_file'): + cp.read_file(stream) + else: + cp.readfp(stream) + result = {} + for key in cp.sections(): + result[key] = entries = {} + for name, value in cp.items(key): + s = '%s = %s' % (name, value) + entry = get_export_entry(s) + assert entry is not None + #entry.dist = self + entries[name] = entry + return result + + +def write_exports(exports, stream): + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getwriter('utf-8')(stream) + cp = configparser.ConfigParser() + for k, v in exports.items(): + # TODO check k, v for valid values + cp.add_section(k) + for entry in v.values(): + if entry.suffix is None: + s = entry.prefix + else: + s = '%s:%s' % (entry.prefix, entry.suffix) + if entry.flags: + s = '%s [%s]' % (s, ', '.join(entry.flags)) + cp.set(k, entry.name, s) + cp.write(stream) + + +@contextlib.contextmanager +def tempdir(): + td = tempfile.mkdtemp() + try: + yield td + finally: + shutil.rmtree(td) + +@contextlib.contextmanager +def chdir(d): + cwd = os.getcwd() + try: + os.chdir(d) + yield + finally: + os.chdir(cwd) + + +@contextlib.contextmanager +def socket_timeout(seconds=15): + cto = socket.getdefaulttimeout() + try: + socket.setdefaulttimeout(seconds) + yield + finally: + socket.setdefaulttimeout(cto) + + +class cached_property(object): + def __init__(self, func): + self.func = func + #for attr in ('__name__', '__module__', '__doc__'): + # setattr(self, attr, getattr(func, attr, None)) + + def __get__(self, obj, cls=None): + if obj is None: + return self + value = self.func(obj) + object.__setattr__(obj, self.func.__name__, value) + #obj.__dict__[self.func.__name__] = value = self.func(obj) + return value + +def convert_path(pathname): + """Return 'pathname' as a name that will work on the native filesystem. + + The path is split on '/' and put back together again using the current + directory separator. Needed because filenames in the setup script are + always supplied in Unix style, and have to be converted to the local + convention before we can actually use them in the filesystem. Raises + ValueError on non-Unix-ish systems if 'pathname' either starts or + ends with a slash. + """ + if os.sep == '/': + return pathname + if not pathname: + return pathname + if pathname[0] == '/': + raise ValueError("path '%s' cannot be absolute" % pathname) + if pathname[-1] == '/': + raise ValueError("path '%s' cannot end with '/'" % pathname) + + paths = pathname.split('/') + while os.curdir in paths: + paths.remove(os.curdir) + if not paths: + return os.curdir + return os.path.join(*paths) + + +class FileOperator(object): + def __init__(self, dry_run=False): + self.dry_run = dry_run + self.ensured = set() + self._init_record() + + def _init_record(self): + self.record = False + self.files_written = set() + self.dirs_created = set() + + def record_as_written(self, path): + if self.record: + self.files_written.add(path) + + def newer(self, source, target): + """Tell if the target is newer than the source. + + Returns true if 'source' exists and is more recently modified than + 'target', or if 'source' exists and 'target' doesn't. + + Returns false if both exist and 'target' is the same age or younger + than 'source'. Raise PackagingFileError if 'source' does not exist. + + Note that this test is not very accurate: files created in the same + second will have the same "age". + """ + if not os.path.exists(source): + raise DistlibException("file '%r' does not exist" % + os.path.abspath(source)) + if not os.path.exists(target): + return True + + return os.stat(source).st_mtime > os.stat(target).st_mtime + + def copy_file(self, infile, outfile, check=True): + """Copy a file respecting dry-run and force flags. + """ + self.ensure_dir(os.path.dirname(outfile)) + logger.info('Copying %s to %s', infile, outfile) + if not self.dry_run: + msg = None + if check: + if os.path.islink(outfile): + msg = '%s is a symlink' % outfile + elif os.path.exists(outfile) and not os.path.isfile(outfile): + msg = '%s is a non-regular file' % outfile + if msg: + raise ValueError(msg + ' which would be overwritten') + shutil.copyfile(infile, outfile) + self.record_as_written(outfile) + + def copy_stream(self, instream, outfile, encoding=None): + assert not os.path.isdir(outfile) + self.ensure_dir(os.path.dirname(outfile)) + logger.info('Copying stream %s to %s', instream, outfile) + if not self.dry_run: + if encoding is None: + outstream = open(outfile, 'wb') + else: + outstream = codecs.open(outfile, 'w', encoding=encoding) + try: + shutil.copyfileobj(instream, outstream) + finally: + outstream.close() + self.record_as_written(outfile) + + def write_binary_file(self, path, data): + self.ensure_dir(os.path.dirname(path)) + if not self.dry_run: + with open(path, 'wb') as f: + f.write(data) + self.record_as_written(path) + + def write_text_file(self, path, data, encoding): + self.ensure_dir(os.path.dirname(path)) + if not self.dry_run: + with open(path, 'wb') as f: + f.write(data.encode(encoding)) + self.record_as_written(path) + + def set_mode(self, bits, mask, files): + if os.name == 'posix': + # Set the executable bits (owner, group, and world) on + # all the files specified. + for f in files: + if self.dry_run: + logger.info("changing mode of %s", f) + else: + mode = (os.stat(f).st_mode | bits) & mask + logger.info("changing mode of %s to %o", f, mode) + os.chmod(f, mode) + + set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f) + + def ensure_dir(self, path): + path = os.path.abspath(path) + if path not in self.ensured and not os.path.exists(path): + self.ensured.add(path) + d, f = os.path.split(path) + self.ensure_dir(d) + logger.info('Creating %s' % path) + if not self.dry_run: + os.mkdir(path) + if self.record: + self.dirs_created.add(path) + + def byte_compile(self, path, optimize=False, force=False, prefix=None): + dpath = cache_from_source(path, not optimize) + logger.info('Byte-compiling %s to %s', path, dpath) + if not self.dry_run: + if force or self.newer(path, dpath): + if not prefix: + diagpath = None + else: + assert path.startswith(prefix) + diagpath = path[len(prefix):] + py_compile.compile(path, dpath, diagpath, True) # raise error + self.record_as_written(dpath) + return dpath + + def ensure_removed(self, path): + if os.path.exists(path): + if os.path.isdir(path) and not os.path.islink(path): + logger.debug('Removing directory tree at %s', path) + if not self.dry_run: + shutil.rmtree(path) + if self.record: + if path in self.dirs_created: + self.dirs_created.remove(path) + else: + if os.path.islink(path): + s = 'link' + else: + s = 'file' + logger.debug('Removing %s %s', s, path) + if not self.dry_run: + os.remove(path) + if self.record: + if path in self.files_written: + self.files_written.remove(path) + + def is_writable(self, path): + result = False + while not result: + if os.path.exists(path): + result = os.access(path, os.W_OK) + break + parent = os.path.dirname(path) + if parent == path: + break + path = parent + return result + + def commit(self): + """ + Commit recorded changes, turn off recording, return + changes. + """ + assert self.record + result = self.files_written, self.dirs_created + self._init_record() + return result + + def rollback(self): + if not self.dry_run: + for f in list(self.files_written): + if os.path.exists(f): + os.remove(f) + # dirs should all be empty now, except perhaps for + # __pycache__ subdirs + # reverse so that subdirs appear before their parents + dirs = sorted(self.dirs_created, reverse=True) + for d in dirs: + flist = os.listdir(d) + if flist: + assert flist == ['__pycache__'] + sd = os.path.join(d, flist[0]) + os.rmdir(sd) + os.rmdir(d) # should fail if non-empty + self._init_record() + +def resolve(module_name, dotted_path): + if module_name in sys.modules: + mod = sys.modules[module_name] + else: + mod = __import__(module_name) + if dotted_path is None: + result = mod + else: + parts = dotted_path.split('.') + result = getattr(mod, parts.pop(0)) + for p in parts: + result = getattr(result, p) + return result + + +class ExportEntry(object): + def __init__(self, name, prefix, suffix, flags): + self.name = name + self.prefix = prefix + self.suffix = suffix + self.flags = flags + + @cached_property + def value(self): + return resolve(self.prefix, self.suffix) + + def __repr__(self): + return '' % (self.name, self.prefix, + self.suffix, self.flags) + + def __eq__(self, other): + if not isinstance(other, ExportEntry): + result = False + else: + result = (self.name == other.name and + self.prefix == other.prefix and + self.suffix == other.suffix and + self.flags == other.flags) + return result + + __hash__ = object.__hash__ + + +ENTRY_RE = re.compile(r'''(?P(\w|[-.])+) + \s*=\s*(?P(\w+)([:\.]\w+)*) + \s*(\[\s*(?P\w+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])? + ''', re.VERBOSE) + + +def get_export_entry(specification): + m = ENTRY_RE.search(specification) + if not m: + result = None + if '[' in specification or ']' in specification: + raise DistlibException('Invalid specification ' + '%r' % specification) + else: + d = m.groupdict() + name = d['name'] + path = d['callable'] + colons = path.count(':') + if colons == 0: + prefix, suffix = path, None + else: + if colons != 1: + raise DistlibException('Invalid specification ' + '%r' % specification) + prefix, suffix = path.split(':') + flags = d['flags'] + if flags is None: + if '[' in specification or ']' in specification: + raise DistlibException('Invalid specification ' + '%r' % specification) + flags = [] + else: + flags = [f.strip() for f in flags.split(',')] + result = ExportEntry(name, prefix, suffix, flags) + return result + + +def get_cache_base(suffix=None): + """ + Return the default base location for distlib caches. If the directory does + not exist, it is created. Use the suffix provided for the base directory, + and default to '.distlib' if it isn't provided. + + On Windows, if LOCALAPPDATA is defined in the environment, then it is + assumed to be a directory, and will be the parent directory of the result. + On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home + directory - using os.expanduser('~') - will be the parent directory of + the result. + + The result is just the directory '.distlib' in the parent directory as + determined above, or with the name specified with ``suffix``. + """ + if suffix is None: + suffix = '.distlib' + if os.name == 'nt' and 'LOCALAPPDATA' in os.environ: + result = os.path.expandvars('$localappdata') + else: + # Assume posix, or old Windows + result = os.path.expanduser('~') + result = os.path.join(result, suffix) + # we use 'isdir' instead of 'exists', because we want to + # fail if there's a file with that name + if not os.path.isdir(result): + os.makedirs(result) + return result + + +def path_to_cache_dir(path): + """ + Convert an absolute path to a directory name for use in a cache. + + The algorithm used is: + + #. On Windows, any ``':'`` in the drive is replaced with ``'---'``. + #. Any occurrence of ``os.sep`` is replaced with ``'--'``. + #. ``'.cache'`` is appended. + """ + d, p = os.path.splitdrive(os.path.abspath(path)) + if d: + d = d.replace(':', '---') + p = p.replace(os.sep, '--') + return d + p + '.cache' + + +def ensure_slash(s): + if not s.endswith('/'): + return s + '/' + return s + + +def parse_credentials(netloc): + username = password = None + if '@' in netloc: + prefix, netloc = netloc.split('@', 1) + if ':' not in prefix: + username = prefix + else: + username, password = prefix.split(':', 1) + return username, password, netloc + + +def get_process_umask(): + result = os.umask(0o22) + os.umask(result) + return result + +def is_string_sequence(seq): + result = True + i = None + for i, s in enumerate(seq): + if not isinstance(s, string_types): + result = False + break + assert i is not None + return result + +PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-' + '([a-z0-9_.+-]+)', re.I) +PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)') + + +def split_filename(filename, project_name=None): + """ + Extract name, version, python version from a filename (no extension) + + Return name, version, pyver or None + """ + result = None + pyver = None + m = PYTHON_VERSION.search(filename) + if m: + pyver = m.group(1) + filename = filename[:m.start()] + if project_name and len(filename) > len(project_name) + 1: + m = re.match(re.escape(project_name) + r'\b', filename) + if m: + n = m.end() + result = filename[:n], filename[n + 1:], pyver + if result is None: + m = PROJECT_NAME_AND_VERSION.match(filename) + if m: + result = m.group(1), m.group(3), pyver + return result + +# Allow spaces in name because of legacy dists like "Twisted Core" +NAME_VERSION_RE = re.compile(r'(?P[\w .-]+)\s*' + r'\(\s*(?P[^\s)]+)\)$') + +def parse_name_and_version(p): + """ + A utility method used to get name and version from a string. + + From e.g. a Provides-Dist value. + + :param p: A value in a form 'foo (1.0)' + :return: The name and version as a tuple. + """ + m = NAME_VERSION_RE.match(p) + if not m: + raise DistlibException('Ill-formed name/version string: \'%s\'' % p) + d = m.groupdict() + return d['name'].strip().lower(), d['ver'] + +def get_extras(requested, available): + result = set() + requested = set(requested or []) + available = set(available or []) + if '*' in requested: + requested.remove('*') + result |= available + for r in requested: + if r == '-': + result.add(r) + elif r.startswith('-'): + unwanted = r[1:] + if unwanted not in available: + logger.warning('undeclared extra: %s' % unwanted) + if unwanted in result: + result.remove(unwanted) + else: + if r not in available: + logger.warning('undeclared extra: %s' % r) + result.add(r) + return result +# +# Extended metadata functionality +# + +def _get_external_data(url): + result = {} + try: + # urlopen might fail if it runs into redirections, + # because of Python issue #13696. Fixed in locators + # using a custom redirect handler. + resp = urlopen(url) + headers = resp.info() + if headers.get('Content-Type') != 'application/json': + logger.debug('Unexpected response for JSON request') + else: + reader = codecs.getreader('utf-8')(resp) + #data = reader.read().decode('utf-8') + #result = json.loads(data) + result = json.load(reader) + except Exception as e: + logger.exception('Failed to get external data for %s: %s', url, e) + return result + + +def get_project_data(name): + url = ('https://www.red-dove.com/pypi/projects/' + '%s/%s/project.json' % (name[0].upper(), name)) + result = _get_external_data(url) + return result + +def get_package_data(name, version): + url = ('https://www.red-dove.com/pypi/projects/' + '%s/%s/package-%s.json' % (name[0].upper(), name, version)) + return _get_external_data(url) + + +class EventMixin(object): + """ + A very simple publish/subscribe system. + """ + def __init__(self): + self._subscribers = {} + + def add(self, event, subscriber, append=True): + """ + Add a subscriber for an event. + + :param event: The name of an event. + :param subscriber: The subscriber to be added (and called when the + event is published). + :param append: Whether to append or prepend the subscriber to an + existing subscriber list for the event. + """ + subs = self._subscribers + if event not in subs: + subs[event] = deque([subscriber]) + else: + sq = subs[event] + if append: + sq.append(subscriber) + else: + sq.appendleft(subscriber) + + def remove(self, event, subscriber): + """ + Remove a subscriber for an event. + + :param event: The name of an event. + :param subscriber: The subscriber to be removed. + """ + subs = self._subscribers + if event not in subs: + raise ValueError('No subscribers: %r' % event) + subs[event].remove(subscriber) + + def get_subscribers(self, event): + """ + Return an iterator for the subscribers for an event. + :param event: The event to return subscribers for. + """ + return iter(self._subscribers.get(event, ())) + + def publish(self, event, *args, **kwargs): + """ + Publish a event and return a list of values returned by its + subscribers. + + :param event: The event to publish. + :param args: The positional arguments to pass to the event's + subscribers. + :param kwargs: The keyword arguments to pass to the event's + subscribers. + """ + result = [] + for subscriber in self.get_subscribers(event): + try: + value = subscriber(event, *args, **kwargs) + except Exception: + logger.exception('Exception during event publication') + value = None + result.append(value) + logger.debug('publish %s: args = %s, kwargs = %s, result = %s', + event, args, kwargs, result) + return result + +# +# Simple sequencing +# +class Sequencer(object): + def __init__(self): + self._preds = {} + self._succs = {} + self._nodes = set() # nodes with no preds/succs + + def add_node(self, node): + self._nodes.add(node) + + def remove_node(self, node, edges=False): + if node in self._nodes: + self._nodes.remove(node) + if edges: + for p in set(self._preds.get(node, ())): + self.remove(p, node) + for s in set(self._succs.get(node, ())): + self.remove(node, s) + # Remove empties + for k, v in list(self._preds.items()): + if not v: + del self._preds[k] + for k, v in list(self._succs.items()): + if not v: + del self._succs[k] + + def add(self, pred, succ): + assert pred != succ + self._preds.setdefault(succ, set()).add(pred) + self._succs.setdefault(pred, set()).add(succ) + + def remove(self, pred, succ): + assert pred != succ + try: + preds = self._preds[succ] + succs = self._succs[pred] + except KeyError: + raise ValueError('%r not a successor of anything' % succ) + try: + preds.remove(pred) + succs.remove(succ) + except KeyError: + raise ValueError('%r not a successor of %r' % (succ, pred)) + + def is_step(self, step): + return (step in self._preds or step in self._succs or + step in self._nodes) + + def get_steps(self, final): + if not self.is_step(final): + raise ValueError('Unknown: %r' % final) + result = [] + todo = [] + seen = set() + todo.append(final) + while todo: + step = todo.pop(0) + if step in seen: + # if a step was already seen, + # move it to the end (so it will appear earlier + # when reversed on return) ... but not for the + # final step, as that would be confusing for + # users + if step != final: + result.remove(step) + result.append(step) + else: + seen.add(step) + result.append(step) + preds = self._preds.get(step, ()) + todo.extend(preds) + return reversed(result) + + @property + def strong_connections(self): + #http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm + index_counter = [0] + stack = [] + lowlinks = {} + index = {} + result = [] + + graph = self._succs + + def strongconnect(node): + # set the depth index for this node to the smallest unused index + index[node] = index_counter[0] + lowlinks[node] = index_counter[0] + index_counter[0] += 1 + stack.append(node) + + # Consider successors + try: + successors = graph[node] + except Exception: + successors = [] + for successor in successors: + if successor not in lowlinks: + # Successor has not yet been visited + strongconnect(successor) + lowlinks[node] = min(lowlinks[node],lowlinks[successor]) + elif successor in stack: + # the successor is in the stack and hence in the current + # strongly connected component (SCC) + lowlinks[node] = min(lowlinks[node],index[successor]) + + # If `node` is a root node, pop the stack and generate an SCC + if lowlinks[node] == index[node]: + connected_component = [] + + while True: + successor = stack.pop() + connected_component.append(successor) + if successor == node: break + component = tuple(connected_component) + # storing the result + result.append(component) + + for node in graph: + if node not in lowlinks: + strongconnect(node) + + return result + + @property + def dot(self): + result = ['digraph G {'] + for succ in self._preds: + preds = self._preds[succ] + for pred in preds: + result.append(' %s -> %s;' % (pred, succ)) + for node in self._nodes: + result.append(' %s;' % node) + result.append('}') + return '\n'.join(result) + +# +# Unarchiving functionality for zip, tar, tgz, tbz, whl +# + +ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip', + '.tgz', '.tbz', '.whl') + +def unarchive(archive_filename, dest_dir, format=None, check=True): + + def check_path(path): + if not isinstance(path, text_type): + path = path.decode('utf-8') + p = os.path.abspath(os.path.join(dest_dir, path)) + if not p.startswith(dest_dir) or p[plen] != os.sep: + raise ValueError('path outside destination: %r' % p) + + dest_dir = os.path.abspath(dest_dir) + plen = len(dest_dir) + archive = None + if format is None: + if archive_filename.endswith(('.zip', '.whl')): + format = 'zip' + elif archive_filename.endswith(('.tar.gz', '.tgz')): + format = 'tgz' + mode = 'r:gz' + elif archive_filename.endswith(('.tar.bz2', '.tbz')): + format = 'tbz' + mode = 'r:bz2' + elif archive_filename.endswith('.tar'): + format = 'tar' + mode = 'r' + else: + raise ValueError('Unknown format for %r' % archive_filename) + try: + if format == 'zip': + archive = ZipFile(archive_filename, 'r') + if check: + names = archive.namelist() + for name in names: + check_path(name) + else: + archive = tarfile.open(archive_filename, mode) + if check: + names = archive.getnames() + for name in names: + check_path(name) + if format != 'zip' and sys.version_info[0] < 3: + # See Python issue 17153. If the dest path contains Unicode, + # tarfile extraction fails on Python 2.x if a member path name + # contains non-ASCII characters - it leads to an implicit + # bytes -> unicode conversion using ASCII to decode. + for tarinfo in archive.getmembers(): + if not isinstance(tarinfo.name, text_type): + tarinfo.name = tarinfo.name.decode('utf-8') + archive.extractall(dest_dir) + + finally: + if archive: + archive.close() + + +def zip_dir(directory): + """zip a directory tree into a BytesIO object""" + result = io.BytesIO() + dlen = len(directory) + with ZipFile(result, "w") as zf: + for root, dirs, files in os.walk(directory): + for name in files: + full = os.path.join(root, name) + rel = root[dlen:] + dest = os.path.join(rel, name) + zf.write(full, dest) + return result + +# +# Simple progress bar +# + +UNITS = ('', 'K', 'M', 'G','T','P') + + +class Progress(object): + unknown = 'UNKNOWN' + + def __init__(self, minval=0, maxval=100): + assert maxval is None or maxval >= minval + self.min = self.cur = minval + self.max = maxval + self.started = None + self.elapsed = 0 + self.done = False + + def update(self, curval): + assert self.min <= curval + assert self.max is None or curval <= self.max + self.cur = curval + now = time.time() + if self.started is None: + self.started = now + else: + self.elapsed = now - self.started + + def increment(self, incr): + assert incr >= 0 + self.update(self.cur + incr) + + def start(self): + self.update(self.min) + return self + + def stop(self): + if self.max is not None: + self.update(self.max) + self.done = True + + @property + def maximum(self): + return self.unknown if self.max is None else self.max + + @property + def percentage(self): + if self.done: + result = '100 %' + elif self.max is None: + result = ' ?? %' + else: + v = 100.0 * (self.cur - self.min) / (self.max - self.min) + result = '%3d %%' % v + return result + + def format_duration(self, duration): + if (duration <= 0) and self.max is None or self.cur == self.min: + result = '??:??:??' + #elif duration < 1: + # result = '--:--:--' + else: + result = time.strftime('%H:%M:%S', time.gmtime(duration)) + return result + + @property + def ETA(self): + if self.done: + prefix = 'Done' + t = self.elapsed + #import pdb; pdb.set_trace() + else: + prefix = 'ETA ' + if self.max is None: + t = -1 + elif self.elapsed == 0 or (self.cur == self.min): + t = 0 + else: + #import pdb; pdb.set_trace() + t = float(self.max - self.min) + t /= self.cur - self.min + t = (t - 1) * self.elapsed + return '%s: %s' % (prefix, self.format_duration(t)) + + @property + def speed(self): + if self.elapsed == 0: + result = 0.0 + else: + result = (self.cur - self.min) / self.elapsed + for unit in UNITS: + if result < 1000: + break + result /= 1000.0 + return '%d %sB/s' % (result, unit) + +# +# Glob functionality +# + +RICH_GLOB = re.compile(r'\{([^}]*)\}') +_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]') +_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$') + + +def iglob(path_glob): + """Extended globbing function that supports ** and {opt1,opt2,opt3}.""" + if _CHECK_RECURSIVE_GLOB.search(path_glob): + msg = """invalid glob %r: recursive glob "**" must be used alone""" + raise ValueError(msg % path_glob) + if _CHECK_MISMATCH_SET.search(path_glob): + msg = """invalid glob %r: mismatching set marker '{' or '}'""" + raise ValueError(msg % path_glob) + return _iglob(path_glob) + + +def _iglob(path_glob): + rich_path_glob = RICH_GLOB.split(path_glob, 1) + if len(rich_path_glob) > 1: + assert len(rich_path_glob) == 3, rich_path_glob + prefix, set, suffix = rich_path_glob + for item in set.split(','): + for path in _iglob(''.join((prefix, item, suffix))): + yield path + else: + if '**' not in path_glob: + for item in std_iglob(path_glob): + yield item + else: + prefix, radical = path_glob.split('**', 1) + if prefix == '': + prefix = '.' + if radical == '': + radical = '*' + else: + # we support both + radical = radical.lstrip('/') + radical = radical.lstrip('\\') + for path, dir, files in os.walk(prefix): + path = os.path.normpath(path) + for fn in _iglob(os.path.join(path, radical)): + yield fn + + + +# +# HTTPSConnection which verifies certificates/matches domains +# + +class HTTPSConnection(httplib.HTTPSConnection): + ca_certs = None # set this to the path to the certs file (.pem) + check_domain = True # only used if ca_certs is not None + + # noinspection PyPropertyAccess + def connect(self): + sock = socket.create_connection((self.host, self.port), self.timeout) + if getattr(self, '_tunnel_host', False): + self.sock = sock + self._tunnel() + + if not hasattr(ssl, 'SSLContext'): + # For 2.x + if self.ca_certs: + cert_reqs = ssl.CERT_REQUIRED + else: + cert_reqs = ssl.CERT_NONE + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=cert_reqs, + ssl_version=ssl.PROTOCOL_SSLv23, + ca_certs=self.ca_certs) + else: + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + if self.cert_file: + context.load_cert_chain(self.cert_file, self.key_file) + kwargs = {} + if self.ca_certs: + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(cafile=self.ca_certs) + if getattr(ssl, 'HAS_SNI', False): + kwargs['server_hostname'] = self.host + self.sock = context.wrap_socket(sock, **kwargs) + if self.ca_certs and self.check_domain: + try: + match_hostname(self.sock.getpeercert(), self.host) + logger.debug('Host verified: %s', self.host) + except CertificateError: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise + +class HTTPSHandler(BaseHTTPSHandler): + def __init__(self, ca_certs, check_domain=True): + BaseHTTPSHandler.__init__(self) + self.ca_certs = ca_certs + self.check_domain = check_domain + + def _conn_maker(self, *args, **kwargs): + """ + This is called to create a connection instance. Normally you'd + pass a connection class to do_open, but it doesn't actually check for + a class, and just expects a callable. As long as we behave just as a + constructor would have, we should be OK. If it ever changes so that + we *must* pass a class, we'll create an UnsafeHTTPSConnection class + which just sets check_domain to False in the class definition, and + choose which one to pass to do_open. + """ + result = HTTPSConnection(*args, **kwargs) + if self.ca_certs: + result.ca_certs = self.ca_certs + result.check_domain = self.check_domain + return result + + def https_open(self, req): + try: + return self.do_open(self._conn_maker, req) + except URLError as e: + if 'certificate verify failed' in str(e.reason): + raise CertificateError('Unable to verify server certificate ' + 'for %s' % req.host) + else: + raise + +# +# To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The- +# Middle proxy using HTTP listens on port 443, or an index mistakenly serves +# HTML containing a http://xyz link when it should be https://xyz), +# you can use the following handler class, which does not allow HTTP traffic. +# +# It works by inheriting from HTTPHandler - so build_opener won't add a +# handler for HTTP itself. +# +class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler): + def http_open(self, req): + raise URLError('Unexpected HTTP request on what should be a secure ' + 'connection: %s' % req) + +# +# XML-RPC with timeouts +# + +_ver_info = sys.version_info[:2] + +if _ver_info == (2, 6): + class HTTP(httplib.HTTP): + def __init__(self, host='', port=None, **kwargs): + if port == 0: # 0 means use port 0, not the default port + port = None + self._setup(self._connection_class(host, port, **kwargs)) + + + class HTTPS(httplib.HTTPS): + def __init__(self, host='', port=None, **kwargs): + if port == 0: # 0 means use port 0, not the default port + port = None + self._setup(self._connection_class(host, port, **kwargs)) + + +class Transport(xmlrpclib.Transport): + def __init__(self, timeout, use_datetime=0): + self.timeout = timeout + xmlrpclib.Transport.__init__(self, use_datetime) + + def make_connection(self, host): + h, eh, x509 = self.get_host_info(host) + if _ver_info == (2, 6): + result = HTTP(h, timeout=self.timeout) + else: + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPConnection(h) + result = self._connection[1] + return result + +class SafeTransport(xmlrpclib.SafeTransport): + def __init__(self, timeout, use_datetime=0): + self.timeout = timeout + xmlrpclib.SafeTransport.__init__(self, use_datetime) + + def make_connection(self, host): + h, eh, kwargs = self.get_host_info(host) + if not kwargs: + kwargs = {} + kwargs['timeout'] = self.timeout + if _ver_info == (2, 6): + result = HTTPS(host, None, **kwargs) + else: + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPSConnection(h, None, + **kwargs) + result = self._connection[1] + return result + + +class ServerProxy(xmlrpclib.ServerProxy): + def __init__(self, uri, **kwargs): + self.timeout = timeout = kwargs.pop('timeout', None) + # The above classes only come into play if a timeout + # is specified + if timeout is not None: + scheme, _ = splittype(uri) + use_datetime = kwargs.get('use_datetime', 0) + if scheme == 'https': + tcls = SafeTransport + else: + tcls = Transport + kwargs['transport'] = t = tcls(timeout, use_datetime=use_datetime) + self.transport = t + xmlrpclib.ServerProxy.__init__(self, uri, **kwargs) + +# +# CSV functionality. This is provided because on 2.x, the csv module can't +# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files. +# + +def _csv_open(fn, mode, **kwargs): + if sys.version_info[0] < 3: + mode += 'b' + else: + kwargs['newline'] = '' + return open(fn, mode, **kwargs) + + +class CSVBase(object): + defaults = { + 'delimiter': str(','), # The strs are used because we need native + 'quotechar': str('"'), # str in the csv API (2.x won't take + 'lineterminator': str('\n') # Unicode) + } + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.stream.close() + + +class CSVReader(CSVBase): + def __init__(self, **kwargs): + if 'stream' in kwargs: + stream = kwargs['stream'] + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getreader('utf-8')(stream) + self.stream = stream + else: + self.stream = _csv_open(kwargs['path'], 'r') + self.reader = csv.reader(self.stream, **self.defaults) + + def __iter__(self): + return self + + def next(self): + result = next(self.reader) + if sys.version_info[0] < 3: + for i, item in enumerate(result): + if not isinstance(item, text_type): + result[i] = item.decode('utf-8') + return result + + __next__ = next + +class CSVWriter(CSVBase): + def __init__(self, fn, **kwargs): + self.stream = _csv_open(fn, 'w') + self.writer = csv.writer(self.stream, **self.defaults) + + def writerow(self, row): + if sys.version_info[0] < 3: + r = [] + for item in row: + if isinstance(item, text_type): + item = item.encode('utf-8') + r.append(item) + row = r + self.writer.writerow(row) + +# +# Configurator functionality +# + +class Configurator(BaseConfigurator): + + value_converters = dict(BaseConfigurator.value_converters) + value_converters['inc'] = 'inc_convert' + + def __init__(self, config, base=None): + super(Configurator, self).__init__(config) + self.base = base or os.getcwd() + + def configure_custom(self, config): + def convert(o): + if isinstance(o, (list, tuple)): + result = type(o)([convert(i) for i in o]) + elif isinstance(o, dict): + if '()' in o: + result = self.configure_custom(o) + else: + result = {} + for k in o: + result[k] = convert(o[k]) + else: + result = self.convert(o) + return result + + c = config.pop('()') + if not callable(c): + c = self.resolve(c) + props = config.pop('.', None) + # Check for valid identifiers + args = config.pop('[]', ()) + if args: + args = tuple([convert(o) for o in args]) + items = [(k, convert(config[k])) for k in config if valid_ident(k)] + kwargs = dict(items) + result = c(*args, **kwargs) + if props: + for n, v in props.items(): + setattr(result, n, convert(v)) + return result + + def __getitem__(self, key): + result = self.config[key] + if isinstance(result, dict) and '()' in result: + self.config[key] = result = self.configure_custom(result) + return result + + def inc_convert(self, value): + """Default converter for the inc:// protocol.""" + if not os.path.isabs(value): + value = os.path.join(self.base, value) + with codecs.open(value, 'r', encoding='utf-8') as f: + result = json.load(f) + return result + +# +# Mixin for running subprocesses and capturing their output +# + +class SubprocessMixin(object): + def __init__(self, verbose=False, progress=None): + self.verbose = verbose + self.progress = progress + + def reader(self, stream, context): + """ + Read lines from a subprocess' output stream and either pass to a progress + callable (if specified) or write progress information to sys.stderr. + """ + progress = self.progress + verbose = self.verbose + while True: + s = stream.readline() + if not s: + break + if progress is not None: + progress(s, context) + else: + if not verbose: + sys.stderr.write('.') + else: + sys.stderr.write(s.decode('utf-8')) + sys.stderr.flush() + stream.close() + + def run_command(self, cmd, **kwargs): + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, **kwargs) + t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout')) + t1.start() + t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr')) + t2.start() + p.wait() + t1.join() + t2.join() + if self.progress is not None: + self.progress('done.', 'main') + elif self.verbose: + sys.stderr.write('done.\n') + return p diff --git a/pip/_vendor/distlib/version.py b/pip/_vendor/distlib/version.py new file mode 100644 index 000000000..4cbfe9048 --- /dev/null +++ b/pip/_vendor/distlib/version.py @@ -0,0 +1,692 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +""" +Implementation of a flexible versioning scheme providing support for PEP-386, +distribute-compatible and semantic versioning. +""" + +import logging +import re + +from .compat import string_types + +__all__ = ['NormalizedVersion', 'NormalizedMatcher', + 'LegacyVersion', 'LegacyMatcher', + 'SemanticVersion', 'SemanticMatcher', + 'UnsupportedVersionError', 'get_scheme'] + +logger = logging.getLogger(__name__) + + +class UnsupportedVersionError(ValueError): + """This is an unsupported version.""" + pass + + +class Version(object): + def __init__(self, s): + self._string = s = s.strip() + self._parts = parts = self.parse(s) + assert isinstance(parts, tuple) + assert len(parts) > 0 + + def parse(self, s): + raise NotImplementedError('please implement in a subclass') + + def _check_compatible(self, other): + if type(self) != type(other): + raise TypeError('cannot compare %r and %r' % (self, other)) + + def __eq__(self, other): + self._check_compatible(other) + return self._parts == other._parts + + def __ne__(self, other): + return not self.__eq__(other) + + def __lt__(self, other): + self._check_compatible(other) + return self._parts < other._parts + + def __gt__(self, other): + return not (self.__lt__(other) or self.__eq__(other)) + + def __le__(self, other): + return self.__lt__(other) or self.__eq__(other) + + def __ge__(self, other): + return self.__gt__(other) or self.__eq__(other) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + def __hash__(self): + return hash(self._parts) + + def __repr__(self): + return "%s('%s')" % (self.__class__.__name__, self._string) + + def __str__(self): + return self._string + + @property + def is_prerelease(self): + raise NotImplementedError('Please implement in subclasses.') + + +class Matcher(object): + version_class = None + + dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?") + comp_re = re.compile(r'^(<=|>=|<|>|!=|==|~=)?\s*([^\s,]+)$') + num_re = re.compile(r'^\d+(\.\d+)*$') + + # value is either a callable or the name of a method + _operators = { + '<': lambda v, c, p: v < c, + '>': lambda v, c, p: v > c, + '<=': lambda v, c, p: v == c or v < c, + '>=': lambda v, c, p: v == c or v > c, + '==': lambda v, c, p: v == c, + # by default, compatible => >=. + '~=': lambda v, c, p: v == c or v > c, + '!=': lambda v, c, p: v != c, + } + + def __init__(self, s): + if self.version_class is None: + raise ValueError('Please specify a version class') + self._string = s = s.strip() + m = self.dist_re.match(s) + if not m: + raise ValueError('Not valid: %r' % s) + groups = m.groups('') + self.name = groups[0].strip() + self.key = self.name.lower() # for case-insensitive comparisons + clist = [] + if groups[2]: + constraints = [c.strip() for c in groups[2].split(',')] + for c in constraints: + m = self.comp_re.match(c) + if not m: + raise ValueError('Invalid %r in %r' % (c, s)) + groups = m.groups() + op = groups[0] or '~=' + s = groups[1] + if s.endswith('.*'): + if op not in ('==', '!='): + raise ValueError('\'.*\' not allowed for ' + '%r constraints' % op) + # Could be a partial version (e.g. for '2.*') which + # won't parse as a version, so keep it as a string + vn, prefix = s[:-2], True + if not self.num_re.match(vn): + # Just to check that vn is a valid version + self.version_class(vn) + else: + # Should parse as a version, so we can create an + # instance for the comparison + vn, prefix = self.version_class(s), False + clist.append((op, vn, prefix)) + self._parts = tuple(clist) + + def match(self, version): + """ + Check if the provided version matches the constraints. + + :param version: The version to match against this instance. + :type version: Strring or :class:`Version` instance. + """ + if isinstance(version, string_types): + version = self.version_class(version) + for operator, constraint, prefix in self._parts: + f = self._operators.get(operator) + if isinstance(f, string_types): + f = getattr(self, f) + if not f: + msg = ('%r not implemented ' + 'for %s' % (operator, self.__class__.__name__)) + raise NotImplementedError(msg) + if not f(version, constraint, prefix): + return False + return True + + @property + def exact_version(self): + result = None + if len(self._parts) == 1 and self._parts[0][0] == '==': + result = self._parts[0][1] + return result + + def _check_compatible(self, other): + if type(self) != type(other) or self.name != other.name: + raise TypeError('cannot compare %s and %s' % (self, other)) + + def __eq__(self, other): + self._check_compatible(other) + return self.key == other.key and self._parts == other._parts + + def __ne__(self, other): + return not self.__eq__(other) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + def __hash__(self): + return hash(self.key) + hash(self._parts) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._string) + + def __str__(self): + return self._string + + +PEP426_VERSION_RE = re.compile(r'^(\d+\.\d+(\.\d+)*)((a|b|c|rc)(\d+))?' + r'(\.(post)(\d+))?(\.(dev)(\d+))?$') + + +def _pep426_key(s): + s = s.strip() + m = PEP426_VERSION_RE.match(s) + if not m: + raise UnsupportedVersionError('Not a valid version: %s' % s) + groups = m.groups() + nums = tuple(int(v) for v in groups[0].split('.')) + while len(nums) > 1 and nums[-1] == 0: + nums = nums[:-1] + + pre = groups[3:5] + post = groups[6:8] + dev = groups[9:11] + if pre == (None, None): + pre = () + else: + pre = pre[0], int(pre[1]) + if post == (None, None): + post = () + else: + post = post[0], int(post[1]) + if dev == (None, None): + dev = () + else: + dev = dev[0], int(dev[1]) + if not pre: + # either before pre-release, or final release and after + if not post and dev: + # before pre-release + pre = ('a', -1) # to sort before a0 + else: + pre = ('z',) # to sort after all pre-releases + # now look at the state of post and dev. + if not post: + post = ('_',) # sort before 'a' + if not dev: + dev = ('final',) + + #print('%s -> %s' % (s, m.groups())) + return nums, pre, post, dev + + +_normalized_key = _pep426_key + + +class NormalizedVersion(Version): + """A rational version. + + Good: + 1.2 # equivalent to "1.2.0" + 1.2.0 + 1.2a1 + 1.2.3a2 + 1.2.3b1 + 1.2.3c1 + 1.2.3.4 + TODO: fill this out + + Bad: + 1 # mininum two numbers + 1.2a # release level must have a release serial + 1.2.3b + """ + def parse(self, s): + result = _normalized_key(s) + # _normalized_key loses trailing zeroes in the release + # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0 + # However, PEP 440 prefix matching needs it: for example, + # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0). + m = PEP426_VERSION_RE.match(s) # must succeed + groups = m.groups() + self._release_clause = tuple(int(v) for v in groups[0].split('.')) + return result + + PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev']) + + @property + def is_prerelease(self): + return any(t[0] in self.PREREL_TAGS for t in self._parts) + + +def _match_prefix(x, y): + x = str(x) + y = str(y) + if x == y: + return True + if not x.startswith(y): + return False + n = len(y) + return x[n] == '.' + + +class NormalizedMatcher(Matcher): + version_class = NormalizedVersion + + # value is either a callable or the name of a method + _operators = { + '~=': '_match_compatible', + '<': '_match_lt', + '>': '_match_gt', + '<=': '_match_le', + '>=': '_match_ge', + '==': '_match_eq', + '!=': '_match_ne', + } + + def _match_lt(self, version, constraint, prefix): + if version >= constraint: + return False + release_clause = constraint._release_clause + pfx = '.'.join([str(i) for i in release_clause]) + return not _match_prefix(version, pfx) + + def _match_gt(self, version, constraint, prefix): + if version <= constraint: + return False + release_clause = constraint._release_clause + pfx = '.'.join([str(i) for i in release_clause]) + return not _match_prefix(version, pfx) + + def _match_le(self, version, constraint, prefix): + return version <= constraint + + def _match_ge(self, version, constraint, prefix): + return version >= constraint + + def _match_eq(self, version, constraint, prefix): + if not prefix: + result = (version == constraint) + else: + result = _match_prefix(version, constraint) + return result + + def _match_ne(self, version, constraint, prefix): + if not prefix: + result = (version != constraint) + else: + result = not _match_prefix(version, constraint) + return result + + def _match_compatible(self, version, constraint, prefix): + if version == constraint: + return True + if version < constraint: + return False + release_clause = constraint._release_clause + if len(release_clause) > 1: + release_clause = release_clause[:-1] + pfx = '.'.join([str(i) for i in release_clause]) + return _match_prefix(version, pfx) + +_REPLACEMENTS = ( + (re.compile('[.+-]$'), ''), # remove trailing puncts + (re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start + (re.compile('^[.-]'), ''), # remove leading puncts + (re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses + (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion) + (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion) + (re.compile('[.]{2,}'), '.'), # multiple runs of '.' + (re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha + (re.compile(r'\b(pre-alpha|prealpha)\b'), + 'pre.alpha'), # standardise + (re.compile(r'\(beta\)$'), 'beta'), # remove parentheses +) + +_SUFFIX_REPLACEMENTS = ( + (re.compile('^[:~._+-]+'), ''), # remove leading puncts + (re.compile('[,*")([\]]'), ''), # remove unwanted chars + (re.compile('[~:+_ -]'), '.'), # replace illegal chars + (re.compile('[.]{2,}'), '.'), # multiple runs of '.' + (re.compile(r'\.$'), ''), # trailing '.' +) + +_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)') + + +def _suggest_semantic_version(s): + """ + Try to suggest a semantic form for a version for which + _suggest_normalized_version couldn't come up with anything. + """ + result = s.strip().lower() + for pat, repl in _REPLACEMENTS: + result = pat.sub(repl, result) + if not result: + result = '0.0.0' + + # Now look for numeric prefix, and separate it out from + # the rest. + #import pdb; pdb.set_trace() + m = _NUMERIC_PREFIX.match(result) + if not m: + prefix = '0.0.0' + suffix = result + else: + prefix = m.groups()[0].split('.') + prefix = [int(i) for i in prefix] + while len(prefix) < 3: + prefix.append(0) + if len(prefix) == 3: + suffix = result[m.end():] + else: + suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():] + prefix = prefix[:3] + prefix = '.'.join([str(i) for i in prefix]) + suffix = suffix.strip() + if suffix: + #import pdb; pdb.set_trace() + # massage the suffix. + for pat, repl in _SUFFIX_REPLACEMENTS: + suffix = pat.sub(repl, suffix) + + if not suffix: + result = prefix + else: + sep = '-' if 'dev' in suffix else '+' + result = prefix + sep + suffix + if not is_semver(result): + result = None + return result + + +def _suggest_normalized_version(s): + """Suggest a normalized version close to the given version string. + + If you have a version string that isn't rational (i.e. NormalizedVersion + doesn't like it) then you might be able to get an equivalent (or close) + rational version from this function. + + This does a number of simple normalizations to the given string, based + on observation of versions currently in use on PyPI. Given a dump of + those version during PyCon 2009, 4287 of them: + - 2312 (53.93%) match NormalizedVersion without change + with the automatic suggestion + - 3474 (81.04%) match when using this suggestion method + + @param s {str} An irrational version string. + @returns A rational version string, or None, if couldn't determine one. + """ + try: + _normalized_key(s) + return s # already rational + except UnsupportedVersionError: + pass + + rs = s.lower() + + # part of this could use maketrans + for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'), + ('beta', 'b'), ('rc', 'c'), ('-final', ''), + ('-pre', 'c'), + ('-release', ''), ('.release', ''), ('-stable', ''), + ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''), + ('final', '')): + rs = rs.replace(orig, repl) + + # if something ends with dev or pre, we add a 0 + rs = re.sub(r"pre$", r"pre0", rs) + rs = re.sub(r"dev$", r"dev0", rs) + + # if we have something like "b-2" or "a.2" at the end of the + # version, that is pobably beta, alpha, etc + # let's remove the dash or dot + rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs) + + # 1.0-dev-r371 -> 1.0.dev371 + # 0.1-dev-r79 -> 0.1.dev79 + rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs) + + # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1 + rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs) + + # Clean: v0.3, v1.0 + if rs.startswith('v'): + rs = rs[1:] + + # Clean leading '0's on numbers. + #TODO: unintended side-effect on, e.g., "2003.05.09" + # PyPI stats: 77 (~2%) better + rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs) + + # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers + # zero. + # PyPI stats: 245 (7.56%) better + rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs) + + # the 'dev-rNNN' tag is a dev tag + rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs) + + # clean the - when used as a pre delimiter + rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs) + + # a terminal "dev" or "devel" can be changed into ".dev0" + rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs) + + # a terminal "dev" can be changed into ".dev0" + rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs) + + # a terminal "final" or "stable" can be removed + rs = re.sub(r"(final|stable)$", "", rs) + + # The 'r' and the '-' tags are post release tags + # 0.4a1.r10 -> 0.4a1.post10 + # 0.9.33-17222 -> 0.9.33.post17222 + # 0.9.33-r17222 -> 0.9.33.post17222 + rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs) + + # Clean 'r' instead of 'dev' usage: + # 0.9.33+r17222 -> 0.9.33.dev17222 + # 1.0dev123 -> 1.0.dev123 + # 1.0.git123 -> 1.0.dev123 + # 1.0.bzr123 -> 1.0.dev123 + # 0.1a0dev.123 -> 0.1a0.dev123 + # PyPI stats: ~150 (~4%) better + rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs) + + # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage: + # 0.2.pre1 -> 0.2c1 + # 0.2-c1 -> 0.2c1 + # 1.0preview123 -> 1.0c123 + # PyPI stats: ~21 (0.62%) better + rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs) + + # Tcl/Tk uses "px" for their post release markers + rs = re.sub(r"p(\d+)$", r".post\1", rs) + + try: + _normalized_key(rs) + except UnsupportedVersionError: + rs = None + return rs + +# +# Legacy version processing (distribute-compatible) +# + +_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I) +_VERSION_REPLACE = { + 'pre': 'c', + 'preview': 'c', + '-': 'final-', + 'rc': 'c', + 'dev': '@', + '': None, + '.': None, +} + + +def _legacy_key(s): + def get_parts(s): + result = [] + for p in _VERSION_PART.split(s.lower()): + p = _VERSION_REPLACE.get(p, p) + if p: + if '0' <= p[:1] <= '9': + p = p.zfill(8) + else: + p = '*' + p + result.append(p) + result.append('*final') + return result + + result = [] + for p in get_parts(s): + if p.startswith('*'): + if p < '*final': + while result and result[-1] == '*final-': + result.pop() + while result and result[-1] == '00000000': + result.pop() + result.append(p) + return tuple(result) + + +class LegacyVersion(Version): + def parse(self, s): + return _legacy_key(s) + + PREREL_TAGS = set( + ['*a', '*alpha', '*b', '*beta', '*c', '*rc', '*r', '*@', '*pre'] + ) + + @property + def is_prerelease(self): + return any(x in self.PREREL_TAGS for x in self._parts) + + +class LegacyMatcher(Matcher): + version_class = LegacyVersion + + _operators = dict(Matcher._operators) + _operators['~='] = '_match_compatible' + + numeric_re = re.compile('^(\d+(\.\d+)*)') + + def _match_compatible(self, version, constraint, prefix): + if version < constraint: + return False + m = self.numeric_re.match(str(constraint)) + if not m: + logger.warning('Cannot compute compatible match for version %s ' + ' and constraint %s', version, constraint) + return True + s = m.groups()[0] + if '.' in s: + s = s.rsplit('.', 1)[0] + return _match_prefix(version, s) + +# +# Semantic versioning +# + +_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)' + r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?' + r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I) + + +def is_semver(s): + return _SEMVER_RE.match(s) + + +def _semantic_key(s): + def make_tuple(s, absent): + if s is None: + result = (absent,) + else: + parts = s[1:].split('.') + # We can't compare ints and strings on Python 3, so fudge it + # by zero-filling numeric values so simulate a numeric comparison + result = tuple([p.zfill(8) if p.isdigit() else p for p in parts]) + return result + + m = is_semver(s) + if not m: + raise UnsupportedVersionError(s) + groups = m.groups() + major, minor, patch = [int(i) for i in groups[:3]] + # choose the '|' and '*' so that versions sort correctly + pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*') + return (major, minor, patch), pre, build + + +class SemanticVersion(Version): + def parse(self, s): + return _semantic_key(s) + + @property + def is_prerelease(self): + return self._parts[1][0] != '|' + + +class SemanticMatcher(Matcher): + version_class = SemanticVersion + + +class VersionScheme(object): + def __init__(self, key, matcher, suggester=None): + self.key = key + self.matcher = matcher + self.suggester = suggester + + def is_valid_version(self, s): + try: + self.matcher.version_class(s) + result = True + except UnsupportedVersionError: + result = False + return result + + def is_valid_matcher(self, s): + try: + self.matcher(s) + result = True + except UnsupportedVersionError: + result = False + return result + + def is_valid_constraint_list(self, s): + """ + Used for processing some metadata fields + """ + return self.is_valid_matcher('dummy_name (%s)' % s) + + def suggest(self, s): + if self.suggester is None: + result = None + else: + result = self.suggester(s) + return result + +_SCHEMES = { + 'normalized': VersionScheme(_normalized_key, NormalizedMatcher, + _suggest_normalized_version), + 'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s), + 'semantic': VersionScheme(_semantic_key, SemanticMatcher, + _suggest_semantic_version), +} + +_SCHEMES['default'] = _SCHEMES['normalized'] + + +def get_scheme(name): + if name not in _SCHEMES: + raise ValueError('unknown scheme name: %r' % name) + return _SCHEMES[name] diff --git a/pip/_vendor/distlib/w32.exe b/pip/_vendor/distlib/w32.exe new file mode 100644 index 000000000..62db2d226 Binary files /dev/null and b/pip/_vendor/distlib/w32.exe differ diff --git a/pip/_vendor/distlib/w64.exe b/pip/_vendor/distlib/w64.exe new file mode 100644 index 000000000..4351ccb25 Binary files /dev/null and b/pip/_vendor/distlib/w64.exe differ diff --git a/pip/_vendor/distlib/wheel.py b/pip/_vendor/distlib/wheel.py new file mode 100644 index 000000000..a2bca6ed9 --- /dev/null +++ b/pip/_vendor/distlib/wheel.py @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from __future__ import unicode_literals + +import base64 +import codecs +import datetime +import distutils.util +from email import message_from_file +import hashlib +import imp +import json +import logging +import os +import posixpath +import re +import shutil +import sys +import tempfile +import zipfile + +from . import __version__, DistlibException +from .compat import sysconfig, ZipFile, fsdecode, text_type, filter +from .database import InstalledDistribution +from .metadata import Metadata, METADATA_FILENAME +from .util import (FileOperator, convert_path, CSVReader, CSVWriter, + cached_property, get_cache_base, read_exports) + + +logger = logging.getLogger(__name__) + + +if hasattr(sys, 'pypy_version_info'): + IMP_PREFIX = 'pp' +elif sys.platform.startswith('java'): + IMP_PREFIX = 'jy' +elif sys.platform == 'cli': + IMP_PREFIX = 'ip' +else: + IMP_PREFIX = 'cp' + +VER_SUFFIX = sysconfig.get_config_var('py_version_nodot') +if not VER_SUFFIX: # pragma: no cover + VER_SUFFIX = '%s%s' % sys.version_info[:2] +PYVER = 'py' + VER_SUFFIX +IMPVER = IMP_PREFIX + VER_SUFFIX + +ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_') + +ABI = sysconfig.get_config_var('SOABI') +if ABI and ABI.startswith('cpython-'): + ABI = ABI.replace('cpython-', 'cp') +else: + ABI = 'none' + +FILENAME_RE = re.compile(r''' +(?P[^-]+) +-(?P\d+[^-]*) +(-(?P\d+[^-]*))? +-(?P\w+\d+(\.\w+\d+)*) +-(?P\w+) +-(?P\w+) +\.whl$ +''', re.IGNORECASE | re.VERBOSE) + +NAME_VERSION_RE = re.compile(r''' +(?P[^-]+) +-(?P\d+[^-]*) +(-(?P\d+[^-]*))?$ +''', re.IGNORECASE | re.VERBOSE) + +SHEBANG_RE = re.compile(br'\s*#![^\r\n]*') + +if os.sep == '/': + to_posix = lambda o: o +else: + to_posix = lambda o: o.replace(os.sep, '/') + + +class Mounter(object): + def __init__(self): + self.impure_wheels = {} + self.libs = {} + + def add(self, pathname, extensions): + self.impure_wheels[pathname] = extensions + self.libs.update(extensions) + + def remove(self, pathname): + extensions = self.impure_wheels.pop(pathname) + for k, v in extensions: + if k in self.libs: + del self.libs[k] + + def find_module(self, fullname, path=None): + if fullname in self.libs: + result = self + else: + result = None + return result + + def load_module(self, fullname): + if fullname in sys.modules: + result = sys.modules[fullname] + else: + if fullname not in self.libs: + raise ImportError('unable to find extension for %s' % fullname) + result = imp.load_dynamic(fullname, self.libs[fullname]) + result.__loader__ = self + parts = fullname.rsplit('.', 1) + if len(parts) > 1: + result.__package__ = parts[0] + return result + +_hook = Mounter() + + +class Wheel(object): + """ + Class to build and install from Wheel files (PEP 427). + """ + + wheel_version = (1, 1) + hash_kind = 'sha256' + + def __init__(self, filename=None, sign=False, verify=False): + """ + Initialise an instance using a (valid) filename. + """ + self.sign = sign + self.verify = verify + self.buildver = '' + self.pyver = [PYVER] + self.abi = ['none'] + self.arch = ['any'] + self.dirname = os.getcwd() + if filename is None: + self.name = 'dummy' + self.version = '0.1' + self._filename = self.filename + else: + m = NAME_VERSION_RE.match(filename) + if m: + info = m.groupdict('') + self.name = info['nm'] + self.version = info['vn'] + self.buildver = info['bn'] + self._filename = self.filename + else: + dirname, filename = os.path.split(filename) + m = FILENAME_RE.match(filename) + if not m: + raise DistlibException('Invalid name or ' + 'filename: %r' % filename) + if dirname: + self.dirname = os.path.abspath(dirname) + self._filename = filename + info = m.groupdict('') + self.name = info['nm'] + self.version = info['vn'] + self.buildver = info['bn'] + self.pyver = info['py'].split('.') + self.abi = info['bi'].split('.') + self.arch = info['ar'].split('.') + + @property + def filename(self): + """ + Build and return a filename from the various components. + """ + if self.buildver: + buildver = '-' + self.buildver + else: + buildver = '' + pyver = '.'.join(self.pyver) + abi = '.'.join(self.abi) + arch = '.'.join(self.arch) + return '%s-%s%s-%s-%s-%s.whl' % (self.name, self.version, buildver, + pyver, abi, arch) + + @property + def tags(self): + for pyver in self.pyver: + for abi in self.abi: + for arch in self.arch: + yield pyver, abi, arch + + @cached_property + def metadata(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + wrapper = codecs.getreader('utf-8') + metadata_filename = posixpath.join(info_dir, METADATA_FILENAME) + with ZipFile(pathname, 'r') as zf: + try: + with zf.open(metadata_filename) as bf: + wf = wrapper(bf) + result = Metadata(fileobj=wf) + except KeyError: + raise ValueError('Invalid wheel, because %s is ' + 'missing' % METADATA_FILENAME) + return result + + @cached_property + def info(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + metadata_filename = posixpath.join(info_dir, 'WHEEL') + wrapper = codecs.getreader('utf-8') + with ZipFile(pathname, 'r') as zf: + with zf.open(metadata_filename) as bf: + wf = wrapper(bf) + message = message_from_file(wf) + result = dict(message) + return result + + def process_shebang(self, data): + m = SHEBANG_RE.match(data) + if m: + data = b'#!python' + data[m.end():] + else: + cr = data.find(b'\r') + lf = data.find(b'\n') + if cr < 0 or cr > lf: + term = b'\n' + else: + if data[cr:cr + 2] == b'\r\n': + term = b'\r\n' + else: + term = b'\r' + data = b'#!python' + term + data + return data + + def get_hash(self, data, hash_kind=None): + if hash_kind is None: + hash_kind = self.hash_kind + try: + hasher = getattr(hashlib, hash_kind) + except AttributeError: + raise DistlibException('Unsupported hash algorithm: %r' % hash_kind) + result = hasher(data).digest() + result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii') + return hash_kind, result + + def write_record(self, records, record_path, base): + with CSVWriter(record_path) as writer: + for row in records: + writer.writerow(row) + p = to_posix(os.path.relpath(record_path, base)) + writer.writerow((p, '', '')) + + def build(self, paths, tags=None, wheel_version=None): + """ + Build a wheel from files in specified paths, and use any specified tags + when determining the name of the wheel. + """ + if tags is None: + tags = {} + + libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0] + if libkey == 'platlib': + is_pure = 'false' + default_pyver = [IMPVER] + default_abi = [ABI] + default_arch = [ARCH] + else: + is_pure = 'true' + default_pyver = [PYVER] + default_abi = ['none'] + default_arch = ['any'] + + self.pyver = tags.get('pyver', default_pyver) + self.abi = tags.get('abi', default_abi) + self.arch = tags.get('arch', default_arch) + + libdir = paths[libkey] + + name_ver = '%s-%s' % (self.name, self.version) + data_dir = '%s.data' % name_ver + info_dir = '%s.dist-info' % name_ver + + archive_paths = [] + + # First, stuff which is not in site-packages + for key in ('data', 'headers', 'scripts'): + if key not in paths: + continue + path = paths[key] + if os.path.isdir(path): + for root, dirs, files in os.walk(path): + for fn in files: + p = fsdecode(os.path.join(root, fn)) + rp = os.path.relpath(p, path) + ap = to_posix(os.path.join(data_dir, key, rp)) + archive_paths.append((ap, p)) + if key == 'scripts' and not p.endswith('.exe'): + with open(p, 'rb') as f: + data = f.read() + data = self.process_shebang(data) + with open(p, 'wb') as f: + f.write(data) + + # Now, stuff which is in site-packages, other than the + # distinfo stuff. + path = libdir + distinfo = None + for root, dirs, files in os.walk(path): + if root == path: + # At the top level only, save distinfo for later + # and skip it for now + for i, dn in enumerate(dirs): + dn = fsdecode(dn) + if dn.endswith('.dist-info'): + distinfo = os.path.join(root, dn) + del dirs[i] + break + assert distinfo, '.dist-info directory expected, not found' + + for fn in files: + # comment out next suite to leave .pyc files in + if fsdecode(fn).endswith(('.pyc', '.pyo')): + continue + p = os.path.join(root, fn) + rp = to_posix(os.path.relpath(p, path)) + archive_paths.append((rp, p)) + + # Now distinfo. Assumed to be flat, i.e. os.listdir is enough. + files = os.listdir(distinfo) + for fn in files: + if fn not in ('RECORD', 'INSTALLER', 'SHARED'): + p = fsdecode(os.path.join(distinfo, fn)) + ap = to_posix(os.path.join(info_dir, fn)) + archive_paths.append((ap, p)) + + wheel_metadata = [ + 'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version), + 'Generator: distlib %s' % __version__, + 'Root-Is-Purelib: %s' % is_pure, + ] + for pyver, abi, arch in self.tags: + wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch)) + p = os.path.join(distinfo, 'WHEEL') + with open(p, 'w') as f: + f.write('\n'.join(wheel_metadata)) + ap = to_posix(os.path.join(info_dir, 'WHEEL')) + archive_paths.append((ap, p)) + + # Now, at last, RECORD. + # Paths in here are archive paths - nothing else makes sense. + records = [] + hasher = getattr(hashlib, self.hash_kind) + for ap, p in archive_paths: + with open(p, 'rb') as f: + data = f.read() + digest = '%s=%s' % self.get_hash(data) + size = os.path.getsize(p) + records.append((ap, digest, size)) + + p = os.path.join(distinfo, 'RECORD') + self.write_record(records, p, libdir) + ap = to_posix(os.path.join(info_dir, 'RECORD')) + archive_paths.append((ap, p)) + # Now, ready to build the zip file + pathname = os.path.join(self.dirname, self.filename) + with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf: + for ap, p in archive_paths: + logger.debug('Wrote %s to %s in wheel', p, ap) + zf.write(p, ap) + return pathname + + def install(self, paths, maker, **kwargs): + """ + Install a wheel to the specified paths. If kwarg ``warner`` is + specified, it should be a callable, which will be called with two + tuples indicating the wheel version of this software and the wheel + version in the file, if there is a discrepancy in the versions. + This can be used to issue any warnings to raise any exceptions. + If kwarg ``lib_only`` is True, only the purelib/platlib files are + installed, and the headers, scripts, data and dist-info metadata are + not written. + + The return value is a :class:`InstalledDistribution` instance unless + ``options.lib_only`` is True, in which case the return value is ``None``. + """ + + dry_run = maker.dry_run + warner = kwargs.get('warner') + lib_only = kwargs.get('lib_only', False) + + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + data_dir = '%s.data' % name_ver + info_dir = '%s.dist-info' % name_ver + + metadata_name = posixpath.join(info_dir, METADATA_FILENAME) + wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') + record_name = posixpath.join(info_dir, 'RECORD') + + wrapper = codecs.getreader('utf-8') + + with ZipFile(pathname, 'r') as zf: + with zf.open(wheel_metadata_name) as bwf: + wf = wrapper(bwf) + message = message_from_file(wf) + wv = message['Wheel-Version'].split('.', 1) + file_version = tuple([int(i) for i in wv]) + if (file_version != self.wheel_version) and warner: + warner(self.wheel_version, file_version) + + if message['Root-Is-Purelib'] == 'true': + libdir = paths['purelib'] + else: + libdir = paths['platlib'] + + records = {} + with zf.open(record_name) as bf: + with CSVReader(stream=bf) as reader: + for row in reader: + p = row[0] + records[p] = row + + data_pfx = posixpath.join(data_dir, '') + info_pfx = posixpath.join(info_dir, '') + script_pfx = posixpath.join(data_dir, 'scripts', '') + + # make a new instance rather than a copy of maker's, + # as we mutate it + fileop = FileOperator(dry_run=dry_run) + fileop.record = True # so we can rollback if needed + + bc = not sys.dont_write_bytecode # Double negatives. Lovely! + + outfiles = [] # for RECORD writing + + # for script copying/shebang processing + workdir = tempfile.mkdtemp() + # set target dir later + # we default add_launchers to False, as the + # Python Launcher should be used instead + maker.source_dir = workdir + maker.target_dir = None + try: + for zinfo in zf.infolist(): + arcname = zinfo.filename + if isinstance(arcname, text_type): + u_arcname = arcname + else: + u_arcname = arcname.decode('utf-8') + # The signature file won't be in RECORD, + # and we don't currently don't do anything with it + if u_arcname.endswith('/RECORD.jws'): + continue + row = records[u_arcname] + if row[2] and str(zinfo.file_size) != row[2]: + raise DistlibException('size mismatch for ' + '%s' % u_arcname) + if row[1]: + kind, value = row[1].split('=', 1) + with zf.open(arcname) as bf: + data = bf.read() + _, digest = self.get_hash(data, kind) + if digest != value: + raise DistlibException('digest mismatch for ' + '%s' % arcname) + + if lib_only and u_arcname.startswith((info_pfx, data_pfx)): + logger.debug('lib_only: skipping %s', u_arcname) + continue + is_script = (u_arcname.startswith(script_pfx) + and not u_arcname.endswith('.exe')) + + if u_arcname.startswith(data_pfx): + _, where, rp = u_arcname.split('/', 2) + outfile = os.path.join(paths[where], convert_path(rp)) + else: + # meant for site-packages. + if u_arcname in (wheel_metadata_name, record_name): + continue + outfile = os.path.join(libdir, convert_path(u_arcname)) + if not is_script: + with zf.open(arcname) as bf: + fileop.copy_stream(bf, outfile) + outfiles.append(outfile) + # Double check the digest of the written file + if not dry_run and row[1]: + with open(outfile, 'rb') as bf: + data = bf.read() + _, newdigest = self.get_hash(data, kind) + if newdigest != digest: + raise DistlibException('digest mismatch ' + 'on write for ' + '%s' % outfile) + if bc and outfile.endswith('.py'): + try: + pyc = fileop.byte_compile(outfile) + outfiles.append(pyc) + except Exception: + # Don't give up if byte-compilation fails, + # but log it and perhaps warn the user + logger.warning('Byte-compilation failed', + exc_info=True) + else: + fn = os.path.basename(convert_path(arcname)) + workname = os.path.join(workdir, fn) + with zf.open(arcname) as bf: + fileop.copy_stream(bf, workname) + + dn, fn = os.path.split(outfile) + maker.target_dir = dn + filenames = maker.make(fn) + fileop.set_executable_mode(filenames) + outfiles.extend(filenames) + + if lib_only: + logger.debug('lib_only: returning None') + dist = None + else: + # Generate scripts + + # Try to get pydist.json so we can see if there are + # any commands to generate. If this fails (e.g. because + # of a legacy wheel), log a warning but don't give up. + commands = None + file_version = self.info['Wheel-Version'] + if file_version == '1.0': + # Use legacy info + ep = posixpath.join(info_dir, 'entry_points.txt') + try: + with zf.open(ep) as bwf: + epdata = read_exports(bwf) + commands = {} + for key in ('console', 'gui'): + k = '%s_scripts' % key + if k in epdata: + commands['wrap_%s' % key] = d = {} + for v in epdata[k].values(): + s = '%s:%s' % (v.prefix, v.suffix) + if v.flags: + s += ' %s' % v.flags + d[v.name] = s + except Exception: + logger.warning('Unable to read legacy script ' + 'metadata, so cannot generate ' + 'scripts') + else: + try: + with zf.open(metadata_name) as bwf: + wf = wrapper(bwf) + commands = json.load(wf).get('commands') + except Exception: + logger.warning('Unable to read JSON metadata, so ' + 'cannot generate scripts') + if commands: + console_scripts = commands.get('wrap_console', {}) + gui_scripts = commands.get('wrap_gui', {}) + if console_scripts or gui_scripts: + script_dir = paths.get('scripts', '') + if not os.path.isdir(script_dir): + raise ValueError('Valid script path not ' + 'specified') + maker.target_dir = script_dir + for k, v in console_scripts.items(): + script = '%s = %s' % (k, v) + filenames = maker.make(script) + fileop.set_executable_mode(filenames) + + if gui_scripts: + options = {'gui': True } + for k, v in gui_scripts.items(): + script = '%s = %s' % (k, v) + filenames = maker.make(script, options) + fileop.set_executable_mode(filenames) + + p = os.path.join(libdir, info_dir) + dist = InstalledDistribution(p) + + # Write SHARED + paths = dict(paths) # don't change passed in dict + del paths['purelib'] + del paths['platlib'] + paths['lib'] = libdir + p = dist.write_shared_locations(paths, dry_run) + if p: + outfiles.append(p) + + # Write RECORD + dist.write_installed_files(outfiles, paths['prefix'], + dry_run) + return dist + except Exception: # pragma: no cover + logger.exception('installation failed.') + fileop.rollback() + raise + finally: + shutil.rmtree(workdir) + + def _get_dylib_cache(self): + result = os.path.join(get_cache_base(), 'dylib-cache', sys.version[:3]) + if not os.path.isdir(result): + os.makedirs(result) + return result + + def _get_extensions(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + arcname = posixpath.join(info_dir, 'EXTENSIONS') + wrapper = codecs.getreader('utf-8') + result = [] + with ZipFile(pathname, 'r') as zf: + try: + with zf.open(arcname) as bf: + wf = wrapper(bf) + extensions = json.load(wf) + cache_base = self._get_dylib_cache() + for name, relpath in extensions.items(): + dest = os.path.join(cache_base, convert_path(relpath)) + if not os.path.exists(dest): + extract = True + else: + file_time = os.stat(dest).st_mtime + file_time = datetime.datetime.fromtimestamp(file_time) + info = zf.getinfo(relpath) + wheel_time = datetime.datetime(*info.date_time) + extract = wheel_time > file_time + if extract: + zf.extract(relpath, cache_base) + result.append((name, dest)) + except KeyError: + pass + return result + + def mount(self, append=False): + pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) + if not is_compatible(self): + msg = 'Wheel %s not mountable in this Python.' % pathname + raise DistlibException(msg) + if pathname in sys.path: + logger.debug('%s already in path', pathname) + else: + if append: + sys.path.append(pathname) + else: + sys.path.insert(0, pathname) + extensions = self._get_extensions() + if extensions: + if _hook not in sys.meta_path: + sys.meta_path.append(_hook) + _hook.add(pathname, extensions) + + def unmount(self): + pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) + if pathname not in sys.path: + logger.debug('%s not in path', pathname) + else: + sys.path.remove(pathname) + if pathname in _hook.impure_wheels: + _hook.remove(pathname) + if not _hook.impure_wheels: + if _hook in sys.meta_path: + sys.meta_path.remove(_hook) + + +def compatible_tags(): + """ + Return (pyver, abi, arch) tuples compatible with this Python. + """ + versions = [VER_SUFFIX] + major = VER_SUFFIX[0] + for minor in range(sys.version_info[1] - 1, - 1, -1): + versions.append(''.join([major, str(minor)])) + + abis = [] + for suffix, _, _ in imp.get_suffixes(): + if suffix.startswith('.abi'): + abis.append(suffix.split('.', 2)[1]) + abis.sort() + if ABI != 'none': + abis.insert(0, ABI) + abis.append('none') + result = [] + + # Most specific - our Python version, ABI and arch + for abi in abis: + result.append((''.join((IMP_PREFIX, versions[0])), abi, ARCH)) + + # where no ABI / arch dependency, but IMP_PREFIX dependency + for i, version in enumerate(versions): + result.append((''.join((IMP_PREFIX, version)), 'none', 'any')) + if i == 0: + result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any')) + + # no IMP_PREFIX, ABI or arch dependency + for i, version in enumerate(versions): + result.append((''.join(('py', version)), 'none', 'any')) + if i == 0: + result.append((''.join(('py', version[0])), 'none', 'any')) + return result + + +COMPATIBLE_TAGS = compatible_tags() + +del compatible_tags + + +def is_compatible(wheel, tags=None): + if not isinstance(wheel, Wheel): + wheel = Wheel(wheel) # assume it's a filename + result = False + if tags is None: + tags = COMPATIBLE_TAGS + for ver, abi, arch in tags: + if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch: + result = True + break + return result diff --git a/pip/_vendor/html5lib/LICENSE b/pip/_vendor/html5lib/LICENSE new file mode 100644 index 000000000..c87fa7a00 --- /dev/null +++ b/pip/_vendor/html5lib/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2006-2013 James Graham and other contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pip/_vendor/html5lib/__init__.py b/pip/_vendor/html5lib/__init__.py new file mode 100644 index 000000000..10e2b74c2 --- /dev/null +++ b/pip/_vendor/html5lib/__init__.py @@ -0,0 +1,23 @@ +""" +HTML parsing library based on the WHATWG "HTML5" +specification. The parser is designed to be compatible with existing +HTML found in the wild and implements well-defined error recovery that +is largely compatible with modern desktop web browsers. + +Example usage: + +import html5lib +f = open("my_document.html") +tree = html5lib.parse(f) +""" + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] +__version__ = "1.0b1" diff --git a/pip/_vendor/html5lib/constants.py b/pip/_vendor/html5lib/constants.py new file mode 100644 index 000000000..1866dd78e --- /dev/null +++ b/pip/_vendor/html5lib/constants.py @@ -0,0 +1,3086 @@ +from __future__ import absolute_import, division, unicode_literals + +import string +import gettext +_ = gettext.gettext + +EOF = None + +E = { + "null-character": + _("Null character in input stream, replaced with U+FFFD."), + "invalid-codepoint": + _("Invalid codepoint in stream."), + "incorrectly-placed-solidus": + _("Solidus (/) incorrectly placed in tag."), + "incorrect-cr-newline-entity": + _("Incorrect CR newline entity, replaced with LF."), + "illegal-windows-1252-entity": + _("Entity used with illegal number (windows-1252 reference)."), + "cant-convert-numeric-entity": + _("Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x)."), + "illegal-codepoint-for-numeric-entity": + _("Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x."), + "numeric-entity-without-semicolon": + _("Numeric entity didn't end with ';'."), + "expected-numeric-entity-but-got-eof": + _("Numeric entity expected. Got end of file instead."), + "expected-numeric-entity": + _("Numeric entity expected but none found."), + "named-entity-without-semicolon": + _("Named entity didn't end with ';'."), + "expected-named-entity": + _("Named entity expected. Got none."), + "attributes-in-end-tag": + _("End tag contains unexpected attributes."), + 'self-closing-flag-on-end-tag': + _("End tag contains unexpected self-closing flag."), + "expected-tag-name-but-got-right-bracket": + _("Expected tag name. Got '>' instead."), + "expected-tag-name-but-got-question-mark": + _("Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)"), + "expected-tag-name": + _("Expected tag name. Got something else instead"), + "expected-closing-tag-but-got-right-bracket": + _("Expected closing tag. Got '>' instead. Ignoring ''."), + "expected-closing-tag-but-got-eof": + _("Expected closing tag. Unexpected end of file."), + "expected-closing-tag-but-got-char": + _("Expected closing tag. Unexpected character '%(data)s' found."), + "eof-in-tag-name": + _("Unexpected end of file in the tag name."), + "expected-attribute-name-but-got-eof": + _("Unexpected end of file. Expected attribute name instead."), + "eof-in-attribute-name": + _("Unexpected end of file in attribute name."), + "invalid-character-in-attribute-name": + _("Invalid character in attribute name"), + "duplicate-attribute": + _("Dropped duplicate attribute on tag."), + "expected-end-of-tag-name-but-got-eof": + _("Unexpected end of file. Expected = or end of tag."), + "expected-attribute-value-but-got-eof": + _("Unexpected end of file. Expected attribute value."), + "expected-attribute-value-but-got-right-bracket": + _("Expected attribute value. Got '>' instead."), + 'equals-in-unquoted-attribute-value': + _("Unexpected = in unquoted attribute"), + 'unexpected-character-in-unquoted-attribute-value': + _("Unexpected character in unquoted attribute"), + "invalid-character-after-attribute-name": + _("Unexpected character after attribute name."), + "unexpected-character-after-attribute-value": + _("Unexpected character after attribute value."), + "eof-in-attribute-value-double-quote": + _("Unexpected end of file in attribute value (\")."), + "eof-in-attribute-value-single-quote": + _("Unexpected end of file in attribute value (')."), + "eof-in-attribute-value-no-quotes": + _("Unexpected end of file in attribute value."), + "unexpected-EOF-after-solidus-in-tag": + _("Unexpected end of file in tag. Expected >"), + "unexpected-character-after-solidus-in-tag": + _("Unexpected character after / in tag. Expected >"), + "expected-dashes-or-doctype": + _("Expected '--' or 'DOCTYPE'. Not found."), + "unexpected-bang-after-double-dash-in-comment": + _("Unexpected ! after -- in comment"), + "unexpected-space-after-double-dash-in-comment": + _("Unexpected space after -- in comment"), + "incorrect-comment": + _("Incorrect comment."), + "eof-in-comment": + _("Unexpected end of file in comment."), + "eof-in-comment-end-dash": + _("Unexpected end of file in comment (-)"), + "unexpected-dash-after-double-dash-in-comment": + _("Unexpected '-' after '--' found in comment."), + "eof-in-comment-double-dash": + _("Unexpected end of file in comment (--)."), + "eof-in-comment-end-space-state": + _("Unexpected end of file in comment."), + "eof-in-comment-end-bang-state": + _("Unexpected end of file in comment."), + "unexpected-char-in-comment": + _("Unexpected character in comment found."), + "need-space-after-doctype": + _("No space after literal string 'DOCTYPE'."), + "expected-doctype-name-but-got-right-bracket": + _("Unexpected > character. Expected DOCTYPE name."), + "expected-doctype-name-but-got-eof": + _("Unexpected end of file. Expected DOCTYPE name."), + "eof-in-doctype-name": + _("Unexpected end of file in DOCTYPE name."), + "eof-in-doctype": + _("Unexpected end of file in DOCTYPE."), + "expected-space-or-right-bracket-in-doctype": + _("Expected space or '>'. Got '%(data)s'"), + "unexpected-end-of-doctype": + _("Unexpected end of DOCTYPE."), + "unexpected-char-in-doctype": + _("Unexpected character in DOCTYPE."), + "eof-in-innerhtml": + _("XXX innerHTML EOF"), + "unexpected-doctype": + _("Unexpected DOCTYPE. Ignored."), + "non-html-root": + _("html needs to be the first start tag."), + "expected-doctype-but-got-eof": + _("Unexpected End of file. Expected DOCTYPE."), + "unknown-doctype": + _("Erroneous DOCTYPE."), + "expected-doctype-but-got-chars": + _("Unexpected non-space characters. Expected DOCTYPE."), + "expected-doctype-but-got-start-tag": + _("Unexpected start tag (%(name)s). Expected DOCTYPE."), + "expected-doctype-but-got-end-tag": + _("Unexpected end tag (%(name)s). Expected DOCTYPE."), + "end-tag-after-implied-root": + _("Unexpected end tag (%(name)s) after the (implied) root element."), + "expected-named-closing-tag-but-got-eof": + _("Unexpected end of file. Expected end tag (%(name)s)."), + "two-heads-are-not-better-than-one": + _("Unexpected start tag head in existing head. Ignored."), + "unexpected-end-tag": + _("Unexpected end tag (%(name)s). Ignored."), + "unexpected-start-tag-out-of-my-head": + _("Unexpected start tag (%(name)s) that can be in head. Moved."), + "unexpected-start-tag": + _("Unexpected start tag (%(name)s)."), + "missing-end-tag": + _("Missing end tag (%(name)s)."), + "missing-end-tags": + _("Missing end tags (%(name)s)."), + "unexpected-start-tag-implies-end-tag": + _("Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s)."), + "unexpected-start-tag-treated-as": + _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + "deprecated-tag": + _("Unexpected start tag %(name)s. Don't use it!"), + "unexpected-start-tag-ignored": + _("Unexpected start tag %(name)s. Ignored."), + "expected-one-end-tag-but-got-another": + _("Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s)."), + "end-tag-too-early": + _("End tag (%(name)s) seen too early. Expected other end tag."), + "end-tag-too-early-named": + _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + "end-tag-too-early-ignored": + _("End tag (%(name)s) seen too early. Ignored."), + "adoption-agency-1.1": + _("End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm."), + "adoption-agency-1.2": + _("End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm."), + "adoption-agency-1.3": + _("End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm."), + "adoption-agency-4.4": + _("End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm."), + "unexpected-end-tag-treated-as": + _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + "no-end-tag": + _("This element (%(name)s) has no end tag."), + "unexpected-implied-end-tag-in-table": + _("Unexpected implied end tag (%(name)s) in the table phase."), + "unexpected-implied-end-tag-in-table-body": + _("Unexpected implied end tag (%(name)s) in the table body phase."), + "unexpected-char-implies-table-voodoo": + _("Unexpected non-space characters in " + "table context caused voodoo mode."), + "unexpected-hidden-input-in-table": + _("Unexpected input with type hidden in table context."), + "unexpected-form-in-table": + _("Unexpected form in table context."), + "unexpected-start-tag-implies-table-voodoo": + _("Unexpected start tag (%(name)s) in " + "table context caused voodoo mode."), + "unexpected-end-tag-implies-table-voodoo": + _("Unexpected end tag (%(name)s) in " + "table context caused voodoo mode."), + "unexpected-cell-in-table-body": + _("Unexpected table cell start tag (%(name)s) " + "in the table body phase."), + "unexpected-cell-end-tag": + _("Got table cell end tag (%(name)s) " + "while required end tags are missing."), + "unexpected-end-tag-in-table-body": + _("Unexpected end tag (%(name)s) in the table body phase. Ignored."), + "unexpected-implied-end-tag-in-table-row": + _("Unexpected implied end tag (%(name)s) in the table row phase."), + "unexpected-end-tag-in-table-row": + _("Unexpected end tag (%(name)s) in the table row phase. Ignored."), + "unexpected-select-in-select": + _("Unexpected select start tag in the select phase " + "treated as select end tag."), + "unexpected-input-in-select": + _("Unexpected input start tag in the select phase."), + "unexpected-start-tag-in-select": + _("Unexpected start tag token (%(name)s in the select phase. " + "Ignored."), + "unexpected-end-tag-in-select": + _("Unexpected end tag (%(name)s) in the select phase. Ignored."), + "unexpected-table-element-start-tag-in-select-in-table": + _("Unexpected table element start tag (%(name)s) in the select in table phase."), + "unexpected-table-element-end-tag-in-select-in-table": + _("Unexpected table element end tag (%(name)s) in the select in table phase."), + "unexpected-char-after-body": + _("Unexpected non-space characters in the after body phase."), + "unexpected-start-tag-after-body": + _("Unexpected start tag token (%(name)s)" + " in the after body phase."), + "unexpected-end-tag-after-body": + _("Unexpected end tag token (%(name)s)" + " in the after body phase."), + "unexpected-char-in-frameset": + _("Unexpected characters in the frameset phase. Characters ignored."), + "unexpected-start-tag-in-frameset": + _("Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored."), + "unexpected-frameset-in-frameset-innerhtml": + _("Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML)."), + "unexpected-end-tag-in-frameset": + _("Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored."), + "unexpected-char-after-frameset": + _("Unexpected non-space characters in the " + "after frameset phase. Ignored."), + "unexpected-start-tag-after-frameset": + _("Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored."), + "unexpected-end-tag-after-frameset": + _("Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored."), + "unexpected-end-tag-after-body-innerhtml": + _("Unexpected end tag after body(innerHtml)"), + "expected-eof-but-got-char": + _("Unexpected non-space characters. Expected end of file."), + "expected-eof-but-got-start-tag": + _("Unexpected start tag (%(name)s)" + ". Expected end of file."), + "expected-eof-but-got-end-tag": + _("Unexpected end tag (%(name)s)" + ". Expected end of file."), + "eof-in-table": + _("Unexpected end of file. Expected table content."), + "eof-in-select": + _("Unexpected end of file. Expected select content."), + "eof-in-frameset": + _("Unexpected end of file. Expected frameset content."), + "eof-in-script-in-script": + _("Unexpected end of file. Expected script content."), + "eof-in-foreign-lands": + _("Unexpected end of file. Expected foreign content"), + "non-void-element-with-trailing-solidus": + _("Trailing solidus not allowed on element %(name)s"), + "unexpected-html-element-in-foreign-content": + _("Element %(name)s not allowed in a non-html context"), + "unexpected-end-tag-before-html": + _("Unexpected end tag (%(name)s) before html."), + "XXX-undefined-error": + _("Undefined error (this sucks and should be fixed)"), +} + +namespaces = { + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" +} + +scopingElements = frozenset(( + (namespaces["html"], "applet"), + (namespaces["html"], "caption"), + (namespaces["html"], "html"), + (namespaces["html"], "marquee"), + (namespaces["html"], "object"), + (namespaces["html"], "table"), + (namespaces["html"], "td"), + (namespaces["html"], "th"), + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext"), + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title"), +)) + +formattingElements = frozenset(( + (namespaces["html"], "a"), + (namespaces["html"], "b"), + (namespaces["html"], "big"), + (namespaces["html"], "code"), + (namespaces["html"], "em"), + (namespaces["html"], "font"), + (namespaces["html"], "i"), + (namespaces["html"], "nobr"), + (namespaces["html"], "s"), + (namespaces["html"], "small"), + (namespaces["html"], "strike"), + (namespaces["html"], "strong"), + (namespaces["html"], "tt"), + (namespaces["html"], "u") +)) + +specialElements = frozenset(( + (namespaces["html"], "address"), + (namespaces["html"], "applet"), + (namespaces["html"], "area"), + (namespaces["html"], "article"), + (namespaces["html"], "aside"), + (namespaces["html"], "base"), + (namespaces["html"], "basefont"), + (namespaces["html"], "bgsound"), + (namespaces["html"], "blockquote"), + (namespaces["html"], "body"), + (namespaces["html"], "br"), + (namespaces["html"], "button"), + (namespaces["html"], "caption"), + (namespaces["html"], "center"), + (namespaces["html"], "col"), + (namespaces["html"], "colgroup"), + (namespaces["html"], "command"), + (namespaces["html"], "dd"), + (namespaces["html"], "details"), + (namespaces["html"], "dir"), + (namespaces["html"], "div"), + (namespaces["html"], "dl"), + (namespaces["html"], "dt"), + (namespaces["html"], "embed"), + (namespaces["html"], "fieldset"), + (namespaces["html"], "figure"), + (namespaces["html"], "footer"), + (namespaces["html"], "form"), + (namespaces["html"], "frame"), + (namespaces["html"], "frameset"), + (namespaces["html"], "h1"), + (namespaces["html"], "h2"), + (namespaces["html"], "h3"), + (namespaces["html"], "h4"), + (namespaces["html"], "h5"), + (namespaces["html"], "h6"), + (namespaces["html"], "head"), + (namespaces["html"], "header"), + (namespaces["html"], "hr"), + (namespaces["html"], "html"), + (namespaces["html"], "iframe"), + # Note that image is commented out in the spec as "this isn't an + # element that can end up on the stack, so it doesn't matter," + (namespaces["html"], "image"), + (namespaces["html"], "img"), + (namespaces["html"], "input"), + (namespaces["html"], "isindex"), + (namespaces["html"], "li"), + (namespaces["html"], "link"), + (namespaces["html"], "listing"), + (namespaces["html"], "marquee"), + (namespaces["html"], "menu"), + (namespaces["html"], "meta"), + (namespaces["html"], "nav"), + (namespaces["html"], "noembed"), + (namespaces["html"], "noframes"), + (namespaces["html"], "noscript"), + (namespaces["html"], "object"), + (namespaces["html"], "ol"), + (namespaces["html"], "p"), + (namespaces["html"], "param"), + (namespaces["html"], "plaintext"), + (namespaces["html"], "pre"), + (namespaces["html"], "script"), + (namespaces["html"], "section"), + (namespaces["html"], "select"), + (namespaces["html"], "style"), + (namespaces["html"], "table"), + (namespaces["html"], "tbody"), + (namespaces["html"], "td"), + (namespaces["html"], "textarea"), + (namespaces["html"], "tfoot"), + (namespaces["html"], "th"), + (namespaces["html"], "thead"), + (namespaces["html"], "title"), + (namespaces["html"], "tr"), + (namespaces["html"], "ul"), + (namespaces["html"], "wbr"), + (namespaces["html"], "xmp"), + (namespaces["svg"], "foreignObject") +)) + +htmlIntegrationPointElements = frozenset(( + (namespaces["mathml"], "annotaion-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title") +)) + +mathmlTextIntegrationPointElements = frozenset(( + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext") +)) + +spaceCharacters = frozenset(( + "\t", + "\n", + "\u000C", + " ", + "\r" +)) + +tableInsertModeElements = frozenset(( + "table", + "tbody", + "tfoot", + "thead", + "tr" +)) + +asciiLowercase = frozenset(string.ascii_lowercase) +asciiUppercase = frozenset(string.ascii_uppercase) +asciiLetters = frozenset(string.ascii_letters) +digits = frozenset(string.digits) +hexDigits = frozenset(string.hexdigits) + +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) + +# Heading elements need to be ordered +headingElements = ( + "h1", + "h2", + "h3", + "h4", + "h5", + "h6" +) + +voidElements = frozenset(( + "base", + "command", + "event-source", + "link", + "meta", + "hr", + "br", + "img", + "embed", + "param", + "area", + "col", + "input", + "source", + "track" +)) + +cdataElements = frozenset(('title', 'textarea')) + +rcdataElements = frozenset(( + 'style', + 'script', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'noscript' +)) + +booleanAttributes = { + "": frozenset(("irrelevant",)), + "style": frozenset(("scoped",)), + "img": frozenset(("ismap",)), + "audio": frozenset(("autoplay", "controls")), + "video": frozenset(("autoplay", "controls")), + "script": frozenset(("defer", "async")), + "details": frozenset(("open",)), + "datagrid": frozenset(("multiple", "disabled")), + "command": frozenset(("hidden", "disabled", "checked", "default")), + "hr": frozenset(("noshade")), + "menu": frozenset(("autosubmit",)), + "fieldset": frozenset(("disabled", "readonly")), + "option": frozenset(("disabled", "readonly", "selected")), + "optgroup": frozenset(("disabled", "readonly")), + "button": frozenset(("disabled", "autofocus")), + "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), + "select": frozenset(("disabled", "readonly", "autofocus", "multiple")), + "output": frozenset(("disabled", "readonly")), +} + +# entitiesWindows1252 has to be _ordered_ and needs to have an index. It +# therefore can't be a frozenset. +entitiesWindows1252 = ( + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +) + +xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) + +entities = { + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + "AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": "\xb4", + "DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + "FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + "Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": "\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + "NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + "NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + "NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + "RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": "\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": "\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": "\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", + "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + "bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", + "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + "boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": "\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": "\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", + "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", + "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + "gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": "\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": "\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + "lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": "\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": "\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + "mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": "\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": "\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", + "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + "ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": "\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + "radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + "rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": "\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": "\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": "\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", + "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + "upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + "xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + "xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", +} + +replacementCharacters = { + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: "\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", +} + +encodings = { + '437': 'cp437', + '850': 'cp850', + '852': 'cp852', + '855': 'cp855', + '857': 'cp857', + '860': 'cp860', + '861': 'cp861', + '862': 'cp862', + '863': 'cp863', + '865': 'cp865', + '866': 'cp866', + '869': 'cp869', + 'ansix341968': 'ascii', + 'ansix341986': 'ascii', + 'arabic': 'iso8859-6', + 'ascii': 'ascii', + 'asmo708': 'iso8859-6', + 'big5': 'big5', + 'big5hkscs': 'big5hkscs', + 'chinese': 'gbk', + 'cp037': 'cp037', + 'cp1026': 'cp1026', + 'cp154': 'ptcp154', + 'cp367': 'ascii', + 'cp424': 'cp424', + 'cp437': 'cp437', + 'cp500': 'cp500', + 'cp775': 'cp775', + 'cp819': 'windows-1252', + 'cp850': 'cp850', + 'cp852': 'cp852', + 'cp855': 'cp855', + 'cp857': 'cp857', + 'cp860': 'cp860', + 'cp861': 'cp861', + 'cp862': 'cp862', + 'cp863': 'cp863', + 'cp864': 'cp864', + 'cp865': 'cp865', + 'cp866': 'cp866', + 'cp869': 'cp869', + 'cp936': 'gbk', + 'cpgr': 'cp869', + 'cpis': 'cp861', + 'csascii': 'ascii', + 'csbig5': 'big5', + 'cseuckr': 'cp949', + 'cseucpkdfmtjapanese': 'euc_jp', + 'csgb2312': 'gbk', + 'cshproman8': 'hp-roman8', + 'csibm037': 'cp037', + 'csibm1026': 'cp1026', + 'csibm424': 'cp424', + 'csibm500': 'cp500', + 'csibm855': 'cp855', + 'csibm857': 'cp857', + 'csibm860': 'cp860', + 'csibm861': 'cp861', + 'csibm863': 'cp863', + 'csibm864': 'cp864', + 'csibm865': 'cp865', + 'csibm866': 'cp866', + 'csibm869': 'cp869', + 'csiso2022jp': 'iso2022_jp', + 'csiso2022jp2': 'iso2022_jp_2', + 'csiso2022kr': 'iso2022_kr', + 'csiso58gb231280': 'gbk', + 'csisolatin1': 'windows-1252', + 'csisolatin2': 'iso8859-2', + 'csisolatin3': 'iso8859-3', + 'csisolatin4': 'iso8859-4', + 'csisolatin5': 'windows-1254', + 'csisolatin6': 'iso8859-10', + 'csisolatinarabic': 'iso8859-6', + 'csisolatincyrillic': 'iso8859-5', + 'csisolatingreek': 'iso8859-7', + 'csisolatinhebrew': 'iso8859-8', + 'cskoi8r': 'koi8-r', + 'csksc56011987': 'cp949', + 'cspc775baltic': 'cp775', + 'cspc850multilingual': 'cp850', + 'cspc862latinhebrew': 'cp862', + 'cspc8codepage437': 'cp437', + 'cspcp852': 'cp852', + 'csptcp154': 'ptcp154', + 'csshiftjis': 'shift_jis', + 'csunicode11utf7': 'utf-7', + 'cyrillic': 'iso8859-5', + 'cyrillicasian': 'ptcp154', + 'ebcdiccpbe': 'cp500', + 'ebcdiccpca': 'cp037', + 'ebcdiccpch': 'cp500', + 'ebcdiccphe': 'cp424', + 'ebcdiccpnl': 'cp037', + 'ebcdiccpus': 'cp037', + 'ebcdiccpwt': 'cp037', + 'ecma114': 'iso8859-6', + 'ecma118': 'iso8859-7', + 'elot928': 'iso8859-7', + 'eucjp': 'euc_jp', + 'euckr': 'cp949', + 'extendedunixcodepackedformatforjapanese': 'euc_jp', + 'gb18030': 'gb18030', + 'gb2312': 'gbk', + 'gb231280': 'gbk', + 'gbk': 'gbk', + 'greek': 'iso8859-7', + 'greek8': 'iso8859-7', + 'hebrew': 'iso8859-8', + 'hproman8': 'hp-roman8', + 'hzgb2312': 'hz', + 'ibm037': 'cp037', + 'ibm1026': 'cp1026', + 'ibm367': 'ascii', + 'ibm424': 'cp424', + 'ibm437': 'cp437', + 'ibm500': 'cp500', + 'ibm775': 'cp775', + 'ibm819': 'windows-1252', + 'ibm850': 'cp850', + 'ibm852': 'cp852', + 'ibm855': 'cp855', + 'ibm857': 'cp857', + 'ibm860': 'cp860', + 'ibm861': 'cp861', + 'ibm862': 'cp862', + 'ibm863': 'cp863', + 'ibm864': 'cp864', + 'ibm865': 'cp865', + 'ibm866': 'cp866', + 'ibm869': 'cp869', + 'iso2022jp': 'iso2022_jp', + 'iso2022jp2': 'iso2022_jp_2', + 'iso2022kr': 'iso2022_kr', + 'iso646irv1991': 'ascii', + 'iso646us': 'ascii', + 'iso88591': 'windows-1252', + 'iso885910': 'iso8859-10', + 'iso8859101992': 'iso8859-10', + 'iso885911987': 'windows-1252', + 'iso885913': 'iso8859-13', + 'iso885914': 'iso8859-14', + 'iso8859141998': 'iso8859-14', + 'iso885915': 'iso8859-15', + 'iso885916': 'iso8859-16', + 'iso8859162001': 'iso8859-16', + 'iso88592': 'iso8859-2', + 'iso885921987': 'iso8859-2', + 'iso88593': 'iso8859-3', + 'iso885931988': 'iso8859-3', + 'iso88594': 'iso8859-4', + 'iso885941988': 'iso8859-4', + 'iso88595': 'iso8859-5', + 'iso885951988': 'iso8859-5', + 'iso88596': 'iso8859-6', + 'iso885961987': 'iso8859-6', + 'iso88597': 'iso8859-7', + 'iso885971987': 'iso8859-7', + 'iso88598': 'iso8859-8', + 'iso885981988': 'iso8859-8', + 'iso88599': 'windows-1254', + 'iso885991989': 'windows-1254', + 'isoceltic': 'iso8859-14', + 'isoir100': 'windows-1252', + 'isoir101': 'iso8859-2', + 'isoir109': 'iso8859-3', + 'isoir110': 'iso8859-4', + 'isoir126': 'iso8859-7', + 'isoir127': 'iso8859-6', + 'isoir138': 'iso8859-8', + 'isoir144': 'iso8859-5', + 'isoir148': 'windows-1254', + 'isoir149': 'cp949', + 'isoir157': 'iso8859-10', + 'isoir199': 'iso8859-14', + 'isoir226': 'iso8859-16', + 'isoir58': 'gbk', + 'isoir6': 'ascii', + 'koi8r': 'koi8-r', + 'koi8u': 'koi8-u', + 'korean': 'cp949', + 'ksc5601': 'cp949', + 'ksc56011987': 'cp949', + 'ksc56011989': 'cp949', + 'l1': 'windows-1252', + 'l10': 'iso8859-16', + 'l2': 'iso8859-2', + 'l3': 'iso8859-3', + 'l4': 'iso8859-4', + 'l5': 'windows-1254', + 'l6': 'iso8859-10', + 'l8': 'iso8859-14', + 'latin1': 'windows-1252', + 'latin10': 'iso8859-16', + 'latin2': 'iso8859-2', + 'latin3': 'iso8859-3', + 'latin4': 'iso8859-4', + 'latin5': 'windows-1254', + 'latin6': 'iso8859-10', + 'latin8': 'iso8859-14', + 'latin9': 'iso8859-15', + 'ms936': 'gbk', + 'mskanji': 'shift_jis', + 'pt154': 'ptcp154', + 'ptcp154': 'ptcp154', + 'r8': 'hp-roman8', + 'roman8': 'hp-roman8', + 'shiftjis': 'shift_jis', + 'tis620': 'cp874', + 'unicode11utf7': 'utf-7', + 'us': 'ascii', + 'usascii': 'ascii', + 'utf16': 'utf-16', + 'utf16be': 'utf-16-be', + 'utf16le': 'utf-16-le', + 'utf8': 'utf-8', + 'windows1250': 'cp1250', + 'windows1251': 'cp1251', + 'windows1252': 'cp1252', + 'windows1253': 'cp1253', + 'windows1254': 'cp1254', + 'windows1255': 'cp1255', + 'windows1256': 'cp1256', + 'windows1257': 'cp1257', + 'windows1258': 'cp1258', + 'windows936': 'gbk', + 'x-x-big5': 'big5'} + +tokenTypes = { + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 +} + +tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"])) + + +prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + + +class DataLossWarning(UserWarning): + pass + + +class ReparseException(Exception): + pass diff --git a/pip/_vendor/html5lib/filters/__init__.py b/pip/_vendor/html5lib/filters/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pip/_vendor/html5lib/filters/_base.py b/pip/_vendor/html5lib/filters/_base.py new file mode 100644 index 000000000..c7dbaed0f --- /dev/null +++ b/pip/_vendor/html5lib/filters/_base.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + + +class Filter(object): + def __init__(self, source): + self.source = source + + def __iter__(self): + return iter(self.source) + + def __getattr__(self, name): + return getattr(self.source, name) diff --git a/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/pip/_vendor/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 000000000..fed6996c1 --- /dev/null +++ b/pip/_vendor/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +class Filter(_base.Filter): + def __iter__(self): + for token in _base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=lambda x: x[0]): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/pip/_vendor/html5lib/filters/inject_meta_charset.py b/pip/_vendor/html5lib/filters/inject_meta_charset.py new file mode 100644 index 000000000..ca33b70b5 --- /dev/null +++ b/pip/_vendor/html5lib/filters/inject_meta_charset.py @@ -0,0 +1,65 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + + +class Filter(_base.Filter): + def __init__(self, source, encoding): + _base.Filter.__init__(self, source) + self.encoding = encoding + + def __iter__(self): + state = "pre_head" + meta_found = (self.encoding is None) + pending = [] + + for token in _base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag": + if token["name"].lower() == "head": + state = "in_head" + + elif type == "EmptyTag": + if token["name"].lower() == "meta": + # replace charset with actual encoding + has_http_equiv_content_type = False + for (namespace, name), value in token["data"].items(): + if namespace is not None: + continue + elif name.lower() == 'charset': + token["data"][(namespace, name)] = self.encoding + meta_found = True + break + elif name == 'http-equiv' and value.lower() == 'content-type': + has_http_equiv_content_type = True + else: + if has_http_equiv_content_type and (None, "content") in token["data"]: + token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding + meta_found = True + + elif token["name"].lower() == "head" and not meta_found: + # insert meta into empty head + yield {"type": "StartTag", "name": "head", + "data": token["data"]} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + yield {"type": "EndTag", "name": "head"} + meta_found = True + continue + + elif type == "EndTag": + if token["name"].lower() == "head" and pending: + # insert meta into head (if necessary) and flush pending queue + yield pending.pop(0) + if not meta_found: + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + while pending: + yield pending.pop(0) + meta_found = True + state = "post_head" + + if state == "in_head": + pending.append(token) + else: + yield token diff --git a/pip/_vendor/html5lib/filters/lint.py b/pip/_vendor/html5lib/filters/lint.py new file mode 100644 index 000000000..83ad63971 --- /dev/null +++ b/pip/_vendor/html5lib/filters/lint.py @@ -0,0 +1,93 @@ +from __future__ import absolute_import, division, unicode_literals + +from gettext import gettext +_ = gettext + +from . import _base +from ..constants import cdataElements, rcdataElements, voidElements + +from ..constants import spaceCharacters +spaceCharacters = "".join(spaceCharacters) + + +class LintError(Exception): + pass + + +class Filter(_base.Filter): + def __iter__(self): + open_elements = [] + contentModelFlag = "PCDATA" + for token in _base.Filter.__iter__(self): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + name = token["name"] + if contentModelFlag != "PCDATA": + raise LintError(_("StartTag not in PCDATA content model flag: %s") % name) + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %r") % name) + if not name: + raise LintError(_("Empty tag name")) + if type == "StartTag" and name in voidElements: + raise LintError(_("Void element reported as StartTag token: %s") % name) + elif type == "EmptyTag" and name not in voidElements: + raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"]) + if type == "StartTag": + open_elements.append(name) + for name, value in token["data"]: + if not isinstance(name, str): + raise LintError(_("Attribute name is not a string: %r") % name) + if not name: + raise LintError(_("Empty attribute name")) + if not isinstance(value, str): + raise LintError(_("Attribute value is not a string: %r") % value) + if name in cdataElements: + contentModelFlag = "CDATA" + elif name in rcdataElements: + contentModelFlag = "RCDATA" + elif name == "plaintext": + contentModelFlag = "PLAINTEXT" + + elif type == "EndTag": + name = token["name"] + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %r") % name) + if not name: + raise LintError(_("Empty tag name")) + if name in voidElements: + raise LintError(_("Void element reported as EndTag token: %s") % name) + start_name = open_elements.pop() + if start_name != name: + raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name)) + contentModelFlag = "PCDATA" + + elif type == "Comment": + if contentModelFlag != "PCDATA": + raise LintError(_("Comment not in PCDATA content model flag")) + + elif type in ("Characters", "SpaceCharacters"): + data = token["data"] + if not isinstance(data, str): + raise LintError(_("Attribute name is not a string: %r") % data) + if not data: + raise LintError(_("%s token with empty data") % type) + if type == "SpaceCharacters": + data = data.strip(spaceCharacters) + if data: + raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data) + + elif type == "Doctype": + name = token["name"] + if contentModelFlag != "PCDATA": + raise LintError(_("Doctype not in PCDATA content model flag: %s") % name) + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %r") % name) + # XXX: what to do with token["data"] ? + + elif type in ("ParseError", "SerializeError"): + pass + + else: + raise LintError(_("Unknown token type: %s") % type) + + yield token diff --git a/pip/_vendor/html5lib/filters/optionaltags.py b/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 000000000..fefe0b309 --- /dev/null +++ b/pip/_vendor/html5lib/filters/optionaltags.py @@ -0,0 +1,205 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + + +class Filter(_base.Filter): + def slider(self): + previous1 = previous2 = None + for token in self.source: + if previous1 is not None: + yield previous2, previous1, token + previous2 = previous1 + previous1 = token + yield previous2, previous1, None + + def __iter__(self): + for previous, token, next in self.slider(): + type = token["type"] + if type == "StartTag": + if (token["data"] or + not self.is_optional_start(token["name"], previous, next)): + yield token + elif type == "EndTag": + if not self.is_optional_end(token["name"], next): + yield token + else: + yield token + + def is_optional_start(self, tagname, previous, next): + type = next and next["type"] or None + if tagname in 'html': + # An html element's start tag may be omitted if the first thing + # inside the html element is not a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname == 'head': + # A head element's start tag may be omitted if the first thing + # inside the head element is an element. + # XXX: we also omit the start tag if the head element is empty + if type in ("StartTag", "EmptyTag"): + return True + elif type == "EndTag": + return next["name"] == "head" + elif tagname == 'body': + # A body element's start tag may be omitted if the first thing + # inside the body element is not a space character or a comment, + # except if the first thing inside the body element is a script + # or style element and the node immediately preceding the body + # element is a head element whose end tag has been omitted. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we do not look at the preceding event, so we never omit + # the body element's start tag if it's followed by a script or + # a style element. + return next["name"] not in ('script', 'style') + else: + return True + elif tagname == 'colgroup': + # A colgroup element's start tag may be omitted if the first thing + # inside the colgroup element is a col element, and if the element + # is not immediately preceeded by another colgroup element whose + # end tag has been omitted. + if type in ("StartTag", "EmptyTag"): + # XXX: we do not look at the preceding event, so instead we never + # omit the colgroup element's end tag when it is immediately + # followed by another colgroup element. See is_optional_end. + return next["name"] == "col" + else: + return False + elif tagname == 'tbody': + # A tbody element's start tag may be omitted if the first thing + # inside the tbody element is a tr element, and if the element is + # not immediately preceeded by a tbody, thead, or tfoot element + # whose end tag has been omitted. + if type == "StartTag": + # omit the thead and tfoot elements' end tag when they are + # immediately followed by a tbody element. See is_optional_end. + if previous and previous['type'] == 'EndTag' and \ + previous['name'] in ('tbody', 'thead', 'tfoot'): + return False + return next["name"] == 'tr' + else: + return False + return False + + def is_optional_end(self, tagname, next): + type = next and next["type"] or None + if tagname in ('html', 'head', 'body'): + # An html element's end tag may be omitted if the html element + # is not immediately followed by a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname in ('li', 'optgroup', 'tr'): + # A li element's end tag may be omitted if the li element is + # immediately followed by another li element or if there is + # no more content in the parent element. + # An optgroup element's end tag may be omitted if the optgroup + # element is immediately followed by another optgroup element, + # or if there is no more content in the parent element. + # A tr element's end tag may be omitted if the tr element is + # immediately followed by another tr element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] == tagname + else: + return type == "EndTag" or type is None + elif tagname in ('dt', 'dd'): + # A dt element's end tag may be omitted if the dt element is + # immediately followed by another dt element or a dd element. + # A dd element's end tag may be omitted if the dd element is + # immediately followed by another dd element or a dt element, + # or if there is no more content in the parent element. + if type == "StartTag": + return next["name"] in ('dt', 'dd') + elif tagname == 'dd': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'p': + # A p element's end tag may be omitted if the p element is + # immediately followed by an address, article, aside, + # blockquote, datagrid, dialog, dir, div, dl, fieldset, + # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, + # nav, ol, p, pre, section, table, or ul, element, or if + # there is no more content in the parent element. + if type in ("StartTag", "EmptyTag"): + return next["name"] in ('address', 'article', 'aside', + 'blockquote', 'datagrid', 'dialog', + 'dir', 'div', 'dl', 'fieldset', 'footer', + 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'hr', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul') + else: + return type == "EndTag" or type is None + elif tagname == 'option': + # An option element's end tag may be omitted if the option + # element is immediately followed by another option element, + # or if it is immediately followed by an optgroup + # element, or if there is no more content in the parent + # element. + if type == "StartTag": + return next["name"] in ('option', 'optgroup') + else: + return type == "EndTag" or type is None + elif tagname in ('rt', 'rp'): + # An rt element's end tag may be omitted if the rt element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + # An rp element's end tag may be omitted if the rp element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('rt', 'rp') + else: + return type == "EndTag" or type is None + elif tagname == 'colgroup': + # A colgroup element's end tag may be omitted if the colgroup + # element is not immediately followed by a space character or + # a comment. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we also look for an immediately following colgroup + # element. See is_optional_start. + return next["name"] != 'colgroup' + else: + return True + elif tagname in ('thead', 'tbody'): + # A thead element's end tag may be omitted if the thead element + # is immediately followed by a tbody or tfoot element. + # A tbody element's end tag may be omitted if the tbody element + # is immediately followed by a tbody or tfoot element, or if + # there is no more content in the parent element. + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] in ['tbody', 'tfoot'] + elif tagname == 'tbody': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'tfoot': + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] == 'tbody' + else: + return type == "EndTag" or type is None + elif tagname in ('td', 'th'): + # A td element's end tag may be omitted if the td element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + # A th element's end tag may be omitted if the th element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('td', 'th') + else: + return type == "EndTag" or type is None + return False diff --git a/pip/_vendor/html5lib/filters/sanitizer.py b/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 000000000..b206b54e7 --- /dev/null +++ b/pip/_vendor/html5lib/filters/sanitizer.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base +from ..sanitizer import HTMLSanitizerMixin + + +class Filter(_base.Filter, HTMLSanitizerMixin): + def __iter__(self): + for token in _base.Filter.__iter__(self): + token = self.sanitize_token(token) + if token: + yield token diff --git a/pip/_vendor/html5lib/filters/whitespace.py b/pip/_vendor/html5lib/filters/whitespace.py new file mode 100644 index 000000000..dfc60eebd --- /dev/null +++ b/pip/_vendor/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . import _base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(_base.Filter): + + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in _base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/pip/_vendor/html5lib/html5parser.py b/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 000000000..8a5acfeb0 --- /dev/null +++ b/pip/_vendor/html5lib/html5parser.py @@ -0,0 +1,2725 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import with_metaclass + +import types + +from . import inputstream +from . import tokenizer + +from . import treebuilders +from .treebuilders._base import Marker + +from . import utils +from . import constants +from .constants import spaceCharacters, asciiUpper2Lower +from .constants import specialElements +from .constants import headingElements +from .constants import cdataElements, rcdataElements +from .constants import tokenTypes, ReparseException, namespaces +from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements + + +def parse(doc, treebuilder="etree", encoding=None, + namespaceHTMLElements=True): + """Parse a string or file-like object into a tree""" + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, encoding=encoding) + + +def parseFragment(doc, container="div", treebuilder="etree", encoding=None, + namespaceHTMLElements=True): + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, encoding=encoding) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser. Generates a tree structure from a stream of (possibly + malformed) HTML""" + + def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, + strict=False, namespaceHTMLElements=True, debug=False): + """ + strict - raise an exception when a parse error is encountered + + tree - a treebuilder class controlling the type of tree that will be + returned. Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + tokenizer - a class that provides a stream of tokens to the treebuilder. + This may be replaced for e.g. a sanitizer which converts some tags to + text + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.tokenizer_class = tokenizer + self.errors = [] + + self.phases = dict([(name, cls(self, self.tree)) for name, cls in + getPhases(debug).items()]) + + def _parse(self, stream, innerHTML=False, container="div", + encoding=None, parseMeta=True, useChardet=True, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.tokenizer = self.tokenizer_class(stream, encoding=encoding, + parseMeta=parseMeta, + useChardet=useChardet, + parser=self, **kwargs) + self.reset() + + while True: + try: + self.mainLoop() + break + except ReparseException: + self.reset() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = False + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.normalizedTokens(): + new_token = token + while new_token is not None: + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and token["selfClosing"] + and not token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def normalizedTokens(self): + for token in self.tokenizer: + yield self.normalizeToken(token) + + def parse(self, stream, encoding=None, parseMeta=True, useChardet=True): + """Parse a HTML document into a well-formed tree + + stream - a filelike object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + """ + self._parse(stream, innerHTML=False, encoding=encoding, + parseMeta=parseMeta, useChardet=useChardet) + return self.tree.getDocument() + + def parseFragment(self, stream, container="div", encoding=None, + parseMeta=False, useChardet=True): + """Parse a HTML fragment into a well-formed tree fragment + + container - name of the element we're setting the innerHTML property + if set to None, default to 'div' + + stream - a filelike object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + """ + self._parse(stream, True, container=container, encoding=encoding) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars={}): + # XXX The idea is to make errorcode mandatory. + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError + + def normalizeToken(self, token): + """ HTML5 specific normalizations to the token stream """ + + if token["type"] == tokenTypes["StartTag"]: + token["data"] = dict(token["data"][::-1]) + + return token + + def adjustMathMLAttributes(self, token): + replacements = {"definitionurl": "definitionURL"} + for k, v in replacements.items(): + if k in token["data"]: + token["data"][v] = token["data"][k] + del token["data"][k] + + def adjustSVGAttributes(self, token): + replacements = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" + } + for originalName in list(token["data"].keys()): + if originalName in replacements: + svgName = replacements[originalName] + token["data"][svgName] = token["data"][originalName] + del token["data"][originalName] + + def adjustForeignAttributes(self, token): + replacements = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + "xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) + } + + for originalName in token["data"].keys(): + if originalName in replacements: + foreignName = replacements[originalName] + token["data"][foreignName] = token["data"][originalName] + del token["data"][originalName] + + def reparseTokenNormal(self, token): + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) + last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + """Generic RCDATA/RAWTEXT Parsing algorithm + contentType - RCDATA or RAWTEXT + """ + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = dict((value, key) for key, value in + constants.tokenTypes.items()) + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + try: + info = {"type": type_names[token['type']]} + except: + raise + if token['type'] in constants.tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + return self.startTagHandler[token["name"]](token) + + def startTagHtml(self, token): + if not self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + return self.endTagHandler[token["name"]](token) + + class InitialPhase(Phase): + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" + or publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) + or publicId in + ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") + or publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None + or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) + or publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = utils.MethodDispatcher([ + (("head", "body", "html", "br"), self.endTagImplyHead) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + class InHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("title", self.startTagTitle), + (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle), + ("script", self.startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + self.startTagBaseLinkCommand), + ("meta", self.startTagMeta), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self. endTagHandler = utils.MethodDispatcher([ + ("head", self.endTagHead), + (("br", "html", "body"), self.endTagHtmlBodyBr) + ]) + self.endTagHandler.default = self.endTagOther + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. + data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoScriptNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + # XXX If we implement a parser for which scripting is disabled we need to + # implement this phase. + # + # class InHeadNoScriptPhase(Phase): + class AfterHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + self.startTagFromHead), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + # Keep a ref to this for special handling of whitespace in
+            self.processSpaceCharactersNonPre = self.processSpaceCharacters
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                (("base", "basefont", "bgsound", "command", "link", "meta",
+                  "noframes", "script", "style", "title"),
+                 self.startTagProcessInHead),
+                ("body", self.startTagBody),
+                ("frameset", self.startTagFrameset),
+                (("address", "article", "aside", "blockquote", "center", "details",
+                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+                  "section", "summary", "ul"),
+                 self.startTagCloseP),
+                (headingElements, self.startTagHeading),
+                (("pre", "listing"), self.startTagPreListing),
+                ("form", self.startTagForm),
+                (("li", "dd", "dt"), self.startTagListItem),
+                ("plaintext", self.startTagPlaintext),
+                ("a", self.startTagA),
+                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+                  "strong", "tt", "u"), self.startTagFormatting),
+                ("nobr", self.startTagNobr),
+                ("button", self.startTagButton),
+                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
+                ("xmp", self.startTagXmp),
+                ("table", self.startTagTable),
+                (("area", "br", "embed", "img", "keygen", "wbr"),
+                 self.startTagVoidFormatting),
+                (("param", "source", "track"), self.startTagParamSource),
+                ("input", self.startTagInput),
+                ("hr", self.startTagHr),
+                ("image", self.startTagImage),
+                ("isindex", self.startTagIsIndex),
+                ("textarea", self.startTagTextarea),
+                ("iframe", self.startTagIFrame),
+                (("noembed", "noframes", "noscript"), self.startTagRawtext),
+                ("select", self.startTagSelect),
+                (("rp", "rt"), self.startTagRpRt),
+                (("option", "optgroup"), self.startTagOpt),
+                (("math"), self.startTagMath),
+                (("svg"), self.startTagSvg),
+                (("caption", "col", "colgroup", "frame", "head",
+                  "tbody", "td", "tfoot", "th", "thead",
+                  "tr"), self.startTagMisplaced)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("body", self.endTagBody),
+                ("html", self.endTagHtml),
+                (("address", "article", "aside", "blockquote", "button", "center",
+                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+                  "section", "summary", "ul"), self.endTagBlock),
+                ("form", self.endTagForm),
+                ("p", self.endTagP),
+                (("dd", "dt", "li"), self.endTagListItem),
+                (headingElements, self.endTagHeading),
+                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+                  "strike", "strong", "tt", "u"), self.endTagFormatting),
+                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
+                ("br", self.endTagBr),
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def isMatchingFormattingElement(self, node1, node2):
+            if node1.name != node2.name or node1.namespace != node2.namespace:
+                return False
+            elif len(node1.attributes) != len(node2.attributes):
+                return False
+            else:
+                attributes1 = sorted(node1.attributes.items())
+                attributes2 = sorted(node2.attributes.items())
+                for attr1, attr2 in zip(attributes1, attributes2):
+                    if attr1 != attr2:
+                        return False
+            return True
+
+        # helper
+        def addFormattingElement(self, token):
+            self.tree.insertElement(token)
+            element = self.tree.openElements[-1]
+
+            matchingElements = []
+            for node in self.tree.activeFormattingElements[::-1]:
+                if node is Marker:
+                    break
+                elif self.isMatchingFormattingElement(node, element):
+                    matchingElements.append(node)
+
+            assert len(matchingElements) <= 3
+            if len(matchingElements) == 3:
+                self.tree.activeFormattingElements.remove(matchingElements[-1])
+            self.tree.activeFormattingElements.append(element)
+
+        # the real deal
+        def processEOF(self):
+            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
+                                          "tfoot", "th", "thead", "tr", "body",
+                                          "html"))
+            for node in self.tree.openElements[::-1]:
+                if node.name not in allowed_elements:
+                    self.parser.parseError("expected-closing-tag-but-got-eof")
+                    break
+            # Stop parsing
+
+        def processSpaceCharactersDropNewline(self, token):
+            # Sometimes (start of 
, , and