diff options
Diffstat (limited to 'src/pip/_vendor/chardet/langhebrewmodel.py')
-rw-r--r-- | src/pip/_vendor/chardet/langhebrewmodel.py | 533 |
1 files changed, 265 insertions, 268 deletions
diff --git a/src/pip/_vendor/chardet/langhebrewmodel.py b/src/pip/_vendor/chardet/langhebrewmodel.py index 484c652a4..56d297587 100644 --- a/src/pip/_vendor/chardet/langhebrewmodel.py +++ b/src/pip/_vendor/chardet/langhebrewmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ HEBREW_LANG_MODEL = { # Character Mapping Table(s): WINDOWS_1255_HEBREW_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 69, # 'A' - 66: 91, # 'B' - 67: 79, # 'C' - 68: 80, # 'D' - 69: 92, # 'E' - 70: 89, # 'F' - 71: 97, # 'G' - 72: 90, # 'H' - 73: 68, # 'I' - 74: 111, # 'J' - 75: 112, # 'K' - 76: 82, # 'L' - 77: 73, # 'M' - 78: 95, # 'N' - 79: 85, # 'O' - 80: 78, # 'P' - 81: 121, # 'Q' - 82: 86, # 'R' - 83: 71, # 'S' - 84: 67, # 'T' - 85: 102, # 'U' - 86: 107, # 'V' - 87: 84, # 'W' - 88: 114, # 'X' - 89: 103, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 50, # 'a' - 98: 74, # 'b' - 99: 60, # 'c' - 100: 61, # 'd' - 101: 42, # 'e' - 102: 76, # 'f' - 103: 70, # 'g' - 104: 64, # 'h' - 105: 53, # 'i' - 106: 105, # 'j' - 107: 93, # 'k' - 108: 56, # 'l' - 109: 65, # 'm' - 110: 54, # 'n' - 111: 49, # 'o' - 112: 66, # 'p' - 113: 110, # 'q' - 114: 51, # 'r' - 115: 43, # 's' - 116: 44, # 't' - 117: 63, # 'u' - 118: 81, # 'v' - 119: 77, # 'w' - 120: 98, # 'x' - 121: 75, # 'y' - 122: 108, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 124, # '€' - 129: 202, # None - 130: 203, # '‚' - 131: 204, # 'ƒ' - 132: 205, # '„' - 133: 40, # '…' - 134: 58, # '†' - 135: 206, # '‡' - 136: 207, # 'ˆ' - 137: 208, # '‰' - 138: 209, # None - 139: 210, # '‹' - 140: 211, # None - 141: 212, # None - 142: 213, # None - 143: 214, # None - 144: 215, # None - 145: 83, # '‘' - 146: 52, # '’' - 147: 47, # '“' - 148: 46, # '”' - 149: 72, # '•' - 150: 32, # '–' - 151: 94, # '—' - 152: 216, # '˜' - 153: 113, # '™' - 154: 217, # None - 155: 109, # '›' - 156: 218, # None - 157: 219, # None - 158: 220, # None - 159: 221, # None - 160: 34, # '\xa0' - 161: 116, # '¡' - 162: 222, # '¢' - 163: 118, # '£' - 164: 100, # '₪' - 165: 223, # '¥' - 166: 224, # '¦' - 167: 117, # '§' - 168: 119, # '¨' - 169: 104, # '©' - 170: 125, # '×' - 171: 225, # '«' - 172: 226, # '¬' - 173: 87, # '\xad' - 174: 99, # '®' - 175: 227, # '¯' - 176: 106, # '°' - 177: 122, # '±' - 178: 123, # '²' - 179: 228, # '³' - 180: 55, # '´' - 181: 229, # 'µ' - 182: 230, # '¶' - 183: 101, # '·' - 184: 231, # '¸' - 185: 232, # '¹' - 186: 120, # '÷' - 187: 233, # '»' - 188: 48, # '¼' - 189: 39, # '½' - 190: 57, # '¾' - 191: 234, # '¿' - 192: 30, # 'ְ' - 193: 59, # 'ֱ' - 194: 41, # 'ֲ' - 195: 88, # 'ֳ' - 196: 33, # 'ִ' - 197: 37, # 'ֵ' - 198: 36, # 'ֶ' - 199: 31, # 'ַ' - 200: 29, # 'ָ' - 201: 35, # 'ֹ' - 202: 235, # None - 203: 62, # 'ֻ' - 204: 28, # 'ּ' - 205: 236, # 'ֽ' - 206: 126, # '־' - 207: 237, # 'ֿ' - 208: 238, # '׀' - 209: 38, # 'ׁ' - 210: 45, # 'ׂ' - 211: 239, # '׃' - 212: 240, # 'װ' - 213: 241, # 'ױ' - 214: 242, # 'ײ' - 215: 243, # '׳' - 216: 127, # '״' - 217: 244, # None - 218: 245, # None - 219: 246, # None - 220: 247, # None - 221: 248, # None - 222: 249, # None - 223: 250, # None - 224: 9, # 'א' - 225: 8, # 'ב' - 226: 20, # 'ג' - 227: 16, # 'ד' - 228: 3, # 'ה' - 229: 2, # 'ו' - 230: 24, # 'ז' - 231: 14, # 'ח' - 232: 22, # 'ט' - 233: 1, # 'י' - 234: 25, # 'ך' - 235: 15, # 'כ' - 236: 4, # 'ל' - 237: 11, # 'ם' - 238: 6, # 'מ' - 239: 23, # 'ן' - 240: 12, # 'נ' - 241: 19, # 'ס' - 242: 13, # 'ע' - 243: 26, # 'ף' - 244: 18, # 'פ' - 245: 27, # 'ץ' - 246: 21, # 'צ' - 247: 17, # 'ק' - 248: 7, # 'ר' - 249: 10, # 'ש' - 250: 5, # 'ת' - 251: 251, # None - 252: 252, # None - 253: 128, # '\u200e' - 254: 96, # '\u200f' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 69, # 'A' + 66: 91, # 'B' + 67: 79, # 'C' + 68: 80, # 'D' + 69: 92, # 'E' + 70: 89, # 'F' + 71: 97, # 'G' + 72: 90, # 'H' + 73: 68, # 'I' + 74: 111, # 'J' + 75: 112, # 'K' + 76: 82, # 'L' + 77: 73, # 'M' + 78: 95, # 'N' + 79: 85, # 'O' + 80: 78, # 'P' + 81: 121, # 'Q' + 82: 86, # 'R' + 83: 71, # 'S' + 84: 67, # 'T' + 85: 102, # 'U' + 86: 107, # 'V' + 87: 84, # 'W' + 88: 114, # 'X' + 89: 103, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 50, # 'a' + 98: 74, # 'b' + 99: 60, # 'c' + 100: 61, # 'd' + 101: 42, # 'e' + 102: 76, # 'f' + 103: 70, # 'g' + 104: 64, # 'h' + 105: 53, # 'i' + 106: 105, # 'j' + 107: 93, # 'k' + 108: 56, # 'l' + 109: 65, # 'm' + 110: 54, # 'n' + 111: 49, # 'o' + 112: 66, # 'p' + 113: 110, # 'q' + 114: 51, # 'r' + 115: 43, # 's' + 116: 44, # 't' + 117: 63, # 'u' + 118: 81, # 'v' + 119: 77, # 'w' + 120: 98, # 'x' + 121: 75, # 'y' + 122: 108, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 124, # '€' + 129: 202, # None + 130: 203, # '‚' + 131: 204, # 'ƒ' + 132: 205, # '„' + 133: 40, # '…' + 134: 58, # '†' + 135: 206, # '‡' + 136: 207, # 'ˆ' + 137: 208, # '‰' + 138: 209, # None + 139: 210, # '‹' + 140: 211, # None + 141: 212, # None + 142: 213, # None + 143: 214, # None + 144: 215, # None + 145: 83, # '‘' + 146: 52, # '’' + 147: 47, # '“' + 148: 46, # '”' + 149: 72, # '•' + 150: 32, # '–' + 151: 94, # '—' + 152: 216, # '˜' + 153: 113, # '™' + 154: 217, # None + 155: 109, # '›' + 156: 218, # None + 157: 219, # None + 158: 220, # None + 159: 221, # None + 160: 34, # '\xa0' + 161: 116, # '¡' + 162: 222, # '¢' + 163: 118, # '£' + 164: 100, # '₪' + 165: 223, # '¥' + 166: 224, # '¦' + 167: 117, # '§' + 168: 119, # '¨' + 169: 104, # '©' + 170: 125, # '×' + 171: 225, # '«' + 172: 226, # '¬' + 173: 87, # '\xad' + 174: 99, # '®' + 175: 227, # '¯' + 176: 106, # '°' + 177: 122, # '±' + 178: 123, # '²' + 179: 228, # '³' + 180: 55, # '´' + 181: 229, # 'µ' + 182: 230, # '¶' + 183: 101, # '·' + 184: 231, # '¸' + 185: 232, # '¹' + 186: 120, # '÷' + 187: 233, # '»' + 188: 48, # '¼' + 189: 39, # '½' + 190: 57, # '¾' + 191: 234, # '¿' + 192: 30, # 'ְ' + 193: 59, # 'ֱ' + 194: 41, # 'ֲ' + 195: 88, # 'ֳ' + 196: 33, # 'ִ' + 197: 37, # 'ֵ' + 198: 36, # 'ֶ' + 199: 31, # 'ַ' + 200: 29, # 'ָ' + 201: 35, # 'ֹ' + 202: 235, # None + 203: 62, # 'ֻ' + 204: 28, # 'ּ' + 205: 236, # 'ֽ' + 206: 126, # '־' + 207: 237, # 'ֿ' + 208: 238, # '׀' + 209: 38, # 'ׁ' + 210: 45, # 'ׂ' + 211: 239, # '׃' + 212: 240, # 'װ' + 213: 241, # 'ױ' + 214: 242, # 'ײ' + 215: 243, # '׳' + 216: 127, # '״' + 217: 244, # None + 218: 245, # None + 219: 246, # None + 220: 247, # None + 221: 248, # None + 222: 249, # None + 223: 250, # None + 224: 9, # 'א' + 225: 8, # 'ב' + 226: 20, # 'ג' + 227: 16, # 'ד' + 228: 3, # 'ה' + 229: 2, # 'ו' + 230: 24, # 'ז' + 231: 14, # 'ח' + 232: 22, # 'ט' + 233: 1, # 'י' + 234: 25, # 'ך' + 235: 15, # 'כ' + 236: 4, # 'ל' + 237: 11, # 'ם' + 238: 6, # 'מ' + 239: 23, # 'ן' + 240: 12, # 'נ' + 241: 19, # 'ס' + 242: 13, # 'ע' + 243: 26, # 'ף' + 244: 18, # 'פ' + 245: 27, # 'ץ' + 246: 21, # 'צ' + 247: 17, # 'ק' + 248: 7, # 'ר' + 249: 10, # 'ש' + 250: 5, # 'ת' + 251: 251, # None + 252: 252, # None + 253: 128, # '\u200e' + 254: 96, # '\u200f' + 255: 253, # None } -WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel(charset_name='windows-1255', - language='Hebrew', - char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, - language_model=HEBREW_LANG_MODEL, - typical_positive_ratio=0.984004, - keep_ascii_letters=False, - alphabet='אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ') - +WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel( + charset_name="windows-1255", + language="Hebrew", + char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, + language_model=HEBREW_LANG_MODEL, + typical_positive_ratio=0.984004, + keep_ascii_letters=False, + alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ", +) |