summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Dower <steve.dower@python.org>2019-06-25 08:20:43 -0700
committerGitHub <noreply@github.com>2019-06-25 08:20:43 -0700
commit25fbe33b92cd938e809839feaa3fda97e6ad0980 (patch)
tree92747f58fe4ac6d89432ea2a44893e016efb889b
parent8d6668c92b90d951aa6ab350ed26687141abcab9 (diff)
downloadcpython-git-25fbe33b92cd938e809839feaa3fda97e6ad0980.tar.gz
bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-14375)
-rw-r--r--Doc/library/mimetypes.rst4
-rw-r--r--Lib/mimetypes.py251
-rw-r--r--Lib/test/test_mimetypes.py51
-rw-r--r--Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst2
4 files changed, 188 insertions, 120 deletions
diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst
index 5728407cb3..f610032acb 100644
--- a/Doc/library/mimetypes.rst
+++ b/Doc/library/mimetypes.rst
@@ -93,6 +93,10 @@ behavior of the module.
Specifying an empty list for *files* will prevent the system defaults from
being applied: only the well-known values will be present from a built-in list.
+ If *files* is ``None``, the internal data structure is completely rebuilt to its
+ initial default value. This is a stable operation and will produce the same results
+ when called multiple times.
+
.. versionchanged:: 3.2
Previously, Windows registry settings were ignored.
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 8861b75362..01a16fdf9a 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -66,13 +66,13 @@ class MimeTypes:
def __init__(self, filenames=(), strict=True):
if not inited:
init()
- self.encodings_map = encodings_map.copy()
- self.suffix_map = suffix_map.copy()
+ self.encodings_map = _encodings_map_default.copy()
+ self.suffix_map = _suffix_map_default.copy()
self.types_map = ({}, {}) # dict for (non-strict, strict)
self.types_map_inv = ({}, {})
- for (ext, type) in types_map.items():
+ for (ext, type) in _types_map_default.items():
self.add_type(type, ext, True)
- for (ext, type) in common_types.items():
+ for (ext, type) in _common_types_default.items():
self.add_type(type, ext, False)
for name in filenames:
self.read(name, strict)
@@ -346,11 +346,19 @@ def init(files=None):
global suffix_map, types_map, encodings_map, common_types
global inited, _db
inited = True # so that MimeTypes.__init__() doesn't call us again
- db = MimeTypes()
- if files is None:
+
+ if files is None or _db is None:
+ db = MimeTypes()
if _winreg:
db.read_windows_registry()
- files = knownfiles
+
+ if files is None:
+ files = knownfiles
+ else:
+ files = knownfiles + list(files)
+ else:
+ db = _db
+
for file in files:
if os.path.isfile(file):
db.read(file)
@@ -374,12 +382,12 @@ def read_mime_types(file):
def _default_mime_types():
- global suffix_map
- global encodings_map
- global types_map
- global common_types
+ global suffix_map, _suffix_map_default
+ global encodings_map, _encodings_map_default
+ global types_map, _types_map_default
+ global common_types, _common_types_default
- suffix_map = {
+ suffix_map = _suffix_map_default = {
'.svgz': '.svg.gz',
'.tgz': '.tar.gz',
'.taz': '.tar.gz',
@@ -388,7 +396,7 @@ def _default_mime_types():
'.txz': '.tar.xz',
}
- encodings_map = {
+ encodings_map = _encodings_map_default = {
'.gz': 'gzip',
'.Z': 'compress',
'.bz2': 'bzip2',
@@ -399,152 +407,155 @@ def _default_mime_types():
# at http://www.iana.org/assignments/media-types
# or extensions, i.e. using the x- prefix
- # If you add to these, please keep them sorted!
- types_map = {
+ # If you add to these, please keep them sorted by mime type.
+ # Make sure the entry with the preferred file extension for a particular mime type
+ # appears before any others of the same mime type.
+ types_map = _types_map_default = {
+ '.js' : 'application/javascript',
+ '.mjs' : 'application/javascript',
+ '.json' : 'application/json',
+ '.doc' : 'application/msword',
+ '.dot' : 'application/msword',
+ '.wiz' : 'application/msword',
+ '.bin' : 'application/octet-stream',
'.a' : 'application/octet-stream',
+ '.dll' : 'application/octet-stream',
+ '.exe' : 'application/octet-stream',
+ '.o' : 'application/octet-stream',
+ '.obj' : 'application/octet-stream',
+ '.so' : 'application/octet-stream',
+ '.oda' : 'application/oda',
+ '.pdf' : 'application/pdf',
+ '.p7c' : 'application/pkcs7-mime',
+ '.ps' : 'application/postscript',
'.ai' : 'application/postscript',
- '.aif' : 'audio/x-aiff',
- '.aifc' : 'audio/x-aiff',
- '.aiff' : 'audio/x-aiff',
- '.au' : 'audio/basic',
- '.avi' : 'video/x-msvideo',
- '.bat' : 'text/plain',
+ '.eps' : 'application/postscript',
+ '.m3u' : 'application/vnd.apple.mpegurl',
+ '.m3u8' : 'application/vnd.apple.mpegurl',
+ '.xls' : 'application/vnd.ms-excel',
+ '.xlb' : 'application/vnd.ms-excel',
+ '.ppt' : 'application/vnd.ms-powerpoint',
+ '.pot' : 'application/vnd.ms-powerpoint',
+ '.ppa' : 'application/vnd.ms-powerpoint',
+ '.pps' : 'application/vnd.ms-powerpoint',
+ '.pwz' : 'application/vnd.ms-powerpoint',
+ '.wasm' : 'application/wasm',
'.bcpio' : 'application/x-bcpio',
- '.bin' : 'application/octet-stream',
- '.bmp' : 'image/bmp',
- '.c' : 'text/plain',
- '.cdf' : 'application/x-netcdf',
'.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh',
- '.css' : 'text/css',
- '.csv' : 'text/csv',
- '.dll' : 'application/octet-stream',
- '.doc' : 'application/msword',
- '.dot' : 'application/msword',
'.dvi' : 'application/x-dvi',
- '.eml' : 'message/rfc822',
- '.eps' : 'application/postscript',
- '.etx' : 'text/x-setext',
- '.exe' : 'application/octet-stream',
- '.gif' : 'image/gif',
'.gtar' : 'application/x-gtar',
- '.h' : 'text/plain',
'.hdf' : 'application/x-hdf',
- '.htm' : 'text/html',
- '.html' : 'text/html',
- '.ico' : 'image/vnd.microsoft.icon',
- '.ief' : 'image/ief',
- '.jpe' : 'image/jpeg',
- '.jpeg' : 'image/jpeg',
- '.jpg' : 'image/jpeg',
- '.js' : 'application/javascript',
- '.json' : 'application/json',
- '.ksh' : 'text/plain',
'.latex' : 'application/x-latex',
- '.m1v' : 'video/mpeg',
- '.m3u' : 'application/vnd.apple.mpegurl',
- '.m3u8' : 'application/vnd.apple.mpegurl',
- '.man' : 'application/x-troff-man',
- '.me' : 'application/x-troff-me',
- '.mht' : 'message/rfc822',
- '.mhtml' : 'message/rfc822',
'.mif' : 'application/x-mif',
- '.mjs' : 'application/javascript',
- '.mov' : 'video/quicktime',
- '.movie' : 'video/x-sgi-movie',
- '.mp2' : 'audio/mpeg',
- '.mp3' : 'audio/mpeg',
- '.mp4' : 'video/mp4',
- '.mpa' : 'video/mpeg',
- '.mpe' : 'video/mpeg',
- '.mpeg' : 'video/mpeg',
- '.mpg' : 'video/mpeg',
- '.ms' : 'application/x-troff-ms',
+ '.cdf' : 'application/x-netcdf',
'.nc' : 'application/x-netcdf',
- '.nws' : 'message/rfc822',
- '.o' : 'application/octet-stream',
- '.obj' : 'application/octet-stream',
- '.oda' : 'application/oda',
'.p12' : 'application/x-pkcs12',
- '.p7c' : 'application/pkcs7-mime',
- '.pbm' : 'image/x-portable-bitmap',
- '.pdf' : 'application/pdf',
'.pfx' : 'application/x-pkcs12',
- '.pgm' : 'image/x-portable-graymap',
- '.pl' : 'text/plain',
- '.png' : 'image/png',
- '.pnm' : 'image/x-portable-anymap',
- '.pot' : 'application/vnd.ms-powerpoint',
- '.ppa' : 'application/vnd.ms-powerpoint',
- '.ppm' : 'image/x-portable-pixmap',
- '.pps' : 'application/vnd.ms-powerpoint',
- '.ppt' : 'application/vnd.ms-powerpoint',
- '.ps' : 'application/postscript',
- '.pwz' : 'application/vnd.ms-powerpoint',
- '.py' : 'text/x-python',
+ '.ram' : 'application/x-pn-realaudio',
'.pyc' : 'application/x-python-code',
'.pyo' : 'application/x-python-code',
- '.qt' : 'video/quicktime',
- '.ra' : 'audio/x-pn-realaudio',
- '.ram' : 'application/x-pn-realaudio',
- '.ras' : 'image/x-cmu-raster',
- '.rdf' : 'application/xml',
- '.rgb' : 'image/x-rgb',
- '.roff' : 'application/x-troff',
- '.rtx' : 'text/richtext',
- '.sgm' : 'text/x-sgml',
- '.sgml' : 'text/x-sgml',
'.sh' : 'application/x-sh',
'.shar' : 'application/x-shar',
- '.snd' : 'audio/basic',
- '.so' : 'application/octet-stream',
- '.src' : 'application/x-wais-source',
+ '.swf' : 'application/x-shockwave-flash',
'.sv4cpio': 'application/x-sv4cpio',
'.sv4crc' : 'application/x-sv4crc',
- '.svg' : 'image/svg+xml',
- '.swf' : 'application/x-shockwave-flash',
- '.t' : 'application/x-troff',
'.tar' : 'application/x-tar',
'.tcl' : 'application/x-tcl',
'.tex' : 'application/x-tex',
'.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
- '.tif' : 'image/tiff',
- '.tiff' : 'image/tiff',
+ '.roff' : 'application/x-troff',
+ '.t' : 'application/x-troff',
'.tr' : 'application/x-troff',
- '.tsv' : 'text/tab-separated-values',
- '.txt' : 'text/plain',
+ '.man' : 'application/x-troff-man',
+ '.me' : 'application/x-troff-me',
+ '.ms' : 'application/x-troff-ms',
'.ustar' : 'application/x-ustar',
- '.vcf' : 'text/x-vcard',
- '.wasm' : 'application/wasm',
- '.wav' : 'audio/x-wav',
- '.webm' : 'video/webm',
- '.wiz' : 'application/msword',
+ '.src' : 'application/x-wais-source',
+ '.xsl' : 'application/xml',
+ '.rdf' : 'application/xml',
'.wsdl' : 'application/xml',
- '.xbm' : 'image/x-xbitmap',
- '.xlb' : 'application/vnd.ms-excel',
- '.xls' : 'application/vnd.ms-excel',
- '.xml' : 'text/xml',
'.xpdl' : 'application/xml',
+ '.zip' : 'application/zip',
+ '.au' : 'audio/basic',
+ '.snd' : 'audio/basic',
+ '.mp3' : 'audio/mpeg',
+ '.mp2' : 'audio/mpeg',
+ '.aif' : 'audio/x-aiff',
+ '.aifc' : 'audio/x-aiff',
+ '.aiff' : 'audio/x-aiff',
+ '.ra' : 'audio/x-pn-realaudio',
+ '.wav' : 'audio/x-wav',
+ '.bmp' : 'image/bmp',
+ '.gif' : 'image/gif',
+ '.ief' : 'image/ief',
+ '.jpg' : 'image/jpeg',
+ '.jpe' : 'image/jpeg',
+ '.jpeg' : 'image/jpeg',
+ '.png' : 'image/png',
+ '.svg' : 'image/svg+xml',
+ '.tiff' : 'image/tiff',
+ '.tif' : 'image/tiff',
+ '.ico' : 'image/vnd.microsoft.icon',
+ '.ras' : 'image/x-cmu-raster',
+ '.bmp' : 'image/x-ms-bmp',
+ '.pnm' : 'image/x-portable-anymap',
+ '.pbm' : 'image/x-portable-bitmap',
+ '.pgm' : 'image/x-portable-graymap',
+ '.ppm' : 'image/x-portable-pixmap',
+ '.rgb' : 'image/x-rgb',
+ '.xbm' : 'image/x-xbitmap',
'.xpm' : 'image/x-xpixmap',
- '.xsl' : 'application/xml',
'.xwd' : 'image/x-xwindowdump',
- '.zip' : 'application/zip',
+ '.eml' : 'message/rfc822',
+ '.mht' : 'message/rfc822',
+ '.mhtml' : 'message/rfc822',
+ '.nws' : 'message/rfc822',
+ '.css' : 'text/css',
+ '.csv' : 'text/csv',
+ '.html' : 'text/html',
+ '.htm' : 'text/html',
+ '.txt' : 'text/plain',
+ '.bat' : 'text/plain',
+ '.c' : 'text/plain',
+ '.h' : 'text/plain',
+ '.ksh' : 'text/plain',
+ '.pl' : 'text/plain',
+ '.rtx' : 'text/richtext',
+ '.tsv' : 'text/tab-separated-values',
+ '.py' : 'text/x-python',
+ '.etx' : 'text/x-setext',
+ '.sgm' : 'text/x-sgml',
+ '.sgml' : 'text/x-sgml',
+ '.vcf' : 'text/x-vcard',
+ '.xml' : 'text/xml',
+ '.mp4' : 'video/mp4',
+ '.mpeg' : 'video/mpeg',
+ '.m1v' : 'video/mpeg',
+ '.mpa' : 'video/mpeg',
+ '.mpe' : 'video/mpeg',
+ '.mpg' : 'video/mpeg',
+ '.mov' : 'video/quicktime',
+ '.qt' : 'video/quicktime',
+ '.webm' : 'video/webm',
+ '.avi' : 'video/x-msvideo',
+ '.movie' : 'video/x-sgi-movie',
}
# These are non-standard types, commonly found in the wild. They will
# only match if strict=0 flag is given to the API methods.
# Please sort these too
- common_types = {
- '.jpg' : 'image/jpg',
- '.mid' : 'audio/midi',
+ common_types = _common_types_default = {
+ '.rtf' : 'application/rtf',
'.midi': 'audio/midi',
+ '.mid' : 'audio/midi',
+ '.jpg' : 'image/jpg',
+ '.pict': 'image/pict',
'.pct' : 'image/pict',
'.pic' : 'image/pict',
- '.pict': 'image/pict',
- '.rtf' : 'application/rtf',
- '.xul' : 'text/xul'
+ '.xul' : 'text/xul',
}
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index c4b2fe2047..bfd5eeedaa 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -79,6 +79,57 @@ class MimeTypesTestCase(unittest.TestCase):
strict=True)
self.assertEqual(exts, ['.g3', '.g\xb3'])
+ def test_init_reinitializes(self):
+ # Issue 4963: make sure an init starts clean
+ # First, put some poison into the types table
+ mimetypes.add_type('foo/bar', '.foobar')
+ self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
+ # Reinitialize
+ mimetypes.init()
+ # Poison should be gone.
+ self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
+
+ def test_preferred_extension(self):
+ def check_extensions():
+ self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
+ self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
+ self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
+ self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
+ self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
+ self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
+ self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
+ self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
+ self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
+ self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
+ self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
+ self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
+ self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
+ self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
+ self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
+ self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
+
+ check_extensions()
+ mimetypes.init()
+ check_extensions()
+
+ def test_init_stability(self):
+ mimetypes.init()
+
+ suffix_map = mimetypes.suffix_map
+ encodings_map = mimetypes.encodings_map
+ types_map = mimetypes.types_map
+ common_types = mimetypes.common_types
+
+ mimetypes.init()
+ self.assertIsNot(suffix_map, mimetypes.suffix_map)
+ self.assertIsNot(encodings_map, mimetypes.encodings_map)
+ self.assertIsNot(types_map, mimetypes.types_map)
+ self.assertIsNot(common_types, mimetypes.common_types)
+ self.assertEqual(suffix_map, mimetypes.suffix_map)
+ self.assertEqual(encodings_map, mimetypes.encodings_map)
+ self.assertEqual(types_map, mimetypes.types_map)
+ self.assertEqual(common_types, mimetypes.common_types)
+
def test_path_like_ob(self):
filename = "LICENSE.txt"
filepath = pathlib.Path(filename)
diff --git a/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst
new file mode 100644
index 0000000000..3b060052fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst
@@ -0,0 +1,2 @@
+Fixed non-deterministic behavior related to mimetypes extension mapping and
+module reinitialization.