update mimeparse

author: Ian Bicking <ianb@colorstudy.com> 2010-03-31 12:52:33 -0500
committer: Ian Bicking <ianb@colorstudy.com> 2010-03-31 12:52:33 -0500
commit: f57891686d88b9754017fe2aaf462e2549323b77 (patch)
tree: 0bea06e7937aa3d805df7c065354d7cf55d40fef /paste/util
parent: 40761dc8b9b71ba2788a4caa7a696709748eb2f5 (diff)
download: paste-f57891686d88b9754017fe2aaf462e2549323b77.tar.gz
1 files changed, 39 insertions, 97 deletions
diff --git a/paste/util/mimeparse.py b/paste/util/mimeparse.py
index 2523da1..0fd91e7 100644
--- a/paste/util/mimeparse.py
+++ b/paste/util/mimeparse.py
@@ -2,26 +2,25 @@
 
 This module provides basic functions for handling mime-types. It can handle
 matching mime-types against a list of media-ranges. See section 14.1 of 
-the HTTP specification [RFC 2616] for a complete explaination.
+the HTTP specification [RFC 2616] for a complete explanation.
 
    http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
 
 Contents:
-    - parse_mime_type():   Parses a mime-type into it's component parts.
+    - parse_mime_type():   Parses a mime-type into its component parts.
     - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter.
     - quality():           Determines the quality ('q') of a mime-type when compared against a list of media-ranges.
     - quality_parsed():    Just like quality() except the second parameter must be pre-parsed.
     - best_match():        Choose the mime-type with the highest quality ('q') from a list of candidates. 
-    - desired_matches():   Provide a list in order of server-desired priorities from a list of candidates.
 """
 
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 __author__ = 'Joe Gregorio'
 __email__ = "joe@bitworking.org"
 __credits__ = ""
 
 def parse_mime_type(mime_type):
-    """Carves up a mime_type and returns a tuple of the
+    """Carves up a mime-type and returns a tuple of the
        (type, subtype, params) where 'params' is a dictionary
        of all the parameters for the media range.
        For example, the media range 'application/xhtml;q=0.5' would
@@ -32,21 +31,21 @@ def parse_mime_type(mime_type):
     parts = mime_type.split(";")
     params = dict([tuple([s.strip() for s in param.split("=")])\
             for param in parts[1:] ])
-    try:
-        (type, subtype) = parts[0].split("/")
-    except ValueError:
-        type, subtype = parts[0], '*'
+    full_type = parts[0].strip()
+    # Java URLConnection class sends an Accept header that includes a single "*"
+    # Turn it into a legal wildcard.
+    if full_type == '*': full_type = '*/*'
+    (type, subtype) = full_type.split("/")
     return (type.strip(), subtype.strip(), params)
 
 def parse_media_range(range):
     """Carves up a media range and returns a tuple of the
        (type, subtype, params) where 'params' is a dictionary
        of all the parameters for the media range.
-       
-       For example, the media range ``application/*;q=0.5`` would
-       get parsed into::
+       For example, the media range 'application/*;q=0.5' would
+       get parsed into:
 
-         ('application', '*', {'q', '0.5'})
+       ('application', '*', {'q', '0.5'})
 
        In addition this function also guarantees that there 
        is a value for 'q' in the params dictionary, filling it
@@ -59,25 +58,24 @@ def parse_media_range(range):
         params['q'] = '1'
     return (type, subtype, params)
 
-def quality_parsed(mime_type, parsed_ranges):
-    """Find the best match for a given mime_type against 
+def fitness_and_quality_parsed(mime_type, parsed_ranges):
+    """Find the best match for a given mime-type against 
        a list of media_ranges that have already been 
-       parsed by parse_media_range(). Returns the 
-       'q' quality parameter of the best match, 0 if no
-       match was found. This function bahaves the same as quality()
-       except that 'parsed_ranges' must be a list of
-       parsed media ranges. """
+       parsed by parse_media_range(). Returns a tuple of
+       the fitness value and the value of the 'q' quality
+       parameter of the best match, or (-1, 0) if no match
+       was found. Just as for quality_parsed(), 'parsed_ranges'
+       must be a list of parsed media ranges. """
     best_fitness = -1 
-    best_match = ""
     best_fit_q = 0
     (target_type, target_subtype, target_params) =\
             parse_media_range(mime_type)
     for (type, subtype, params) in parsed_ranges:
-        param_matches = sum([1 for (key, value) in \
-                target_params.iteritems() if key != 'q' and \
-                params.has_key(key) and value == params[key]])
-        if (type == target_type or type == '*') and \
-                (subtype == target_subtype or subtype == "*"):
+        if (type == target_type or type == '*' or target_type == '*') and \
+                (subtype == target_subtype or subtype == '*' or target_subtype == '*'):
+            param_matches = reduce(lambda x, y: x+y, [1 for (key, value) in \
+                    target_params.iteritems() if key != 'q' and \
+                    params.has_key(key) and value == params[key]], 0)
             fitness = (type == target_type) and 100 or 0
             fitness += (subtype == target_subtype) and 10 or 0
             fitness += param_matches
@@ -85,10 +83,20 @@ def quality_parsed(mime_type, parsed_ranges):
                 best_fitness = fitness
                 best_fit_q = params['q']
             
-    return float(best_fit_q)
-    
+    return best_fitness, float(best_fit_q)
+
+def quality_parsed(mime_type, parsed_ranges):
+    """Find the best match for a given mime-type against
+    a list of media_ranges that have already been
+    parsed by parse_media_range(). Returns the
+    'q' quality parameter of the best match, 0 if no
+    match was found. This function bahaves the same as quality()
+    except that 'parsed_ranges' must be a list of
+    parsed media ranges. """
+    return fitness_and_quality_parsed(mime_type, parsed_ranges)[1]
+
 def quality(mime_type, ranges):
-    """Returns the quality 'q' of a mime_type when compared
+    """Returns the quality 'q' of a mime-type when compared
     against the media-ranges in ranges. For example:
 
     >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
@@ -109,73 +117,7 @@ def best_match(supported, header):
     'text/xml'
     """
     parsed_header = [parse_media_range(r) for r in header.split(",")]
-    weighted_matches = [(quality_parsed(mime_type, parsed_header), mime_type)\
+    weighted_matches = [(fitness_and_quality_parsed(mime_type, parsed_header), mime_type)\
             for mime_type in supported]
     weighted_matches.sort()
-    return weighted_matches[-1][0] and weighted_matches[-1][1] or ''
-
-def desired_matches(desired, header):
-    """Takes a list of desired mime-types in the order the server prefers to
-    send them regardless of the browsers preference.
-    
-    Browsers (such as Firefox) technically want XML over HTML depending on how
-    one reads the specification. This function is provided for a server to 
-    declare a set of desired mime-types it supports, and returns a subset of 
-    the desired list in the same order should each one be Accepted by the
-    browser.
-    
-    >>> sorted_match(['text/html', 'application/xml'], \
-    ...     'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png')
-    ['text/html', 'application/xml']
-    >>> sorted_match(['text/html', 'application/xml'], 'application/xml,application/json')
-    ['application/xml']
-    """
-    matches = []
-    parsed_ranges = [parse_media_range(r) for r in header.split(",")]
-    for mimetype in desired:
-        if quality_parsed(mimetype, parsed_ranges):
-            matches.append(mimetype)
-    return matches
-
-if __name__ == "__main__":
-    import unittest
-
-    class TestMimeParsing(unittest.TestCase):
-
-        def test_parse_media_range(self):
-            self.assert_(('application', 'xml', {'q': '1'}) == parse_media_range('application/xml;q=1'))
-            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml'))
-            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q='))
-            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
-            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
-            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))
-
-        def test_rfc_2616_example(self):
-            accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
-            self.assertEqual(1, quality("text/html;level=1", accept))
-            self.assertEqual(0.7, quality("text/html", accept))
-            self.assertEqual(0.3, quality("text/plain", accept))
-            self.assertEqual(0.5, quality("image/jpeg", accept))
-            self.assertEqual(0.4, quality("text/html;level=2", accept))
-            self.assertEqual(0.7, quality("text/html;level=3", accept))
-
-        def test_best_match(self):
-            mime_types_supported = ['application/xbel+xml', 'application/xml']
-            # direct match
-            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml')
-            # direct match with a q parameter
-            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml')
-            # direct match of our second choice with a q parameter
-            self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml')
-            # match using a subtype wildcard
-            self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml')
-            # match using a type wildcard
-            self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml')
-
-            mime_types_supported = ['application/xbel+xml', 'text/xml']
-            # match using a type versus a lower weighted subtype
-            self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml')
-            # fail to match anything
-            self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')
-
-    unittest.main() 
+    return weighted_matches[-1][0][1] and weighted_matches[-1][1] or ''
author	Ian Bicking <ianb@colorstudy.com>	2010-03-31 12:52:33 -0500
committer	Ian Bicking <ianb@colorstudy.com>	2010-03-31 12:52:33 -0500
commit	f57891686d88b9754017fe2aaf462e2549323b77 (patch)
tree	0bea06e7937aa3d805df7c065354d7cf55d40fef /paste/util
parent	40761dc8b9b71ba2788a4caa7a696709748eb2f5 (diff)
download	paste-f57891686d88b9754017fe2aaf462e2549323b77.tar.gz