diff options
author | Ian Bicking <ianb@colorstudy.com> | 2010-03-31 12:52:33 -0500 |
---|---|---|
committer | Ian Bicking <ianb@colorstudy.com> | 2010-03-31 12:52:33 -0500 |
commit | f57891686d88b9754017fe2aaf462e2549323b77 (patch) | |
tree | 0bea06e7937aa3d805df7c065354d7cf55d40fef /paste/util | |
parent | 40761dc8b9b71ba2788a4caa7a696709748eb2f5 (diff) | |
download | paste-f57891686d88b9754017fe2aaf462e2549323b77.tar.gz |
update mimeparse
Diffstat (limited to 'paste/util')
-rw-r--r-- | paste/util/mimeparse.py | 136 |
1 files changed, 39 insertions, 97 deletions
diff --git a/paste/util/mimeparse.py b/paste/util/mimeparse.py index 2523da1..0fd91e7 100644 --- a/paste/util/mimeparse.py +++ b/paste/util/mimeparse.py @@ -2,26 +2,25 @@ This module provides basic functions for handling mime-types. It can handle matching mime-types against a list of media-ranges. See section 14.1 of -the HTTP specification [RFC 2616] for a complete explaination. +the HTTP specification [RFC 2616] for a complete explanation. http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1 Contents: - - parse_mime_type(): Parses a mime-type into it's component parts. + - parse_mime_type(): Parses a mime-type into its component parts. - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter. - quality(): Determines the quality ('q') of a mime-type when compared against a list of media-ranges. - quality_parsed(): Just like quality() except the second parameter must be pre-parsed. - best_match(): Choose the mime-type with the highest quality ('q') from a list of candidates. - - desired_matches(): Provide a list in order of server-desired priorities from a list of candidates. """ -__version__ = "0.1.1" +__version__ = "0.1.2" __author__ = 'Joe Gregorio' __email__ = "joe@bitworking.org" __credits__ = "" def parse_mime_type(mime_type): - """Carves up a mime_type and returns a tuple of the + """Carves up a mime-type and returns a tuple of the (type, subtype, params) where 'params' is a dictionary of all the parameters for the media range. For example, the media range 'application/xhtml;q=0.5' would @@ -32,21 +31,21 @@ def parse_mime_type(mime_type): parts = mime_type.split(";") params = dict([tuple([s.strip() for s in param.split("=")])\ for param in parts[1:] ]) - try: - (type, subtype) = parts[0].split("/") - except ValueError: - type, subtype = parts[0], '*' + full_type = parts[0].strip() + # Java URLConnection class sends an Accept header that includes a single "*" + # Turn it into a legal wildcard. + if full_type == '*': full_type = '*/*' + (type, subtype) = full_type.split("/") return (type.strip(), subtype.strip(), params) def parse_media_range(range): """Carves up a media range and returns a tuple of the (type, subtype, params) where 'params' is a dictionary of all the parameters for the media range. - - For example, the media range ``application/*;q=0.5`` would - get parsed into:: + For example, the media range 'application/*;q=0.5' would + get parsed into: - ('application', '*', {'q', '0.5'}) + ('application', '*', {'q', '0.5'}) In addition this function also guarantees that there is a value for 'q' in the params dictionary, filling it @@ -59,25 +58,24 @@ def parse_media_range(range): params['q'] = '1' return (type, subtype, params) -def quality_parsed(mime_type, parsed_ranges): - """Find the best match for a given mime_type against +def fitness_and_quality_parsed(mime_type, parsed_ranges): + """Find the best match for a given mime-type against a list of media_ranges that have already been - parsed by parse_media_range(). Returns the - 'q' quality parameter of the best match, 0 if no - match was found. This function bahaves the same as quality() - except that 'parsed_ranges' must be a list of - parsed media ranges. """ + parsed by parse_media_range(). Returns a tuple of + the fitness value and the value of the 'q' quality + parameter of the best match, or (-1, 0) if no match + was found. Just as for quality_parsed(), 'parsed_ranges' + must be a list of parsed media ranges. """ best_fitness = -1 - best_match = "" best_fit_q = 0 (target_type, target_subtype, target_params) =\ parse_media_range(mime_type) for (type, subtype, params) in parsed_ranges: - param_matches = sum([1 for (key, value) in \ - target_params.iteritems() if key != 'q' and \ - params.has_key(key) and value == params[key]]) - if (type == target_type or type == '*') and \ - (subtype == target_subtype or subtype == "*"): + if (type == target_type or type == '*' or target_type == '*') and \ + (subtype == target_subtype or subtype == '*' or target_subtype == '*'): + param_matches = reduce(lambda x, y: x+y, [1 for (key, value) in \ + target_params.iteritems() if key != 'q' and \ + params.has_key(key) and value == params[key]], 0) fitness = (type == target_type) and 100 or 0 fitness += (subtype == target_subtype) and 10 or 0 fitness += param_matches @@ -85,10 +83,20 @@ def quality_parsed(mime_type, parsed_ranges): best_fitness = fitness best_fit_q = params['q'] - return float(best_fit_q) - + return best_fitness, float(best_fit_q) + +def quality_parsed(mime_type, parsed_ranges): + """Find the best match for a given mime-type against + a list of media_ranges that have already been + parsed by parse_media_range(). Returns the + 'q' quality parameter of the best match, 0 if no + match was found. This function bahaves the same as quality() + except that 'parsed_ranges' must be a list of + parsed media ranges. """ + return fitness_and_quality_parsed(mime_type, parsed_ranges)[1] + def quality(mime_type, ranges): - """Returns the quality 'q' of a mime_type when compared + """Returns the quality 'q' of a mime-type when compared against the media-ranges in ranges. For example: >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5') @@ -109,73 +117,7 @@ def best_match(supported, header): 'text/xml' """ parsed_header = [parse_media_range(r) for r in header.split(",")] - weighted_matches = [(quality_parsed(mime_type, parsed_header), mime_type)\ + weighted_matches = [(fitness_and_quality_parsed(mime_type, parsed_header), mime_type)\ for mime_type in supported] weighted_matches.sort() - return weighted_matches[-1][0] and weighted_matches[-1][1] or '' - -def desired_matches(desired, header): - """Takes a list of desired mime-types in the order the server prefers to - send them regardless of the browsers preference. - - Browsers (such as Firefox) technically want XML over HTML depending on how - one reads the specification. This function is provided for a server to - declare a set of desired mime-types it supports, and returns a subset of - the desired list in the same order should each one be Accepted by the - browser. - - >>> sorted_match(['text/html', 'application/xml'], \ - ... 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png') - ['text/html', 'application/xml'] - >>> sorted_match(['text/html', 'application/xml'], 'application/xml,application/json') - ['application/xml'] - """ - matches = [] - parsed_ranges = [parse_media_range(r) for r in header.split(",")] - for mimetype in desired: - if quality_parsed(mimetype, parsed_ranges): - matches.append(mimetype) - return matches - -if __name__ == "__main__": - import unittest - - class TestMimeParsing(unittest.TestCase): - - def test_parse_media_range(self): - self.assert_(('application', 'xml', {'q': '1'}) == parse_media_range('application/xml;q=1')) - self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml')) - self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q=')) - self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q=')) - self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other')) - self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other')) - - def test_rfc_2616_example(self): - accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5" - self.assertEqual(1, quality("text/html;level=1", accept)) - self.assertEqual(0.7, quality("text/html", accept)) - self.assertEqual(0.3, quality("text/plain", accept)) - self.assertEqual(0.5, quality("image/jpeg", accept)) - self.assertEqual(0.4, quality("text/html;level=2", accept)) - self.assertEqual(0.7, quality("text/html;level=3", accept)) - - def test_best_match(self): - mime_types_supported = ['application/xbel+xml', 'application/xml'] - # direct match - self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml') - # direct match with a q parameter - self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml') - # direct match of our second choice with a q parameter - self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml') - # match using a subtype wildcard - self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml') - # match using a type wildcard - self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml') - - mime_types_supported = ['application/xbel+xml', 'text/xml'] - # match using a type versus a lower weighted subtype - self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml') - # fail to match anything - self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '') - - unittest.main() + return weighted_matches[-1][0][1] and weighted_matches[-1][1] or '' |