1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#!/usr/bin/env python3
"""
Compare list of IANA media type registrations against ours.
Copyright (C) 2020 Ville Skyttä <ville.skytta@iki.fi>, GPL v2 or later.
"""
import argparse
import os
import re
from urllib.request import urlopen
import xml.etree.ElementTree as ET
MEDIA_TYPES_URL = \
"https://www.iana.org/assignments/media-types/media-types.xml"
MEDIA_TYPES_XMLNS = "{http://www.iana.org/assignments}"
FDO_XMLNS = "{http://www.freedesktop.org/standards/shared-mime-info}"
MIME_TYPE_RE = re.compile(
r"^[a-z0-9][\w!#$&^.+-]{0,126}/[a-z0-9][\w!#$&^.+-]{0,126}$", re.IGNORECASE
)
def compare_iana(iana_url: str, fdo_xml: str) -> None:
with urlopen(iana_url) as f:
tree = ET.parse(f)
iana_types = set()
iana_obsolete = set()
for elem in tree.findall(f"{MEDIA_TYPES_XMLNS}registry"):
main_type = elem.attrib["id"]
for elem in elem.findall(
f"{MEDIA_TYPES_XMLNS}record/{MEDIA_TYPES_XMLNS}name"):
whole_type = f"{main_type}/{elem.text}"
type_words = whole_type.split()
if any("OBSOLETE" in x or "DEPRECATED" in x for x in type_words):
mime_type = type_words[0]
iana_obsolete.add(mime_type)
else:
mime_type = whole_type
iana_types.add(mime_type)
assert MIME_TYPE_RE.match(mime_type), \
f"invalid IANA type '{mime_type}'"
assert iana_types
with open(fdo_xml, "rb") as f:
tree = ET.parse(f)
fdo_types = set()
fdo_aliases = set()
for elem in tree.findall(f"{FDO_XMLNS}mime-type"):
fdo_types.add(elem.attrib["type"])
for elem in elem.findall(f"{FDO_XMLNS}alias"):
fdo_aliases.add(elem.attrib["type"])
assert fdo_types
for mime_type in fdo_types:
assert MIME_TYPE_RE.match(mime_type), f"invalid FDO type '{mime_type}'"
assert fdo_aliases
for mime_type in fdo_aliases:
assert MIME_TYPE_RE.match(mime_type), \
f"invalid FDO alias '{mime_type}'"
fdo_types_lower = {x.lower(): x for x in fdo_types}
fdo_aliases_lower = {x.lower(): x for x in fdo_aliases}
for mime_type in sorted(iana_types):
if mime_type in fdo_types:
print(f"+ {mime_type} present with us")
elif mime_type in fdo_aliases:
print(f"~ {mime_type} present as alias with us")
elif mime_type.lower() in fdo_types_lower:
print(
f"~ {mime_type} present with us, case differs"
f": {fdo_types_lower[mime_type.lower()]}"
)
elif mime_type.lower() in fdo_aliases_lower:
print(
f"~ {mime_type} present as alias with us, case differs"
f": {fdo_aliases_lower[mime_type.lower()]}"
)
else:
print(f"- {mime_type} missing from us")
if __name__ == "__main__":
media_types_url = os.environ.get("MEDIA_TYPES_URL", MEDIA_TYPES_URL)
parser = argparse.ArgumentParser()
parser.add_argument(
"--media-types-url", type=str, default=media_types_url,
help=f"default ${{MEDIA_TYPES_URL-{MEDIA_TYPES_URL}}}"
)
parser.add_argument("FREEDESKTOP_ORG_XML", type=str)
args = parser.parse_args()
compare_iana(args.media_types_url, args.FREEDESKTOP_ORG_XML)
|