summaryrefslogtreecommitdiff
path: root/tests/compare_iana.py
blob: 353732a5e3e40b1e4105c45e769e5d39e4dcee5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3

"""
Compare list of IANA media type registrations against ours.
Copyright (C) 2020 Ville Skyttä <ville.skytta@iki.fi>, GPL v2 or later.
"""

import argparse
import os
import re
from urllib.request import urlopen
import xml.etree.ElementTree as ET


# Default location of the IANA media type registry in XML form.
MEDIA_TYPES_URL = \
    "https://www.iana.org/assignments/media-types/media-types.xml"
# XML namespaces in the "{uri}" form that ElementTree expects as a tag prefix.
MEDIA_TYPES_XMLNS = "{http://www.iana.org/assignments}"
FDO_XMLNS = "{http://www.freedesktop.org/standards/shared-mime-info}"

# Syntax check for a complete "type/subtype" name: each half starts with a
# letter or digit and is at most 127 characters long.
# re.ASCII is required because media type names are ASCII only: in the
# default Unicode mode \w accepts any Unicode word character, and a
# case-insensitive [a-z] additionally matches U+212A (Kelvin sign) and
# U+017F (long s).
MIME_TYPE_RE = re.compile(
    r"^[a-z0-9][\w!#$&^.+-]{0,126}/[a-z0-9][\w!#$&^.+-]{0,126}$",
    re.ASCII | re.IGNORECASE,
)


def compare_iana(iana_url: str, fdo_xml: str) -> None:
    """
    Print how each current IANA media type is represented in our database.

    The IANA registry XML is fetched from ``iana_url`` and our
    shared-mime-info XML is read from the file ``fdo_xml``. One line is
    printed per non-obsolete IANA type, in sorted order, prefixed with:

    * ``+`` when the type is one of our types
    * ``~`` when it is one of our aliases, or matches one of our types or
      aliases only when case is ignored
    * ``-`` when we do not have it at all

    Registrations marked OBSOLETE or DEPRECATED are syntax checked but are
    not reported.

    Raises AssertionError if the registry yields no current types, if our
    file yields no types or no aliases, or if any name fails MIME_TYPE_RE.
    """

    with urlopen(iana_url) as f:
        tree = ET.parse(f)

    iana_types = set()
    # Obsolete registrations are only validated; they take no part in the
    # comparison below.
    iana_obsolete = set()
    for registry in tree.findall(f"{MEDIA_TYPES_XMLNS}registry"):
        main_type = registry.attrib["id"]
        for name_elem in registry.findall(
                f"{MEDIA_TYPES_XMLNS}record/{MEDIA_TYPES_XMLNS}name"):
            # Element.text is None for an empty <name/>. Formatting None
            # directly would produce "<main_type>/None", which looks like a
            # valid type and would be reported as one. Normalising to an
            # empty string lets the syntax check below reject it, and
            # stripping tolerates stray whitespace around the name.
            subtype = (name_elem.text or "").strip()
            whole_type = f"{main_type}/{subtype}"
            # Obsolete entries carry a marker after the name, e.g.
            # "foo (OBSOLETE)"; the type itself is then the first word.
            type_words = whole_type.split()
            if any("OBSOLETE" in x or "DEPRECATED" in x for x in type_words):
                mime_type = type_words[0]
                iana_obsolete.add(mime_type)
            else:
                mime_type = whole_type
                iana_types.add(mime_type)
            assert MIME_TYPE_RE.match(mime_type), \
                f"invalid IANA type '{mime_type}'"
    assert iana_types

    with open(fdo_xml, "rb") as f:
        tree = ET.parse(f)

    fdo_types = set()
    fdo_aliases = set()
    for type_elem in tree.findall(f"{FDO_XMLNS}mime-type"):
        fdo_types.add(type_elem.attrib["type"])
        for alias_elem in type_elem.findall(f"{FDO_XMLNS}alias"):
            fdo_aliases.add(alias_elem.attrib["type"])
    assert fdo_types
    for mime_type in fdo_types:
        assert MIME_TYPE_RE.match(mime_type), f"invalid FDO type '{mime_type}'"
    assert fdo_aliases
    for mime_type in fdo_aliases:
        assert MIME_TYPE_RE.match(mime_type), \
            f"invalid FDO alias '{mime_type}'"

    # Lowercased name -> name as we spell it, for case-insensitive lookups.
    fdo_types_lower = {x.lower(): x for x in fdo_types}
    fdo_aliases_lower = {x.lower(): x for x in fdo_aliases}

    for mime_type in sorted(iana_types):
        lowered = mime_type.lower()
        # Exact matches take precedence over case-insensitive ones, and
        # types take precedence over aliases within each group.
        if mime_type in fdo_types:
            print(f"+ {mime_type} present with us")
        elif mime_type in fdo_aliases:
            print(f"~ {mime_type} present as alias with us")
        elif lowered in fdo_types_lower:
            print(
                f"~ {mime_type} present with us, case differs"
                f": {fdo_types_lower[lowered]}"
            )
        elif lowered in fdo_aliases_lower:
            print(
                f"~ {mime_type} present as alias with us, case differs"
                f": {fdo_aliases_lower[lowered]}"
            )
        else:
            print(f"- {mime_type} missing from us")


if __name__ == "__main__":
    # When set, the MEDIA_TYPES_URL environment variable replaces the
    # built-in URL as the default of the command-line option.
    env_default_url = os.environ.get("MEDIA_TYPES_URL", MEDIA_TYPES_URL)

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--media-types-url",
        type=str,
        default=env_default_url,
        help=f"default ${{MEDIA_TYPES_URL-{MEDIA_TYPES_URL}}}",
    )
    # Path to the shared-mime-info XML file to compare against.
    cli.add_argument("FREEDESKTOP_ORG_XML", type=str)
    options = cli.parse_args()

    compare_iana(options.media_types_url, options.FREEDESKTOP_ORG_XML)