Diffstat (limited to 'chromium/docs/website/scripts/export.py')
-rwxr-xr-x | chromium/docs/website/scripts/export.py | 457 |
1 file changed, 0 insertions, 457 deletions
diff --git a/chromium/docs/website/scripts/export.py b/chromium/docs/website/scripts/export.py
deleted file mode 100755
index 4e53aea81bd..00000000000
--- a/chromium/docs/website/scripts/export.py
+++ /dev/null
@@ -1,457 +0,0 @@
-#!/usr/bin/env vpython3
-# Copyright 2021 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Export www.chromium.org to local files.
-
-This script uses the Google GData and Google Sites APIs to extract the
-content from http://www.chromium.org/ and write it into local files
-that can be used to serve the same content.
-
-The APIs are documented at
-
-https://developers.google.com/sites/docs/1.0/developers_guide_protocol
-https://developers.google.com/gdata/docs/json
-
-Because www.chromium.org is a public site, this script requires no
-authentication to work.
-
-The exporting process attempts to convert the original content into
-sane modern HTML as much as possible without changing the appearance
-of any page significantly, with some minor exceptions.
-"""
-
-import argparse
-import collections
-import io
-import json
-import os
-import pdb
-import sys
-import time
-import traceback
-import xml.etree.ElementTree as ET
-
-from urllib.parse import urlparse
-from urllib.request import urlopen
-from urllib.error import HTTPError, URLError
-
-import yaml
-
-import common
-import html2markdown
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--force', action='store_true',
-                        help='ignore updated timestamps in local cache')
-    parser.add_argument('-j', '--jobs', type=int, default=common.cpu_count())
-    parser.add_argument('-t', '--test', action='store_true')
-    parser.add_argument('-r', '--raw', action='store_true')
-    parser.add_argument('-v', '--verbose', action='count')
-    parser.add_argument('--max_results', type=int, default=5000)
-    parser.add_argument('--start-index', type=int, default=1)
-    parser.add_argument('--paths-to-skip')
-    parser.add_argument('--path-list')
-    parser.add_argument('path', nargs='*')
-    args = parser.parse_args()
-
-    entries = _entries(args)
-
-    if args.path:
-        paths_to_export = ['%s%s' % ('/' if not path.startswith('/') else '',
-                                     path)
-                           for path in args.path]
-    elif args.path_list:
-        paths_to_export = common.read_paths(args.path_list)
-    else:
-        paths_to_export = []
-
-    if args.paths_to_skip:
-        paths_to_skip = set(common.read_paths(args.paths_to_skip))
-    else:
-        paths_to_skip = set(
-            common.read_paths(os.path.join(common.REPO_DIR,
-                                           'scripts', 'paths_to_skip.txt')))
-
-    max_input_mtime = max(os.stat(__file__).st_mtime,
-                          os.stat(common.__file__).st_mtime,
-                          os.stat(html2markdown.__file__).st_mtime)
-
-    updated = 0
-    paths = []
-
-    if args.test:
-        entry = _find_entry_by_path(paths_to_export[0], entries)
-        if entry:
-            metadata = _metadata(entry, entries)
-            path = _path(entry, entries)
-            _ = _handle_entry(path,
-                              (entry, metadata, max_input_mtime, args.force,
-                               args.raw))
-            content = common.read_text_file('%s%s/index.md' %
                                             (common.SITE_DIR, path))
-            print(content)
-            return 0
-        else:
-            print('%s not found' % paths_to_export[0])
-            return 1
-
-    q = common.JobQueue(_handle_entry, args.jobs)
-
-    paths_to_export = set(paths_to_export)
-    exported_pages = set()
-    for i, entry in enumerate(list(entries.values())[:args.max_results]):
-        if entry['kind'] in ('webpage', 'listpage',
-                             'announcementspage', 'filecabinet'):
-            metadata = _metadata(entry, entries)
-            path = _path(entry, entries)
-
-            if path in paths_to_skip:
-                continue
-            exported_pages.add(path)
-        elif entry['kind'] == 'attachment':
-            metadata = {}
-            path = entry['url'].replace(
-                'https://sites.google.com/a/chromium.org/dev/', '/').rstrip('/')
-            if path in paths_to_skip:
-                continue
-        else:
-            continue
-        if not paths_to_export or (path in paths_to_export):
-            q.request(path, (entry, metadata, max_input_mtime, args.force,
-                             False))
-
-    ret = 0
-    for path, res, did_update in q.results():
-        if res:
-            ret = 1
-        if did_update:
-            updated += 1
-
-    print('updated %d entries' % updated)
-    return ret
-
-
-def _find_entry_by_path(path, entries):
-    for entry in entries.values():
-        if entry['kind'] not in ('webpage', 'listpage',
-                                 'announcementspage', 'filecabinet'):
-            continue
-        entry_path = _path(entry, entries)
-        if entry_path == path:
-            return entry
-    return None
-
-
-def _handle_entry(task, obj):
-    entry, metadata, max_input_mtime, force, raw = obj
-    err = ''
-    did_update = False
-
-    if not task.startswith('/'):
-        return 'malformed task', False
-
-    yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str
-
-    if task in (
-        '/developers/jinja',
-        '/developers/polymer-1-0',
-        '/devtools/breakpoints-tutorial/index.html',
-        '/devtools/breakpoints-tutorial/script.js',
-    ):
-        # TODO: Eleventy chokes on these files.
-        return '', False
-
-    def repr_str(dumper, data):
-        if '\n' in data:
-            return dumper.represent_scalar(u'tag:yaml.org,2002:str', data,
-                                           style='|')
-        return dumper.org_represent_str(data)
-
-    yaml.add_representer(str, repr_str, Dumper=yaml.SafeDumper)
-
-
-    mtime = _to_ts(entry['updated'])
-    target_mtime = max(mtime, max_input_mtime)
-    if entry['kind'] in ('webpage',
-                         'listpage',
-                         'announcementspage',
-                         'filecabinet'):
-        path = '%s%s/%s' % (common.SITE_DIR, task, 'index.md')
-        if _needs_update(path, target_mtime, force):
-            if raw:
-                content = entry['content']
-            else:
-                content_sio = io.StringIO(entry['content'])
-                md_sio = io.StringIO()
-                md_sio.write('---\n')
-                md_sio.write(yaml.safe_dump(metadata))
-                md_sio.write('---\n\n')
-                url_converter = _URLConverter()
-                html2markdown.Convert(content_sio, md_sio, url_converter)
-                if entry['kind'] == 'listpage':
-                    md_sio.write('\n\n')
-                    _write_listitems(md_sio, entry)
-                content = md_sio.getvalue()
-                content = content.replace(
-                    'chromium.googlesource.com/chromium/src/+/master/',
-                    'chromium.googlesource.com/chromium/src/+/HEAD/')
-                content = content.replace(' \b\b\b\b', '')
-
-            did_update = common.write_if_changed(path, content, mode='w')
-        else:
-            did_update = False
-    elif entry['kind'] == 'listitem':
-        # Handled as part of the corresponding 'listpage' entry.
-        pass
-    elif entry['kind'] == 'announcement':
-        # TODO: implement me.
-        pass
-    elif entry['kind'] == 'attachment':
-        path = '%s%s' % (common.SITE_DIR, task)
-        path = path.replace(':', '_')
-        path = path.replace('%20', ' ')
-        path = path.replace('%2B', '+')
-        if task in (
-            '/developers/design-documents/network-stack/cookiemonster/CM-method-calls-new.png',
-            '/developers/design-documents/cookie-split-loading/objects.png',
-        ):
-            # These are expected 404's that we ignore.
-            did_update = False
-        elif _needs_update(path, mtime, force):
-            try:
-                fp = urlopen(entry['url'])
-                content = fp.read()
-                did_update = common.write_if_changed(path, content)
-            except (HTTPError, URLError, TimeoutError) as e:
-                err = 'Error: %s' % e
-
-    elif entry['kind'] == 'comment':
-        # ignore comments in the migration
-        pass
-    elif entry['kind'] == 'tag':
-        err = 'tag kind not implemented'
-    else:
-        err = 'unknown kind %s' % entry['kind']
-
-    return err, did_update
-
-
-def _write_listitems(content, entry):
-    if not entry['listitems']:
-        return
-
-    headers = entry['listitems'][0].keys()
-    rows = sorted(entry['listitems'],
-                  key=lambda row: row.get('Release') or '')
-
-    content.write('<table>\n')
-    content.write('  <tr>\n')
-    for header in headers:
-        content.write('    <th>%s</th>\n' % header)
-    content.write('  </tr>\n')
-    for row in rows:
-        content.write('  <tr>\n')
-        for value in row.values():
-            if value and value.startswith('<a xmlns='):
-                value = value.replace(' xmlns="http://www.w3.org/1999/xhtml"', '')
-            content.write('    <td>%s</td>\n' % (value or ''))
-        content.write('  </tr>\n')
-    content.write('</table>\n')
-
-
-class _URLConverter:
-    def Translate(self, href):
-        if not href:
-            return ''
-
-        for path in common.alternates:
-            if href.startswith(path):
-                href = href.replace(path, '')
-
-        if href.startswith('/_/rsrc'):
-            href = '/' + '/'.join(href.split('/')[4:])
-
-        url = urlparse(href)
-        if '?' in href and url.netloc == '':
-            href = href[0:href.index('?')]
-        if 'Screenshot' in href:
-            head, tail = href.split('Screenshot')
-            tail = tail.replace(':', '%3A')
-            href = head + 'Screenshot' + tail
-        return href
-
-
-def _path(entry, entries):
-    path = entry['page_name']
-    parent_id = entry.get('parent_id')
-    while parent_id:
-        path = entries[parent_id]['page_name'] + '/' + path
-        parent_id = entries[parent_id].get('parent_id')
-
-    path = ('/' + path).rstrip('/') or '/'
-    return path
-
-
-def _metadata(entry, entries):
-    metadata = {}
-    metadata['page_name'] = entry['page_name']
-    metadata['title'] = entry['title']
-
-    crumbs = []
-    parent_id = entry.get('parent_id')
-    while parent_id:
-        parent = entries[parent_id]
-        path = _path(parent, entries)
-        title = parent['title']
-        crumbs = [[path, title]] + crumbs
-        parent_id = parent.get('parent_id')
-
-    metadata['breadcrumbs'] = crumbs
-
-    if metadata['page_name'] in (
-        'chromium-projects',
-        'chromium',
-    ):
-        metadata['use_title_as_h1'] = False
-
-    return metadata
-
-
-def _needs_update(path, mtime, force):
-    if force:
-        return True
-    if os.path.exists(path):
-        st = os.stat(path)
-        return mtime > st.st_mtime
-    return True
-
-
-def _entries(args):
-    entries = {}
-    parents = {}
-
-    # Looks like Sites probably caps results at 500 entries per request,
-    # even if we request more than that.
-    rownum = 0
-    url = ('https://sites.google.com/feeds/content/chromium.org/dev'
-           '?start-index=%d&max-results=%d&alt=json' %
-           (args.start_index, 500 - rownum))
-    doc, next_url = _fetch(url, args.force)
-
-    for rownum, entry in enumerate(doc['feed']['entry'], start=1):
-        row = _to_row(entry, rownum)
-        entries[row['id']] = row
-        if row.get('parent_id'):
-            parents.setdefault(row['parent_id'], set()).add(row['id'])
-        if args.verbose:
-            print(' ... [%d]' % rownum)
-    while next_url:
-        doc, next_url = _fetch(next_url, args.force)
-        for rownum, entry in enumerate(doc['feed']['entry'], start=rownum):
-            row = _to_row(entry, rownum)
-            entries[row['id']] = row
-            if row.get('parent_id'):
-                parents.setdefault(row['parent_id'], set()).add(row['id'])
-            if args.verbose:
-                print(' ... [%d]' % rownum)
-
-    for entry_id, entry in entries.items():
-        if entry['kind'] == 'listpage':
-            entry['listitems'] = [entries[child_id]['fields'] for child_id
-                                  in parents[entry_id]
-                                  if entries[child_id]['kind'] == 'listitem']
-
-    return entries
-
-
-def _fetch(url, force):
-    path = url.replace('https://sites.google.com/feeds/', 'scripts/feeds/')
-    if _needs_update(path, 0, force):
-        fp = urlopen(url)
-        content = fp.read()
-        doc = json.loads(content)
-        updated = _to_ts(doc['feed']['updated']['$t'])
-        common.write_if_changed(path, content)
-    else:
-        with open(path) as fp:
-            doc = json.load(fp)
-    next_url = _find_link(doc['feed'], 'next')
-    return doc, next_url
-
-
-def _find_link(doc, rel):
-    for ent in doc['link']:
-        if ent['rel'] == rel:
-            return ent['href']
-    return None
-
-
-def _to_row(entry, rownum):
-    row = {
-        'rownum': rownum,
-        'content': entry.get('content', {}).get('$t'),
-        'id': _to_id(entry['id']['$t']),
-        'kind': entry['category'][0]['label'],
-        'published': entry['published']['$t'],
-        'updated': entry['updated']['$t'],
-    }
-
-    row['page_name'] = entry.get('sites$pageName', {}).get('$t')
-    row['title'] = entry.get('title', {}).get('$t')
-    row['alt_url'] = _find_link(entry, 'alternate')
-
-    if row['kind'] == 'attachment':
-        row['url'] = _find_link(entry, 'alternate')
-    else:
-        row['url'] = _find_link(entry, 'self')
-
-    if row['kind'] == 'listitem':
-        path = row['url'].replace('https://sites.google.com',
-                                  os.path.join(common.REPO_DIR, 'scripts'))
-        if os.path.exists(path):
-            xml_content = common.read_text_file(path)
-        else:
-            print('fetching %s' % row['url'])
-            with urlopen(row['url']) as fp:
-                xml_content = fp.read()
-            common.write_if_changed(path, xml_content)
-
-        root = ET.fromstring(xml_content)
-        fields = root.findall('{http://schemas.google.com/spreadsheets/2006}field')
-        row['fields'] = collections.OrderedDict((el.attrib['name'], el.text) for el in fields)
-
-    parent_url = _find_link(entry,
-                            'http://schemas.google.com/sites/2008#parent')
-    if parent_url:
-        row['parent_id'] = _to_id(parent_url)
-    return row
-
-
-def _to_id(url):
-    return url[url.rfind('/') + 1:]
-
-
-def _to_ts(iso_time):
-    return time.mktime(time.strptime(iso_time, '%Y-%m-%dT%H:%M:%S.%fZ'))
-
-if __name__ == '__main__':
-    try:
-        main()
-    except Exception:
-        extype, value, tb = sys.exc_info()
-        traceback.print_exc()
-        pdb.post_mortem(tb)
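
The crawl pattern the deleted script was built on is easiest to see in _entries() and _fetch() above: request the classic Google Sites GData content feed as JSON (the same feed URL and start-index/max-results/alt=json parameters shown in the diff) and follow the feed's 'next' link until it runs out. A minimal standalone sketch of that loop is below; the function name is ours for illustration, the local feed cache, retries, and error handling are omitted, and the classic Sites feed has since been retired, so treat it as illustrative only.

import json
from urllib.request import urlopen

# Same endpoint and query parameters that appear in _entries() above.
FEED_URL = ('https://sites.google.com/feeds/content/chromium.org/dev'
            '?start-index=1&max-results=500&alt=json')

def iter_feed_entries(url=FEED_URL):
    # Walk the paginated GData JSON feed, yielding each entry dict.
    while url:
        with urlopen(url) as fp:
            doc = json.loads(fp.read())
        for entry in doc['feed'].get('entry', []):
            yield entry
        # The feed advertises the next page via a link with rel='next';
        # stop once no such link is present.
        url = next((link['href'] for link in doc['feed']['link']
                    if link['rel'] == 'next'), None)

Each yielded entry carries the fields _to_row() reads above (category, updated, sites$pageName, the alternate/self links, and so on).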