summaryrefslogtreecommitdiff
path: root/chromium/third_party/nasm/find_patches.py
blob: 3a9f87c03bdc45f64d5490470f3c604cef1c1352 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python
#
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
"""Usage: find_patches.py [origin_branch] [> patch_file]

This will find all changes in |origin_branch| that are not part of upstream,
and print a report.  It tries to include deleted lines, though these are
heuristic at best.  If |origin_branch| is omitted, it will default to HEAD.

Changes in the working directory are ignored.

Output will be written to stdout, so you probably want to redirect it.

For example, to generate the patches file for origin/merge-m68:
find_patches.py origin/merge-m68 > patches.68
"""

from __future__ import print_function
import collections
import os
import re
import sys
import subprocess

# Directory, relative to the current working directory, that is handed to
# `git diff` as the pathspec in which to look for patches.
# TODO(liberato): Should we find the root of the ffmpeg tree?
# NOTE(review): the TODO names ffmpeg; presumably it should refer to whichever
# source tree this script is run in -- confirm.
PATH = "."


def log(str):
  """ Writes a progress message, wrapped in square brackets, to stderr.

  Diagnostics go to stderr so that they never mix with the report that is
  written to stdout.

  Args:
    str: The message to report.  (The name shadows the builtin, but it is kept
        because it is part of the function's signature.)
  """
  bracketed = "[%s]" % str
  print(bracketed, file=sys.stderr)


def run(command):
  """ Runs a command and returns its stdout as text.

  The command's exit status is not checked; a failing command simply yields
  whatever it managed to write to stdout (possibly nothing).  Its stderr is
  not captured and goes wherever this process's stderr goes.

  Args:
    command: Array of argv[] entries. E.g., ["path_to_executable", "arg1", ...].

  Returns:
    stdout as a string.  On Python 3 the raw bytes are decoded as UTF-8, with
    undecodable bytes replaced, so that callers can mix the result with
    ordinary string literals.  On Python 2 the byte string is returned as is.
  """
  output = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
  # On Python 2, |bytes| is |str|, so this is a no-op there.
  if isinstance(output, bytes) and not isinstance(output, str):
    output = output.decode("utf-8", "replace")
  return output


class PatchInfo:
  """ Bookkeeping for the inserted / deleted lines of one diff.

  The only question this class answers is whether a diff is "delete only".
  Deleted lines can only be attributed to a commit by an approximate text
  search, so we want to be reasonably sure that they are real deletions and
  not one half of a change ("delete old, add new") before searching for them.
  """

  def __init__(self):
    # Every distinct line that the diff removes.
    self._deleted_lines = set()
    # Becomes True as soon as the diff adds at least one line.
    self._does_insert = False

  def record_inserted_line(self, line):
    """ Notes that the patch inserted |line|.

    |line| is a string from the patch, e.g., "+ foo that was added;".  Only
    the fact that something was inserted is remembered, not the text.
    """
    self._does_insert = True

  def record_deleted_line(self, line):
    """ Notes that the patch deleted |line|.

    |line| is a string from the patch, e.g., "- foo that was removed;".
    Duplicate lines are stored once.
    """
    self._deleted_lines.add(line)

  def interesting_deleted_lines(self):
    """ Returns the (possibly empty) set of deleted lines worth tracking.

    A diff that both removes and adds lines is most likely a modification, so
    its deleted lines are treated as noise and an empty set is returned.  With
    perfect deleted-line tracking this filtering would not matter, but the
    later lookup is a plain text search, so less noise is better.

    This is approximate: patch A might delete lines while patch B adds or
    changes lines nearby.  If both land in the same diff, A's deletion is
    missed.  In practice the check still removes more noise than it costs.
    """
    is_delete_only = not self._does_insert
    if not is_delete_only or not self._deleted_lines:
      return set()
    return self._deleted_lines


def main(argv):
  """ Command-line entry point: fetches upstream, then prints the report.

  Args:
    argv: Command-line arguments.  argv[1], when present, is the origin branch
        that contains the patches to find (for example "origin/merge-m68"),
        independent of the state of the working tree.  It defaults to "HEAD".

  Raises:
    Exception: if "git fetch upstream" exits with a non-zero status.
  """
  origin_branch = argv[1] if len(argv) > 1 else "HEAD"

  # Upstream has to be current, otherwise many commits will likely not be
  # reachable from it.  The fetch happens only here, so code that calls
  # write_patches_file() directly skips it.
  fetch_status = subprocess.call(["git", "fetch", "upstream"])
  if fetch_status != 0:
    raise Exception("Could not fetch from upstream")

  write_patches_file(origin_branch, sys.stdout)


# Matches a unified-diff hunk header, "@@ -start[,count] +start[,count] @@".
# Captures the old line count, the new start line and the new line count.  A
# count that is omitted from the header means 1.
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")


def _as_text(output):
  """Returns |output| as text, decoding it as UTF-8 if it is a bytes object."""
  # On Python 2, |bytes| is |str|, so nothing is decoded there.
  if isinstance(output, bytes) and not isinstance(output, str):
    return output.decode("utf-8", "replace")
  return output


def _date_sort_key(sha1_and_date):
  """Sort key for (sha1, date) pairs: numeric timestamp first, then sha1."""
  sha1, date = sha1_and_date
  # An unknown date (empty git output) sorts first, as an empty string would.
  timestamp = int(date) if date.isdigit() else 0
  return (timestamp, sha1)


def write_patches_file(origin_branch, output_file):
  """Write the patches file for |origin_branch| to |output_file|.

  The report lists every commit that is reachable from |origin_branch| but
  not from upstream and that still contributes to the difference between the
  two, together with the files each commit affects.  Commits are found by
  blaming the lines that origin adds, and, for delete-only diffs, by an
  approximate text search for the deleted lines.

  Args:
    origin_branch: Revision that contains the patches, e.g. "HEAD" or
        "origin/merge-m68".
    output_file: File-like object that the report is printed to.

  Raises:
    Exception: if no merge base of "upstream/master" and |origin_branch| can
        be found.
  """
  # Get the latest upstream commit that's reachable from the origin branch.
  # We'll use that to compare against.
  upstream = _as_text(
      run(["git", "merge-base", "upstream/master", origin_branch])).strip()
  if not upstream:
    raise Exception("Could not find upstream commit")

  # "Everything reachable from |origin_branch| but not |upstream|".  In other
  # words, all and only chromium changes.  Note that there are non-chromium
  # authors here, since it will include cherry-picks to origin.
  revision_range = "%s..%s" % (upstream, origin_branch)

  log("Origin is %s" % origin_branch)
  log("Upstream is %s" % upstream)

  # Find diffs between the versions, excluding all files that are only on
  # origin.  We explicitly exclude .gitignore, since it exists in both places.
  # Ask for no context, since we ignore it anyway.
  diff = _as_text(
      run([
          "git", "diff", "--diff-filter=a", "-U0", revision_range, PATH,
          ":!.gitignore"
      ]))

  # Map of sha1 to the set of files that it affects.  Its keys are exactly the
  # chromium patch sha1s that we have seen.
  sha1_to_files = collections.defaultdict(set)
  # Mapping of filename to set of lines that were deleted.
  files_to_deleted_lines = {}
  patch_info = PatchInfo()
  filename = None
  # Path from the most recent "--- a/<path>" header; used for deleted files.
  old_filename = None
  # Number of "-" / "+" lines still expected in the current hunk.  While these
  # are non-zero, a line is diff content even if it happens to look like a
  # "---" / "+++" file header (e.g. a deleted line whose text starts with
  # "--").
  old_left = 0
  new_left = 0

  log("Scanning diffs between origin and upstream")
  for line in diff.splitlines():
    if old_left > 0 and line.startswith("-"):
      old_left -= 1
      patch_info.record_deleted_line(line[1:])
    elif new_left > 0 and line.startswith("+"):
      new_left -= 1
      patch_info.record_inserted_line(line[1:])
    elif line.startswith("+++"):
      # If the previous patch was delete-only, then we need to search for it
      # differently, since we don't get blame entries for deleted lines.
      # Add the set of deleted lines to this filename.
      deleted_lines = patch_info.interesting_deleted_lines()
      if deleted_lines:
        files_to_deleted_lines[filename] = deleted_lines

      # Update to the new filename, dropping the "+++ b/" prefix.  A file that
      # origin deleted is reported as "+++ /dev/null", so use the path from
      # the "---" header for it instead.
      if line.startswith("+++ /dev/null") and old_filename:
        filename = old_filename
      else:
        filename = line[6:]
      log("Checking diffs in %s" % filename)

      # Start of a new diff.  We don't know if it inserts / deletes lines.
      patch_info = PatchInfo()
    elif line.startswith("@@"):
      # @@ -linespec +linespec @@
      # linespec is either "line_number,number_of_lines" or "line_number".
      # If the number of lines is unspecified, then it's 1.
      header = _HUNK_HEADER_RE.match(line)
      if not header:
        log("Skipping unparseable hunk header: %s" % line)
        continue
      old_count, new_start, new_count = header.groups()
      old_left = 1 if old_count is None else int(old_count)
      new_left = 1 if new_count is None else int(new_count)

      # If origin adds no lines here, it's a deletion only: nothing to blame.
      if new_left == 0:
        continue

      # Blame the lines that were added by |origin|: "starting_line,+number".
      blame = _as_text(
          run([
              "git", "blame", "-l", "-L",
              "%s,+%d" % (new_start, new_left), revision_range, "--", filename
          ]))

      # Collect sha1 lines, and create a mapping of files that is changed by
      # each sha1.
      for blame_line in blame.splitlines():
        sha1 = blame_line.split(" ", 1)[0]
        if sha1:
          sha1_to_files[sha1].add(filename)
    elif line.startswith("---"):
      # File header for the old side.  Remember the path (after "--- a/") in
      # case the new side turns out to be /dev/null.
      old_filename = line[6:]
    elif line.startswith("-"):
      # Fallback for content outside a counted hunk: this diff deletes lines.
      patch_info.record_deleted_line(line[1:])
    elif line.startswith("+"):
      # Fallback for content outside a counted hunk: this diff inserts lines.
      patch_info.record_inserted_line(line[1:])

  # Flush the last diff, which has no following "+++" header to trigger it.
  deleted_lines = patch_info.interesting_deleted_lines()
  if deleted_lines:
    files_to_deleted_lines[filename] = deleted_lines

  # For all files that have deleted lines, look for the sha1 that deleted them.
  # This is heuristic only; we're looking for "commits that contain some text".
  for filename, deleted_lines in files_to_deleted_lines.items():
    log("Checking for deleted lines in %s" % filename)
    for deleted_line in deleted_lines:
      # Make sure that the deleted line is long enough to provide context.
      if len(deleted_line) < 4:
        continue

      # Ask for the most recent commit in the range whose changes to
      # |filename| add or remove an occurrence of the deleted text.  Note that
      # "--first-parent" is not passed, so the commit may be on any parent.
      sha1 = _as_text(
          run([
              "git", "log", "-1", revision_range, "--format=%H", "-S",
              deleted_line, origin_branch, "--", filename
          ])).strip()

      # The search can come up empty.  Don't report a commit without a sha1.
      if not sha1:
        log("No commit found for a deleted line in %s" % filename)
        continue

      sha1_to_files[sha1].add(filename)

  # Look up dates from sha1 hashes.  We want to output them in a canonical order
  # so that we can diff easier.  Date order seems more convenient than sha1.
  log("Looking up sha1 dates to sort them")
  sha1_to_date = {}
  for sha1 in sha1_to_files:
    sha1_to_date[sha1] = _as_text(
        run(["git", "log", "-1", "--format=%at", sha1])).strip()

  # Print the patches file.
  log("Writing patch file")
  print(
      "---------------------------------------------------------------------",
      file=output_file)
  print(
      "-- Chromium Patches. Autogenerated by " + os.path.basename(__file__) +
      ", do not edit --",
      file=output_file)
  print(
      "---------------------------------------------------------------------",
      file=output_file)
  print("\n", file=output_file)
  wd = os.getcwd()
  # Sort by commit timestamp (numerically), using the sha1 to break ties so
  # that the output order is fully deterministic.
  for sha1, _ in sorted(sha1_to_date.items(), key=_date_sort_key):
    print(
        "------------------------------------------------------------------",
        file=output_file)
    for line in _as_text(run(["git", "log", "-1", sha1])).splitlines():
      print(line.rstrip(), file=output_file)
    print("\nAffects:", file=output_file)
    # TODO(liberato): maybe add the lines that were affected.
    for affected_file in sorted(sha1_to_files[sha1]):
      relfile = os.path.relpath(affected_file, wd).replace('\\', '/')
      print("    " + relfile, file=output_file)
    print(file=output_file)

  log("Done")


# Run the report only when executed as a script.  Importing this module just
# makes its functions available (e.g. write_patches_file()) and does nothing.
if __name__ == "__main__":
  main(sys.argv)