summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJelmer Vernooij <jelmer@jelmer.uk>2018-03-30 18:24:19 +0100
committerJelmer Vernooij <jelmer@jelmer.uk>2018-03-30 18:24:19 +0100
commitfe5b3d2b2d1f4298342c1b1a1d5631dbbf9270ce (patch)
treedba1340fb42fcf39acf3d9e05685ae2d5dd15ee8
parentb65623b0ddba5a4ec99d5576e4888fcc1200de97 (diff)
downloadpython-fastimport-git-fe5b3d2b2d1f4298342c1b1a1d5631dbbf9270ce.tar.gz
Add fast-import-{filter,query,info} scripts.
-rw-r--r--NEWS3
-rwxr-xr-xbin/fast-import-filter100
-rwxr-xr-xbin/fast-import-info53
-rwxr-xr-xbin/fast-import-query77
-rw-r--r--fastimport/helpers.py69
-rw-r--r--fastimport/processors/info_processor.py286
-rw-r--r--fastimport/reftracker.py68
-rw-r--r--fastimport/tests/__init__.py1
-rw-r--r--fastimport/tests/test_info_processor.py77
-rwxr-xr-xsetup.py5
10 files changed, 739 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 35542e2..ef200d4 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,9 @@
* Don't attempt to encode bytestrings in utf8_bytes_helper().
(Jelmer Vernooij, #1647101)
+ * Add fast-import-filter, fast-import-query and fast-import-info
+ scripts. (Jelmer Vernooij)
+
0.9.6 2016-04-19
* Add python3.4 support (Jelmer Vernooij)
diff --git a/bin/fast-import-filter b/bin/fast-import-filter
new file mode 100755
index 0000000..03dbd01
--- /dev/null
+++ b/bin/fast-import-filter
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+
+__doc__ = """Filter a fast-import stream to include/exclude files & directories.
+
+This command is useful for splitting a subdirectory or bunch of
+files out from a project to create a new project complete with history
+for just those files. It can also be used to create a new project
+repository that removes all references to files that should not have
+been committed, e.g. security-related information (like passwords),
+commercially sensitive material, files with an incompatible license or
+large binary files like CD images.
+
+To specify standard input as the input stream, use a source name
+of '-'. If the source name ends in '.gz', it is assumed to be
+compressed in gzip format.
+
+:File/directory filtering:
+
+ This is supported by the -i and -x options. Excludes take precedence
+ over includes.
+
+ When filtering out a subdirectory (or file), the new stream uses the
+ subdirectory (or subdirectory containing the file) as the root. As
+ fast-import doesn't know in advance whether a path is a file or
+ directory in the stream, you need to specify a trailing '/' on
+ directories passed to the `--includes option`. If multiple files or
+ directories are given, the new root is the deepest common directory.
+
+ Note: If a path has been renamed, take care to specify the *original*
+ path name, not the final name that it ends up with.
+
+:History rewriting:
+
+ By default fast-import-filter does quite aggressive history rewriting.
+ Empty commits (or commits which had all their content filtered out) will
+ be removed, and so are the references to commits not included in the stream.
+
+ Flag --dont-squash-empty-commits reverses this behavior and makes it possible to
+ use fast-import-filter on incremental streams.
+
+:Examples:
+
+ Create a new project from a library (note the trailing / on the
+ directory name of the library)::
+
+ front-end | fast-import-filter -i lib/xxx/ > xxx.fi
+ fast-import xxx.fi mylibrary.bzr
+ (lib/xxx/foo is now foo)
+
+ Create a new repository without a sensitive file::
+
+ front-end | fast-import-filter -x missile-codes.txt > clean.fi
+ fast-import clean.fi clean.bzr
+"""
+
+import optparse
+import sys
+
+parser = optparse.OptionParser('fast-import-filter [options] SOURCE?')
+
+parser.add_option('-v', '--verbose', dest="verbose", action="store_true",
+ help="Be verbose.", default=False)
+parser.add_option('-i', '--include-paths', dest="include_paths",
+ action="append", type=str,
+ help="Only include commits affecting these paths."
+ " Directories should have a trailing /.")
+parser.add_option('-x', '--exclude-paths', dest="exclude_paths",
+ action="append", type=str, help="Exclude these paths from commits.")
+parser.add_option('--dont-squash-empty-commits',
+ dest="dont_squash_empty_commits", action="store_true",
+ help="Preserve all commits and links between them",
+ default=False)
+
+(opts, args) = parser.parse_args()
+
+if len(args) == 0:
+ source_path = "-"
+elif len(args) == 1:
+ source_path = args[0]
+else:
+ parser.error("too many arguments")
+
+from fastimport.processors import filter_processor
+params = {
+ 'include_paths': opts.include_paths,
+ 'exclude_paths': opts.exclude_paths,
+ }
+params['squash_empty_commits'] = (not opts.dont_squash_empty_commits)
+
+from fastimport.errors import ParsingError
+from fastimport import parser
+from fastimport.helpers import get_source_stream
+stream = get_source_stream(source_path)
+proc = filter_processor.FilterProcessor(params=params, verbose=opts.verbose)
+p = parser.ImportParser(stream, verbose=opts.verbose)
+try:
+ sys.exit(proc.process(p.iter_commands))
+except ParsingError as e:
+ sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e))
+ sys.exit(1)
diff --git a/bin/fast-import-info b/bin/fast-import-info
new file mode 100755
index 0000000..6f67443
--- /dev/null
+++ b/bin/fast-import-info
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+__doc__ = """Output information about a fast-import stream.
+
+This command reads a fast-import stream and outputs
+statistics and interesting properties about what it finds.
+When run in verbose mode, the information is output as a
+configuration file that can be passed to fast-import to
+assist it in intelligently caching objects.
+
+To specify standard input as the input stream, use a source name
+of '-'. If the source name ends in '.gz', it is assumed to be
+compressed in gzip format.
+
+:Examples:
+
+ Display statistics about the import stream produced by front-end::
+
+ front-end | fast-import-info -
+
+ Create a hints file for running fast-import on a large repository::
+
+ front-end | fast-import-info -v - > front-end.cfg
+"""
+
+import optparse
+import sys
+
+parser = optparse.OptionParser('fast-import-info [options] SOURCE')
+
+parser.add_option('-v', '--verbose', dest="verbose", action="store_true",
+ help="Be verbose.")
+
+(options, args) = parser.parse_args()
+
+if len(args) == 0:
+ source_path = "-"
+elif len(args) == 1:
+ source_path = args[0]
+else:
+ parser.error("too many arguments")
+
+from fastimport.processors import info_processor
+from fastimport.errors import ParsingError
+from fastimport.helpers import get_source_stream
+from fastimport import parser
+stream = get_source_stream(source_path)
+proc = info_processor.InfoProcessor(verbose=options.verbose)
+p = parser.ImportParser(stream, verbose=options.verbose)
+try:
+ sys.exit(proc.process(p.iter_commands))
+except ParsingError as e:
+ sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e))
+ sys.exit(1)
diff --git a/bin/fast-import-query b/bin/fast-import-query
new file mode 100755
index 0000000..6be68c4
--- /dev/null
+++ b/bin/fast-import-query
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+__doc__ = """Query a fast-import stream displaying selected commands.
+
+To specify standard input as the input stream, use a source name
+of '-'. If the source name ends in '.gz', it is assumed to be
+compressed in gzip format.
+
+To specify a commit to display, give its mark using the
+--commit-mark option. The commit will be displayed with
+file-commands included but with inline blobs hidden.
+
+To specify the commands to display, use the -C option one or
+more times. To specify just some fields for a command, use the
+syntax::
+
+ command=field1,...
+
+By default, the nominated fields for the nominated commands
+are displayed tab separated. To see the information in
+a name:value format, use verbose mode.
+
+Note: Binary fields (e.g. data for blobs) are masked out
+so it is generally safe to view the output in a terminal.
+
+:Examples:
+
+ Show the commit with mark 429::
+
+ fast-import-query xxx.fi -m429
+
+ Show all the fields of the reset and tag commands::
+
+ fast-import-query xxx.fi -Creset -Ctag
+
+ Show the mark and merge fields of the commit commands::
+
+ fast-import-query xxx.fi -Ccommit=mark,merge
+"""
+
+import optparse
+import sys
+
+parser = optparse.OptionParser('fast-import-query [options] SOURCE?')
+
+parser.add_option('-v', '--verbose', dest="verbose",
+ action="store_true", help="Be verbose")
+parser.add_option('-m', '--commit-mark', dest="commit_mark",
+ type=str, help="Mark of the commit to display.")
+parser.add_option('-C', '--commands', type=str, action="append",
+ help="Display fields for these commands.")
+
+(opts, args) = parser.parse_args()
+
+if len(args) == 0:
+ source_path = "-"
+elif len(args) == 1:
+ source_path = args[0]
+else:
+ parser.error("too many arguments")
+
+from fastimport.processors import query_processor
+from fastimport.helpers import defines_to_dict, get_source_stream
+from fastimport.errors import ParsingError
+from fastimport import parser
+
+params = defines_to_dict(opts.commands) or {}
+if opts.commit_mark:
+ params['commit-mark'] = opts.commit_mark
+
+stream = get_source_stream(source_path)
+proc = query_processor.QueryProcessor(verbose=opts.verbose, params=params)
+p = parser.ImportParser(stream, verbose=opts.verbose)
+try:
+ sys.exit(proc.process(p.iter_commands))
+except ParsingError as e:
+ sys.stderr.write("%d: Parse error: %s\n" % (e.lineno, e))
+ sys.exit(1)
diff --git a/fastimport/helpers.py b/fastimport/helpers.py
index abb7014..67072be 100644
--- a/fastimport/helpers.py
+++ b/fastimport/helpers.py
@@ -194,3 +194,72 @@ class newobject(object):
Hook for the future.utils.native() function
"""
return object(self)
+
+
+def binary_stream(stream):
+ """Ensure a stream is binary on Windows.
+
+ :return: the stream
+ """
+ try:
+ import os
+ if os.name == 'nt':
+ fileno = getattr(stream, 'fileno', None)
+ if fileno:
+ no = fileno()
+ if no >= 0: # -1 means we're working as subprocess
+ import msvcrt
+ msvcrt.setmode(no, os.O_BINARY)
+ except ImportError:
+ pass
+ return stream
+
+
+def invert_dictset(d):
+ """Invert a dictionary with keys matching a set of values, turned into lists."""
+ # Based on recipe from ASPN
+ result = {}
+ for k, c in d.items():
+ for v in c:
+ keys = result.setdefault(v, [])
+ keys.append(k)
+ return result
+
+
+def invert_dict(d):
+ """Invert a dictionary with keys matching each value turned into a list."""
+ # Based on recipe from ASPN
+ result = {}
+ for k, v in d.items():
+ keys = result.setdefault(v, [])
+ keys.append(k)
+ return result
+
+
+def defines_to_dict(defines):
+ """Convert a list of definition strings to a dictionary."""
+ if defines is None:
+ return None
+ result = {}
+ for define in defines:
+ kv = define.split('=', 1)
+ if len(kv) == 1:
+ result[define.strip()] = 1
+ else:
+ result[kv[0].strip()] = kv[1].strip()
+ return result
+
+
+def get_source_stream(source):
+ if source == '-' or source is None:
+ import sys
+ stream = binary_stream(sys.stdin)
+ elif source.endswith('.gz'):
+ import gzip
+ stream = gzip.open(source, "rb")
+ else:
+ stream = open(source, "rb")
+ return stream
+
+
+
diff --git a/fastimport/processors/info_processor.py b/fastimport/processors/info_processor.py
new file mode 100644
index 0000000..28c7300
--- /dev/null
+++ b/fastimport/processors/info_processor.py
@@ -0,0 +1,286 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Import processor that dump stats about the input (and doesn't import)."""
+
+from __future__ import absolute_import
+
+from .. import (
+ reftracker,
+ )
+from ..helpers import (
+ invert_dict,
+ invert_dictset,
+ )
+from fastimport import (
+ commands,
+ processor,
+ )
+import stat
+
+
+class InfoProcessor(processor.ImportProcessor):
+ """An import processor that dumps statistics about the input.
+
+ No changes to the current repository are made.
+
+ As well as providing useful information about an import
+ stream before importing it, this processor is useful for
+ benchmarking the speed at which data can be extracted from
+ the source.
+ """
+
+ def __init__(self, params=None, verbose=0, outf=None):
+ processor.ImportProcessor.__init__(self, params, verbose,
+ outf=outf)
+
+ def pre_process(self):
+ # Init statistics
+ self.cmd_counts = {}
+ for cmd in commands.COMMAND_NAMES:
+ self.cmd_counts[cmd] = 0
+ self.file_cmd_counts = {}
+ for fc in commands.FILE_COMMAND_NAMES:
+ self.file_cmd_counts[fc] = 0
+ self.parent_counts = {}
+ self.max_parent_count = 0
+ self.committers = set()
+ self.separate_authors_found = False
+ self.symlinks_found = False
+ self.executables_found = False
+ self.sha_blob_references = False
+ self.lightweight_tags = 0
+ # Blob usage tracking
+ self.blobs = {}
+ for usage in ['new', 'used', 'unknown', 'unmarked']:
+ self.blobs[usage] = set()
+ self.blob_ref_counts = {}
+ # Head tracking
+ self.reftracker = reftracker.RefTracker()
+ # Stuff to cache: a map from mark to # of times that mark is merged
+ self.merges = {}
+ # Stuff to cache: these are maps from mark to sets
+ self.rename_old_paths = {}
+ self.copy_source_paths = {}
+
+ def post_process(self):
+ # Dump statistics
+ cmd_names = commands.COMMAND_NAMES
+ fc_names = commands.FILE_COMMAND_NAMES
+ self._dump_stats_group("Command counts",
+ [(c.decode('utf-8'), self.cmd_counts[c]) for c in cmd_names], str)
+ self._dump_stats_group("File command counts",
+ [(c.decode('utf-8'), self.file_cmd_counts[c]) for c in fc_names], str)
+
+ # Commit stats
+ if self.cmd_counts[b'commit']:
+ p_items = []
+ for i in range(self.max_parent_count + 1):
+ if i in self.parent_counts:
+ count = self.parent_counts[i]
+ p_items.append(("parents-%d" % i, count))
+ merges_count = len(self.merges)
+ p_items.append(('total revisions merged', merges_count))
+ flags = {
+ 'separate authors found': self.separate_authors_found,
+ 'executables': self.executables_found,
+ 'symlinks': self.symlinks_found,
+ 'blobs referenced by SHA': self.sha_blob_references,
+ }
+ self._dump_stats_group("Parent counts", p_items, str)
+ self._dump_stats_group("Commit analysis", sorted(flags.items()), _found)
+ heads = invert_dictset(self.reftracker.heads)
+ self._dump_stats_group(
+ "Head analysis",
+ [(k.decode('utf-8'),
+ ', '.join([m.decode('utf-8') for m in v]))
+ for (k, v) in heads.items()], None,
+ _iterable_as_config_list)
+ # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
+ self._dump_stats_group("Merges", self.merges.items(), None)
+ # We only show the rename old path and copy source paths when -vv
+ # (verbose=2) is specified. The output here for mysql's data can't
+ # be parsed currently so this bit of code needs more work anyhow ..
+ if self.verbose >= 2:
+ self._dump_stats_group("Rename old paths",
+ self.rename_old_paths.items(), len,
+ _iterable_as_config_list)
+ self._dump_stats_group("Copy source paths",
+ self.copy_source_paths.items(), len,
+ _iterable_as_config_list)
+
+ # Blob stats
+ if self.cmd_counts[b'blob']:
+ # In verbose mode, don't list every blob used
+ if self.verbose:
+ del self.blobs['used']
+ self._dump_stats_group("Blob usage tracking",
+ self.blobs.items(), len, _iterable_as_config_list)
+ if self.blob_ref_counts:
+ blobs_by_count = invert_dict(self.blob_ref_counts)
+ blob_items = sorted(blobs_by_count.items())
+ self._dump_stats_group("Blob reference counts",
+ blob_items, len, _iterable_as_config_list)
+
+ # Other stats
+ if self.cmd_counts[b'reset']:
+ reset_stats = {
+ 'lightweight tags': self.lightweight_tags,
+ }
+ self._dump_stats_group("Reset analysis", reset_stats.items())
+
+ def _dump_stats_group(self, title, items, normal_formatter=None,
+ verbose_formatter=None):
+ """Dump a statistics group.
+
+ In verbose mode, do so as a config file so
+ that other processors can load the information if they want to.
+ :param normal_formatter: the callable to apply to the value
+ before displaying it in normal mode
+ :param verbose_formatter: the callable to apply to the value
+ before displaying it in verbose mode
+ """
+ if self.verbose:
+ self.outf.write("[%s]\n" % (title,))
+ for name, value in items:
+ if verbose_formatter is not None:
+ value = verbose_formatter(value)
+ if type(name) == str:
+ name = name.replace(' ', '-')
+ self.outf.write("%s = %s\n" % (name, value))
+ self.outf.write("\n")
+ else:
+ self.outf.write("%s:\n" % (title,))
+ for name, value in items:
+ if normal_formatter is not None:
+ value = normal_formatter(value)
+ self.outf.write("\t%s\t%s\n" % (value, name))
+
+ def progress_handler(self, cmd):
+ """Process a ProgressCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def blob_handler(self, cmd):
+ """Process a BlobCommand."""
+ self.cmd_counts[cmd.name] += 1
+ if cmd.mark is None:
+ self.blobs['unmarked'].add(cmd.id)
+ else:
+ self.blobs['new'].add(cmd.id)
+ # Marks can be re-used so remove it from used if already there.
+ # Note: we definitely do NOT want to remove it from multi if
+ # it's already in that set.
+ try:
+ self.blobs['used'].remove(cmd.id)
+ except KeyError:
+ pass
+
+ def checkpoint_handler(self, cmd):
+ """Process a CheckpointCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def commit_handler(self, cmd):
+ """Process a CommitCommand."""
+ self.cmd_counts[cmd.name] += 1
+ self.committers.add(cmd.committer)
+ if cmd.author is not None:
+ self.separate_authors_found = True
+ for fc in cmd.iter_files():
+ self.file_cmd_counts[fc.name] += 1
+ if isinstance(fc, commands.FileModifyCommand):
+ if fc.mode & 0o111:
+ self.executables_found = True
+ if stat.S_ISLNK(fc.mode):
+ self.symlinks_found = True
+ if fc.dataref is not None:
+ if fc.dataref.startswith(b':'):
+ self._track_blob(fc.dataref)
+ else:
+ self.sha_blob_references = True
+ elif isinstance(fc, commands.FileRenameCommand):
+ self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
+ elif isinstance(fc, commands.FileCopyCommand):
+ self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
+
+ # Track the heads
+ parents = self.reftracker.track_heads(cmd)
+
+ # Track the parent counts
+ parent_count = len(parents)
+ try:
+ self.parent_counts[parent_count] += 1
+ except KeyError:
+ self.parent_counts[parent_count] = 1
+ if parent_count > self.max_parent_count:
+ self.max_parent_count = parent_count
+
+ # Remember the merges
+ if cmd.merges:
+ #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
+ for merge in cmd.merges:
+ if merge in self.merges:
+ self.merges[merge] += 1
+ else:
+ self.merges[merge] = 1
+
+ def reset_handler(self, cmd):
+ """Process a ResetCommand."""
+ self.cmd_counts[cmd.name] += 1
+ if cmd.ref.startswith(b'refs/tags/'):
+ self.lightweight_tags += 1
+ else:
+ if cmd.from_ is not None:
+ self.reftracker.track_heads_for_ref(
+ cmd.ref, cmd.from_)
+
+ def tag_handler(self, cmd):
+ """Process a TagCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def feature_handler(self, cmd):
+ """Process a FeatureCommand."""
+ self.cmd_counts[cmd.name] += 1
+ feature = cmd.feature_name
+ if feature not in commands.FEATURE_NAMES:
+ self.warning("feature %s is not supported - parsing may fail"
+ % (feature,))
+
+ def _track_blob(self, mark):
+ if mark in self.blob_ref_counts:
+ self.blob_ref_counts[mark] += 1
+ pass
+ elif mark in self.blobs['used']:
+ self.blob_ref_counts[mark] = 2
+ self.blobs['used'].remove(mark)
+ elif mark in self.blobs['new']:
+ self.blobs['used'].add(mark)
+ self.blobs['new'].remove(mark)
+ else:
+ self.blobs['unknown'].add(mark)
+
+def _found(b):
+ """Format a found boolean as a string."""
+ return ['no', 'found'][b]
+
+def _iterable_as_config_list(s):
+ """Format an iterable as a sequence of comma-separated strings.
+
+ To match what ConfigObj expects, a single item list has a trailing comma.
+ """
+ items = sorted(s)
+ if len(items) == 1:
+ return "%s," % (items[0],)
+ else:
+ return ", ".join(items)
diff --git a/fastimport/reftracker.py b/fastimport/reftracker.py
new file mode 100644
index 0000000..16a5e45
--- /dev/null
+++ b/fastimport/reftracker.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""Tracker of refs."""
+
+from __future__ import absolute_import
+
+
+class RefTracker(object):
+
+ def __init__(self):
+ # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
+ self.last_ref = None
+ self.last_ids = {}
+ self.heads = {}
+
+ def dump_stats(self, note):
+ self._show_stats_for(self.last_ids, "last-ids", note=note)
+ self._show_stats_for(self.heads, "heads", note=note)
+
+ def clear(self):
+ self.last_ids.clear()
+ self.heads.clear()
+
+ def track_heads(self, cmd):
+ """Track the repository heads given a CommitCommand.
+
+ :param cmd: the CommitCommand
+ :return: the list of parents in terms of commit-ids
+ """
+ # Get the true set of parents
+ if cmd.from_ is not None:
+ parents = [cmd.from_]
+ else:
+ last_id = self.last_ids.get(cmd.ref)
+ if last_id is not None:
+ parents = [last_id]
+ else:
+ parents = []
+ parents.extend(cmd.merges)
+
+ # Track the heads
+ self.track_heads_for_ref(cmd.ref, cmd.id, parents)
+ return parents
+
+ def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
+ if parents is not None:
+ for parent in parents:
+ if parent in self.heads:
+ del self.heads[parent]
+ self.heads.setdefault(cmd_id, set()).add(cmd_ref)
+ self.last_ids[cmd_ref] = cmd_id
+ self.last_ref = cmd_ref
+
+
diff --git a/fastimport/tests/__init__.py b/fastimport/tests/__init__.py
index ae5acb7..01a681b 100644
--- a/fastimport/tests/__init__.py
+++ b/fastimport/tests/__init__.py
@@ -26,6 +26,7 @@ def test_suite():
'test_dates',
'test_errors',
'test_filter_processor',
+ 'test_info_processor',
'test_helpers',
'test_parser',
]
diff --git a/fastimport/tests/test_info_processor.py b/fastimport/tests/test_info_processor.py
new file mode 100644
index 0000000..43dd50b
--- /dev/null
+++ b/fastimport/tests/test_info_processor.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2018 Jelmer Vernooij
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test InfoProcessor"""
+from io import BytesIO
+
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
+
+from unittest import TestCase
+
+from fastimport import (
+ parser,
+ )
+
+from fastimport.processors import (
+ info_processor,
+ )
+
+simple_fast_import_stream = b"""commit refs/heads/master
+mark :1
+committer Jelmer Vernooij <jelmer@samba.org> 1299718135 +0100
+data 7
+initial
+
+"""
+
+class TestFastImportInfo(TestCase):
+
+ def test_simple(self):
+ stream = BytesIO(simple_fast_import_stream)
+ outf = StringIO()
+ proc = info_processor.InfoProcessor(outf=outf)
+ p = parser.ImportParser(stream)
+ proc.process(p.iter_commands)
+
+ self.maxDiff = None
+ self.assertMultiLineEqual(outf.getvalue(), """Command counts:
+\t0\tblob
+\t0\tcheckpoint
+\t1\tcommit
+\t0\tfeature
+\t0\tprogress
+\t0\treset
+\t0\ttag
+File command counts:
+\t0\tfilemodify
+\t0\tfiledelete
+\t0\tfilecopy
+\t0\tfilerename
+\t0\tfiledeleteall
+Parent counts:
+\t1\tparents-0
+\t0\ttotal revisions merged
+Commit analysis:
+\tno\tblobs referenced by SHA
+\tno\texecutables
+\tno\tseparate authors found
+\tno\tsymlinks
+Head analysis:
+\t:1\trefs/heads/master
+Merges:
+""")
diff --git a/setup.py b/setup.py
index 2e6d8dd..815c436 100755
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,11 @@ setup(name="fastimport",
license="GNU GPL v2 or later",
url="htps://github.com/jelmer/python-fastimport",
packages=['fastimport', 'fastimport.tests', 'fastimport.processors'],
+ scripts=[
+ 'bin/fast-import-query',
+ 'bin/fast-import-filter',
+ 'bin/fast-import-info',
+ ],
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)',