From 4a24bfc22081a6c2771b62d464222f81c470192c Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Tue, 9 Dec 2008 23:46:16 +0100
Subject: gitweb: Move 'lineno' id from link to row element in git_blame

Move l<line number> ID from <a> link element inside table row (inside
cell element for column with line numbers), to encompassing <tr> table
row element.  It was done to make it easier to manipulate result HTML
with DOM, and to be able write 'blame_incremental' view with the same,
or nearly the same result.

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Acked-by: Luben Tuikov <ltuikov@yahoo.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 gitweb/gitweb.perl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'gitweb/gitweb.perl')

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 951739210a..e01e1afe95 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -4645,7 +4645,7 @@ HTML
 		if ($group_size) {
 			$current_color = ++$current_color % $num_colors;
 		}
-		print "<tr class=\"$rev_color[$current_color]\">\n";
+		print "<tr id=\"l$lineno\" class=\"$rev_color[$current_color]\">\n";
 		if ($group_size) {
 			print "<td class=\"sha1\"";
 			print " title=\"". esc_html($author) . ", $date\"";
@@ -4667,7 +4667,6 @@ HTML
 		                  hash_base => $parent_commit);
 		print "<td class=\"linenr\">";
 		print $cgi->a({ -href => "$blamed#l$orig_lineno",
-		                -id => "l$lineno",
 		                -class => "linenr" },
 		              esc_html($lineno));
 		print "</td>";
-- 
cgit v1.2.1


From d2ce10d7b7d67ff8b50ae749ce4c5b1a2b8d133c Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Tue, 9 Dec 2008 23:48:51 +0100
Subject: gitweb: A bit of code cleanup in git_blame()

Among others, here are the highlights:

 * move variable declaration closer to the place it is set and used,
   if possible,

 * uniquify and simplify coding style a bit, which includes removing
   unnecessary '()'.

 * check type only if $hash was defined, as otherwise from the way
   git_get_hash_by_path() is called (and works), we know that it is
   a blob,

 * use modern calling convention for git-blame,

 * remove unused variable,

 * don't use implicit variables ($_),

 * add some comments

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Acked-by: Luben Tuikov <ltuikov@yahoo.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 gitweb/gitweb.perl | 67 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 28 deletions(-)

(limited to 'gitweb/gitweb.perl')

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index e01e1afe95..ccbf5d4745 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -4575,28 +4575,33 @@ sub git_tag {
 }
 
 sub git_blame {
-	my $fd;
-	my $ftype;
-
+	# permissions
 	gitweb_check_feature('blame')
-	    or die_error(403, "Blame view not allowed");
+		or die_error(403, "Blame view not allowed");
 
+	# error checking
 	die_error(400, "No file name given") unless $file_name;
 	$hash_base ||= git_get_head_hash($project);
-	die_error(404, "Couldn't find base commit") unless ($hash_base);
+	die_error(404, "Couldn't find base commit") unless $hash_base;
 	my %co = parse_commit($hash_base)
 		or die_error(404, "Commit not found");
+	my $ftype = "blob";
 	if (!defined $hash) {
 		$hash = git_get_hash_by_path($hash_base, $file_name, "blob")
 			or die_error(404, "Error looking up file");
+	} else {
+		$ftype = git_get_type($hash);
+		if ($ftype !~ "blob") {
+			die_error(400, "Object is not a blob");
+		}
 	}
-	$ftype = git_get_type($hash);
-	if ($ftype !~ "blob") {
-		die_error(400, "Object is not a blob");
-	}
-	open ($fd, "-|", git_cmd(), "blame", '-p', '--',
-	      $file_name, $hash_base)
+
+	# run git-blame --porcelain
+	open my $fd, "-|", git_cmd(), "blame", '-p',
+		$hash_base, '--', $file_name
 		or die_error(500, "Open git-blame failed");
+
+	# page header
 	git_header_html();
 	my $formats_nav =
 		$cgi->a({-href => href(action=>"blob", -replay=>1)},
@@ -4610,40 +4615,44 @@ sub git_blame {
 	git_print_page_nav('','', $hash_base,$co{'tree'},$hash_base, $formats_nav);
 	git_print_header_div('commit', esc_html($co{'title'}), $hash_base);
 	git_print_page_path($file_name, $ftype, $hash_base);
-	my @rev_color = (qw(light2 dark2));
+
+	# page body
+	my @rev_color = qw(light2 dark2);
 	my $num_colors = scalar(@rev_color);
 	my $current_color = 0;
-	my $last_rev;
+	my %metainfo = ();
+
 	print <<HTML;
 <div class="page_body">
 <table class="blame">
 <tr><th>Commit</th><th>Line</th><th>Data</th></tr>
 HTML
-	my %metainfo = ();
-	while (1) {
-		$_ = <$fd>;
-		last unless defined $_;
+ LINE:
+	while (my $line = <$fd>) {
+		chomp $line;
+		# the header: <SHA-1> <src lineno> <dst lineno> [<lines in group>]
+		# no <lines in group> for subsequent lines in group of lines
 		my ($full_rev, $orig_lineno, $lineno, $group_size) =
-		    /^([0-9a-f]{40}) (\d+) (\d+)(?: (\d+))?$/;
+		   ($line =~ /^([0-9a-f]{40}) (\d+) (\d+)(?: (\d+))?$/);
 		if (!exists $metainfo{$full_rev}) {
 			$metainfo{$full_rev} = {};
 		}
 		my $meta = $metainfo{$full_rev};
-		while (<$fd>) {
-			last if (s/^\t//);
-			if (/^(\S+) (.*)$/) {
+		my $data;
+		while ($data = <$fd>) {
+			chomp $data;
+			last if ($data =~ s/^\t//); # contents of line
+			if ($data =~ /^(\S+) (.*)$/) {
 				$meta->{$1} = $2;
 			}
 		}
-		my $data = $_;
-		chomp $data;
-		my $rev = substr($full_rev, 0, 8);
+		my $short_rev = substr($full_rev, 0, 8);
 		my $author = $meta->{'author'};
-		my %date = parse_date($meta->{'author-time'},
-		                      $meta->{'author-tz'});
+		my %date =
+			parse_date($meta->{'author-time'}, $meta->{'author-tz'});
 		my $date = $date{'iso-tz'};
 		if ($group_size) {
-			$current_color = ++$current_color % $num_colors;
+			$current_color = ($current_color + 1) % $num_colors;
 		}
 		print "<tr id=\"l$lineno\" class=\"$rev_color[$current_color]\">\n";
 		if ($group_size) {
@@ -4654,7 +4663,7 @@ HTML
 			print $cgi->a({-href => href(action=>"commit",
 			                             hash=>$full_rev,
 			                             file_name=>$file_name)},
-			              esc_html($rev));
+			              esc_html($short_rev));
 			print "</td>\n";
 		}
 		open (my $dd, "-|", git_cmd(), "rev-parse", "$full_rev^")
@@ -4677,6 +4686,8 @@ HTML
 	print "</div>";
 	close $fd
 		or print "Reading blob failed\n";
+
+	# page footer
 	git_footer_html();
 }
 
-- 
cgit v1.2.1


From 39c19ce2755830dd1dfdabf36e2b0166df3546f8 Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Thu, 11 Dec 2008 01:33:29 +0100
Subject: gitweb: cache $parent_commit info in git_blame()

Luben Tuikov changed 'lineno' link from leading to commit which gave
current version of given block of lines, to leading to parent of this
commit in 244a70e (Blame "linenr" link jumps to previous state at
"orig_lineno").  This made possible data mining using 'blame' view.

The current implementation calls rev-parse once per each blamed line
to find parent revision of blamed commit, even when the same commit
appears more than once, which is inefficient.

This patch mitigates this issue by caching $parent_commit info in
%metainfo, which makes gitweb call rev-parse only once per each
unique commit in the output from "git blame".

In the tables below you can see simple benchmark comparing gitweb
performance before and after this patch

File               | L[1] | C[2] || Time0[3] | Before[4] | After[4]
====================================================================
blob.h             |   18 |    4 || 0m1.727s |  0m2.545s |  0m2.474s
GIT-VERSION-GEN    |   42 |   13 || 0m2.165s |  0m2.448s |  0m2.071s
README             |   46 |    6 || 0m1.593s |  0m2.727s |  0m2.242s
revision.c         | 1923 |  121 || 0m2.357s | 0m30.365s |  0m7.028s
gitweb/gitweb.perl | 6291 |  428 || 0m8.080s | 1m37.244s | 0m20.627s

File               | L/C  | Before/After
=========================================
blob.h             |  4.5 |         1.03
GIT-VERSION-GEN    |  3.2 |         1.18
README             |  7.7 |         1.22
revision.c         | 15.9 |         4.32
gitweb/gitweb.perl | 14.7 |         4.71

As you can see the greater ratio of lines in file to unique commits
in blame output, the greater gain from the new implementation.

  Legend:

  [1] Number of lines:
      $ wc -l <file>
  [2] Number of unique commits in the blame output:
      $ git blame -p <file> | grep author-time | wc -l
  [3] Time for running "git blame -p" (user time, single run):
      $ time git blame -p <file> >/dev/null
  [4] Time to run gitweb as Perl script from command line:
      $ gitweb-run.sh "p=.git;a=blame;f=<file>" > /dev/null 2>&1

The gitweb-run.sh script includes slightly modified (with adjusted
pathnames) code from gitweb_run() function from the test script
t/t9500-gitweb-standalone-no-errors.sh; gitweb config file
gitweb_config.perl contents (again up to adjusting pathnames; in
particular $projectroot variable should point to top directory of git
repository) can be found in the same place.

Discussion
~~~~~~~~~~

A possible future improvement would be to open a bidi pipe to
"git cat-file --batch-check", (like in Git::Repo in gitweb caching by
Lea Wiemann), feed $long_rev^ to it, and parse its output, which is
in the following form:

  926b07e694599d86cec668475071b32147c95034 commit 637

This would mean one call to git-cat-file for the whole 'blame' view,
instead of one call to git-rev-parse per each unique commit in blame
output.

Yet another solution would be to change use of validate_refname() to
validate_revision() when checking script parameters (CGI query or
path_info), with validate_revision being something like the following:

  sub validate_revision {
        my $rev = shift;
        return validate_refname(strip_rev_suffixes($rev));
  }

so we don't need to calculate $long_rev^, but can pass "$long_rev^" as
'hb' parameter.

This solution has the advantage that it can be easily adapted to future
incremental blame output.

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Acked-by: Luben Tuikov <ltuikov@yahoo.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 gitweb/gitweb.perl | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'gitweb/gitweb.perl')

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index ccbf5d4745..f992de223d 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -4666,11 +4666,17 @@ HTML
 			              esc_html($short_rev));
 			print "</td>\n";
 		}
-		open (my $dd, "-|", git_cmd(), "rev-parse", "$full_rev^")
-			or die_error(500, "Open git-rev-parse failed");
-		my $parent_commit = <$dd>;
-		close $dd;
-		chomp($parent_commit);
+		my $parent_commit;
+		if (!exists $meta->{'parent'}) {
+			open (my $dd, "-|", git_cmd(), "rev-parse", "$full_rev^")
+				or die_error(500, "Open git-rev-parse failed");
+			$parent_commit = <$dd>;
+			close $dd;
+			chomp($parent_commit);
+			$meta->{'parent'} = $parent_commit;
+		} else {
+			$parent_commit = $meta->{'parent'};
+		}
 		my $blamed = href(action => 'blame',
 		                  file_name => $meta->{'filename'},
 		                  hash_base => $parent_commit);
-- 
cgit v1.2.1