summaryrefslogtreecommitdiff
path: root/perl/contrib/checklinks.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'perl/contrib/checklinks.pl.in')
-rw-r--r--perl/contrib/checklinks.pl.in278
1 files changed, 139 insertions, 139 deletions
diff --git a/perl/contrib/checklinks.pl.in b/perl/contrib/checklinks.pl.in
index 1b453742f..db70bb112 100644
--- a/perl/contrib/checklinks.pl.in
+++ b/perl/contrib/checklinks.pl.in
@@ -88,32 +88,32 @@ sub SplitURL {
my $inurl = $_[0];
if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = $3;
- $getdocument = $4;
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = $3;
+ $getdocument = $4;
}
elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = $3;
- $getdocument = "";
-
- if($getpath !~ /\//) {
- $getpath ="";
- $getdocument = $3;
- }
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = $3;
+ $getdocument = "";
+
+ if($getpath !~ /\//) {
+ $getpath ="";
+ $getdocument = $3;
+ }
}
elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = "";
- $getdocument = "";
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = "";
+ $getdocument = "";
}
else {
- print "Couldn't parse the specified URL, retry please!\n";
- exit;
+ print "Couldn't parse the specified URL, retry please!\n";
+ exit;
}
}
@@ -127,52 +127,52 @@ sub SplitURL {
if(!$usestdin) {
open(HEADGET, "$linkcheck $geturl|") ||
- die "Couldn't get web page for some reason";
+ die "Couldn't get web page for some reason";
headget:
while(<HEADGET>) {
-# print $_;
- if($_ =~ /HTTP\/.*3\d\d /) {
- $pagemoved=1;
- }
- elsif($pagemoved &&
- ($_ =~ /^Location: (.*)/)) {
- $geturl = $1;
-
- &SplitURL($geturl);
-
- $pagemoved++;
- last headget;
- }
+# print $_;
+ if($_ =~ /HTTP\/.*3\d\d /) {
+ $pagemoved=1;
+ }
+ elsif($pagemoved &&
+ ($_ =~ /^Location: (.*)/)) {
+ $geturl = $1;
+
+ &SplitURL($geturl);
+
+ $pagemoved++;
+ last headget;
+ }
}
close(HEADGET);
if($pagemoved == 1) {
- print "Page is moved but we don't know where. Did you forget the ",
- "traling slash?\n";
- exit;
+ print "Page is moved but we don't know where. Did you forget the ",
+ "traling slash?\n";
+ exit;
}
open(WEBGET, "$htmlget $geturl|") ||
- die "Couldn't get web page for some reason";
+ die "Couldn't get web page for some reason";
while(<WEBGET>) {
- $line = $_;
- push @indoc, $line;
- $line=~ s/\n//g;
- $line=~ s/\r//g;
-# print $line."\n";
- $in=$in.$line;
+ $line = $_;
+ push @indoc, $line;
+ $line=~ s/\n//g;
+ $line=~ s/\r//g;
+# print $line."\n";
+ $in=$in.$line;
}
close(WEBGET);
}
else {
while(<STDIN>) {
- $line = $_;
- push @indoc, $line;
- $line=~ s/\n//g;
- $line=~ s/\r//g;
- $in=$in.$line;
+ $line = $_;
+ push @indoc, $line;
+ $line=~ s/\n//g;
+ $line=~ s/\r//g;
+ $in=$in.$line;
}
}
@@ -193,21 +193,21 @@ sub LinkWorks {
boo:
if( $doc[0] =~ /^HTTP[^ ]+ (\d+)/ ) {
- $error = $1;
-
- if($error < 400 ) {
- return "GOOD";
- }
- else {
-
- if($head && ($error >= 500)) {
- # This server doesn't like HEAD!
- @doc = `$linkcheckfull \"$check\"`;
- $head = 0;
- goto boo;
- }
- return "BAD";
- }
+ $error = $1;
+
+ if($error < 400 ) {
+ return "GOOD";
+ }
+ else {
+
+ if($head && ($error >= 500)) {
+ # This server doesn't like HEAD!
+ @doc = `$linkcheckfull \"$check\"`;
+ $head = 0;
+ goto boo;
+ }
+ return "BAD";
+ }
}
return "BAD";
}
@@ -219,43 +219,43 @@ sub GetLinks {
getlinkloop:
while($in =~ /[^<]*(<[^>]+>)/g ) {
- # we have a tag in $1
- $tag = $1;
-
- if($tag =~ /^<!--/) {
- # this is a comment tag, ignore it
- }
- else {
- if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
- $url=$2;
- if($url =~ /^\"(.*)\"$/) {
- # this was a "string" now $1 has removed the quotes:
- $url=$1;
- }
-
-
- $url =~ s/([^\#]*)\#.*/$1/g;
-
- if($url eq "") {
- # if the link was nothing than a #-link it may now have
- # been emptied completely so then we skip the rest
- next getlinkloop;
- }
-
- if($done{$url}) {
- # if this url already is done, do next
- $done{$url}++;
- next getlinkloop;
- }
-
- $done{$url} = 1; # this is "done"
-
- push @result, $url;
- if($tag =~ /< *([^ ]+)/) {
-# print "TAG: $1\n";
- $tagtype{$url}=$1;
- }
- }
+ # we have a tag in $1
+ $tag = $1;
+
+ if($tag =~ /^<!--/) {
+ # this is a comment tag, ignore it
+ }
+ else {
+ if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
+ $url=$2;
+ if($url =~ /^\"(.*)\"$/) {
+ # this was a "string" now $1 has removed the quotes:
+ $url=$1;
+ }
+
+
+ $url =~ s/([^\#]*)\#.*/$1/g;
+
+ if($url eq "") {
+ # if the link was nothing than a #-link it may now have
+ # been emptied completely so then we skip the rest
+ next getlinkloop;
+ }
+
+ if($done{$url}) {
+ # if this url already is done, do next
+ $done{$url}++;
+ next getlinkloop;
+ }
+
+ $done{$url} = 1; # this is "done"
+
+ push @result, $url;
+ if($tag =~ /< *([^ ]+)/) {
+# print "TAG: $1\n";
+ $tagtype{$url}=$1;
+ }
+ }
}
}
return @result;
@@ -268,36 +268,36 @@ for(@links) {
$url = $_;
if($url =~ /^([^:]+):/) {
- $prot = $1;
-# if($prot !~ /(http|ftp)/i) {
- if($prot !~ /http/i) {
- # this is an unsupported protocol, we ignore this
- next linkloop;
- }
- $link = $url;
+ $prot = $1;
+# if($prot !~ /(http|ftp)/i) {
+ if($prot !~ /http/i) {
+ # this is an unsupported protocol, we ignore this
+ next linkloop;
+ }
+ $link = $url;
}
else {
- if($external) {
- next linkloop;
- }
-
- # this is a link on the save server:
- if($url =~ /^\//) {
- # from root
- $link = "$getprotocol://$getserver$url";
- }
- else {
- # from the scanned page's dir
- $nyurl=$url;
-
- if(length($getpath) &&
- ($getpath !~ /\/$/) &&
- ($nyurl !~ /^\//)) {
- # lacks ending slash, add one to the document part:
- $nyurl = "/".$nyurl;
- }
- $link = "$getprotocol://$getserver/$getpath$nyurl";
- }
+ if($external) {
+ next linkloop;
+ }
+
+ # this is a link on the save server:
+ if($url =~ /^\//) {
+ # from root
+ $link = "$getprotocol://$getserver$url";
+ }
+ else {
+ # from the scanned page's dir
+ $nyurl=$url;
+
+ if(length($getpath) &&
+ ($getpath !~ /\/$/) &&
+ ($nyurl !~ /^\//)) {
+ # lacks ending slash, add one to the document part:
+ $nyurl = "/".$nyurl;
+ }
+ $link = "$getprotocol://$getserver/$getpath$nyurl";
+ }
}
#print "test $link\n";
@@ -313,16 +313,16 @@ for(@links) {
# If bad and -l, present the line numbers of the usage
if("BAD" eq $success) {
- $badlinks++;
- if($linenumber) {
- $line =1;
- for(@indoc) {
- if($_ =~ /$url/) {
- print " line $line\n";
- }
- $line++;
- }
- }
+ $badlinks++;
+ if($linenumber) {
+ $line =1;
+ for(@indoc) {
+ if($_ =~ /$url/) {
+ print " line $line\n";
+ }
+ $line++;
+ }
+ }
}
}
@@ -330,7 +330,7 @@ for(@links) {
if($verbose) {
print "$allcount links were checked";
if($badlinks > 0) {
- print ", $badlinks were found bad";
+ print ", $badlinks were found bad";
}
print "\n";
}