summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2011-02-22 15:08:30 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2011-02-22 15:08:30 +0000
commit7c186823d68db1d2bd078fb91a95d30cd12890da (patch)
tree1bf768748d59a49c2e101773b24a67da4bc6bbc8
downloadText-Glob-tarball-7c186823d68db1d2bd078fb91a95d30cd12890da.tar.gz
-rw-r--r--Build.PL12
-rw-r--r--Changes37
-rw-r--r--MANIFEST7
-rw-r--r--META.yml21
-rw-r--r--Makefile.PL14
-rw-r--r--lib/Text/Glob.pm195
-rw-r--r--t/Text-Glob.t78
7 files changed, 364 insertions, 0 deletions
diff --git a/Build.PL b/Build.PL
new file mode 100644
index 0000000..fc5febe
--- /dev/null
+++ b/Build.PL
@@ -0,0 +1,12 @@
+use strict;
+use Module::Build;
+
+Module::Build
+ ->new( module_name => "Text::Glob",
+ license => 'perl',
+ build_requires => {
+ 'Test::More' => 0,
+ },
+ create_makefile_pl => 'traditional',
+ )
+ ->create_build_script;
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..1e56c25
--- /dev/null
+++ b/Changes
@@ -0,0 +1,37 @@
+0.09 Tuesday 22nd February, 2010
+ Compiled documentation fixes (collected by Tom Hukins from fixes on rt.cpan)
+
+0.08 Wednesday 2nd May, 2007
+ Expose glob_to_regex_string (Joshua Hoblitt)
+
+0.07 Friday 14th July, 2006
+ Explictly quote @ and %. Though they don't really need it to work
+ normally, it's needed for when you roundtrip the
+ regex back into text (like File::Find::Rule does).
+
+0.06 Monday 1st September, 2003
+ - port to Module::Build
+ - Nested alternations fix from Mike Benson
+
+0.05 15th August, 2002
+ - !match_glob("*.foo", "foo/.foo");
+ - test suite overhaul
+ - backslash expansion fixed
+ - /[+^$|]/ made less 'special'
+ - handle embedded newlines in glob patterns
+ - add tests for 'foo[abc]'
+ - Many thanks go to Nick Cleaton for finding these
+
+0.04 14th August, 2002
+ - $Text::Glob::{strict_leading_dot,strict_wildcard_slash} from
+ bug report from Nick Cleaton
+ - (quite poor) documentation of supported globbing constructs
+
+0.03 2nd August, 2002
+ - complete work of 0.02 by escaping ) too. bug found by Andy Lester
+
+0.02 29th July, 2002
+ - fix handling of ( and ? tokens
+
+0.01 21st July 2002
+ - initital release
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..da39bc6
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,7 @@
+Changes
+MANIFEST
+lib/Text/Glob.pm
+Makefile.PL
+Build.PL
+META.yml
+t/Text-Glob.t
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..8024fc0
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,21 @@
+---
+abstract: 'match globbing patterns against text'
+author:
+ - 'Richard Clamp <richardc@unixbeard.net>'
+build_requires:
+ Test::More: 0
+configure_requires:
+ Module::Build: 0.36
+generated_by: 'Module::Build version 0.3603'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: Text-Glob
+provides:
+ Text::Glob:
+ file: lib/Text/Glob.pm
+ version: 0.09
+resources:
+ license: http://dev.perl.org/licenses/
+version: 0.09
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..5211baa
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,14 @@
+# Note: this file was auto-generated by Module::Build::Compat version 0.3603
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+ 'PL_FILES' => {},
+ 'INSTALLDIRS' => 'site',
+ 'NAME' => 'Text::Glob',
+ 'EXE_FILES' => [],
+ 'VERSION_FROM' => 'lib/Text/Glob.pm',
+ 'PREREQ_PM' => {
+ 'Test::More' => 0
+ }
+ )
+;
diff --git a/lib/Text/Glob.pm b/lib/Text/Glob.pm
new file mode 100644
index 0000000..8580fe6
--- /dev/null
+++ b/lib/Text/Glob.pm
@@ -0,0 +1,195 @@
+package Text::Glob;
+use strict;
+use Exporter;
+use vars qw/$VERSION @ISA @EXPORT_OK
+ $strict_leading_dot $strict_wildcard_slash/;
+$VERSION = '0.09';
+@ISA = 'Exporter';
+@EXPORT_OK = qw( glob_to_regex glob_to_regex_string match_glob );
+
+$strict_leading_dot = 1;
+$strict_wildcard_slash = 1;
+
+use constant debug => 0;
+
+sub glob_to_regex {
+ my $glob = shift;
+ my $regex = glob_to_regex_string($glob);
+ return qr/^$regex$/;
+}
+
+sub glob_to_regex_string
+{
+ my $glob = shift;
+ my ($regex, $in_curlies, $escaping);
+ local $_;
+ my $first_byte = 1;
+ for ($glob =~ m/(.)/gs) {
+ if ($first_byte) {
+ if ($strict_leading_dot) {
+ $regex .= '(?=[^\.])' unless $_ eq '.';
+ }
+ $first_byte = 0;
+ }
+ if ($_ eq '/') {
+ $first_byte = 1;
+ }
+ if ($_ eq '.' || $_ eq '(' || $_ eq ')' || $_ eq '|' ||
+ $_ eq '+' || $_ eq '^' || $_ eq '$' || $_ eq '@' || $_ eq '%' ) {
+ $regex .= "\\$_";
+ }
+ elsif ($_ eq '*') {
+ $regex .= $escaping ? "\\*" :
+ $strict_wildcard_slash ? "[^/]*" : ".*";
+ }
+ elsif ($_ eq '?') {
+ $regex .= $escaping ? "\\?" :
+ $strict_wildcard_slash ? "[^/]" : ".";
+ }
+ elsif ($_ eq '{') {
+ $regex .= $escaping ? "\\{" : "(";
+ ++$in_curlies unless $escaping;
+ }
+ elsif ($_ eq '}' && $in_curlies) {
+ $regex .= $escaping ? "}" : ")";
+ --$in_curlies unless $escaping;
+ }
+ elsif ($_ eq ',' && $in_curlies) {
+ $regex .= $escaping ? "," : "|";
+ }
+ elsif ($_ eq "\\") {
+ if ($escaping) {
+ $regex .= "\\\\";
+ $escaping = 0;
+ }
+ else {
+ $escaping = 1;
+ }
+ next;
+ }
+ else {
+ $regex .= $_;
+ $escaping = 0;
+ }
+ $escaping = 0;
+ }
+ print "# $glob $regex\n" if debug;
+
+ return $regex;
+}
+
+sub match_glob {
+ print "# ", join(', ', map { "'$_'" } @_), "\n" if debug;
+ my $glob = shift;
+ my $regex = glob_to_regex $glob;
+ local $_;
+ grep { $_ =~ $regex } @_;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Text::Glob - match globbing patterns against text
+
+=head1 SYNOPSIS
+
+ use Text::Glob qw( match_glob glob_to_regex );
+
+ print "matched\n" if match_glob( "foo.*", "foo.bar" );
+
+ # prints foo.bar and foo.baz
+ my $regex = glob_to_regex( "foo.*" );
+ for ( qw( foo.bar foo.baz foo bar ) ) {
+ print "matched: $_\n" if /$regex/;
+ }
+
+=head1 DESCRIPTION
+
+Text::Glob implements glob(3) style matching that can be used to match
+against text, rather than fetching names from a filesystem. If you
+want to do full file globbing use the File::Glob module instead.
+
+=head2 Routines
+
+=over
+
+=item match_glob( $glob, @things_to_test )
+
+Returns the list of things which match the glob from the source list.
+
+=item glob_to_regex( $glob )
+
+Returns a compiled regex which is the equivalent of the globbing
+pattern.
+
+=item glob_to_regex_string( $glob )
+
+Returns a regex string which is the equivalent of the globbing
+pattern.
+
+=back
+
+=head1 SYNTAX
+
+The following metacharacters and rules are respected.
+
+=over
+
+=item C<*> - match zero or more characters
+
+C<a*> matches C<a>, C<aa>, C<aaaa> and many many more.
+
+=item C<?> - match exactly one character
+
+C<a?> matches C<aa>, but not C<a>, or C<aaa>
+
+=item Character sets/ranges
+
+C<example.[ch]> matches C<example.c> and C<example.h>
+
+C<demo.[a-c]> matches C<demo.a>, C<demo.b>, and C<demo.c>
+
+=item alternation
+
+C<example.{foo,bar,baz}> matches C<example.foo>, C<example.bar>, and
+C<example.baz>
+
+=item leading . must be explictly matched
+
+C<*.foo> does not match C<.bar.foo>. For this you must either specify
+the leading . in the glob pattern (C<.*.foo>), or set
+C<$Text::Glob::strict_leading_dot> to a false value while compiling
+the regex.
+
+=item C<*> and C<?> do not match /
+
+C<*.foo> does not match C<bar/baz.foo>. For this you must either
+explicitly match the / in the glob (C<*/*.foo>), or set
+C<$Text::Glob::strict_wildcard_slash> to a false value with compiling
+the regex.
+
+=back
+
+=head1 BUGS
+
+The code uses qr// to produce compiled regexes, therefore this module
+requires perl version 5.005_03 or newer.
+
+=head1 AUTHOR
+
+Richard Clamp <richardc@unixbeard.net>
+
+=head1 COPYRIGHT
+
+Copyright (C) 2002, 2003, 2006, 2007 Richard Clamp. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<File::Glob>, glob(3)
+
+=cut
diff --git a/t/Text-Glob.t b/t/Text-Glob.t
new file mode 100644
index 0000000..effb719
--- /dev/null
+++ b/t/Text-Glob.t
@@ -0,0 +1,78 @@
+#!perl -w
+use strict;
+use Test::More tests => 44;
+
+BEGIN { use_ok('Text::Glob', qw( glob_to_regex match_glob ) ) }
+
+my $regex = glob_to_regex( 'foo' );
+is( ref $regex, 'Regexp', "glob_to_regex hands back a regex" );
+ok( 'foo' =~ $regex, "matched foo" );
+ok( 'foobar' !~ $regex, "didn't match foobar" );
+
+ok( match_glob( 'foo', 'foo' ), "absolute string" );
+ok( !match_glob( 'foo', 'foobar' ) );
+
+ok( match_glob( 'foo.*', 'foo.' ), "* wildcard" );
+ok( match_glob( 'foo.*', 'foo.bar' ) );
+ok( !match_glob( 'foo.*', 'gfoo.bar' ) );
+
+ok( match_glob( 'foo.?p', 'foo.cp' ), "? wildcard" );
+ok( !match_glob( 'foo.?p', 'foo.cd' ) );
+
+ok( match_glob( 'foo.{c,h}', 'foo.h' ), ".{alternation,or,something}" );
+ok( match_glob( 'foo.{c,h}', 'foo.c' ) );
+ok( !match_glob( 'foo.{c,h}', 'foo.o' ) );
+
+ok( match_glob( 'foo.\\{c,h}\\*', 'foo.{c,h}*' ), '\escaping' );
+ok( !match_glob( 'foo.\\{c,h}\\*', 'foo.\\c' ) );
+
+ok( match_glob( 'foo.(bar)', 'foo.(bar)'), "escape ()" );
+
+ok( !match_glob( '*.foo', '.file.foo' ), "strict . rule fail" );
+ok( match_glob( '.*.foo', '.file.foo' ), "strict . rule match" );
+{
+local $Text::Glob::strict_leading_dot;
+ok( match_glob( '*.foo', '.file.foo' ), "relaxed . rule" );
+}
+
+ok( !match_glob( '*.fo?', 'foo/file.fob' ), "strict wildcard / fail" );
+ok( match_glob( '*/*.fo?', 'foo/file.fob' ), "strict wildcard / match" );
+{
+local $Text::Glob::strict_wildcard_slash;
+ok( match_glob( '*.fo?', 'foo/file.fob' ), "relaxed wildcard /" );
+}
+
+
+ok( !match_glob( 'foo/*.foo', 'foo/.foo' ), "more strict wildcard / fail" );
+ok( match_glob( 'foo/.f*', 'foo/.foo' ), "more strict wildcard / match" );
+{
+local $Text::Glob::strict_wildcard_slash;
+ok( match_glob( '*.foo', 'foo/.foo' ), "relaxed wildcard /" );
+}
+
+ok( match_glob( 'f+.foo', 'f+.foo' ), "properly escape +" );
+ok( !match_glob( 'f+.foo', 'ffff.foo' ) );
+
+ok( match_glob( "foo\nbar", "foo\nbar" ), "handle embedded \\n" );
+ok( !match_glob( "foo\nbar", "foobar" ) );
+
+ok( match_glob( 'test[abc]', 'testa' ), "[abc]" );
+ok( match_glob( 'test[abc]', 'testb' ) );
+ok( match_glob( 'test[abc]', 'testc' ) );
+ok( !match_glob( 'test[abc]', 'testd' ) );
+
+ok( match_glob( 'foo$bar.*', 'foo$bar.c'), "escaping \$" );
+
+ok( match_glob( 'foo^bar.*', 'foo^bar.c'), "escaping ^" );
+
+ok( match_glob( 'foo|bar.*', 'foo|bar.c'), "escaping |" );
+
+
+ok( match_glob( '{foo,{bar,baz}}', 'foo'), "{foo,{bar,baz}}" );
+ok( match_glob( '{foo,{bar,baz}}', 'bar') );
+ok( match_glob( '{foo,{bar,baz}}', 'baz') );
+ok( !match_glob( '{foo,{bar,baz}}', 'foz') );
+
+ok( match_glob( 'foo@bar', 'foo@bar'), '@ character');
+ok( match_glob( 'foo$bar', 'foo$bar'), '$ character');
+ok( match_glob( 'foo%bar', 'foo%bar'), '% character');