From 7c186823d68db1d2bd078fb91a95d30cd12890da Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Tue, 22 Feb 2011 15:08:30 +0000 Subject: Text-Glob-0.09 --- Build.PL | 12 ++++ Changes | 37 +++++++++++ MANIFEST | 7 ++ META.yml | 21 ++++++ Makefile.PL | 14 ++++ lib/Text/Glob.pm | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ t/Text-Glob.t | 78 ++++++++++++++++++++++ 7 files changed, 364 insertions(+) create mode 100644 Build.PL create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 lib/Text/Glob.pm create mode 100644 t/Text-Glob.t diff --git a/Build.PL b/Build.PL new file mode 100644 index 0000000..fc5febe --- /dev/null +++ b/Build.PL @@ -0,0 +1,12 @@ +use strict; +use Module::Build; + +Module::Build + ->new( module_name => "Text::Glob", + license => 'perl', + build_requires => { + 'Test::More' => 0, + }, + create_makefile_pl => 'traditional', + ) + ->create_build_script; diff --git a/Changes b/Changes new file mode 100644 index 0000000..1e56c25 --- /dev/null +++ b/Changes @@ -0,0 +1,37 @@ +0.09 Tuesday 22nd February, 2010 + Compiled documentation fixes (collected by Tom Hukins from fixes on rt.cpan) + +0.08 Wednesday 2nd May, 2007 + Expose glob_to_regex_string (Joshua Hoblitt) + +0.07 Friday 14th July, 2006 + Explictly quote @ and %. Though they don't really need it to work + normally, it's needed for when you roundtrip the + regex back into text (like File::Find::Rule does). + +0.06 Monday 1st September, 2003 + - port to Module::Build + - Nested alternations fix from Mike Benson + +0.05 15th August, 2002 + - !match_glob("*.foo", "foo/.foo"); + - test suite overhaul + - backslash expansion fixed + - /[+^$|]/ made less 'special' + - handle embedded newlines in glob patterns + - add tests for 'foo[abc]' + - Many thanks go to Nick Cleaton for finding these + +0.04 14th August, 2002 + - $Text::Glob::{strict_leading_dot,strict_wildcard_slash} from + bug report from Nick Cleaton + - (quite poor) documentation of supported globbing constructs + +0.03 2nd August, 2002 + - complete work of 0.02 by escaping ) too. bug found by Andy Lester + +0.02 29th July, 2002 + - fix handling of ( and ? tokens + +0.01 21st July 2002 + - initital release diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..da39bc6 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,7 @@ +Changes +MANIFEST +lib/Text/Glob.pm +Makefile.PL +Build.PL +META.yml +t/Text-Glob.t diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..8024fc0 --- /dev/null +++ b/META.yml @@ -0,0 +1,21 @@ +--- +abstract: 'match globbing patterns against text' +author: + - 'Richard Clamp ' +build_requires: + Test::More: 0 +configure_requires: + Module::Build: 0.36 +generated_by: 'Module::Build version 0.3603' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: Text-Glob +provides: + Text::Glob: + file: lib/Text/Glob.pm + version: 0.09 +resources: + license: http://dev.perl.org/licenses/ +version: 0.09 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..5211baa --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,14 @@ +# Note: this file was auto-generated by Module::Build::Compat version 0.3603 +use ExtUtils::MakeMaker; +WriteMakefile +( + 'PL_FILES' => {}, + 'INSTALLDIRS' => 'site', + 'NAME' => 'Text::Glob', + 'EXE_FILES' => [], + 'VERSION_FROM' => 'lib/Text/Glob.pm', + 'PREREQ_PM' => { + 'Test::More' => 0 + } + ) +; diff --git a/lib/Text/Glob.pm b/lib/Text/Glob.pm new file mode 100644 index 0000000..8580fe6 --- /dev/null +++ b/lib/Text/Glob.pm @@ -0,0 +1,195 @@ +package Text::Glob; +use strict; +use Exporter; +use vars qw/$VERSION @ISA @EXPORT_OK + $strict_leading_dot $strict_wildcard_slash/; +$VERSION = '0.09'; +@ISA = 'Exporter'; +@EXPORT_OK = qw( glob_to_regex glob_to_regex_string match_glob ); + +$strict_leading_dot = 1; +$strict_wildcard_slash = 1; + +use constant debug => 0; + +sub glob_to_regex { + my $glob = shift; + my $regex = glob_to_regex_string($glob); + return qr/^$regex$/; +} + +sub glob_to_regex_string +{ + my $glob = shift; + my ($regex, $in_curlies, $escaping); + local $_; + my $first_byte = 1; + for ($glob =~ m/(.)/gs) { + if ($first_byte) { + if ($strict_leading_dot) { + $regex .= '(?=[^\.])' unless $_ eq '.'; + } + $first_byte = 0; + } + if ($_ eq '/') { + $first_byte = 1; + } + if ($_ eq '.' || $_ eq '(' || $_ eq ')' || $_ eq '|' || + $_ eq '+' || $_ eq '^' || $_ eq '$' || $_ eq '@' || $_ eq '%' ) { + $regex .= "\\$_"; + } + elsif ($_ eq '*') { + $regex .= $escaping ? "\\*" : + $strict_wildcard_slash ? "[^/]*" : ".*"; + } + elsif ($_ eq '?') { + $regex .= $escaping ? "\\?" : + $strict_wildcard_slash ? "[^/]" : "."; + } + elsif ($_ eq '{') { + $regex .= $escaping ? "\\{" : "("; + ++$in_curlies unless $escaping; + } + elsif ($_ eq '}' && $in_curlies) { + $regex .= $escaping ? "}" : ")"; + --$in_curlies unless $escaping; + } + elsif ($_ eq ',' && $in_curlies) { + $regex .= $escaping ? "," : "|"; + } + elsif ($_ eq "\\") { + if ($escaping) { + $regex .= "\\\\"; + $escaping = 0; + } + else { + $escaping = 1; + } + next; + } + else { + $regex .= $_; + $escaping = 0; + } + $escaping = 0; + } + print "# $glob $regex\n" if debug; + + return $regex; +} + +sub match_glob { + print "# ", join(', ', map { "'$_'" } @_), "\n" if debug; + my $glob = shift; + my $regex = glob_to_regex $glob; + local $_; + grep { $_ =~ $regex } @_; +} + +1; +__END__ + +=head1 NAME + +Text::Glob - match globbing patterns against text + +=head1 SYNOPSIS + + use Text::Glob qw( match_glob glob_to_regex ); + + print "matched\n" if match_glob( "foo.*", "foo.bar" ); + + # prints foo.bar and foo.baz + my $regex = glob_to_regex( "foo.*" ); + for ( qw( foo.bar foo.baz foo bar ) ) { + print "matched: $_\n" if /$regex/; + } + +=head1 DESCRIPTION + +Text::Glob implements glob(3) style matching that can be used to match +against text, rather than fetching names from a filesystem. If you +want to do full file globbing use the File::Glob module instead. + +=head2 Routines + +=over + +=item match_glob( $glob, @things_to_test ) + +Returns the list of things which match the glob from the source list. + +=item glob_to_regex( $glob ) + +Returns a compiled regex which is the equivalent of the globbing +pattern. + +=item glob_to_regex_string( $glob ) + +Returns a regex string which is the equivalent of the globbing +pattern. + +=back + +=head1 SYNTAX + +The following metacharacters and rules are respected. + +=over + +=item C<*> - match zero or more characters + +C matches C, C, C and many many more. + +=item C - match exactly one character + +C matches C, but not C, or C + +=item Character sets/ranges + +C matches C and C + +C matches C, C, and C + +=item alternation + +C matches C, C, and +C + +=item leading . must be explictly matched + +C<*.foo> does not match C<.bar.foo>. For this you must either specify +the leading . in the glob pattern (C<.*.foo>), or set +C<$Text::Glob::strict_leading_dot> to a false value while compiling +the regex. + +=item C<*> and C do not match / + +C<*.foo> does not match C. For this you must either +explicitly match the / in the glob (C<*/*.foo>), or set +C<$Text::Glob::strict_wildcard_slash> to a false value with compiling +the regex. + +=back + +=head1 BUGS + +The code uses qr// to produce compiled regexes, therefore this module +requires perl version 5.005_03 or newer. + +=head1 AUTHOR + +Richard Clamp + +=head1 COPYRIGHT + +Copyright (C) 2002, 2003, 2006, 2007 Richard Clamp. All Rights Reserved. + +This module is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=head1 SEE ALSO + +L, glob(3) + +=cut diff --git a/t/Text-Glob.t b/t/Text-Glob.t new file mode 100644 index 0000000..effb719 --- /dev/null +++ b/t/Text-Glob.t @@ -0,0 +1,78 @@ +#!perl -w +use strict; +use Test::More tests => 44; + +BEGIN { use_ok('Text::Glob', qw( glob_to_regex match_glob ) ) } + +my $regex = glob_to_regex( 'foo' ); +is( ref $regex, 'Regexp', "glob_to_regex hands back a regex" ); +ok( 'foo' =~ $regex, "matched foo" ); +ok( 'foobar' !~ $regex, "didn't match foobar" ); + +ok( match_glob( 'foo', 'foo' ), "absolute string" ); +ok( !match_glob( 'foo', 'foobar' ) ); + +ok( match_glob( 'foo.*', 'foo.' ), "* wildcard" ); +ok( match_glob( 'foo.*', 'foo.bar' ) ); +ok( !match_glob( 'foo.*', 'gfoo.bar' ) ); + +ok( match_glob( 'foo.?p', 'foo.cp' ), "? wildcard" ); +ok( !match_glob( 'foo.?p', 'foo.cd' ) ); + +ok( match_glob( 'foo.{c,h}', 'foo.h' ), ".{alternation,or,something}" ); +ok( match_glob( 'foo.{c,h}', 'foo.c' ) ); +ok( !match_glob( 'foo.{c,h}', 'foo.o' ) ); + +ok( match_glob( 'foo.\\{c,h}\\*', 'foo.{c,h}*' ), '\escaping' ); +ok( !match_glob( 'foo.\\{c,h}\\*', 'foo.\\c' ) ); + +ok( match_glob( 'foo.(bar)', 'foo.(bar)'), "escape ()" ); + +ok( !match_glob( '*.foo', '.file.foo' ), "strict . rule fail" ); +ok( match_glob( '.*.foo', '.file.foo' ), "strict . rule match" ); +{ +local $Text::Glob::strict_leading_dot; +ok( match_glob( '*.foo', '.file.foo' ), "relaxed . rule" ); +} + +ok( !match_glob( '*.fo?', 'foo/file.fob' ), "strict wildcard / fail" ); +ok( match_glob( '*/*.fo?', 'foo/file.fob' ), "strict wildcard / match" ); +{ +local $Text::Glob::strict_wildcard_slash; +ok( match_glob( '*.fo?', 'foo/file.fob' ), "relaxed wildcard /" ); +} + + +ok( !match_glob( 'foo/*.foo', 'foo/.foo' ), "more strict wildcard / fail" ); +ok( match_glob( 'foo/.f*', 'foo/.foo' ), "more strict wildcard / match" ); +{ +local $Text::Glob::strict_wildcard_slash; +ok( match_glob( '*.foo', 'foo/.foo' ), "relaxed wildcard /" ); +} + +ok( match_glob( 'f+.foo', 'f+.foo' ), "properly escape +" ); +ok( !match_glob( 'f+.foo', 'ffff.foo' ) ); + +ok( match_glob( "foo\nbar", "foo\nbar" ), "handle embedded \\n" ); +ok( !match_glob( "foo\nbar", "foobar" ) ); + +ok( match_glob( 'test[abc]', 'testa' ), "[abc]" ); +ok( match_glob( 'test[abc]', 'testb' ) ); +ok( match_glob( 'test[abc]', 'testc' ) ); +ok( !match_glob( 'test[abc]', 'testd' ) ); + +ok( match_glob( 'foo$bar.*', 'foo$bar.c'), "escaping \$" ); + +ok( match_glob( 'foo^bar.*', 'foo^bar.c'), "escaping ^" ); + +ok( match_glob( 'foo|bar.*', 'foo|bar.c'), "escaping |" ); + + +ok( match_glob( '{foo,{bar,baz}}', 'foo'), "{foo,{bar,baz}}" ); +ok( match_glob( '{foo,{bar,baz}}', 'bar') ); +ok( match_glob( '{foo,{bar,baz}}', 'baz') ); +ok( !match_glob( '{foo,{bar,baz}}', 'foz') ); + +ok( match_glob( 'foo@bar', 'foo@bar'), '@ character'); +ok( match_glob( 'foo$bar', 'foo$bar'), '$ character'); +ok( match_glob( 'foo%bar', 'foo%bar'), '% character'); -- cgit v1.2.1