summary refs log tree commit diff stats
path: root/src/regexp/syntax/make_perl_groups.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexp/syntax/make_perl_groups.pl')
-rwxr-xr-x src/regexp/syntax/make_perl_groups.pl 113
1 files changed, 113 insertions, 0 deletions
diff --git a/src/regexp/syntax/make_perl_groups.pl b/src/regexp/syntax/make_perl_groups.pl
new file mode 100755
index 0000000..80a2c9a
--- /dev/null
+++ b/src/regexp/syntax/make_perl_groups.pl
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+# Copyright 2008 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Modified version of RE2's make_perl_groups.pl.
+
+# Generate table entries giving character ranges
+# for POSIX/Perl character classes. Rather than
+# work out each definition by hand, it is easier to ask
+# Perl about each code point from 0 through 128 and
+# write down its answer.
+
+# Bracket-class names for the "posix" group; ComputeClass wraps each in [...].
+@posixclasses = qw(
+    [:alnum:]
+    [:alpha:]
+    [:ascii:]
+    [:blank:]
+    [:cntrl:]
+    [:digit:]
+    [:graph:]
+    [:lower:]
+    [:print:]
+    [:punct:]
+    [:space:]
+    [:upper:]
+    [:word:]
+    [:xdigit:]
+);
+
+# Backslash escapes to tabulate for the "perl" group.
+@perlclasses = qw( \d \s \w );
+
+# Forced answers, keyed "class:codepoint"; a defined value here wins over
+# whatever the running Perl reports for that class and code point.
+%overrides = (
+    # Prior to Perl 5.18, \s did not match vertical tab.
+    # RE2 preserves that original behaviour.
+    '\s:11' => 0,
+);
+
+# ComputeClass($class) returns a list of [lo, hi] array refs, one per maximal
+# run of code points in 0..128 that match the bracket expression "[$class]".
+# An entry "$class:$code" in %overrides, when defined, replaces Perl's answer.
+sub ComputeClass($) {
+    my ($class) = @_;
+    my $pattern = "[$class]";
+    my (@ranges, $run_start);
+    # 256 is a terminator: it is never tested, so it closes any run still open
+    # (such a run would be recorded as ending at 255).
+    foreach my $code (0 .. 128, 256) {
+        my $member = $code <= 128
+            && ($overrides{"$class:$code"} // chr($code) =~ $pattern);
+        if ($member) {
+            $run_start //= $code;
+            next;
+        }
+        push @ranges, [$run_start, $code - 1] if defined $run_start;
+        undef $run_start;
+    }
+    return @ranges;
+}
+
+# PrintClass($cname, $name, @ranges) prints the Go table "code$cname" built from
+# the [lo, hi] pairs in @ranges; returns map entries for $name (+1) and its negation (-1).
+sub PrintClass($$@) {
+    my ($cname, $name, @ranges) = @_;
+    print "var code$cname = []rune{ /* $name */\n";
+    printf "\t0x%x, 0x%x,\n", @{$_}[0, 1] for @ranges;
+    print "}\n\n";
+    # The negated name: [:alpha:] turns into [:^alpha:], while \d turns into \D.
+    # Declared with "my" so it no longer leaks out as the package global $negname.
+    my $negname = $name;
+    if ($negname =~ /:/) {
+        $negname =~ s/:/:^/;
+    } else {
+        $negname =~ y/a-z/A-Z/;
+    }
+    return "\t`$name`: {+1, code$cname},\n" .
+        "\t`$negname`: {-1, code$cname},\n";
+}
+
+# Running table number; PrintClasses bumps it once per class, so every
+# generated "code<N>" identifier is distinct across both groups.
+my $gen = 0;
+
+# PrintClasses($cname, @classes) prints a rune table for each class and then
+# the "${cname}Group" map over them. Returns the number of classes handled.
+sub PrintClasses($@) {
+    my ($cname, @classes) = @_;
+    my @entries;
+    foreach my $class (@classes) {
+        push @entries, PrintClass(++$gen, $class, ComputeClass($class));
+    }
+    print "var ${cname}Group = map[string]charGroup{\n";
+    print @entries, "}\n";
+    return scalar @entries;
+}
+
+print <<EOF;    # fixed preamble of the generated Go file, sent to the default output handle
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
+// make_perl_groups.pl >perl_groups.go
+
+package syntax
+
+EOF
+# "perl" is emitted first, so its tables take the lowest code<N> numbers.
+PrintClasses("perl", @perlclasses);
+PrintClasses("posix", @posixclasses);