diff options
Diffstat (limited to '')
-rw-r--r-- | src/terminal-regex.cc | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/src/terminal-regex.cc b/src/terminal-regex.cc new file mode 100644 index 0000000..3856c64 --- /dev/null +++ b/src/terminal-regex.cc @@ -0,0 +1,390 @@ +/* + * Copyright © 2015 Egmont Koblinger + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include <glib.h> +#include <stdio.h> + +#include "terminal-regex.hh" + +#ifdef TERMINAL_REGEX_MAIN + +/* Shorthand for expecting the pattern to match the entire input string */ +#define ENTIRE ((char *) 1) + +static char* +get_match (const char *pattern, const char *string, GRegexMatchFlags match_flags) +{ + GRegex *regex; + GMatchInfo *match_info; + gchar *match; + + regex = g_regex_new (pattern, GRegexCompileFlags(0), GRegexMatchFlags(0), nullptr); + g_regex_match (regex, string, match_flags, &match_info); + match = g_match_info_fetch (match_info, 0); + + g_free (regex); + g_free (match_info); + return match; +} + +/* Macros rather than functions to report useful line numbers on failure. */ +#define assert_match(__pattern, __string, __expected) do { \ + gchar *__actual_match = get_match(__pattern, __string, GRegexMatchFlags(0)); \ + const gchar *__expected_match = __expected; \ + if (__expected_match == ENTIRE) __expected_match = __string; \ + g_assert_cmpstr(__actual_match, ==, __expected_match); \ + g_free (__actual_match); \ +} while (0) + +#define assert_match_anchored(__pattern, __string, __expected) do { \ + gchar *__actual_match = get_match(__pattern, __string, G_REGEX_MATCH_ANCHORED); \ + const gchar *__expected_match = __expected; \ + if (__expected_match == ENTIRE) __expected_match = __string; \ + g_assert_cmpstr(__actual_match, ==, __expected_match); \ + g_free (__actual_match); \ +} while (0) + +int +main (int argc, char **argv) +{ + /* SCHEME is case insensitive */ + assert_match_anchored (SCHEME, "http", ENTIRE); + assert_match_anchored (SCHEME, "HTTPS", ENTIRE); + + /* USER is nonempty, alphanumeric, dot, plus and dash */ + assert_match_anchored (USER, "", nullptr); + assert_match_anchored (USER, "dr.john-smith", ENTIRE); + assert_match_anchored (USER, "abc+def@ghi", "abc+def"); + + /* PASS is optional colon-prefixed value, allowing quite some characters, but definitely not @ */ + assert_match_anchored (PASS, "", ENTIRE); + assert_match_anchored (PASS, "nocolon", ""); + assert_match_anchored (PASS, ":s3cr3T", ENTIRE); + assert_match_anchored (PASS, ":$?#@host", ":$?#"); + + /* Hostname of at least 1 component, containing at least one non-digit in at least one of the segments */ + assert_match_anchored (HOSTNAME1, "example.com", ENTIRE); + assert_match_anchored (HOSTNAME1, "a-b.c-d", ENTIRE); + assert_match_anchored (HOSTNAME1, "a_b", "a"); /* TODO: can/should we totally abort here? */ + assert_match_anchored (HOSTNAME1, "déjà-vu.com", ENTIRE); + assert_match_anchored (HOSTNAME1, "➡.ws", ENTIRE); + assert_match_anchored (HOSTNAME1, "cömbining-áccents", ENTIRE); + assert_match_anchored (HOSTNAME1, "12", nullptr); + assert_match_anchored (HOSTNAME1, "12.34", nullptr); + assert_match_anchored (HOSTNAME1, "12.ab", ENTIRE); +// assert_match_anchored (HOSTNAME1, "ab.12", nullptr); /* errr... could we fail here?? */ + + /* Hostname of at least 2 components, containing at least one non-digit in at least one of the segments */ + assert_match_anchored (HOSTNAME2, "example.com", ENTIRE); + assert_match_anchored (HOSTNAME2, "example", nullptr); + assert_match_anchored (HOSTNAME2, "12", nullptr); + assert_match_anchored (HOSTNAME2, "12.34", nullptr); + assert_match_anchored (HOSTNAME2, "12.ab", ENTIRE); + assert_match_anchored (HOSTNAME2, "ab.12", nullptr); +// assert_match_anchored (HOSTNAME2, "ab.cd.12", nullptr); /* errr... could we fail here?? */ + + /* IPv4 segment (number between 0 and 255) */ + assert_match_anchored (DEFS "(?&S4)", "0", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "1", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "9", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "10", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "99", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "100", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "200", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "250", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "255", ENTIRE); + assert_match_anchored (DEFS "(?&S4)", "256", nullptr); + assert_match_anchored (DEFS "(?&S4)", "260", nullptr); + assert_match_anchored (DEFS "(?&S4)", "300", nullptr); + assert_match_anchored (DEFS "(?&S4)", "1000", nullptr); + assert_match_anchored (DEFS "(?&S4)", "", nullptr); + assert_match_anchored (DEFS "(?&S4)", "a1b", nullptr); + + /* IPv4 addresses */ + assert_match_anchored (DEFS "(?&IPV4)", "11.22.33.44", ENTIRE); + assert_match_anchored (DEFS "(?&IPV4)", "0.1.254.255", ENTIRE); + assert_match_anchored (DEFS "(?&IPV4)", "75.150.225.300", nullptr); + assert_match_anchored (DEFS "(?&IPV4)", "1.2.3.4.5", "1.2.3.4"); /* we could also bail out and not match at all */ + + /* IPv6 addresses */ + assert_match_anchored (DEFS "(?&IPV6)", "11:::22", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22::33:44::55:66", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "dead::beef", ENTIRE); + assert_match_anchored (DEFS "(?&IPV6)", "faded::bee", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "live::pork", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "::1", ENTIRE); + assert_match_anchored (DEFS "(?&IPV6)", "11::22:33::44", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:::33", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "dead:beef::192.168.1.1", ENTIRE); + assert_match_anchored (DEFS "(?&IPV6)", "192.168.1.1", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77:87654", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22::33:45678", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:192.168.1.12345", nullptr); + + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77", nullptr); /* no :: */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77:88", ENTIRE); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77:88:99", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "::11:22:33:44:55:66:77", ENTIRE); /* :: at the start */ + assert_match_anchored (DEFS "(?&IPV6)", "::11:22:33:44:55:66:77:88", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33::44:55:66:77", ENTIRE); /* :: in the middle */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33::44:55:66:77:88", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77::", ENTIRE); /* :: at the end */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77:88::", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "::", ENTIRE); /* :: only */ + + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:192.168.1.1", nullptr); /* no :: */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:192.168.1.1", ENTIRE); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66:77:192.168.1.1", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "::11:22:33:44:55:192.168.1.1", ENTIRE); /* :: at the start */ + assert_match_anchored (DEFS "(?&IPV6)", "::11:22:33:44:55:66:192.168.1.1", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33::44:55:192.168.1.1", ENTIRE); /* :: in the imddle */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33::44:55:66:192.168.1.1", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55::192.168.1.1", ENTIRE); /* :: at the end(ish) */ + assert_match_anchored (DEFS "(?&IPV6)", "11:22:33:44:55:66::192.168.1.1", nullptr); + assert_match_anchored (DEFS "(?&IPV6)", "::192.168.1.1", ENTIRE); /* :: only(ish) */ + + /* URL_HOST is either a hostname, or an IPv4 address, or a bracket-enclosed IPv6 address */ + assert_match_anchored (DEFS URL_HOST, "example", ENTIRE); + assert_match_anchored (DEFS URL_HOST, "example.com", ENTIRE); + assert_match_anchored (DEFS URL_HOST, "11.22.33.44", ENTIRE); + assert_match_anchored (DEFS URL_HOST, "[11.22.33.44]", nullptr); + assert_match_anchored (DEFS URL_HOST, "dead::be:ef", "dead"); /* TODO: can/should we totally abort here? */ + assert_match_anchored (DEFS URL_HOST, "[dead::be:ef]", ENTIRE); + + /* EMAIL_HOST is either an at least two-component hostname, or a bracket-enclosed IPv[46] address */ + assert_match_anchored (DEFS EMAIL_HOST, "example", nullptr); + assert_match_anchored (DEFS EMAIL_HOST, "example.com", ENTIRE); + assert_match_anchored (DEFS EMAIL_HOST, "11.22.33.44", nullptr); + assert_match_anchored (DEFS EMAIL_HOST, "[11.22.33.44]", ENTIRE); + assert_match_anchored (DEFS EMAIL_HOST, "[11.22.33.456]", nullptr); + assert_match_anchored (DEFS EMAIL_HOST, "dead::be:ef", nullptr); + assert_match_anchored (DEFS EMAIL_HOST, "[dead::be:ef]", ENTIRE); + + /* Number between 1 and 65535 (helper for port) */ + assert_match_anchored (N_1_65535, "0", nullptr); + assert_match_anchored (N_1_65535, "1", ENTIRE); + assert_match_anchored (N_1_65535, "10", ENTIRE); + assert_match_anchored (N_1_65535, "100", ENTIRE); + assert_match_anchored (N_1_65535, "1000", ENTIRE); + assert_match_anchored (N_1_65535, "10000", ENTIRE); + assert_match_anchored (N_1_65535, "60000", ENTIRE); + assert_match_anchored (N_1_65535, "65000", ENTIRE); + assert_match_anchored (N_1_65535, "65500", ENTIRE); + assert_match_anchored (N_1_65535, "65530", ENTIRE); + assert_match_anchored (N_1_65535, "65535", ENTIRE); + assert_match_anchored (N_1_65535, "65536", nullptr); + assert_match_anchored (N_1_65535, "65540", nullptr); + assert_match_anchored (N_1_65535, "65600", nullptr); + assert_match_anchored (N_1_65535, "66000", nullptr); + assert_match_anchored (N_1_65535, "70000", nullptr); + assert_match_anchored (N_1_65535, "100000", nullptr); + assert_match_anchored (N_1_65535, "", nullptr); + assert_match_anchored (N_1_65535, "a1b", nullptr); + + /* PORT is an optional colon-prefixed value */ + assert_match_anchored (PORT, "", ENTIRE); + assert_match_anchored (PORT, ":1", ENTIRE); + assert_match_anchored (PORT, ":65535", ENTIRE); + assert_match_anchored (PORT, ":65536", ""); /* TODO: can/should we totally abort here? */ + + /* Parentheses are only allowed in matching pairs, see bug 763980. */ + /* TODO: add tests for PATHCHARS and PATHNONTERM; and/or URLPATH */ + assert_match_anchored (DEFS URLPATH, "/ab/cd", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/ab/cd.html.", "/ab/cd.html"); + assert_match_anchored (DEFS URLPATH, "/The_Offspring_(album)", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/The_Offspring)", "/The_Offspring"); + assert_match_anchored (DEFS URLPATH, "/a((b(c)d)e(f))", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/a((b(c)d)e(f)))", "/a((b(c)d)e(f))"); + assert_match_anchored (DEFS URLPATH, "/a(b).(c).", "/a(b).(c)"); + assert_match_anchored (DEFS URLPATH, "/a.(b.(c.).).(d.(e.).).)", "/a.(b.(c.).).(d.(e.).)"); + assert_match_anchored (DEFS URLPATH, "/a)b(c", "/a"); + assert_match_anchored (DEFS URLPATH, "/.", "/"); + assert_match_anchored (DEFS URLPATH, "/(.", "/"); + assert_match_anchored (DEFS URLPATH, "/).", "/"); + assert_match_anchored (DEFS URLPATH, "/().", "/()"); + assert_match_anchored (DEFS URLPATH, "/", ENTIRE); + assert_match_anchored (DEFS URLPATH, "", ENTIRE); + assert_match_anchored (DEFS URLPATH, "?", ENTIRE); + assert_match_anchored (DEFS URLPATH, "?param=value", ENTIRE); + assert_match_anchored (DEFS URLPATH, "#", ENTIRE); + assert_match_anchored (DEFS URLPATH, "#anchor", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/php?param[]=value1¶m[]=value2", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/foo?param1[index1]=value1¶m2[index2]=value2", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/[[[]][]]", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/[([])]([()])", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/([()])[([])]", ENTIRE); + assert_match_anchored (DEFS URLPATH, "/[(])", "/"); + assert_match_anchored (DEFS URLPATH, "/([)]", "/"); + + + /* Put the components together and test the big picture */ + + assert_match (REGEX_URL_AS_IS, "There's no URL here http:/foo", nullptr); + assert_match (REGEX_URL_AS_IS, "Visit http://example.com for details", "http://example.com"); + assert_match (REGEX_URL_AS_IS, "Trailing dot http://foo/bar.html.", "http://foo/bar.html"); + assert_match (REGEX_URL_AS_IS, "Trailing ellipsis http://foo/bar.html...", "http://foo/bar.html"); + assert_match (REGEX_URL_AS_IS, "Trailing comma http://foo/bar,baz,", "http://foo/bar,baz"); + assert_match (REGEX_URL_AS_IS, "Trailing semicolon http://foo/bar;baz;", "http://foo/bar;baz"); + assert_match (REGEX_URL_AS_IS, "See <http://foo/bar>", "http://foo/bar"); + assert_match (REGEX_URL_AS_IS, "<http://foo.bar/asdf.qwer.html>", "http://foo.bar/asdf.qwer.html"); + assert_match (REGEX_URL_AS_IS, "Go to http://192.168.1.1.", "http://192.168.1.1"); + assert_match (REGEX_URL_AS_IS, "If not, see <http://www.gnu.org/licenses/>.", "http://www.gnu.org/licenses/"); + assert_match (REGEX_URL_AS_IS, "<a href=\"http://foo/bar\">foo</a>", "http://foo/bar"); + assert_match (REGEX_URL_AS_IS, "<a href='http://foo/bar'>foo</a>", "http://foo/bar"); + assert_match (REGEX_URL_AS_IS, "<url>http://foo/bar</url>", "http://foo/bar"); + + assert_match (REGEX_URL_AS_IS, "http://", nullptr); + assert_match (REGEX_URL_AS_IS, "http://a", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://aa.", "http://aa"); + assert_match (REGEX_URL_AS_IS, "http://aa.b", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://aa.bb", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://aa.bb/c", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://aa.bb/cc", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://aa.bb/cc/", ENTIRE); + + assert_match (REGEX_URL_AS_IS, "HtTp://déjà-vu.com:10000/déjà/vu", ENTIRE); + assert_match (REGEX_URL_AS_IS, "HTTP://joe:sEcReT@➡.ws:1080", ENTIRE); + assert_match (REGEX_URL_AS_IS, "https://cömbining-áccents", ENTIRE); + + assert_match (REGEX_URL_AS_IS, "http://111.222.33.44", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://111.222.33.44/", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://111.222.33.44/foo", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://1.2.3.4:5555/xyz", ENTIRE); + assert_match (REGEX_URL_AS_IS, "https://[dead::beef]:12345/ipv6", ENTIRE); + assert_match (REGEX_URL_AS_IS, "https://[dead::beef:11.22.33.44]", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://1.2.3.4:", "http://1.2.3.4"); /* TODO: can/should we totally abort here? */ + assert_match (REGEX_URL_AS_IS, "https://dead::beef/no-brackets-ipv6", "https://dead"); /* ditto */ + assert_match (REGEX_URL_AS_IS, "http://111.222.333.444/", nullptr); + assert_match (REGEX_URL_AS_IS, "http://1.2.3.4:70000", "http://1.2.3.4"); /* TODO: can/should we totally abort here? */ + assert_match (REGEX_URL_AS_IS, "http://[dead::beef:111.222.333.444]", nullptr); + + /* '?' or '#' without '/', #7888 */ + assert_match (REGEX_URL_AS_IS, "http://foo.bar?", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://foo.bar?param=value", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://foo.bar:12345?param=value", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://1.2.3.4?param=value", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://[dead::beef]?param=value", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://foo.bar#", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://foo.bar#anchor", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://foo.bar:12345#anchor", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://1.2.3.4#anchor", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://[dead::beef]#anchor", ENTIRE); + + /* Username, password */ + assert_match (REGEX_URL_AS_IS, "http://joe@example.com", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://user.name:sec.ret@host.name", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://joe:secret@[::1]", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://dudewithnopassword:@example.com", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://safeguy:!#$%^&*@host", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http://invalidusername!@host", "http://invalidusername"); + + assert_match (REGEX_URL_AS_IS, "http://ab.cd/ef?g=h&i=j|k=l#m=n:o=p", ENTIRE); + assert_match (REGEX_URL_AS_IS, "http:///foo", nullptr); + + /* Parentheses are only allowed in matching pairs, see bug 763980. */ + assert_match (REGEX_URL_AS_IS, "https://en.wikipedia.org/wiki/The_Offspring_(album)", ENTIRE); + assert_match (REGEX_URL_AS_IS, "[markdown](https://en.wikipedia.org/wiki/The_Offspring)", "https://en.wikipedia.org/wiki/The_Offspring"); + assert_match (REGEX_URL_AS_IS, "[markdown](https://en.wikipedia.org/wiki/The_Offspring_(album))", "https://en.wikipedia.org/wiki/The_Offspring_(album)"); + assert_match (REGEX_URL_AS_IS, "[markdown](http://foo.bar/(a(b)c)d)e)f", "http://foo.bar/(a(b)c)d"); + assert_match (REGEX_URL_AS_IS, "[markdown](http://foo.bar/a)b(c", "http://foo.bar/a"); + + /* Apostrophes are allowed, except at trailing position if the URL is preceded by an apostrophe, see bug 448044. */ + assert_match (REGEX_URL_AS_IS, "https://en.wikipedia.org/wiki/Moore's_law", ENTIRE); + assert_match (REGEX_URL_AS_IS, "<a href=\"https://en.wikipedia.org/wiki/Moore's_law\">", "https://en.wikipedia.org/wiki/Moore's_law"); + assert_match (REGEX_URL_AS_IS, "https://en.wikipedia.org/wiki/Cryin'", ENTIRE); + assert_match (REGEX_URL_AS_IS, "<a href=\"https://en.wikipedia.org/wiki/Cryin'\">", "https://en.wikipedia.org/wiki/Cryin'"); + assert_match (REGEX_URL_AS_IS, "<a href='https://en.wikipedia.org/wiki/Aerosmith'>", "https://en.wikipedia.org/wiki/Aerosmith"); + + /* No scheme */ + assert_match (REGEX_URL_HTTP, "www.foo.bar/baz", ENTIRE); + assert_match (REGEX_URL_HTTP, "WWW3.foo.bar/baz", ENTIRE); + assert_match (REGEX_URL_HTTP, "FTP.FOO.BAR/BAZ", ENTIRE); /* FIXME if no scheme is given and url starts with ftp, can we make the protocol ftp instead of http? */ + assert_match (REGEX_URL_HTTP, "ftpxy.foo.bar/baz", ENTIRE); +// assert_match (REGEX_URL_HTTP, "ftp.123/baz", nullptr); /* errr... could we fail here?? */ + assert_match (REGEX_URL_HTTP, "foo.bar/baz", nullptr); + assert_match (REGEX_URL_HTTP, "abc.www.foo.bar/baz", nullptr); + assert_match (REGEX_URL_HTTP, "uvwww.foo.bar/baz", nullptr); + assert_match (REGEX_URL_HTTP, "xftp.foo.bar/baz", nullptr); + + /* file:/ or file://(hostname)?/ */ + assert_match (REGEX_URL_FILE, "file:", nullptr); + assert_match (REGEX_URL_FILE, "file:/", ENTIRE); + assert_match (REGEX_URL_FILE, "file://", nullptr); + assert_match (REGEX_URL_FILE, "file:///", ENTIRE); + assert_match (REGEX_URL_FILE, "file:////", nullptr); + assert_match (REGEX_URL_FILE, "file:etc/passwd", nullptr); + assert_match (REGEX_URL_FILE, "File:/etc/passwd", ENTIRE); + assert_match (REGEX_URL_FILE, "FILE:///etc/passwd", ENTIRE); + assert_match (REGEX_URL_FILE, "file:////etc/passwd", nullptr); + assert_match (REGEX_URL_FILE, "file://host.name", nullptr); + assert_match (REGEX_URL_FILE, "file://host.name/", ENTIRE); + assert_match (REGEX_URL_FILE, "file://host.name/etc", ENTIRE); + + assert_match (REGEX_URL_FILE, "See file:/.", "file:/"); + assert_match (REGEX_URL_FILE, "See file:///.", "file:///"); + assert_match (REGEX_URL_FILE, "See file:/lost+found.", "file:/lost+found"); + assert_match (REGEX_URL_FILE, "See file:///lost+found.", "file:///lost+found"); + + /* Email */ + assert_match (REGEX_EMAIL, "Write to foo@bar.com.", "foo@bar.com"); + assert_match (REGEX_EMAIL, "Write to <foo@bar.com>", "foo@bar.com"); + assert_match (REGEX_EMAIL, "Write to mailto:foo@bar.com.", "mailto:foo@bar.com"); + assert_match (REGEX_EMAIL, "Write to MAILTO:FOO@BAR.COM.", "MAILTO:FOO@BAR.COM"); + assert_match (REGEX_EMAIL, "Write to foo@[1.2.3.4]", "foo@[1.2.3.4]"); + assert_match (REGEX_EMAIL, "Write to foo@[1.2.3.456]", nullptr); + assert_match (REGEX_EMAIL, "Write to foo@[1::2345]", "foo@[1::2345]"); + assert_match (REGEX_EMAIL, "Write to foo@[dead::beef]", "foo@[dead::beef]"); + assert_match (REGEX_EMAIL, "Write to foo@1.2.3.4", nullptr); + assert_match (REGEX_EMAIL, "Write to foo@1.2.3.456", nullptr); + assert_match (REGEX_EMAIL, "Write to foo@1::2345", nullptr); + assert_match (REGEX_EMAIL, "Write to foo@dead::beef", nullptr); + assert_match (REGEX_EMAIL, "<baz email=\"foo@bar.com\"/>", "foo@bar.com"); + assert_match (REGEX_EMAIL, "<baz email='foo@bar.com'/>", "foo@bar.com"); + assert_match (REGEX_EMAIL, "<email>foo@bar.com</email>", "foo@bar.com"); + + /* Sip, examples from rfc 3261 */ + assert_match (REGEX_URL_VOIP, "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15", ENTIRE); + assert_match (REGEX_URL_VOIP, "sip:alice@atlanta.com", ENTIRE); + assert_match (REGEX_URL_VOIP, "sip:alice:secretword@atlanta.com;transport=tcp", ENTIRE); + assert_match (REGEX_URL_VOIP, "sips:alice@atlanta.com?subject=project%20x&priority=urgent", ENTIRE); + assert_match (REGEX_URL_VOIP, "sip:+1-212-555-1212:1234@gateway.com;user=phone", ENTIRE); + assert_match (REGEX_URL_VOIP, "sips:1212@gateway.com", ENTIRE); + assert_match (REGEX_URL_VOIP, "sip:alice@192.0.2.4", ENTIRE); + assert_match (REGEX_URL_VOIP, "sip:atlanta.com;method=REGISTER?to=alice%40atlanta.com", ENTIRE); + assert_match (REGEX_URL_VOIP, "SIP:alice;day=tuesday@atlanta.com", ENTIRE); + assert_match (REGEX_URL_VOIP, "Dial sip:alice@192.0.2.4.", "sip:alice@192.0.2.4"); + + /* Extremely long match, bug 770147 */ + assert_match (REGEX_URL_AS_IS, "http://www.example.com/ThisPathConsistsOfMoreThan1024Characters" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", ENTIRE); + + printf("terminal-regex tests passed :)\n"); + return 0; +} + +#endif /* TERMINAL_REGEX_MAIN */ |