summaryrefslogtreecommitdiffstats
path: root/tests/Test-meta-robots.px
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:04:52 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:04:52 +0000
commit5e03c718f4e7ff13cb6834eda737c269ebed02ad (patch)
treebfad3f5be123f000fdb03e26400050dece33d72f /tests/Test-meta-robots.px
parentInitial commit. (diff)
downloadwget-5e03c718f4e7ff13cb6834eda737c269ebed02ad.tar.xz
wget-5e03c718f4e7ff13cb6834eda737c269ebed02ad.zip
Adding upstream version 1.21.3.upstream/1.21.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/Test-meta-robots.px')
-rwxr-xr-xtests/Test-meta-robots.px113
1 files changed, 113 insertions, 0 deletions
diff --git a/tests/Test-meta-robots.px b/tests/Test-meta-robots.px
new file mode 100755
index 0000000..4a90338
--- /dev/null
+++ b/tests/Test-meta-robots.px
@@ -0,0 +1,113 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use HTTPTest;
+
+# This test checks that Wget parses "nofollow" when it appears in <meta
+# name="robots"> tags, regardless of where in a list of comma-separated
+# values it appears, and regardless of spelling.
+#
+# Three different files contain links to the file "bombshell.html", each
+# with "nofollow" set, at various positions in a list of values for a
+# <meta name="robots"> tag, and with various degrees of separating
+# whitesspace. If bombshell.html is downloaded, the test
+# has failed.
+
+###############################################################################
+
+my $nofollow_start = <<EOF;
+<meta name="roBoTS" content="noFolLow , foo, bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_mid = <<EOF;
+<meta name="rObOts" content=" foo , NOfOllow , bar ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_end = <<EOF;
+<meta name="RoBotS" content="foo,BAr, nofOLLOw ">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+my $nofollow_solo = <<EOF;
+<meta name="robots" content="nofollow">
+<a href="/bombshell.html">Don't follow me!</a>
+EOF
+
+# code, msg, headers, content
+my %urls = (
+ '/start.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_start,
+ },
+ '/mid.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_mid,
+ },
+ '/end.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_end,
+ },
+ '/solo.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $nofollow_solo,
+ },
+ '/bombshell.html' => {
+ code => "200",
+ msg => "Ok",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => 'Hello',
+ },
+);
+
+my $cmdline = $WgetTest::WGETPATH . " -r -nd "
+ . join(' ',(map "http://localhost:{{port}}/$_.html",
+ qw(start mid end solo)));
+
+my $expected_error_code = 0;
+
+my %expected_downloaded_files = (
+ 'start.html' => {
+ content => $nofollow_start,
+ },
+ 'mid.html' => {
+ content => $nofollow_mid,
+ },
+ 'end.html' => {
+ content => $nofollow_end,
+ },
+ 'solo.html' => {
+ content => $nofollow_solo,
+ }
+);
+
+###############################################################################
+
+my $the_test = HTTPTest->new (input => \%urls,
+ cmdline => $cmdline,
+ errcode => $expected_error_code,
+ output => \%expected_downloaded_files);
+exit $the_test->run();
+
+# vim: et ts=4 sw=4