1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
#!/usr/bin/env python3
from sys import exit
from test.http_test import HTTPTest
from misc.wget_file import WgetFile
"""
This test executed Wget in recursive mode with a rejected log outputted.
"""
############# File Definitions ###############################################
# All page bodies below are served by the test HTTP server.  The harness
# substitutes {{port}} with the actual server port before serving and before
# comparing downloaded files.
# Root page: links to the second page so the recursive crawl has a chain to
# follow.
mainpage = """
<html>
<head>
<title>Main Page</title>
</head>
<body>
<p>
Recurse to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
</p>
</body>
</html>
"""
# Second page: links forward to the third page, and back to the already
# downloaded main page — the latter should produce a BLACKLIST entry in the
# rejected log.
secondpage = """
<html>
<head>
<title>Second Page</title>
</head>
<body>
<p>
Recurse to a <a href="http://localhost:{{port}}/thirdpage.html">third page</a>.
Try the blacklisted <a href="http://localhost:{{port}}/index.html">main page</a>.
</p>
</body>
</html>
"""
# Third page: links to a robots-disallowed file (expected ROBOTS entry) and to
# a foreign host (expected SPANNEDHOST entry, since -H is not used).
thirdpage = """
<html>
<head>
<title>Third Page</title>
</head>
<body>
<p>
Try a hidden <a href="http://localhost:{{port}}/dummy.txt">dummy file</a>.
Try to leave to <a href="http://no.such.domain/">another domain</a>.
</p>
</body>
</html>
"""
# robots.txt served by the test server; forbids dummy.txt for all agents.
robots = """
User-agent: *
Disallow: /dummy.txt
"""
# Expected contents of the --rejected-log output: a tab-separated header plus
# one row per rejected URL (rejection reason, the URL's parsed fields, and the
# parent URL's parsed fields).
log = """\
REASON\tU_URL\tU_SCHEME\tU_HOST\tU_PORT\tU_PATH\tU_PARAMS\tU_QUERY\tU_FRAGMENT\tP_URL\tP_SCHEME\tP_HOST\tP_PORT\tP_PATH\tP_PARAMS\tP_QUERY\tP_FRAGMENT
BLACKLIST\thttp%3A//localhost%3A{{port}}/index.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tindex.html\t\t\t\thttp%3A//localhost%3A{{port}}/secondpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tsecondpage.html\t\t\t
ROBOTS\thttp%3A//localhost%3A{{port}}/dummy.txt\tSCHEME_HTTP\tlocalhost\t{{port}}\tdummy.txt\t\t\t\thttp%3A//localhost%3A{{port}}/thirdpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tthirdpage.html\t\t\t
SPANNEDHOST\thttp%3A//no.such.domain/\tSCHEME_HTTP\tno.such.domain\t80\t\t\t\t\thttp%3A//localhost%3A{{port}}/thirdpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tthirdpage.html\t\t\t
"""
# Body of the robots-blocked file; it is never downloaded, so the content is
# irrelevant to the test.
dummyfile = "Don't care."
# Files the test HTTP server will serve.
index_html = WgetFile ("index.html", mainpage)
secondpage_html = WgetFile ("secondpage.html", secondpage)
thirdpage_html = WgetFile ("thirdpage.html", thirdpage)
robots_txt = WgetFile ("robots.txt", robots)
dummy_txt = WgetFile ("dummy.txt", dummyfile)
# Expected --rejected-log output, compared after the run.
log_csv = WgetFile ("log.csv", log)
# -nd: no directories; -r: recursive; --rejected-log: record every rejected
# URL (and why) in log.csv.
WGET_OPTIONS = "-nd -r --rejected-log log.csv"
WGET_URLS = [["index.html"]]
Files = [[index_html, secondpage_html, thirdpage_html, robots_txt, dummy_txt]]
ExpectedReturnCode = 0
# dummy.txt is deliberately absent (blocked by robots.txt); log.csv is the
# file --rejected-log is expected to produce.
ExpectedDownloadedFiles = [index_html, secondpage_html, thirdpage_html, robots_txt, log_csv]
############# Pre and Post Test Hooks ########################################
# Hook dictionaries consumed by the HTTPTest driver: what the server offers,
# how Wget is invoked, and what must exist (with which exit code) afterwards.
pre_test = {"ServerFiles": Files}
test_options = {"WgetCommands": WGET_OPTIONS, "Urls": WGET_URLS}
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
}

# Run the test and propagate its result as the process exit status.
err = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
).begin()

exit(err)
|