summaryrefslogtreecommitdiffstats
path: root/data/files
diff options
context:
space:
mode:
Diffstat (limited to 'data/files')
-rw-r--r--data/files/allowed-ancient-files7
-rw-r--r--data/files/allowed-python-files7
-rw-r--r--data/files/banned/lenna/blacklist23
-rw-r--r--data/files/banned/lenna/whitelist4
-rw-r--r--data/files/binary-file-extensions100
-rw-r--r--data/files/build-path-regex6
-rw-r--r--data/files/compressed-file-extensions9
-rw-r--r--data/files/generic-python-modules9
-rw-r--r--data/files/obsolete-paths6
-rw-r--r--data/files/pkg-config-bad-regex47
-rw-r--r--data/files/privacy-breaker-fragments34
-rw-r--r--data/files/privacy-breaker-tag-attr22
-rw-r--r--data/files/privacy-breaker-websites34
-rw-r--r--data/files/standard-files28
-rw-r--r--data/files/vcs-control-files8
15 files changed, 344 insertions, 0 deletions
diff --git a/data/files/allowed-ancient-files b/data/files/allowed-ancient-files
new file mode 100644
index 0000000..cbc4000
--- /dev/null
+++ b/data/files/allowed-ancient-files
@@ -0,0 +1,7 @@
+# Filename regexes to exclude from package-contains-ancient-file
+
+# Rust crates, upstream, have their Cargo.toml canonicalized on upload, and
+# that and other cargo-generated files have their timestamps set to 0 in the
+# resulting crate tarball.
+^usr/share/cargo/registry/.*/Cargo\.toml$
+^usr/share/cargo/registry/.*/\.cargo_vcs_info\.json$
diff --git a/data/files/allowed-python-files b/data/files/allowed-python-files
new file mode 100644
index 0000000..d559938
--- /dev/null
+++ b/data/files/allowed-python-files
@@ -0,0 +1,7 @@
+# Regular expressions (/i) of filenames that are allowed in top-level Python
+# directories such as /usr/lib/python3/dist-packages
+
+\.egg-info$
+\.so$
+\.pth$
+\.py[ic]?$
diff --git a/data/files/banned/lenna/blacklist b/data/files/banned/lenna/blacklist
new file mode 100644
index 0000000..3d7b7e8
--- /dev/null
+++ b/data/files/banned/lenna/blacklist
@@ -0,0 +1,23 @@
+# format is
+# md5sum ~~ sha1 ~~ sha256 ~~ common name ~~ reference
+#
+# common name is the usual name of the file
+# reference is a link to the license/official source
+
+36ad1cdb7c7855fcc253062c767c5748 ~~ 13bb28a3b6a8f07783636e7611c25db20d36e6ba ~~ 67f03c78826e88f8fbefc01195803497c6c4ff452a4585a8a1c8a8eeb538e1e4 ~~ lena.jpg ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+3752999d8a4142d93eed4c8c97f0997c ~~ 4a8b9c9207de05db6b26604083202f05419e9afc ~~ 1f24a4dd0b5dbddf247f935b10b50665191943c813ff81f4b808fcffdbe422f1 ~~ lena.tif ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+49d8d697422232eeb28b71846e993ef5 ~~ d712504a0fdf2425ab08debe01c6b2737c4da553 ~~ 0e8c262873c283681ebf7594295603b78cd505f8cafbde4af0bdc6289f6bdb01 ~~ lena.jpg ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+9393259555eaaf91b58fe24a687fb60a ~~ 95beced0503c13c08beb6a8b157704382afa2f5f ~~ f9be71887ece95fb34c60d3910e4af5ae072b5f2d3c4863f0ec4afcf7bbf32b7 ~~ lena.png ~~ https://bugs.debian.org/827941
+
+bb431a4c0c3280c7ca62f43e9882174c ~~ 73ed70526b5f4ad49c5f43d5849087a4a8dd00cc ~~ 6054bdfd37d3a147cd99920f1921d8278879399aabdedbf118f0960f592766ee ~~ lena.jpg ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+bcb76439e9848b22830ee44973664084 ~~ 658b1ca8c82055e2f8d57773b7dbcdead0af6c14 ~~ d4268e35d21def2ba215607d202fc52501c91fda5f7a67c5bd0e2942f9afbcb6 ~~ lena.mat ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+ec2daed3fa3835749a7d0423d36c652a ~~ 69e4bd806b20e51b5caa062f6d510ed796e1d067 ~~ 8974279a1c6d4711bea6d7db49e4fde2458615ccc80265ef1b4bc5ba16b84cad ~~ lenna.pgm ~~ Lenna test image ~~ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=758442
+
+ef70b3083e06b236bd804215c0382d09 ~~ bbbe2215e4bc6f38e3829bbe78ac6ca8008249f5 ~~ 1b33c59bcd9ced166f4b6f726ce05bd3c182ac049444178837ddede2a4b4550b ~~ lena.jpg ~~ https://bugs.debian.org/827941
+
+dc0f99bbc6d31e7ce4000cc7c1117dbb ~~ 3712585117d7f181f76a67cbbdca9c558cb39110 ~~ 8640b22549789d5aeabebb487b0b6cda05119c3bff8f67699850a0f8177532d7 ~~ lena_std.jpg ~~ https://bugs.debian.org/996093
diff --git a/data/files/banned/lenna/whitelist b/data/files/banned/lenna/whitelist
new file mode 100644
index 0000000..2ca0315
--- /dev/null
+++ b/data/files/banned/lenna/whitelist
@@ -0,0 +1,4 @@
+# md5sums of known-good Lenna images
+
+# libplacebo demos/lena.jpg
+0ea16a2c4b94f0c1a83d11278655ac9f
diff --git a/data/files/binary-file-extensions b/data/files/binary-file-extensions
new file mode 100644
index 0000000..a6a61d8
--- /dev/null
+++ b/data/files/binary-file-extensions
@@ -0,0 +1,100 @@
+3g2
+3gp
+TTF
+aac
+aif
+ani
+asf
+aps
+avi
+avif
+avifs
+bmp
+cb7
+cbr
+cbt
+cbz
+chm
+dbf
+der
+djvu
+dll
+dmg
+doc
+docx
+emf
+epub
+exe
+flac
+gif
+gmo
+gpg
+icc
+icm
+icns
+ico
+jar
+jpeg
+jpg
+jsonlz4
+kbx
+m4a
+m4s
+mjpg
+mkv
+mo
+mov
+mp3
+mp4
+mpg
+odg
+odp
+ods
+odt
+oga
+ogg
+ogv
+opus
+otf
+otp
+p12
+pdf
+pfx
+pgp
+png
+ppt
+pptx
+psd
+qix
+sbn
+sbx
+shp
+shx
+spx
+sqlite
+sqlite3
+suo
+svgz
+swf
+tar
+tga
+tgz
+tbz2
+txz
+tif
+tiff
+ttf
+war
+wasm
+wav
+webp
+webm
+woff
+woff2
+wv
+xcf
+xls
+xlsx
+xpi
+xv
+zip
diff --git a/data/files/build-path-regex b/data/files/build-path-regex
new file mode 100644
index 0000000..b99807b
--- /dev/null
+++ b/data/files/build-path-regex
@@ -0,0 +1,6 @@
+# a list of known build path regex (xms) without leading /
+^var/cache/pbuilder/build/
+^var/lib/sbuild/
+^var/lib/buildd/
+^build/
+^tmp/buildd/
diff --git a/data/files/compressed-file-extensions b/data/files/compressed-file-extensions
new file mode 100644
index 0000000..adebbdf
--- /dev/null
+++ b/data/files/compressed-file-extensions
@@ -0,0 +1,9 @@
+# list of compressed file extensions
+# please alpha sort
+bz
+bz2
+gz
+tgz
+xz
+z
+Z
diff --git a/data/files/generic-python-modules b/data/files/generic-python-modules
new file mode 100644
index 0000000..b8777d5
--- /dev/null
+++ b/data/files/generic-python-modules
@@ -0,0 +1,9 @@
+# Regular expressions (/i) of overly-generic Python module names.
+
+backports?
+core
+docs?
+site
+tests?
+examples?
+scripts?
diff --git a/data/files/obsolete-paths b/data/files/obsolete-paths
new file mode 100644
index 0000000..825bb40
--- /dev/null
+++ b/data/files/obsolete-paths
@@ -0,0 +1,6 @@
+# obsolete path (regexp) -> newer path ~~ Reference
+#
+# Please alpha sort
+^etc/bash_completion.d/ -> usr/share/bash-completion/completions ~~ Ensure new filename matches stricter requirements (see https://bugs.debian.org/776954 and https://bugs.debian.org/814599)
+^etc/dhcp3/ -> etc/dhcp ~~ (see also https://bugs.debian.org/673029)
+^etc/X11/fonts/X11R7/ -> etc/X11/fonts ~~ (see also https://bugs.debian.org/364530#24)
diff --git a/data/files/pkg-config-bad-regex b/data/files/pkg-config-bad-regex
new file mode 100644
index 0000000..0b8500a
--- /dev/null
+++ b/data/files/pkg-config-bad-regex
@@ -0,0 +1,47 @@
+# a list of pkg-config bad constructions
+# regex (xms)
+# found in omnithread3.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-[DU]__linux__) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-[DU]__x86_64__) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-D__OSVERSION__=\d+) \s
+# found in znc.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-g\d*) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-D_FORTIFY_SOURCE=\d+) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-O[s0-9]) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-W\S*) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-fvisibility=\w+) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-fPI[CE]) \s
+# found in dolfin.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?stack-protector) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (--param=ssp-buffer-size=\d+) \s
+# found in clam_core.pc
+[:=\h](-[IL](?:/usr/local/|/var/cache/pbuilder/build/|/var/lib/s?buildd?/)\S*)\s
+# found in scilab.pc
+^((?:[cC]flags\h*:|CFLAGS\h*=) (?:\V*\h)? -l\S+) \s
+# found in meep.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-march=\S+) \s
+# found in opensaml.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-[DU]NDEBUG) \s
+# found in libspatialindex.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-pedantic) \s
+# found in common-cpp.pc
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-[DU]NEW_STDCPP) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?check-new) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?inline) \s
+# found in postgresql, see 687208
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-pie) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?strict-aliasing) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?wrapv) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-fexcess-precision=\w+) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?unsafe-math-optimizations) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?signed-zeros) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?fp-contract) \s
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f(?:no-)?finite-math-only) \s
+# generic math
+^(?:[^:]*flags\h*:|[^=]*FLAGS\h*=) (?:\V*\h)? (-f\S+math) \s
+# found in libbt.pc
+^(?:[lL]ibs\h*:|LDFLAGS\h*=) (?:\V*\h)? (-Wl,z,relro) \s
+# unexpanded automake variable
+(@[^@\s]*@)
+
+
diff --git a/data/files/privacy-breaker-fragments b/data/files/privacy-breaker-fragments
new file mode 100644
index 0000000..2a329d0
--- /dev/null
+++ b/data/files/privacy-breaker-fragments
@@ -0,0 +1,34 @@
+# Manually maintained table of well-known privacy-breaking HTML/JS fragments
+# syntax:
+# keyword ~~regex (xsm) ~~ tag
+#
+# Note spaces on the right hand side of ~~ are assumed to be a part
+# of the regex. Also note that the regex will be applied to a string
+# in all lowercase.
+#
+# Please keep this sorted by tag.
+google_ad_client ~~ google_ad_client\s*= ~~ privacy-breach-google-adsense
+google-analytics.com/ga.js ~~ [\"\'](?:[^/.]?\.)?google-analytics.com/(?:ga|analytics).js[\"\'] ~~ privacy-breach-google-adsense
+urchintracker ~~ urchintracker\s*\(\s*\)\s*; ~~ privacy-breach-google-adsense
+adsense ~~ </?\s*adsense\s*> ~~ privacy-breach-google-adsense
+data-ad-client ~~ \s+data-ad-client\s* ~~ privacy-breach-google-adsense
+adsbygoogle ~~ =\s*window\.adsbygoogle ~~ privacy-breach-google-adsense
+googlesearchdomain ~~ googlesearchdomain\s*= ~~ privacy-breach-google-cse
+cse-branding ~~ <div\s+class="cse-branding-(?:logo|form)"\s*> ~~ privacy-breach-google-cse
+customsearchcontrol ~~ google\.search\.customsearchcontrol\s*\(\s*' ~~ privacy-breach-google-cse
+google.load ~~ google\.load\s*\(\s*'search' ~~ privacy-breach-google-cse
+customsearchengine ~~ </?\s*customsearchengine\s*> ~~ privacy-breach-google-cse
+partner-pub ~~ value\s*=\s*"partner-pub-\d+:?\d+" ~~ privacy-breach-google-cse
+cse.js ~~ //www[.]google[.]com/cse/cse[.]js\?cx[=] ~~ privacy-breach-google-cse
+plusone ~~ (?:<g:plusone|google\.com/js/plusone\.js\?onload=onloadcallback|src\s*=\s*'https?://apis\.google\.com/js/plusone\.js') ~~ privacy-breach-google-plus
+flattr ~~ (?:api\.flattr\.com/js/\d(\.\d)?/load\.js|<a\s+class\s*=\s*"flattrbutton"|data-flattr-uid\s*=|flattrloader\.(?:setup\s*\(\)|render\s*\(|addloadevent\s*\()) ~~ privacy-breach-donation
+facebook ~~ (?:facebook-jssdk|xmlns:fb="https?://www.facebook.com/20\d\d/fbml) ~~ privacy-breach-facebook
+ogp.me/ns/fb ~~ xmlns:fb="https?://ogp.me/ns/fb\#?" ~~ privacy-breach-facebook
+fb. ~~ fb\.(?:canvas|api|init|login|getloginstatus|event|ui|xfbml)(?!\()(?!<) ~~ privacy-breach-facebook
+fb: ~~ </?fb:(?:activity|comments|friendpile|like|like-box|login-button|name|profile-pic|recommendations)\s+[^>]*> ~~ privacy-breach-facebook
+piwik ~~ (?:piwik_url\s*=|[\'"]piwik\.js[\'\"]|end\s+piwik\h+(?:tag|code)|[\'\"]settrackerurl[\'\"]) ~~ privacy-breach-piwik
+pkbaseurl ~~ pkbaseurl\s*= ~~ privacy-breach-piwik
+sc_project ~~ var\s+sc_project\s*=\s*\d+\s*; ~~ privacy-breach-statistics-website
+twitter ~~ (?:src\s*=\s*["']https?|[+]\s*['"])://platform\.twitter\.com/widgets\.js['"] ~~ privacy-breach-twitter
+
+
diff --git a/data/files/privacy-breaker-tag-attr b/data/files/privacy-breaker-tag-attr
new file mode 100644
index 0000000..508f259
--- /dev/null
+++ b/data/files/privacy-breaker-tag-attr
@@ -0,0 +1,22 @@
+# tag attribute url file helper
+# format is name ~~ keyword1 && keyword2 || keyword3 && keyword4 ~~ secondary keyword ~~ regex (xsm): $1 is the whole match, $2 is the tag, $3 is the URL
+# in the regex, &URL is replaced by the URL regex
+# could not use @import
+import ~~ @import && url ~~ (([@]import) \s+ url \s* \( \s* \"(&URL)\" \s* \) \s*;)
+applet ~~ codebase="http && <applet || codebase="ftp && <applet || codebase="// && <applet ~~ (<(applet) (?:\s[^>]+)? \s+ codebase="(&URL)" [^>]*>)
+div src= ~~ src="http && <div || src="ftp && <div || src="// && <div ~~ (<(div) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+div data-href= ~~ data-href="http && <div || data-href="ftp && <div || data-href="// && <div ~~ (<(div) (?:\s[^>]+)? \s+ data-href="(&URL)" [^>]*>)
+embed ~~ src="http && <embed || src="ftp && <embed || src="// && <embed ~~ (<(embed) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+frame ~~ src="http && <frame || src="ftp && <frame || src="// && <frame ~~ (<(frame) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+iframe ~~ src="http && <iframe || src="ftp && <iframe || src="// && <iframe ~~ (<(iframe) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+img ~~ src="http && <img || src="ftp && <img || src="// && <img ~~ (<(img) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+input ~~ src="http && <input || src="ftp && <input || src="// && <input ~~ (<(input) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+link ~~ <link && href="http || <link && href="ftp|| <link && href="//~~ (<(link) (?:\s[^>]+)? \s+ href="(&URL)" [^>]*>)
+object codebase= ~~ codebase="http && <object || codebase="ftp && <object || codebase="// && <object ~~ (<(object) (?:\s[^>]+)? \s+ codebase="(&URL)" [^>]*>)
+object data= ~~ data="http && <object || data="ftp && <object || data="// && <object ~~ (<(object) (?:\s[^>]+)? \s+ data="(&URL)" [^>]*>)
+script ~~ src="http && <script || src="ftp && <script || src="// && <script ~~ (<(script) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+source ~~ src="http && <source || src="ftp && <source || src="// && <source ~~ (<(source) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+track ~~ src="http && <track || src="ftp && <track || src="// && <track ~~ (<(track) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+video src= ~~ src="http && <video || src="ftp && <video || src="// && <video ~~ (<(video) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
+video poster= ~~ poster="http && <video || poster="ftp && <video || poster="// && <video ~~ (<(video) (?:\s[^>]+)? \s+ poster="(&URL)" [^>]*>)
+audio ~~ src="http && <audio || src="ftp && <audio || src="// && <audio ~~ (<(audio) (?:\s[^>]+)? \s+ src="(&URL)" [^>]*>)
diff --git a/data/files/privacy-breaker-websites b/data/files/privacy-breaker-websites
new file mode 100644
index 0000000..2e957c7
--- /dev/null
+++ b/data/files/privacy-breaker-websites
@@ -0,0 +1,34 @@
+# Manually maintained table of well-known privacy-breaking websites
+# syntax:
+# key ~~regex (xsm) ~~ tag (optional assumed to be key) ~~ hint (optional)
+#
+# please always use lowercase for regex
+# Note spaces on the right hand side of ~~ are assumed to be a part
+# of the regex.
+#
+# Please keep this sorted by tag.
+privacy-breach-google-adsense ~~^(?:[^\./]+\.)?(?:googlesyndication\.com/pagead/(?:show_ads\.js|js/)|google-analytics\.com/(?:ga|urchin)\.js)
+privacy-breach-google-cse ~~^(?:[^\./]+\.)?google\.[^\./]+\.?/(:?afsonline/show_afs_search\.js|cse/api/branding\.css|coop/cse/brand\?[^/]+)$
+privacy-breach-google-plus ~~(?:^(?:[^\./]+\.)?google\.[^\./]+\.?/js/plusone\.[^\./]+\Z|^plus\.google\.com/)
+privacy-breach-donation ~~(?:(?:donate|paypal|support)(?:[^/_.]*)?(?:_?small)?\.(gif|jpe?g|png|svg)$|^(?:[^\./]+\.)?(?:flattr\.(?:com|net)|paypal(?:objects)?\.com|pledgie\.com|xoom\.com)/)
+privacy-breach-facebook ~~^(?:[^\./]+\.)?(?:facebook\.com|static\.ak\.fbcdn\.net)(?:/|\Z)
+privacy-breach-logo ~~(?:^(?:sflogo\.)?(?:sourceforge|sf)\.[^\./]+/sflogo\.php\?|/logos?/|(?:acrobat|clanlib|(?:fav|github-)?icons?|logos?_?(?:\d+|small)|mascot[^\./]*|youtube-icon[^\./]*|twitter_logo[^\./]*|doxygen|forkme_[^/\.]*|logos?_[^/\.]*|logos?)\.(gif|ico|jpe?g|png|svg)$)
+privacy-breach-piwik ~~/piwik\.php\?
+privacy-breach-statistics-website ~~^(?:(?:[^\./]+\.)?(?:cruel-carlota\.pagodabox\.com|linkexchange\.com|nedstatbasic\.net|onestat\.com|sitemeter\.com|statcounter\.com|webstats\.motigo\.com|digit\.com)(?:/|\Z)|/count(?:er)?\.cgi\?[^/]*\Z|\Acount(?:er)?\.)
+privacy-breach-w3c-valid-html ~~^(?:(?:[^\./]+\.)?w3.org/(?:icons/valid-|css-validator/images/)(?:[^/]+)?$|validator\.w3\.org(?:/|\Z))
+# already packaged in Debian (please alpha sort by package name)
+privacy-breach-uses-embedded-file-bootstrap ~~/bootstrap(?:-(?:\d\.?)+(b\d+)?)?(?:\.min)?\.(?:js|css)\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-bootstrap package.
+privacy-breach-uses-embedded-file-expect.js ~~/expect\.js/\d+(?:\.\d+)*/index(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-expect.js package.
+privacy-breach-uses-embedded-file-chai ~~/chai(?:\d+(?:\.\d+)*)?(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-chai package.
+privacy-breach-uses-embedded-file-html5shiv ~~/html5shiv(?:-printshiv)?(?:\d+(?:\.\d+)*)?(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the node-html5shiv package (virtual package).
+privacy-breach-uses-embedded-file-jquery ~~(?:(?!doxygen.*)/jquery(?:-(?:\d\.?)+(b\d+)?)?(?:\.min)?\.js\Z) ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-jquery package.
+privacy-breach-uses-embedded-file-jquery-flot ~~(?:/jquery[\.-]flot(?:(?:-(?:\d\.?)+)?(?:\.min)?\.js|\.css)\Z) ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-jquery-flot package.
+privacy-breach-uses-embedded-file-jquery-mobile ~~(?:/jquery[\.-]mobile(?:(?:-(?:\d\.?)+)?(?:\.min)?\.js|\.css)\Z|^code\.jquery\.com/mobile/) ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-jquery-mobile package.
+privacy-breach-uses-embedded-file-jquery-ui ~~(?:/jquery[\.-]ui(?:(?:-(?:\d\.?)+)?(?:\.min)?\.js|\.css)\Z|^(?:[^\./]+\.)?jqueryui\.com/|/jquery/ui/[^/]+\Z) ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-jquery-ui package.
+privacy-breach-uses-embedded-file-mathjax ~~ /mathjax(?:\.min)?\.js(\Z|\?) ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-mathjax package.
+privacy-breach-uses-embedded-file-modernizr ~~ /modernizr(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-modernizr package.
+privacy-breach-uses-embedded-file-mocha ~~ /mocha(?:\.min)?\.(?:js|css)\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-mocha package.
+privacy-breach-uses-embedded-file-normalize.css ~~ /normalize(?:\.min)?\.css\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-normalize (virtual) package.
+privacy-breach-uses-embedded-file-openlayers ~~ /openlayers(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-openlayers package.
+privacy-breach-uses-embedded-file-openlayer ~~ /prototype(?:\.min)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-prototype package.
+privacy-breach-uses-embedded-file-rx ~~ /rx(?:\.min|\.umd)?\.js\Z ~~ privacy-breach-uses-embedded-file ~~ You may use the libjs-rx package.
diff --git a/data/files/standard-files b/data/files/standard-files
new file mode 100644
index 0000000..231ef53
--- /dev/null
+++ b/data/files/standard-files
@@ -0,0 +1,28 @@
+# Manually maintained list of "standard" files in /usr/share/doc/$pkg
+# These are not enough to consider the package "not-empty"
+#
+# NB: There is a special case in Lintian for ignoring all "README.*"
+# files as well. They need not be listed here.
+#
+# Please keep this sorted (ignoring case)
+
+AUTHORS
+AUTHORS.gz
+BUGS
+BUGS.gz
+changelog.Debian.gz
+changelog.gz
+CHANGES
+CHANGES.gz
+COPYING.gz
+copyright
+HACKING
+HACKING.gz
+NEWS
+NEWS.Debian
+NEWS.Debian.gz
+NEWS.gz
+TODO
+TODO.gz
+
+
diff --git a/data/files/vcs-control-files b/data/files/vcs-control-files
new file mode 100644
index 0000000..9a7a6e7
--- /dev/null
+++ b/data/files/vcs-control-files
@@ -0,0 +1,8 @@
+# vcs control files regex
+# ${COMPRESS_EXT} will be replaced by compression extension list
+# please alpha sort
+/\.arch-inventory(?:\.${COMPRESS_EXT})?\Z
+/\.hgtags(?:\.${COMPRESS_EXT})?\Z
+/\.(?:cvs|git|hg)_?ignore(?:\.${COMPRESS_EXT})?\Z
+/\.git_?(?:modules|attributes)(?:\.${COMPRESS_EXT})?\Z
+/\.hg_archival\.txt(?:\.${COMPRESS_EXT})?\Z