diff options
Diffstat (limited to 'src/init.c')
-rw-r--r-- | src/init.c | 2135 |
1 files changed, 2135 insertions, 0 deletions
diff --git a/src/init.c b/src/init.c new file mode 100644 index 0000000..aa526de --- /dev/null +++ b/src/init.c @@ -0,0 +1,2135 @@ +/* Reading/parsing the initialization file. + Copyright (C) 1996-2012, 2014-2015, 2018-2020 Free Software + Foundation, Inc. + +This file is part of GNU Wget. + +GNU Wget is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +GNU Wget is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Wget. If not, see <http://www.gnu.org/licenses/>. + +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. 
*/ + +#include "wget.h" +#include "exits.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +/* not all systems provide PATH_MAX in limits.h */ +#ifndef PATH_MAX +# include <sys/param.h> +# ifndef PATH_MAX +# define PATH_MAX MAXPATHLEN +# endif +#endif + +#include <regex.h> + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif +#include <assert.h> + +#include "utils.h" +#include "init.h" +#include "host.h" +#include "netrc.h" +#include "progress.h" +#include "connect.h" /* for connect_cleanup */ +#include "ssl.h" /* for ssl_cleanup */ +#include "recur.h" /* for INFINITE_RECURSION */ +#include "convert.h" /* for convert_cleanup */ +#include "res.h" /* for res_cleanup */ +#include "http.h" /* for http_cleanup */ +#include "retr.h" /* for output_stream */ +#include "warc.h" /* for warc_close */ +#include "spider.h" /* for spider_cleanup */ +#include "html-url.h" /* for cleanup_html_url */ +#include "ptimer.h" /* for ptimer_destroy */ +#include "c-strcase.h" + +#ifdef TESTING +#include "../tests/unit-tests.h" +#endif + + + +#define CMD_DECLARE(func) static bool func (const char *, const char *, void *) + +CMD_DECLARE (cmd_boolean); +CMD_DECLARE (cmd_bytes); +CMD_DECLARE (cmd_bytes_sum); +#ifdef HAVE_SSL +CMD_DECLARE (cmd_cert_type); +#endif +CMD_DECLARE (cmd_directory_vector); +CMD_DECLARE (cmd_number); +CMD_DECLARE (cmd_number_inf); +CMD_DECLARE (cmd_string); +CMD_DECLARE (cmd_string_uppercase); +CMD_DECLARE (cmd_file); +CMD_DECLARE (cmd_file_once); +CMD_DECLARE (cmd_directory); +CMD_DECLARE (cmd_time); +CMD_DECLARE (cmd_vector); + +CMD_DECLARE (cmd_use_askpass); + +#ifdef HAVE_LIBZ +CMD_DECLARE (cmd_spec_compression); +#endif +CMD_DECLARE (cmd_spec_dirstruct); +CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_spec_warc_header); +CMD_DECLARE (cmd_spec_htmlify); +CMD_DECLARE (cmd_spec_mirror); +CMD_DECLARE (cmd_spec_prefer_family); +CMD_DECLARE (cmd_spec_progress); 
+CMD_DECLARE (cmd_spec_progressdisp); +CMD_DECLARE (cmd_spec_recursive); +CMD_DECLARE (cmd_spec_regex_type); +CMD_DECLARE (cmd_spec_restrict_file_names); +CMD_DECLARE (cmd_spec_report_speed); +#ifdef HAVE_SSL +CMD_DECLARE (cmd_spec_secure_protocol); +#endif +CMD_DECLARE (cmd_spec_timeout); +CMD_DECLARE (cmd_spec_useragent); +CMD_DECLARE (cmd_spec_verbose); +CMD_DECLARE (cmd_check_cert); + +/* List of recognized commands, each consisting of name, place and + function. When adding a new command, simply add it to the list, + but be sure to keep the list sorted alphabetically, as + command_by_name's binary search depends on it. Also, be sure to + add any entries that allocate memory (e.g. cmd_string and + cmd_vector) to the cleanup() function below. */ + +static const struct { + const char *name; + void *place; + bool (*action) (const char *, const char *, void *); +} commands[] = { + /* KEEP THIS LIST ALPHABETICALLY SORTED */ + { "accept", &opt.accepts, cmd_vector }, + { "acceptregex", &opt.acceptregex_s, cmd_string }, + { "addhostdir", &opt.add_hostdir, cmd_boolean }, + { "adjustextension", &opt.adjust_extension, cmd_boolean }, + { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ + { "askpassword", &opt.ask_passwd, cmd_boolean }, + { "authnochallenge", &opt.auth_without_challenge, + cmd_boolean }, + { "background", &opt.background, cmd_boolean }, + { "backupconverted", &opt.backup_converted, cmd_boolean }, + { "backups", &opt.backups, cmd_number }, + { "base", &opt.base_href, cmd_string }, + { "bindaddress", &opt.bind_address, cmd_string }, +#ifdef HAVE_LIBCARES + { "binddnsaddress", &opt.bind_dns_address, cmd_string }, +#endif + { "bodydata", &opt.body_data, cmd_string }, + { "bodyfile", &opt.body_file, cmd_string }, +#ifdef HAVE_SSL + { "cacertificate", &opt.ca_cert, cmd_file }, +#endif + { "cache", &opt.allow_cache, cmd_boolean }, +#ifdef HAVE_SSL + { "cadirectory", &opt.ca_directory, cmd_directory }, + { "certificate", &opt.cert_file, cmd_file }, + 
{ "certificatetype", &opt.cert_type, cmd_cert_type }, + { "checkcertificate", &opt.check_cert, cmd_check_cert }, +#endif + { "chooseconfig", &opt.choose_config, cmd_file }, +#ifdef HAVE_SSL + { "ciphers", &opt.tls_ciphers_string, cmd_string }, +#endif +#ifdef HAVE_LIBZ + { "compression", &opt.compression, cmd_spec_compression }, +#endif + { "connecttimeout", &opt.connect_timeout, cmd_time }, + { "contentdisposition", &opt.content_disposition, cmd_boolean }, + { "contentonerror", &opt.content_on_error, cmd_boolean }, + { "continue", &opt.always_rest, cmd_boolean }, + { "convertfileonly", &opt.convert_file_only, cmd_boolean }, + { "convertlinks", &opt.convert_links, cmd_boolean }, + { "cookies", &opt.cookies, cmd_boolean }, +#ifdef HAVE_SSL + { "crlfile", &opt.crl_file, cmd_file_once }, +#endif + { "cutdirs", &opt.cut_dirs, cmd_number }, + { "debug", &opt.debug, cmd_boolean }, + { "defaultpage", &opt.default_page, cmd_string }, + { "deleteafter", &opt.delete_after, cmd_boolean }, + { "dirprefix", &opt.dir_prefix, cmd_directory }, + { "dirstruct", NULL, cmd_spec_dirstruct }, + { "dnscache", &opt.dns_cache, cmd_boolean }, +#ifdef HAVE_LIBCARES + { "dnsservers", &opt.dns_servers, cmd_string }, +#endif + { "dnstimeout", &opt.dns_timeout, cmd_time }, + { "domains", &opt.domains, cmd_vector }, + { "dotbytes", &opt.dot_bytes, cmd_bytes }, + { "dotsinline", &opt.dots_in_line, cmd_number }, + { "dotspacing", &opt.dot_spacing, cmd_number }, + { "dotstyle", &opt.dot_style, cmd_string }, /* deprecated */ +#ifdef HAVE_SSL + { "egdfile", &opt.egd_file, cmd_file }, +#endif + { "excludedirectories", &opt.excludes, cmd_directory_vector }, + { "excludedomains", &opt.exclude_domains, cmd_vector }, + { "followftp", &opt.follow_ftp, cmd_boolean }, + { "followtags", &opt.follow_tags, cmd_vector }, + { "forcehtml", &opt.force_html, cmd_boolean }, + { "ftppasswd", &opt.ftp_passwd, cmd_string }, /* deprecated */ + { "ftppassword", &opt.ftp_passwd, cmd_string }, + { "ftpproxy", 
&opt.ftp_proxy, cmd_string }, +#ifdef HAVE_SSL + { "ftpscleardataconnection", &opt.ftps_clear_data_connection, cmd_boolean }, + { "ftpsfallbacktoftp", &opt.ftps_fallback_to_ftp, cmd_boolean }, + { "ftpsimplicit", &opt.ftps_implicit, cmd_boolean }, + { "ftpsresumessl", &opt.ftps_resume_ssl, cmd_boolean }, +#endif +#ifdef __VMS + { "ftpstmlf", &opt.ftp_stmlf, cmd_boolean }, +#endif /* def __VMS */ + { "ftpuser", &opt.ftp_user, cmd_string }, + { "glob", &opt.ftp_glob, cmd_boolean }, + { "header", NULL, cmd_spec_header }, +#ifdef HAVE_HSTS + { "hsts", &opt.hsts, cmd_boolean }, + { "hstsfile", &opt.hsts_file, cmd_file }, +#endif + { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */ + { "htmlify", NULL, cmd_spec_htmlify }, + { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, + { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ + { "httppassword", &opt.http_passwd, cmd_string }, + { "httpproxy", &opt.http_proxy, cmd_string }, +#ifdef HAVE_SSL + { "httpsonly", &opt.https_only, cmd_boolean }, +#endif + { "httpsproxy", &opt.https_proxy, cmd_string }, + { "httpuser", &opt.http_user, cmd_string }, + { "ifmodifiedsince", &opt.if_modified_since, cmd_boolean }, + { "ignorecase", &opt.ignore_case, cmd_boolean }, + { "ignorelength", &opt.ignore_length, cmd_boolean }, + { "ignoretags", &opt.ignore_tags, cmd_vector }, + { "includedirectories", &opt.includes, cmd_directory_vector }, +#ifdef ENABLE_IPV6 + { "inet4only", &opt.ipv4_only, cmd_boolean }, + { "inet6only", &opt.ipv6_only, cmd_boolean }, +#endif + { "input", &opt.input_filename, cmd_file }, +#ifdef HAVE_METALINK + { "inputmetalink", &opt.input_metalink, cmd_file }, +#endif + { "iri", &opt.enable_iri, cmd_boolean }, + { "keepbadhash", &opt.keep_badhash, cmd_boolean }, + { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean }, + { "limitrate", &opt.limit_rate, cmd_bytes }, + { "loadcookies", &opt.cookies_input, cmd_file }, + { "localencoding", &opt.locale, cmd_string }, + { 
"logfile", &opt.lfilename, cmd_file }, + { "login", &opt.ftp_user, cmd_string },/* deprecated*/ + { "maxredirect", &opt.max_redirect, cmd_number }, +#ifdef HAVE_METALINK + { "metalinkindex", &opt.metalink_index, cmd_number_inf }, + { "metalinkoverhttp", &opt.metalink_over_http, cmd_boolean }, +#endif + { "method", &opt.method, cmd_string_uppercase }, + { "mirror", NULL, cmd_spec_mirror }, + { "netrc", &opt.netrc, cmd_boolean }, + { "noclobber", &opt.noclobber, cmd_boolean }, + { "noconfig", &opt.noconfig, cmd_boolean }, + { "noparent", &opt.no_parent, cmd_boolean }, + { "noproxy", &opt.no_proxy, cmd_vector }, + { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ + { "outputdocument", &opt.output_document, cmd_file }, + { "pagerequisites", &opt.page_requisites, cmd_boolean }, + { "passiveftp", &opt.ftp_pasv, cmd_boolean }, + { "passwd", &opt.ftp_passwd, cmd_string },/* deprecated*/ + { "password", &opt.passwd, cmd_string }, +#ifdef HAVE_SSL + { "pinnedpubkey", &opt.pinnedpubkey, cmd_string }, +#endif + { "postdata", &opt.post_data, cmd_string }, + { "postfile", &opt.post_file_name, cmd_file }, + { "preferfamily", NULL, cmd_spec_prefer_family }, +#ifdef HAVE_METALINK + { "preferredlocation", &opt.preferred_location, cmd_string }, +#endif + { "preservepermissions", &opt.preserve_perm, cmd_boolean }, +#ifdef HAVE_SSL + { "privatekey", &opt.private_key, cmd_file }, + { "privatekeytype", &opt.private_key_type, cmd_cert_type }, +#endif + { "progress", &opt.progress_type, cmd_spec_progress }, + { "protocoldirectories", &opt.protocol_directories, cmd_boolean }, + { "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */ + { "proxypassword", &opt.proxy_passwd, cmd_string }, + { "proxyuser", &opt.proxy_user, cmd_string }, + { "quiet", &opt.quiet, cmd_boolean }, + { "quota", &opt.quota, cmd_bytes_sum }, +#ifdef HAVE_SSL + { "randomfile", &opt.random_file, cmd_file }, +#endif + { "randomwait", &opt.random_wait, cmd_boolean }, + { "readtimeout", &opt.read_timeout, 
cmd_time }, + { "reclevel", &opt.reclevel, cmd_number_inf }, + { "recursive", NULL, cmd_spec_recursive }, + { "referer", &opt.referer, cmd_string }, + { "regextype", &opt.regex_type, cmd_spec_regex_type }, + { "reject", &opt.rejects, cmd_vector }, + { "rejectedlog", &opt.rejected_log, cmd_file }, + { "rejectregex", &opt.rejectregex_s, cmd_string }, + { "relativeonly", &opt.relative_only, cmd_boolean }, + { "remoteencoding", &opt.encoding_remote, cmd_string }, + { "removelisting", &opt.remove_listing, cmd_boolean }, + { "reportspeed", &opt.report_bps, cmd_spec_report_speed}, + { "restrictfilenames", NULL, cmd_spec_restrict_file_names }, + { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, + { "retryconnrefused", &opt.retry_connrefused, cmd_boolean }, + { "retryonhosterror", &opt.retry_on_host_error, cmd_boolean }, + { "retryonhttperror", &opt.retry_on_http_error, cmd_string }, + { "robots", &opt.use_robots, cmd_boolean }, + { "savecookies", &opt.cookies_output, cmd_file }, + { "saveheaders", &opt.save_headers, cmd_boolean }, +#ifdef HAVE_SSL + { "secureprotocol", &opt.secure_protocol, cmd_spec_secure_protocol }, +#endif + { "serverresponse", &opt.server_response, cmd_boolean }, + { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, + { "showprogress", &opt.show_progress, cmd_spec_progressdisp }, + { "spanhosts", &opt.spanhost, cmd_boolean }, + { "spider", &opt.spider, cmd_boolean }, + { "startpos", &opt.start_pos, cmd_bytes }, + { "strictcomments", &opt.strict_comments, cmd_boolean }, + { "timeout", NULL, cmd_spec_timeout }, + { "timestamping", &opt.timestamping, cmd_boolean }, + { "tries", &opt.ntry, cmd_number_inf }, + { "trustservernames", &opt.trustservernames, cmd_boolean }, + { "unlink", &opt.unlink_requested, cmd_boolean }, +#ifndef __VMS + { "useaskpass" , &opt.use_askpass, cmd_use_askpass }, +#endif + { "useproxy", &opt.use_proxy, cmd_boolean }, + { "user", &opt.user, cmd_string }, + { "useragent", NULL, cmd_spec_useragent }, + { 
"useservertimestamps", &opt.useservertimestamps, cmd_boolean }, + { "verbose", NULL, cmd_spec_verbose }, + { "wait", &opt.wait, cmd_time }, + { "waitretry", &opt.waitretry, cmd_time }, + { "warccdx", &opt.warc_cdx_enabled, cmd_boolean }, + { "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file }, +#ifdef HAVE_LIBZ + { "warccompression", &opt.warc_compression_enabled, cmd_boolean }, +#endif + { "warcdigests", &opt.warc_digests_enabled, cmd_boolean }, + { "warcfile", &opt.warc_filename, cmd_file }, + { "warcheader", NULL, cmd_spec_warc_header }, + { "warckeeplog", &opt.warc_keep_log, cmd_boolean }, + { "warcmaxsize", &opt.warc_maxsize, cmd_bytes }, + { "warctempdir", &opt.warc_tempdir, cmd_directory }, +#ifdef USE_WATT32 + { "wdebug", &opt.wdebug, cmd_boolean }, +#endif +#ifdef ENABLE_XATTR + { "xattr", &opt.enable_xattr, cmd_boolean }, +#endif +}; + +/* Look up CMDNAME in the commands[] and return its position in the + array. If CMDNAME is not found, return -1. */ + +static int +command_by_name (const char *cmdname) +{ + /* Use binary search for speed. Wget has ~100 commands, which + guarantees a worst case performance of 7 string comparisons. */ + int lo = 0, hi = countof (commands) - 1; + + while (lo <= hi) + { + int mid = (lo + hi) >> 1; + int cmp = c_strcasecmp (cmdname, commands[mid].name); + if (cmp < 0) + hi = mid - 1; + else if (cmp > 0) + lo = mid + 1; + else + return mid; + } + return -1; +} + +/* Reset the variables to default values. */ +void +defaults (void) +{ + char *tmp; + + /* Most of the default values are 0 (and 0.0, NULL, and false). + Just reset everything, and fill in the non-zero values. Note + that initializing pointers to NULL this way is technically + illegal, but porting Wget to a machine where NULL is not all-zero + bit pattern will be the least of the implementors' worries. 
*/ + xzero (opt); + +#ifdef HAVE_METALINK + opt.metalink_index = -1; +#endif + + opt.cookies = true; + opt.verbose = -1; + opt.ntry = 20; + opt.reclevel = 5; + opt.add_hostdir = true; + opt.netrc = true; + opt.ftp_glob = true; + opt.htmlify = true; + opt.http_keep_alive = true; + opt.use_proxy = true; + opt.convert_file_only = false; + tmp = getenv ("no_proxy"); + if (tmp) + opt.no_proxy = sepstring (tmp); + opt.prefer_family = prefer_none; + opt.allow_cache = true; + opt.if_modified_since = true; + + opt.read_timeout = 900; + opt.use_robots = true; + + opt.remove_listing = true; + + opt.dot_bytes = 1024; + opt.dot_spacing = 10; + opt.dots_in_line = 50; + + opt.dns_cache = true; + opt.ftp_pasv = true; + /* 2014-09-07 Darshit Shah <darnir@gmail.com> + * opt.retr_symlinks is set to true by default. Creating symbolic links on the + * local filesystem pose a security threat by malicious FTP Servers that + * server a specially crafted .listing file akin to this: + * + * lrwxrwxrwx 1 root root 33 Dec 25 2012 JoCxl6d8rFU -> / + * drwxrwxr-x 15 1024 106 4096 Aug 28 02:02 JoCxl6d8rFU + * + * A .listing file in this fashion makes Wget susceptiple to a symlink attack + * wherein the attacker is able to create arbitrary files, directories and + * symbolic links on the target system and even set permissions. + * + * Hence, by default Wget attempts to retrieve the pointed-to files and does + * not create the symbolic links locally. + */ + opt.retr_symlinks = true; + +#ifdef HAVE_SSL + opt.check_cert = CHECK_CERT_ON; + opt.ftps_resume_ssl = true; + opt.ftps_fallback_to_ftp = false; + opt.ftps_implicit = false; + opt.ftps_clear_data_connection = false; +#endif + +#ifdef HAVE_LIBZ + opt.compression = compression_none; +#endif + + /* The default for file name restriction defaults to the OS type. 
*/ +#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__) + opt.restrict_files_os = restrict_windows; +#elif defined(__VMS) + opt.restrict_files_os = restrict_vms; +#else + opt.restrict_files_os = restrict_unix; +#endif + opt.restrict_files_ctrl = true; + opt.restrict_files_nonascii = false; + opt.restrict_files_case = restrict_no_case_restriction; + + opt.regex_type = regex_type_posix; + + opt.max_redirect = 20; + + opt.waitretry = 10; + +#ifdef ENABLE_IRI + opt.enable_iri = true; +#else + opt.enable_iri = false; +#endif + opt.locale = NULL; + opt.encoding_remote = NULL; + + opt.useservertimestamps = true; + opt.show_all_dns_entries = false; + + opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */ +#ifdef HAVE_LIBZ + opt.warc_compression_enabled = true; +#else + opt.warc_compression_enabled = false; +#endif + opt.warc_digests_enabled = true; + opt.warc_cdx_enabled = false; + opt.warc_cdx_dedup_filename = NULL; + opt.warc_tempdir = NULL; + opt.warc_keep_log = true; + + /* Use a negative value to mark the absence of --start-pos option */ + opt.start_pos = -1; + opt.show_progress = -1; + opt.noscroll = false; + +#ifdef HAVE_HSTS + /* HSTS is enabled by default */ + opt.hsts = true; +#endif + + opt.enable_xattr = false; +} + +/* Return the user's home directory (strdup-ed), or NULL if none is + found. */ +char * +home_dir (void) +{ + static char *buf = NULL; + static char *home, *ret; + + if (!home) + { + home = getenv ("HOME"); + if (!home) + { +#if defined(MSDOS) + int len; + + /* Under MSDOS, if $HOME isn't defined, use the directory where + `wget.exe' resides. */ + const char *_w32_get_argv0 (void); /* in libwatt.a/pcconfig.c */ + char *p; + + buff = _w32_get_argv0 (); + + p = strrchr (buf, '/'); /* djgpp */ + if (!p) + p = strrchr (buf, '\\'); /* others */ + assert (p); + + len = p - buff + 1; + buff = strdup (_w32_get_argv0 ()); + + home = buf; +#elif !defined(WINDOWS) + /* If HOME is not defined, try getting it from the password + file. 
*/ + struct passwd *pwd = getpwuid (getuid ()); + if (!pwd || !pwd->pw_dir) + return NULL; + home = pwd->pw_dir; +#else /* !WINDOWS */ + /* Under Windows, if $HOME isn't defined, use the directory where + `wget.exe' resides. */ + home = ws_mypath (); +#endif /* WINDOWS */ + } + } + + ret = home ? xstrdup (home) : NULL; + xfree (buf); + + return ret; +} + +/* Check the 'WGETRC' environment variable and return the file name + if 'WGETRC' is set and is a valid file. + If the `WGETRC' variable exists but the file does not exist, the + function will exit(). */ +char * +wgetrc_env_file_name (void) +{ + char *env = getenv ("WGETRC"); + if (env && *env) + { + file_stats_t flstat; + if (!file_exists_p (env, &flstat)) + { + fprintf (stderr, _("%s: WGETRC points to %s, which couldn't be accessed because of error: %s.\n"), + exec_name, env, strerror(flstat.access_err)); + exit (WGET_EXIT_GENERIC_ERROR); + } + return xstrdup (env); + } + return NULL; +} + +/* Append file name to (locally appropriate) directory spec. + Return pointer to allocated storage. */ +char * +ajoin_dir_file (const char *dir, const char *file) +{ + char *dir_file; +#ifdef __VMS + /* No separator: "dev:[dir]" + "name.type" */ + dir_file = aprintf ("%s%s", dir, file); +#else /* def __VMS */ + /* Slash separator: "/a/b" + "/" + "name.type" */ + dir_file = aprintf ("%s/%s", dir, file); +#endif /* def __VMS [else] */ + return dir_file; +} + +/* Check for the existence of '$HOME/.wgetrc' and return its path + if it exists and is set. */ +char * +wgetrc_user_file_name (void) +{ + char *file = NULL; + + /* Join opt.homedir ($HOME) and ".wgetrc" */ + if (opt.homedir) { + file = ajoin_dir_file(opt.homedir, ".wgetrc"); + } + + if (!file) + return NULL; +#ifndef FUZZING + if (!file_exists_p (file, NULL)) + { + xfree (file); + return NULL; + } +#endif + return file; +} + +/* Return the path to the user's .wgetrc. This is either the value of + `WGETRC' environment variable, or `$HOME/.wgetrc'. 
+ + Additionally, for windows, look in the directory where wget.exe + resides. */ +char * +wgetrc_file_name (void) +{ + char *file = wgetrc_env_file_name (); + if (file && *file) + return file; + + file = wgetrc_user_file_name (); + +#ifdef WINDOWS + /* Under Windows, if we still haven't found .wgetrc, look for the file + `wget.ini' in the directory where `wget.exe' resides; we do this for + backward compatibility with previous versions of Wget. + SYSTEM_WGETRC should not be defined under WINDOWS. */ + if (!file) + { + const char *home = ws_mypath (); + if (home) + { + file = aprintf ("%s/wget.ini", home); + if (!file_exists_p (file, NULL)) + { + xfree (file); + } + } + } +#endif /* WINDOWS */ + + return file; +} + +/* Return values of parse_line. */ +enum parse_line { + line_ok, + line_empty, + line_syntax_error, + line_unknown_command +}; + +static enum parse_line parse_line (const char *, char **, char **, int *); +static bool setval_internal (int, const char *, const char *); +static bool setval_internal_tilde (int, const char *, const char *); + +/* Initialize variables from a wgetrc file. Returns zero (failure) if + there were errors in the file. */ + +bool +run_wgetrc (const char *file, file_stats_t *flstats) +{ + FILE *fp; + char *line = NULL; + size_t bufsize = 0; + int ln; + int errcnt = 0; + + fp = fopen_stat (file, "r", flstats); + if (!fp) + { + fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name, + file, strerror (errno)); + return true; /* not a fatal error */ + } + ln = 1; + while (getline (&line, &bufsize, fp) > 0) + { + char *com = NULL, *val = NULL; + int comind; + + /* Parse the line. */ + switch (parse_line (line, &com, &val, &comind)) + { + case line_ok: + /* If everything is OK, set the value. 
*/ + if (!setval_internal_tilde (comind, com, val)) + { + fprintf (stderr, _("%s: Error in %s at line %d.\n"), + exec_name, file, ln); + ++errcnt; + } + break; + case line_syntax_error: + fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"), + exec_name, file, ln); + ++errcnt; + break; + case line_unknown_command: + fprintf (stderr, _("%s: Unknown command %s in %s at line %d.\n"), + exec_name, quote (com), file, ln); + ++errcnt; + break; + case line_empty: + break; + default: + abort (); + } + xfree (com); + xfree (val); + ++ln; + } + xfree (line); + fclose (fp); + + return errcnt == 0; +} + +/* Initialize the defaults and run the system wgetrc and user's own + wgetrc. */ +int +initialize (void) +{ + char *env_sysrc; + file_stats_t flstats; + bool ok = true; + + memset(&flstats, 0, sizeof(flstats)); + /* Run a non-standard system rc file when the according environment + variable has been set. For internal testing purposes only! */ + env_sysrc = getenv ("SYSTEM_WGETRC"); + if (env_sysrc && file_exists_p (env_sysrc, &flstats)) + { + ok &= run_wgetrc (env_sysrc, &flstats); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), env_sysrc); + return WGET_EXIT_PARSE_ERROR; + } + } + /* Otherwise, if SYSTEM_WGETRC is defined, use it. */ +#ifdef SYSTEM_WGETRC + else if (file_exists_p (SYSTEM_WGETRC, &flstats)) + ok &= run_wgetrc (SYSTEM_WGETRC, &flstats); + /* If there are any problems parsing the system wgetrc file, tell + the user and exit */ + if (! ok) + { + fprintf (stderr, _("\ +Parsing system wgetrc file failed. Please check\n\ +'%s',\n\ +or specify a different file using --config.\n"), SYSTEM_WGETRC); + return WGET_EXIT_PARSE_ERROR; + } +#endif + /* Override it with your own, if one exists. 
*/ + opt.wgetrcfile = wgetrc_file_name (); + if (!opt.wgetrcfile) + return 0; + /* #### We should canonicalize `file' and SYSTEM_WGETRC with + something like realpath() before comparing them with `strcmp' */ +#ifdef SYSTEM_WGETRC + if (!strcmp (opt.wgetrcfile, SYSTEM_WGETRC)) + { + fprintf (stderr, _("\ +%s: Warning: Both system and user wgetrc point to %s.\n"), + exec_name, quote (opt.wgetrcfile)); + } + else +#endif +#ifndef FUZZING + if (file_exists_p (opt.wgetrcfile, &flstats)) +#endif + ok &= run_wgetrc (opt.wgetrcfile, &flstats); + + xfree (opt.wgetrcfile); + + /* If there were errors processing either `.wgetrc', abort. */ + if (!ok) + return WGET_EXIT_PARSE_ERROR; + + return 0; +} + +/* Remove dashes and underscores from S, modifying S in the + process. */ + +static void +dehyphen (char *s) +{ + char *t = s; /* t - tortoise */ + char *h = s; /* h - hare */ + while (*h) + if (*h == '_' || *h == '-') + ++h; + else + *t++ = *h++; + *t = '\0'; +} + +/* Parse the line pointed by line, with the syntax: + <sp>* command <sp>* = <sp>* value <sp>* + Uses malloc to allocate space for command and value. + + Returns one of line_ok, line_empty, line_syntax_error, or + line_unknown_command. + + In case of line_ok, *COM and *VAL point to freshly allocated + strings, and *COMIND points to com's index. In case of error or + empty line, their values are unmodified. */ + +static enum parse_line +parse_line (const char *line, char **com, char **val, int *comind) +{ + const char *p; + const char *end = line + strlen (line); + const char *cmdstart, *cmdend; + const char *valstart, *valend; + char buf[1024]; + size_t len; + + char *cmdcopy; + int ind; + + /* Skip leading and trailing whitespace. */ + while (*line && c_isspace (*line)) + ++line; + while (end > line && c_isspace (end[-1])) + --end; + + /* Skip empty lines and comments. 
*/ + if (!*line || *line == '#') + return line_empty; + + p = line; + + cmdstart = p; + while (p < end && (c_isalnum (*p) || *p == '_' || *p == '-')) + ++p; + cmdend = p; + + /* Skip '=', as well as any space before or after it. */ + while (p < end && c_isspace (*p)) + ++p; + if (p == end || *p != '=') + return line_syntax_error; + ++p; + while (p < end && c_isspace (*p)) + ++p; + + valstart = p; + valend = end; + + /* The syntax is valid (even though the command might not be). Fill + in the command name and value. */ + *com = strdupdelim (cmdstart, cmdend); + *val = strdupdelim (valstart, valend); + + /* The line now known to be syntactically correct. Check whether + the command is valid. */ + len = cmdend - cmdstart; + if (len < sizeof (buf)) + cmdcopy = buf; + else + cmdcopy = xmalloc (len + 1); + memcpy (cmdcopy, cmdstart, len); + cmdcopy[len] = 0; + + dehyphen (cmdcopy); + ind = command_by_name (cmdcopy); + if (cmdcopy != buf) + xfree (cmdcopy); + if (ind == -1) + return line_unknown_command; + + /* Report success to the caller. */ + *comind = ind; + return line_ok; +} + +#if defined(WINDOWS) || defined(MSDOS) +# define ISSEP(c) ((c) == '/' || (c) == '\\') +# define SEPSTRING "/\\" +#else +# define ISSEP(c) ((c) == '/') +# define SEPSTRING "/" +#endif + +/* Run commands[comind].action. 
*/ + +static bool +setval_internal (int comind, const char *com, const char *val) +{ + assert (0 <= comind && ((size_t) comind) < countof (commands)); + + if ((unsigned) comind >= countof (commands)) + return NULL; + + DEBUGP (("Setting %s (%s) to %s\n", com, commands[comind].name, val)); + return commands[comind].action (com, val, commands[comind].place); +} + +static bool +setval_internal_tilde (int comind, const char *com, const char *val) +{ + bool ret; + int homelen; + char **pstring; + ret = setval_internal (comind, com, val); + + /* We make tilde expansion for cmd_file and cmd_directory */ + if (((commands[comind].action == cmd_file) || + (commands[comind].action == cmd_directory)) + && ret && (*val == '~' && ISSEP (val[1]))) + { + pstring = commands[comind].place; + if (opt.homedir) + { + char *home = xstrdup(opt.homedir); + homelen = strlen (home); + while (homelen && ISSEP (home[homelen - 1])) + home[--homelen] = '\0'; + + xfree (*pstring); + + /* Skip the leading "~/". */ + val += strspn(val + 1, SEPSTRING) + 1; + *pstring = concat_strings (home, "/", val, (char *)0); + xfree (home); + } + } + return ret; +} + +/* Run command COM with value VAL. If running the command produces an + error, report the error and exit. + + This is intended to be called from main to modify Wget's behavior + through command-line switches. Since COM is hard-coded in main, + it is not canonicalized, and this aborts when COM is not found. + + If COMIND's are exported to init.h, this function will be changed + to accept COMIND directly. */ + +void +setoptval (const char *com, const char *val, const char *optname) +{ + /* Prepend "--" to OPTNAME. 
*/ + char dd_optname[2 + MAX_LONGOPTION + 1]; + + if ((unsigned) snprintf(dd_optname, sizeof (dd_optname), "--%s", optname) > sizeof (dd_optname)) + exit (WGET_EXIT_PARSE_ERROR); + + assert (val != NULL); + + if (!setval_internal (command_by_name (com), dd_optname, val)) + exit (WGET_EXIT_PARSE_ERROR); +} + +/* Parse OPT into command and value and run it. For example, + run_command("foo=bar") is equivalent to setoptval("foo", "bar"). + This is used by the `--execute' flag in main.c. */ + +void +run_command (const char *cmdopt) +{ + char *com, *val; + int comind; + switch (parse_line (cmdopt, &com, &val, &comind)) + { + case line_ok: + if (!setval_internal (comind, com, val)) + exit (WGET_EXIT_PARSE_ERROR); + xfree (com); + xfree (val); + break; + default: + fprintf (stderr, _("%s: Invalid --execute command %s\n"), + exec_name, quote (cmdopt)); + exit (WGET_EXIT_PARSE_ERROR); + } +} + +/* Generic helper functions, for use with `commands'. */ + +/* Forward declarations: */ +struct decode_item { + const char *name; + int code; +}; +static bool decode_string (const char *, const struct decode_item *, int, int *); +static bool simple_atof (const char *, const char *, double *); + +#define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0') + +#define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \ + && c_tolower((p)[1]) == (c1) \ + && (p)[2] == '\0') + +#define CMP3(p, c0, c1, c2) (c_tolower((p)[0]) == (c0) \ + && c_tolower((p)[1]) == (c1) \ + && c_tolower((p)[2]) == (c2) \ + && (p)[3] == '\0') + + +static int +cmd_boolean_internal (const char *com _GL_UNUSED, const char *val, void *place _GL_UNUSED) +{ + if (CMP2 (val, 'o', 'n') || CMP3 (val, 'y', 'e', 's') || CMP1 (val, '1')) + /* "on", "yes" and "1" mean true. */ + return 1; + else if (CMP3 (val, 'o', 'f', 'f') || CMP2 (val, 'n', 'o') || CMP1 (val, '0')) + /* "off", "no" and "0" mean false. */ + return 0; + return -1; +} + +/* Store the boolean value from VAL to PLACE. COM is ignored, + except for error messages. 
*/ +static bool +cmd_boolean (const char *com, const char *val, void *place) +{ + bool value; + + switch (cmd_boolean_internal (com, val, place)) + { + case 0: + value = false; + break; + + case 1: + value = true; + break; + + default: + { + fprintf (stderr, + _("%s: %s: Invalid boolean %s; use `on' or `off'.\n"), + exec_name, com, quote (val)); + return false; + } + } + *(bool *) place = value; + return true; +} + +/* Store the check_cert value from VAL to PLACE. COM is ignored, + except for error messages. */ +static bool +cmd_check_cert (const char *com, const char *val, void *place) +{ + int value; + + switch (cmd_boolean_internal (com, val, place)) + { + case 0: + value = CHECK_CERT_OFF; + break; + + case 1: + value = CHECK_CERT_ON; + break; + + default: + { + if (!c_strcasecmp (val, "quiet")) + value = CHECK_CERT_QUIET; + else + { + fprintf (stderr, + _("%s: %s: Invalid %s; use `on', `off' or `quiet'.\n"), + exec_name, com, quote (val)); + return false; + } + } + } + *(int *) place = value; + return true; +} + +/* Set the non-negative integer value from VAL to PLACE. With + incorrect specification, the number remains unchanged. */ +static bool +cmd_number (const char *com, const char *val, void *place) +{ + long l = strtol(val, NULL, 10); + + if (((l == LONG_MIN || l == LONG_MAX) && errno == ERANGE) + || l < 0 || l > INT_MAX) + { + fprintf (stderr, _("%s: %s: Invalid number %s.\n"), + exec_name, com, quote (val)); + return false; + } + *(int *) place = (int) l; + return true; +} + +/* Similar to cmd_number(), only accepts `inf' as a synonym for 0. */ +static bool +cmd_number_inf (const char *com, const char *val, void *place) +{ + if (!c_strcasecmp (val, "inf")) + { + *(int *) place = 0; + return true; + } + return cmd_number (com, val, place); +} + +/* Copy (strdup) the string at COM to a new location and place a + pointer to *PLACE. 
*/ +static bool +cmd_string (const char *com _GL_UNUSED, const char *val, void *place) +{ + char **pstring = (char **)place; + + xfree (*pstring); + *pstring = xstrdup (val); + return true; +} + +/* Like cmd_string but ensure the string is upper case. */ +static bool +cmd_string_uppercase (const char *com _GL_UNUSED, const char *val, void *place) +{ + char *q, **pstring; + pstring = (char **)place; + xfree (*pstring); + + *pstring = xmalloc (strlen (val) + 1); + + for (q = *pstring; *val; val++, q++) + *q = c_toupper (*val); + + *q = '\0'; + return true; +} + + +/* Like cmd_string, but handles tilde-expansion when reading a user's + `.wgetrc'. In that case, and if VAL begins with `~', the tilde + gets expanded to the user's home directory. */ +static bool +cmd_file (const char *com _GL_UNUSED, const char *val, void *place) +{ + char **pstring = (char **)place; + + xfree (*pstring); + + /* #### If VAL is empty, perhaps should set *PLACE to NULL. */ + + *pstring = xstrdup (val); + +#if defined(WINDOWS) || defined(MSDOS) + /* Convert "\" to "/". */ + { + char *s; + for (s = *pstring; *s; s++) + if (*s == '\\') + *s = '/'; + } +#endif + return true; +} + +/* like cmd_file, but insist on just a single option usage */ +static bool +cmd_file_once (const char *com _GL_UNUSED, const char *val, void *place) +{ + if (*(char **)place) + { + fprintf (stderr, _("%s: %s must only be used once\n"), + exec_name, com); + return false; + } + + return cmd_file(com, val, place); +} + +/* Like cmd_file, but strips trailing '/' characters. */ +static bool +cmd_directory (const char *com, const char *val, void *place) +{ + char *s, *t; + + /* Call cmd_file() for tilde expansion and separator + canonicalization (backslash -> slash under Windows). These + things should perhaps be in a separate function. 
*/ + if (!cmd_file (com, val, place)) + return false; + + s = *(char **)place; + t = s + strlen (s); + while (t > s && *--t == '/') + *t = '\0'; + + return true; +} + +/* Split VAL by space to a vector of values, and append those values + to vector pointed to by the PLACE argument. If VAL is empty, the + PLACE vector is cleared instead. */ + +static bool +cmd_vector (const char *com _GL_UNUSED, const char *val, void *place) +{ + char ***pvec = (char ***)place; + + if (*val) + *pvec = merge_vecs (*pvec, sepstring (val)); + else + { + free_vec (*pvec); + *pvec = NULL; + } + return true; +} + +static bool +cmd_directory_vector (const char *com _GL_UNUSED, const char *val, void *place) +{ + char ***pvec = (char ***)place; + + if (*val) + { + /* Strip the trailing slashes from directories. */ + char **t, **seps; + + seps = sepstring (val); + for (t = seps; t && *t; t++) + { + int len = strlen (*t); + /* Skip degenerate case of root directory. */ + if (len > 1) + { + if ((*t)[len - 1] == '/') + (*t)[len - 1] = '\0'; + } + } + *pvec = merge_vecs (*pvec, seps); + } + else + { + free_vec (*pvec); + *pvec = NULL; + } + return true; +} + +/* Engine for cmd_bytes and cmd_bytes_sum: converts a string such as + "100k" or "2.5G" to a floating point number. */ + +static bool +parse_bytes_helper (const char *val, double *result) +{ + double number, mult; + const char *end = val + strlen (val); + + /* Check for "inf". */ + if (0 == strcmp (val, "inf")) + { + *result = 0; + return true; + } + + /* Strip trailing whitespace. */ + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + return false; + + switch (c_tolower (end[-1])) + { + case 'k': + --end, mult = 1024.0; + break; + case 'm': + --end, mult = 1048576.0; + break; + case 'g': + --end, mult = 1073741824.0; + break; + case 't': + --end, mult = 1099511627776.0; + break; + default: + /* Not a recognized suffix: assume it's a digit. (If not, + simple_atof will raise an error.) 
*/ + mult = 1; + } + + /* Skip leading and trailing whitespace. */ + while (val < end && c_isspace (*val)) + ++val; + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + return false; + + if (!simple_atof (val, end, &number) || number < 0) + return false; + + *result = number * mult; + return true; +} + +/* Parse VAL as a number and set its value to PLACE (which should + point to a wgint). + + By default, the value is assumed to be in bytes. If "K", "M", or + "G" are appended, the value is multiplied with 1<<10, 1<<20, or + 1<<30, respectively. Floating point values are allowed and are + cast to integer before use. The idea is to be able to use things + like 1.5k instead of "1536". + + The string "inf" is returned as 0. + + In case of error, false is returned and memory pointed to by PLACE + remains unmodified. */ + +static bool +cmd_bytes (const char *com, const char *val, void *place) +{ + double byte_value; + if (!parse_bytes_helper (val, &byte_value)) + { + fprintf (stderr, _("%s: %s: Invalid byte value %s\n"), + exec_name, com, quote (val)); + return false; + } + *(wgint *)place = (wgint)byte_value; + return true; +} + +/* Like cmd_bytes, but PLACE is interpreted as a pointer to + SIZE_SUM. It works by converting the string to double, therefore + working with values up to 2^53-1 without loss of precision. This + value (8192 TB) is large enough to serve for a while. */ + +static bool +cmd_bytes_sum (const char *com, const char *val, void *place) +{ + double byte_value; + + if (!parse_bytes_helper (val, &byte_value) + || byte_value < LONG_MIN || byte_value > LONG_MAX) + { + fprintf (stderr, _("%s: %s: Invalid byte value %s\n"), + exec_name, com, quote (val)); + return false; + } + *(SUM_SIZE_INT *) place = (SUM_SIZE_INT) byte_value; + return true; +} + +/* Store the value of VAL to *OUT. 
The value is a time period, by + default expressed in seconds, but also accepting suffixes "m", "h", + "d", and "w" for minutes, hours, days, and weeks respectively. */ + +static bool +cmd_time (const char *com, const char *val, void *place) +{ + double number, mult; + const char *end = val + strlen (val); + + /* Strip trailing whitespace. */ + while (val < end && c_isspace (end[-1])) + --end; + + if (val == end) + { + err: + fprintf (stderr, _("%s: %s: Invalid time period %s\n"), + exec_name, com, quote (val)); + return false; + } + + switch (c_tolower (end[-1])) + { + case 's': + --end, mult = 1; /* seconds */ + break; + case 'm': + --end, mult = 60; /* minutes */ + break; + case 'h': + --end, mult = 3600; /* hours */ + break; + case 'd': + --end, mult = 86400.0; /* days */ + break; + case 'w': + --end, mult = 604800.0; /* weeks */ + break; + default: + /* Not a recognized suffix: assume it belongs to the number. + (If not, simple_atof will raise an error.) */ + mult = 1; + } + + /* Skip leading and trailing whitespace. 
*/ + while (val < end && c_isspace (*val)) + ++val; + while (val < end && c_isspace (end[-1])) + --end; + if (val == end) + goto err; + + if (!simple_atof (val, end, &number)) + goto err; + + if (number < 0) + { + fprintf (stderr, _("%s: %s: Negative time period %s\n"), + exec_name, com, quote (val)); + return false; + } + + *(double *)place = number * mult; + return true; +} + + +static bool +cmd_use_askpass (const char *com _GL_UNUSED, const char *val, void *place) +{ + const char *env_name = "WGET_ASKPASS"; + const char *env; + + if (val && *val) + return cmd_string (com, val, place); + + env = getenv (env_name); + if (!(env && *env)) + { + env_name = "SSH_ASKPASS"; + env = getenv (env_name); + } + + if (!(env && *env)) + { + fprintf (stderr, _("use-askpass requires a string or either environment variable WGET_ASKPASS or SSH_ASKPASS to be set.\n")); + return false; + } + + return cmd_string (com, env, place); +} + +#ifdef HAVE_SSL +static bool +cmd_cert_type (const char *com, const char *val, void *place) +{ + static const struct decode_item choices[] = { + { "pem", keyfile_pem }, + { "der", keyfile_asn1 }, + { "asn1", keyfile_asn1 }, + }; + int ok = decode_string (val, choices, countof (choices), place); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return ok; +} +#endif + +/* Specialized helper functions, used by `commands' to handle some + options specially. 
*/ + +static bool check_user_specified_header (const char *); + +#ifdef HAVE_LIBZ +static bool +cmd_spec_compression (const char *com, const char *val, void *place) +{ + static const struct decode_item choices[] = { + { "auto", compression_auto }, + { "gzip", compression_gzip }, + { "none", compression_none }, + }; + int ok = decode_string (val, choices, countof (choices), place); + if (!ok) + { + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, + quote (val)); + } + return ok; +} +#endif + +static bool +cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + if (!cmd_boolean (com, val, &opt.dirstruct)) + return false; + /* Since dirstruct behaviour is explicitly changed, no_dirstruct + must be affected inversely. */ + if (opt.dirstruct) + opt.no_dirstruct = false; + else + opt.no_dirstruct = true; + return true; +} + +static bool +cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Empty value means reset the list of headers. */ + if (*val == '\0') + { + free_vec (opt.user_headers); + opt.user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.user_headers = vec_append (opt.user_headers, val); + return true; +} + +static bool +cmd_spec_warc_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Empty value means reset the list of headers. 
*/ + if (*val == '\0') + { + free_vec (opt.warc_user_headers); + opt.warc_user_headers = NULL; + return true; + } + + if (!check_user_specified_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.warc_user_headers = vec_append (opt.warc_user_headers, val); + return true; +} + +static bool +cmd_spec_htmlify (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + int flag = cmd_boolean (com, val, &opt.htmlify); + if (flag && !opt.htmlify) + opt.remove_listing = false; + return flag; +} + +/* Set the "mirror" mode. It means: recursive download, timestamping, + no limit on max. recursion depth, and don't remove listings. */ + +static bool +cmd_spec_mirror (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + bool mirror; + + if (!cmd_boolean (com, val, &mirror)) + return false; + if (mirror) + { + opt.recursive = true; + if (!opt.no_dirstruct) + opt.dirstruct = true; + opt.timestamping = true; + opt.reclevel = INFINITE_RECURSION; + opt.remove_listing = false; + } + return true; +} + +/* Validate --prefer-family and set the choice. Allowed values are + "IPv4", "IPv6", and "none". */ + +static bool +cmd_spec_prefer_family (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + static const struct decode_item choices[] = { + { "IPv4", prefer_ipv4 }, + { "IPv6", prefer_ipv6 }, + { "none", prefer_none }, + }; + int prefer_family = prefer_none; + int ok = decode_string (val, choices, countof (choices), &prefer_family); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.prefer_family = prefer_family; + return ok; +} + +/* Set progress.type to VAL, but verify that it's a valid progress + implementation before that. 
*/ + +static bool +cmd_spec_progress (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + if (!valid_progress_implementation_p (val)) + { + fprintf (stderr, _("%s: %s: Invalid progress type %s.\n"), + exec_name, com, quote (val)); + return false; + } + xfree (opt.progress_type); + + /* Don't call set_progress_implementation here. It will be called + in main when it becomes clear what the log output is. */ + opt.progress_type = xstrdup (val); + return true; +} + +/* Set opt.recursive to VAL as with cmd_boolean. If opt.recursive is + set to true, also set opt.dirstruct to true, unless opt.no_dirstruct + is specified. */ + +static bool +cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + if (!cmd_boolean (com, val, &opt.recursive)) + return false; + else + { + if (opt.recursive && !opt.no_dirstruct) + opt.dirstruct = true; + } + return true; +} + +/* Validate --regex-type and set the choice. */ + +static bool +cmd_spec_regex_type (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + static const struct decode_item choices[] = { + { "posix", regex_type_posix }, +#if defined HAVE_LIBPCRE || defined HAVE_LIBPCRE2 + { "pcre", regex_type_pcre }, +#endif + }; + int regex_type = regex_type_posix; + int ok = decode_string (val, choices, countof (choices), ®ex_type); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + opt.regex_type = regex_type; + return ok; +} + +static bool +cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + int restrict_os = opt.restrict_files_os; + int restrict_ctrl = opt.restrict_files_ctrl; + int restrict_case = opt.restrict_files_case; + int restrict_nonascii = opt.restrict_files_nonascii; + + const char *end; + +#define VAL_IS(string_literal) BOUNDED_EQUAL (val, end, string_literal) + + do + { + end = strchr (val, ','); + if (!end) + end = val + strlen (val); + + if (VAL_IS ("unix")) 
+ restrict_os = restrict_unix; + else if (VAL_IS ("vms")) + restrict_os = restrict_vms; + else if (VAL_IS ("windows")) + restrict_os = restrict_windows; + else if (VAL_IS ("lowercase")) + restrict_case = restrict_lowercase; + else if (VAL_IS ("uppercase")) + restrict_case = restrict_uppercase; + else if (VAL_IS ("nocontrol")) + restrict_ctrl = false; + else if (VAL_IS ("ascii")) + restrict_nonascii = true; + else + { + fprintf (stderr, _("\ +%s: %s: Invalid restriction %s,\n\ + use [unix|vms|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"), + exec_name, com, quote (val)); + return false; + } + + if (*end) + val = end + 1; + } + while (*val && *end); + +#undef VAL_IS + + opt.restrict_files_os = restrict_os; + opt.restrict_files_ctrl = restrict_ctrl; + opt.restrict_files_case = restrict_case; + opt.restrict_files_nonascii = restrict_nonascii; + + return true; +} + +static bool +cmd_spec_report_speed (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + opt.report_bps = c_strcasecmp (val, "bits") == 0; + if (!opt.report_bps) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return opt.report_bps; +} + +#ifdef HAVE_SSL +static bool +cmd_spec_secure_protocol (const char *com, const char *val, void *place) +{ + static const struct decode_item choices[] = { + { "auto", secure_protocol_auto }, + { "sslv2", secure_protocol_sslv2 }, + { "sslv3", secure_protocol_sslv3 }, + { "tlsv1", secure_protocol_tlsv1 }, + { "tlsv1_1", secure_protocol_tlsv1_1 }, + { "tlsv1_2", secure_protocol_tlsv1_2 }, + { "tlsv1_3", secure_protocol_tlsv1_3 }, + { "pfs", secure_protocol_pfs }, + }; + int ok = decode_string (val, choices, countof (choices), place); + if (!ok) + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val)); + return ok; +} +#endif + +/* Set all three timeout values. 
*/ + +static bool +cmd_spec_timeout (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + double value; + if (!cmd_time (com, val, &value)) + return false; + opt.read_timeout = value; + opt.connect_timeout = value; + opt.dns_timeout = value; + return true; +} + +static bool +cmd_spec_useragent (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Disallow embedded newlines. */ + if (strchr (val, '\n')) + { + fprintf (stderr, _("%s: %s: Invalid value %s.\n"), + exec_name, com, quote (val)); + return false; + } + xfree (opt.useragent); + opt.useragent = xstrdup (val); + return true; +} + +/* The --show-progress option is not a cmd_boolean since we need to keep track + * of whether the user explicitly requested the option or not. -1 means + * uninitialized. */ +static bool +cmd_spec_progressdisp (const char *com, const char *val, void *place _GL_UNUSED) +{ + bool flag; + if (cmd_boolean (com, val, &flag)) + { + opt.show_progress = flag; + return true; + } + return false; +} + + +/* The "verbose" option cannot be cmd_boolean because the variable is + not bool -- it's of type int (-1 means uninitialized because of + some random hackery for disallowing -q -v). */ + +static bool +cmd_spec_verbose (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + bool flag; + if (cmd_boolean (com, val, &flag)) + { + opt.verbose = flag; + opt.show_progress = -1; + return true; + } + return false; +} + +/* Miscellaneous useful routines. */ + +/* Trivial atof, with error reporting. Handles "<digits>[.<digits>]", + doesn't handle exponential notation. Returns true on success, + false on failure. In case of success, stores its result to + *DEST. 
*/ + +static bool +simple_atof (const char *beg, const char *end, double *dest) +{ + double result = 0; + + bool negative = false; + bool seen_dot = false; + bool seen_digit = false; + double divider = 1; + + const char *p = beg; + + while (p < end && c_isspace (*p)) + ++p; + if (p < end && (*p == '-' || *p == '+')) + { + negative = (*p == '-'); + ++p; + } + + for (; p < end; p++) + { + char ch = *p; + if (c_isdigit (ch)) + { + if (!seen_dot) + result = (10 * result) + (ch - '0'); + else + result += (ch - '0') / (divider *= 10); + seen_digit = true; + } + else if (ch == '.') + { + if (!seen_dot) + seen_dot = true; + else + return false; + } + else + return false; + } + if (!seen_digit) + return false; + if (negative) + result = -result; + + *dest = result; + return true; +} + +/* Verify that the user-specified header in S is valid. It must + contain a colon preceded by non-white-space characters and must not + contain newlines. */ + +static bool +check_user_specified_header (const char *s) +{ + const char *p; + + for (p = s; *p && *p != ':' && !c_isspace (*p); p++) + ; + /* The header MUST contain `:' preceded by at least one + non-whitespace character. */ + if (*p != ':' || p == s) + return false; + /* The header MUST NOT contain newlines. */ + if (strchr (s, '\n')) + return false; + return true; +} + +/* Decode VAL into a number, according to ITEMS. */ + +static bool +decode_string (const char *val, const struct decode_item *items, int itemcount, + int *place) +{ + int i; + for (i = 0; i < itemcount; i++) + if (0 == c_strcasecmp (val, items[i].name)) + { + *place = items[i].code; + return true; + } + return false; +} + +extern struct ptimer *timer; +extern int cleaned_up; + +/* Free the memory allocated by global variables. */ +void +cleanup (void) +{ + /* Free external resources, close files, etc. */ + + if (cleaned_up++) + return; /* cleanup() must not be called twice */ + + /* Close WARC file. 
*/ + if (opt.warc_filename != 0) + warc_close (); + + log_close (); + + if (output_stream && output_stream != stderr) + { + FILE *fp = output_stream; + output_stream = NULL; + if (fclose (fp) == EOF) + inform_exit_status (CLOSEFAILED); + } + + /* No need to check for error because Wget flushes its output (and + checks for errors) after any data arrives. */ + + /* We're exiting anyway so there's no real need to call free() + hundreds of times. Skipping the frees will make Wget exit + faster. + * + However, when detecting leaks, it's crucial to free() everything + because then you can find the real leaks, i.e. the allocated + memory which grows with the size of the program. */ + +#if defined DEBUG_MALLOC || defined TESTING + convert_cleanup (); + res_cleanup (); + http_cleanup (); + cleanup_html_url (); + spider_cleanup (); + host_cleanup (); + log_cleanup (); + netrc_cleanup (); +#ifdef HAVE_SSL + ssl_cleanup (); +#endif + connect_cleanup (); + + xfree (opt.choose_config); + xfree (opt.lfilename); + xfree (opt.dir_prefix); + xfree (opt.input_filename); +#ifdef HAVE_METALINK + xfree (opt.input_metalink); + xfree (opt.preferred_location); +#endif + xfree (opt.output_document); + xfree (opt.default_page); + if (opt.regex_type == regex_type_posix) + { + if (opt.acceptregex) + regfree (opt.acceptregex); + if (opt.rejectregex) + regfree (opt.rejectregex); + } + xfree (opt.acceptregex); + xfree (opt.rejectregex); + xfree (opt.acceptregex_s); + xfree (opt.rejectregex_s); + free_vec (opt.accepts); + free_vec (opt.rejects); + free_vec ((char **)opt.excludes); + free_vec ((char **)opt.includes); + free_vec (opt.domains); + free_vec (opt.exclude_domains); + free_vec (opt.follow_tags); + free_vec (opt.ignore_tags); + xfree (opt.progress_type); + xfree (opt.warc_filename); + xfree (opt.warc_tempdir); + xfree (opt.warc_cdx_dedup_filename); + xfree (opt.ftp_user); + xfree (opt.ftp_passwd); + xfree (opt.ftp_proxy); + xfree (opt.https_proxy); + xfree (opt.http_proxy); + free_vec 
(opt.no_proxy); + xfree (opt.proxy_user); + xfree (opt.proxy_passwd); + xfree (opt.useragent); + xfree (opt.referer); + xfree (opt.http_user); + xfree (opt.http_passwd); + xfree (opt.dot_style); + free_vec (opt.user_headers); + free_vec (opt.warc_user_headers); +# ifdef HAVE_SSL + xfree (opt.cert_file); + xfree (opt.private_key); + xfree (opt.ca_directory); + xfree (opt.ca_cert); + xfree (opt.crl_file); + xfree (opt.pinnedpubkey); + xfree (opt.random_file); + xfree (opt.egd_file); +# endif + xfree (opt.bind_address); + xfree (opt.cookies_input); + xfree (opt.cookies_output); + xfree (opt.user); + xfree (opt.passwd); + xfree (opt.base_href); + xfree (opt.method); + xfree (opt.post_file_name); + xfree (opt.post_data); + xfree (opt.body_data); + xfree (opt.body_file); + xfree (opt.rejected_log); + xfree (opt.use_askpass); + xfree (opt.retry_on_http_error); + + xfree (opt.encoding_remote); + xfree (opt.locale); +#ifdef HAVE_HSTS + xfree (opt.hsts_file); +#endif + + xfree (opt.wgetrcfile); + xfree (opt.homedir); + xfree (exec_name); + xfree (program_argstring); + ptimer_destroy (timer); timer = NULL; + +#ifdef HAVE_LIBCARES +#include <ares.h> + { + extern ares_channel ares; + + xfree (opt.bind_dns_address); + xfree (opt.dns_servers); + ares_destroy (ares); + ares_library_cleanup (); + } +#endif + + quotearg_free (); + +#endif /* DEBUG_MALLOC || TESTING */ +} + +/* Unit testing routines. 
*/

#ifdef TESTING

/* Check that each entry of the `commands' table compares less than or
   equal to its successor under c_strcasecmp.  */
const char *
test_commands_sorted(void)
{
  unsigned idx;

  for (idx = 1; idx < countof(commands); ++idx)
    {
      /* Neighbours in the right order: nothing to report.  */
      if (c_strcasecmp (commands[idx - 1].name, commands[idx].name) <= 0)
        continue;

      mu_assert ("FAILED", false);
      break;
    }
  return NULL;
}

/* Feed a few --restrict-file-names values to
   cmd_spec_restrict_file_names, starting from defaults() each time,
   and compare its return value and the resulting opt fields with the
   expectations in the table.  */
const char *
test_cmd_spec_restrict_file_names(void)
{
  static const struct {
    const char *val;
    int expected_restrict_files_os;
    bool expected_restrict_files_ctrl;
    int expected_restrict_files_case;
    bool result;
  } test_array[] = {
    { "windows", restrict_windows, true, restrict_no_case_restriction, true },
    { "windows,", restrict_windows, true, restrict_no_case_restriction, true },
    { "windows,lowercase", restrict_windows, true, restrict_lowercase, true },
    { "unix,nocontrol,lowercase,", restrict_unix, false, restrict_lowercase, true },
  };
  unsigned idx;

  for (idx = 0; idx < countof(test_array); ++idx)
    {
      bool got;

      /* Every case starts from pristine options.  */
      defaults();
      got = cmd_spec_restrict_file_names ("dummy", test_array[idx].val, NULL);

      mu_assert ("test_cmd_spec_restrict_file_names: wrong result",
                 got == test_array[idx].result
                 && (int) opt.restrict_files_os == test_array[idx].expected_restrict_files_os
                 && opt.restrict_files_ctrl == test_array[idx].expected_restrict_files_ctrl
                 && (int) opt.restrict_files_case == test_array[idx].expected_restrict_files_case);
    }

  return NULL;
}

#endif /* TESTING */