diff options
Diffstat (limited to 'lib/ansible/modules/unarchive.py')
-rw-r--r-- | lib/ansible/modules/unarchive.py | 1115 |
1 files changed, 1115 insertions, 0 deletions
diff --git a/lib/ansible/modules/unarchive.py b/lib/ansible/modules/unarchive.py new file mode 100644 index 0000000..26890b5 --- /dev/null +++ b/lib/ansible/modules/unarchive.py @@ -0,0 +1,1115 @@ +# -*- coding: utf-8 -*- + +# Copyright: (c) 2012, Michael DeHaan <michael.dehaan@gmail.com> +# Copyright: (c) 2013, Dylan Martin <dmartin@seattlecentral.edu> +# Copyright: (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com> +# Copyright: (c) 2016, Dag Wieers <dag@wieers.com> +# Copyright: (c) 2017, Ansible Project +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import absolute_import, division, print_function +__metaclass__ = type + + +DOCUMENTATION = r''' +--- +module: unarchive +version_added: '1.4' +short_description: Unpacks an archive after (optionally) copying it from the local machine +description: + - The C(unarchive) module unpacks an archive. It will not unpack a compressed file that does not contain an archive. + - By default, it will copy the source file from the local system to the target before unpacking. + - Set C(remote_src=yes) to unpack an archive which already exists on the target. + - If checksum validation is desired, use M(ansible.builtin.get_url) or M(ansible.builtin.uri) instead to fetch the file and set C(remote_src=yes). + - For Windows targets, use the M(community.windows.win_unzip) module instead. +options: + src: + description: + - If C(remote_src=no) (default), local path to archive file to copy to the target server; can be absolute or relative. If C(remote_src=yes), path on the + target server to existing archive file to unpack. + - If C(remote_src=yes) and C(src) contains C(://), the remote machine will download the file from the URL first. (version_added 2.0). This is only for + simple cases, for full download support use the M(ansible.builtin.get_url) module. + type: path + required: true + dest: + description: + - Remote absolute path where the archive should be unpacked. 
+ - The given path must exist. Base directory is not created by this module. + type: path + required: true + copy: + description: + - If true, the file is copied from local controller to the managed (remote) node, otherwise, the plugin will look for src archive on the managed machine. + - This option has been deprecated in favor of C(remote_src). + - This option is mutually exclusive with C(remote_src). + type: bool + default: yes + creates: + description: + - If the specified absolute path (file or directory) already exists, this step will B(not) be run. + - The specified absolute path (file or directory) must be below the base path given with C(dest:). + type: path + version_added: "1.6" + io_buffer_size: + description: + - Size of the volatile memory buffer that is used for extracting files from the archive in bytes. + type: int + default: 65536 + version_added: "2.12" + list_files: + description: + - If set to True, return the list of files that are contained in the tarball. + type: bool + default: no + version_added: "2.0" + exclude: + description: + - List the directory and file entries that you would like to exclude from the unarchive action. + - Mutually exclusive with C(include). + type: list + default: [] + elements: str + version_added: "2.1" + include: + description: + - List of directory and file entries that you would like to extract from the archive. If C(include) + is not empty, only files listed here will be extracted. + - Mutually exclusive with C(exclude). + type: list + default: [] + elements: str + version_added: "2.11" + keep_newer: + description: + - Do not replace existing files that are newer than files from the archive. + type: bool + default: no + version_added: "2.1" + extra_opts: + description: + - Specify additional options by passing in an array. + - Each space-separated command-line option should be a new element of the array. See examples. 
+ - Command-line options with multiple elements must use multiple lines in the array, one for each element. + type: list + elements: str + default: [] + version_added: "2.1" + remote_src: + description: + - Set to C(true) to indicate the archived file is already on the remote system and not local to the Ansible controller. + - This option is mutually exclusive with C(copy). + type: bool + default: no + version_added: "2.2" + validate_certs: + description: + - This only applies if using a https URL as the source of the file. + - This should only be set to C(false) on personally controlled sites using self-signed certificates. + - Prior to 2.2 the code worked as if this was set to C(true). + type: bool + default: yes + version_added: "2.2" +extends_documentation_fragment: +- action_common_attributes +- action_common_attributes.flow +- action_common_attributes.files +- decrypt +- files +attributes: + action: + support: full + async: + support: none + bypass_host_loop: + support: none + check_mode: + support: partial + details: Not supported for gzipped tar files. + diff_mode: + support: partial + details: Uses gtar's C(--diff) arg to calculate if changed or not. If this C(arg) is not supported, it will always unpack the archive. + platform: + platforms: posix + safe_file_operations: + support: none + vault: + support: full +todo: + - Re-implement tar support using native tarfile module. + - Re-implement zip support using native zipfile module. +notes: + - Requires C(zipinfo) and C(gtar)/C(unzip) commands on target host. + - Requires C(zstd) command on target host to expand I(.tar.zst) files. + - Can handle I(.zip) files using C(unzip) as well as I(.tar), I(.tar.gz), I(.tar.bz2), I(.tar.xz), and I(.tar.zst) files using C(gtar). + - Does not handle I(.gz) files, I(.bz2) files, I(.xz), or I(.zst) files that do not contain a I(.tar) archive. + - Existing files/directories in the destination which are not in the archive + are not touched. 
This is the same behavior as a normal archive extraction. + - Existing files/directories in the destination which are not in the archive + are ignored for purposes of deciding if the archive should be unpacked or not. +seealso: +- module: community.general.archive +- module: community.general.iso_extract +- module: community.windows.win_unzip +author: Michael DeHaan +''' + +EXAMPLES = r''' +- name: Extract foo.tgz into /var/lib/foo + ansible.builtin.unarchive: + src: foo.tgz + dest: /var/lib/foo + +- name: Unarchive a file that is already on the remote machine + ansible.builtin.unarchive: + src: /tmp/foo.zip + dest: /usr/local/bin + remote_src: yes + +- name: Unarchive a file that needs to be downloaded (added in 2.0) + ansible.builtin.unarchive: + src: https://example.com/example.zip + dest: /usr/local/bin + remote_src: yes + +- name: Unarchive a file with extra options + ansible.builtin.unarchive: + src: /tmp/foo.zip + dest: /usr/local/bin + extra_opts: + - --transform + - s/^xxx/yyy/ +''' + +RETURN = r''' +dest: + description: Path to the destination directory. + returned: always + type: str + sample: /opt/software +files: + description: List of all the files in the archive. + returned: When I(list_files) is True + type: list + sample: '["file1", "file2"]' +gid: + description: Numerical ID of the group that owns the destination directory. + returned: always + type: int + sample: 1000 +group: + description: Name of the group that owns the destination directory. + returned: always + type: str + sample: "librarians" +handler: + description: Archive software handler used to extract and decompress the archive. + returned: always + type: str + sample: "TgzArchive" +mode: + description: String that represents the octal permissions of the destination directory. + returned: always + type: str + sample: "0755" +owner: + description: Name of the user that owns the destination directory. 
+ returned: always + type: str + sample: "paul" +size: + description: The size of destination directory in bytes. Does not include the size of files or subdirectories contained within. + returned: always + type: int + sample: 36 +src: + description: + - The source archive's path. + - If I(src) was a remote web URL, or from the local ansible controller, this shows the temporary location where the download was stored. + returned: always + type: str + sample: "/home/paul/test.tar.gz" +state: + description: State of the destination. Effectively always "directory". + returned: always + type: str + sample: "directory" +uid: + description: Numerical ID of the user that owns the destination directory. + returned: always + type: int + sample: 1000 +''' + +import binascii +import codecs +import datetime +import fnmatch +import grp +import os +import platform +import pwd +import re +import stat +import time +import traceback +from functools import partial +from zipfile import ZipFile, BadZipfile + +from ansible.module_utils._text import to_bytes, to_native, to_text +from ansible.module_utils.basic import AnsibleModule +from ansible.module_utils.common.process import get_bin_path +from ansible.module_utils.common.locale import get_best_parsable_locale +from ansible.module_utils.urls import fetch_file + +try: # python 3.3+ + from shlex import quote # type: ignore[attr-defined] +except ImportError: # older python + from pipes import quote + +# String from tar that shows the tar contents are different from the +# filesystem +OWNER_DIFF_RE = re.compile(r': Uid differs$') +GROUP_DIFF_RE = re.compile(r': Gid differs$') +MODE_DIFF_RE = re.compile(r': Mode differs$') +MOD_TIME_DIFF_RE = re.compile(r': Mod time differs$') +# NEWER_DIFF_RE = re.compile(r' is newer or same age.$') +EMPTY_FILE_RE = re.compile(r': : Warning: Cannot stat: No such file or directory$') +MISSING_FILE_RE = re.compile(r': Warning: Cannot stat: No such file or directory$') +ZIP_FILE_MODE_RE = 
re.compile(r'([r-][w-][SsTtx-]){3}')  # completes the ``ZIP_FILE_MODE_RE = `` assignment begun on the previous source line
INVALID_OWNER_RE = re.compile(r': Invalid owner')
INVALID_GROUP_RE = re.compile(r': Invalid group')


def crc32(path, buffer_size):
    ''' Return the unsigned CRC32 checksum of the file at ``path``, read in ``buffer_size`` byte blocks '''

    crc = binascii.crc32(b'')
    with open(path, 'rb') as f:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF)
        for b_block in iter(partial(f.read, buffer_size), b''):
            crc = binascii.crc32(b_block, crc)
    # Mask so the result is the same non-negative value on every Python version
    return crc & 0xffffffff


def shell_escape(string):
    ''' Backslash-escape every character that is not an ASCII letter, digit or underscore '''
    return re.sub(r'([^A-Za-z0-9_])', r'\\\1', string)


class UnarchiveError(Exception):
    ''' Raised when an archive cannot be listed, inspected or compared '''
    pass


class ZipArchive(object):
    ''' Handler for zip archives, driven by the ``unzip`` and ``zipinfo`` commands '''

    def __init__(self, src, b_dest, file_args, module):
        self.src = src
        self.b_dest = b_dest
        self.file_args = file_args
        self.opts = module.params['extra_opts']
        self.module = module
        self.io_buffer_size = module.params["io_buffer_size"]
        self.excludes = module.params['exclude']
        # Paths that is_unarchived() found missing or different
        self.includes = []
        self.include_files = self.module.params['include']
        self.cmd_path = None
        self.zipinfo_cmd_path = None
        self._files_in_archive = []
        # Maps archive member name -> CRC32 (int)
        self._infodict = dict()
        self.zipinfoflag = ''
        # (binary name, attribute that receives its resolved path)
        self.binaries = (
            ('unzip', 'cmd_path'),
            ('zipinfo', 'zipinfo_cmd_path'),
        )

    def _permstr_to_octal(self, modestr, umask):
        ''' Convert a Unix permission string (rw-r--r--) into a mode (0644) '''
        revstr = modestr[::-1]
        mode = 0
        for j in range(0, 3):
            for i in range(0, 3):
                if revstr[i + 3 * j] in ['r', 'w', 'x', 's', 't']:
                    mode += 2 ** (i + 3 * j)
        # The unzip utility does not support setting the stST bits, so the
        # setuid/setgid/sticky bits are deliberately never added to the mode.
        return (mode & ~umask)

    def _legacy_file_list(self):
        ''' Fill the file list and CRC table from ``unzip -v`` output (fallback when zipfile cannot read the archive) '''
        rc, out, err = self.module.run_command([self.cmd_path, '-v', self.src])
        if rc:
            raise UnarchiveError('Neither python zipfile nor unzip can read %s' % self.src)

        # Skip the three header lines and the two footer lines of the listing
        for line in out.splitlines()[3:-2]:
            fields = line.split(None, 7)
            self._files_in_archive.append(fields[7])
            # The CRC-32 column is printed in hexadecimal
            self._infodict[fields[7]] = int(fields[6], 16)

    def _crc32(self, path):
        ''' Return the CRC32 recorded in the archive for member ``path`` (KeyError if it is not a member) '''
        if self._infodict:
            return self._infodict[path]

        try:
            archive = ZipFile(self.src)
        except BadZipfile as e:
            if e.args[0].lower().startswith('bad magic number'):
                # Python2.4 can't handle zipfiles with > 64K files.  Try using
                # /usr/bin/unzip instead
                self._legacy_file_list()
            else:
                raise
        else:
            try:
                for item in archive.infolist():
                    self._infodict[item.filename] = int(item.CRC)
            except Exception:
                raise UnarchiveError('Unable to list files in the archive')
            finally:
                # Always release the file handle, on success as well as on failure
                archive.close()

        return self._infodict[path]

    @property
    def files_in_archive(self):
        ''' List of archive members, filtered by the include or exclude patterns (cached once non-empty) '''
        if self._files_in_archive:
            return self._files_in_archive

        self._files_in_archive = []
        try:
            archive = ZipFile(self.src)
        except BadZipfile as e:
            if e.args[0].lower().startswith('bad magic number'):
                # Python2.4 can't handle zipfiles with > 64K files.  Try using
                # /usr/bin/unzip instead
                self._legacy_file_list()
            else:
                raise
        else:
            # The archive is only closed here, where it is known to be open
            try:
                for member in archive.namelist():
                    if self.include_files:
                        # any() stops at the first match, so a member matching
                        # several patterns is listed only once
                        if any(fnmatch.fnmatch(member, include) for include in self.include_files):
                            self._files_in_archive.append(to_native(member))
                    elif not any(fnmatch.fnmatch(member, exclude) for exclude in self.excludes or ()):
                        self._files_in_archive.append(to_native(member))
            except Exception as e:
                raise UnarchiveError('Unable to list files in the archive: %s' % to_native(e))
            finally:
                archive.close()

        return self._files_in_archive

    def is_unarchived(self):
        ''' Compare the ``zipinfo`` listing of the archive with the destination.

        Returns a dict with ``unarchived`` (bool), ``rc``, ``out``, ``err``,
        ``cmd`` and ``diff`` (one rsync-style itemized line per changed path).
        Raises UnarchiveError on a malformed permission string or when the
        requested ownership cannot be applied by the current user.
        '''
        # BSD unzip doesn't support zipinfo listings with timestamp.
        if self.zipinfoflag:
            cmd = [self.zipinfo_cmd_path, self.zipinfoflag, '-T', '-s', self.src]
        else:
            cmd = [self.zipinfo_cmd_path, '-T', '-s', self.src]

        if self.excludes:
            cmd.extend(['-x', ] + self.excludes)
        if self.include_files:
            cmd.extend(self.include_files)
        rc, out, err = self.module.run_command(cmd)

        old_out = out
        diff = ''
        out = ''
        unarchived = (rc == 0)

        # Get some information related to user/group ownership
        # (os.umask() can only be read by setting it, so restore it right away)
        umask = os.umask(0)
        os.umask(umask)
        systemtype = platform.system()

        # Get current user and group information
        groups = os.getgroups()
        run_uid = os.getuid()
        run_gid = os.getgid()
        try:
            run_owner = pwd.getpwuid(run_uid).pw_name
        except (TypeError, KeyError):
            run_owner = run_uid
        try:
            run_group = grp.getgrgid(run_gid).gr_name
        except (KeyError, ValueError, OverflowError):
            run_group = run_gid

        # Get future user ownership
        fut_owner = fut_uid = None
        if self.file_args['owner']:
            try:
                tpw = pwd.getpwnam(self.file_args['owner'])
            except KeyError:
                try:
                    tpw = pwd.getpwuid(int(self.file_args['owner']))
                except (TypeError, KeyError, ValueError):
                    tpw = pwd.getpwuid(run_uid)
            fut_owner = tpw.pw_name
            fut_uid = tpw.pw_uid
        else:
            fut_owner = run_owner
            fut_uid = run_uid

        # Get future group ownership
        fut_group = fut_gid = None
        if self.file_args['group']:
            try:
                tgr = grp.getgrnam(self.file_args['group'])
            except (ValueError, KeyError):
                try:
                    # no need to check isdigit() explicitly here, if we fail to
                    # parse, the ValueError will be caught.
                    tgr = grp.getgrgid(int(self.file_args['group']))
                except (KeyError, ValueError, OverflowError):
                    tgr = grp.getgrgid(run_gid)
            fut_group = tgr.gr_name
            fut_gid = tgr.gr_gid
        else:
            fut_group = run_group
            fut_gid = run_gid

        for line in old_out.splitlines():
            change = False

            pcs = line.split(None, 7)
            if len(pcs) != 8:
                # Too few fields... probably a piece of the header or footer
                continue

            # Check first and seventh field in order to skip header/footer
            if len(pcs[0]) != 7 and len(pcs[0]) != 10:
                continue
            if len(pcs[6]) != 15:
                continue

            # Possible entries:
            #   -rw-rws---  1.9 unx    2802 t- defX 11-Aug-91 13:48 perms.2660
            #   -rw-a--     1.0 hpf    5358 Tl i4:3  4-Dec-91 11:33 longfilename.hpfs
            #   -r--ahs     1.1 fat    4096 b- i4:2 14-Jul-91 12:58 EA DATA. SF
            #   --w-------  1.0 mac   17357 bx i8:2  4-May-92 04:02 unzip.macr
            if pcs[0][0] not in 'dl-?' or not frozenset(pcs[0][1:]).issubset('rwxstah-'):
                continue

            ztype = pcs[0][0]
            permstr = pcs[0][1:]
            version = pcs[1]
            ostype = pcs[2]
            size = int(pcs[3])
            path = to_text(pcs[7], errors='surrogate_or_strict')

            # Skip excluded files
            if path in self.excludes:
                out += 'Path %s is excluded on request\n' % path
                continue

            # Itemized change requires L for symlink
            if path[-1] == '/':
                if ztype != 'd':
                    err += 'Path %s incorrectly tagged as "%s", but is a directory.\n' % (path, ztype)
                ftype = 'd'
            elif ztype == 'l':
                ftype = 'L'
            elif ztype == 'd':
                # Directory entry listed without a trailing slash; without this
                # branch ftype would be unbound or stale from the previous entry
                ftype = 'd'
            else:
                # '-' (regular file) and '?' (unknown type) are both handled as files
                ftype = 'f'

            # Some files may be storing FAT permissions, not Unix permissions
            # For FAT permissions, we will use a base permissions set of 777 if the item is a directory or has the execute bit set.  Otherwise, 666.
            #     This permission will then be modified by the system UMask.
            # BSD always applies the Umask, even to Unix permissions.
            # For Unix style permissions on Linux or Mac, we want to use them directly.
            #     So we set the UMask for this file to zero.  That permission set will then be unchanged when calling _permstr_to_octal

            if len(permstr) == 6:
                if path[-1] == '/':
                    permstr = 'rwxrwxrwx'
                elif permstr == 'rwx---':
                    permstr = 'rwxrwxrwx'
                else:
                    permstr = 'rw-rw-rw-'
                file_umask = umask
            elif 'bsd' in systemtype.lower():
                file_umask = umask
            else:
                file_umask = 0

            # Test string conformity
            if len(permstr) != 9 or not ZIP_FILE_MODE_RE.match(permstr):
                raise UnarchiveError('ZIP info perm format incorrect, %s' % permstr)

            b_dest = os.path.join(self.b_dest, to_bytes(path, errors='surrogate_or_strict'))
            try:
                st = os.lstat(b_dest)
            except Exception:
                change = True
                self.includes.append(path)
                err += 'Path %s is missing\n' % path
                diff += '>%s++++++.?? %s\n' % (ftype, path)
                continue

            # Compare file types
            if ftype == 'd' and not stat.S_ISDIR(st.st_mode):
                change = True
                self.includes.append(path)
                err += 'File %s already exists, but not as a directory\n' % path
                diff += 'c%s++++++.?? %s\n' % (ftype, path)
                continue

            if ftype == 'f' and not stat.S_ISREG(st.st_mode):
                change = True
                unarchived = False
                self.includes.append(path)
                err += 'Directory %s already exists, but not as a regular file\n' % path
                diff += 'c%s++++++.?? %s\n' % (ftype, path)
                continue

            if ftype == 'L' and not stat.S_ISLNK(st.st_mode):
                change = True
                self.includes.append(path)
                err += 'Directory %s already exists, but not as a symlink\n' % path
                diff += 'c%s++++++.?? %s\n' % (ftype, path)
                continue

            # rsync-style itemized string: index 2=checksum, 3=size, 4=time,
            # 5=permissions, 6=owner, 7=group
            itemized = list('.%s.......??' % ftype)

            # Note: this timestamp calculation has a rounding error
            # somewhere... unzip and this timestamp can be one second off
            # When that happens, we report a change and re-unzip the file
            dt_object = datetime.datetime(*(time.strptime(pcs[6], '%Y%m%d.%H%M%S')[0:6]))
            timestamp = time.mktime(dt_object.timetuple())

            # Compare file timestamps
            if stat.S_ISREG(st.st_mode):
                if self.module.params['keep_newer']:
                    if timestamp > st.st_mtime:
                        change = True
                        self.includes.append(path)
                        err += 'File %s is older, replacing file\n' % path
                        itemized[4] = 't'
                    elif timestamp < st.st_mtime:
                        # Add to excluded files, ignore other changes
                        out += 'File %s is newer, excluding file\n' % path
                        self.excludes.append(path)
                        continue
                else:
                    if timestamp != st.st_mtime:
                        change = True
                        self.includes.append(path)
                        err += 'File %s differs in mtime (%f vs %f)\n' % (path, timestamp, st.st_mtime)
                        itemized[4] = 't'

            # Compare file sizes
            if stat.S_ISREG(st.st_mode) and size != st.st_size:
                change = True
                err += 'File %s differs in size (%d vs %d)\n' % (path, size, st.st_size)
                itemized[3] = 's'

            # Compare file checksums
            if stat.S_ISREG(st.st_mode):
                crc = crc32(b_dest, self.io_buffer_size)
                if crc != self._crc32(path):
                    change = True
                    err += 'File %s differs in CRC32 checksum (0x%08x vs 0x%08x)\n' % (path, self._crc32(path), crc)
                    itemized[2] = 'c'

            # Compare file permissions

            # Do not handle permissions of symlinks
            if ftype != 'L':

                # Use the new mode provided with the action, if there is one
                if self.file_args['mode']:
                    if isinstance(self.file_args['mode'], int):
                        mode = self.file_args['mode']
                    else:
                        try:
                            mode = int(self.file_args['mode'], 8)
                        except Exception as e:
                            try:
                                mode = AnsibleModule._symbolic_mode_to_octal(st, self.file_args['mode'])
                            except ValueError as e:
                                self.module.fail_json(path=path, msg="%s" % to_native(e), exception=traceback.format_exc())
                # Only special files require no umask-handling
                elif ztype == '?':
                    mode = self._permstr_to_octal(permstr, 0)
                else:
                    mode = self._permstr_to_octal(permstr, file_umask)

                if mode != stat.S_IMODE(st.st_mode):
                    change = True
                    itemized[5] = 'p'
                    err += 'Path %s differs in permissions (%o vs %o)\n' % (path, mode, stat.S_IMODE(st.st_mode))

            # Compare file user ownership
            owner = uid = None
            try:
                owner = pwd.getpwuid(st.st_uid).pw_name
            except (TypeError, KeyError):
                uid = st.st_uid

            # If we are not root and requested owner is not our user, fail
            if run_uid != 0 and (fut_owner != run_owner or fut_uid != run_uid):
                raise UnarchiveError('Cannot change ownership of %s to %s, as user %s' % (path, fut_owner, run_owner))

            if owner and owner != fut_owner:
                change = True
                err += 'Path %s is owned by user %s, not by user %s as expected\n' % (path, owner, fut_owner)
                itemized[6] = 'o'
            elif uid is not None and uid != fut_uid:
                # "is not None" so that an unresolvable uid 0 is still compared
                change = True
                err += 'Path %s is owned by uid %s, not by uid %s as expected\n' % (path, uid, fut_uid)
                itemized[6] = 'o'

            # Compare file group ownership
            group = gid = None
            try:
                group = grp.getgrgid(st.st_gid).gr_name
            except (KeyError, ValueError, OverflowError):
                gid = st.st_gid

            if run_uid != 0 and (fut_group != run_group or fut_gid != run_gid) and fut_gid not in groups:
                raise UnarchiveError('Cannot change group ownership of %s to %s, as user %s' % (path, fut_group, run_owner))

            # The group flag lives at index 7; index 6 is the owner flag
            if group and group != fut_group:
                change = True
                err += 'Path %s is owned by group %s, not by group %s as expected\n' % (path, group, fut_group)
                itemized[7] = 'g'
            elif gid is not None and gid != fut_gid:
                change = True
                err += 'Path %s is owned by gid %s, not by gid %s as expected\n' % (path, gid, fut_gid)
                itemized[7] = 'g'

            # Register changed files and finalize diff output
            if change:
                if path not in self.includes:
                    self.includes.append(path)
                diff += '%s %s\n' % (''.join(itemized), path)

        if self.includes:
            unarchived = False

        return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd, diff=diff)

    def unarchive(self):
        ''' Extract the archive into the destination with ``unzip -o``; returns dict(cmd, rc, out, err) '''
        cmd = [self.cmd_path, '-o']
        if self.opts:
            cmd.extend(self.opts)
        cmd.append(self.src)
        # NOTE: Including (changed) files as arguments is problematic (limits on command line/arguments),
        #       so self.includes is deliberately not passed to unzip.
        if self.excludes:
            cmd.extend(['-x'] + self.excludes)
        if self.include_files:
            cmd.extend(self.include_files)
        cmd.extend(['-d', self.b_dest])
        rc, out, err = self.module.run_command(cmd)
        return dict(cmd=cmd, rc=rc, out=out, err=err)

    def can_handle_archive(self):
        ''' Locate the required binaries and test-list the archive; returns (can_handle, reason) '''
        missing = []
        for b in self.binaries:
            try:
                setattr(self, b[1], get_bin_path(b[0]))
            except ValueError:
                # A binary can appear more than once in self.binaries; report it once
                if b[0] not in missing:
                    missing.append(b[0])

        if missing:
            return False, "Unable to find required '{missing}' binary in the path.".format(missing="' or '".join(missing))

        cmd = [self.cmd_path, '-l', self.src]
        rc, out, err = self.module.run_command(cmd)
        if rc == 0:
            return True, None
        return False, 'Command "%s" could not handle archive: %s' % (self.cmd_path, err)


class TgzArchive(object):
    ''' Handler for gzip-compressed tar archives, driven by GNU tar '''

    def __init__(self, src, b_dest, file_args, module):
        self.src = src
        self.b_dest = b_dest
        self.file_args = file_args
        self.opts = module.params['extra_opts']
        self.module = module
        if self.module.check_mode:
            self.module.exit_json(skipped=True, msg="remote module (%s) does not support check mode when using gtar" % self.module._name)
        self.excludes = [path.rstrip('/') for path in self.module.params['exclude']]
        self.include_files = self.module.params['include']
        self.cmd_path = None
        self.tar_type = None
        # Compression flag passed to tar; subclasses override it
        self.zipflag = '-z'
        self._files_in_archive = []

    def _get_tar_type(self):
        ''' Return 'bsd' or 'gnu' based on the ``--version`` output, or None when unrecognised '''
        cmd = [self.cmd_path, '--version']
        (rc, out, err) = self.module.run_command(cmd)
        tar_type = None
        if out.startswith('bsdtar'):
            tar_type = 'bsd'
        elif out.startswith('tar') and 'GNU' in out:
            tar_type = 'gnu'
        return tar_type

    def _run_tar(self, cmd):
        ''' Run ``cmd`` from the destination directory under a parsable locale; returns (rc, out, err) '''
        locale = get_best_parsable_locale(self.module)
        return self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG=locale, LC_ALL=locale, LC_MESSAGES=locale, LANGUAGE=locale))

    @property
    def files_in_archive(self):
        ''' List of archive members reported by ``tar --list``, minus excluded ones (cached once non-empty) '''
        if self._files_in_archive:
            return self._files_in_archive

        cmd = [self.cmd_path, '--list', '-C', self.b_dest]
        if self.zipflag:
            cmd.append(self.zipflag)
        if self.opts:
            cmd.extend(['--show-transformed-names'] + self.opts)
        if self.excludes:
            cmd.extend(['--exclude=' + f for f in self.excludes])
        cmd.extend(['-f', self.src])
        if self.include_files:
            cmd.extend(self.include_files)

        rc, out, err = self._run_tar(cmd)
        if rc != 0:
            raise UnarchiveError('Unable to list files in the archive: %s' % err)

        for filename in out.splitlines():
            # Compensate for locale-related problems in gtar output (octal unicode representation) #11348
            filename = to_native(codecs.escape_decode(filename)[0])

            # We don't allow absolute filenames.  If the user wants to unarchive rooted in "/"
            # they need to use "dest: '/'".  This follows the defaults for gtar, pax, etc.
            # Allowing absolute filenames here also causes bugs: https://github.com/ansible/ansible/issues/21397
            if filename.startswith('/'):
                filename = filename[1:]

            exclude_flag = False
            if self.excludes:
                for exclude in self.excludes:
                    if fnmatch.fnmatch(filename, exclude):
                        exclude_flag = True
                        break

            if not exclude_flag:
                self._files_in_archive.append(to_native(filename))

        return self._files_in_archive

    def is_unarchived(self):
        ''' Run ``tar --diff`` against the destination; returns dict(unarchived, rc, out, err, cmd) '''
        cmd = [self.cmd_path, '--diff', '-C', self.b_dest]
        if self.zipflag:
            cmd.append(self.zipflag)
        if self.opts:
            cmd.extend(['--show-transformed-names'] + self.opts)
        # cmd is an argv list, so values are passed verbatim: shell-quoting
        # them would hand literal quote characters to tar
        if self.file_args['owner']:
            cmd.append('--owner=' + self.file_args['owner'])
        if self.file_args['group']:
            cmd.append('--group=' + self.file_args['group'])
        if self.module.params['keep_newer']:
            cmd.append('--keep-newer-files')
        if self.excludes:
            cmd.extend(['--exclude=' + f for f in self.excludes])
        cmd.extend(['-f', self.src])
        if self.include_files:
            cmd.extend(self.include_files)
        rc, out, err = self._run_tar(cmd)

        # Check whether the differences are in something that we're
        # setting anyway

        # What is different
        unarchived = True
        old_out = out
        out = ''
        run_uid = os.getuid()
        # When unarchiving as a user, or when owner/group/mode is supplied --diff is insufficient
        # Only way to be sure is to check request with what is on disk (as we do for zip)
        # Leave this up to set_fs_attributes_if_different() instead of inducing a (false) change
        for line in old_out.splitlines() + err.splitlines():
            # FIXME: Remove the bogus lines from error-output as well !
            # Ignore bogus errors on empty filenames (when using --split-component)
            if EMPTY_FILE_RE.search(line):
                continue
            if run_uid == 0 and not self.file_args['owner'] and OWNER_DIFF_RE.search(line):
                out += line + '\n'
            if run_uid == 0 and not self.file_args['group'] and GROUP_DIFF_RE.search(line):
                out += line + '\n'
            if not self.file_args['mode'] and MODE_DIFF_RE.search(line):
                out += line + '\n'
            if MOD_TIME_DIFF_RE.search(line):
                out += line + '\n'
            if MISSING_FILE_RE.search(line):
                out += line + '\n'
            if INVALID_OWNER_RE.search(line):
                out += line + '\n'
            if INVALID_GROUP_RE.search(line):
                out += line + '\n'
        if out:
            unarchived = False
        return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd)

    def unarchive(self):
        ''' Extract the archive into the destination with ``tar --extract``; returns dict(cmd, rc, out, err) '''
        cmd = [self.cmd_path, '--extract', '-C', self.b_dest]
        if self.zipflag:
            cmd.append(self.zipflag)
        if self.opts:
            cmd.extend(['--show-transformed-names'] + self.opts)
        # argv list, not a shell string: pass owner/group verbatim
        if self.file_args['owner']:
            cmd.append('--owner=' + self.file_args['owner'])
        if self.file_args['group']:
            cmd.append('--group=' + self.file_args['group'])
        if self.module.params['keep_newer']:
            cmd.append('--keep-newer-files')
        if self.excludes:
            cmd.extend(['--exclude=' + f for f in self.excludes])
        cmd.extend(['-f', self.src])
        if self.include_files:
            cmd.extend(self.include_files)
        rc, out, err = self._run_tar(cmd)
        return dict(cmd=cmd, rc=rc, out=out, err=err)

    def can_handle_archive(self):
        ''' Locate GNU tar and test-list the archive; returns (can_handle, reason) '''
        # Prefer gtar (GNU tar) as it supports the compression options -z, -j and -J
        try:
            self.cmd_path = get_bin_path('gtar')
        except ValueError:
            # Fallback to tar
            try:
                self.cmd_path = get_bin_path('tar')
            except ValueError:
                return False, "Unable to find required 'gtar' or 'tar' binary in the path"

        self.tar_type = self._get_tar_type()

        if self.tar_type != 'gnu':
            return False, 'Command "%s" detected as tar type %s. GNU tar required.' % (self.cmd_path, self.tar_type)

        try:
            if self.files_in_archive:
                return True, None
        except UnarchiveError as e:
            return False, 'Command "%s" could not handle archive: %s' % (self.cmd_path, to_native(e))
        # Errors and no files in archive assume that we weren't able to
        # properly unarchive it
        return False, 'Command "%s" found no files in archive. Empty archive files are not supported.' % self.cmd_path


# Class to handle tar files that aren't compressed
class TarArchive(TgzArchive):
    def __init__(self, src, b_dest, file_args, module):
        super(TarArchive, self).__init__(src, b_dest, file_args, module)
        # argument to tar
        self.zipflag = ''


# Class to handle bzip2 compressed tar files
class TarBzipArchive(TgzArchive):
    def __init__(self, src, b_dest, file_args, module):
        super(TarBzipArchive, self).__init__(src, b_dest, file_args, module)
        self.zipflag = '-j'


# Class to handle xz compressed tar files
class TarXzArchive(TgzArchive):
    def __init__(self, src, b_dest, file_args, module):
        super(TarXzArchive, self).__init__(src, b_dest, file_args, module)
        self.zipflag = '-J'


# Class to handle zstd compressed tar files
class TarZstdArchive(TgzArchive):
    def __init__(self, src, b_dest, file_args, module):
        super(TarZstdArchive, self).__init__(src, b_dest, file_args, module)
        # GNU Tar supports the --use-compress-program option to
        # specify which executable to use for
        # compression/decompression.
        #
        # Note: some flavors of BSD tar support --zstd (e.g., FreeBSD
        # 12.2), but the TgzArchive class only supports GNU Tar.
        self.zipflag = '--use-compress-program=zstd'


# Class to handle zip files when zipinfo is only available as "unzip -Z"
class ZipZArchive(ZipArchive):
    def __init__(self, src, b_dest, file_args, module):
        super(ZipZArchive, self).__init__(src, b_dest, file_args, module)
        self.zipinfoflag = '-Z'
        self.binaries = (
            ('unzip', 'cmd_path'),
            ('unzip', 'zipinfo_cmd_path'),
        )

    def can_handle_archive(self):
        ''' As ZipArchive.can_handle_archive, and additionally require a working ``unzip -Z`` '''
        unzip_available, error_msg = super(ZipZArchive, self).can_handle_archive()

        if not unzip_available:
            return unzip_available, error_msg

        # Ensure unzip -Z is available before we use it in is_unarchive
        cmd = [self.zipinfo_cmd_path, self.zipinfoflag]
        rc, out, err = self.module.run_command(cmd)
        if 'zipinfo' in out.lower():
            return True, None
        return False, 'Command "unzip -Z" could not handle archive: %s' % err


# try handlers in order and return the one that works or bail if none work
def pick_handler(src, dest, file_args, module):
    ''' Return the first handler instance able to process ``src``; fail the module when none can '''
    handlers = [ZipArchive, ZipZArchive, TgzArchive, TarArchive, TarBzipArchive, TarXzArchive, TarZstdArchive]
    # De-duplicated, in the order the handlers were tried, so the failure
    # message is stable from run to run
    reasons = []
    for handler in handlers:
        obj = handler(src, dest, file_args, module)
        (can_handle, reason) = obj.can_handle_archive()
        if can_handle:
            return obj
        if reason not in reasons:
            reasons.append(reason)
    reason_msg = '\n'.join(reasons)
    module.fail_json(msg='Failed to find handler for "%s". Make sure the required command to extract the file is installed.\n%s' % (src, reason_msg))


def main():
    module = AnsibleModule(
        # not checking because of daisy chain to file module
        argument_spec=dict(
            src=dict(type='path', required=True),
            dest=dict(type='path', required=True),
            remote_src=dict(type='bool', default=False),
            creates=dict(type='path'),
            list_files=dict(type='bool', default=False),
            keep_newer=dict(type='bool', default=False),
            exclude=dict(type='list', elements='str', default=[]),
            include=dict(type='list', elements='str', default=[]),
            extra_opts=dict(type='list', elements='str', default=[]),
            validate_certs=dict(type='bool', default=True),
            io_buffer_size=dict(type='int', default=64 * 1024),

            # Options that are for the action plugin, but ignored by the module itself.
            # We have them here so that the sanity tests pass without ignores, which
            # reduces the likelihood of further bugs added.
            copy=dict(type='bool', default=True),
            decrypt=dict(type='bool', default=True),
        ),
        add_file_common_args=True,
        # check-mode only works for zip files, we cover that later
        supports_check_mode=True,
        mutually_exclusive=[('include', 'exclude')],
    )

    src = module.params['src']
    dest = module.params['dest']
    b_dest = to_bytes(dest, errors='surrogate_or_strict')
    remote_src = module.params['remote_src']
    file_args = module.load_file_common_arguments(module.params)

    # did tar file arrive?
    if not os.path.exists(src):
        if not remote_src:
            module.fail_json(msg="Source '%s' failed to transfer" % src)
        # If remote_src=true, and src= contains ://, try and download the file to a temp directory.
+ elif '://' in src: + src = fetch_file(module, src) + else: + module.fail_json(msg="Source '%s' does not exist" % src) + if not os.access(src, os.R_OK): + module.fail_json(msg="Source '%s' not readable" % src) + + # skip working with 0 size archives + try: + if os.path.getsize(src) == 0: + module.fail_json(msg="Invalid archive '%s', the file is 0 bytes" % src) + except Exception as e: + module.fail_json(msg="Source '%s' not readable, %s" % (src, to_native(e))) + + # is dest OK to receive tar file? + if not os.path.isdir(b_dest): + module.fail_json(msg="Destination '%s' is not a directory" % dest) + + handler = pick_handler(src, b_dest, file_args, module) + + res_args = dict(handler=handler.__class__.__name__, dest=dest, src=src) + + # do we need to do unpack? + check_results = handler.is_unarchived() + + # DEBUG + # res_args['check_results'] = check_results + + if module.check_mode: + res_args['changed'] = not check_results['unarchived'] + elif check_results['unarchived']: + res_args['changed'] = False + else: + # do the unpack + try: + res_args['extract_results'] = handler.unarchive() + if res_args['extract_results']['rc'] != 0: + module.fail_json(msg="failed to unpack %s to %s" % (src, dest), **res_args) + except IOError: + module.fail_json(msg="failed to unpack %s to %s" % (src, dest), **res_args) + else: + res_args['changed'] = True + + # Get diff if required + if check_results.get('diff', False): + res_args['diff'] = {'prepared': check_results['diff']} + + # Run only if we found differences (idempotence) or diff was missing + if res_args.get('diff', True) and not module.check_mode: + # do we need to change perms? 
+ top_folders = [] + for filename in handler.files_in_archive: + file_args['path'] = os.path.join(b_dest, to_bytes(filename, errors='surrogate_or_strict')) + + try: + res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed'], expand=False) + except (IOError, OSError) as e: + module.fail_json(msg="Unexpected error when accessing exploded file: %s" % to_native(e), **res_args) + + if '/' in filename: + top_folder_path = filename.split('/')[0] + if top_folder_path not in top_folders: + top_folders.append(top_folder_path) + + # make sure top folders have the right permissions + # https://github.com/ansible/ansible/issues/35426 + if top_folders: + for f in top_folders: + file_args['path'] = "%s/%s" % (dest, f) + try: + res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed'], expand=False) + except (IOError, OSError) as e: + module.fail_json(msg="Unexpected error when accessing exploded file: %s" % to_native(e), **res_args) + + if module.params['list_files']: + res_args['files'] = handler.files_in_archive + + module.exit_json(**res_args) + + +if __name__ == '__main__': + main() |