Diffstat (limited to 'lib/ansible/modules/find.py')
-rw-r--r-- | lib/ansible/modules/find.py | 534
1 file changed, 534 insertions, 0 deletions
diff --git a/lib/ansible/modules/find.py b/lib/ansible/modules/find.py
new file mode 100644
index 0000000..b13c841
--- /dev/null
+++ b/lib/ansible/modules/find.py
@@ -0,0 +1,534 @@
+# -*- coding: utf-8 -*-
+
+# Copyright: (c) 2014, Ruggero Marchei <ruggero.marchei@daemonzone.net>
+# Copyright: (c) 2015, Brian Coca <bcoca@ansible.com>
+# Copyright: (c) 2016-2017, Konstantin Shalygin <k0ste@k0ste.ru>
+# Copyright: (c) 2017, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
+
+
+DOCUMENTATION = r'''
+---
+module: find
+author: Brian Coca (@bcoca)
+version_added: "2.0"
+short_description: Return a list of files based on specific criteria
+description:
+    - Return a list of files based on specific criteria. Multiple criteria are AND'd together.
+    - For Windows targets, use the M(ansible.windows.win_find) module instead.
+options:
+    age:
+        description:
+            - Select files whose age is equal to or greater than the specified time.
+            - Use a negative age to find files equal to or less than the specified time.
+            - You can choose seconds, minutes, hours, days, or weeks by specifying the
+              first letter of any of those words (e.g., "1w").
+        type: str
+    patterns:
+        default: []
+        description:
+            - One or more (shell or regex) patterns, which type is controlled by C(use_regex) option.
+            - The patterns restrict the list of files to be returned to those whose basenames match at
+              least one of the patterns specified. Multiple patterns can be specified using a list.
+            - The pattern is matched against the file base name, excluding the directory.
+            - When using regexen, the pattern MUST match the ENTIRE file name, not just parts of it. So
+              if you are looking to match all files ending in .default, you'd need to use C(.*\.default)
+              as a regexp and not just C(\.default).
+            - This parameter expects a list, which can be either comma separated or YAML. If any of the
+              patterns contain a comma, make sure to put them in a list to avoid splitting the patterns
+              in undesirable ways.
+            - Defaults to C(*) when I(use_regex=False), or C(.*) when I(use_regex=True).
+        type: list
+        aliases: [ pattern ]
+        elements: str
+    excludes:
+        description:
+            - One or more (shell or regex) patterns, which type is controlled by I(use_regex) option.
+            - Items whose basenames match an I(excludes) pattern are culled from I(patterns) matches.
+              Multiple patterns can be specified using a list.
+        type: list
+        aliases: [ exclude ]
+        version_added: "2.5"
+        elements: str
+    contains:
+        description:
+            - A regular expression or pattern which should be matched against the file content.
+            - Works only when I(file_type) is C(file).
+        type: str
+    read_whole_file:
+        description:
+            - When doing a C(contains) search, determines whether the whole file should be read into
+              memory or if the regex should be applied to the file line-by-line.
+            - Setting this to C(true) can have performance and memory implications for large files.
+            - This uses C(re.search()) instead of C(re.match()).
+        type: bool
+        default: false
+        version_added: "2.11"
+    paths:
+        description:
+            - List of paths of directories to search. All paths must be fully qualified.
+        type: list
+        required: true
+        aliases: [ name, path ]
+        elements: str
+    file_type:
+        description:
+            - Type of file to select.
+            - The 'link' and 'any' choices were added in Ansible 2.3.
+        type: str
+        choices: [ any, directory, file, link ]
+        default: file
+    recurse:
+        description:
+            - If target is a directory, recursively descend into the directory looking for files.
+        type: bool
+        default: no
+    size:
+        description:
+            - Select files whose size is equal to or greater than the specified size.
+            - Use a negative size to find files equal to or less than the specified size.
+            - Unqualified values are in bytes but b, k, m, g, and t can be appended to specify
+              bytes, kilobytes, megabytes, gigabytes, and terabytes, respectively.
+            - Size is not evaluated for directories.
+        type: str
+    age_stamp:
+        description:
+            - Choose the file property against which we compare age.
+        type: str
+        choices: [ atime, ctime, mtime ]
+        default: mtime
+    hidden:
+        description:
+            - Set this to C(true) to include hidden files, otherwise they will be ignored.
+        type: bool
+        default: no
+    follow:
+        description:
+            - Set this to C(true) to follow symlinks in path for systems with python 2.6+.
+        type: bool
+        default: no
+    get_checksum:
+        description:
+            - Set this to C(true) to retrieve a file's SHA1 checksum.
+        type: bool
+        default: no
+    use_regex:
+        description:
+            - If C(false), the patterns are file globs (shell).
+            - If C(true), they are python regexes.
+        type: bool
+        default: no
+    depth:
+        description:
+            - Set the maximum number of levels to descend into.
+            - Setting recurse to C(false) will override this value, which is effectively depth 1.
+            - Default is unlimited depth.
+        type: int
+        version_added: "2.6"
+extends_documentation_fragment: action_common_attributes
+attributes:
+    check_mode:
+        details: since this action does not modify the target it just executes normally during check mode
+        support: full
+    diff_mode:
+        support: none
+    platform:
+        platforms: posix
+seealso:
+- module: ansible.windows.win_find
+'''
+
+
+EXAMPLES = r'''
+- name: Recursively find /tmp files older than 2 days
+  ansible.builtin.find:
+    paths: /tmp
+    age: 2d
+    recurse: yes
+
+- name: Recursively find /tmp files older than 4 weeks and equal or greater than 1 megabyte
+  ansible.builtin.find:
+    paths: /tmp
+    age: 4w
+    size: 1m
+    recurse: yes
+
+- name: Recursively find /var/tmp files with last access time greater than 3600 seconds
+  ansible.builtin.find:
+    paths: /var/tmp
+    age: 3600
+    age_stamp: atime
+    recurse: yes
+
+- name: Find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz
+  ansible.builtin.find:
+    paths: /var/log
+    patterns: '*.old,*.log.gz'
+    size: 10m
+
+# Note that YAML double quotes require escaping backslashes but yaml single quotes do not.
+- name: Find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex
+  ansible.builtin.find:
+    paths: /var/log
+    patterns: "^.*?\\.(?:old|log\\.gz)$"
+    size: 10m
+    use_regex: yes
+
+- name: Find /var/log all directories, exclude nginx and mysql
+  ansible.builtin.find:
+    paths: /var/log
+    recurse: no
+    file_type: directory
+    excludes: 'nginx,mysql'
+
+# When using patterns that contain a comma, make sure they are formatted as lists to avoid splitting the pattern
+- name: Use a single pattern that contains a comma formatted as a list
+  ansible.builtin.find:
+    paths: /var/log
+    file_type: file
+    use_regex: yes
+    patterns: ['^_[0-9]{2,4}_.*.log$']
+
+- name: Use multiple patterns that contain a comma formatted as a YAML list
+  ansible.builtin.find:
+    paths: /var/log
+    file_type: file
+    use_regex: yes
+    patterns:
+      - '^_[0-9]{2,4}_.*.log$'
+      - '^[a-z]{1,5}_.*log$'
+
+'''
+
+RETURN = r'''
+files:
+    description: All matches found with the specified criteria (see stat module for full output of each dictionary)
+    returned: success
+    type: list
+    sample: [
+        { path: "/var/tmp/test1",
+          mode: "0644",
+          "...": "...",
+          checksum: 16fac7be61a6e4591a33ef4b729c5c3302307523
+        },
+        { path: "/var/tmp/test2",
+          "...": "..."
+        },
+        ]
+matched:
+    description: Number of matches
+    returned: success
+    type: int
+    sample: 14
+examined:
+    description: Number of filesystem objects looked at
+    returned: success
+    type: int
+    sample: 34
+skipped_paths:
+    description: skipped paths and reasons they were skipped
+    returned: success
+    type: dict
+    sample: {"/laskdfj": "'/laskdfj' is not a directory"}
+    version_added: '2.12'
+'''
+
+import fnmatch
+import grp
+import os
+import pwd
+import re
+import stat
+import time
+
+from ansible.module_utils._text import to_text, to_native
+from ansible.module_utils.basic import AnsibleModule
+
+
+def pfilter(f, patterns=None, excludes=None, use_regex=False):
+    '''filter using glob patterns'''
+    if not patterns and not excludes:
+        return True
+
+    if use_regex:
+        if patterns and not excludes:
+            for p in patterns:
+                r = re.compile(p)
+                if r.match(f):
+                    return True
+
+        elif patterns and excludes:
+            for p in patterns:
+                r = re.compile(p)
+                if r.match(f):
+                    for e in excludes:
+                        r = re.compile(e)
+                        if r.match(f):
+                            return False
+                    return True
+
+    else:
+        if patterns and not excludes:
+            for p in patterns:
+                if fnmatch.fnmatch(f, p):
+                    return True
+
+        elif patterns and excludes:
+            for p in patterns:
+                if fnmatch.fnmatch(f, p):
+                    for e in excludes:
+                        if fnmatch.fnmatch(f, e):
+                            return False
+                    return True
+
+    return False
+
+
+def agefilter(st, now, age, timestamp):
+    '''filter files older than age'''
+    if age is None:
+        return True
+    elif age >= 0 and now - getattr(st, "st_%s" % timestamp) >= abs(age):
+        return True
+    elif age < 0 and now - getattr(st, "st_%s" % timestamp) <= abs(age):
+        return True
+    return False
+
+
+def sizefilter(st, size):
+    '''filter files greater than size'''
+    if size is None:
+        return True
+    elif size >= 0 and st.st_size >= abs(size):
+        return True
+    elif size < 0 and st.st_size <= abs(size):
+        return True
+    return False
+
+
+def contentfilter(fsname, pattern, read_whole_file=False):
+    """
+    Filter files which contain the given expression
+    :arg fsname: Filename to scan for lines matching a pattern
+    :arg pattern: Pattern to look for inside of line
+    :arg read_whole_file: If true, the whole file is read into memory before the regex is applied against it. Otherwise, the regex is applied line-by-line.
+    :rtype: bool
+    :returns: True if one of the lines in fsname matches the pattern. Otherwise False
+    """
+    if pattern is None:
+        return True
+
+    prog = re.compile(pattern)
+
+    try:
+        with open(fsname) as f:
+            if read_whole_file:
+                return bool(prog.search(f.read()))
+
+            for line in f:
+                if prog.match(line):
+                    return True
+
+    except Exception:
+        pass
+
+    return False
+
+
+def statinfo(st):
+    pw_name = ""
+    gr_name = ""
+
+    try:  # user data
+        pw_name = pwd.getpwuid(st.st_uid).pw_name
+    except Exception:
+        pass
+
+    try:  # group data
+        gr_name = grp.getgrgid(st.st_gid).gr_name
+    except Exception:
+        pass
+
+    return {
+        'mode': "%04o" % stat.S_IMODE(st.st_mode),
+        'isdir': stat.S_ISDIR(st.st_mode),
+        'ischr': stat.S_ISCHR(st.st_mode),
+        'isblk': stat.S_ISBLK(st.st_mode),
+        'isreg': stat.S_ISREG(st.st_mode),
+        'isfifo': stat.S_ISFIFO(st.st_mode),
+        'islnk': stat.S_ISLNK(st.st_mode),
+        'issock': stat.S_ISSOCK(st.st_mode),
+        'uid': st.st_uid,
+        'gid': st.st_gid,
+        'size': st.st_size,
+        'inode': st.st_ino,
+        'dev': st.st_dev,
+        'nlink': st.st_nlink,
+        'atime': st.st_atime,
+        'mtime': st.st_mtime,
+        'ctime': st.st_ctime,
+        'gr_name': gr_name,
+        'pw_name': pw_name,
+        'wusr': bool(st.st_mode & stat.S_IWUSR),
+        'rusr': bool(st.st_mode & stat.S_IRUSR),
+        'xusr': bool(st.st_mode & stat.S_IXUSR),
+        'wgrp': bool(st.st_mode & stat.S_IWGRP),
+        'rgrp': bool(st.st_mode & stat.S_IRGRP),
+        'xgrp': bool(st.st_mode & stat.S_IXGRP),
+        'woth': bool(st.st_mode & stat.S_IWOTH),
+        'roth': bool(st.st_mode & stat.S_IROTH),
+        'xoth': bool(st.st_mode & stat.S_IXOTH),
+        'isuid': bool(st.st_mode & stat.S_ISUID),
+        'isgid': bool(st.st_mode & stat.S_ISGID),
+    }
+
+
+def handle_walk_errors(e):
+    raise e
+
+
+def main():
+    module = AnsibleModule(
+        argument_spec=dict(
+            paths=dict(type='list', required=True, aliases=['name', 'path'], elements='str'),
+            patterns=dict(type='list', default=[], aliases=['pattern'], elements='str'),
+            excludes=dict(type='list', aliases=['exclude'], elements='str'),
+            contains=dict(type='str'),
+            read_whole_file=dict(type='bool', default=False),
+            file_type=dict(type='str', default="file", choices=['any', 'directory', 'file', 'link']),
+            age=dict(type='str'),
+            age_stamp=dict(type='str', default="mtime", choices=['atime', 'ctime', 'mtime']),
+            size=dict(type='str'),
+            recurse=dict(type='bool', default=False),
+            hidden=dict(type='bool', default=False),
+            follow=dict(type='bool', default=False),
+            get_checksum=dict(type='bool', default=False),
+            use_regex=dict(type='bool', default=False),
+            depth=dict(type='int'),
+        ),
+        supports_check_mode=True,
+    )
+
+    params = module.params
+
+    # Set the default match pattern to either a match-all glob or
+    # regex depending on use_regex being set. This makes sure if you
+    # set excludes: without a pattern pfilter gets something it can
+    # handle.
+    if not params['patterns']:
+        if params['use_regex']:
+            params['patterns'] = ['.*']
+        else:
+            params['patterns'] = ['*']
+
+    filelist = []
+    skipped = {}
+
+    if params['age'] is None:
+        age = None
+    else:
+        # convert age to seconds:
+        m = re.match(r"^(-?\d+)(s|m|h|d|w)?$", params['age'].lower())
+        seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
+        if m:
+            age = int(m.group(1)) * seconds_per_unit.get(m.group(2), 1)
+        else:
+            module.fail_json(age=params['age'], msg="failed to process age")
+
+    if params['size'] is None:
+        size = None
+    else:
+        # convert size to bytes:
+        m = re.match(r"^(-?\d+)(b|k|m|g|t)?$", params['size'].lower())
+        bytes_per_unit = {"b": 1, "k": 1024, "m": 1024**2, "g": 1024**3, "t": 1024**4}
+        if m:
+            size = int(m.group(1)) * bytes_per_unit.get(m.group(2), 1)
+        else:
+            module.fail_json(size=params['size'], msg="failed to process size")
+
+    now = time.time()
+    msg = 'All paths examined'
+    looked = 0
+    has_warnings = False
+    for npath in params['paths']:
+        npath = os.path.expanduser(os.path.expandvars(npath))
+        try:
+            if not os.path.isdir(npath):
+                raise Exception("'%s' is not a directory" % to_native(npath))
+
+            for root, dirs, files in os.walk(npath, onerror=handle_walk_errors, followlinks=params['follow']):
+                looked = looked + len(files) + len(dirs)
+                for fsobj in (files + dirs):
+                    fsname = os.path.normpath(os.path.join(root, fsobj))
+                    if params['depth']:
+                        wpath = npath.rstrip(os.path.sep) + os.path.sep
+                        depth = int(fsname.count(os.path.sep)) - int(wpath.count(os.path.sep)) + 1
+                        if depth > params['depth']:
+                            # Empty the list used by os.walk to avoid traversing deeper unnecessarily
+                            del dirs[:]
+                            continue
+                    if os.path.basename(fsname).startswith('.') and not params['hidden']:
+                        continue
+
+                    try:
+                        st = os.lstat(fsname)
+                    except (IOError, OSError) as e:
+                        module.warn("Skipped entry '%s' due to this access issue: %s\n" % (fsname, to_text(e)))
+                        skipped[fsname] = to_text(e)
+                        has_warnings = True
+                        continue
+
+                    r = {'path': fsname}
+                    if params['file_type'] == 'any':
+                        if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']):
+
+                            r.update(statinfo(st))
+                            if stat.S_ISREG(st.st_mode) and params['get_checksum']:
+                                r['checksum'] = module.sha1(fsname)
+
+                            if stat.S_ISREG(st.st_mode):
+                                if sizefilter(st, size):
+                                    filelist.append(r)
+                            else:
+                                filelist.append(r)
+
+                    elif stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory':
+                        if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']):
+
+                            r.update(statinfo(st))
+                            filelist.append(r)
+
+                    elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file':
+                        if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and \
+                                agefilter(st, now, age, params['age_stamp']) and \
+                                sizefilter(st, size) and contentfilter(fsname, params['contains'], params['read_whole_file']):
+
+                            r.update(statinfo(st))
+                            if params['get_checksum']:
+                                r['checksum'] = module.sha1(fsname)
+                            filelist.append(r)
+
+                    elif stat.S_ISLNK(st.st_mode) and params['file_type'] == 'link':
+                        if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']):
+
+                            r.update(statinfo(st))
+                            filelist.append(r)
+
+                if not params['recurse']:
+                    break
+        except Exception as e:
+            skipped[npath] = to_text(e)
+            module.warn("Skipped '%s' path due to this access issue: %s\n" % (to_text(npath), skipped[npath]))
+            has_warnings = True
+
+    if has_warnings:
+        msg = 'Not all paths examined, check warnings for details'
+    matched = len(filelist)
+    module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked, skipped_paths=skipped)
+
+
+if __name__ == '__main__':
+    main()
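
A note on the patterns documentation above: with use_regex=True, pfilter() matches with re.match(), which anchors at the start of the basename, which is why a bare regex like \.default never matches. The following standalone sketch (not part of the patch; the file names are made up) illustrates the difference between the regex and glob modes.

# Standalone sketch: regex patterns must cover the whole basename, globs need not.
import re
import fnmatch

names = ["app.default", "notes.txt"]

print([n for n in names if re.match(r"\.default", n)])        # [] -- re.match anchors at the start
print([n for n in names if re.match(r".*\.default", n)])      # ['app.default']

# With use_regex=False the same selection is a shell glob via fnmatch:
print([n for n in names if fnmatch.fnmatch(n, "*.default")])   # ['app.default']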
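The age and size options are parsed in main() with a small regex plus a unit table. Below is a standalone sketch of that conversion with illustrative values; it only replicates the two regexes from the module and is not the module's own API.

# Standalone sketch of the unit-suffix parsing used in main().
import re

def to_seconds(age):
    m = re.match(r"^(-?\d+)(s|m|h|d|w)?$", age.lower())
    seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
    if not m:
        raise ValueError("failed to process age: %s" % age)
    return int(m.group(1)) * seconds_per_unit.get(m.group(2), 1)

def to_bytes(size):
    m = re.match(r"^(-?\d+)(b|k|m|g|t)?$", size.lower())
    bytes_per_unit = {"b": 1, "k": 1024, "m": 1024 ** 2, "g": 1024 ** 3, "t": 1024 ** 4}
    if not m:
        raise ValueError("failed to process size: %s" % size)
    return int(m.group(1)) * bytes_per_unit.get(m.group(2), 1)

print(to_seconds("2d"))     # 172800
print(to_seconds("-3600"))  # -3600 (a negative age selects files equal to or newer than that age)
print(to_bytes("10m"))      # 10485760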
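The read_whole_file option changes how contentfilter() applies the contains pattern: re.search() over the whole file at once versus re.match() per line, which only matches from the start of each line. A minimal standalone sketch of that difference, using a throwaway temp file rather than anything from the module:

# Standalone sketch: whole-file re.search() vs line-by-line re.match().
import os
import re
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".log", delete=False) as f:
    f.write("ok line\nerror: disk full\n")
    path = f.name

prog = re.compile(r"disk full")

with open(path) as fh:
    print(bool(prog.search(fh.read())))          # True: search() finds the text mid-line

with open(path) as fh:
    print(any(prog.match(line) for line in fh))  # False: match() anchors at each line start

os.remove(path)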
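When depth is set, main() prunes os.walk() by counting path separators in each entry relative to the search root. A standalone sketch of that arithmetic with hypothetical paths:

# Standalone sketch of the depth calculation; paths are illustrative only.
import os

npath = "/var/log/"
wpath = npath.rstrip(os.path.sep) + os.path.sep   # normalised root, always ends with a separator

for fsname in ("/var/log/syslog", "/var/log/nginx/access.log"):
    depth = fsname.count(os.path.sep) - wpath.count(os.path.sep) + 1
    print(fsname, depth)
# /var/log/syslog 1
# /var/log/nginx/access.log 2   -> pruned when depth: 1 is requested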