Diffstat (limited to 'lib/ansible/parsing/dataloader.py')
-rw-r--r-- | lib/ansible/parsing/dataloader.py | 468
1 file changed, 468 insertions, 0 deletions
diff --git a/lib/ansible/parsing/dataloader.py b/lib/ansible/parsing/dataloader.py
new file mode 100644
index 0000000..cbba966
--- /dev/null
+++ b/lib/ansible/parsing/dataloader.py
@@ -0,0 +1,468 @@
+# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
+# Copyright: (c) 2017, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import copy
+import os
+import os.path
+import re
+import tempfile
+
+from ansible import constants as C
+from ansible.errors import AnsibleFileNotFound, AnsibleParserError
+from ansible.module_utils.basic import is_executable
+from ansible.module_utils.six import binary_type, text_type
+from ansible.module_utils._text import to_bytes, to_native, to_text
+from ansible.parsing.quoting import unquote
+from ansible.parsing.utils.yaml import from_yaml
+from ansible.parsing.vault import VaultLib, b_HEADER, is_encrypted, is_encrypted_file, parse_vaulttext_envelope
+from ansible.utils.path import unfrackpath
+from ansible.utils.display import Display
+
+display = Display()
+
+
+# Tries to determine if a path is inside a role, last dir must be 'tasks'
+# this is not perfect but people should really avoid 'tasks' dirs outside roles when using Ansible.
+RE_TASKS = re.compile(u'(?:^|%s)+tasks%s?$' % (os.path.sep, os.path.sep))
+
+
+class DataLoader:
+
+    '''
+    The DataLoader class is used to load and parse YAML or JSON content,
+    either from a given file name or from a string that was previously
+    read in through other means. A Vault password can be specified, and
+    any vault-encrypted files will be decrypted.
+
+    Data read from files will also be cached, so the file will never be
+    read from disk more than once.
+
+    Usage:
+
+        dl = DataLoader()
+        # optionally: dl.set_vault_secrets(vault_secrets)
+        ds = dl.load('...')
+        ds = dl.load_from_file('/path/to/file')
+    '''
+
+    def __init__(self):
+
+        self._basedir = '.'
+
+        # NOTE: not effective with forks as the main copy does not get updated.
+        # avoids rereading files
+        self._FILE_CACHE = dict()
+
+        # NOTE: not thread safe, also issues with forks not returning data to main proc
+        #       so they need to be cleaned independently. See WorkerProcess for example.
+        # used to keep track of temp files for cleaning
+        self._tempfiles = set()
+
+        # initialize the vault stuff with an empty password
+        # TODO: replace with a ref to something that can get the password
+        #       a creds/auth provider
+        # self.set_vault_password(None)
+        self._vaults = {}
+        self._vault = VaultLib()
+        self.set_vault_secrets(None)
+
+    # TODO: since we can query vault_secrets late, we could provide this to DataLoader init
+    def set_vault_secrets(self, vault_secrets):
+        self._vault.secrets = vault_secrets
+
+    def load(self, data, file_name='<string>', show_content=True, json_only=False):
+        '''Backwards compat for now'''
+        return from_yaml(data, file_name, show_content, self._vault.secrets, json_only=json_only)
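For orientation, a minimal usage sketch of the class this commit adds; the basedir and file names below are placeholders rather than anything from the commit itself:

from ansible.parsing.dataloader import DataLoader

loader = DataLoader()
loader.set_basedir('/srv/playbooks')         # relative paths are resolved against this
inline = loader.load('{"hello": "world"}')   # parse YAML/JSON given as a string
on_disk = loader.load_from_file('site.yml')  # parse a file (result is cached per path)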
+    def load_from_file(self, file_name, cache=True, unsafe=False, json_only=False):
+        ''' Loads data from a file, which can contain either JSON or YAML. '''
+
+        file_name = self.path_dwim(file_name)
+        display.debug("Loading data from %s" % file_name)
+
+        # if the file has already been read in and cached, we'll
+        # return those results to avoid more file/vault operations
+        if cache and file_name in self._FILE_CACHE:
+            parsed_data = self._FILE_CACHE[file_name]
+        else:
+            # read the file contents and load the data structure from them
+            (b_file_data, show_content) = self._get_file_contents(file_name)
+
+            file_data = to_text(b_file_data, errors='surrogate_or_strict')
+            parsed_data = self.load(data=file_data, file_name=file_name, show_content=show_content, json_only=json_only)
+
+            # cache the file contents for next time
+            self._FILE_CACHE[file_name] = parsed_data
+
+        if unsafe:
+            return parsed_data
+        else:
+            # return a deep copy here, so the cache is not affected
+            return copy.deepcopy(parsed_data)
+
+    def path_exists(self, path):
+        path = self.path_dwim(path)
+        return os.path.exists(to_bytes(path, errors='surrogate_or_strict'))
+
+    def is_file(self, path):
+        path = self.path_dwim(path)
+        return os.path.isfile(to_bytes(path, errors='surrogate_or_strict')) or path == os.devnull
+
+    def is_directory(self, path):
+        path = self.path_dwim(path)
+        return os.path.isdir(to_bytes(path, errors='surrogate_or_strict'))
+
+    def list_directory(self, path):
+        path = self.path_dwim(path)
+        return os.listdir(path)
+
+    def is_executable(self, path):
+        '''is the given path executable?'''
+        path = self.path_dwim(path)
+        return is_executable(path)
+
+    def _decrypt_if_vault_data(self, b_vault_data, b_file_name=None):
+        '''Decrypt b_vault_data if encrypted and return b_data and the show_content flag'''
+
+        if not is_encrypted(b_vault_data):
+            show_content = True
+            return b_vault_data, show_content
+
+        b_ciphertext, b_version, cipher_name, vault_id = parse_vaulttext_envelope(b_vault_data)
+        b_data = self._vault.decrypt(b_vault_data, filename=b_file_name)
+
+        show_content = False
+        return b_data, show_content
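The cache/unsafe pair in load_from_file() controls whether callers share the cached object or get their own copy. A short illustration, reusing the loader from the sketch above with a hypothetical vars file:

vars_a = loader.load_from_file('group_vars/all.yml')   # parsed once, then cached
vars_b = loader.load_from_file('group_vars/all.yml')   # served from the cache as a deep copy,
                                                       # so mutating vars_b cannot poison later reads
shared = loader.load_from_file('group_vars/all.yml', unsafe=True)   # the cached object itself; mutations persist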
+    def _get_file_contents(self, file_name):
+        '''
+        Reads the file contents from the given file name
+
+        If the contents are vault-encrypted, it will decrypt them and return
+        the decrypted data
+
+        :arg file_name: The name of the file to read.  If this is a relative
+            path, it will be expanded relative to the basedir
+        :raises AnsibleFileNotFound: if the file_name does not refer to a file
+        :raises AnsibleParserError: if we were unable to read the file
+        :return: Returns a byte string of the file contents
+        '''
+        if not file_name or not isinstance(file_name, (binary_type, text_type)):
+            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_name))
+
+        b_file_name = to_bytes(self.path_dwim(file_name))
+        # This is what we really want but have to fix unittests to make it pass
+        # if not os.path.exists(b_file_name) or not os.path.isfile(b_file_name):
+        if not self.path_exists(b_file_name):
+            raise AnsibleFileNotFound("Unable to retrieve file contents", file_name=file_name)
+
+        try:
+            with open(b_file_name, 'rb') as f:
+                data = f.read()
+                return self._decrypt_if_vault_data(data, b_file_name)
+        except (IOError, OSError) as e:
+            raise AnsibleParserError("an error occurred while trying to read the file '%s': %s" % (file_name, to_native(e)), orig_exc=e)
+
+    def get_basedir(self):
+        ''' returns the current basedir '''
+        return self._basedir
+
+    def set_basedir(self, basedir):
+        ''' sets the base directory, used to find files when a relative path is given '''
+
+        if basedir is not None:
+            self._basedir = to_text(basedir)
+
+    def path_dwim(self, given):
+        '''
+        make relative paths work like folks expect.
+        '''
+
+        given = unquote(given)
+        given = to_text(given, errors='surrogate_or_strict')
+
+        if given.startswith(to_text(os.path.sep)) or given.startswith(u'~'):
+            path = given
+        else:
+            basedir = to_text(self._basedir, errors='surrogate_or_strict')
+            path = os.path.join(basedir, given)
+
+        return unfrackpath(path, follow=False)
+
+    def _is_role(self, path):
+        ''' imperfect role detection, roles are still valid w/o tasks|meta/main.yml|yaml|etc '''
+
+        b_path = to_bytes(path, errors='surrogate_or_strict')
+        b_path_dirname = os.path.dirname(b_path)
+        b_upath = to_bytes(unfrackpath(path, follow=False), errors='surrogate_or_strict')
+
+        untasked_paths = (
+            os.path.join(b_path, b'main.yml'),
+            os.path.join(b_path, b'main.yaml'),
+            os.path.join(b_path, b'main'),
+        )
+        tasked_paths = (
+            os.path.join(b_upath, b'tasks/main.yml'),
+            os.path.join(b_upath, b'tasks/main.yaml'),
+            os.path.join(b_upath, b'tasks/main'),
+            os.path.join(b_upath, b'meta/main.yml'),
+            os.path.join(b_upath, b'meta/main.yaml'),
+            os.path.join(b_upath, b'meta/main'),
+            os.path.join(b_path_dirname, b'tasks/main.yml'),
+            os.path.join(b_path_dirname, b'tasks/main.yaml'),
+            os.path.join(b_path_dirname, b'tasks/main'),
+            os.path.join(b_path_dirname, b'meta/main.yml'),
+            os.path.join(b_path_dirname, b'meta/main.yaml'),
+            os.path.join(b_path_dirname, b'meta/main'),
+        )
+
+        exists_untasked = map(os.path.exists, untasked_paths)
+        exists_tasked = map(os.path.exists, tasked_paths)
+        if RE_TASKS.search(path) and any(exists_untasked) or any(exists_tasked):
+            return True
+
+        return False
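path_dwim() anchors most of the other lookups in this class: absolute and '~' paths are only normalized by unfrackpath(), everything else is joined to the loader's basedir first. Continuing the sketch above (paths illustrative):

loader.path_dwim('roles/common/vars/main.yml')   # -> '/srv/playbooks/roles/common/vars/main.yml'
loader.path_dwim('/etc/ansible/hosts')           # absolute path is only normalized
loader.path_dwim('~/inventory.ini')              # '~' is expanded by unfrackpath()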
+    def path_dwim_relative(self, path, dirname, source, is_role=False):
+        '''
+        find one file in either a role or playbook dir with or without
+        explicitly named dirname subdirs
+
+        Used in action plugins and lookups to find supplemental files that
+        could be in either place.
+        '''
+
+        search = []
+        source = to_text(source, errors='surrogate_or_strict')
+
+        # I have full path, nothing else needs to be looked at
+        if source.startswith(to_text(os.path.sep)) or source.startswith(u'~'):
+            search.append(unfrackpath(source, follow=False))
+        else:
+            # base role/play path + templates/files/vars + relative filename
+            search.append(os.path.join(path, dirname, source))
+            basedir = unfrackpath(path, follow=False)
+
+            # not told if role, but detect if it is a role and if so make sure you get correct base path
+            if not is_role:
+                is_role = self._is_role(path)
+
+            if is_role and RE_TASKS.search(path):
+                basedir = unfrackpath(os.path.dirname(path), follow=False)
+
+            cur_basedir = self._basedir
+            self.set_basedir(basedir)
+            # resolved base role/play path + templates/files/vars + relative filename
+            search.append(unfrackpath(os.path.join(basedir, dirname, source), follow=False))
+            self.set_basedir(cur_basedir)
+
+            if is_role and not source.endswith(dirname):
+                # look in role's tasks dir w/o dirname
+                search.append(unfrackpath(os.path.join(basedir, 'tasks', source), follow=False))
+
+            # try to create absolute path for loader basedir + templates/files/vars + filename
+            search.append(unfrackpath(os.path.join(dirname, source), follow=False))
+
+            # try to create absolute path for loader basedir
+            search.append(unfrackpath(os.path.join(basedir, source), follow=False))
+
+            # try to create absolute path for dirname + filename
+            search.append(self.path_dwim(os.path.join(dirname, source)))
+
+            # try to create absolute path for filename
+            search.append(self.path_dwim(source))
+
+        for candidate in search:
+            if os.path.exists(to_bytes(candidate, errors='surrogate_or_strict')):
+                break
+
+        return candidate
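path_dwim_relative() builds the candidate list above and returns the first path that exists; if none exist it still returns the last candidate rather than raising. Roughly, for an action plugin resolving a template from a role's tasks dir (paths illustrative):

found = loader.path_dwim_relative('/srv/playbooks/roles/web/tasks', 'templates', 'nginx.conf.j2')
# candidates tried, in order, include:
#   /srv/playbooks/roles/web/tasks/templates/nginx.conf.j2
#   /srv/playbooks/roles/web/templates/nginx.conf.j2      (role base dir, via _is_role())
#   ...loader-basedir and bare-filename fallbacks...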
+    def path_dwim_relative_stack(self, paths, dirname, source, is_role=False):
+        '''
+        find one file in first path in stack taking roles into account and adding play basedir as fallback
+
+        :arg paths: A list of text strings which are the paths to look for the filename in.
+        :arg dirname: A text string representing a directory.  The directory
+            is prepended to the source to form the path to search for.
+        :arg source: A text string which is the filename to search for
+        :rtype: A text string
+        :returns: An absolute path to the filename ``source`` if found
+        :raises: An AnsibleFileNotFound Exception if the file is not found in any of the search paths
+        '''
+        b_dirname = to_bytes(dirname, errors='surrogate_or_strict')
+        b_source = to_bytes(source, errors='surrogate_or_strict')
+
+        result = None
+        search = []
+        if source is None:
+            display.warning('Invalid request to find a file that matches a "null" value')
+        elif source and (source.startswith('~') or source.startswith(os.path.sep)):
+            # path is absolute, no relative needed, check existence and return source
+            test_path = unfrackpath(b_source, follow=False)
+            if os.path.exists(to_bytes(test_path, errors='surrogate_or_strict')):
+                result = test_path
+        else:
+            display.debug(u'evaluation_path:\n\t%s' % '\n\t'.join(paths))
+            for path in paths:
+                upath = unfrackpath(path, follow=False)
+                b_upath = to_bytes(upath, errors='surrogate_or_strict')
+                b_pb_base_dir = os.path.dirname(b_upath)
+
+                # if path is in role and 'tasks' not there already, add it into the search
+                if (is_role or self._is_role(path)) and b_pb_base_dir.endswith(b'/tasks'):
+                    search.append(os.path.join(os.path.dirname(b_pb_base_dir), b_dirname, b_source))
+                    search.append(os.path.join(b_pb_base_dir, b_source))
+                else:
+                    # don't add dirname if user already is using it in source
+                    if b_source.split(b'/')[0] != b_dirname:
+                        search.append(os.path.join(b_upath, b_dirname, b_source))
+                    search.append(os.path.join(b_upath, b_source))
+
+            # always append basedir as last resort
+            # don't add dirname if user already is using it in source
+            if b_source.split(b'/')[0] != b_dirname:
+                search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_dirname, b_source))
+            search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_source))
+
+            display.debug(u'search_path:\n\t%s' % to_text(b'\n\t'.join(search)))
+            for b_candidate in search:
+                display.vvvvv(u'looking for "%s" at "%s"' % (source, to_text(b_candidate)))
+                if os.path.exists(b_candidate):
+                    result = to_text(b_candidate)
+                    break
+
+        if result is None:
+            raise AnsibleFileNotFound(file_name=source, paths=[to_native(p) for p in search])
+
+        return result
+
+    def _create_content_tempfile(self, content):
+        ''' Create a tempfile containing defined content '''
+        fd, content_tempfile = tempfile.mkstemp(dir=C.DEFAULT_LOCAL_TMP)
+        f = os.fdopen(fd, 'wb')
+        content = to_bytes(content)
+        try:
+            f.write(content)
+        except Exception as err:
+            os.remove(content_tempfile)
+            raise Exception(err)
+        finally:
+            f.close()
+        return content_tempfile
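path_dwim_relative_stack() is the stricter sibling: it walks a list of base paths, the first match wins, and it raises AnsibleFileNotFound instead of returning a guess. A hedged sketch of a first_found-style caller, again with made-up paths:

from ansible.errors import AnsibleFileNotFound

try:
    path = loader.path_dwim_relative_stack(
        paths=['/srv/playbooks/roles/web/tasks', '/srv/playbooks'],
        dirname='files',
        source='motd.txt',
    )
except AnsibleFileNotFound:
    path = None   # the exception message lists every searched location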
+    def get_real_file(self, file_path, decrypt=True):
+        """
+        If the file is vault encrypted return a path to a temporary decrypted file
+        If the file is not encrypted then the path is returned
+        Temporary files are cleaned up later via cleanup_tmp_file() / cleanup_all_tmp_files()
+        """
+
+        if not file_path or not isinstance(file_path, (binary_type, text_type)):
+            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_path))
+
+        b_file_path = to_bytes(file_path, errors='surrogate_or_strict')
+        if not self.path_exists(b_file_path) or not self.is_file(b_file_path):
+            raise AnsibleFileNotFound(file_name=file_path)
+
+        real_path = self.path_dwim(file_path)
+
+        try:
+            if decrypt:
+                with open(to_bytes(real_path), 'rb') as f:
+                    # Limit how much of the file is read since we do not know
+                    # whether this is a vault file and therefore it could be very
+                    # large.
+                    if is_encrypted_file(f, count=len(b_HEADER)):
+                        # if the file is encrypted and no password was specified,
+                        # the decrypt call would throw an error, but we check first
+                        # since the decrypt function doesn't know the file name
+                        data = f.read()
+                        if not self._vault.secrets:
+                            raise AnsibleParserError("A vault password or secret must be specified to decrypt %s" % to_native(file_path))
+
+                        data = self._vault.decrypt(data, filename=real_path)
+                        # Make a temp file
+                        real_path = self._create_content_tempfile(data)
+                        self._tempfiles.add(real_path)
+
+            return real_path
+
+        except (IOError, OSError) as e:
+            raise AnsibleParserError("an error occurred while trying to read the file '%s': %s" % (to_native(real_path), to_native(e)), orig_exc=e)
+
+    def cleanup_tmp_file(self, file_path):
+        """
+        Removes any temporary files created from a previous call to
+        get_real_file. file_path must be the path returned from a
+        previous call to get_real_file.
+        """
+        if file_path in self._tempfiles:
+            os.unlink(file_path)
+            self._tempfiles.remove(file_path)
+
+    def cleanup_all_tmp_files(self):
+        """
+        Removes all temporary files that DataLoader has created.
+        NOTE: not thread safe; forks also need special handling, see __init__ for details.
+        """
+        for f in list(self._tempfiles):
+            try:
+                self.cleanup_tmp_file(f)
+            except Exception as e:
+                display.warning("Unable to cleanup temp files: %s" % to_text(e))
+
+    def find_vars_files(self, path, name, extensions=None, allow_dir=True):
+        """
+        Find vars files in a given path with specified name. This will find
+        files in a dir named <name>/ or a file called <name> ending in known
+        extensions.
+        """
+
+        b_path = to_bytes(os.path.join(path, name))
+        found = []
+
+        if extensions is None:
+            # Look for file with no extension first to find dir before file
+            extensions = [''] + C.YAML_FILENAME_EXTENSIONS
+        # add valid extensions to name
+        for ext in extensions:
+
+            if '.' in ext:
+                full_path = b_path + to_bytes(ext)
+            elif ext:
+                full_path = b'.'.join([b_path, to_bytes(ext)])
+            else:
+                full_path = b_path
+
+            if self.path_exists(full_path):
+                if self.is_directory(full_path):
+                    if allow_dir:
+                        found.extend(self._get_dir_vars_files(to_text(full_path), extensions))
+                    else:
+                        continue
+                else:
+                    found.append(full_path)
+                break
+        return found
+
+    def _get_dir_vars_files(self, path, extensions):
+        found = []
+        for spath in sorted(self.list_directory(path)):
+            if not spath.startswith(u'.') and not spath.endswith(u'~'):  # skip hidden and backups
+
+                ext = os.path.splitext(spath)[-1]
+                full_spath = os.path.join(path, spath)
+
+                if self.is_directory(full_spath) and not ext:  # recursive search if dir
+                    found.extend(self._get_dir_vars_files(full_spath, extensions))
+                elif self.is_file(full_spath) and (not ext or to_text(ext) in extensions):
+                    # only consider files with valid extensions or no extension
+                    found.append(full_spath)
+
+        return found
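Finally, callers that need an on-disk path for a possibly vault-encrypted file are expected to pair get_real_file() with the cleanup helpers; a minimal sketch, assuming vault secrets were already set on the loader and that 'files/secret.yml' is a placeholder:

real_path = loader.get_real_file('files/secret.yml', decrypt=True)
try:
    with open(real_path, 'rb') as f:
        payload = f.read()               # plaintext even if the source file was vaulted
finally:
    loader.cleanup_tmp_file(real_path)   # removes the temp copy; no-op for unencrypted files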