summaryrefslogtreecommitdiffstats
path: root/lib/ansible/parsing/dataloader.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ansible/parsing/dataloader.py')
-rw-r--r-- lib/ansible/parsing/dataloader.py 468
1 files changed, 468 insertions, 0 deletions
diff --git a/lib/ansible/parsing/dataloader.py b/lib/ansible/parsing/dataloader.py
new file mode 100644
index 0000000..cbba966
--- /dev/null
+++ b/lib/ansible/parsing/dataloader.py
@@ -0,0 +1,468 @@
+# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
+# Copyright: (c) 2017, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import copy
+import os
+import os.path
+import re
+import tempfile
+
+from ansible import constants as C
+from ansible.errors import AnsibleFileNotFound, AnsibleParserError
+from ansible.module_utils.basic import is_executable
+from ansible.module_utils.six import binary_type, text_type
+from ansible.module_utils._text import to_bytes, to_native, to_text
+from ansible.parsing.quoting import unquote
+from ansible.parsing.utils.yaml import from_yaml
+from ansible.parsing.vault import VaultLib, b_HEADER, is_encrypted, is_encrypted_file, parse_vaulttext_envelope
+from ansible.utils.path import unfrackpath
+from ansible.utils.display import Display
+
+display = Display()
+
+
+# Tries to determine if a path is inside a role, last dir must be 'tasks'
+# this is not perfect but people should really avoid 'tasks' dirs outside roles when using Ansible.
+RE_TASKS = re.compile(u'(?:^|%s)+tasks%s?$' % (os.path.sep, os.path.sep))
+
+
+class DataLoader:
+
+ '''
+ The DataLoader class is used to load and parse YAML or JSON content,
+ either from a given file name or from a string that was previously
+ read in through other means. A Vault password can be specified, and
+ any vault-encrypted files will be decrypted.
+
+ Data read from files will also be cached, so the file will never be
+ read from disk more than once.
+
+ Usage:
+
+ dl = DataLoader()
+ # optionally: dl.set_vault_password('foo')
+ ds = dl.load('...')
+ ds = dl.load_from_file('/path/to/file')
+ '''
+
    def __init__(self):
        '''Initialize loader state: basedir, file cache, temp-file tracking and vault.'''

        # relative paths handed to path_dwim() are resolved against this directory
        self._basedir = '.'

        # NOTE: not effective with forks as the main copy does not get updated.
        # avoids rereading files
        self._FILE_CACHE = dict()

        # NOTE: not thread safe, also issues with forks not returning data to main proc
        # so they need to be cleaned independently. See WorkerProcess for example.
        # used to keep track of temp files for cleaning
        self._tempfiles = set()

        # initialize the vault stuff with an empty password
        # TODO: replace with a ref to something that can get the password
        # a creds/auth provider
        # self.set_vault_password(None)
        self._vaults = {}
        self._vault = VaultLib()
        self.set_vault_secrets(None)
+
    # TODO: since we can query vault_secrets late, we could provide this to DataLoader init
    def set_vault_secrets(self, vault_secrets):
        '''Attach the secrets used by the internal VaultLib when decrypting content.'''
        self._vault.secrets = vault_secrets
+
    def load(self, data, file_name='<string>', show_content=True, json_only=False):
        '''Backwards compat for now'''
        # delegate to the shared YAML/JSON parser, passing our vault secrets so
        # inline vault-encrypted values can be decrypted while parsing
        return from_yaml(data, file_name, show_content, self._vault.secrets, json_only=json_only)
+
+ def load_from_file(self, file_name, cache=True, unsafe=False, json_only=False):
+ ''' Loads data from a file, which can contain either JSON or YAML. '''
+
+ file_name = self.path_dwim(file_name)
+ display.debug("Loading data from %s" % file_name)
+
+ # if the file has already been read in and cached, we'll
+ # return those results to avoid more file/vault operations
+ if cache and file_name in self._FILE_CACHE:
+ parsed_data = self._FILE_CACHE[file_name]
+ else:
+ # read the file contents and load the data structure from them
+ (b_file_data, show_content) = self._get_file_contents(file_name)
+
+ file_data = to_text(b_file_data, errors='surrogate_or_strict')
+ parsed_data = self.load(data=file_data, file_name=file_name, show_content=show_content, json_only=json_only)
+
+ # cache the file contents for next time
+ self._FILE_CACHE[file_name] = parsed_data
+
+ if unsafe:
+ return parsed_data
+ else:
+ # return a deep copy here, so the cache is not affected
+ return copy.deepcopy(parsed_data)
+
+ def path_exists(self, path):
+ path = self.path_dwim(path)
+ return os.path.exists(to_bytes(path, errors='surrogate_or_strict'))
+
+ def is_file(self, path):
+ path = self.path_dwim(path)
+ return os.path.isfile(to_bytes(path, errors='surrogate_or_strict')) or path == os.devnull
+
+ def is_directory(self, path):
+ path = self.path_dwim(path)
+ return os.path.isdir(to_bytes(path, errors='surrogate_or_strict'))
+
+ def list_directory(self, path):
+ path = self.path_dwim(path)
+ return os.listdir(path)
+
    def is_executable(self, path):
        '''is the given path executable?'''
        # resolve relative paths before delegating to module_utils' exec-bit check
        path = self.path_dwim(path)
        return is_executable(path)
+
+ def _decrypt_if_vault_data(self, b_vault_data, b_file_name=None):
+ '''Decrypt b_vault_data if encrypted and return b_data and the show_content flag'''
+
+ if not is_encrypted(b_vault_data):
+ show_content = True
+ return b_vault_data, show_content
+
+ b_ciphertext, b_version, cipher_name, vault_id = parse_vaulttext_envelope(b_vault_data)
+ b_data = self._vault.decrypt(b_vault_data, filename=b_file_name)
+
+ show_content = False
+ return b_data, show_content
+
    def _get_file_contents(self, file_name):
        '''
        Reads the file contents from the given file name

        If the contents are vault-encrypted, it will decrypt them and return
        the decrypted data

        :arg file_name: The name of the file to read. If this is a relative
            path, it will be expanded relative to the basedir
        :raises AnsibleFileNotFound: if the file_name does not refer to a file
        :raises AnsibleParserError: if we were unable to read the file
        :return: tuple of (file contents as a byte string, show_content flag);
            show_content is False when the contents were vault-encrypted
        '''
        if not file_name or not isinstance(file_name, (binary_type, text_type)):
            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_name))

        b_file_name = to_bytes(self.path_dwim(file_name))
        # This is what we really want but have to fix unittests to make it pass
        # if not os.path.exists(b_file_name) or not os.path.isfile(b_file_name):
        if not self.path_exists(b_file_name):
            raise AnsibleFileNotFound("Unable to retrieve file contents", file_name=file_name)

        try:
            with open(b_file_name, 'rb') as f:
                data = f.read()
                return self._decrypt_if_vault_data(data, b_file_name)
        except (IOError, OSError) as e:
            raise AnsibleParserError("an error occurred while trying to read the file '%s': %s" % (file_name, to_native(e)), orig_exc=e)
+
    def get_basedir(self):
        ''' returns the current basedir, used to resolve relative paths '''
        return self._basedir
+
+ def set_basedir(self, basedir):
+ ''' sets the base directory, used to find files when a relative path is given '''
+
+ if basedir is not None:
+ self._basedir = to_text(basedir)
+
+ def path_dwim(self, given):
+ '''
+ make relative paths work like folks expect.
+ '''
+
+ given = unquote(given)
+ given = to_text(given, errors='surrogate_or_strict')
+
+ if given.startswith(to_text(os.path.sep)) or given.startswith(u'~'):
+ path = given
+ else:
+ basedir = to_text(self._basedir, errors='surrogate_or_strict')
+ path = os.path.join(basedir, given)
+
+ return unfrackpath(path, follow=False)
+
    def _is_role(self, path):
        ''' imperfect role detection, roles are still valid w/o tasks|meta/main.yml|yaml|etc '''

        b_path = to_bytes(path, errors='surrogate_or_strict')
        b_path_dirname = os.path.dirname(b_path)
        b_upath = to_bytes(unfrackpath(path, follow=False), errors='surrogate_or_strict')

        # candidate main files directly under the given path (the path itself
        # may already be the tasks/ dir of a role)
        untasked_paths = (
            os.path.join(b_path, b'main.yml'),
            os.path.join(b_path, b'main.yaml'),
            os.path.join(b_path, b'main'),
        )
        # candidate tasks/meta main files under the normalized path or its parent
        tasked_paths = (
            os.path.join(b_upath, b'tasks/main.yml'),
            os.path.join(b_upath, b'tasks/main.yaml'),
            os.path.join(b_upath, b'tasks/main'),
            os.path.join(b_upath, b'meta/main.yml'),
            os.path.join(b_upath, b'meta/main.yaml'),
            os.path.join(b_upath, b'meta/main'),
            os.path.join(b_path_dirname, b'tasks/main.yml'),
            os.path.join(b_path_dirname, b'tasks/main.yaml'),
            os.path.join(b_path_dirname, b'tasks/main'),
            os.path.join(b_path_dirname, b'meta/main.yml'),
            os.path.join(b_path_dirname, b'meta/main.yaml'),
            os.path.join(b_path_dirname, b'meta/main'),
        )

        exists_untasked = map(os.path.exists, untasked_paths)
        exists_tasked = map(os.path.exists, tasked_paths)
        # NOTE: by operator precedence this evaluates as
        # (RE_TASKS.search(path) and any(exists_untasked)) or any(exists_tasked)
        if RE_TASKS.search(path) and any(exists_untasked) or any(exists_tasked):
            return True

        return False
+
    def path_dwim_relative(self, path, dirname, source, is_role=False):
        '''
        find one file in either a role or playbook dir with or without
        explicitly named dirname subdirs

        Used in action plugins and lookups to find supplemental files that
        could be in either place.

        :arg path: base role/play path to anchor the search
        :arg dirname: subdir name to try (e.g. 'templates', 'files', 'vars')
        :arg source: filename being searched for
        :kwarg is_role: skip role auto-detection when the caller already knows
        :returns: the first candidate path that exists on disk; if none exist,
            the LAST candidate built is returned (callers must handle a
            possibly nonexistent path)
        '''

        search = []
        source = to_text(source, errors='surrogate_or_strict')

        # I have full path, nothing else needs to be looked at
        if source.startswith(to_text(os.path.sep)) or source.startswith(u'~'):
            search.append(unfrackpath(source, follow=False))
        else:
            # base role/play path + templates/files/vars + relative filename
            search.append(os.path.join(path, dirname, source))
            basedir = unfrackpath(path, follow=False)

            # not told if role, but detect if it is a role and if so make sure you get correct base path
            if not is_role:
                is_role = self._is_role(path)

            # for a role's tasks dir the effective base is the role root, one level up
            if is_role and RE_TASKS.search(path):
                basedir = unfrackpath(os.path.dirname(path), follow=False)

            # temporarily retarget the loader's basedir so unfrackpath resolves
            # relative to the role/play rather than the previous basedir
            cur_basedir = self._basedir
            self.set_basedir(basedir)
            # resolved base role/play path + templates/files/vars + relative filename
            search.append(unfrackpath(os.path.join(basedir, dirname, source), follow=False))
            self.set_basedir(cur_basedir)

            if is_role and not source.endswith(dirname):
                # look in role's tasks dir w/o dirname
                search.append(unfrackpath(os.path.join(basedir, 'tasks', source), follow=False))

            # try to create absolute path for loader basedir + templates/files/vars + filename
            search.append(unfrackpath(os.path.join(dirname, source), follow=False))

            # try to create absolute path for loader basedir
            search.append(unfrackpath(os.path.join(basedir, source), follow=False))

            # try to create absolute path for dirname + filename
            search.append(self.path_dwim(os.path.join(dirname, source)))

            # try to create absolute path for filename
            search.append(self.path_dwim(source))

        # first existing candidate wins; note that if nothing exists the loop
        # falls through and the last candidate is returned anyway
        for candidate in search:
            if os.path.exists(to_bytes(candidate, errors='surrogate_or_strict')):
                break

        return candidate
+
+ def path_dwim_relative_stack(self, paths, dirname, source, is_role=False):
+ '''
+ find one file in first path in stack taking roles into account and adding play basedir as fallback
+
+ :arg paths: A list of text strings which are the paths to look for the filename in.
+ :arg dirname: A text string representing a directory. The directory
+ is prepended to the source to form the path to search for.
+ :arg source: A text string which is the filename to search for
+ :rtype: A text string
+ :returns: An absolute path to the filename ``source`` if found
+ :raises: An AnsibleFileNotFound Exception if the file is found to exist in the search paths
+ '''
+ b_dirname = to_bytes(dirname, errors='surrogate_or_strict')
+ b_source = to_bytes(source, errors='surrogate_or_strict')
+
+ result = None
+ search = []
+ if source is None:
+ display.warning('Invalid request to find a file that matches a "null" value')
+ elif source and (source.startswith('~') or source.startswith(os.path.sep)):
+ # path is absolute, no relative needed, check existence and return source
+ test_path = unfrackpath(b_source, follow=False)
+ if os.path.exists(to_bytes(test_path, errors='surrogate_or_strict')):
+ result = test_path
+ else:
+ display.debug(u'evaluation_path:\n\t%s' % '\n\t'.join(paths))
+ for path in paths:
+ upath = unfrackpath(path, follow=False)
+ b_upath = to_bytes(upath, errors='surrogate_or_strict')
+ b_pb_base_dir = os.path.dirname(b_upath)
+
+ # if path is in role and 'tasks' not there already, add it into the search
+ if (is_role or self._is_role(path)) and b_pb_base_dir.endswith(b'/tasks'):
+ search.append(os.path.join(os.path.dirname(b_pb_base_dir), b_dirname, b_source))
+ search.append(os.path.join(b_pb_base_dir, b_source))
+ else:
+ # don't add dirname if user already is using it in source
+ if b_source.split(b'/')[0] != dirname:
+ search.append(os.path.join(b_upath, b_dirname, b_source))
+ search.append(os.path.join(b_upath, b_source))
+
+ # always append basedir as last resort
+ # don't add dirname if user already is using it in source
+ if b_source.split(b'/')[0] != dirname:
+ search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_dirname, b_source))
+ search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_source))
+
+ display.debug(u'search_path:\n\t%s' % to_text(b'\n\t'.join(search)))
+ for b_candidate in search:
+ display.vvvvv(u'looking for "%s" at "%s"' % (source, to_text(b_candidate)))
+ if os.path.exists(b_candidate):
+ result = to_text(b_candidate)
+ break
+
+ if result is None:
+ raise AnsibleFileNotFound(file_name=source, paths=[to_native(p) for p in search])
+
+ return result
+
+ def _create_content_tempfile(self, content):
+ ''' Create a tempfile containing defined content '''
+ fd, content_tempfile = tempfile.mkstemp(dir=C.DEFAULT_LOCAL_TMP)
+ f = os.fdopen(fd, 'wb')
+ content = to_bytes(content)
+ try:
+ f.write(content)
+ except Exception as err:
+ os.remove(content_tempfile)
+ raise Exception(err)
+ finally:
+ f.close()
+ return content_tempfile
+
    def get_real_file(self, file_path, decrypt=True):
        """
        If the file is vault encrypted return a path to a temporary decrypted file
        If the file is not encrypted then the path is returned
        Temporary files are tracked in self._tempfiles and removed via
        cleanup_tmp_file()/cleanup_all_tmp_files()

        :arg file_path: path (text or bytes) of the file to resolve
        :kwarg decrypt: when False the original path is returned even for
            vault-encrypted files
        :raises AnsibleParserError: on an invalid filename, a read error, or
            encrypted content with no vault secrets configured
        :raises AnsibleFileNotFound: if the path does not exist or is not a file
        """

        if not file_path or not isinstance(file_path, (binary_type, text_type)):
            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_path))

        b_file_path = to_bytes(file_path, errors='surrogate_or_strict')
        if not self.path_exists(b_file_path) or not self.is_file(b_file_path):
            raise AnsibleFileNotFound(file_name=file_path)

        real_path = self.path_dwim(file_path)

        try:
            if decrypt:
                with open(to_bytes(real_path), 'rb') as f:
                    # Limit how much of the file is read since we do not know
                    # whether this is a vault file and therefore it could be very
                    # large.
                    if is_encrypted_file(f, count=len(b_HEADER)):
                        # if the file is encrypted and no password was specified,
                        # the decrypt call would throw an error, but we check first
                        # since the decrypt function doesn't know the file name
                        data = f.read()
                        if not self._vault.secrets:
                            raise AnsibleParserError("A vault password or secret must be specified to decrypt %s" % to_native(file_path))

                        data = self._vault.decrypt(data, filename=real_path)
                        # Make a temp file
                        real_path = self._create_content_tempfile(data)
                        self._tempfiles.add(real_path)

            return real_path

        except (IOError, OSError) as e:
            raise AnsibleParserError("an error occurred while trying to read the file '%s': %s" % (to_native(real_path), to_native(e)), orig_exc=e)
+
+ def cleanup_tmp_file(self, file_path):
+ """
+ Removes any temporary files created from a previous call to
+ get_real_file. file_path must be the path returned from a
+ previous call to get_real_file.
+ """
+ if file_path in self._tempfiles:
+ os.unlink(file_path)
+ self._tempfiles.remove(file_path)
+
+ def cleanup_all_tmp_files(self):
+ """
+ Removes all temporary files that DataLoader has created
+ NOTE: not thread safe, forks also need special handling see __init__ for details.
+ """
+ for f in list(self._tempfiles):
+ try:
+ self.cleanup_tmp_file(f)
+ except Exception as e:
+ display.warning("Unable to cleanup temp files: %s" % to_text(e))
+
    def find_vars_files(self, path, name, extensions=None, allow_dir=True):
        """
        Find vars files in a given path with specified name. This will find
        files in a dir named <name>/ or a file called <name> ending in known
        extensions.

        :arg path: directory to look in
        :arg name: base name of the file or directory to find
        :kwarg extensions: extensions to try; defaults to '' plus
            C.YAML_FILENAME_EXTENSIONS so a bare <name> is checked first
        :kwarg allow_dir: when True, a matching directory is expanded to the
            vars files it contains (recursively)
        :returns: list of matching paths
        """

        b_path = to_bytes(os.path.join(path, name))
        found = []

        if extensions is None:
            # Look for file with no extension first to find dir before file
            extensions = [''] + C.YAML_FILENAME_EXTENSIONS
        # add valid extensions to name
        for ext in extensions:

            if '.' in ext:
                full_path = b_path + to_bytes(ext)
            elif ext:
                full_path = b'.'.join([b_path, to_bytes(ext)])
            else:
                full_path = b_path

            if self.path_exists(full_path):
                if self.is_directory(full_path):
                    if allow_dir:
                        # expand the dir; loop keeps trying the other extensions
                        # NOTE(review): this appends text paths while the plain
                        # file branch appends bytes — confirm callers handle both
                        found.extend(self._get_dir_vars_files(to_text(full_path), extensions))
                    else:
                        continue
                else:
                    # first plain-file match wins; stop scanning extensions
                    found.append(full_path)
                    break
        return found
+
+ def _get_dir_vars_files(self, path, extensions):
+ found = []
+ for spath in sorted(self.list_directory(path)):
+ if not spath.startswith(u'.') and not spath.endswith(u'~'): # skip hidden and backups
+
+ ext = os.path.splitext(spath)[-1]
+ full_spath = os.path.join(path, spath)
+
+ if self.is_directory(full_spath) and not ext: # recursive search if dir
+ found.extend(self._get_dir_vars_files(full_spath, extensions))
+ elif self.is_file(full_spath) and (not ext or to_text(ext) in extensions):
+ # only consider files with valid extensions or no extension
+ found.append(full_spath)
+
+ return found