path: root/qa/tasks/s3a_hadoop.py
Diffstat (limited to 'qa/tasks/s3a_hadoop.py')
-rw-r--r--  qa/tasks/s3a_hadoop.py  285
1 file changed, 285 insertions, 0 deletions
diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py
new file mode 100644
index 000000000..7b77359fc
--- /dev/null
+++ b/qa/tasks/s3a_hadoop.py
@@ -0,0 +1,285 @@
+import contextlib
+import logging
+from teuthology import misc
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run Hadoop S3A tests using Ceph
+ usage:
+ -tasks:
+ ceph-ansible:
+ s3a-hadoop:
+ maven-version: '3.6.3' (default)
+ hadoop-version: '2.9.2'
+ bucket-name: 's3atest' (default)
+ access-key: 'anykey' (uses a default value)
+ secret-key: 'secretkey' ( uses a default value)
+ role: client.0
+ """
+    if config is None:
+        config = {}
+
+    assert isinstance(config, dict), \
+        "task only supports a dictionary for configuration"
+
+    assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task'
+
+    overrides = ctx.config.get('overrides', {})
+    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
+    testdir = misc.get_testdir(ctx)
+
+    role = config.get('role')
+    (remote,) = ctx.cluster.only(role).remotes.keys()
+    endpoint = ctx.rgw.role_endpoints.get(role)
+    assert endpoint, 's3a-hadoop: no rgw endpoint for {}'.format(role)
+
+    # get versions
+    maven_major = config.get('maven-major', 'maven-3')
+    maven_version = config.get('maven-version', '3.6.3')
+    hadoop_ver = config.get('hadoop-version', '2.9.2')
+    bucket_name = config.get('bucket-name', 's3atest')
+    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
+    secret_key = config.get(
+        'secret-key',
+        'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
+
+    # set versions for cloning the repo
+    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
+        maven_version=maven_version)
+    maven_link = 'http://archive.apache.org/dist/maven/' + \
+                 '{maven_major}/{maven_version}/binaries/'.format(
+                     maven_major=maven_major,
+                     maven_version=maven_version) + apache_maven
+    hadoop_git = 'https://github.com/apache/hadoop'
+    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
+    if hadoop_ver == 'trunk':
+        # just checkout a new branch out of trunk
+        hadoop_rel = 'hadoop-ceph-trunk'
+    install_prereq(remote)
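+    # download and unpack maven, clone hadoop, and check out the requested
+    # release branch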
+    remote.run(
+        args=[
+            'cd',
+            testdir,
+            run.Raw('&&'),
+            'wget',
+            maven_link,
+            run.Raw('&&'),
+            'tar',
+            '-xvf',
+            apache_maven,
+            run.Raw('&&'),
+            'git',
+            'clone',
+            run.Raw(hadoop_git),
+            run.Raw('&&'),
+            'cd',
+            'hadoop',
+            run.Raw('&&'),
+            'git',
+            'checkout',
+            '-b',
+            run.Raw(hadoop_rel)
+        ]
+    )
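+    # write the S3A test configuration first, then create the RGW user and
+    # bucket the tests point at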
+    configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
+    setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
+    if hadoop_ver.startswith('2.8'):
+        # run all the ITestS3A* integration tests, but skip the AWS tests
+        # that use the public landsat-pds bucket, which is not reachable
+        # from within this test environment
+        test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \
+                        -Dfs.s3a.scale.test.timeout=1200 \
+                        -Dfs.s3a.scale.test.huge.filesize=256M verify'
+    else:
+        test_options = 'test -Dtest=S3a*,TestS3A*'
+    try:
+        run_s3atest(remote, maven_version, testdir, test_options)
+        yield
+    finally:
+        log.info("Done s3a testing, cleaning up")
+        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
+            remote.run(args=['rm', run.Raw('-rf'),
+                             run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))])
+
+
+def install_prereq(client):
+ """
+ Install pre requisites for RHEL and CentOS
+ TBD: Ubuntu
+ """
+    if client.os.name in ('rhel', 'centos'):
+        client.run(
+            args=[
+                'sudo',
+                'yum',
+                'install',
+                '-y',
+                'protobuf-c.x86_64',
+                'java',
+                'java-1.8.0-openjdk-devel',
+                'dnsmasq'
+            ]
+        )
+
+
+def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
+ """
+ Create user with access_key and secret_key that will be
+ used for the s3a testdir
+ """
+    client.run(
+        args=[
+            'sudo',
+            'radosgw-admin',
+            'user',
+            'create',
+            run.Raw('--uid'),
+            's3a',
+            run.Raw('--display-name="s3a cephtests"'),
+            run.Raw('--access-key={access_key}'.format(access_key=access_key)),
+            run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)),
+            run.Raw('--email=s3a@ceph.com'),
+        ]
+    )
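+    # create a virtualenv with boto installed for the bucket-creation helper
+    # defined below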
+    client.run(
+        args=[
+            'python3',
+            '-m',
+            'venv',
+            '{testdir}/venv'.format(testdir=testdir),
+            run.Raw('&&'),
+            run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
+            'install',
+            'boto'
+        ]
+    )
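+    # helper script, written to the remote host below, that creates the test
+    # bucket and lists all buckets as a sanity check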
+ create_bucket = """
+#!/usr/bin/env python
+import boto
+import boto.s3.connection
+access_key = '{access_key}'
+secret_key = '{secret_key}'
+
+conn = boto.connect_s3(
+ aws_access_key_id = access_key,
+ aws_secret_access_key = secret_key,
+ host = '{dns_name}',
+ is_secure=False,
+ calling_format = boto.s3.connection.OrdinaryCallingFormat(),
+ )
+bucket = conn.create_bucket('{bucket_name}')
+for bucket in conn.get_all_buckets():
+ print(bucket.name + "\t" + bucket.creation_date)
+""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
+    py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
+    client.sudo_write_file(py_bucket_file, create_bucket, mode='0744')
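+    # dump the helper script to the teuthology log for debugging, then run it
+    # to create the bucket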
+    client.run(
+        args=[
+            'cat',
+            py_bucket_file,
+        ]
+    )
+    client.run(
+        args=[
+            '{testdir}/venv/bin/python'.format(testdir=testdir),
+            py_bucket_file,
+        ]
+    )
+
+
+def run_s3atest(client, maven_version, testdir, test_options):
+ """
+ Finally run the s3a test
+ """
+ aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir)
+ run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version)
+ # Remove AWS CredentialsProvider tests as it hits public bucket from AWS
+ # better solution is to create the public bucket on local server and test
+ rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java'
+    client.run(
+        args=[
+            'cd',
+            run.Raw(aws_testdir),
+            run.Raw('&&'),
+            run.Raw(rm_test),
+            run.Raw('&&'),
+            run.Raw(run_test),
+            run.Raw(test_options)
+        ]
+    )
+
+
+def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir):
+ """
+ Use the template to configure s3a test, Fill in access_key, secret_key
+ and other details required for test.
+ """
+ config_template = """<configuration>
+<property>
+<name>fs.s3a.endpoint</name>
+<value>{name}</value>
+</property>
+
+<property>
+<name>fs.contract.test.fs.s3a</name>
+<value>s3a://{bucket_name}/</value>
+</property>
+
+<property>
+<name>fs.s3a.connection.ssl.enabled</name>
+<value>false</value>
+</property>
+
+<property>
+<name>test.fs.s3n.name</name>
+<value>s3n://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3a.name</name>
+<value>s3a://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3.name</name>
+<value>s3://{bucket_name}/</value>
+</property>
+
+<property>
+<name>fs.s3.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.access.key</name>
+<description>AWS access key ID. Omit for Role-based authentication.</description>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.secret.key</name>
+<description>AWS secret key. Omit for Role-based authentication.</description>
+<value>{secret_key}</value>
+</property>
+</configuration>
+""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key)
+    config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml'
+    client.write_file(config_path, config_template)
+    # output for debug
+    client.run(args=['cat', config_path])