Diffstat (limited to 'src/s3select/container')
-rw-r--r--  src/s3select/container/trino/hms_trino.yaml                 |  31
-rw-r--r--  src/s3select/container/trino/run_trino_on_ceph.bash         |  86
-rw-r--r--  src/s3select/container/trino/trino/catalog/hive.properties  |  33
-rw-r--r--  src/s3select/container/trino/trino/config.properties        |   5
-rw-r--r--  src/s3select/container/trino/trino/jvm.config               |  19
-rw-r--r--  src/s3select/container/trino/trino/log.properties           |   2
-rw-r--r--  src/s3select/container/trino/trino/node.properties          |   2
7 files changed, 178 insertions, 0 deletions
diff --git a/src/s3select/container/trino/hms_trino.yaml b/src/s3select/container/trino/hms_trino.yaml
new file mode 100644
index 000000000..42d22f842
--- /dev/null
+++ b/src/s3select/container/trino/hms_trino.yaml
@@ -0,0 +1,31 @@
+version: '3'
+services:
+  hms:
+    image: galsl/hms:dev
+    container_name: hms
+    environment:
+      # S3_ENDPOINT is the CEPH/RGW endpoint URL
+      - S3_ENDPOINT=http://10.0.209.201:80
+      - S3_ACCESS_KEY=abc1
+      - S3_SECRET_KEY=abc1
+    # the container starts by booting the hive metastore
+    command: sh -c '. ~/.bashrc; start_hive_metastore'
+    ports:
+      - 9083:9083
+    networks:
+      - trino_hms
+
+  trino:
+    image: trinodb/trino
+    container_name: trino
+    volumes:
+      # the trino directory contains the necessary configuration
+      - ./trino:/etc/trino
+    ports:
+      - 8080:8080
+    networks:
+      - trino_hms
+
+networks:
+  trino_hms:
+
diff --git a/src/s3select/container/trino/run_trino_on_ceph.bash b/src/s3select/container/trino/run_trino_on_ceph.bash
new file mode 100644
index 000000000..a9b1583d0
--- /dev/null
+++ b/src/s3select/container/trino/run_trino_on_ceph.bash
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+root_dir()
+{
+  cd $(git rev-parse --show-toplevel)
+}
+
+modify_end_point_on_hive_properties()
+{
+  # not in use
+  return;
+  # TODO: run only if ./trino/catalog/hive.properties exists
+
+  [ $# -lt 1 ] && echo "type s3-endpoint-url" && return
+  root_dir
+  export S3_ENDPOINT=$1
+  cat container/trino/trino/catalog/hive.properties | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x"\n";} else {print $0;}}' > /tmp/hive.properties
+  cp /tmp/hive.properties container/trino/trino/catalog/hive.properties
+  cat ./container/trino/hms_trino.yaml | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/[ *]- S3_ENDPOINT/){print "\t- S3_ENDPOINT="x"\n";} else {print $0;}}' > /tmp/hms_trino.yaml
+  cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml
+  cd -
+}
+
+trino_exec_command()
+{
+  ## run an SQL statement on Trino
+  sudo docker exec -it trino /bin/bash -c "time trino --catalog hive --schema cephs3 --execute \"$@\""
+}
+
+boot_trino_hms()
+{
+  root_dir
+  [ -z "${S3_ENDPOINT}" ] && echo "missing environment variable S3_ENDPOINT (URL)" && return
+  [ -z "${S3_ACCESS_KEY}" ] && echo "missing environment variable S3_ACCESS_KEY" && return
+  [ -z "${S3_SECRET_KEY}" ] && echo "missing environment variable S3_SECRET_KEY" && return
+
+  # modify hms_trino.yaml according to user setup (environment variables)
+  cat ./container/trino/hms_trino.yaml | \
+    awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/- S3_ENDPOINT/){print "      - S3_ENDPOINT="x;} else {print $0;}}' | \
+    awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/- S3_ACCESS_KEY/){print "      - S3_ACCESS_KEY="x;} else {print $0;}}' | \
+    awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/- S3_SECRET_KEY/){print "      - S3_SECRET_KEY="x;} else {print $0;}}' > /tmp/hms_trino.yaml
+  cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml
+
+  # modify hive.properties according to user setup (environment variables)
+  cat container/trino/trino/catalog/hive.properties | \
+    awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x"\n";} else {print $0;}}' | \
+    awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/hive.s3.aws-access-key/){print "hive.s3.aws-access-key="x;} else {print $0;}}' | \
+    awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/hive.s3.aws-secret-key/){print "hive.s3.aws-secret-key="x;} else {print $0;}}' > /tmp/hive.properties
+  cp /tmp/hive.properties ./container/trino/trino/catalog/hive.properties
+
+  sudo docker compose -f ./container/trino/hms_trino.yaml up -d
+  cd -
+}
+
+shutdown_trino_hms()
+{
+  root_dir
+  sudo docker compose -f ./container/trino/hms_trino.yaml down
+  cd -
+}
+
+trino_create_table()
+{
+  table_name=$1
+  create_table_comm="create table hive.cephs3.${table_name}(c1 varchar,c2 varchar,c3 varchar,c4 varchar,c5 varchar,c6 varchar,c7 varchar,c8 varchar,c9 varchar,c10 varchar)
+  WITH ( external_location = 's3a://hive/warehouse/cephs3/${table_name}/',format = 'TEXTFILE',textfile_field_separator = ',');"
+  sudo docker exec -it trino /bin/bash -c "trino --catalog hive --schema cephs3 --execute \"${create_table_comm}\""
+}
+
+tpcds_cli()
+{
+  ## a CLI example for generating TPCDS data
+  sudo docker run --env S3_ENDPOINT=172.17.0.1:8000 --env S3_ACCESS_KEY=b2345678901234567890 --env S3_SECRET_KEY=b234567890123456789012345678901234567890 --env BUCKET_NAME=hive --env SCALE=2 -it galsl/hadoop:tpcds bash -c '/root/run_tpcds_with_scale'
+}
+
+update_table_external_location()
+{
+  root_dir
+  [ -z "${BUCKET_NAME}" ] && echo "need to define BUCKET_NAME" && return
+  [ -z "${SCALE}" ] && echo "need to define SCALE" && return
+
+  cat TPCDS/ddl/create_tpcds_tables.sql | sed "s/tpcds2\/4/${BUCKET_NAME}\/SCALE_${SCALE}/"
+}
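The script above is meant to be sourced, not executed. A minimal usage sketch (not part of the diff), assuming the repository root contains the container/trino directory and reusing the placeholder endpoint and credentials from these files:

    export S3_ENDPOINT=http://10.0.209.201:80
    export S3_ACCESS_KEY=abc1
    export S3_SECRET_KEY=abc1
    source ./container/trino/run_trino_on_ceph.bash
    boot_trino_hms                                  # rewrites the configs from the variables above, starts both containers
    trino_create_table t1                           # external table over s3a://hive/warehouse/cephs3/t1/
    trino_exec_command "select count(*) from t1;"
    shutdown_trino_hms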
diff --git a/src/s3select/container/trino/trino/catalog/hive.properties b/src/s3select/container/trino/trino/catalog/hive.properties
new file mode 100644
index 000000000..645948f24
--- /dev/null
+++ b/src/s3select/container/trino/trino/catalog/hive.properties
@@ -0,0 +1,33 @@
+connector.name=hive
+hive.metastore.uri=thrift://hms:9083
+
+#hive.metastore.warehouse.dir=s3a://hive/
+
+hive.allow-drop-table=true
+hive.allow-rename-table=true
+hive.allow-add-column=true
+hive.allow-drop-column=true
+hive.allow-rename-column=true
+
+hive.non-managed-table-writes-enabled=true
+hive.s3select-pushdown.enabled=true
+hive.s3.aws-access-key=abc1
+hive.s3.aws-secret-key=abc1
+
+# should be modified per s3-endpoint-url
+hive.s3.endpoint=http://10.0.209.201:80
+
+
+
+
+
+
+
+
+#hive.s3.max-connections=1
+#hive.s3select-pushdown.max-connections=1
+
+hive.s3.connect-timeout=100s
+hive.s3.socket-timeout=100s
+hive.max-splits-per-second=10000
+hive.max-split-size=128MB
diff --git a/src/s3select/container/trino/trino/config.properties b/src/s3select/container/trino/trino/config.properties
new file mode 100644
index 000000000..a11cba39d
--- /dev/null
+++ b/src/s3select/container/trino/trino/config.properties
@@ -0,0 +1,5 @@
+#single node install config
+coordinator=true
+node-scheduler.include-coordinator=true
+http-server.http.port=8080
+discovery.uri=http://localhost:8080
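Once boot_trino_hms has rewritten hive.properties and the containers are up, the catalog wiring can be checked interactively from the trino container (a sketch; the container, catalog, and schema names are the ones used throughout these files):

    sudo docker exec -it trino trino --catalog hive --schema cephs3
    trino:cephs3> show tables;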
diff --git a/src/s3select/container/trino/trino/jvm.config b/src/s3select/container/trino/trino/jvm.config
new file mode 100644
index 000000000..47e9e3176
--- /dev/null
+++ b/src/s3select/container/trino/trino/jvm.config
@@ -0,0 +1,19 @@
+-server
+-agentpath:/usr/lib/trino/bin/libjvmkill.so
+-XX:InitialRAMPercentage=80
+-XX:MaxRAMPercentage=80
+-XX:G1HeapRegionSize=32M
+-XX:+ExplicitGCInvokesConcurrent
+-XX:+HeapDumpOnOutOfMemoryError
+-XX:+ExitOnOutOfMemoryError
+-XX:-OmitStackTraceInFastThrow
+-XX:ReservedCodeCacheSize=256M
+-XX:PerMethodRecompilationCutoff=10000
+-XX:PerBytecodeRecompilationCutoff=10000
+-Djdk.attach.allowAttachSelf=true
+-Djdk.nio.maxCachedBufferSize=2000000
+# Improve AES performance for S3, etc. on ARM64 (JDK-8271567)
+-XX:+UnlockDiagnosticVMOptions
+-XX:+UseAESCTRIntrinsics
+# Disable Preventive GC for performance reasons (JDK-8293861)
+-XX:-G1UsePreventiveGC
diff --git a/src/s3select/container/trino/trino/log.properties b/src/s3select/container/trino/trino/log.properties
new file mode 100644
index 000000000..abee45ebc
--- /dev/null
+++ b/src/s3select/container/trino/trino/log.properties
@@ -0,0 +1,2 @@
+# Enable verbose logging from Trino
+#io.trino=DEBUG
diff --git a/src/s3select/container/trino/trino/node.properties b/src/s3select/container/trino/trino/node.properties
new file mode 100644
index 000000000..5b02ff7f0
--- /dev/null
+++ b/src/s3select/container/trino/trino/node.properties
@@ -0,0 +1,2 @@
+node.environment=docker
+node.data-dir=/data/trino
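The commented-out io.trino=DEBUG line in log.properties is the switch for verbose engine logging. Since the compose file bind-mounts ./trino into the container, one way to pick up the change after uncommenting that line is to restart the trino service (a sketch, assuming the compose file shipped in this diff):

    sudo docker compose -f ./container/trino/hms_trino.yaml restart trino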