Diffstat (limited to 'src/s3select/container/trino')
-rw-r--r--  src/s3select/container/trino/hms_trino.yaml                  31
-rw-r--r--  src/s3select/container/trino/run_trino_on_ceph.bash          86
-rw-r--r--  src/s3select/container/trino/trino/catalog/hive.properties   33
-rw-r--r--  src/s3select/container/trino/trino/config.properties          5
-rw-r--r--  src/s3select/container/trino/trino/jvm.config                19
-rw-r--r--  src/s3select/container/trino/trino/log.properties             2
-rw-r--r--  src/s3select/container/trino/trino/node.properties            2
7 files changed, 178 insertions(+), 0 deletions(-)
diff --git a/src/s3select/container/trino/hms_trino.yaml b/src/s3select/container/trino/hms_trino.yaml
new file mode 100644
index 000000000..42d22f842
--- /dev/null
+++ b/src/s3select/container/trino/hms_trino.yaml
@@ -0,0 +1,31 @@
+version: '3'
+services:
+  hms:
+    image: galsl/hms:dev
+    container_name: hms
+    environment:
+      # S3_ENDPOINT is the Ceph/RGW endpoint URL
+      - S3_ENDPOINT=http://10.0.209.201:80
+      - S3_ACCESS_KEY=abc1
+      - S3_SECRET_KEY=abc1
+    # the container starts by booting the Hive metastore
+    command: sh -c '. ~/.bashrc; start_hive_metastore'
+    ports:
+      - 9083:9083
+    networks:
+      - trino_hms
+
+  trino:
+    image: trinodb/trino
+    container_name: trino
+    volumes:
+      # the trino directory contains the necessary Trino configuration
+      - ./trino:/etc/trino
+    ports:
+      - 8080:8080
+    networks:
+      - trino_hms
+
+networks:
+  trino_hms:
+
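The compose file above starts the Hive metastore (hms) and a single Trino node on a shared trino_hms network, with the local ./trino directory mounted as Trino's configuration. A minimal bring-up sketch, assuming docker compose v2 and that the S3_* placeholders already point at a reachable RGW:

    # start both services and confirm they are running
    sudo docker compose -f hms_trino.yaml up -d
    sudo docker ps --filter name=hms --filter name=trino
    # the trinodb/trino image bundles the Trino CLI
    sudo docker exec -it trino trino --catalog hive --execute "show schemas;"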
diff --git a/src/s3select/container/trino/run_trino_on_ceph.bash b/src/s3select/container/trino/run_trino_on_ceph.bash
new file mode 100644
index 000000000..a9b1583d0
--- /dev/null
+++ b/src/s3select/container/trino/run_trino_on_ceph.bash
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+root_dir()
+{
+ cd "$(git rev-parse --show-toplevel)"
+}
+
+modify_end_point_on_hive_properties()
+{
+# not in use; kept for reference
+return;
+#TODO: check that ./trino/catalog/hive.properties exists
+
+ [ $# -lt 1 ] && echo "usage: modify_end_point_on_hive_properties <s3-endpoint-url>" && return
+ root_dir
+ export S3_ENDPOINT=$1
+ cat container/trino/trino/catalog/hive.properties | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x;} else {print $0;}}' > /tmp/hive.properties
+ cp /tmp/hive.properties container/trino/trino/catalog/hive.properties
+ cat ./container/trino/hms_trino.yaml | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/[ *]- S3_ENDPOINT/){print "      - S3_ENDPOINT="x;} else {print $0;}}' > /tmp/hms_trino.yaml
+ cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml
+ cd -
+}
+
+trino_exec_command()
+{
+## run a SQL statement on Trino
+ sudo docker exec -it trino /bin/bash -c "time trino --catalog hive --schema cephs3 --execute \"$@\""
+}
+
+boot_trino_hms()
+{
+ root_dir
+ [ -z "${S3_ENDPOINT}" ] && echo "missing environment variable S3_ENDPOINT (URL)" && return
+ [ -z "${S3_ACCESS_KEY}" ] && echo "missing environment variable S3_ACCESS_KEY" && return
+ [ -z "${S3_SECRET_KEY}" ] && echo "missing environment variable S3_SECRET_KEY" && return
+
+ # modify hms_trino.yaml according to user setup (environment variables)
+ cat ./container/trino/hms_trino.yaml | \
+ awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/- S3_ENDPOINT/){print "      - S3_ENDPOINT="x;} else {print $0;}}' | \
+ awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/- S3_ACCESS_KEY/){print "      - S3_ACCESS_KEY="x;} else {print $0;}}' | \
+ awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/- S3_SECRET_KEY/){print "      - S3_SECRET_KEY="x;} else {print $0;}}' > /tmp/hms_trino.yaml
+ cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml
+
+
+
+ # modify hive.properties according to user setup (environment variables)
+ cat container/trino/trino/catalog/hive.properties | \
+ awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x;} else {print $0;}}' | \
+ awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/hive.s3.aws-access-key/){print "hive.s3.aws-access-key="x;} else {print $0;}}' | \
+ awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/hive.s3.aws-secret-key/){print "hive.s3.aws-secret-key="x;} else {print $0;}}' > /tmp/hive.properties
+ cp /tmp/hive.properties ./container/trino/trino/catalog/hive.properties
+
+ sudo docker compose -f ./container/trino/hms_trino.yaml up -d
+ cd -
+}
+
+shutdown_trino_hms()
+{
+ root_dir
+ sudo docker compose -f ./container/trino/hms_trino.yaml down
+ cd -
+}
+
+trino_create_table()
+{
+table_name=$1
+create_table_comm="create table hive.cephs3.${table_name}(c1 varchar,c2 varchar,c3 varchar,c4 varchar, c5 varchar,c6 varchar,c7 varchar,c8 varchar,c9 varchar,c10 varchar)
+ WITH ( external_location = 's3a://hive/warehouse/cephs3/${table_name}/',format = 'TEXTFILE',textfile_field_separator = ',');"
+sudo docker exec -it trino /bin/bash -c "trino --catalog hive --schema cephs3 --execute \"${create_table_comm}\""
+}
+
+tpcds_cli()
+{
+## a CLI example for generating TPCDS data
+sudo docker run --env S3_ENDPOINT=172.17.0.1:8000 --env S3_ACCESS_KEY=b2345678901234567890 --env S3_SECRET_KEY=b234567890123456789012345678901234567890 --env BUCKET_NAME=hive --env SCALE=2 -it galsl/hadoop:tpcds bash -c '/root/run_tpcds_with_scale'
+}
+
+update_table_external_location()
+{
+root_dir
+[ -z "${BUCKET_NAME}" ] && echo "need to define BUCKET_NAME" && return
+[ -z "${SCALE}" ] && echo "need to define SCALE" && return
+# print the DDL with external_location rewritten to the user's bucket and scale
+cat TPCDS/ddl/create_tpcds_tables.sql | sed "s/tpcds2\/4/${BUCKET_NAME}\/SCALE_${SCALE}/"
+}
+
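The script above is meant to be sourced so its functions land in the current shell; boot_trino_hms rewrites both config files from the environment before bringing the stack up. A usage sketch, with the endpoint and keys standing in for real values (they mirror the placeholders in hms_trino.yaml) and t1 as an example table name:

    export S3_ENDPOINT=http://10.0.209.201:80
    export S3_ACCESS_KEY=abc1
    export S3_SECRET_KEY=abc1
    source container/trino/run_trino_on_ceph.bash
    boot_trino_hms                                   # rewrite configs, then docker compose up
    trino_create_table t1                            # external TEXTFILE table under s3a://hive/warehouse/cephs3/t1/
    trino_exec_command "select count(*) from t1;"
    shutdown_trino_hms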
diff --git a/src/s3select/container/trino/trino/catalog/hive.properties b/src/s3select/container/trino/trino/catalog/hive.properties
new file mode 100644
index 000000000..645948f24
--- /dev/null
+++ b/src/s3select/container/trino/trino/catalog/hive.properties
@@ -0,0 +1,33 @@
+connector.name=hive
+hive.metastore.uri=thrift://hms:9083
+
+#hive.metastore.warehouse.dir=s3a://hive/
+
+hive.allow-drop-table=true
+hive.allow-rename-table=true
+hive.allow-add-column=true
+hive.allow-drop-column=true
+hive.allow-rename-column=true
+
+hive.non-managed-table-writes-enabled=true
+hive.s3select-pushdown.enabled=true
+hive.s3.aws-access-key=abc1
+hive.s3.aws-secret-key=abc1
+
+# modify to match your s3-endpoint-url (the RGW endpoint)
+hive.s3.endpoint=http://10.0.209.201:80
+
+#hive.s3.max-connections=1
+#hive.s3select-pushdown.max-connections=1
+
+hive.s3.connect-timeout=100s
+hive.s3.socket-timeout=100s
+hive.max-splits-per-second=10000
+hive.max-split-size=128MB
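With hive.s3select-pushdown.enabled=true, selective scans over TEXTFILE tables can be offloaded to the S3 endpoint (here RGW), so a filtered query moves far fewer bytes than a full object read. A rough way to observe the effect, assuming a table t1 created as in the script above:

    # compare wall-clock time with the pushdown property set to true vs. false
    sudo docker exec -it trino trino --catalog hive --schema cephs3 \
        --execute "select count(*) from t1 where c1 = '0';"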
diff --git a/src/s3select/container/trino/trino/config.properties b/src/s3select/container/trino/trino/config.properties
new file mode 100644
index 000000000..a11cba39d
--- /dev/null
+++ b/src/s3select/container/trino/trino/config.properties
@@ -0,0 +1,5 @@
+# single-node install config
+coordinator=true
+node-scheduler.include-coordinator=true
+http-server.http.port=8080
+discovery.uri=http://localhost:8080
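config.properties declares a single node that is both coordinator and worker and advertises itself at localhost:8080. A quick liveness check against Trino's REST API, assuming the 8080 port mapping from hms_trino.yaml:

    # "starting": false in the JSON response means the server is ready
    curl -s http://localhost:8080/v1/info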
diff --git a/src/s3select/container/trino/trino/jvm.config b/src/s3select/container/trino/trino/jvm.config
new file mode 100644
index 000000000..47e9e3176
--- /dev/null
+++ b/src/s3select/container/trino/trino/jvm.config
@@ -0,0 +1,19 @@
+-server
+-agentpath:/usr/lib/trino/bin/libjvmkill.so
+-XX:InitialRAMPercentage=80
+-XX:MaxRAMPercentage=80
+-XX:G1HeapRegionSize=32M
+-XX:+ExplicitGCInvokesConcurrent
+-XX:+HeapDumpOnOutOfMemoryError
+-XX:+ExitOnOutOfMemoryError
+-XX:-OmitStackTraceInFastThrow
+-XX:ReservedCodeCacheSize=256M
+-XX:PerMethodRecompilationCutoff=10000
+-XX:PerBytecodeRecompilationCutoff=10000
+-Djdk.attach.allowAttachSelf=true
+-Djdk.nio.maxCachedBufferSize=2000000
+# Improve AES performance for S3, etc. on ARM64 (JDK-8271567)
+-XX:+UnlockDiagnosticVMOptions
+-XX:+UseAESCTRIntrinsics
+# Disable Preventive GC for performance reasons (JDK-8293861)
+-XX:-G1UsePreventiveGC
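To verify these flags actually reached the JVM, one sketch is to read the server process's command line inside the container; this assumes the Java process runs as PID 1 (adjust if the launcher wraps it):

    sudo docker exec trino sh -c 'tr "\0" "\n" < /proc/1/cmdline' | grep -E '^-XX|^-D'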
diff --git a/src/s3select/container/trino/trino/log.properties b/src/s3select/container/trino/trino/log.properties
new file mode 100644
index 000000000..abee45ebc
--- /dev/null
+++ b/src/s3select/container/trino/trino/log.properties
@@ -0,0 +1,2 @@
+# Enable verbose logging from Trino
+#io.trino=DEBUG
diff --git a/src/s3select/container/trino/trino/node.properties b/src/s3select/container/trino/trino/node.properties
new file mode 100644
index 000000000..5b02ff7f0
--- /dev/null
+++ b/src/s3select/container/trino/trino/node.properties
@@ -0,0 +1,2 @@
+node.environment=docker
+node.data-dir=/data/trino
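node.properties pins the environment name and the in-container data directory; both are easy to verify once the stack is up:

    sudo docker exec trino cat /etc/trino/node.properties   # mounted from ./trino
    sudo docker exec trino ls /data/trino                   # created by the server at startup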