#!/bin/bash

root_dir()
{
  # change to the repository root (where the container/ directory lives)
  cd "$(git rev-parse --show-toplevel)"
}

modify_end_point_on_hive_properties()
{
  # not in use
  return;
  # TODO: check that ./trino/catalog/hive.properties exists

  [ $# -lt 1 ] && echo "usage: modify_end_point_on_hive_properties <s3-endpoint-url>" && return
  root_dir
  export S3_ENDPOINT=$1
  cat container/trino/trino/catalog/hive.properties | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x;} else {print $0;}}' > /tmp/hive.properties
  cp /tmp/hive.properties container/trino/trino/catalog/hive.properties
  cat ./container/trino/hms_trino.yaml | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/[ *]- S3_ENDPOINT/){print "\t- S3_ENDPOINT="x;} else {print $0;}}' > /tmp/hms_trino.yaml
  cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml
  cd -
}

trino_exec_command()
{
  ## run a SQL statement on Trino; $* joins all arguments into a single
  ## statement string ("$@" would split it into separate words here)
  sudo docker exec -it trino /bin/bash -c "time trino --catalog hive --schema cephs3 --execute \"$*\""
}
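
# Usage sketch for trino_exec_command (assumes the trino container from
# hms_trino.yaml is already up; the table name table1 is illustrative):
#   trino_exec_command "select count(*) from table1;"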

boot_trino_hms()
{
  root_dir
  [ -z "${S3_ENDPOINT}" ] && echo "missing environment variable S3_ENDPOINT (URL)" && return
  [ -z "${S3_ACCESS_KEY}" ] && echo "missing environment variable S3_ACCESS_KEY" && return
  [ -z "${S3_SECRET_KEY}" ] && echo "missing environment variable S3_SECRET_KEY" && return

  # modify hms_trino.yaml according to user setup (environment variables)
  cat ./container/trino/hms_trino.yaml | \
  awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/- S3_ENDPOINT/){print "      - S3_ENDPOINT="x;} else {print $0;}}' | \
  awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/- S3_ACCESS_KEY/){print "      - S3_ACCESS_KEY="x;} else {print $0;}}' | \
  awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/- S3_SECRET_KEY/){print "      - S3_SECRET_KEY="x;} else {print $0;}}' > /tmp/hms_trino.yaml
  cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml

  # modify hive.properties according to user setup (environment variables)
  cat container/trino/trino/catalog/hive.properties | \
  awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x;} else {print $0;}}' | \
  awk -v x=${S3_ACCESS_KEY:-NOT_SET} '{if(/hive.s3.aws-access-key/){print "hive.s3.aws-access-key="x;} else {print $0;}}' | \
  awk -v x=${S3_SECRET_KEY:-NOT_SET} '{if(/hive.s3.aws-secret-key/){print "hive.s3.aws-secret-key="x;} else {print $0;}}' > /tmp/hive.properties
  cp /tmp/hive.properties ./container/trino/trino/catalog/hive.properties

  sudo docker compose -f ./container/trino/hms_trino.yaml up -d
  cd -
}
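
# Usage sketch for boot_trino_hms (the endpoint and keys below are
# placeholders; substitute the values of your own RGW/S3 setup):
#   export S3_ENDPOINT=http://172.17.0.1:8000
#   export S3_ACCESS_KEY=my-access-key
#   export S3_SECRET_KEY=my-secret-key
#   boot_trino_hms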

shutdown_trino_hms()
{
  root_dir
  sudo docker compose -f ./container/trino/hms_trino.yaml down
  cd -
}

trino_create_table()
{
  table_name=$1
  # create an external TEXTFILE (CSV) table whose data lives under
  # s3a://hive/warehouse/cephs3/<table_name>/
  create_table_comm="create table hive.cephs3.${table_name}(c1 varchar,c2 varchar,c3 varchar,c4 varchar,c5 varchar,c6 varchar,c7 varchar,c8 varchar,c9 varchar,c10 varchar)
  WITH ( external_location = 's3a://hive/warehouse/cephs3/${table_name}/', format = 'TEXTFILE', textfile_field_separator = ',');"
  sudo docker exec -it trino /bin/bash -c "trino --catalog hive --schema cephs3 --execute \"${create_table_comm}\""
}
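
# Usage sketch for trino_create_table (the table name is illustrative; the
# CSV objects are expected under the matching external_location prefix):
#   trino_create_table table1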

tpcds_cli()
{
  ## a CLI example for generating TPCDS data; the endpoint, keys, bucket name
  ## and scale below are sample values and should match your own setup
  sudo docker run --env S3_ENDPOINT=172.17.0.1:8000 --env S3_ACCESS_KEY=b2345678901234567890 --env S3_SECRET_KEY=b234567890123456789012345678901234567890 --env BUCKET_NAME=hive --env SCALE=2 -it galsl/hadoop:tpcds bash -c '/root/run_tpcds_with_scale'
}

update_table_external_location()
{
  root_dir
  [ -z "${BUCKET_NAME}" ] && echo "need to define BUCKET_NAME" && return
  [ -z "${SCALE}" ] && echo "need to define SCALE" && return

  # print the TPCDS DDL with its external location rewritten to point at
  # s3a://<BUCKET_NAME>/SCALE_<SCALE>/
  cat TPCDS/ddl/create_tpcds_tables.sql | sed "s/tpcds2\/4/${BUCKET_NAME}\/SCALE_${SCALE}/"
}