summaryrefslogtreecommitdiffstats
path: root/src/s3select/example
diff options
context:
space:
mode:
Diffstat (limited to 'src/s3select/example')
-rw-r--r--src/s3select/example/CMakeLists.txt12
-rwxr-xr-xsrc/s3select/example/expr_genrator.py9
-rw-r--r--src/s3select/example/generate_rand_csv.c28
-rwxr-xr-xsrc/s3select/example/parse_csv.py12
-rwxr-xr-xsrc/s3select/example/run_test.bash96
-rw-r--r--src/s3select/example/s3select_example.cpp136
6 files changed, 293 insertions, 0 deletions
diff --git a/src/s3select/example/CMakeLists.txt b/src/s3select/example/CMakeLists.txt
new file mode 100644
index 000000000..37f99f0b3
--- /dev/null
+++ b/src/s3select/example/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_executable(s3select_example s3select_example.cpp)
+target_include_directories(s3select_example PUBLIC ../include)
+target_link_libraries(s3select_example boost_date_time)
+
+add_executable(generate_rand_csv generate_rand_csv.c)
+
+add_custom_command(OUTPUT expr_genrator.py COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/expr_genrator.py expr_genrator.py
+ COMMENT "Copy expr_genrator.py"
+ VERBATIM)
+
+add_custom_target(expr_generator ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/expr_genrator.py)
+
diff --git a/src/s3select/example/expr_genrator.py b/src/s3select/example/expr_genrator.py
new file mode 100755
index 000000000..0d21fcee6
--- /dev/null
+++ b/src/s3select/example/expr_genrator.py
@@ -0,0 +1,9 @@
+import random
+import sys
+
+def expr(depth):
+ if depth==1 or random.random()<1.0/(2**depth-1):
+ return str(int(random.random() * 100) + 1)+".0"
+ return '(' + expr(depth-1) + random.choice(['+','-','*','/']) + expr(depth-1) + ')'
+
+print expr( int(sys.argv[1]) )
diff --git a/src/s3select/example/generate_rand_csv.c b/src/s3select/example/generate_rand_csv.c
new file mode 100644
index 000000000..67d52adaa
--- /dev/null
+++ b/src/s3select/example/generate_rand_csv.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+
+int main(int argc, char** argv)
+{
+ if (argc<3)
+ {
+ printf("%s <num-of-rows> <num-of-columns> \n", argv[0]);
+ return -1;
+ }
+
+ srand(1234);
+ int line_no=0;
+ for(int i=0; i<atoi(argv[1]); i++)
+ {
+ printf("%d,", i);
+ for(int y=0; y<atoi(argv[2]); y++)
+ {
+ printf("%d,", rand()%1000);
+ }
+ printf("\n");
+ }
+
+
+
+
+}
diff --git a/src/s3select/example/parse_csv.py b/src/s3select/example/parse_csv.py
new file mode 100755
index 000000000..23fe14add
--- /dev/null
+++ b/src/s3select/example/parse_csv.py
@@ -0,0 +1,12 @@
+#!/usr/bin/python
+
+import csv
+
+with open('stam.txt') as csv_file:
+ csv_reader = csv.reader(csv_file, delimiter=',')
+ line_count = 0
+ for row in csv_reader:
+ #if (int(row[0])==465 and int(row[5])==268): # casting is slower
+ if (row[0]=="465" and row[5]=="268"):
+ print row
+
diff --git a/src/s3select/example/run_test.bash b/src/s3select/example/run_test.bash
new file mode 100755
index 000000000..56654d7ab
--- /dev/null
+++ b/src/s3select/example/run_test.bash
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+set -e
+
+PREFIX=${1:-"./example"}
+
+## purpose : sanity tests
+
+s3select_calc()
+{
+l="$*"
+res=$( echo 1 | "$PREFIX"/s3select_example -q "select ${l} from stdin;" )
+echo "$res" | sed 's/.$//'
+}
+
+# create c file with expression , compile it and run it.
+c_calc()
+{
+cat << @@ > "$PREFIX"/tmp.c
+
+#include <stdio.h>
+int main()
+{
+printf("%f\n",$*);
+}
+@@
+gcc -o "$PREFIX"/a.out "$PREFIX"/tmp.c
+"$PREFIX"/a.out
+}
+
+expr_test()
+{
+## test the arithmetic evaluation of s3select against C program
+for i in {1..100}
+do
+ e=$(python2 "$PREFIX"/expr_genrator.py 5)
+ echo expression["$i"]="$e"
+ r1=$(s3select_calc "$e")
+ r2=$(c_calc "$e")
+ echo "$r1" "$r2"
+
+ ## should be zero or very close to zero; ( s3select is C compile program )
+ res=$(echo "" | awk -v e="$e" -v r1="$r1" -v r2="$r2" 'function abs(n){if (n<0) return -n; else return n;}{if (abs(r1-r2) > 0.00001) {print "MISSMATCH result for expression",e;}}')
+ if test "$res" != ""; then
+ echo "$res"
+ exit 1
+ fi
+done
+}
+
+aggregate_test()
+{
+## generate_rand_csv is generating with the same seed
+echo check sum
+s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select sum(int(_1)) from stdin;')
+awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";} {s+=$1;} END{print s;}')
+s3select_val=${s3select_val::-1}
+echo "$s3select_val" "$awk_val"
+if test "$s3select_val" -ne "$awk_val"; then
+ exit 1
+fi
+echo check min
+s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select min(int(_1)) from stdin;')
+awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";min=100000;} {if(min>$1) min=$1;} END{print min;}')
+s3select_val=${s3select_val::-1}
+echo "$s3select_val" "$awk_val"
+if test "$s3select_val" -ne "$awk_val"; then
+ exit 1
+fi
+echo check max
+s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select max(int(_1)) from stdin;')
+awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";max=0;} {if(max<$1) max=$1;} END{print max;}' )
+s3select_val=${s3select_val::-1}
+echo "$s3select_val" "$awk_val"
+if test "$s3select_val" -ne "$awk_val"; then
+ exit 1
+fi
+echo check substr and count
+s3select_val=$("$PREFIX"/generate_rand_csv 10000 10 | "$PREFIX"/s3select_example -q 'select count(int(_1)) from stdin where int(_1)>200 and int(_1)<250;')
+awk_val=$("$PREFIX"/generate_rand_csv 10000 10 | "$PREFIX"/s3select_example -q 'select substr(_1,1,1) from stdin where int(_1)>200 and int(_1)<250;' | uniq -c | awk '{print $1;}')
+s3select_val=${s3select_val::-1}
+echo "$s3select_val" "$awk_val"
+if test "$s3select_val" -ne "$awk_val"; then
+ exit 1
+fi
+}
+
+###############################################################
+
+expr_test
+aggregate_test
+
+rm "$PREFIX"/tmp.c "$PREFIX"/a.out
+
+exit 0
+
diff --git a/src/s3select/example/s3select_example.cpp b/src/s3select/example/s3select_example.cpp
new file mode 100644
index 000000000..840b62c6a
--- /dev/null
+++ b/src/s3select/example/s3select_example.cpp
@@ -0,0 +1,136 @@
+#include "s3select.h"
+#include <fstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+using namespace s3selectEngine;
+using namespace BOOST_SPIRIT_CLASSIC_NS;
+
+int cli_get_schema(const char* input_schema, actionQ& x)
+{
+ g_push_column.set_action_q(&x);
+
+ rule<> column_name_rule = lexeme_d[(+alpha_p >> *digit_p)];
+
+ //TODO an issue to resolve with trailing space
+ parse_info<> info = parse(input_schema, ((column_name_rule)[BOOST_BIND_ACTION(push_column)] >> *(',' >> (column_name_rule)[BOOST_BIND_ACTION(push_column)])), space_p);
+
+ if (!info.full)
+ {
+ std::cout << "failure in schema description " << input_schema << std::endl;
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char** argv)
+{
+
+ //purpose: demostrate the s3select functionalities
+ s3select s3select_syntax;
+
+ char* input_query = 0;
+
+ for (int i = 0; i < argc; i++)
+ {
+
+ if (!strcmp(argv[i], "-q"))
+ {
+ input_query = argv[i + 1];
+ }
+ }
+
+
+ if (!input_query)
+ {
+ std::cout << "type -q 'select ... from ... '" << std::endl;
+ return -1;
+ }
+
+
+ bool to_aggregate = false;
+
+ int status = s3select_syntax.parse_query(input_query);
+ if (status != 0)
+ {
+ std::cout << "failed to parse query " << s3select_syntax.get_error_description() << std::endl;
+ return -1;
+ }
+
+ std::string object_name = s3select_syntax.get_from_clause(); //TODO stdin
+
+ FILE* fp;
+
+ if (object_name.compare("stdin")==0)
+ {
+ fp = stdin;
+ }
+ else
+ {
+ fp = fopen(object_name.c_str(), "r");
+ }
+
+
+ if(!fp)
+ {
+ std::cout << " input stream is not valid, abort;" << std::endl;
+ return -1;
+ }
+
+ struct stat statbuf;
+
+ lstat(object_name.c_str(), &statbuf);
+
+ std::string s3select_result;
+ s3selectEngine::csv_object::csv_defintions csv;
+ csv.use_header_info = false;
+ //csv.column_delimiter='|';
+ //csv.row_delimiter='\t';
+
+
+ s3selectEngine::csv_object s3_csv_object(&s3select_syntax, csv);
+ //s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
+
+#define BUFF_SIZE 1024*1024*4
+ char* buff = (char*)malloc( BUFF_SIZE );
+ while(1)
+ {
+ //char buff[4096];
+
+ //char * in = fgets(buff,sizeof(buff),fp);
+ size_t input_sz = fread(buff, 1, BUFF_SIZE, fp);
+ char* in=buff;
+ //input_sz = strlen(buff);
+ //size_t input_sz = in == 0 ? 0 : strlen(in);
+
+ //if (!input_sz) to_aggregate = true;
+
+
+ //int status = s3_csv_object.run_s3select_on_object(s3select_result,in,input_sz,false,false,to_aggregate);
+ int status = s3_csv_object.run_s3select_on_stream(s3select_result, in, input_sz, statbuf.st_size);
+ if(status<0)
+ {
+ std::cout << "failure on execution " << std::endl;
+ break;
+ }
+
+ if(s3select_result.size()>1)
+ {
+ std::cout << s3select_result;
+ }
+
+ s3select_result = "";
+ if(!input_sz || feof(fp))
+ {
+ break;
+ }
+
+ }
+
+ free(buff);
+ fclose(fp);
+
+
+}