diff options
Diffstat (limited to 'src/s3select/example')
-rw-r--r-- | src/s3select/example/CMakeLists.txt | 12 | ||||
-rwxr-xr-x | src/s3select/example/expr_genrator.py | 9 | ||||
-rw-r--r-- | src/s3select/example/generate_rand_csv.c | 28 | ||||
-rwxr-xr-x | src/s3select/example/parse_csv.py | 12 | ||||
-rwxr-xr-x | src/s3select/example/run_test.bash | 96 | ||||
-rw-r--r-- | src/s3select/example/s3select_example.cpp | 136 |
6 files changed, 293 insertions, 0 deletions
diff --git a/src/s3select/example/CMakeLists.txt b/src/s3select/example/CMakeLists.txt new file mode 100644 index 000000000..37f99f0b3 --- /dev/null +++ b/src/s3select/example/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(s3select_example s3select_example.cpp) +target_include_directories(s3select_example PUBLIC ../include) +target_link_libraries(s3select_example boost_date_time) + +add_executable(generate_rand_csv generate_rand_csv.c) + +add_custom_command(OUTPUT expr_genrator.py COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/expr_genrator.py expr_genrator.py + COMMENT "Copy expr_genrator.py" + VERBATIM) + +add_custom_target(expr_generator ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/expr_genrator.py) + diff --git a/src/s3select/example/expr_genrator.py b/src/s3select/example/expr_genrator.py new file mode 100755 index 000000000..0d21fcee6 --- /dev/null +++ b/src/s3select/example/expr_genrator.py @@ -0,0 +1,9 @@ +import random +import sys + +def expr(depth): + if depth==1 or random.random()<1.0/(2**depth-1): + return str(int(random.random() * 100) + 1)+".0" + return '(' + expr(depth-1) + random.choice(['+','-','*','/']) + expr(depth-1) + ')' + +print expr( int(sys.argv[1]) ) diff --git a/src/s3select/example/generate_rand_csv.c b/src/s3select/example/generate_rand_csv.c new file mode 100644 index 000000000..67d52adaa --- /dev/null +++ b/src/s3select/example/generate_rand_csv.c @@ -0,0 +1,28 @@ +#include <stdio.h> +#include <stdlib.h> + + +int main(int argc, char** argv) +{ + if (argc<3) + { + printf("%s <num-of-rows> <num-of-columns> \n", argv[0]); + return -1; + } + + srand(1234); + int line_no=0; + for(int i=0; i<atoi(argv[1]); i++) + { + printf("%d,", i); + for(int y=0; y<atoi(argv[2]); y++) + { + printf("%d,", rand()%1000); + } + printf("\n"); + } + + + + +} diff --git a/src/s3select/example/parse_csv.py b/src/s3select/example/parse_csv.py new file mode 100755 index 000000000..23fe14add --- /dev/null +++ b/src/s3select/example/parse_csv.py @@ -0,0 +1,12 @@ +#!/usr/bin/python + +import csv + +with open('stam.txt') as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') + line_count = 0 + for row in csv_reader: + #if (int(row[0])==465 and int(row[5])==268): # casting is slower + if (row[0]=="465" and row[5]=="268"): + print row + diff --git a/src/s3select/example/run_test.bash b/src/s3select/example/run_test.bash new file mode 100755 index 000000000..56654d7ab --- /dev/null +++ b/src/s3select/example/run_test.bash @@ -0,0 +1,96 @@ +#!/bin/bash + +set -e + +PREFIX=${1:-"./example"} + +## purpose : sanity tests + +s3select_calc() +{ +l="$*" +res=$( echo 1 | "$PREFIX"/s3select_example -q "select ${l} from stdin;" ) +echo "$res" | sed 's/.$//' +} + +# create c file with expression , compile it and run it. +c_calc() +{ +cat << @@ > "$PREFIX"/tmp.c + +#include <stdio.h> +int main() +{ +printf("%f\n",$*); +} +@@ +gcc -o "$PREFIX"/a.out "$PREFIX"/tmp.c +"$PREFIX"/a.out +} + +expr_test() +{ +## test the arithmetic evaluation of s3select against C program +for i in {1..100} +do + e=$(python2 "$PREFIX"/expr_genrator.py 5) + echo expression["$i"]="$e" + r1=$(s3select_calc "$e") + r2=$(c_calc "$e") + echo "$r1" "$r2" + + ## should be zero or very close to zero; ( s3select is C compile program ) + res=$(echo "" | awk -v e="$e" -v r1="$r1" -v r2="$r2" 'function abs(n){if (n<0) return -n; else return n;}{if (abs(r1-r2) > 0.00001) {print "MISSMATCH result for expression",e;}}') + if test "$res" != ""; then + echo "$res" + exit 1 + fi +done +} + +aggregate_test() +{ +## generate_rand_csv is generating with the same seed +echo check sum +s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select sum(int(_1)) from stdin;') +awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";} {s+=$1;} END{print s;}') +s3select_val=${s3select_val::-1} +echo "$s3select_val" "$awk_val" +if test "$s3select_val" -ne "$awk_val"; then + exit 1 +fi +echo check min +s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select min(int(_1)) from stdin;') +awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";min=100000;} {if(min>$1) min=$1;} END{print min;}') +s3select_val=${s3select_val::-1} +echo "$s3select_val" "$awk_val" +if test "$s3select_val" -ne "$awk_val"; then + exit 1 +fi +echo check max +s3select_val=$("$PREFIX"/generate_rand_csv 10 10 | "$PREFIX"/s3select_example -q 'select max(int(_1)) from stdin;') +awk_val=$("$PREFIX"/generate_rand_csv 10 10 | awk 'BEGIN{FS=",";max=0;} {if(max<$1) max=$1;} END{print max;}' ) +s3select_val=${s3select_val::-1} +echo "$s3select_val" "$awk_val" +if test "$s3select_val" -ne "$awk_val"; then + exit 1 +fi +echo check substr and count +s3select_val=$("$PREFIX"/generate_rand_csv 10000 10 | "$PREFIX"/s3select_example -q 'select count(int(_1)) from stdin where int(_1)>200 and int(_1)<250;') +awk_val=$("$PREFIX"/generate_rand_csv 10000 10 | "$PREFIX"/s3select_example -q 'select substr(_1,1,1) from stdin where int(_1)>200 and int(_1)<250;' | uniq -c | awk '{print $1;}') +s3select_val=${s3select_val::-1} +echo "$s3select_val" "$awk_val" +if test "$s3select_val" -ne "$awk_val"; then + exit 1 +fi +} + +############################################################### + +expr_test +aggregate_test + +rm "$PREFIX"/tmp.c "$PREFIX"/a.out + +exit 0 + diff --git a/src/s3select/example/s3select_example.cpp b/src/s3select/example/s3select_example.cpp new file mode 100644 index 000000000..840b62c6a --- /dev/null +++ b/src/s3select/example/s3select_example.cpp @@ -0,0 +1,136 @@ +#include "s3select.h" +#include <fstream> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +using namespace s3selectEngine; +using namespace BOOST_SPIRIT_CLASSIC_NS; + +int cli_get_schema(const char* input_schema, actionQ& x) +{ + g_push_column.set_action_q(&x); + + rule<> column_name_rule = lexeme_d[(+alpha_p >> *digit_p)]; + + //TODO an issue to resolve with trailing space + parse_info<> info = parse(input_schema, ((column_name_rule)[BOOST_BIND_ACTION(push_column)] >> *(',' >> (column_name_rule)[BOOST_BIND_ACTION(push_column)])), space_p); + + if (!info.full) + { + std::cout << "failure in schema description " << input_schema << std::endl; + return -1; + } + + return 0; +} + +int main(int argc, char** argv) +{ + + //purpose: demostrate the s3select functionalities + s3select s3select_syntax; + + char* input_query = 0; + + for (int i = 0; i < argc; i++) + { + + if (!strcmp(argv[i], "-q")) + { + input_query = argv[i + 1]; + } + } + + + if (!input_query) + { + std::cout << "type -q 'select ... from ... '" << std::endl; + return -1; + } + + + bool to_aggregate = false; + + int status = s3select_syntax.parse_query(input_query); + if (status != 0) + { + std::cout << "failed to parse query " << s3select_syntax.get_error_description() << std::endl; + return -1; + } + + std::string object_name = s3select_syntax.get_from_clause(); //TODO stdin + + FILE* fp; + + if (object_name.compare("stdin")==0) + { + fp = stdin; + } + else + { + fp = fopen(object_name.c_str(), "r"); + } + + + if(!fp) + { + std::cout << " input stream is not valid, abort;" << std::endl; + return -1; + } + + struct stat statbuf; + + lstat(object_name.c_str(), &statbuf); + + std::string s3select_result; + s3selectEngine::csv_object::csv_defintions csv; + csv.use_header_info = false; + //csv.column_delimiter='|'; + //csv.row_delimiter='\t'; + + + s3selectEngine::csv_object s3_csv_object(&s3select_syntax, csv); + //s3selectEngine::csv_object s3_csv_object(&s3select_syntax); + +#define BUFF_SIZE 1024*1024*4 + char* buff = (char*)malloc( BUFF_SIZE ); + while(1) + { + //char buff[4096]; + + //char * in = fgets(buff,sizeof(buff),fp); + size_t input_sz = fread(buff, 1, BUFF_SIZE, fp); + char* in=buff; + //input_sz = strlen(buff); + //size_t input_sz = in == 0 ? 0 : strlen(in); + + //if (!input_sz) to_aggregate = true; + + + //int status = s3_csv_object.run_s3select_on_object(s3select_result,in,input_sz,false,false,to_aggregate); + int status = s3_csv_object.run_s3select_on_stream(s3select_result, in, input_sz, statbuf.st_size); + if(status<0) + { + std::cout << "failure on execution " << std::endl; + break; + } + + if(s3select_result.size()>1) + { + std::cout << s3select_result; + } + + s3select_result = ""; + if(!input_sz || feof(fp)) + { + break; + } + + } + + free(buff); + fclose(fp); + + +} |