summary refs log tree commit diff stats
path: root/src/s3select/test
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 16:45:17 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 16:45:44 +0000
commit 17d6a993fc17d533460c5f40f3908c708e057c18 (patch)
tree 1a3bd93e0ecd74fa02f93a528fe2f87e5314c4b5 /src/s3select/test
parent Releasing progress-linux version 18.2.2-0progress7.99u1. (diff)
download ceph-17d6a993fc17d533460c5f40f3908c708e057c18.tar.xz
ceph-17d6a993fc17d533460c5f40f3908c708e057c18.zip
Merging upstream version 18.2.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/s3select/test')
-rw-r--r-- src/s3select/test/s3select_test.cpp 356
-rw-r--r-- src/s3select/test/s3select_test.h 98
2 files changed, 414 insertions, 40 deletions
diff --git a/src/s3select/test/s3select_test.cpp b/src/s3select/test/s3select_test.cpp
index 7c372551c..915f14d26 100644
--- a/src/s3select/test/s3select_test.cpp
+++ b/src/s3select/test/s3select_test.cpp
@@ -1,5 +1,6 @@
#include "s3select_test.h"
+
TEST(TestS3SElect, s3select_vs_C)
{
//purpose: validate correct processing of arithmetical expression, it is done by running the same expression
@@ -880,7 +881,10 @@ void test_single_column_single_row(const char* input_query,const char* expected_
{
ASSERT_TRUE(false);
}
- ASSERT_EQ(s3_csv_object.get_error_description(),error_description);
+ if(s3_csv_object.get_error_description().find(error_description) == std::string::npos )
+ {
+ FAIL() << "getting error: " << s3_csv_object.get_error_description() << " instead of: " << error_description << std::endl;
+ }
return;
}
@@ -1730,6 +1734,7 @@ TEST(TestS3selectFunctions, boolcast)
test_single_column_single_row("select cast(0 as bool) from s3object;","false\n");
test_single_column_single_row("select cast(true as bool) from s3object;","true\n");
test_single_column_single_row("select cast('a' as bool) from s3object;","false\n");
+ test_single_column_single_row("select cast(null as bool) from s3object;","null\n");
}
TEST(TestS3selectFunctions, floatcast)
@@ -1737,6 +1742,7 @@ TEST(TestS3selectFunctions, floatcast)
test_single_column_single_row("select cast('1234a' as float) from s3object;","#failure#","extra characters after the number");
test_single_column_single_row("select cast('a1234' as float) from s3object;","#failure#","text cannot be converted to a number");
test_single_column_single_row("select cast('999e+999' as float) from s3object;","#failure#","converted value would fall out of the range of the result type!");
+ test_single_column_single_row("select cast(null as float) from s3object;","null\n");
}
TEST(TestS3selectFunctions, intcast)
@@ -1745,6 +1751,7 @@ TEST(TestS3selectFunctions, intcast)
test_single_column_single_row("select cast('a1234' as int) from s3object;","#failure#","text cannot be converted to a number");
test_single_column_single_row("select cast('9223372036854775808' as int) from s3object;","#failure#","converted value would fall out of the range of the result type!");
test_single_column_single_row("select cast('-9223372036854775809' as int) from s3object;","#failure#","converted value would fall out of the range of the result type!");
+ test_single_column_single_row("select cast(null as int) from s3object;","null\n");
}
TEST(TestS3selectFunctions, predicate_as_projection_column)
@@ -2064,6 +2071,12 @@ TEST(TestS3selectFunctions, mod)
test_single_column_single_row( "select 5%2 from stdin;","1\n");
}
+TEST(TestS3selectFunctions, modfloat)
+{//purpose: the % operator with a floating-point left operand; the expected strings keep the trailing rounding digits exactly as the engine prints them.
+test_single_column_single_row( "select 5.2%2 from stdin;","1.2000000000000002\n"); // floating-point % integer
+test_single_column_single_row( "select 5.2%2.5 from stdin;","0.20000000000000018\n"); // floating-point % floating-point
+}
+
TEST(TestS3selectFunctions, modzero)
{
test_single_column_single_row( "select 0%2 from stdin;","0\n");
@@ -2124,6 +2137,13 @@ TEST(TestS3selectFunctions, isnullnot)
test_single_column_single_row( "select \"true\" from stdin where not nullif(1,2) is null;" ,"true\n");
}
+TEST(TestS3selectFunctions, case_insensitive_not_null)
+{//purpose: the NOT / NULL keywords of the is-null predicate are written in upper and mixed case; nullif(1,1) is expected to be null in all three queries.
+test_single_column_single_row( "select \"false\" from stdin where nullif(1,1) is NOT null;" ,""); // upper-case NOT: the row is filtered out, so the result is empty
+test_single_column_single_row( "select \"false\" from stdin where nullif(1,1) is not Null;" ,""); // mixed-case Null: same empty result
+test_single_column_single_row( "select \"true\" from stdin where nullif(1,1) is Null;" ,"true\n"); // positive form: the row is selected
+}
+
TEST(TestS3selectFunctions, isnull1)
{
test_single_column_single_row( "select \"true\" from stdin where 7 + null is null;" ,"true\n");
@@ -2529,6 +2549,16 @@ TEST(TestS3selectFunctions, trim11)
test_single_column_single_row( "select trim(trailing from trim(leading from \" foobar \")) from stdin ;" ,"foobar\n");
}
+TEST(TestS3selectFunctions, trim12)
+{//purpose: trim with the LEADING keyword in upper case and an explicit trim character; only the leading '1' characters are expected to be removed.
+test_single_column_single_row( "select trim(LEADING '1' from '111abcdef111') from s3object ;" ,"abcdef111\n");
+}
+
+TEST(TestS3selectFunctions, trim13)
+{//purpose: trim with the TRAILING keyword in upper case and an explicit trim character; only the trailing '1' characters are expected to be removed.
+test_single_column_single_row( "select trim(TRAILING '1' from '111abcdef111') from s3object ;" ,"111abcdef\n");
+}
+
TEST(TestS3selectFunctions, likescape)
{
test_single_column_single_row("select \"true\" from stdin where \"abc_defgh\" like \"abc$_defgh\" escape \"$\";","true\n");
@@ -3444,3 +3474,327 @@ input_json_data = R"(
}
+
+ TEST(TestS3selectFunctions, json_queries_format)
+{//purpose: run queries on a JSON document with JSON output requested (the 4th argument of run_json_query is true) and compare the result with the exact expected text, one JSON object per output row.
+ std::string result;
+ std::string expected_result;
+ std::string input_query;
+
+std::string input_json_data = R"(
+ {"root" : [
+ {"c1": 891,"c2": 903,"c3": 78,"c4": 566,"c5": 134,"c6": 121,"c7": 203,"c8": 795,"c9": 82,"c10": 135},
+ {"c1": 218,"c2": 881,"c3": 840,"c4": 385,"c5": 385,"c6": 674,"c7": 618,"c8": 99,"c9": 296,"c10": 545},
+ {"c1": 218,"c2": 881,"c3": 840,"c4": 385,"c5": 385,"c6": 674,"c7": 618,"c8": 99,"c9": 296,"c10": 545}
+ ]
+ }
+ )";
+
+ expected_result=R"({"_1":1327}
+)";
+ input_query = "select sum(_1.c1) from s3object[*].root;"; // un-aliased aggregate is keyed "_1"; 891 + 218 + 218 = 1327
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":1461}
+)";
+ input_query = "select sum(_1.c1) + min(_1.c5) from s3object[*].root;"; // expression over two aggregates: 1327 + 134 = 1461
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+
+ expected_result=R"({"c1":891}
+{"c1":218}
+{"c1":218}
+)";
+ input_query = "select _1.c1 from s3object[*].root;"; // a plain key projection is keyed by the JSON key name
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":218,"_2":903}
+)";
+ input_query = "select min(_1.c1), max(_1.c2) from s3object[*].root;"; // two un-aliased aggregates are keyed "_1" and "_2"
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"c1":891,"c2":903}
+{"c1":218,"c2":881}
+{"c1":218,"c2":881}
+)";
+ input_query = "select _1.c1, _1.c2 from s3object[*].root;"; // two key projections, one output object per array element
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"c2":903,"c1":891,"c4":566}
+{"c2":881,"c1":218,"c4":385}
+{"c2":881,"c1":218,"c4":385}
+)";
+ input_query = "select _1.c2, _1.c1, _1.c4 from s3object[*].root ;"; // output keys follow the projection order, not the order in the input document
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":1794}
+{"_1":1099}
+{"_1":1099}
+)";
+ input_query = "select _1.c2 + _1.c1 from s3object[*].root ;"; // an arithmetic expression has no key name, so it is keyed "_1"
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":891}
+{"_1":218}
+{"_1":218}
+)";
+ input_query = "select nullif(_1.c1, _1.c2) from s3object[*].root;"; // c1 differs from c2 in every row, so c1 is expected back
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":991}
+{"_1":318}
+{"_1":318}
+)";
+ input_query = "select _1.c1 + 100 from s3object[*].root;"; // key plus a literal
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"c13":null}
+{"c13":null}
+{"c13":null}
+)";
+ input_query = "select _1.c13 from s3object[*].root;"; // c13 does not exist in the input; null is expected under the requested key name
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":null}
+{"_1":null}
+{"_1":null}
+)";
+ input_query = "select _1.c15 * 2 from s3object[*].root;"; // arithmetic on a missing key is expected to yield null
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"_1":null}
+{"_1":null}
+{"_1":null}
+)";
+ input_query = "select _1.c15 + _1.c13 from s3object[*].root;"; // both operands are missing keys
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"x":891}
+{"x":218}
+{"x":218}
+)";
+ input_query = "select coalesce(_1.c1, 0) as x from s3object[*].root;"; // the alias becomes the output key
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"x":891,"c2":903}
+{"x":218,"c2":881}
+{"x":218,"c2":881}
+)";
+ input_query = "select _1.c1 as x, _1.c2 from s3object[*].root;"; // aliased projection followed by an un-aliased key projection
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"c2":903,"x":891}
+{"c2":881,"x":218}
+{"c2":881,"x":218}
+)";
+ input_query = "select _1.c2, _1.c1 as x from s3object[*].root;"; // same pair with the alias in second position
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+}
+
+TEST(TestS3selectFunctions, json_queries_format_1)
+{//purpose: JSON output format (4th argument of run_json_query is true) for a top-level key, a nested key and an array element of a single JSON object.
+ std::string result;
+ std::string expected_result;
+ std::string input_query;
+
+ std::string input_json_data = R"(
+{
+"firstName": "Joe",
+"lastName": "Jackson",
+"gender": "male",
+"age": "twenty",
+"address": {
+"streetAddress": "101",
+"city": "San Diego",
+"state": "CA"
+},
+"phoneNumbers": [
+{ "type": "home1", "number": "7349282_1", "addr": 11},
+{ "type": "home2", "number": "7349282_2", "addr": 22},
+{ "type": "home3", "number": "734928_3", "addr": 33},
+{ "type": "home4", "number": "734928_4", "addr": 44},
+{ "type": "home5", "number": "734928_5", "addr": 55},
+{ "type": "home6", "number": "734928_6", "addr": 66},
+{ "type": "home7", "number": "734928_7", "addr": 77}
+]
+}
+)";
+
+ expected_result=R"({"gender":male}
+)";
+ input_query = "select _1.gender from s3object[*] ;"; // NOTE(review): the expected value male is unquoted although the input value is a JSON string - confirm this output is intended
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"streetAddress":101}
+)";
+ input_query = "select _1.address.streetAddress from s3object[*];"; // nested key: the output key is the last path element; the input value "101" is a string and is expected unquoted
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+ expected_result=R"({"addr":11}
+)";
+ input_query = "select _1.phoneNumbers[0].addr from s3object[*];"; // array element access: index 0 selects the entry whose addr is 11
+
+ run_json_query(input_query.c_str(), input_json_data, result, true);
+ ASSERT_EQ(result,expected_result);
+
+}
+
+TEST(TestS3selectFunctions, json_format_csv_object)
+{//purpose: run queries on a CSV input with JSON output requested (4th argument of run_s3select, json_format, is true) and compare with the exact expected text. NOTE(review): the 5th argument (csv_json_format) is also true; run_s3select appears to skip its parquet/JSON cross-checks in that case - confirm against the helper.
+ std::string input_query{};
+ std::string s3select_res{};
+
+ std::string input = R"(383,886,777,915,793,335,386,492,649,421
+362,27,690,59,763,926,540,426,172,736
+211,368,567,429,782,530,862,123,67,135
+929,802,22,58,69,167,393,456,11,42
+229,373,421,919,784,537,198,324,315,370
+413,526,91,980,956,873,862,170,996,281
+305,925,84,327,336,505,846,729,313,857
+124,895,582,545,814,367,434,364,43,750
+87,808,276,178,788,584,403,651,754,399
+932,60,676,368,739,12,226,586,94,539
+)";
+
+ std::string expected_result{};
+
+ expected_result=R"({"_1":383,"_2":886}
+{"_1":362,"_2":27}
+{"_1":211,"_2":368}
+{"_1":929,"_2":802}
+{"_1":229,"_2":373}
+{"_1":413,"_2":526}
+{"_1":305,"_2":925}
+{"_1":124,"_2":895}
+{"_1":87,"_2":808}
+{"_1":932,"_2":60}
+)";
+
+ input_query = "select _1,_2 from s3object;"; // positional CSV columns are keyed by their positional names
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+ expected_result=R"({"_1":3975}
+)";
+
+ input_query = "select sum(int(_1)) from s3object;"; // aggregate over the first column: the ten values add up to 3975
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+ expected_result=R"({"x":383,"y":886}
+{"x":362,"y":27}
+{"x":211,"y":368}
+{"x":929,"y":802}
+{"x":229,"y":373}
+{"x":413,"y":526}
+{"x":305,"y":925}
+{"x":124,"y":895}
+{"x":87,"y":808}
+{"x":932,"y":60}
+)";
+
+ input_query = "select _1 as x, _2 as y from s3object;"; // aliases replace the positional names as output keys
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+ expected_result = R"({"_1":8}
+{"_1":2}
+{"_1":3}
+{"_1":8}
+{"_1":3}
+{"_1":5}
+{"_1":9}
+{"_1":8}
+{"_1":8}
+{"_1":6}
+)";
+
+ input_query = "select substring(_2, 1, 1) from s3object;"; // first character of the second column; an un-aliased function result is keyed "_1"
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+ expected_result = R"({"x":8}
+{"x":2}
+{"x":3}
+{"x":8}
+{"x":3}
+{"x":5}
+{"x":9}
+{"x":8}
+{"x":8}
+{"x":6}
+)";
+
+ input_query = "select substring(_2, 1, 1) as x from s3object;"; // same projection with an alias as the output key
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+ expected_result = R"({"c1":383,"_1":385}
+{"c1":362,"_1":364}
+{"c1":211,"_1":213}
+{"c1":929,"_1":931}
+{"c1":229,"_1":231}
+{"c1":413,"_1":415}
+{"c1":305,"_1":307}
+{"c1":124,"_1":126}
+{"c1":87,"_1":89}
+{"c1":932,"_1":934}
+)";
+
+ input_query = "select cast(_1 as int) as c1, c1 + 2 from s3object;"; // the second projection reuses the alias c1; being un-aliased it is keyed "_1" (not "_2") in the expected output
+
+ s3select_res = run_s3select(input_query, input, "", true, true);
+
+ ASSERT_EQ(s3select_res, expected_result);
+
+}
+
+
+
+
+
+
diff --git a/src/s3select/test/s3select_test.h b/src/s3select/test/s3select_test.h
index 307db8a4b..9e6fe3a12 100644
--- a/src/s3select/test/s3select_test.h
+++ b/src/s3select/test/s3select_test.h
@@ -617,7 +617,7 @@ std::string run_s3select_opserialization_quot(std::string expression,std::string
}
// JSON tests API's
-int run_json_query(const char* json_query, std::string& json_input,std::string& result)
+int run_json_query(const char* json_query, std::string& json_input,std::string& result, bool json_format = false)
{//purpose: run single-chunk json queries
s3select s3select_syntax;
@@ -628,19 +628,27 @@ int run_json_query(const char* json_query, std::string& json_input,std::string&
return -1;
}
- json_object json_query_processor(&s3select_syntax);
+ json_object m_s3_json_object;
+ json_object::csv_definitions json_definitions;
+
+ if(json_format) {
+ json_definitions.output_json_format = true;
+ }
+
+ m_s3_json_object.set_json_query(&s3select_syntax, json_definitions);
+
result.clear();
- status = json_query_processor.run_s3select_on_stream(result, json_input.data(), json_input.size(), json_input.size());
+ status = m_s3_json_object.run_s3select_on_stream(result, json_input.data(), json_input.size(), json_input.size(), json_format);
std::string prev_result = result;
result.clear();
- status = json_query_processor.run_s3select_on_stream(result, 0, 0, json_input.size());
+ status = m_s3_json_object.run_s3select_on_stream(result, 0, 0, json_input.size(), json_format);
result = prev_result + result;
return status;
}
-std::string run_s3select(std::string expression,std::string input, const char* json_query = "")
+std::string run_s3select(std::string expression,std::string input, const char* json_query = "", bool json_format = false, bool csv_json_format = true)
{//purpose: run query on multiple rows and return result(multiple projections).
s3select s3select_syntax;
std::string parquet_input = input;
@@ -654,52 +662,64 @@ std::string run_s3select(std::string expression,std::string input, const char* j
std::string s3select_result;
std::string json_result;
- s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
- s3_csv_object.m_csv_defintion.redundant_column = false;
+
+ csv_object::csv_defintions csv_definitions;
+
+ if(json_format) {
+ csv_definitions.output_json_format = true;
+ }
+
+ csv_definitions.redundant_column = false;
+
+ s3selectEngine::csv_object s3_csv_object;
+ s3_csv_object.set_csv_query(&s3select_syntax, csv_definitions);
s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(), false, false, true);
-#ifdef _ARROW_EXIST
- static int file_no = 1;
- csv_to_parquet(parquet_input);
- std::string parquet_result;
- run_query_on_parquet_file(expression.c_str(),PARQUET_FILENAME,parquet_result);
+ if(!csv_json_format) {
- if (strcmp(parquet_result.c_str(),s3select_result.c_str()))
- {
- std::cout << "failed on query " << expression << std::endl;
- std::cout << "input for query reside on" << "./failed_test_input" << std::to_string(file_no) << ".[csv|parquet]" << std::endl;
+ #ifdef _ARROW_EXIST
+ static int file_no = 1;
+ csv_to_parquet(parquet_input);
+ std::string parquet_result;
+ run_query_on_parquet_file(expression.c_str(),PARQUET_FILENAME,parquet_result);
- {
- std::string buffer;
+ if (strcmp(parquet_result.c_str(),s3select_result.c_str()))
+ {
+ std::cout << "failed on query " << expression << std::endl;
+ std::cout << "input for query reside on" << "./failed_test_input" << std::to_string(file_no) << ".[csv|parquet]" << std::endl;
- std::ifstream f(PARQUET_FILENAME);
- f.seekg(0, std::ios::end);
- buffer.resize(f.tellg());
- f.seekg(0);
- f.read(buffer.data(), buffer.size());
+ {
+ std::string buffer;
- std::string fn = std::string("./failed_test_input_") + std::to_string(file_no) + std::string(".parquet");
- std::ofstream fw(fn.c_str());
- fw.write(buffer.data(), buffer.size());
+ std::ifstream f(PARQUET_FILENAME);
+ f.seekg(0, std::ios::end);
+ buffer.resize(f.tellg());
+ f.seekg(0);
+ f.read(buffer.data(), buffer.size());
- fn = std::string("./failed_test_input_") + std::to_string(file_no++) + std::string(".csv");
- std::ofstream fw2(fn.c_str());
- fw2.write(parquet_input.data(), parquet_input.size());
+ std::string fn = std::string("./failed_test_input_") + std::to_string(file_no) + std::string(".parquet");
+ std::ofstream fw(fn.c_str());
+ fw.write(buffer.data(), buffer.size());
- }
- }
+ fn = std::string("./failed_test_input_") + std::to_string(file_no++) + std::string(".csv");
+ std::ofstream fw2(fn.c_str());
+ fw2.write(parquet_input.data(), parquet_input.size());
- parquet_csv_report_error(parquet_result,s3select_result);
-#endif //_ARROW_EXIST
+ }
+ }
+
+ parquet_csv_report_error(parquet_result,s3select_result);
+ #endif //_ARROW_EXIST
- if(strlen(json_query) == 0) {
- json_query = convert_query(expression);
- }
+ if(strlen(json_query) == 0) {
+ json_query = convert_query(expression);
+ }
- if(strcmp(json_query,JSON_NO_RUN)) {
- run_json_query(json_query, js, json_result);
- json_csv_report_error(json_result, s3select_result);
+ if(strcmp(json_query,JSON_NO_RUN)) {
+ run_json_query(json_query, js, json_result, json_format);
+ json_csv_report_error(json_result, s3select_result);
+ }
}
return s3select_result;