From 28cc22419e32a65fea2d1678400265b8cabc3aff Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 15 Sep 2022 18:46:17 +0200 Subject: Adding upstream version 6.0.4. Signed-off-by: Daniel Baumann --- benchmarks/bench.py | 225 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 benchmarks/bench.py (limited to 'benchmarks') diff --git a/benchmarks/bench.py b/benchmarks/bench.py new file mode 100644 index 0000000..cef62a8 --- /dev/null +++ b/benchmarks/bench.py @@ -0,0 +1,225 @@ +import collections.abc + +# moz_sql_parser 3.10 compatibility +collections.Iterable = collections.abc.Iterable +import gc +import timeit + +import moz_sql_parser +import numpy as np +import sqloxide +import sqlparse +import sqltree + +import sqlglot + +long = """ +SELECT + "e"."employee_id" AS "Employee #", + "e"."first_name" || ' ' || "e"."last_name" AS "Name", + "e"."email" AS "Email", + "e"."phone_number" AS "Phone", + TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date", + TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary", + "e"."commission_pct" AS "Comission %", + 'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job", + TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary", + "l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location", + "jh"."job_id" AS "History Job ID", + 'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title", + case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end +FROM "employees" AS e +JOIN "jobs" AS j + ON "e"."job_id" = "j"."job_id" +LEFT JOIN "employees" AS m + ON "e"."manager_id" = "m"."employee_id" +LEFT JOIN "departments" AS d + ON "d"."department_id" = "e"."department_id" +LEFT JOIN "employees" AS dm + ON "d"."manager_id" = "dm"."employee_id" +LEFT JOIN "locations" AS l + ON "d"."location_id" = "l"."location_id" +LEFT JOIN "countries" AS c + ON "l"."country_id" = "c"."country_id" +LEFT JOIN "regions" AS r + ON "c"."region_id" = "r"."region_id" +LEFT JOIN "job_history" AS jh + ON "e"."employee_id" = "jh"."employee_id" +LEFT JOIN "jobs" AS jj + ON "jj"."job_id" = "jh"."job_id" +LEFT JOIN "departments" AS dd + ON "dd"."department_id" = "jh"."department_id" +ORDER BY + "e"."employee_id" +""" + +short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x" + +crazy = "SELECT 1+" +crazy += "+".join(str(i) for i in range(500)) +crazy += " AS a, 2*" +crazy += "*".join(str(i) for i in range(500)) +crazy += " AS b FROM x" + +tpch = """ +WITH "_e_0" AS ( + SELECT + "partsupp"."ps_partkey" AS "ps_partkey", + "partsupp"."ps_suppkey" AS "ps_suppkey", + "partsupp"."ps_supplycost" AS "ps_supplycost" + FROM "partsupp" AS "partsupp" +), "_e_1" AS ( + SELECT + "region"."r_regionkey" AS "r_regionkey", + "region"."r_name" AS "r_name" + FROM "region" AS "region" + WHERE + "region"."r_name" = 'EUROPE' +) +SELECT + "supplier"."s_acctbal" AS "s_acctbal", + "supplier"."s_name" AS "s_name", + "nation"."n_name" AS "n_name", + "part"."p_partkey" AS "p_partkey", + "part"."p_mfgr" AS "p_mfgr", + "supplier"."s_address" AS "s_address", + "supplier"."s_phone" AS "s_phone", + "supplier"."s_comment" AS "s_comment" +FROM ( + SELECT + "part"."p_partkey" AS "p_partkey", + "part"."p_mfgr" AS "p_mfgr", + "part"."p_type" AS "p_type", + "part"."p_size" AS "p_size" + FROM "part" AS "part" + WHERE + "part"."p_size" = 15 + AND "part"."p_type" LIKE '%BRASS' +) AS "part" +LEFT JOIN ( + SELECT + MIN("partsupp"."ps_supplycost") AS "_col_0", + "partsupp"."ps_partkey" AS "_u_1" + FROM "_e_0" AS "partsupp" + CROSS JOIN "_e_1" AS "region" + JOIN ( + SELECT + "nation"."n_nationkey" AS "n_nationkey", + "nation"."n_regionkey" AS "n_regionkey" + FROM "nation" AS "nation" + ) AS "nation" + ON "nation"."n_regionkey" = "region"."r_regionkey" + JOIN ( + SELECT + "supplier"."s_suppkey" AS "s_suppkey", + "supplier"."s_nationkey" AS "s_nationkey" + FROM "supplier" AS "supplier" + ) AS "supplier" + ON "supplier"."s_nationkey" = "nation"."n_nationkey" + AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" + GROUP BY + "partsupp"."ps_partkey" +) AS "_u_0" + ON "part"."p_partkey" = "_u_0"."_u_1" +CROSS JOIN "_e_1" AS "region" +JOIN ( + SELECT + "nation"."n_nationkey" AS "n_nationkey", + "nation"."n_name" AS "n_name", + "nation"."n_regionkey" AS "n_regionkey" + FROM "nation" AS "nation" +) AS "nation" + ON "nation"."n_regionkey" = "region"."r_regionkey" +JOIN "_e_0" AS "partsupp" + ON "part"."p_partkey" = "partsupp"."ps_partkey" +JOIN ( + SELECT + "supplier"."s_suppkey" AS "s_suppkey", + "supplier"."s_name" AS "s_name", + "supplier"."s_address" AS "s_address", + "supplier"."s_nationkey" AS "s_nationkey", + "supplier"."s_phone" AS "s_phone", + "supplier"."s_acctbal" AS "s_acctbal", + "supplier"."s_comment" AS "s_comment" + FROM "supplier" AS "supplier" +) AS "supplier" + ON "supplier"."s_nationkey" = "nation"."n_nationkey" + AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" +WHERE + "partsupp"."ps_supplycost" = "_u_0"."_col_0" + AND NOT "_u_0"."_u_1" IS NULL +ORDER BY + "supplier"."s_acctbal" DESC, + "nation"."n_name", + "supplier"."s_name", + "part"."p_partkey" +LIMIT 100 +""" + + +def sqlglot_parse(sql): + sqlglot.parse(sql, error_level=sqlglot.ErrorLevel.IGNORE) + + +def sqltree_parse(sql): + sqltree.api.sqltree(sql.replace('"', '`').replace("''", '"')) + + +def sqlparse_parse(sql): + sqlparse.parse(sql) + + +def moz_sql_parser_parse(sql): + moz_sql_parser.parse(sql) + + +def sqloxide_parse(sql): + sqloxide.parse_sql(sql, dialect="ansi") + + +def border(columns): + columns = " | ".join(columns) + return f"| {columns} |" + + +def diff(row, column): + if column == "Query": + return "" + column = row[column] + if isinstance(column, str): + return " (N/A)" + return f" ({str(column / row['sqlglot'])[0:5]})" + + +libs = [ + "sqlglot", + "sqltree", + "sqlparse", + "moz_sql_parser", + "sqloxide", +] +table = [] + +for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items(): + row = {"Query": name} + table.append(row) + for lib in libs: + try: + row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3)) + except: + row[lib] = "error" + +columns = ["Query"] + libs +widths = {column: max(len(column), 15) for column in columns} + +lines = [border(column.rjust(width) for column, width in widths.items())] +lines.append(border(str("-" * width) for width in widths.values())) + +for i, row in enumerate(table): + lines.append(border( + (str(row[column])[0:7] + diff(row, column)).rjust(width)[0 : width] + for column, width in widths.items() + )) + +for line in lines: + print(line) -- cgit v1.2.3