summaryrefslogtreecommitdiffstats
path: root/benchmarks/bench.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--benchmarks/bench.py225
1 files changed, 225 insertions, 0 deletions
diff --git a/benchmarks/bench.py b/benchmarks/bench.py
new file mode 100644
index 0000000..cef62a8
--- /dev/null
+++ b/benchmarks/bench.py
@@ -0,0 +1,225 @@
+import collections.abc
+
+# moz_sql_parser 3.10 compatibility
+collections.Iterable = collections.abc.Iterable
+import gc
+import timeit
+
+import moz_sql_parser
+import numpy as np
+import sqloxide
+import sqlparse
+import sqltree
+
+import sqlglot
+
+long = """
+SELECT
+ "e"."employee_id" AS "Employee #",
+ "e"."first_name" || ' ' || "e"."last_name" AS "Name",
+ "e"."email" AS "Email",
+ "e"."phone_number" AS "Phone",
+ TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date",
+ TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary",
+ "e"."commission_pct" AS "Comission %",
+ 'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job",
+ TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary",
+ "l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location",
+ "jh"."job_id" AS "History Job ID",
+ 'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title",
+ case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end
+FROM "employees" AS e
+JOIN "jobs" AS j
+ ON "e"."job_id" = "j"."job_id"
+LEFT JOIN "employees" AS m
+ ON "e"."manager_id" = "m"."employee_id"
+LEFT JOIN "departments" AS d
+ ON "d"."department_id" = "e"."department_id"
+LEFT JOIN "employees" AS dm
+ ON "d"."manager_id" = "dm"."employee_id"
+LEFT JOIN "locations" AS l
+ ON "d"."location_id" = "l"."location_id"
+LEFT JOIN "countries" AS c
+ ON "l"."country_id" = "c"."country_id"
+LEFT JOIN "regions" AS r
+ ON "c"."region_id" = "r"."region_id"
+LEFT JOIN "job_history" AS jh
+ ON "e"."employee_id" = "jh"."employee_id"
+LEFT JOIN "jobs" AS jj
+ ON "jj"."job_id" = "jh"."job_id"
+LEFT JOIN "departments" AS dd
+ ON "dd"."department_id" = "jh"."department_id"
+ORDER BY
+ "e"."employee_id"
+"""
+
+short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x"
+
+crazy = "SELECT 1+"
+crazy += "+".join(str(i) for i in range(500))
+crazy += " AS a, 2*"
+crazy += "*".join(str(i) for i in range(500))
+crazy += " AS b FROM x"
+
+tpch = """
+WITH "_e_0" AS (
+ SELECT
+ "partsupp"."ps_partkey" AS "ps_partkey",
+ "partsupp"."ps_suppkey" AS "ps_suppkey",
+ "partsupp"."ps_supplycost" AS "ps_supplycost"
+ FROM "partsupp" AS "partsupp"
+), "_e_1" AS (
+ SELECT
+ "region"."r_regionkey" AS "r_regionkey",
+ "region"."r_name" AS "r_name"
+ FROM "region" AS "region"
+ WHERE
+ "region"."r_name" = 'EUROPE'
+)
+SELECT
+ "supplier"."s_acctbal" AS "s_acctbal",
+ "supplier"."s_name" AS "s_name",
+ "nation"."n_name" AS "n_name",
+ "part"."p_partkey" AS "p_partkey",
+ "part"."p_mfgr" AS "p_mfgr",
+ "supplier"."s_address" AS "s_address",
+ "supplier"."s_phone" AS "s_phone",
+ "supplier"."s_comment" AS "s_comment"
+FROM (
+ SELECT
+ "part"."p_partkey" AS "p_partkey",
+ "part"."p_mfgr" AS "p_mfgr",
+ "part"."p_type" AS "p_type",
+ "part"."p_size" AS "p_size"
+ FROM "part" AS "part"
+ WHERE
+ "part"."p_size" = 15
+ AND "part"."p_type" LIKE '%BRASS'
+) AS "part"
+LEFT JOIN (
+ SELECT
+ MIN("partsupp"."ps_supplycost") AS "_col_0",
+ "partsupp"."ps_partkey" AS "_u_1"
+ FROM "_e_0" AS "partsupp"
+ CROSS JOIN "_e_1" AS "region"
+ JOIN (
+ SELECT
+ "nation"."n_nationkey" AS "n_nationkey",
+ "nation"."n_regionkey" AS "n_regionkey"
+ FROM "nation" AS "nation"
+ ) AS "nation"
+ ON "nation"."n_regionkey" = "region"."r_regionkey"
+ JOIN (
+ SELECT
+ "supplier"."s_suppkey" AS "s_suppkey",
+ "supplier"."s_nationkey" AS "s_nationkey"
+ FROM "supplier" AS "supplier"
+ ) AS "supplier"
+ ON "supplier"."s_nationkey" = "nation"."n_nationkey"
+ AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
+ GROUP BY
+ "partsupp"."ps_partkey"
+) AS "_u_0"
+ ON "part"."p_partkey" = "_u_0"."_u_1"
+CROSS JOIN "_e_1" AS "region"
+JOIN (
+ SELECT
+ "nation"."n_nationkey" AS "n_nationkey",
+ "nation"."n_name" AS "n_name",
+ "nation"."n_regionkey" AS "n_regionkey"
+ FROM "nation" AS "nation"
+) AS "nation"
+ ON "nation"."n_regionkey" = "region"."r_regionkey"
+JOIN "_e_0" AS "partsupp"
+ ON "part"."p_partkey" = "partsupp"."ps_partkey"
+JOIN (
+ SELECT
+ "supplier"."s_suppkey" AS "s_suppkey",
+ "supplier"."s_name" AS "s_name",
+ "supplier"."s_address" AS "s_address",
+ "supplier"."s_nationkey" AS "s_nationkey",
+ "supplier"."s_phone" AS "s_phone",
+ "supplier"."s_acctbal" AS "s_acctbal",
+ "supplier"."s_comment" AS "s_comment"
+ FROM "supplier" AS "supplier"
+) AS "supplier"
+ ON "supplier"."s_nationkey" = "nation"."n_nationkey"
+ AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
+WHERE
+ "partsupp"."ps_supplycost" = "_u_0"."_col_0"
+ AND NOT "_u_0"."_u_1" IS NULL
+ORDER BY
+ "supplier"."s_acctbal" DESC,
+ "nation"."n_name",
+ "supplier"."s_name",
+ "part"."p_partkey"
+LIMIT 100
+"""
+
+
+def sqlglot_parse(sql):
+ sqlglot.parse(sql, error_level=sqlglot.ErrorLevel.IGNORE)
+
+
+def sqltree_parse(sql):
+ sqltree.api.sqltree(sql.replace('"', '`').replace("''", '"'))
+
+
+def sqlparse_parse(sql):
+ sqlparse.parse(sql)
+
+
+def moz_sql_parser_parse(sql):
+ moz_sql_parser.parse(sql)
+
+
+def sqloxide_parse(sql):
+ sqloxide.parse_sql(sql, dialect="ansi")
+
+
+def border(columns):
+ columns = " | ".join(columns)
+ return f"| {columns} |"
+
+
+def diff(row, column):
+ if column == "Query":
+ return ""
+ column = row[column]
+ if isinstance(column, str):
+ return " (N/A)"
+ return f" ({str(column / row['sqlglot'])[0:5]})"
+
+
+libs = [
+ "sqlglot",
+ "sqltree",
+ "sqlparse",
+ "moz_sql_parser",
+ "sqloxide",
+]
+table = []
+
+for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items():
+ row = {"Query": name}
+ table.append(row)
+ for lib in libs:
+ try:
+ row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3))
+ except:
+ row[lib] = "error"
+
+columns = ["Query"] + libs
+widths = {column: max(len(column), 15) for column in columns}
+
+lines = [border(column.rjust(width) for column, width in widths.items())]
+lines.append(border(str("-" * width) for width in widths.values()))
+
+for i, row in enumerate(table):
+ lines.append(border(
+ (str(row[column])[0:7] + diff(row, column)).rjust(width)[0 : width]
+ for column, width in widths.items()
+ ))
+
+for line in lines:
+ print(line)