import collections.abc # moz_sql_parser 3.10 compatibility collections.Iterable = collections.abc.Iterable import gc import timeit import numpy as np import sqlfluff import moz_sql_parser import sqloxide import sqlparse import sqltree import sqlglot long = """ SELECT "e"."employee_id" AS "Employee #", "e"."first_name" || ' ' || "e"."last_name" AS "Name", "e"."email" AS "Email", "e"."phone_number" AS "Phone", TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date", TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary", "e"."commission_pct" AS "Comission %", 'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job", TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary", "l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location", "jh"."job_id" AS "History Job ID", 'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title", case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end FROM "employees" AS e JOIN "jobs" AS j ON "e"."job_id" = "j"."job_id" LEFT JOIN "employees" AS m ON "e"."manager_id" = "m"."employee_id" LEFT JOIN "departments" AS d ON "d"."department_id" = "e"."department_id" LEFT JOIN "employees" AS dm ON "d"."manager_id" = "dm"."employee_id" LEFT JOIN "locations" AS l ON "d"."location_id" = "l"."location_id" LEFT JOIN "countries" AS c ON "l"."country_id" = "c"."country_id" LEFT JOIN "regions" AS r ON "c"."region_id" = "r"."region_id" LEFT JOIN "job_history" AS jh ON "e"."employee_id" = "jh"."employee_id" LEFT JOIN "jobs" AS jj ON "jj"."job_id" = "jh"."job_id" LEFT JOIN "departments" AS dd ON "dd"."department_id" = "jh"."department_id" ORDER BY "e"."employee_id" """ short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x" crazy = "SELECT 1+" crazy += "+".join(str(i) for i in range(500)) crazy += " AS a, 2*" crazy += "*".join(str(i) for i in range(500)) crazy += " AS b FROM x" tpch = """ WITH "_e_0" AS ( SELECT "partsupp"."ps_partkey" AS "ps_partkey", "partsupp"."ps_suppkey" AS "ps_suppkey", "partsupp"."ps_supplycost" AS "ps_supplycost" FROM "partsupp" AS "partsupp" ), "_e_1" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' ) SELECT "supplier"."s_acctbal" AS "s_acctbal", "supplier"."s_name" AS "s_name", "nation"."n_name" AS "n_name", "part"."p_partkey" AS "p_partkey", "part"."p_mfgr" AS "p_mfgr", "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM ( SELECT "part"."p_partkey" AS "p_partkey", "part"."p_mfgr" AS "p_mfgr", "part"."p_type" AS "p_type", "part"."p_size" AS "p_size" FROM "part" AS "part" WHERE "part"."p_size" = 15 AND "part"."p_type" LIKE '%BRASS' ) AS "part" LEFT JOIN ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", "partsupp"."ps_partkey" AS "_u_1" FROM "_e_0" AS "partsupp" CROSS JOIN "_e_1" AS "region" JOIN ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_regionkey" AS "n_regionkey" FROM "nation" AS "nation" ) AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN ( SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_nationkey" AS "s_nationkey" FROM "supplier" AS "supplier" ) AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" GROUP BY "partsupp"."ps_partkey" ) AS "_u_0" ON "part"."p_partkey" = "_u_0"."_u_1" CROSS JOIN "_e_1" AS "region" JOIN ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_name" AS "n_name", "nation"."n_regionkey" AS "n_regionkey" FROM "nation" AS "nation" ) AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "_e_0" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" JOIN ( SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address", "supplier"."s_nationkey" AS "s_nationkey", "supplier"."s_phone" AS "s_phone", "supplier"."s_acctbal" AS "s_acctbal", "supplier"."s_comment" AS "s_comment" FROM "supplier" AS "supplier" ) AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" WHERE "partsupp"."ps_supplycost" = "_u_0"."_col_0" AND NOT "_u_0"."_u_1" IS NULL ORDER BY "supplier"."s_acctbal" DESC, "nation"."n_name", "supplier"."s_name", "part"."p_partkey" LIMIT 100 """ def sqlglot_parse(sql): sqlglot.parse(sql, error_level=sqlglot.ErrorLevel.IGNORE) def sqltree_parse(sql): sqltree.api.sqltree(sql.replace('"', '`').replace("''", '"')) def sqlparse_parse(sql): sqlparse.parse(sql) def moz_sql_parser_parse(sql): moz_sql_parser.parse(sql) def sqloxide_parse(sql): sqloxide.parse_sql(sql, dialect="ansi") def sqlfluff_parse(sql): sqlfluff.parse(sql) def border(columns): columns = " | ".join(columns) return f"| {columns} |" def diff(row, column): if column == "Query": return "" column = row[column] if isinstance(column, str): return " (N/A)" return f" ({str(column / row['sqlglot'])[0:5]})" libs = [ "sqlglot", "sqlfluff", "sqltree", "sqlparse", "moz_sql_parser", "sqloxide", ] table = [] for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items(): row = {"Query": name} table.append(row) for lib in libs: try: row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3)) except Exception as e: print(e) row[lib] = "error" columns = ["Query"] + libs widths = {column: max(len(column), 15) for column in columns} lines = [border(column.rjust(width) for column, width in widths.items())] lines.append(border(str("-" * width) for width in widths.values())) for i, row in enumerate(table): lines.append(border( (str(row[column])[0:7] + diff(row, column)).rjust(width)[0 : width] for column, width in widths.items() )) for line in lines: print(line)