From a37ced9b20e9a4ceba81f5567b5ba0cc7f6a18b6 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 08:19:21 -0700 Subject: [PATCH 01/12] Fix: detect mutating keywords inside CTEs in read-only mode The read-only SQL check only examined the first keyword, so queries like `WITH updated AS (UPDATE ...) SELECT ...` bypassed the restriction. Now scans the full statement (after stripping comments and strings) for DML/DDL keywords. Closes #271 Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 37 ++++++++++++++++ src/utils/allowed-keywords.ts | 45 ++++++++++++++++++-- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 7a3d551..8814183 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -68,6 +68,43 @@ describe("isReadOnlySQL", () => { }); }); + describe("CTE with mutating operations", () => { + it("should reject UPDATE inside a CTE", () => { + const sql = "WITH updated AS (UPDATE contracts SET site_location_postcode = 'SW11' WHERE id = 1 RETURNING id) SELECT * FROM updated"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should reject DELETE inside a CTE", () => { + const sql = "WITH deleted AS (DELETE FROM users WHERE id = 1 RETURNING *) SELECT * FROM deleted"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should reject INSERT inside a CTE", () => { + const sql = "WITH inserted AS (INSERT INTO users (name) VALUES ('test') RETURNING *) SELECT * FROM inserted"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should allow a pure SELECT CTE", () => { + const sql = "WITH cte AS (SELECT * FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject DROP inside a CTE-like construct", () => { + const sql = "WITH x AS (SELECT 1) DROP TABLE users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should not be fooled by mutating keywords in string literals", () => { + const sql = "SELECT * FROM users WHERE name = 'UPDATE me'"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should not be fooled by mutating keywords in comments", () => { + const sql = "/* UPDATE users SET x = 1 */ SELECT * FROM users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + }); + describe("edge cases", () => { it("should treat empty SQL after comment stripping as not read-only", () => { expect(isReadOnlySQL("-- just a comment", "postgres")).toBe(false); diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index 00280aa..d88f5e8 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -15,11 +15,38 @@ export const allowedKeywords: Record = { }; /** - * Check if a SQL query is read-only based on its first keyword. - * Strips comments and strings before analyzing to avoid false positives. + * Keywords that indicate data-modifying operations. + * Used to detect DML/DDL hidden inside CTEs or other constructs. + */ +const mutatingKeywords = [ + "insert", + "update", + "delete", + "drop", + "alter", + "create", + "truncate", + "replace", + "merge", + "grant", + "revoke", + "rename", +]; + +/** Matches any of the mutating keywords as whole words */ +const mutatingPattern = new RegExp( + `\\b(?:${mutatingKeywords.join("|")})\\b`, + "i", +); + +/** + * Check if a SQL query is read-only. + * 1. Strips comments and string literals before analyzing. + * 2. Verifies the first keyword is in the allow-list. + * 3. Scans the full statement for mutating keywords (e.g. UPDATE inside a CTE). * @param sql The SQL query to check * @param connectorType The database type to check against - * @returns True if the query is read-only (starts with allowed keywords) + * @returns True if the query is read-only */ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string): boolean { // Strip comments and strings before analyzing @@ -37,5 +64,15 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string const keywordList = allowedKeywords[connectorType as ConnectorType] || []; - return keywordList.includes(firstWord); + if (!keywordList.includes(firstWord)) { + return false; + } + + // Even if the first keyword is allowed (e.g. WITH), reject if the statement + // contains data-modifying keywords anywhere (e.g. WITH cte AS (UPDATE ...)) + if (mutatingPattern.test(cleanedSQL)) { + return false; + } + + return true; } From 04cfa48140bb056ceca6994a8d18c31a35e65126 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 08:27:48 -0700 Subject: [PATCH 02/12] Fix: exclude REPLACE() string function from mutating keyword detection REPLACE followed by ( is a string function call (e.g. SELECT REPLACE(...)), not a mutating REPLACE INTO statement. Use negative lookahead to avoid false positives. Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 10 ++++++++++ src/utils/allowed-keywords.ts | 9 +++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 8814183..f50f96c 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -103,6 +103,16 @@ describe("isReadOnlySQL", () => { const sql = "/* UPDATE users SET x = 1 */ SELECT * FROM users"; expect(isReadOnlySQL(sql, "postgres")).toBe(true); }); + + it("should allow REPLACE() as a string function in SELECT", () => { + const sql = "SELECT REPLACE(name, 'a', 'b') FROM users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject REPLACE INTO as a mutating statement", () => { + const sql = "REPLACE INTO users (id, name) VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); }); describe("edge cases", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index d88f5e8..3abd7c8 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -33,9 +33,14 @@ const mutatingKeywords = [ "rename", ]; -/** Matches any of the mutating keywords as whole words */ +/** + * Matches any of the mutating keywords as whole words. + * Special-cases REPLACE so that function calls like REPLACE(...) + * in SELECT queries are not treated as mutating. + */ +const nonReplaceKeywords = mutatingKeywords.filter(k => k !== "replace"); const mutatingPattern = new RegExp( - `\\b(?:${mutatingKeywords.join("|")})\\b`, + `\\b(?:${nonReplaceKeywords.join("|")}|replace(?!\\s*\\())\\b`, "i", ); From 4ce777ad2b138cd6d0971a3e4a488d25655c0c1c Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 08:35:32 -0700 Subject: [PATCH 03/12] Fix: scope mutating scan to WITH statements, add SELECT INTO detection - Only scan for mutating keywords when first keyword is WITH (fixes false rejection of SHOW CREATE TABLE in MySQL/MariaDB) - Add SELECT ... INTO detection for Postgres/MySQL/SQL Server - EXPLAIN with DML is allowed since it doesn't execute the statement - Add 6 more tests for SHOW CREATE, EXPLAIN, and SELECT INTO Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 29 ++++++++++++++++++++ src/utils/allowed-keywords.ts | 17 +++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index f50f96c..521f332 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -115,6 +115,35 @@ describe("isReadOnlySQL", () => { }); }); + describe("SHOW CREATE and metadata queries", () => { + it("should allow SHOW CREATE TABLE in MySQL", () => { + expect(isReadOnlySQL("SHOW CREATE TABLE users", "mysql")).toBe(true); + }); + + it("should allow SHOW CREATE PROCEDURE in MariaDB", () => { + expect(isReadOnlySQL("SHOW CREATE PROCEDURE my_proc", "mariadb")).toBe(true); + }); + + it("should allow EXPLAIN with mutating statement", () => { + // EXPLAIN doesn't execute the statement, just shows the plan + expect(isReadOnlySQL("EXPLAIN DELETE FROM users", "postgres")).toBe(true); + }); + }); + + describe("SELECT INTO", () => { + it("should reject SELECT INTO (Postgres table creation)", () => { + expect(isReadOnlySQL("SELECT * INTO new_table FROM users", "postgres")).toBe(false); + }); + + it("should reject SELECT INTO OUTFILE (MySQL)", () => { + expect(isReadOnlySQL("SELECT * INTO OUTFILE '/tmp/data.csv' FROM users", "mysql")).toBe(false); + }); + + it("should reject SELECT INTO with WHERE clause", () => { + expect(isReadOnlySQL("SELECT id, name INTO backup_table FROM users WHERE active = true", "sqlserver")).toBe(false); + }); + }); + describe("edge cases", () => { it("should treat empty SQL after comment stripping as not read-only", () => { expect(isReadOnlySQL("-- just a comment", "postgres")).toBe(false); diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index 3abd7c8..f2a5e47 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -44,11 +44,15 @@ const mutatingPattern = new RegExp( "i", ); +/** Detects SELECT ... INTO which writes data despite starting with SELECT */ +const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; + /** * Check if a SQL query is read-only. * 1. Strips comments and string literals before analyzing. * 2. Verifies the first keyword is in the allow-list. - * 3. Scans the full statement for mutating keywords (e.g. UPDATE inside a CTE). + * 3. For WITH statements, scans for mutating keywords (e.g. UPDATE inside a CTE). + * 4. For SELECT statements, checks for SELECT ... INTO. * @param sql The SQL query to check * @param connectorType The database type to check against * @returns True if the query is read-only @@ -73,9 +77,14 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string return false; } - // Even if the first keyword is allowed (e.g. WITH), reject if the statement - // contains data-modifying keywords anywhere (e.g. WITH cte AS (UPDATE ...)) - if (mutatingPattern.test(cleanedSQL)) { + // WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...)) + // Scan the full statement for mutating keywords. + if (firstWord === "with" && mutatingPattern.test(cleanedSQL)) { + return false; + } + + // SELECT ... INTO writes data (creates tables or writes to files) + if (firstWord === "select" && selectIntoPattern.test(cleanedSQL)) { return false; } From e488858ab8d5f87f0b45485814a86c729d9567c7 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 09:37:41 -0700 Subject: [PATCH 04/12] Fix: handle WITH...SELECT INTO, EXPLAIN ANALYZE with DML, REPLACE() in CTEs - WITH statements now also check for SELECT ... INTO - EXPLAIN ANALYZE with DML is rejected (Postgres executes the statement) - EXPLAIN without ANALYZE remains allowed with DML (plan only) - Fixed regex word boundary for parenthesized EXPLAIN (ANALYZE) form - Added tests for REPLACE() inside CTEs, WITH...SELECT INTO, EXPLAIN ANALYZE DELETE, EXPLAIN (ANALYZE) DELETE Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 22 +++++++++++++++ src/utils/allowed-keywords.ts | 28 +++++++++++++++++--- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 521f332..4a41609 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -109,10 +109,20 @@ describe("isReadOnlySQL", () => { expect(isReadOnlySQL(sql, "postgres")).toBe(true); }); + it("should allow REPLACE() as a string function inside a WITH CTE", () => { + const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS name FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + it("should reject REPLACE INTO as a mutating statement", () => { const sql = "REPLACE INTO users (id, name) VALUES (1, 'test')"; expect(isReadOnlySQL(sql, "mysql")).toBe(false); }); + + it("should reject WITH ... SELECT INTO", () => { + const sql = "WITH cte AS (SELECT * FROM users) SELECT * INTO new_table FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); }); describe("SHOW CREATE and metadata queries", () => { @@ -128,6 +138,18 @@ describe("isReadOnlySQL", () => { // EXPLAIN doesn't execute the statement, just shows the plan expect(isReadOnlySQL("EXPLAIN DELETE FROM users", "postgres")).toBe(true); }); + + it("should reject EXPLAIN ANALYZE with DML (Postgres executes the statement)", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE DELETE FROM users", "postgres")).toBe(false); + }); + + it("should reject EXPLAIN (ANALYZE) with DML", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE) DELETE FROM users", "postgres")).toBe(false); + }); + + it("should allow EXPLAIN ANALYZE with SELECT", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * FROM users", "postgres")).toBe(true); + }); }); describe("SELECT INTO", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index f2a5e47..55c4510 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -47,12 +47,17 @@ const mutatingPattern = new RegExp( /** Detects SELECT ... INTO which writes data despite starting with SELECT */ const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; +/** Detects EXPLAIN ANALYZE which actually executes the statement. + * Matches both `EXPLAIN ANALYZE ...` and `EXPLAIN (ANALYZE) ...` / `EXPLAIN (ANALYZE, ...) ...` */ +const explainAnalyzePattern = /^explain\s+(?:\([^)]*\banalyze\b[^)]*\)|\banalyze\b)/i; + /** * Check if a SQL query is read-only. * 1. Strips comments and string literals before analyzing. * 2. Verifies the first keyword is in the allow-list. - * 3. For WITH statements, scans for mutating keywords (e.g. UPDATE inside a CTE). + * 3. For WITH statements, scans for mutating keywords and SELECT INTO. * 4. For SELECT statements, checks for SELECT ... INTO. + * 5. For EXPLAIN statements, rejects EXPLAIN ANALYZE with DML. * @param sql The SQL query to check * @param connectorType The database type to check against * @returns True if the query is read-only @@ -78,9 +83,14 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string } // WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...)) - // Scan the full statement for mutating keywords. - if (firstWord === "with" && mutatingPattern.test(cleanedSQL)) { - return false; + // or use SELECT ... INTO in the final query. + if (firstWord === "with") { + if (mutatingPattern.test(cleanedSQL)) { + return false; + } + if (selectIntoPattern.test(cleanedSQL)) { + return false; + } } // SELECT ... INTO writes data (creates tables or writes to files) @@ -88,5 +98,15 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string return false; } + // EXPLAIN ANALYZE actually executes the statement (Postgres) + // Reject if it contains DML after the ANALYZE keyword + if (firstWord === "explain" && explainAnalyzePattern.test(cleanedSQL)) { + // Extract the part after EXPLAIN [ANALYZE|(...)] and check for DML + const afterExplain = cleanedSQL.replace(explainAnalyzePattern, "").trim(); + if (afterExplain && mutatingPattern.test(afterExplain)) { + return false; + } + } + return true; } From 88f9e42368d45ae4c8e2cea3f6a7dca4b927a47b Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 09:53:36 -0700 Subject: [PATCH 05/12] Fix: validate EXPLAIN ANALYZE inner statement with full read-only logic Reuse isReadOnlySQL recursively on the statement after EXPLAIN ANALYZE, so SELECT INTO and other write-capable forms are also caught. Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 4 ++++ src/utils/allowed-keywords.ts | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 4a41609..e260188 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -150,6 +150,10 @@ describe("isReadOnlySQL", () => { it("should allow EXPLAIN ANALYZE with SELECT", () => { expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * FROM users", "postgres")).toBe(true); }); + + it("should reject EXPLAIN ANALYZE with SELECT INTO", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * INTO new_table FROM users", "postgres")).toBe(false); + }); }); describe("SELECT INTO", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index 55c4510..c846d5c 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -99,11 +99,10 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string } // EXPLAIN ANALYZE actually executes the statement (Postgres) - // Reject if it contains DML after the ANALYZE keyword + // Validate the inner statement using the same read-only logic if (firstWord === "explain" && explainAnalyzePattern.test(cleanedSQL)) { - // Extract the part after EXPLAIN [ANALYZE|(...)] and check for DML const afterExplain = cleanedSQL.replace(explainAnalyzePattern, "").trim(); - if (afterExplain && mutatingPattern.test(afterExplain)) { + if (afterExplain && !isReadOnlySQL(afterExplain, connectorType)) { return false; } } From 20aef0e1157f1cd36bc404b52a2ad16e34abac61 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 10:16:09 -0700 Subject: [PATCH 06/12] Fix: handle EXPLAIN ANALYZE VERBOSE in read-only check Strip optional VERBOSE keyword after ANALYZE so the recursive isReadOnlySQL check receives the actual inner statement. Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 8 ++++++++ src/utils/allowed-keywords.ts | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index e260188..c3b56e1 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -154,6 +154,14 @@ describe("isReadOnlySQL", () => { it("should reject EXPLAIN ANALYZE with SELECT INTO", () => { expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * INTO new_table FROM users", "postgres")).toBe(false); }); + + it("should allow EXPLAIN ANALYZE VERBOSE with SELECT", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE SELECT * FROM users", "postgres")).toBe(true); + }); + + it("should reject EXPLAIN ANALYZE VERBOSE with DML", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE DELETE FROM users", "postgres")).toBe(false); + }); }); describe("SELECT INTO", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index c846d5c..fe6cbc9 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -49,7 +49,7 @@ const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; /** Detects EXPLAIN ANALYZE which actually executes the statement. * Matches both `EXPLAIN ANALYZE ...` and `EXPLAIN (ANALYZE) ...` / `EXPLAIN (ANALYZE, ...) ...` */ -const explainAnalyzePattern = /^explain\s+(?:\([^)]*\banalyze\b[^)]*\)|\banalyze\b)/i; +const explainAnalyzePattern = /^explain\s+(?:\([^)]*\banalyze\b[^)]*\)|\banalyze\b(?:\s+verbose\b)?)/i; /** * Check if a SQL query is read-only. From 439fca7d684f3638ae5e9571030dea0421d44058 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 10:34:39 -0700 Subject: [PATCH 07/12] Fix: make REPLACE detection dialect-aware (MySQL/MariaDB only) REPLACE INTO is a MySQL/MariaDB-specific statement. In Postgres/SQLite/ SQL Server, REPLACE is only a function, so it should not trigger the mutating keyword check. This avoids false rejections when 'replace' appears as a CTE name or alias in non-MySQL dialects. Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 10 ++++++++ src/utils/allowed-keywords.ts | 26 ++++++++++++++------ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index c3b56e1..8c803d7 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -119,6 +119,16 @@ describe("isReadOnlySQL", () => { expect(isReadOnlySQL(sql, "mysql")).toBe(false); }); + it("should allow a CTE named 'replace' in Postgres", () => { + const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject REPLACE (non-function) inside WITH in MySQL", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + it("should reject WITH ... SELECT INTO", () => { const sql = "WITH cte AS (SELECT * FROM users) SELECT * INTO new_table FROM cte"; expect(isReadOnlySQL(sql, "postgres")).toBe(false); diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index fe6cbc9..e59d0b7 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -26,24 +26,34 @@ const mutatingKeywords = [ "alter", "create", "truncate", - "replace", "merge", "grant", "revoke", "rename", ]; +/** Base pattern: matches any mutating keyword as a whole word */ +const mutatingPattern = new RegExp( + `\\b(?:${mutatingKeywords.join("|")})\\b`, + "i", +); + /** - * Matches any of the mutating keywords as whole words. - * Special-cases REPLACE so that function calls like REPLACE(...) - * in SELECT queries are not treated as mutating. + * Extended pattern for MySQL/MariaDB that also detects REPLACE as a statement. + * REPLACE(...) function calls are excluded via negative lookahead. */ -const nonReplaceKeywords = mutatingKeywords.filter(k => k !== "replace"); -const mutatingPattern = new RegExp( - `\\b(?:${nonReplaceKeywords.join("|")}|replace(?!\\s*\\())\\b`, +const mutatingPatternMySQL = new RegExp( + `\\b(?:${mutatingKeywords.join("|")}|replace(?!\\s*\\())\\b`, "i", ); +function getMutatingPattern(connectorType: ConnectorType | string): RegExp { + if (connectorType === "mysql" || connectorType === "mariadb") { + return mutatingPatternMySQL; + } + return mutatingPattern; +} + /** Detects SELECT ... INTO which writes data despite starting with SELECT */ const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; @@ -85,7 +95,7 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string // WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...)) // or use SELECT ... INTO in the final query. if (firstWord === "with") { - if (mutatingPattern.test(cleanedSQL)) { + if (getMutatingPattern(connectorType).test(cleanedSQL)) { return false; } if (selectIntoPattern.test(cleanedSQL)) { From 7f6116089b7da2ebf4c5ec91bde0875c2d254c38 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Thu, 12 Mar 2026 11:21:03 -0700 Subject: [PATCH 08/12] Fix: reject standalone ANALYZE, handle EXPLAIN (ANALYZE false/off) - Remove ANALYZE from allowed first keywords since it updates statistics (writes to system catalogs) in Postgres/MySQL/SQLite - EXPLAIN (ANALYZE false/off/0) does not execute, so allow it with DML - Add MySQL/MariaDB REPLACE() function tests to ensure no false positives Co-Authored-By: Claude Opus 4.6 --- src/utils/__tests__/allowed-keywords.test.ts | 22 ++++++++++++++++++++ src/utils/allowed-keywords.ts | 14 +++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 8c803d7..9e35d7d 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -66,6 +66,20 @@ describe("isReadOnlySQL", () => { it("should not recognize SHOW as read-only for SQLite", () => { expect(isReadOnlySQL("SHOW TABLES", "sqlite")).toBe(false); }); + + it("should reject standalone ANALYZE (updates statistics)", () => { + expect(isReadOnlySQL("ANALYZE users", "postgres")).toBe(false); + expect(isReadOnlySQL("ANALYZE", "mysql")).toBe(false); + }); + + it("should allow REPLACE() as a function in MySQL SELECT", () => { + expect(isReadOnlySQL("SELECT REPLACE(name, 'a', 'b') FROM users", "mysql")).toBe(true); + }); + + it("should allow REPLACE() inside a WITH CTE in MySQL", () => { + const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS cleaned FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "mysql")).toBe(true); + }); }); describe("CTE with mutating operations", () => { @@ -172,6 +186,14 @@ describe("isReadOnlySQL", () => { it("should reject EXPLAIN ANALYZE VERBOSE with DML", () => { expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE DELETE FROM users", "postgres")).toBe(false); }); + + it("should allow EXPLAIN (ANALYZE false) with DML (not executed)", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE false) DELETE FROM users", "postgres")).toBe(true); + }); + + it("should allow EXPLAIN (ANALYZE off) with DML (not executed)", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE off) DELETE FROM users", "postgres")).toBe(true); + }); }); describe("SELECT INTO", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index e59d0b7..f6a2dca 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -7,10 +7,10 @@ import { stripCommentsAndStrings } from "./sql-parser.js"; * but also other queries that are not destructive */ export const allowedKeywords: Record = { - postgres: ["select", "with", "explain", "analyze", "show"], - mysql: ["select", "with", "explain", "analyze", "show", "describe", "desc"], - mariadb: ["select", "with", "explain", "analyze", "show", "describe", "desc"], - sqlite: ["select", "with", "explain", "analyze", "pragma"], + postgres: ["select", "with", "explain", "show"], + mysql: ["select", "with", "explain", "show", "describe", "desc"], + mariadb: ["select", "with", "explain", "show", "describe", "desc"], + sqlite: ["select", "with", "explain", "pragma"], sqlserver: ["select", "with", "explain", "showplan"], }; @@ -58,8 +58,10 @@ function getMutatingPattern(connectorType: ConnectorType | string): RegExp { const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; /** Detects EXPLAIN ANALYZE which actually executes the statement. - * Matches both `EXPLAIN ANALYZE ...` and `EXPLAIN (ANALYZE) ...` / `EXPLAIN (ANALYZE, ...) ...` */ -const explainAnalyzePattern = /^explain\s+(?:\([^)]*\banalyze\b[^)]*\)|\banalyze\b(?:\s+verbose\b)?)/i; + * Matches both `EXPLAIN ANALYZE ...` and `EXPLAIN (ANALYZE) ...` / `EXPLAIN (ANALYZE, ...) ...`. + * Excludes explicitly disabled forms: ANALYZE false/off/0 */ +const explainAnalyzePattern = + /^explain\s+(?:\([^)]*\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)[^)]*\)|\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)(?:\s+verbose\b)?)/i; /** * Check if a SQL query is read-only. From b37af1b0bb9bdf0a8b51fc61ea3a734c2691915d Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Sat, 28 Mar 2026 06:26:51 -0700 Subject: [PATCH 09/12] Fix: narrow MySQL REPLACE detection to REPLACE INTO statement form The previous regex `replace(?!\s*\()` rejected any standalone word `replace` that wasn't a function call, causing false positives for identifiers named `replace` (e.g. CTE aliases). Now only matches `REPLACE [LOW_PRIORITY|DELAYED] INTO` which is the actual MySQL REPLACE statement syntax. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/utils/__tests__/allowed-keywords.test.ts | 5 +++++ src/utils/allowed-keywords.ts | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 9e35d7d..fff9363 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -138,6 +138,11 @@ describe("isReadOnlySQL", () => { expect(isReadOnlySQL(sql, "postgres")).toBe(true); }); + it("should allow a CTE named 'replace' in MySQL", () => { + const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace"; + expect(isReadOnlySQL(sql, "mysql")).toBe(true); + }); + it("should reject REPLACE (non-function) inside WITH in MySQL", () => { const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')"; expect(isReadOnlySQL(sql, "mysql")).toBe(false); diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index f6a2dca..f399173 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -40,10 +40,11 @@ const mutatingPattern = new RegExp( /** * Extended pattern for MySQL/MariaDB that also detects REPLACE as a statement. - * REPLACE(...) function calls are excluded via negative lookahead. + * Only matches `REPLACE INTO` (with optional LOW_PRIORITY/DELAYED), not + * REPLACE() function calls or identifiers named `replace`. */ const mutatingPatternMySQL = new RegExp( - `\\b(?:${mutatingKeywords.join("|")}|replace(?!\\s*\\())\\b`, + `\\b(?:${mutatingKeywords.join("|")}|replace\\s+(?:(?:low_priority|delayed)\\s+)?into)\\b`, "i", ); From 7719d4e0c0991198df8f27006b66daeacf97b22f Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Sat, 28 Mar 2026 06:33:42 -0700 Subject: [PATCH 10/12] Fix: add REPLACE INTO detection for SQLite in read-only mode SQLite also supports REPLACE INTO, so the mutating keyword scan needs to detect it for SQLite connectors, not just MySQL/MariaDB. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/utils/__tests__/allowed-keywords.test.ts | 5 +++++ src/utils/allowed-keywords.ts | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index fff9363..4416877 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -148,6 +148,11 @@ describe("isReadOnlySQL", () => { expect(isReadOnlySQL(sql, "mysql")).toBe(false); }); + it("should reject REPLACE (non-function) inside WITH in SQLite", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "sqlite")).toBe(false); + }); + it("should reject WITH ... SELECT INTO", () => { const sql = "WITH cte AS (SELECT * FROM users) SELECT * INTO new_table FROM cte"; expect(isReadOnlySQL(sql, "postgres")).toBe(false); diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index f399173..b528d01 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -48,10 +48,22 @@ const mutatingPatternMySQL = new RegExp( "i", ); +/** + * Extended pattern for SQLite that also detects REPLACE INTO. + * SQLite supports REPLACE INTO but not LOW_PRIORITY/DELAYED. + */ +const mutatingPatternSQLite = new RegExp( + `\\b(?:${mutatingKeywords.join("|")}|replace\\s+into)\\b`, + "i", +); + function getMutatingPattern(connectorType: ConnectorType | string): RegExp { if (connectorType === "mysql" || connectorType === "mariadb") { return mutatingPatternMySQL; } + if (connectorType === "sqlite") { + return mutatingPatternSQLite; + } return mutatingPattern; } From f3b32da5be3ec543886f5e082f2f3aae3680bc6e Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Sat, 28 Mar 2026 06:57:11 -0700 Subject: [PATCH 11/12] Refactor: simplify mutating pattern dispatch and avoid redundant work - Merge mutatingPatternSQLite into shared mutatingPatternWithReplace - Replace getMutatingPattern() with Record lookup - Use match(/\S+/) instead of split(/\s+/)[0] to avoid full array alloc - Use exec() + slice() instead of test() + replace() to avoid double regex - Extract checkReadOnly() to skip re-running stripCommentsAndStrings on recursive EXPLAIN ANALYZE calls - Unify selectIntoPattern branches for select/with Co-Authored-By: Claude Opus 4.6 (1M context) --- src/utils/allowed-keywords.ts | 77 ++++++++++++++--------------------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index b528d01..82de797 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -39,40 +39,27 @@ const mutatingPattern = new RegExp( ); /** - * Extended pattern for MySQL/MariaDB that also detects REPLACE as a statement. + * Extended pattern for dialects that support REPLACE INTO (MySQL/MariaDB/SQLite). * Only matches `REPLACE INTO` (with optional LOW_PRIORITY/DELAYED), not * REPLACE() function calls or identifiers named `replace`. */ -const mutatingPatternMySQL = new RegExp( +const mutatingPatternWithReplace = new RegExp( `\\b(?:${mutatingKeywords.join("|")}|replace\\s+(?:(?:low_priority|delayed)\\s+)?into)\\b`, "i", ); -/** - * Extended pattern for SQLite that also detects REPLACE INTO. - * SQLite supports REPLACE INTO but not LOW_PRIORITY/DELAYED. - */ -const mutatingPatternSQLite = new RegExp( - `\\b(?:${mutatingKeywords.join("|")}|replace\\s+into)\\b`, - "i", -); - -function getMutatingPattern(connectorType: ConnectorType | string): RegExp { - if (connectorType === "mysql" || connectorType === "mariadb") { - return mutatingPatternMySQL; - } - if (connectorType === "sqlite") { - return mutatingPatternSQLite; - } - return mutatingPattern; -} +/** Per-dialect mutating keyword pattern */ +const mutatingPatterns: Record = { + postgres: mutatingPattern, + mysql: mutatingPatternWithReplace, + mariadb: mutatingPatternWithReplace, + sqlite: mutatingPatternWithReplace, + sqlserver: mutatingPattern, +}; -/** Detects SELECT ... INTO which writes data despite starting with SELECT */ const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; -/** Detects EXPLAIN ANALYZE which actually executes the statement. - * Matches both `EXPLAIN ANALYZE ...` and `EXPLAIN (ANALYZE) ...` / `EXPLAIN (ANALYZE, ...) ...`. - * Excludes explicitly disabled forms: ANALYZE false/off/0 */ +/** Matches EXPLAIN ANALYZE (or parenthesized form), excluding disabled forms (false/off/0) */ const explainAnalyzePattern = /^explain\s+(?:\([^)]*\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)[^)]*\)|\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)(?:\s+verbose\b)?)/i; @@ -80,26 +67,25 @@ const explainAnalyzePattern = * Check if a SQL query is read-only. * 1. Strips comments and string literals before analyzing. * 2. Verifies the first keyword is in the allow-list. - * 3. For WITH statements, scans for mutating keywords and SELECT INTO. - * 4. For SELECT statements, checks for SELECT ... INTO. - * 5. For EXPLAIN statements, rejects EXPLAIN ANALYZE with DML. - * @param sql The SQL query to check - * @param connectorType The database type to check against - * @returns True if the query is read-only + * 3. For WITH/SELECT statements, checks for mutating keywords and SELECT INTO. + * 4. For EXPLAIN statements, rejects EXPLAIN ANALYZE with DML. */ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string): boolean { - // Strip comments and strings before analyzing - const cleanedSQL = stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase(); + return checkReadOnly( + stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase(), + connectorType, + ); +} +function checkReadOnly(cleanedSQL: string, connectorType: ConnectorType | string): boolean { // Empty after stripping → deny. Attacker-crafted inputs may reduce to // empty strings after comment/string removal to evade keyword checks. if (!cleanedSQL) { return false; } - const firstWord = cleanedSQL.split(/\s+/)[0]; + const firstWord = cleanedSQL.match(/\S+/)?.[0] ?? ""; - // Get the appropriate allowed keywords list for this database type const keywordList = allowedKeywords[connectorType as ConnectorType] || []; @@ -108,27 +94,26 @@ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string } // WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...)) - // or use SELECT ... INTO in the final query. if (firstWord === "with") { - if (getMutatingPattern(connectorType).test(cleanedSQL)) { - return false; - } - if (selectIntoPattern.test(cleanedSQL)) { + const pattern = mutatingPatterns[connectorType as ConnectorType] ?? mutatingPattern; + if (pattern.test(cleanedSQL)) { return false; } } - // SELECT ... INTO writes data (creates tables or writes to files) - if (firstWord === "select" && selectIntoPattern.test(cleanedSQL)) { + // SELECT/WITH ... INTO writes data (creates tables or writes to files) + if ((firstWord === "select" || firstWord === "with") && selectIntoPattern.test(cleanedSQL)) { return false; } // EXPLAIN ANALYZE actually executes the statement (Postgres) - // Validate the inner statement using the same read-only logic - if (firstWord === "explain" && explainAnalyzePattern.test(cleanedSQL)) { - const afterExplain = cleanedSQL.replace(explainAnalyzePattern, "").trim(); - if (afterExplain && !isReadOnlySQL(afterExplain, connectorType)) { - return false; + if (firstWord === "explain") { + const m = explainAnalyzePattern.exec(cleanedSQL); + if (m) { + const afterExplain = cleanedSQL.slice(m[0].length).trim(); + if (afterExplain && !checkReadOnly(afterExplain, connectorType)) { + return false; + } } } From 87f75fe51359bd1b970d394948023a91ef7425be Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Sat, 28 Mar 2026 07:35:24 -0700 Subject: [PATCH 12/12] Fix: detect REPLACE without INTO in MySQL/MariaDB/SQLite MySQL/MariaDB allow REPLACE without INTO (e.g. REPLACE tbl SET ...). Changed pattern from matching only REPLACE INTO to matching any REPLACE not followed by ( to catch all REPLACE statement forms. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/utils/__tests__/allowed-keywords.test.ts | 14 ++++++++++++-- src/utils/allowed-keywords.ts | 8 ++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 4416877..61370af 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -138,9 +138,19 @@ describe("isReadOnlySQL", () => { expect(isReadOnlySQL(sql, "postgres")).toBe(true); }); - it("should allow a CTE named 'replace' in MySQL", () => { + it("should reject a CTE named 'replace' in MySQL (accepted false positive for security)", () => { const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace"; - expect(isReadOnlySQL(sql, "mysql")).toBe(true); + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE without INTO in MySQL (REPLACE tbl SET ...)", () => { + const sql = "REPLACE users SET name = 'test'"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE without INTO inside WITH in MySQL", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE users SET name = 'test'"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); }); it("should reject REPLACE (non-function) inside WITH in MySQL", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index 82de797..8723a88 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -39,12 +39,12 @@ const mutatingPattern = new RegExp( ); /** - * Extended pattern for dialects that support REPLACE INTO (MySQL/MariaDB/SQLite). - * Only matches `REPLACE INTO` (with optional LOW_PRIORITY/DELAYED), not - * REPLACE() function calls or identifiers named `replace`. + * Extended pattern for dialects that support REPLACE (MySQL/MariaDB/SQLite). + * Matches REPLACE statements (including those without INTO, e.g. REPLACE tbl SET ...) + * but not REPLACE() function calls (excluded via negative lookahead for `(`). */ const mutatingPatternWithReplace = new RegExp( - `\\b(?:${mutatingKeywords.join("|")}|replace\\s+(?:(?:low_priority|delayed)\\s+)?into)\\b`, + `\\b(?:${mutatingKeywords.join("|")}|replace(?!\\s*\\())\\b`, "i", );