diff --git a/src/utils/__tests__/allowed-keywords.test.ts b/src/utils/__tests__/allowed-keywords.test.ts index 7a3d551..61370af 100644 --- a/src/utils/__tests__/allowed-keywords.test.ts +++ b/src/utils/__tests__/allowed-keywords.test.ts @@ -66,6 +66,168 @@ describe("isReadOnlySQL", () => { it("should not recognize SHOW as read-only for SQLite", () => { expect(isReadOnlySQL("SHOW TABLES", "sqlite")).toBe(false); }); + + it("should reject standalone ANALYZE (updates statistics)", () => { + expect(isReadOnlySQL("ANALYZE users", "postgres")).toBe(false); + expect(isReadOnlySQL("ANALYZE", "mysql")).toBe(false); + }); + + it("should allow REPLACE() as a function in MySQL SELECT", () => { + expect(isReadOnlySQL("SELECT REPLACE(name, 'a', 'b') FROM users", "mysql")).toBe(true); + }); + + it("should allow REPLACE() inside a WITH CTE in MySQL", () => { + const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS cleaned FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "mysql")).toBe(true); + }); + }); + + describe("CTE with mutating operations", () => { + it("should reject UPDATE inside a CTE", () => { + const sql = "WITH updated AS (UPDATE contracts SET site_location_postcode = 'SW11' WHERE id = 1 RETURNING id) SELECT * FROM updated"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should reject DELETE inside a CTE", () => { + const sql = "WITH deleted AS (DELETE FROM users WHERE id = 1 RETURNING *) SELECT * FROM deleted"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should reject INSERT inside a CTE", () => { + const sql = "WITH inserted AS (INSERT INTO users (name) VALUES ('test') RETURNING *) SELECT * FROM inserted"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should allow a pure SELECT CTE", () => { + const sql = "WITH cte AS (SELECT * FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject DROP inside a CTE-like construct", () => { + const sql = "WITH x AS (SELECT 1) DROP TABLE users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + + it("should not be fooled by mutating keywords in string literals", () => { + const sql = "SELECT * FROM users WHERE name = 'UPDATE me'"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should not be fooled by mutating keywords in comments", () => { + const sql = "/* UPDATE users SET x = 1 */ SELECT * FROM users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should allow REPLACE() as a string function in SELECT", () => { + const sql = "SELECT REPLACE(name, 'a', 'b') FROM users"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should allow REPLACE() as a string function inside a WITH CTE", () => { + const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS name FROM users) SELECT * FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject REPLACE INTO as a mutating statement", () => { + const sql = "REPLACE INTO users (id, name) VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should allow a CTE named 'replace' in Postgres", () => { + const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace"; + expect(isReadOnlySQL(sql, "postgres")).toBe(true); + }); + + it("should reject a CTE named 'replace' in MySQL (accepted false positive for security)", () => { + const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE without INTO in MySQL (REPLACE tbl SET ...)", () => { + const sql = "REPLACE users SET name = 'test'"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE without INTO inside WITH in MySQL", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE users SET name = 'test'"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE (non-function) inside WITH in MySQL", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "mysql")).toBe(false); + }); + + it("should reject REPLACE (non-function) inside WITH in SQLite", () => { + const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')"; + expect(isReadOnlySQL(sql, "sqlite")).toBe(false); + }); + + it("should reject WITH ... SELECT INTO", () => { + const sql = "WITH cte AS (SELECT * FROM users) SELECT * INTO new_table FROM cte"; + expect(isReadOnlySQL(sql, "postgres")).toBe(false); + }); + }); + + describe("SHOW CREATE and metadata queries", () => { + it("should allow SHOW CREATE TABLE in MySQL", () => { + expect(isReadOnlySQL("SHOW CREATE TABLE users", "mysql")).toBe(true); + }); + + it("should allow SHOW CREATE PROCEDURE in MariaDB", () => { + expect(isReadOnlySQL("SHOW CREATE PROCEDURE my_proc", "mariadb")).toBe(true); + }); + + it("should allow EXPLAIN with mutating statement", () => { + // EXPLAIN doesn't execute the statement, just shows the plan + expect(isReadOnlySQL("EXPLAIN DELETE FROM users", "postgres")).toBe(true); + }); + + it("should reject EXPLAIN ANALYZE with DML (Postgres executes the statement)", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE DELETE FROM users", "postgres")).toBe(false); + }); + + it("should reject EXPLAIN (ANALYZE) with DML", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE) DELETE FROM users", "postgres")).toBe(false); + }); + + it("should allow EXPLAIN ANALYZE with SELECT", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * FROM users", "postgres")).toBe(true); + }); + + it("should reject EXPLAIN ANALYZE with SELECT INTO", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * INTO new_table FROM users", "postgres")).toBe(false); + }); + + it("should allow EXPLAIN ANALYZE VERBOSE with SELECT", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE SELECT * FROM users", "postgres")).toBe(true); + }); + + it("should reject EXPLAIN ANALYZE VERBOSE with DML", () => { + expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE DELETE FROM users", "postgres")).toBe(false); + }); + + it("should allow EXPLAIN (ANALYZE false) with DML (not executed)", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE false) DELETE FROM users", "postgres")).toBe(true); + }); + + it("should allow EXPLAIN (ANALYZE off) with DML (not executed)", () => { + expect(isReadOnlySQL("EXPLAIN (ANALYZE off) DELETE FROM users", "postgres")).toBe(true); + }); + }); + + describe("SELECT INTO", () => { + it("should reject SELECT INTO (Postgres table creation)", () => { + expect(isReadOnlySQL("SELECT * INTO new_table FROM users", "postgres")).toBe(false); + }); + + it("should reject SELECT INTO OUTFILE (MySQL)", () => { + expect(isReadOnlySQL("SELECT * INTO OUTFILE '/tmp/data.csv' FROM users", "mysql")).toBe(false); + }); + + it("should reject SELECT INTO with WHERE clause", () => { + expect(isReadOnlySQL("SELECT id, name INTO backup_table FROM users WHERE active = true", "sqlserver")).toBe(false); + }); }); describe("edge cases", () => { diff --git a/src/utils/allowed-keywords.ts b/src/utils/allowed-keywords.ts index 00280aa..8723a88 100644 --- a/src/utils/allowed-keywords.ts +++ b/src/utils/allowed-keywords.ts @@ -7,35 +7,115 @@ import { stripCommentsAndStrings } from "./sql-parser.js"; * but also other queries that are not destructive */ export const allowedKeywords: Record = { - postgres: ["select", "with", "explain", "analyze", "show"], - mysql: ["select", "with", "explain", "analyze", "show", "describe", "desc"], - mariadb: ["select", "with", "explain", "analyze", "show", "describe", "desc"], - sqlite: ["select", "with", "explain", "analyze", "pragma"], + postgres: ["select", "with", "explain", "show"], + mysql: ["select", "with", "explain", "show", "describe", "desc"], + mariadb: ["select", "with", "explain", "show", "describe", "desc"], + sqlite: ["select", "with", "explain", "pragma"], sqlserver: ["select", "with", "explain", "showplan"], }; /** - * Check if a SQL query is read-only based on its first keyword. - * Strips comments and strings before analyzing to avoid false positives. - * @param sql The SQL query to check - * @param connectorType The database type to check against - * @returns True if the query is read-only (starts with allowed keywords) + * Keywords that indicate data-modifying operations. + * Used to detect DML/DDL hidden inside CTEs or other constructs. + */ +const mutatingKeywords = [ + "insert", + "update", + "delete", + "drop", + "alter", + "create", + "truncate", + "merge", + "grant", + "revoke", + "rename", +]; + +/** Base pattern: matches any mutating keyword as a whole word */ +const mutatingPattern = new RegExp( + `\\b(?:${mutatingKeywords.join("|")})\\b`, + "i", +); + +/** + * Extended pattern for dialects that support REPLACE (MySQL/MariaDB/SQLite). + * Matches REPLACE statements (including those without INTO, e.g. REPLACE tbl SET ...) + * but not REPLACE() function calls (excluded via negative lookahead for `(`). + */ +const mutatingPatternWithReplace = new RegExp( + `\\b(?:${mutatingKeywords.join("|")}|replace(?!\\s*\\())\\b`, + "i", +); + +/** Per-dialect mutating keyword pattern */ +const mutatingPatterns: Record = { + postgres: mutatingPattern, + mysql: mutatingPatternWithReplace, + mariadb: mutatingPatternWithReplace, + sqlite: mutatingPatternWithReplace, + sqlserver: mutatingPattern, +}; + +const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i; + +/** Matches EXPLAIN ANALYZE (or parenthesized form), excluding disabled forms (false/off/0) */ +const explainAnalyzePattern = + /^explain\s+(?:\([^)]*\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)[^)]*\)|\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)(?:\s+verbose\b)?)/i; + +/** + * Check if a SQL query is read-only. + * 1. Strips comments and string literals before analyzing. + * 2. Verifies the first keyword is in the allow-list. + * 3. For WITH/SELECT statements, checks for mutating keywords and SELECT INTO. + * 4. For EXPLAIN statements, rejects EXPLAIN ANALYZE with DML. */ export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string): boolean { - // Strip comments and strings before analyzing - const cleanedSQL = stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase(); + return checkReadOnly( + stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase(), + connectorType, + ); +} +function checkReadOnly(cleanedSQL: string, connectorType: ConnectorType | string): boolean { // Empty after stripping → deny. Attacker-crafted inputs may reduce to // empty strings after comment/string removal to evade keyword checks. if (!cleanedSQL) { return false; } - const firstWord = cleanedSQL.split(/\s+/)[0]; + const firstWord = cleanedSQL.match(/\S+/)?.[0] ?? ""; - // Get the appropriate allowed keywords list for this database type const keywordList = allowedKeywords[connectorType as ConnectorType] || []; - return keywordList.includes(firstWord); + if (!keywordList.includes(firstWord)) { + return false; + } + + // WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...)) + if (firstWord === "with") { + const pattern = mutatingPatterns[connectorType as ConnectorType] ?? mutatingPattern; + if (pattern.test(cleanedSQL)) { + return false; + } + } + + // SELECT/WITH ... INTO writes data (creates tables or writes to files) + if ((firstWord === "select" || firstWord === "with") && selectIntoPattern.test(cleanedSQL)) { + return false; + } + + // EXPLAIN ANALYZE actually executes the statement (Postgres) + if (firstWord === "explain") { + const m = explainAnalyzePattern.exec(cleanedSQL); + if (m) { + const afterExplain = cleanedSQL.slice(m[0].length).trim(); + if (afterExplain && !checkReadOnly(afterExplain, connectorType)) { + return false; + } + } + } + + return true; }