Skip to content
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions src/utils/__tests__/allowed-keywords.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,158 @@ describe("isReadOnlySQL", () => {
it("should not recognize SHOW as read-only for SQLite", () => {
expect(isReadOnlySQL("SHOW TABLES", "sqlite")).toBe(false);
});

it("should reject standalone ANALYZE (updates statistics)", () => {
expect(isReadOnlySQL("ANALYZE users", "postgres")).toBe(false);
expect(isReadOnlySQL("ANALYZE", "mysql")).toBe(false);
});

it("should allow REPLACE() as a function in MySQL SELECT", () => {
expect(isReadOnlySQL("SELECT REPLACE(name, 'a', 'b') FROM users", "mysql")).toBe(true);
});

it("should allow REPLACE() inside a WITH CTE in MySQL", () => {
const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS cleaned FROM users) SELECT * FROM cte";
expect(isReadOnlySQL(sql, "mysql")).toBe(true);
});
});

describe("CTE with mutating operations", () => {
it("should reject UPDATE inside a CTE", () => {
const sql = "WITH updated AS (UPDATE contracts SET site_location_postcode = 'SW11' WHERE id = 1 RETURNING id) SELECT * FROM updated";
expect(isReadOnlySQL(sql, "postgres")).toBe(false);
});

it("should reject DELETE inside a CTE", () => {
const sql = "WITH deleted AS (DELETE FROM users WHERE id = 1 RETURNING *) SELECT * FROM deleted";
expect(isReadOnlySQL(sql, "postgres")).toBe(false);
});

it("should reject INSERT inside a CTE", () => {
const sql = "WITH inserted AS (INSERT INTO users (name) VALUES ('test') RETURNING *) SELECT * FROM inserted";
expect(isReadOnlySQL(sql, "postgres")).toBe(false);
});

it("should allow a pure SELECT CTE", () => {
const sql = "WITH cte AS (SELECT * FROM users) SELECT * FROM cte";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should reject DROP inside a CTE-like construct", () => {
const sql = "WITH x AS (SELECT 1) DROP TABLE users";
expect(isReadOnlySQL(sql, "postgres")).toBe(false);
});

it("should not be fooled by mutating keywords in string literals", () => {
const sql = "SELECT * FROM users WHERE name = 'UPDATE me'";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should not be fooled by mutating keywords in comments", () => {
const sql = "/* UPDATE users SET x = 1 */ SELECT * FROM users";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should allow REPLACE() as a string function in SELECT", () => {
const sql = "SELECT REPLACE(name, 'a', 'b') FROM users";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should allow REPLACE() as a string function inside a WITH CTE", () => {
const sql = "WITH cte AS (SELECT REPLACE(name, 'a', 'b') AS name FROM users) SELECT * FROM cte";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should reject REPLACE INTO as a mutating statement", () => {
const sql = "REPLACE INTO users (id, name) VALUES (1, 'test')";
expect(isReadOnlySQL(sql, "mysql")).toBe(false);
});

it("should allow a CTE named 'replace' in Postgres", () => {
const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace";
expect(isReadOnlySQL(sql, "postgres")).toBe(true);
});

it("should allow a CTE named 'replace' in MySQL", () => {
const sql = "WITH replace AS (SELECT 1) SELECT * FROM replace";
expect(isReadOnlySQL(sql, "mysql")).toBe(true);
});

it("should reject REPLACE (non-function) inside WITH in MySQL", () => {
const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')";
expect(isReadOnlySQL(sql, "mysql")).toBe(false);
});
Comment on lines +131 to +159

Copilot AI Mar 28, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s test coverage for REPLACE INTO ..., but no test that asserts readonly rejection for the valid MySQL/MariaDB form REPLACE tbl_name SET ... (where INTO is optional). Adding such a test would prevent regressions and would have caught the current bypass in the pattern matcher.

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 87f75fe. Added tests for REPLACE tbl SET ... (both standalone and inside WITH).


it("should reject REPLACE (non-function) inside WITH in SQLite", () => {
const sql = "WITH cte AS (SELECT 1) REPLACE INTO users VALUES (1, 'test')";
expect(isReadOnlySQL(sql, "sqlite")).toBe(false);
});

it("should reject WITH ... SELECT INTO", () => {
const sql = "WITH cte AS (SELECT * FROM users) SELECT * INTO new_table FROM cte";
expect(isReadOnlySQL(sql, "postgres")).toBe(false);
});
});

describe("SHOW CREATE and metadata queries", () => {
it("should allow SHOW CREATE TABLE in MySQL", () => {
expect(isReadOnlySQL("SHOW CREATE TABLE users", "mysql")).toBe(true);
});

it("should allow SHOW CREATE PROCEDURE in MariaDB", () => {
expect(isReadOnlySQL("SHOW CREATE PROCEDURE my_proc", "mariadb")).toBe(true);
});

it("should allow EXPLAIN with mutating statement", () => {
// EXPLAIN doesn't execute the statement, just shows the plan
expect(isReadOnlySQL("EXPLAIN DELETE FROM users", "postgres")).toBe(true);
});

it("should reject EXPLAIN ANALYZE with DML (Postgres executes the statement)", () => {
expect(isReadOnlySQL("EXPLAIN ANALYZE DELETE FROM users", "postgres")).toBe(false);
});

it("should reject EXPLAIN (ANALYZE) with DML", () => {
expect(isReadOnlySQL("EXPLAIN (ANALYZE) DELETE FROM users", "postgres")).toBe(false);
});

it("should allow EXPLAIN ANALYZE with SELECT", () => {
expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * FROM users", "postgres")).toBe(true);
});

it("should reject EXPLAIN ANALYZE with SELECT INTO", () => {
expect(isReadOnlySQL("EXPLAIN ANALYZE SELECT * INTO new_table FROM users", "postgres")).toBe(false);
});

it("should allow EXPLAIN ANALYZE VERBOSE with SELECT", () => {
expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE SELECT * FROM users", "postgres")).toBe(true);
});

it("should reject EXPLAIN ANALYZE VERBOSE with DML", () => {
expect(isReadOnlySQL("EXPLAIN ANALYZE VERBOSE DELETE FROM users", "postgres")).toBe(false);
});

it("should allow EXPLAIN (ANALYZE false) with DML (not executed)", () => {
expect(isReadOnlySQL("EXPLAIN (ANALYZE false) DELETE FROM users", "postgres")).toBe(true);
});

it("should allow EXPLAIN (ANALYZE off) with DML (not executed)", () => {
expect(isReadOnlySQL("EXPLAIN (ANALYZE off) DELETE FROM users", "postgres")).toBe(true);
});
});

describe("SELECT INTO", () => {
it("should reject SELECT INTO (Postgres table creation)", () => {
expect(isReadOnlySQL("SELECT * INTO new_table FROM users", "postgres")).toBe(false);
});

it("should reject SELECT INTO OUTFILE (MySQL)", () => {
expect(isReadOnlySQL("SELECT * INTO OUTFILE '/tmp/data.csv' FROM users", "mysql")).toBe(false);
});

it("should reject SELECT INTO with WHERE clause", () => {
expect(isReadOnlySQL("SELECT id, name INTO backup_table FROM users WHERE active = true", "sqlserver")).toBe(false);
});
});

describe("edge cases", () => {
Expand Down
108 changes: 94 additions & 14 deletions src/utils/allowed-keywords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,115 @@ import { stripCommentsAndStrings } from "./sql-parser.js";
* but also other queries that are not destructive
*/
export const allowedKeywords: Record<ConnectorType, string[]> = {
postgres: ["select", "with", "explain", "analyze", "show"],
mysql: ["select", "with", "explain", "analyze", "show", "describe", "desc"],
mariadb: ["select", "with", "explain", "analyze", "show", "describe", "desc"],
sqlite: ["select", "with", "explain", "analyze", "pragma"],
postgres: ["select", "with", "explain", "show"],
mysql: ["select", "with", "explain", "show", "describe", "desc"],
mariadb: ["select", "with", "explain", "show", "describe", "desc"],
sqlite: ["select", "with", "explain", "pragma"],
sqlserver: ["select", "with", "explain", "showplan"],
};

/**
* Check if a SQL query is read-only based on its first keyword.
* Strips comments and strings before analyzing to avoid false positives.
* @param sql The SQL query to check
* @param connectorType The database type to check against
* @returns True if the query is read-only (starts with allowed keywords)
* Keywords that indicate data-modifying operations.
* Used to detect DML/DDL hidden inside CTEs or other constructs.
*/
const mutatingKeywords = [
"insert",
"update",
"delete",
"drop",
"alter",
"create",
"truncate",
"merge",
"grant",
"revoke",
"rename",
];

/** Base pattern: matches any mutating keyword as a whole word */
const mutatingPattern = new RegExp(
`\\b(?:${mutatingKeywords.join("|")})\\b`,
"i",
);

/**
* Extended pattern for dialects that support REPLACE INTO (MySQL/MariaDB/SQLite).
* Only matches `REPLACE INTO` (with optional LOW_PRIORITY/DELAYED), not
* REPLACE() function calls or identifiers named `replace`.
*/
const mutatingPatternWithReplace = new RegExp(
`\\b(?:${mutatingKeywords.join("|")}|replace\\s+(?:(?:low_priority|delayed)\\s+)?into)\\b`,

Copilot AI Mar 28, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mutatingPatternWithReplace only matches REPLACE ... INTO, but in MySQL/MariaDB the INTO keyword is optional for REPLACE statements (e.g. REPLACE tbl_name SET ...). As a result, a mutating statement can still bypass readonly mode when wrapped in a WITH statement.

Adjust the dialect pattern to also detect REPLACE statements without INTO (while still avoiding false positives on REPLACE() function calls, e.g. by rejecting replace when it’s not immediately followed by ().

Suggested change
* Extended pattern for dialects that support REPLACE INTO (MySQL/MariaDB/SQLite).
* Only matches `REPLACE INTO` (with optional LOW_PRIORITY/DELAYED), not
* REPLACE() function calls or identifiers named `replace`.
*/
const mutatingPatternWithReplace = new RegExp(
`\\b(?:${mutatingKeywords.join("|")}|replace\\s+(?:(?:low_priority|delayed)\\s+)?into)\\b`,
* Extended pattern for dialects that support REPLACE (MySQL/MariaDB/SQLite).
* Matches REPLACE statements (including those without INTO) but not REPLACE()
* function calls, by rejecting `replace` when it is immediately followed by `(`.
*/
const mutatingPatternWithReplace = new RegExp(
`\\b(?:${mutatingKeywords.join("|")}|replace(?!\\s*\\()))\\b`,

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 87f75fe. Changed pattern to replace(?!\s*\() to catch all REPLACE statement forms (including REPLACE tbl SET ... without INTO). Accepts the minor false positive on identifiers named replace in MySQL/MariaDB/SQLite as a security tradeoff.

"i",
);

/** Per-dialect mutating keyword pattern */
const mutatingPatterns: Record<ConnectorType, RegExp> = {
postgres: mutatingPattern,
mysql: mutatingPatternWithReplace,
mariadb: mutatingPatternWithReplace,
sqlite: mutatingPatternWithReplace,
sqlserver: mutatingPattern,
};

const selectIntoPattern = /\bselect\b[\s\S]+\binto\b/i;

/** Matches EXPLAIN ANALYZE (or parenthesized form), excluding disabled forms (false/off/0) */
const explainAnalyzePattern =
/^explain\s+(?:\([^)]*\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)[^)]*\)|\banalyze\b(?!\s*(?:=\s*)?(?:false|off|0)\b)(?:\s+verbose\b)?)/i;

/**
* Check if a SQL query is read-only.
* 1. Strips comments and string literals before analyzing.
* 2. Verifies the first keyword is in the allow-list.
* 3. For WITH/SELECT statements, checks for mutating keywords and SELECT INTO.
* 4. For EXPLAIN statements, rejects EXPLAIN ANALYZE with DML.
*/
export function isReadOnlySQL(sql: string, connectorType: ConnectorType | string): boolean {
// Strip comments and strings before analyzing
const cleanedSQL = stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase();
return checkReadOnly(
stripCommentsAndStrings(sql, connectorType as ConnectorType).trim().toLowerCase(),
connectorType,
);
}

function checkReadOnly(cleanedSQL: string, connectorType: ConnectorType | string): boolean {
// Empty after stripping → deny. Attacker-crafted inputs may reduce to
// empty strings after comment/string removal to evade keyword checks.
if (!cleanedSQL) {
return false;
}

const firstWord = cleanedSQL.split(/\s+/)[0];
const firstWord = cleanedSQL.match(/\S+/)?.[0] ?? "";

// Get the appropriate allowed keywords list for this database type
const keywordList =
allowedKeywords[connectorType as ConnectorType] || [];

return keywordList.includes(firstWord);
if (!keywordList.includes(firstWord)) {
return false;
}

// WITH statements can embed DML in CTEs (e.g. WITH cte AS (UPDATE ...))
if (firstWord === "with") {
const pattern = mutatingPatterns[connectorType as ConnectorType] ?? mutatingPattern;
if (pattern.test(cleanedSQL)) {
return false;
}
}

// SELECT/WITH ... INTO writes data (creates tables or writes to files)
if ((firstWord === "select" || firstWord === "with") && selectIntoPattern.test(cleanedSQL)) {
return false;
}

// EXPLAIN ANALYZE actually executes the statement (Postgres)
if (firstWord === "explain") {
const m = explainAnalyzePattern.exec(cleanedSQL);
if (m) {
const afterExplain = cleanedSQL.slice(m[0].length).trim();
if (afterExplain && !checkReadOnly(afterExplain, connectorType)) {
return false;
}
}
}

return true;
}
Loading