Skip to content

Commit 5ea48ef

Browse files
authored
Merge pull request #275 from firecrawl/mog/updates
chore: updates
2 parents 8b62904 + ee18b4c commit 5ea48ef

2 files changed

Lines changed: 131 additions & 10 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "firecrawl-mcp",
3-
"version": "3.20.4",
3+
"version": "3.20.5",
44
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
55
"type": "module",
66
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",

src/index.ts

Lines changed: 130 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,12 @@ interface SessionData {
1717
* `Authorization: Bearer ...` to the Firecrawl API.
1818
*/
1919
firecrawlApiKey?: string;
20+
/**
21+
* For keyless requests over the hosted (CLOUD_SERVICE) MCP, the end-user's
22+
* real client IP, forwarded to the API so it can rate-limit per real IP
23+
* instead of the shared server IP.
24+
*/
25+
keylessClientIp?: string;
2026
/**
2127
* Whether the (experimental) research tools are exposed for this session.
2228
* Enabled locally via `FIRECRAWL_RESEARCH=true`, or per-request via the
@@ -298,6 +304,21 @@ const server = new FastMCP<SessionData>({
298304
299305
if (process.env.CLOUD_SERVICE === 'true') {
300306
if (!headerCred) {
307+
// Keyless free tier over the hosted MCP: serve it only when a forwarding
308+
// secret is configured, we know the end-user's client IP (so the API can
309+
// rate-limit per real IP, not the shared server IP), AND that IP still
310+
// has free quota. If the IP is out of quota (or keyless is off), fall
311+
// through to throw so FastMCP emits the OAuth 401 + WWW-Authenticate
312+
// challenge — i.e. prompt the user to connect an account exactly when
313+
// their free quota runs out.
314+
const clientIp = extractClientIp(request);
315+
if (
316+
process.env.KEYLESS_PROXY_SECRET &&
317+
clientIp &&
318+
(await keylessEligible(clientIp))
319+
) {
320+
return { firecrawlApiKey: undefined, research, keylessClientIp: clientIp };
321+
}
301322
throw new Error(
302323
'Firecrawl credentials required: OAuth access token (Authorization: Bearer fco_...) or API key (x-firecrawl-api-key)'
303324
);
@@ -314,10 +335,14 @@ const server = new FastMCP<SessionData>({
314335
!process.env.FIRECRAWL_API_KEY &&
315336
!process.env.FIRECRAWL_API_URL
316337
) {
338+
// No credential and no self-hosted URL: run in keyless mode. scrape and
339+
// search work for free (rate-limited per IP) against the Firecrawl cloud;
340+
// every other tool needs an API key and will return Unauthorized.
317341
console.error(
318-
'Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided'
342+
'No FIRECRAWL_API_KEY or FIRECRAWL_API_URL set — running in keyless mode. ' +
343+
'firecrawl_scrape and firecrawl_search are free (rate-limited per IP) against the Firecrawl cloud; ' +
344+
'other tools require an API key (get one free at https://firecrawl.dev).'
319345
);
320-
process.exit(1);
321346
}
322347
323348
if (httpStreaming && !credential && !process.env.FIRECRAWL_API_URL) {
@@ -687,7 +712,6 @@ ${
687712
string,
688713
unknown
689714
>;
690-
const client = getClient(session);
691715
const transformed = transformScrapeParams(
692716
options as Record<string, unknown>
693717
);
@@ -697,6 +721,19 @@ ${
697721
} else {
698722
log.info('Scraping URL', { url: String(url) });
699723
}
724+
if (isKeylessMode(session)) {
725+
const json = await keylessPost(
726+
'/v2/scrape',
727+
{
728+
url: String(url),
729+
...cleaned,
730+
origin: ORIGIN,
731+
},
732+
session
733+
);
734+
return asText(json?.data ?? json);
735+
}
736+
const client = getClient(session);
700737
const res = await client.scrape(String(url), {
701738
...cleaned,
702739
origin: ORIGIN,
@@ -867,7 +904,6 @@ The query also supports search operators, that you can use if needed to refine t
867904
args: unknown,
868905
{ session, log }: { session?: SessionData; log: Logger }
869906
): Promise<string> => {
870-
const client = getClient(session);
871907
const { query, ...opts } = args as Record<string, unknown>;
872908

873909
const searchOpts = { ...opts } as Record<string, unknown>;
@@ -889,16 +925,22 @@ The query also supports search operators, that you can use if needed to refine t
889925
excludeDomains
890926
);
891927
log.info('Searching', { query: searchQuery });
928+
const searchBody = {
929+
query: searchQuery,
930+
...(cleaned as any),
931+
origin: ORIGIN,
932+
};
933+
if (isKeylessMode(session)) {
934+
const json = await keylessPost('/v2/search', searchBody, session);
935+
return asText(json ?? {});
936+
}
892937
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
893938
// of `client.search()` so we preserve the full response envelope. The
894939
// high-level `search()` helper strips `id` and `creditsUsed`, which
895940
// breaks the `firecrawl_search_feedback` workflow that this server
896941
// explicitly tells the LLM to use after every search.
897-
const httpRes = await (client as any).http.post('/v2/search', {
898-
query: searchQuery,
899-
...(cleaned as any),
900-
origin: ORIGIN,
901-
});
942+
const client = getClient(session);
943+
const httpRes = await (client as any).http.post('/v2/search', searchBody);
902944
return asText(httpRes?.data ?? {});
903945
},
904946
});
@@ -912,6 +954,85 @@ function resolveApiBaseUrl(): string {
912954
);
913955
}
914956

957+
// Keyless free tier: when no credential is configured and we're targeting the
958+
// Firecrawl cloud (not self-hosted via FIRECRAWL_API_URL, not the multi-tenant
959+
// CLOUD_SERVICE deployment), scrape and search are free, rate-limited per IP.
960+
// The cloud only grants this when NO Authorization header is sent, so we bypass
961+
// the SDK — which always attaches a Bearer header — and post directly.
962+
/**
 * Best-effort end-user client IP from the incoming MCP request headers.
 *
 * Reads the left-most entry of `X-Forwarded-For` and returns it only when it
 * plausibly is an IP literal: non-empty, at most 45 characters (the longest
 * textual IPv6 form), made up solely of hex digits, dots and colons, and
 * containing at least one separator. Placeholder tokens such as `unknown` and
 * other free text yield `undefined` rather than being handed on as an address.
 *
 * This is a plausibility filter, not full IP validation.
 *
 * NOTE(review): the left-most entry is whatever the first hop claims. Unless a
 * trusted proxy in front of this server overwrites or strips the inbound
 * header, a client can pick its own value — confirm against the deployment.
 *
 * @param request - Incoming request; only `headers['x-forwarded-for']` is read.
 * @returns The candidate client IP, or `undefined` when absent or implausible.
 */
function extractClientIp(request?: {
  headers: IncomingHttpHeaders;
}): string | undefined {
  const forwardedFor = request?.headers?.['x-forwarded-for'];
  // A repeated header may surface as an array; only the first occurrence is used.
  const headerValue = Array.isArray(forwardedFor)
    ? forwardedFor[0]
    : forwardedFor;
  if (typeof headerValue !== 'string') return undefined;

  const candidate = (headerValue.split(',')[0] ?? '').trim();
  const looksLikeIp =
    candidate.length > 0 &&
    candidate.length <= 45 &&
    /^[0-9a-fA-F:.]+$/.test(candidate) &&
    /[.:]/.test(candidate);
  return looksLikeIp ? candidate : undefined;
}
971+
972+
/**
 * Read-only check (no quota consumed) of whether a client IP can still use the
 * keyless free tier, via the API's secret-gated eligibility endpoint.
 *
 * Fails closed: a missing `KEYLESS_PROXY_SECRET`, a non-2xx response, a body
 * that is not JSON, a network error, a timeout, or anything other than a
 * literal `eligible: true` all resolve to `false`, so the caller falls through
 * to the OAuth challenge rather than silently granting keyless access.
 *
 * @param clientIp - End-user IP, sent as `x-firecrawl-keyless-ip`.
 * @returns `true` only when the endpoint answers with `{ eligible: true }`.
 */
async function keylessEligible(clientIp: string): Promise<boolean> {
  const secret = process.env.KEYLESS_PROXY_SECRET;
  if (!secret) return false;

  // The probe is awaited while a session is being authenticated, so it must
  // not be able to stall forever on an unresponsive upstream. On expiry the
  // fetch rejects and the catch below turns that into "not eligible".
  // NOTE(review): 5s is a conservative default — tune if the endpoint is slower.
  const timeoutMs = 5000;

  try {
    const response = await fetch(
      `${resolveApiBaseUrl()}/v2/keyless/eligibility`,
      {
        headers: {
          'x-firecrawl-keyless-ip': clientIp,
          'x-firecrawl-keyless-secret': secret,
        },
        signal: AbortSignal.timeout(timeoutMs),
      }
    );
    if (!response.ok) return false;

    // A body that fails to parse rejects here and is handled by the catch.
    const payload: unknown = await response.json();
    return (
      typeof payload === 'object' &&
      payload !== null &&
      (payload as { eligible?: unknown }).eligible === true
    );
  } catch {
    return false;
  }
}
998+
999+
/**
 * Decide whether a tool call should take the keyless (no credential) path.
 *
 * - A session that carries `firecrawlApiKey` is never keyless.
 * - Hosted (`CLOUD_SERVICE === 'true'`): keyless only for sessions carrying
 *   the forwarded client IP in `keylessClientIp`, so the per-IP cap applies to
 *   the real client rather than the shared server IP.
 * - Local: keyless only when neither `FIRECRAWL_API_KEY` nor
 *   `FIRECRAWL_API_URL` is set, i.e. no credential is configured and the
 *   target is not a self-hosted instance.
 *
 * @param session - Current session data, if any.
 * @returns `true` when the request should be sent without credentials.
 */
function isKeylessMode(session?: SessionData): boolean {
  if (session?.firecrawlApiKey) return false;

  if (process.env.CLOUD_SERVICE === 'true') {
    return Boolean(session?.keylessClientIp);
  }

  // A key configured through the environment is still a configured
  // credential, even when the session object does not carry it; never
  // downgrade such a setup to the rate-limited keyless tier.
  if (process.env.FIRECRAWL_API_KEY) return false;

  // Local/stdio against the cloud (not a self-hosted FIRECRAWL_API_URL).
  return !process.env.FIRECRAWL_API_URL;
}
1009+
1010+
/**
 * POST a JSON body to the Firecrawl API without any `Authorization` header.
 *
 * When the session carries `keylessClientIp` and `KEYLESS_PROXY_SECRET` is
 * set, the client IP and the secret are forwarded in the
 * `x-firecrawl-keyless-ip` / `x-firecrawl-keyless-secret` headers so the API
 * can rate-limit per real client IP.
 *
 * @param path - API path appended to the resolved base URL (e.g. `/v2/scrape`).
 * @param body - Request payload, serialized with `JSON.stringify`.
 * @param session - Current session data, if any.
 * @returns The parsed JSON response, or `{}` when the body is not valid JSON.
 * @throws Error on a non-2xx response. The message is the API's `error` string
 *   when it provides one; otherwise a generic message with the HTTP status,
 *   followed by the serialized `error` value when that is structured data.
 */
async function keylessPost(
  path: string,
  body: Record<string, unknown>,
  session?: SessionData
): Promise<any> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  const proxySecret = process.env.KEYLESS_PROXY_SECRET;
  if (session?.keylessClientIp && proxySecret) {
    headers['x-firecrawl-keyless-ip'] = session.keylessClientIp;
    headers['x-firecrawl-keyless-secret'] = proxySecret;
  }

  const response = await fetch(`${resolveApiBaseUrl()}${path}`, {
    method: 'POST',
    headers,
    body: JSON.stringify(body),
  });

  // Error responses are not guaranteed to be JSON; treat an unparsable body
  // as empty so the status-based message below is still produced.
  let json: any;
  try {
    json = await response.json();
  } catch {
    json = {};
  }

  if (!response.ok) {
    const generic = `Firecrawl request failed (HTTP ${response.status})`;
    const apiError: unknown = json?.error;
    if (typeof apiError === 'string' && apiError) {
      throw new Error(apiError);
    }
    // A structured error would otherwise stringify to "[object Object]".
    throw new Error(
      apiError ? `${generic}: ${JSON.stringify(apiError)}` : generic
    );
  }
  return json;
}
1035+
9151036
const SEARCH_FEEDBACK_DISABLED = ['1', 'true', 'yes', 'on'].includes(
9161037
(
9171038
process.env.FIRECRAWL_NO_SEARCH_FEEDBACK ||

0 commit comments

Comments
 (0)