Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion lib/routes/openai/common.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import https from 'node:https';

import { load } from 'cheerio';

import { config } from '@/config';
Expand All @@ -8,11 +10,75 @@ import { parseDate } from '@/utils/parse-date';

export const BASE_URL = new URL('https://openai.com');

/** Upper bound on the number of redirect hops followed for a single article fetch. */
const maxRedirects = 5;

/** HTTP status codes that are handled as redirects when fetching an article page. */
const redirectStatusCodes = new Set<number>([301, 302, 303, 307, 308]);

// OpenAI article pages return Cloudflare 403 (`cf-mitigated: challenge`) through request-rewriter's undici.fetch path.
// The patched https.request path works, so keep this route-local.
/**
 * Fetch a page with `https.request` and resolve with its body decoded as UTF-8.
 *
 * Responses whose status is in `redirectStatusCodes` are followed (a relative `Location`
 * is resolved against the current URL) for at most `maxRedirects` hops.
 *
 * @param url - Absolute URL to request.
 * @param redirects - Number of redirects already followed; used by the recursive calls, callers normally omit it.
 * @returns A promise for the response body as a string.
 * @throws The promise rejects when a redirect has no `Location` header, when the redirect limit is
 * exceeded, when the final status is not 2xx, or when the request or the response stream errors.
 */
const fetchArticleHtml = async (url: string, redirects: number = 0): Promise<string> =>
    await new Promise<string>((resolve, reject) => {
        const request = https.request(
            url,
            {
                headers: {
                    'User-Agent': config.ua,
                    // Ask for an uncompressed body so it can be decoded directly as text.
                    'accept-encoding': 'identity',
                },
            },
            (response) => {
                const { statusCode } = response;

                // Follow legacy article redirects to match ofetch's previous behavior.
                if (statusCode && redirectStatusCodes.has(statusCode)) {
                    // Drain the redirect body; only the headers are needed.
                    response.resume();

                    const location = response.headers.location;
                    if (!location) {
                        reject(new Error(`[GET] "${url}": ${statusCode} redirect without location`));
                        return;
                    }

                    if (redirects >= maxRedirects) {
                        reject(new Error(`[GET] "${url}": too many redirects`));
                        return;
                    }

                    let nextUrl: string;
                    try {
                        nextUrl = new URL(location, url).href;
                    } catch (error) {
                        // A malformed Location header must reject instead of throwing inside the callback.
                        reject(error);
                        return;
                    }

                    // Resolving with a promise makes the outer promise adopt the redirected request's outcome.
                    resolve(fetchArticleHtml(nextUrl, redirects + 1));
                    return;
                }

                if (!statusCode || statusCode < 200 || statusCode >= 300) {
                    response.resume();
                    reject(new Error(`[GET] "${url}": ${statusCode}`));
                    return;
                }

                let html = '';
                response.setEncoding('utf8');
                // Without this listener a connection dropped mid-body never emits 'end',
                // which would leave the promise pending forever.
                response.on('error', reject);
                response.on('data', (chunk) => {
                    html += chunk;
                });
                response.on('end', () => {
                    resolve(html);
                });
            }
        );

        request.on('error', reject);
        request.end();
    });

/** Fetch the details of an article. */
export const fetchArticleDetails = async (url: string) => {
// Ensure trailing slash to avoid 301 redirect
const normalizedUrl = url.endsWith('/') ? url : `${url}/`;
const html = await ofetch(normalizedUrl, { responseType: 'text' });
const html = await fetchArticleHtml(normalizedUrl);
const $ = load(html);

const $article = $('#main article');
Expand Down