-
Notifications
You must be signed in to change notification settings - Fork 419
feat(cli): add stats subcommand — per-agent React Doctor leaderboard
#932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
a35e16e
7b0e7b1
fe9f110
8769924
755b8aa
721470d
f15f640
0d13c86
dad2a5c
9a20e3d
f6b2f03
3f50df7
509f229
f26f960
ac04d51
db52fc6
83a9210
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| --- | ||
| "react-doctor": minor | ||
| --- | ||
|
|
||
| Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history. | ||
|
|
||
| `stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus the Cursor composer database — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". | ||
|
|
||
| - Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. | ||
| - Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. The terminal table shows both the raw `Score` and the `Weighted` score it ranks on. | ||
| - Cursor attribution reads the canonical composer database (`state.vscdb`) directly, so each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2`) and an exact post-edit snapshot of every edited file — the model-less agent-transcript JSONL files are no longer used. Attribution falls back to `unknown` only for chats left on the "Auto" model. | ||
| - Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work. | ||
| - `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows model + provider tables with score bars and a best/worst callout. | ||
| - Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), the Cursor composer database requires `node:sqlite` (Node 22.13+) and covers GUI agent sessions (not cursor-agent CLI runs), and the score requires network access. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| import * as path from "node:path"; | ||
| import { resolveScanTarget, type ReactDoctorConfig } from "@react-doctor/core"; | ||
| import { aggregateStats } from "../../stats/aggregate-stats.js"; | ||
| import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js"; | ||
| import { discoverSessions } from "../../stats/discover-sessions.js"; | ||
| import { renderStatsReport } from "../../stats/render-stats.js"; | ||
| import { runStatsScan } from "../../stats/run-stats-scan.js"; | ||
| import type { StatsProvider, StatsReport, StatsScopeOptions } from "../../stats/types.js"; | ||
| import { METRIC } from "../utils/constants.js"; | ||
| import { recordCount } from "../utils/record-metric.js"; | ||
| import { spinner } from "../utils/spinner.js"; | ||
|
|
||
/**
 * Raw flag values handed to {@link statsAction}. Every flag is optional and
 * value-carrying flags arrive as unparsed strings.
 */
export interface StatsFlags {
  /** When true the report scope is "global"; otherwise it is "repo". */
  global?: boolean;
  /** Date string for `--since`; must be parseable by the JavaScript `Date` parser. */
  since?: string;
  /** Session limit for `--limit`, still in string form. */
  limit?: string;
  /** Provider filter for `--provider`: `claude`, `codex`, or `cursor`. */
  provider?: string;
  /** When true the report is written to stdout as JSON and the spinner is suppressed. */
  json?: boolean;
  /** Directory to resolve the scan target from; defaults to `process.cwd()`. */
  cwd?: string;
}
|
|
||
/** Provider identifiers accepted by `--provider`. */
const VALID_PROVIDERS = new Set<StatsProvider>(["claude", "codex", "cursor"]);

/** Type guard: true when `candidate` is one of the known provider identifiers. */
const isStatsProvider = (candidate: string): candidate is StatsProvider =>
  (VALID_PROVIDERS as ReadonlySet<string>).has(candidate);

/**
 * Validates the raw `--provider` flag.
 *
 * @param value - Raw flag value, or `undefined` when the flag was not passed.
 * @returns The provider identifier, or `undefined` when no flag was passed.
 * @throws Error when the value is not a known provider.
 */
const parseProvider = (value: string | undefined): StatsProvider | undefined => {
  if (value === undefined) return undefined;
  if (isStatsProvider(value)) return value;
  throw new Error(`Unknown provider "${value}". Expected one of: claude, codex, cursor.`);
};
|
|
||
/**
 * Parses the raw `--since` flag into a `Date`.
 *
 * @param value - Raw flag value, or `undefined` when the flag was not passed.
 * @returns The parsed date, or `undefined` when no flag was passed.
 * @throws Error when the JavaScript date parser cannot understand the value.
 */
const parseSince = (value: string | undefined): Date | undefined => {
  if (value === undefined) return undefined;
  // `new Date(string)` is defined in terms of `Date.parse`, so this accepts
  // exactly the same inputs.
  const epochMilliseconds = Date.parse(value);
  if (!Number.isNaN(epochMilliseconds)) return new Date(epochMilliseconds);
  throw new Error(`Invalid --since date "${value}". Use e.g. 2026-06-01.`);
};
|
|
||
/**
 * Parses the raw `--limit` flag into a positive session count.
 *
 * The whole value must be a positive decimal integer. `Number.parseInt` alone
 * would accept trailing garbage (`"10abc"` parses as 10, `"1.5"` as 1), and a
 * silently ignored limit is hard to notice, so invalid input is rejected the
 * same way invalid `--since` and `--provider` values are.
 *
 * @param value - Raw flag value, or `undefined` when the flag was not passed.
 * @returns The parsed limit, or `STATS_DEFAULT_SESSION_LIMIT` when no flag was passed.
 * @throws Error when the value is not a positive integer.
 */
const parseLimit = (value: string | undefined): number => {
  if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT;
  const trimmed = value.trim();
  const parsed = /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`Invalid --limit value "${value}". Expected a positive integer.`);
  }
  return parsed;
};
|
|
||
/**
 * Resolves the directory the stats run is anchored to, plus any user config.
 *
 * Any failure from `resolveScanTarget` is swallowed: the function then falls
 * back to the absolute form of `directory` with no user config.
 * NOTE(review): presumably this lets `stats` run outside a recognised project
 * — confirm that swallowing every error is intended.
 *
 * @param directory - Directory to resolve from.
 * @returns The resolved root and the user config (`null` on fallback).
 */
const resolveTarget = async (
  directory: string,
): Promise<{ root: string; userConfig: ReactDoctorConfig | null }> => {
  try {
    const { resolvedDirectory, userConfig } = await resolveScanTarget(directory);
    return { root: resolvedDirectory, userConfig };
  } catch {
    const absoluteDirectory = path.resolve(directory);
    return { root: absoluteDirectory, userConfig: null };
  }
};
|
|
||
/**
 * Runs the `stats` subcommand end to end: parses flags, discovers agent
 * sessions, scans them, aggregates the results into a report, records a usage
 * metric, and writes the report to stdout.
 *
 * @param flags - Raw CLI flags for the subcommand.
 * @throws Error when `--since` or `--provider` cannot be parsed, or when any
 *   of the discovery / scan / aggregation steps rejects.
 */
export const statsAction = async (flags: StatsFlags): Promise<void> => {
  const directory = flags.cwd ?? process.cwd();
  // Flags are parsed before any work starts so a bad value fails fast.
  const scope: StatsScopeOptions = {
    global: flags.global ?? false,
    since: parseSince(flags.since),
    limit: parseLimit(flags.limit),
    provider: parseProvider(flags.provider),
  };

  const { root, userConfig } = await resolveTarget(directory);

  // ora renders to stderr; suppress it in JSON mode so the run stays quiet.
  const progress = flags.json ? null : spinner("Looking through your agent history…").start();
  // No-op when the spinner is suppressed.
  const announce = (message: string) => progress?.update(message);

  let report: StatsReport;
  let providerCount: number;
  try {
    const sessions = await discoverSessions(root, scope, (foundCount) =>
      announce(`Looking through your agent history… (${foundCount} found)`),
    );

    announce("Checking the code each agent wrote…");
    // A global run passes `null` instead of the repo root to the scan.
    const results = await runStatsScan(sessions, scope.global ? null : root, {
      onProgress: (completedCount, totalCount) =>
        announce(`Checking the code each agent wrote… (${completedCount}/${totalCount})`),
    });

    announce("Scoring…");
    const aggregated = await aggregateStats(results, userConfig);
    providerCount = aggregated.providers.length;

    // Classify each session in one pass:
    //  - ranked: at least one file was scanned
    //  - nonReact: files were reconstructed but none were scanned
    //  - unreconstructable: nothing was reconstructed at all
    const sessionTally = results.reduce(
      (tally, { filesScanned, reconstructedFiles }) => {
        if (filesScanned > 0) {
          tally.ranked += 1;
        } else if (filesScanned === 0) {
          if (reconstructedFiles > 0) {
            tally.nonReact += 1;
          } else if (reconstructedFiles === 0) {
            tally.unreconstructable += 1;
          }
        }
        return tally;
      },
      { ranked: 0, nonReact: 0, unreconstructable: 0 },
    );

    // Key order is preserved because it determines the JSON output order.
    report = {
      scope: scope.global ? "global" : "repo",
      directory: root,
      models: aggregated.models,
      providers: aggregated.providers,
      best: aggregated.best,
      worst: aggregated.worst,
      sessionsAnalyzed: results.length,
      sessionsRanked: sessionTally.ranked,
      sessionsNonReact: sessionTally.nonReact,
      sessionsUnreconstructable: sessionTally.unreconstructable,
      generatedAt: new Date().toISOString(),
    };
    progress?.succeed("Done.");
  } finally {
    // Always tear the spinner down, including when a step above throws.
    progress?.stop();
  }

  recordCount(METRIC.statsRun, 1, {
    scope: report.scope,
    sessions: report.sessionsAnalyzed,
    providers: providerCount,
    provider: scope.provider ?? "all",
  });

  const output = flags.json
    ? JSON.stringify({ schemaVersion: 1, ...report }, null, 2)
    : renderStatsReport(report);
  process.stdout.write(`${output}\n`);
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -99,12 +99,22 @@ const WHY_FLAG_SPEC: CliFlagSpec = { | |
| shortOptionsWithRequiredValues: new Set(["-c"]), | ||
| }; | ||
|
|
||
/**
 * Flag spec for the `stats` subcommand.
 *
 * `stats` takes no positionals — just the scope/output options.
 * NOTE(review): `-c` is listed as a short option requiring a value, but the
 * long option it abbreviates is not visible from this spec — confirm.
 */
const STATS_FLAG_SPEC: CliFlagSpec = {
  shortOptionsWithoutValues: new Set(["-h"]),
  shortOptionsWithRequiredValues: new Set(["-c"]),
  longOptionsWithoutValues: new Set([
    "--color",
    "--global",
    "--help",
    "--json",
    "--no-color",
  ]),
  longOptionsWithRequiredValues: new Set([
    "--cwd",
    "--limit",
    "--provider",
    "--since",
  ]),
  longOptionsWithOptionalValues: new Set(),
};
|
|
||
/**
 * Flag spec for each subcommand, keyed by subcommand name.
 * `setup` shares the `install` spec.
 */
const COMMAND_FLAG_SPECS = new Map<string, CliFlagSpec>(
  Object.entries({
    install: INSTALL_FLAG_SPEC,
    setup: INSTALL_FLAG_SPEC,
    version: VERSION_FLAG_SPEC,
    rules: RULES_FLAG_SPEC,
    why: WHY_FLAG_SPEC,
    // NOTE(review): an automated review (Cursor Bugbot, commit db52fc6, medium
    // severity) flagged this entry as "Stats strips trailing no-score"; the
    // full finding is not visible here — confirm before relying on this spec.
    stats: STATS_FLAG_SPEC,
  }),
);
|
|
||
/**
 * Reports whether a CLI argument looks like a flag: it starts with `-` but is
 * not the lone `-` token.
 */
const isFlagLike = (argument: string): boolean => {
  if (argument === "-") return false;
  return argument.startsWith("-");
};
|
|
||


Uh oh!
There was an error while loading. Please reload this page.