millionco · aidenybai · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md
@@ -0,0 +1,14 @@
+---
+"react-doctor": minor
+---
+
+Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history.
+
+`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus the Cursor composer database — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo".
+
+- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. A scan that errors, is skipped, or fails in its lint phase is dropped rather than counted as zero-diagnostic "clean" code, so un-lintable output can't inflate a model's score.
+- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor.
+- Cursor attribution reads the canonical composer database (`state.vscdb`) directly, so each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2`) and an exact post-edit snapshot of every edited file — the model-less agent-transcript JSONL files are no longer used. Attribution falls back to `unknown` only for chats left on the "Auto" model.
+- Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work.
+- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout.
+- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), the Cursor composer database requires `node:sqlite` (Node 22.13+) and covers GUI agent sessions (not cursor-agent CLI runs), and the score requires network access.
diff --git a/packages/core/src/highlighter.ts b/packages/core/src/highlighter.ts
@@ -1,12 +1,21 @@
 import pc from "picocolors";
 
+// Orange (Claude's brand color) is not part of the 16-color palette that
+// picocolors exposes, so it is emitted as a hand-written 256-color foreground
+// escape and closed with the default-foreground reset.
+const ORANGE_ANSI_CODE = 208;
+
+/**
+ * Builds the `orange` formatter for a given color preference.
+ *
+ * @param enabled - Whether ANSI color escapes may be emitted.
+ * @returns A formatter that wraps its input in the orange escape sequence when
+ *   `enabled` is true, and otherwise returns the input stringified, unstyled.
+ */
+const makeOrange = (enabled: boolean): ((input: string | number) => string) => {
+  // Color disabled: pass the text through untouched (numbers are stringified).
+  if (!enabled) return (input) => String(input);
+  const openSequence = `\u001b[38;5;${ORANGE_ANSI_CODE}m`;
+  const closeSequence = "\u001b[39m";
+  return (input) => `${openSequence}${input}${closeSequence}`;
+};
+
+// Named color formatters for CLI output. All entries come straight from
+// picocolors except `orange`, which is the hand-built formatter seeded from
+// picocolors' detected color support.
 export const highlighter = {
   error: pc.red,
   warn: pc.yellow,
   info: pc.cyan,
   success: pc.green,
   dim: pc.dim,
   gray: pc.gray,
+  orange: makeOrange(pc.isColorSupported),
   bold: pc.bold,
 };
 
@@ -27,5 +36,6 @@ export const setColorEnabled = (enabled: boolean): void => {
   highlighter.success = colors.green;
   highlighter.dim = colors.dim;
   highlighter.gray = colors.gray;
+  highlighter.orange = makeOrange(enabled);
   highlighter.bold = colors.bold;
 };
diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts
@@ -0,0 +1,135 @@
+import * as path from "node:path";
+import { resolveScanTarget, type ReactDoctorConfig } from "@react-doctor/core";
+import { aggregateStats } from "../../stats/aggregate-stats.js";
+import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js";
+import { discoverSessions } from "../../stats/discover-sessions.js";
+import { renderStatsReport } from "../../stats/render-stats.js";
+import { runStatsScan } from "../../stats/run-stats-scan.js";
+import type { StatsProvider, StatsReport, StatsScopeOptions } from "../../stats/types.js";
+import { METRIC } from "../utils/constants.js";
+import { enableJsonMode } from "../utils/json-mode.js";
+import { recordCount } from "../utils/record-metric.js";
+import { spinner } from "../utils/spinner.js";
+
+/**
+ * Raw option values for `react-doctor stats` as handed to `statsAction`.
+ * String-valued options are validated and converted inside `statsAction`.
+ */
+export interface StatsFlags {
+  /** When true the report scope is "global"; otherwise it is "repo". */
+  global?: boolean;
+  /** Date text parsed with `new Date(...)`; an unparseable value throws. */
+  since?: string;
+  /** Positive integer as text; `STATS_DEFAULT_SESSION_LIMIT` is used when omitted. */
+  limit?: string;
+  /** One of "claude", "codex", or "cursor"; any other value throws. */
+  provider?: string;
+  /** Write the report to stdout as JSON instead of the rendered text report. */
+  json?: boolean;
+  /** Directory the scan target is resolved from; falls back to `process.cwd()`. */
+  cwd?: string;
+}
+
+// Provider names accepted by `--provider`.
+const VALID_PROVIDERS = new Set<string>(["claude", "codex", "cursor"]);
+
+/** Type guard narrowing an arbitrary string to a known `StatsProvider`. */
+const isStatsProvider = (value: string): value is StatsProvider => VALID_PROVIDERS.has(value);
+
+/**
+ * Validates the raw `--provider` option.
+ *
+ * @param value - Raw option text, or `undefined` when the flag was omitted.
+ * @returns `undefined` when omitted, otherwise the validated provider name.
+ * @throws Error when the value is not one of the known providers.
+ */
+const parseProvider = (value: string | undefined): StatsProvider | undefined => {
+  // Only a supplied-but-unrecognised value is an error; "omitted" passes through.
+  if (value !== undefined && !isStatsProvider(value)) {
+    throw new Error(`Unknown provider "${value}". Expected one of: claude, codex, cursor.`);
+  }
+  return value;
+};
+
+/**
+ * Converts the raw `--since` option into a `Date`.
+ *
+ * @param value - Raw option text, or `undefined` when the flag was omitted.
+ * @returns `undefined` when omitted, otherwise the parsed date.
+ * @throws Error when `new Date(value)` does not yield a valid date.
+ */
+const parseSince = (value: string | undefined): Date | undefined => {
+  if (value === undefined) return undefined;
+  const sinceDate = new Date(value);
+  // An unparseable string produces an "Invalid Date" whose timestamp is NaN.
+  const isValidDate = !Number.isNaN(sinceDate.getTime());
+  if (isValidDate) return sinceDate;
+  throw new Error(`Invalid --since date "${value}". Use e.g. 2026-06-01.`);
+};
+
+/**
+ * Converts the raw `--limit` option into a session count.
+ *
+ * The whole value must be a plain base-10 integer. `Number.parseInt` alone
+ * stops at the first non-digit, so inputs such as "10abc", "1.5", or "1e3"
+ * would otherwise be silently accepted as 10, 1, and 1.
+ *
+ * @param value - Raw option text, or `undefined` when the flag was omitted.
+ * @returns `STATS_DEFAULT_SESSION_LIMIT` when omitted, otherwise the parsed
+ *   positive integer.
+ * @throws Error when the value is not a positive, safely representable integer.
+ */
+const parseLimit = (value: string | undefined): number => {
+  if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT;
+  // Surrounding whitespace is tolerated; anything else besides digits is not.
+  const isPlainInteger = /^\d+$/.test(value.trim());
+  const parsed = isPlainInteger ? Number.parseInt(value, 10) : Number.NaN;
+  // isSafeInteger also rejects NaN and values too large to represent exactly.
+  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
+    throw new Error(`Invalid --limit "${value}". Use a positive integer, e.g. 200.`);
+  }
+  return parsed;
+};
+
+/**
+ * Resolves the directory a stats run is scoped to, plus any user config.
+ *
+ * Best-effort: if `resolveScanTarget` fails for any reason, the run falls back
+ * to the plain resolved path with no user config instead of aborting.
+ *
+ * @param directory - Directory to resolve the scan target from.
+ * @returns The resolved root directory and the user config (or `null`).
+ */
+const resolveTarget = async (
+  directory: string,
+): Promise<{ root: string; userConfig: ReactDoctorConfig | null }> => {
+  try {
+    const { resolvedDirectory, userConfig } = await resolveScanTarget(directory);
+    return { root: resolvedDirectory, userConfig };
+  } catch {
+    // Deliberate fallback: target resolution failed, so use the path as given.
+    return { root: path.resolve(directory), userConfig: null };
+  }
+};
+
+/**
+ * Runs `react-doctor stats`: discovers agent sessions, scans the code they
+ * wrote, aggregates the results into a report, records a usage metric, and
+ * writes the report to stdout (JSON or rendered text).
+ *
+ * @param flags - Raw CLI option values; string options are validated here.
+ * @throws Error when `--since`, `--limit`, or `--provider` is invalid, or when
+ *   discovery, scanning, or aggregation throws.
+ */
+export const statsAction = async (flags: StatsFlags): Promise<void> => {
+  const directory = flags.cwd ?? process.cwd();
+  // Register JSON mode up front so any throw (flag parsing, scan, or score API
+  // failure) is emitted as a structured JSON error by the top-level handler
+  // instead of plain text — and so incidental logs (e.g. a score-API warning)
+  // never corrupt the report on stdout.
+  if (flags.json) enableJsonMode({ compact: false, directory });
+  // Flag validation happens here; each parser throws on an invalid value.
+  const scope: StatsScopeOptions = {
+    global: flags.global ?? false,
+    since: parseSince(flags.since),
+    limit: parseLimit(flags.limit),
+    provider: parseProvider(flags.provider),
+  };
+
+  const { root, userConfig } = await resolveTarget(directory);
+
+  // ora renders to stderr; suppress it in JSON mode so the run stays quiet.
+  const progress = flags.json ? null : spinner("Looking through your agent history…").start();
+  let report: StatsReport;
+  let providerCount: number;
+  try {
+    const sessions = await discoverSessions(root, scope, (foundCount) =>
+      progress?.update(`Looking through your agent history… (${foundCount} found)`),
+    );
+    progress?.update("Checking the code each agent wrote…");
+    // In global scope no repo root is passed to the scan (null).
+    const results = await runStatsScan(sessions, scope.global ? null : root, {
+      onProgress: (completedCount, totalCount) =>
+        progress?.update(`Checking the code each agent wrote… (${completedCount}/${totalCount})`),
+    });
+    progress?.update("Scoring…");
+    const aggregated = await aggregateStats(results, userConfig);
+    providerCount = aggregated.providers.length;
+
+    report = {
+      scope: scope.global ? "global" : "repo",
+      directory: root,
+      models: aggregated.models,
+      providers: aggregated.providers,
+      best: aggregated.best,
+      worst: aggregated.worst,
+      sessionsAnalyzed: results.length,
+      // Sessions that had at least one file scanned.
+      sessionsRanked: results.filter((result) => result.filesScanned > 0).length,
+      // Sessions with reconstructed files but none of them scanned.
+      sessionsNonReact: results.filter(
+        (result) => result.filesScanned === 0 && result.reconstructedFiles > 0,
+      ).length,
+      // Sessions with nothing scanned or reconstructed, only unreconstructable edits.
+      sessionsUnreconstructable: results.filter(
+        (result) =>
+          result.filesScanned === 0 &&
+          result.reconstructedFiles === 0 &&
+          result.unreconstructable > 0,
+      ).length,
+      generatedAt: new Date().toISOString(),
+    };
+    progress?.succeed("Done.");
+  } finally {
+    // Runs on both the success and the failure path.
+    progress?.stop();
+  }
+
+  // Only reached when the scan succeeded; a throw above skips the metric.
+  recordCount(METRIC.statsRun, 1, {
+    scope: report.scope,
+    sessions: report.sessionsAnalyzed,
+    providers: providerCount,
+    provider: scope.provider ?? "all",
+  });
+
+  if (flags.json) {
+    process.stdout.write(`${JSON.stringify({ schemaVersion: 1, ...report }, null, 2)}\n`);
+    return;
+  }
+
+  process.stdout.write(`${renderStatsReport(report)}\n`);
+};
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
@@ -13,6 +13,7 @@ import {
   rulesSetAction,
   rulesUnignoreTagAction,
 } from "./commands/rules.js";
+import { statsAction } from "./commands/stats.js";
 import { versionAction } from "./commands/version.js";
 import { whyAction } from "./commands/why.js";
 import { applyColorPreference } from "./utils/apply-color-preference.js";
@@ -80,8 +81,12 @@ ${formatExampleLines([
 ])}
 
 ${highlighter.dim("Configuration:")}
-  Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info('"reactDoctor"')} key in your package.json) in the project root.
-  Use ${highlighter.info("react-doctor rules")} to list, explain, and configure rules. CLI flags always override config values.
+  Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info(
+    '"reactDoctor"',
+  )} key in your package.json) in the project root.
+  Use ${highlighter.info(
+    "react-doctor rules",
+  )} to list, explain, and configure rules. CLI flags always override config values.
 
 ${highlighter.dim("Feedback & bug reports:")}
   ${highlighter.info(`${CANONICAL_GITHUB_URL}/issues`)}
@@ -103,6 +108,31 @@ ${highlighter.dim("Learn more:")}
   ${highlighter.info(CANONICAL_GITHUB_URL)}
 `;
 
+// Help text appended after the generated `stats --help` output: usage
+// examples, a short description of how the leaderboard is built, and caveats.
+const renderStatsHelpEpilog = (): string => `
+${highlighter.dim("Examples:")}
+${formatExampleLines([
+  ["react-doctor stats", "rank agents on sessions that touched this repo"],
+  ["react-doctor stats --global", "rank across every repository on this machine"],
+  ["react-doctor stats --provider claude", "only Claude Code sessions"],
+  ["react-doctor stats --since 2026-06-01", "only recent sessions"],
+  ["react-doctor stats --json", "machine-readable leaderboard"],
+])}
+
+${highlighter.dim("How it works:")}
+  Reads local agent history (Claude Code + Codex transcripts, the Cursor
+  composer database), reconstructs the code each model wrote, lints it, and
+  ranks models + providers by score.
+
+${highlighter.dim("Caveats:")}
+  Codex shell-based edits aren't reconstructable (partial coverage). Cursor uses
+  the GUI composer database (cursor-agent CLI transcripts are not included), and
+  attribution falls back to "unknown" only for chats left on "Auto". The score
+  requires network access.
+
+${highlighter.dim("Learn more:")}
+  ${highlighter.info(CANONICAL_GITHUB_URL)}
+`;
+
 const collectCategoryOption = (value: string, previousValues: string[] | undefined): string[] => [
   ...(previousValues ?? []),
   value,
@@ -227,6 +257,24 @@ program
   .option("--no-color", "disable colored output (also honors NO_COLOR)")
   .action(versionAction);
 
+// Registers the `stats` subcommand, its options, and its help epilog.
+// NOTE(review): the "default: 200" in the --limit help text is hard-coded here,
+// while statsAction falls back to STATS_DEFAULT_SESSION_LIMIT — confirm the two
+// agree.
+program
+  .command("stats")
+  .description("Rank agents/models by the React Doctor health of the code they wrote")
+  .option("--global", "include sessions from every repository (default: this repo only)")
+  .option("--since <date>", "only sessions modified on or after this date (e.g. 2026-06-01)")
+  .option("--limit <n>", "max sessions to analyze, newest first (default: 200)")
+  .option("--provider <name>", "only one source: claude, codex, or cursor")
+  .option("--json", "output a structured JSON leaderboard")
+  .option("-c, --cwd <cwd>", "working directory", process.cwd())
+  .option("--color", "force colored output")
+  .option("--no-color", "disable colored output (also honors NO_COLOR)")
+  .addHelpText("after", renderStatsHelpEpilog)
+  // stats redeclares --json/--cwd/--color, but the root program also exposes
+  // them as globals (e.g. --json for the default inspect command). Merge via
+  // optsWithGlobals() so a flag works whether it lands before or after the
+  // subcommand.
+  .action((_options, command) => statsAction(command.optsWithGlobals()));
+
 const rules = program
   .command("rules")
   .description("List, explain, and configure which React Doctor rules run");

diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts
@@ -181,6 +181,9 @@ export const METRIC = {
   installDependency: "install.dependency",
   rulesChanged: "rules.changed",
   rulesQueried: "rules.queried",
+  // `react-doctor stats`: one counter per run (adoption), with the providers
+  // discovered and the number of agent sessions scored as attributes.
+  statsRun: "stats.run",
   // Editor language server (`react-doctor experimental-lsp`). Each workspace
   // scan burst is one wide-event span (op `lsp.scan`) plus these metrics.
   lspSessionStarted: "lsp.session.started",

diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -99,12 +99,22 @@ const WHY_FLAG_SPEC: CliFlagSpec = {
   shortOptionsWithRequiredValues: new Set(["-c"]),
 };
 
+// `stats` takes no positionals — just the scope/output options. The sets below
+// list the flags declared for the `stats` command in cli/index.ts, plus
+// --help/-h.
+const STATS_FLAG_SPEC: CliFlagSpec = {
+  longOptionsWithoutValues: new Set(["--color", "--global", "--help", "--json", "--no-color"]),
+  longOptionsWithRequiredValues: new Set(["--cwd", "--limit", "--provider", "--since"]),
+  longOptionsWithOptionalValues: new Set(),
+  shortOptionsWithoutValues: new Set(["-h"]),
+  shortOptionsWithRequiredValues: new Set(["-c"]),
+};
+
+// Flag spec for each subcommand, keyed by command name (`setup` shares the
+// `install` spec).
 const COMMAND_FLAG_SPECS = new Map<string, CliFlagSpec>([
   ["install", INSTALL_FLAG_SPEC],
   ["setup", INSTALL_FLAG_SPEC],
   ["version", VERSION_FLAG_SPEC],
   ["rules", RULES_FLAG_SPEC],
   ["why", WHY_FLAG_SPEC],
+  ["stats", STATS_FLAG_SPEC],
 ]);
 
 const isFlagLike = (argument: string): boolean => argument.startsWith("-") && argument !== "-";