Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e955bdd
feat: scaffold security contacts worker (CM-1243)
mbani01 Jun 29, 2026
7bc5c4f
feat: security contacts worker scaffold, scoring and reconciliation (…
mbani01 Jun 29, 2026
55fee28
feat: add security-insights extractor (A1) for security contacts (CM-…
mbani01 Jun 29, 2026
9a851a6
feat: add SECURITY_CONTACTS extractor (A3) for security contacts
mbani01 Jun 29, 2026
351a6c9
feat: add security.txt extractor (A4) for security contacts
mbani01 Jun 29, 2026
6eb4137
feat: add SECURITY.md extractor (B1) for security contacts
mbani01 Jun 29, 2026
17ddd3d
feat: add npm registry contact extractor (B2/npm)
mbani01 Jun 29, 2026
2712873
feat: wire security contacts pipeline + DB write
mbani01 Jun 30, 2026
cd86897
chore: wire security-contacts-worker ops (CLI, compose, scripts, env)
mbani01 Jun 30, 2026
8cb71f6
chore: use unified github host
mbani01 Jun 30, 2026
efba8cd
fix: code review fixes
mbani01 Jul 1, 2026
f295c3a
chore: raise security contacts worker concurrency; hardcode tuning co…
mbani01 Jul 1, 2026
95dcf42
fix: code review fixes
mbani01 Jul 1, 2026
c50d5e9
fix: isolate per-repo failures in security contacts batch
mbani01 Jul 1, 2026
5f32ed1
fix: clear PVR reporting URL on disable; drain poison repos
mbani01 Jul 1, 2026
1cf1e69
Merge branch 'main' into feat/security_contacts_worker
mbani01 Jul 1, 2026
85cb09d
fix: incorrect SECURITY_CONTACTS_USER_AGENT value
mbani01 Jul 1, 2026
082f6a5
fix: skip PVR for archived repos; treat PVR 422 as unknown
mbani01 Jul 1, 2026
2ece757
fix: rate-limit-safe GitHub access for security contacts
mbani01 Jul 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/.env.dist.composed
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@ CROWD_PACKAGES_DB_PORT=5432
CROWD_PACKAGES_DB_USERNAME=postgres
CROWD_PACKAGES_DB_PASSWORD=example
CROWD_PACKAGES_DB_DATABASE=packages-db

# security-contacts-worker
SECURITY_CONTACTS_USER_AGENT="lfx-security-contacts-worker (security@linuxfoundation.org)"
3 changes: 3 additions & 0 deletions backend/.env.dist.local
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ ENRICHER_BATCH_SIZE=100
ENRICHER_REPO_UPDATE_INTERVAL_HOURS=24
ENRICHER_IDLE_SLEEP_SEC=60

# security-contacts-worker
SECURITY_CONTACTS_USER_AGENT="lfx-security-contacts-worker (security@linuxfoundation.org)"

OSSPCKGS_GCP_PROJECT=local-dev
OSSPCKGS_GCS_BUCKET=local-dev
OSSPCKGS_GCP_CREDENTIALS_B64=e30=
Expand Down
27 changes: 27 additions & 0 deletions backend/src/osspckgs/migrations/V1782950400__security_contacts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Security contact entries attached to a repository.
-- A repo holds at most one row per (channel, value) pair; that is enforced by the
-- unique index security_contacts_repo_channel_value_uq created below.
CREATE TABLE IF NOT EXISTS security_contacts (
id BIGSERIAL PRIMARY KEY,
-- Owning repo; contact rows are deleted together with the repo (ON DELETE CASCADE).
repo_id BIGINT NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
channel TEXT NOT NULL,
value TEXT NOT NULL,
role TEXT NOT NULL,
-- Optional: the only nullable column in this table.
name TEXT,
-- NUMERIC(4,3) stores three decimal places with magnitude below 10.
-- NOTE(review): presumably a 0..1 score — confirm against the scoring code.
score NUMERIC(4,3) NOT NULL,
confidence TEXT NOT NULL,
-- JSON document, defaulting to an empty array.
provenance JSONB NOT NULL DEFAULT '[]',
last_refreshed TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Only defaulted at insert time; no trigger is defined in this migration, so
-- writers have to set updated_at themselves on update.
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Enforces one row per (repo_id, channel, value).
CREATE UNIQUE INDEX IF NOT EXISTS security_contacts_repo_channel_value_uq
ON security_contacts (repo_id, channel, value);

-- Supports lookups of a repo's contacts filtered by confidence.
CREATE INDEX IF NOT EXISTS security_contacts_repo_confidence_idx
ON security_contacts (repo_id, confidence);

-- Repo-level security metadata columns. Every column is nullable with no default,
-- and each statement is idempotent (IF NOT EXISTS), so the migration is re-runnable.
ALTER TABLE repos ADD COLUMN IF NOT EXISTS pvr_enabled BOOL;
ALTER TABLE repos ADD COLUMN IF NOT EXISTS security_policy_url TEXT;
ALTER TABLE repos ADD COLUMN IF NOT EXISTS vulnerability_reporting_url TEXT;
ALTER TABLE repos ADD COLUMN IF NOT EXISTS bug_bounty_url TEXT;
ALTER TABLE repos ADD COLUMN IF NOT EXISTS security_txt_url TEXT;
ALTER TABLE repos ADD COLUMN IF NOT EXISTS contacts_last_refreshed TIMESTAMPTZ;
16 changes: 7 additions & 9 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion scripts/builders/packages.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DOCKERFILE="./services/docker/Dockerfile.packages"
CONTEXT="../"
REPO="sjc.ocir.io/axbydjxa5zuh/packages"
SERVICES="github-repos-enricher bq-dataset-ingest npm-worker maven-worker osv-worker dockerhub-sync cargo-worker go-worker nuget-worker"
SERVICES="github-repos-enricher bq-dataset-ingest npm-worker maven-worker osv-worker dockerhub-sync cargo-worker go-worker nuget-worker security-contacts-worker"
65 changes: 65 additions & 0 deletions scripts/services/security-contacts-worker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
version: '3.1'

x-env-args: &env-args
DOCKER_BUILDKIT: 1
NODE_ENV: docker
SERVICE: security-contacts-worker
SHELL: /bin/sh
SUPPRESS_NO_CONFIG_WARNING: 'true'
CROWD_TEMPORAL_TASKQUEUE: packages-worker

services:
security-contacts-worker:
build:
context: ../../
dockerfile: ./scripts/services/docker/Dockerfile.packages
command: 'pnpm run start:security-contacts-worker'
working_dir: /usr/crowd/app/services/apps/packages_worker
env_file:
- ../../backend/.env.dist.local
- ../../backend/.env.dist.composed
- ../../backend/.env.override.local
- ../../backend/.env.override.composed
environment:
<<: *env-args
restart: always
networks:
- crowd-bridge

security-contacts-worker-dev:
build:
context: ../../
dockerfile: ./scripts/services/docker/Dockerfile.packages
command: 'pnpm run dev:security-contacts-worker'
working_dir: /usr/crowd/app/services/apps/packages_worker
env_file:
- ../../backend/.env.dist.local
- ../../backend/.env.dist.composed
- ../../backend/.env.override.local
- ../../backend/.env.override.composed
environment:
<<: *env-args
hostname: security-contacts-worker
networks:
- crowd-bridge
volumes:
- ../../services/libs/audit-logs/src:/usr/crowd/app/services/libs/audit-logs/src
- ../../services/libs/common/src:/usr/crowd/app/services/libs/common/src
- ../../services/libs/common_services/src:/usr/crowd/app/services/libs/common_services/src
- ../../services/libs/data-access-layer/src:/usr/crowd/app/services/libs/data-access-layer/src
- ../../services/libs/database/src:/usr/crowd/app/services/libs/database/src
- ../../services/libs/integrations/src:/usr/crowd/app/services/libs/integrations/src
- ../../services/libs/logging/src:/usr/crowd/app/services/libs/logging/src
- ../../services/libs/nango/src:/usr/crowd/app/services/libs/nango/src
- ../../services/libs/opensearch/src:/usr/crowd/app/services/libs/opensearch/src
- ../../services/libs/queue/src:/usr/crowd/app/services/libs/queue/src
- ../../services/libs/redis/src:/usr/crowd/app/services/libs/redis/src
- ../../services/libs/snowflake/src:/usr/crowd/app/services/libs/snowflake/src
- ../../services/libs/telemetry/src:/usr/crowd/app/services/libs/telemetry/src
- ../../services/libs/temporal/src:/usr/crowd/app/services/libs/temporal/src
- ../../services/libs/types/src:/usr/crowd/app/services/libs/types/src
- ../../services/apps/packages_worker/src:/usr/crowd/app/services/apps/packages_worker/src

networks:
crowd-bridge:
external: true
5 changes: 5 additions & 0 deletions services/apps/packages_worker/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
"start:go-worker": "CROWD_TEMPORAL_TASKQUEUE=go-worker SERVICE=go-worker tsx src/bin/go-worker.ts",
"dev:go-worker": "CROWD_TEMPORAL_TASKQUEUE=go-worker SERVICE=go-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9241 src/bin/go-worker.ts",
"dev:go-worker:local": "set -a && . ../../../backend/.env.dist.local && . ../../../backend/.env.override.local && set +a && CROWD_TEMPORAL_TASKQUEUE=go-worker SERVICE=go-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9241 src/bin/go-worker.ts",
"start:security-contacts-worker": "CROWD_TEMPORAL_TASKQUEUE=packages-worker SERVICE=security-contacts-worker tsx src/bin/security-contacts-worker.ts",
"dev:security-contacts-worker": "CROWD_TEMPORAL_TASKQUEUE=packages-worker SERVICE=security-contacts-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9243 src/bin/security-contacts-worker.ts",
"dev:security-contacts-worker:local": "set -a && . ../../../backend/.env.dist.local && . ../../../backend/.env.override.local && set +a && CROWD_TEMPORAL_TASKQUEUE=packages-worker SERVICE=security-contacts-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9243 src/bin/security-contacts-worker.ts",
"start:nuget-worker": "CROWD_TEMPORAL_TASKQUEUE=nuget-worker SERVICE=nuget-worker tsx src/bin/nuget-worker.ts",
"dev:nuget-worker": "CROWD_TEMPORAL_TASKQUEUE=nuget-worker SERVICE=nuget-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9242 src/bin/nuget-worker.ts",
"dev:nuget-worker:local": "set -a && . ../../../backend/.env.dist.local && . ../../../backend/.env.override.local && set +a && CROWD_TEMPORAL_TASKQUEUE=nuget-worker SERVICE=nuget-worker LOG_LEVEL=trace nodemon --watch src --watch ../../libs --ext ts --exec tsx --inspect=0.0.0.0:9242 src/bin/nuget-worker.ts",
Expand Down Expand Up @@ -72,6 +75,7 @@
"@temporalio/activity": "~1.17.2",
"@temporalio/client": "~1.17.2",
"@temporalio/workflow": "~1.17.2",
"js-yaml": "^4.1.1",
"jsonwebtoken": "^9.0.0",
"semver": "^7.6.0",
"axios": "^1.16.1",
Expand All @@ -83,6 +87,7 @@
"unzipper": "^0.12.3"
},
"devDependencies": {
"@types/js-yaml": "^4.0.9",
"@types/jsonwebtoken": "^9.0.0",
"@types/node": "^20.8.2",
"@types/pg-copy-streams": "^1.2.5",
Expand Down
1 change: 1 addition & 0 deletions services/apps/packages_worker/src/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ export {
} from './cargo/activities'
export { enrichGoVersionsBatch, enrichGoStatusBatch } from './go/activities'
export { processNuGetBatch } from './nuget/activities'
export { processSecurityContactsBatch } from './security-contacts/activities'
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { scheduleSecurityContactsIngestion } from '../security-contacts/schedule'
import { svc } from '../service'

/**
 * Entrypoint of the security-contacts worker process.
 *
 * Runs strictly in sequence: initialise the shared service, register the
 * security-contacts ingestion schedule, then start the service.
 */
async function startSecurityContactsWorker(): Promise<void> {
  await svc.init()
  await scheduleSecurityContactsIngestion()
  await svc.start()
}

// Deferred to the next event-loop turn, after module evaluation has finished.
setImmediate(startSecurityContactsWorker)
7 changes: 7 additions & 0 deletions services/apps/packages_worker/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ export function getEnricherConfig() {
}
}

/**
 * Builds the configuration object for the security-contacts worker.
 *
 * @returns an object whose `userAgent` is whatever `requireEnv` resolves for
 *   `SECURITY_CONTACTS_USER_AGENT`.
 */
export function getSecurityContactsConfig() {
  // Sent on all registry calls; crates.io rejects requests without an identifying UA.
  const userAgent = requireEnv('SECURITY_CONTACTS_USER_AGENT')
  return { userAgent }
}
Comment thread
mbani01 marked this conversation as resolved.
Comment on lines +49 to +54

export function getMavenConfig() {
return {
batchSize: requireEnvInt('MAVEN_FETCHER_BATCH_SIZE'),
Expand Down
56 changes: 56 additions & 0 deletions services/apps/packages_worker/src/enricher/installationPool.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import { getServiceChildLogger } from '@crowd/logging'

const log = getServiceChildLogger('installation-pool')

// Park an installation before GitHub starts rejecting — avoids a failed request + requeue
const PROACTIVE_PARK_REMAINING = 50

/**
 * Round-robins over installations, skipping ones parked until their
 * rate-limit reset.
 *
 * Callers are expected to construct the pool with at least one installation
 * id; `select()` has nothing meaningful to return for an empty pool.
 */
export class InstallationPool {
  /** installationId → epoch-ms timestamp before which it must not be selected. */
  private readonly parkedUntil = new Map<number, number>()
  /** Index into `ids` where the next `select()` scan begins. */
  private roundRobinIdx = 0

  constructor(private readonly ids: number[]) {}

  /**
   * Picks the installation to use for the next request.
   *
   * @returns the next unparked installation in round-robin order with
   *   `waitMs: 0`. When every installation is parked, returns the one whose
   *   park expires soonest plus the time to wait for it (never less than 1s).
   */
  select(): { installationId: number; waitMs: number } {
    const now = Date.now()
    const n = this.ids.length

    // First pass: the first installation at or after the cursor that is not parked.
    for (let i = 0; i < n; i++) {
      const idx = (this.roundRobinIdx + i) % n
      const id = this.ids[idx]
      if ((this.parkedUntil.get(id) ?? 0) <= now) {
        this.roundRobinIdx = (idx + 1) % n
        return { installationId: id, waitMs: 0 }
      }
    }

    // Everything is parked: fall back to whichever installation frees up first.
    let soonestReset = Infinity
    let soonestId = this.ids[0]
    for (const id of this.ids) {
      const reset = this.parkedUntil.get(id) ?? 0
      if (reset < soonestReset) {
        soonestReset = reset
        soonestId = id
      }
    }
    return { installationId: soonestId, waitMs: Math.max(1_000, soonestReset - now) }
  }

  /**
   * Parks an installation so `select()` skips it until `untilMs`.
   *
   * @param installationId installation to park
   * @param untilMs epoch-ms timestamp at which the installation becomes usable
   *   again. A `NaN` value is rejected: every comparison against NaN is false,
   *   so storing it would make the installation unselectable forever.
   */
  park(installationId: number, untilMs: number): void {
    if (Number.isNaN(untilMs)) {
      log.warn({ installationId }, 'Ignoring park request with an invalid (NaN) deadline')
      return
    }
    this.parkedUntil.set(installationId, untilMs)
  }

  /**
   * Parks an installation ahead of time when its remaining budget has dropped
   * below `PROACTIVE_PARK_REMAINING`. Does nothing when either value is
   * missing or the budget is still sufficient.
   *
   * @param installationId installation the budget figures belong to
   * @param remaining remaining request budget, if known
   * @param resetAt date string for the budget reset, if known; the park lasts
   *   until 5s past this instant
   */
  parkIfBudgetLow(
    installationId: number,
    remaining: number | null | undefined,
    resetAt: string | null | undefined,
  ): void {
    if (remaining == null || resetAt == null || remaining >= PROACTIVE_PARK_REMAINING) return

    // An unparseable date yields NaN; parking on it would disable the installation permanently.
    const resetMs = new Date(resetAt).getTime()
    if (Number.isNaN(resetMs)) {
      log.warn(
        { installationId, remaining, resetAt },
        'Budget low but reset time is unparseable — not parking installation',
      )
      return
    }

    this.park(installationId, resetMs + 5_000)
    log.info(
      { installationId, remaining, resetAt },
      'Budget low — proactively parking installation',
    )
  }
}
55 changes: 1 addition & 54 deletions services/apps/packages_worker/src/enricher/runEnrichmentLoop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { getEnricherConfig } from '../config'
import { fetchActivitySnapshot } from './fetchActivitySnapshot'
import { fetchLightRepo, parseGithubUrl } from './fetchLightRepo'
import { GithubAppConfig, getInstallationToken } from './githubAppAuth'
import { InstallationPool } from './installationPool'
import { FetchError, LightRepoResult, RepoActivitySnapshot } from './types'
import { bulkUpdateEnrichedRepos, markReposSkipped } from './updateEnrichedRepos'
import { bulkUpsertRepoActivitySnapshot } from './updateRepoActivitySnapshot'
Expand All @@ -17,65 +18,11 @@ const DB_FETCH_SIZE = 2000
const WRITE_FLUSH_SIZE = 500
const WRITE_FLUSH_MS = 5000
const MAX_FLUSH_FAILURES = 3
// Park an installation before GitHub starts rejecting — avoids a failed request + requeue
const PROACTIVE_PARK_REMAINING = 50
// Rate-limited snapshots retry once with another installation before being skipped
const SNAPSHOT_RATE_LIMIT_RETRIES = 1
// Installations whose token mint fails (e.g. org IP allowlist) sit out for an hour
const MINT_FAILURE_PARK_MS = 60 * 60 * 1000

// ─── Installation pool ────────────────────────────────────────────────────────

/**
 * Round-robins over installations, skipping ones parked until their
 * rate-limit reset.
 *
 * Callers are expected to construct the pool with at least one installation
 * id; `select()` has nothing meaningful to return for an empty pool.
 */
class InstallationPool {
  /** installationId → epoch-ms timestamp before which it must not be selected. */
  private readonly parkedUntil = new Map<number, number>()
  /** Index into `ids` where the next `select()` scan begins. */
  private roundRobinIdx = 0

  constructor(private readonly ids: number[]) {}

  /**
   * Picks the installation to use for the next request.
   *
   * @returns the next unparked installation in round-robin order with
   *   `waitMs: 0`. When every installation is parked, returns the one whose
   *   park expires soonest plus the time to wait for it (never less than 1s).
   */
  select(): { installationId: number; waitMs: number } {
    const now = Date.now()
    const n = this.ids.length

    // First pass: the first installation at or after the cursor that is not parked.
    for (let i = 0; i < n; i++) {
      const idx = (this.roundRobinIdx + i) % n
      const id = this.ids[idx]
      if ((this.parkedUntil.get(id) ?? 0) <= now) {
        this.roundRobinIdx = (idx + 1) % n
        return { installationId: id, waitMs: 0 }
      }
    }

    // Everything is parked: fall back to whichever installation frees up first.
    let soonestReset = Infinity
    let soonestId = this.ids[0]
    for (const id of this.ids) {
      const reset = this.parkedUntil.get(id) ?? 0
      if (reset < soonestReset) {
        soonestReset = reset
        soonestId = id
      }
    }
    return { installationId: soonestId, waitMs: Math.max(1_000, soonestReset - now) }
  }

  /**
   * Parks an installation so `select()` skips it until `untilMs`.
   *
   * @param installationId installation to park
   * @param untilMs epoch-ms timestamp at which the installation becomes usable
   *   again. A `NaN` value is rejected: every comparison against NaN is false,
   *   so storing it would make the installation unselectable forever.
   */
  park(installationId: number, untilMs: number): void {
    if (Number.isNaN(untilMs)) {
      log.warn({ installationId }, 'Ignoring park request with an invalid (NaN) deadline')
      return
    }
    this.parkedUntil.set(installationId, untilMs)
  }

  /**
   * Parks an installation ahead of time when its remaining budget has dropped
   * below `PROACTIVE_PARK_REMAINING`. Does nothing when either value is
   * missing or the budget is still sufficient.
   *
   * @param installationId installation the budget figures belong to
   * @param remaining remaining request budget, if known
   * @param resetAt date string for the budget reset, if known; the park lasts
   *   until 5s past this instant
   */
  parkIfBudgetLow(
    installationId: number,
    remaining: number | null | undefined,
    resetAt: string | null | undefined,
  ): void {
    if (remaining == null || resetAt == null || remaining >= PROACTIVE_PARK_REMAINING) return

    // An unparseable date yields NaN; parking on it would disable the installation permanently.
    const resetMs = new Date(resetAt).getTime()
    if (Number.isNaN(resetMs)) {
      log.warn(
        { installationId, remaining, resetAt },
        'Budget low but reset time is unparseable — not parking installation',
      )
      return
    }

    this.park(installationId, resetMs + 5_000)
    log.info(
      { installationId, remaining, resetAt },
      'Budget low — proactively parking installation',
    )
  }
}

// ─── Fetch with retries ───────────────────────────────────────────────────────

type FetchOutcome =
Expand Down
17 changes: 17 additions & 0 deletions services/apps/packages_worker/src/security-contacts/activities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { getServiceChildLogger } from '@crowd/logging'

import { getSecurityContactsConfig } from '../config'
import { getPackagesDb } from '../db'

import { BatchResult, processBatch } from './processBatch'

const log = getServiceChildLogger('security-contacts-activity')

/**
 * Activity that runs one security-contacts batch.
 *
 * Reads the worker config, obtains the packages DB handle, hands both to
 * `processBatch`, then logs the outcome.
 *
 * @returns the `BatchResult` produced by `processBatch`, unchanged.
 */
export async function processSecurityContactsBatch(): Promise<BatchResult> {
  // Config is read before the DB handle is requested.
  const securityContactsConfig = getSecurityContactsConfig()
  const packagesDb = await getPackagesDb()

  const batchResult = await processBatch(packagesDb, securityContactsConfig)

  // Log a shallow copy of the result's fields alongside the completion message.
  log.info({ ...batchResult }, 'Security contacts batch activity complete')
  return batchResult
}
Loading
Loading