Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions .github/workflows/check-python-code.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@ on:
- 'packages/**'
- 'pyproject.toml'
- 'uv.lock'
- 'Makefile'
- '.github/workflows/check-python-code.yaml'
push:
branches: [main, dev]
paths:
- 'packages/**'
- 'pyproject.toml'
- 'uv.lock'
- 'Makefile'
- '.github/workflows/check-python-code.yaml'

permissions:
contents: read
Expand All @@ -31,7 +35,8 @@ jobs:
# Default resolution exercises the committed lock against every
# supported Python minor version. The lowest-direct cell pins each
# direct dependency to its declared floor (see UV_RESOLUTION below)
# and runs only on the Python floor.
# and runs only on the Python floor, since the resolved-low pyspark
# 3.4 wheels exist for 3.10/3.11 only.
python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
resolution: [default]
include:
Expand All @@ -56,9 +61,19 @@ jobs:
with:
python-version: ${{ matrix.python }}

# PySpark 3.4 (the declared minimum) does not support Java 21, which is
# the default JDK on ubuntu-latest runners. Pin to Java 17 for the
# lowest-direct cell so the resolved pyspark==3.4.0 can actually start.
- name: Set up JDK 17
if: matrix.resolution == 'lowest-direct'
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
with:
distribution: temurin
java-version: '17'

# UV_RESOLUTION=lowest-direct makes `uv sync` re-resolve every direct
# dependency to the lowest version permitted by pyproject.toml. This
# exercises the declared floor (e.g. pydantic==2.12.0) instead of
# exercises the declared floor (e.g. pyspark==3.4.0) instead of
# whatever the committed lock happens to point at. Failures here mean
# a direct dep's minimum needs to be bumped. Set via GITHUB_ENV only
# in the relevant cell so default cells run with no UV_RESOLUTION at
Expand All @@ -69,3 +84,14 @@ jobs:

- name: Run make check
run: make check

# `make check` regenerates the committed PySpark output in place. If a PR
# changed the schema or codegen without committing the regenerated files,
# the working tree now differs from what was committed -- fail loudly so
# stale generated output cannot pass CI.
- name: Verify generated PySpark output is committed
run: |
git diff --exit-code -- \
packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated \
packages/overture-schema-pyspark/tests/generated \
|| { echo "::error::Generated PySpark output is stale. Run 'make generate-pyspark' and commit."; exit 1; }
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
docs/docusaurus
__pycache__/
.coverage
.testmondata*
36 changes: 31 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,49 @@
.PHONY: default uv-sync check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines
.PHONY: default uv-sync clean-pyspark generate-pyspark verify-pyspark-generated check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines

TESTMON ?= --testmon

default: test-all

install: uv-sync

uv-sync:
@uv sync --all-packages 2> /dev/null
@uv sync --all-packages --all-extras -q

PYSPARK_EXPRESSIONS := packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated
PYSPARK_GENERATED_TESTS := packages/overture-schema-pyspark/tests/generated

clean-pyspark:
@rm -rf $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS)

check: uv-sync
generate-pyspark: uv-sync clean-pyspark
@uv run overture-codegen generate --format pyspark \
--output-dir $(PYSPARK_EXPRESSIONS) \
--test-output-dir $(PYSPARK_GENERATED_TESTS)
@uv run ruff check --fix --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS)
@uv run ruff format --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS)

check: uv-sync generate-pyspark
@$(MAKE) -j test-only doctest-only lint-only mypy-only

# Regenerate and fail if the committed generated output differs. Catches PRs
# that change the schema or codegen without committing the regenerated files --
# `check` itself regenerates, so without this guard stale committed output is
# silently overwritten before the tests run and never verified.
verify-pyspark-generated: generate-pyspark
@git diff --exit-code -- $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) \
|| { echo "Generated PySpark output is stale; run 'make generate-pyspark' and commit."; exit 1; }

# test-all is the unconditional full run -- testmon-independent, unlike the
# incremental test/test-only targets -- so data-only changes (golden JSON,
# [[examples]]) that testmon cannot see still get exercised.
test-all: uv-sync
@uv run pytest -W error packages/

test: uv-sync
@uv run pytest -W error packages/ -x -q --tb=short
@uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short

test-only:
@uv run pytest -W error packages/ -x -q --tb=short
@uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short

coverage: uv-sync
@uv run pytest packages/ --cov overture.schema --cov-report=term --cov-report=html && open htmlcov/index.html
Expand Down
4 changes: 2 additions & 2 deletions packages/overture-schema-codegen/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ Rendering Output formatting, all presentation decisions
^
Output Layout What to generate, where it goes, how outputs link
^
Extraction TypeInfo, FieldSpec, ModelSpec, UnionSpec
Extraction TypeInfo, FieldSpec, RecordSpec, UnionSpec
^
Discovery discover_models() from overture-schema-common
```

**Discovery** loads registered Pydantic models via entry points. The return dict
includes both concrete `BaseModel` subclasses (like `Building`) and discriminated union
type aliases (like `Segment`). Both satisfy the `FeatureSpec` protocol and flow through
type aliases (like `Segment`). Both satisfy the `ModelSpec` protocol and flow through
the same pipeline.

**Extraction** unwraps type annotations into specs. `analyze_type()` is the central
Expand Down
Loading
Loading