From bc5d6811f63397cd74362ffb4ba5336f526f4b01 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Tue, 28 Apr 2026 15:35:50 -0700 Subject: [PATCH 01/11] chore: enable testmon for incremental test runs pytest-testmon tracks which tests cover which source files and skips unaffected tests on subsequent runs. Activated via a TESTMON Makefile variable so the default `make check` uses incremental selection while `make check TESTMON=` runs the full suite. Lock the dependency in the dev group, gitignore the local cache file, and thread $(TESTMON) through the test, test-all, and test-only targets. Signed-off-by: Seth Fitzsimmons --- .gitignore | 1 + Makefile | 8 +++++--- pyproject.toml | 1 + uv.lock | 15 +++++++++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 6a40dfafd..26c73e5f7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ docs/docusaurus __pycache__/ .coverage +.testmondata* diff --git a/Makefile b/Makefile index edea9b34d..a88a9917e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ .PHONY: default uv-sync check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines +TESTMON ?= --testmon + default: test-all install: uv-sync @@ -11,13 +13,13 @@ check: uv-sync @$(MAKE) -j test-only doctest-only lint-only mypy-only test-all: uv-sync - @uv run pytest -W error packages/ + @uv run pytest -W error $(TESTMON) packages/ test: uv-sync - @uv run pytest -W error packages/ -x -q --tb=short + @uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short test-only: - @uv run pytest -W error packages/ -x -q --tb=short + @uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short coverage: uv-sync @uv run pytest packages/ --cov overture.schema --cov-report=term --cov-report=html && open htmlcov/index.html diff --git a/pyproject.toml b/pyproject.toml index 6046d76a6..c21f4bc17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dev = [ "pydocstyle>=6.3.0", 
"pytest>=9.0.0", "pytest-cov>=7.0.0", + "pytest-testmon>=2.2.0", "ruff>=0.13.0", ] diff --git a/uv.lock b/uv.lock index 0bbdf1cda..e22235af7 100644 --- a/uv.lock +++ b/uv.lock @@ -934,6 +934,7 @@ dev = [ { name = "pydocstyle" }, { name = "pytest" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "ruff" }, ] @@ -946,6 +947,7 @@ dev = [ { name = "pydocstyle", specifier = ">=6.3.0" }, { name = "pytest", specifier = ">=9.0.0" }, { name = "pytest-cov", specifier = ">=7.0.0" }, + { name = "pytest-testmon", specifier = ">=2.2.0" }, { name = "ruff", specifier = ">=0.13.0" }, ] @@ -1191,6 +1193,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] +[[package]] +name = "pytest-testmon" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/1d/3e4230cc67cd6205bbe03c3527500c0ccaf7f0c78b436537eac71590ee4a/pytest_testmon-2.2.0.tar.gz", hash = "sha256:01f488e955ed0e0049777bee598bf1f647dd524e06f544c31a24e68f8d775a51", size = 23108, upload-time = "2025-12-01T07:30:24.76Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/55/ebb3c2f59fb089f08d00f764830d35780fc4e4c41dffcadafa3264682b65/pytest_testmon-2.2.0-py3-none-any.whl", hash = "sha256:2604ca44a54d61a2e830d9ce828b41a837075e4ebc1f81b148add8e90d34815b", size = 25199, upload-time = "2025-12-01T07:30:23.623Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" From 5c792399f1b207e540bba71f163298b736fa95d3 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 12:12:59 -0700 Subject: [PATCH 02/11] refactor(core): extract VehicleSelectorBase Pull the shared `dimension` and `comparison` 
fields of the five vehicle selector subtypes into a `VehicleSelectorBase` parent, and thread `discriminator="dimension"` through the `VehicleSelector` annotated union. The discriminator turns the union into a Pydantic discriminated union, so it serializes as JSON Schema's `oneOf` + `discriminator` rather than `anyOf`. Regenerated segment_baseline_schema.json captures the new shape. This is a prerequisite for downstream tooling that walks discriminated unions structurally (e.g. PySpark codegen for segment's nested vehicle scoping). Signed-off-by: Seth Fitzsimmons --- .../overture/schema/common/scoping/vehicle.py | 46 +++++++++--------- .../tests/segment_baseline_schema.json | 48 +++++++++++++++---- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py b/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py index 801d35aa3..287b25c55 100644 --- a/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py +++ b/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py @@ -38,24 +38,30 @@ class VehicleRelation(str, Enum): @no_extra_fields -class VehicleAxleCountSelector(BaseModel): +class VehicleSelectorBase(BaseModel): """ - Selects vehicles based on the number of axles they have. + Common fields shared by all vehicle selector subtypes. + + See also: `VehicleSelector`. """ - dimension: Literal[VehicleDimension.AXLE_COUNT] + dimension: VehicleDimension comparison: VehicleRelation + + +@no_extra_fields +class VehicleAxleCountSelector(VehicleSelectorBase): + """Selects vehicles based on the number of axles they have.""" + + dimension: Literal[VehicleDimension.AXLE_COUNT] value: uint8 = Field(description="Number of axles on the vehicle") @no_extra_fields -class VehicleHeightSelector(BaseModel): - """ - Selects vehicles based on their height. 
- """ +class VehicleHeightSelector(VehicleSelectorBase): + """Selects vehicles based on their height.""" dimension: Literal[VehicleDimension.HEIGHT] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -66,13 +72,10 @@ class VehicleHeightSelector(BaseModel): @no_extra_fields -class VehicleLengthSelector(BaseModel): - """ - Selects vehicles based on their length. - """ +class VehicleLengthSelector(VehicleSelectorBase): + """Selects vehicles based on their length.""" dimension: Literal[VehicleDimension.LENGTH] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -83,13 +86,10 @@ class VehicleLengthSelector(BaseModel): @no_extra_fields -class VehicleWeightSelector(BaseModel): - """ - Selects vehicles based on their weight. - """ +class VehicleWeightSelector(VehicleSelectorBase): + """Selects vehicles based on their weight.""" dimension: Literal[VehicleDimension.WEIGHT] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -100,13 +100,10 @@ class VehicleWeightSelector(BaseModel): @no_extra_fields -class VehicleWidthSelector(BaseModel): - """ - Selects vehicles based on their width. - """ +class VehicleWidthSelector(VehicleSelectorBase): + """Selects vehicles based on their width.""" dimension: Literal[VehicleDimension.WIDTH] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -123,7 +120,8 @@ class VehicleWidthSelector(BaseModel): | VehicleWeightSelector | VehicleWidthSelector, Field( - description="Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ discriminator="dimension", + description="Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", ), ] """ diff --git a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json index 4ec108313..20f144003 100644 --- a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json +++ b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json @@ -1862,7 +1862,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing AccessRestrictionRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -1878,8 +1889,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ ] }, "minItems": 1, "title": "Vehicle", @@ -2025,7 +2035,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing ProhibitedTransitionRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -2041,8 +2062,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." + ] }, "minItems": 1, "title": "Vehicle", @@ -2173,7 +2193,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing SpeedLimitRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -2189,8 +2220,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ ] }, "minItems": 1, "title": "Vehicle", From 8b4e0f532f1cd01d458eb515e09fb62fc4ec4dd3 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 23:58:32 -0700 Subject: [PATCH 03/11] chore(themes): refresh example data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the Tonga-based Division/DivisionArea/DivisionBoundary fixtures with Kauaʻi County samples that exercise admin_level, capital_division_ids, wikidata, and source license alongside the existing fields. Replace the Tonga-based Connector/Segment fixtures with a Vermooten Street junction in Pretoria that exercises access_restrictions with when.vehicle, speed_limits with when.heading, routes with ref, road_surface, and multi-source attribution. Reformat the TOML with 4-space indents and sorted keys to match sibling theme packages. Signed-off-by: Seth Fitzsimmons --- .../pyproject.toml | 143 +++++++++-------- .../pyproject.toml | 151 ++++++++++++++---- 2 files changed, 203 insertions(+), 91 deletions(-) diff --git a/packages/overture-schema-divisions-theme/pyproject.toml b/packages/overture-schema-divisions-theme/pyproject.toml index 0314d8d1b..ccec15fd7 100644 --- a/packages/overture-schema-divisions-theme/pyproject.toml +++ b/packages/overture-schema-divisions-theme/pyproject.toml @@ -1,6 +1,6 @@ [project] maintainers = [ - {name = "Overture Maps Schema Working Group"}, + { name = "Overture Maps Schema Working Group" }, ] dependencies = [ "overture-schema-common", @@ -42,111 +42,126 @@ division_boundary = "overture.schema.divisions:DivisionBoundary" overture_baselines = "overture.schema.system.testing.plugin" [[examples.Division]] -id = "350e85f6-68ba-4114-9906-c2844815988b" -geometry = "POINT (-175.2551522 -21.1353686)" -country = "TO" -version = 1 -subtype = "locality" -class = "village" -region = "TO-04" +admin_level = 2 +capital_division_ids = ["958c67d3-868a-482d-9dd8-1c65b0c1a9e8"] +country = "US" +geometry = "POINT (-159.4945109 
22.0557204)" hierarchies = [ - [ - {division_id = "fef8748b-0c91-46ad-9f2d-976d8d2de3e9", subtype = "country", name = "Tonga"}, - {division_id = "4d67561a-2292-41bd-8996-7853d276a42c", subtype = "region", name = "Tongatapu"}, - {division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44", subtype = "county", name = "Vahe Kolomotu'a"}, - {division_id = "350e85f6-68ba-4114-9906-c2844815988b", subtype = "locality", name = "Sia'atoutai"}, - ], + [ + { division_id = "f39eb4af-5206-481b-b19e-bd784ded3f05", subtype = "country", name = "United States" }, + { division_id = "a2a08395-e968-4be5-bdd7-7f63db8c2165", subtype = "region", name = "Hawaii" }, + { division_id = "c9b8adc9-4639-4392-8efe-8401eeb19929", subtype = "county", name = "Kauaʻi County" }, + ], ] -parent_division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44" -population = 534 +id = "c9b8adc9-4639-4392-8efe-8401eeb19929" +parent_division_id = "a2a08395-e968-4be5-bdd7-7f63db8c2165" +population = 71735 +region = "US-HI" +subtype = "county" theme = "divisions" type = "division" +version = 5 +wikidata = "Q111517" [examples.Division.bbox] -xmin = -175.25515747070312 -xmax = -175.255126953125 -ymin = -21.1353702545166 -ymax = -21.13536834716797 +xmax = -159.4945068359375 +xmin = -159.49453735351562 +ymax = 22.055721282958984 +ymin = 22.05571746826172 [[examples.Division.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "n3173231082@4" -update_time = "2014-12-18T09:17:03Z" - -[examples.Division.cartography] -prominence = 29 +license = "ODbL-1.0" +property = "" +record_id = "r166560@15" +update_time = "2025-08-16T14:25:55Z" [examples.Division.names] -primary = "Sia'atoutai" +primary = "Kauaʻi County" + +[examples.Division.names.common] +haw = "kalana Kauaʻi" [[examples.Division.names.rules]] +value = "Kauai County" variant = "alternate" -value = "Nafualu" [examples.Division.local_type] -en = "village" +en = "county" [[examples.DivisionArea]] -id = "eb9b112f-ec3c-47f7-b519-6f9f2e6fc2bd" -geometry = "MULTIPOLYGON 
(((-174.9553949 -21.4730179, -174.9514163 -21.4719978, -174.9520108 -21.4681253, -174.9566122 -21.4687535, -174.9553949 -21.4730179)), ((-174.9634398 -21.3476807, -174.9753507 -21.3833656, -174.9702168 -21.4037277, -174.950488 -21.4269887, -174.9082983 -21.4577763, -174.9004303 -21.4398142, -174.9048159 -21.3698688, -174.9165467 -21.3035402, -174.9126977 -21.2903268, -174.9199765 -21.2834922, -174.9634398 -21.3476807)))" -country = "TO" -version = 2 -subtype = "region" +admin_level = 2 class = "land" +country = "US" +division_id = "c9b8adc9-4639-4392-8efe-8401eeb19929" +geometry = "MULTIPOLYGON (((-160.5408313 21.6535414, -160.544491 21.6514764, -160.5369253 21.6495783, -160.5408313 21.6535414)), ((-160.0921364 22.005401, -160.1251483 21.9547066, -160.2260792 21.8900136, -160.2470031 21.841157, -160.2339532 21.7921394, -160.2022486 21.7794359, -160.1588705 21.864646, -160.0737812 21.8957962, -160.0826438 21.9302968, -160.05025 21.9848306, -160.0921364 22.005401)), ((-160.0881165 22.0243325, -160.1005193 22.0287469, -160.1026828 22.0166419, -160.0881165 22.0243325)), ((-159.7508422 21.9762242, -159.6679712 21.9531973, -159.604686 21.8923573, -159.5913126 21.9041661, -159.443465 21.8684403, -159.3461084 21.9373297, -159.3624768 21.952083, -159.3309486 21.9590284, -159.3357151 22.0450997, -159.2964497 22.1053479, -159.2926896 22.1434001, -159.335189 22.2040351, -159.4024533 22.2327054, -159.4282799 22.2164343, -159.4864746 22.2298148, -159.5060386 22.2031539, -159.5808829 22.2237075, -159.7228708 22.1497995, -159.7443872 22.099889, -159.7826601 22.0669155, -159.7849578 22.0149525, -159.7508422 21.9762242)))" +id = "109cfa53-bb13-4e37-aeb0-14f9a08c737d" is_land = true is_territorial = false -region = "TO-01" -division_id = "21597af0-b564-463c-a356-42c29e712b7d" +region = "US-HI" +subtype = "county" theme = "divisions" type = "division_area" +version = 7 [examples.DivisionArea.bbox] -xmin = -174.97535705566406 -xmax = -174.90040588378906 -ymin = -21.473018646240234 
-ymax = -21.283489227294922 +xmax = -159.29266357421875 +xmin = -160.54449462890625 +ymax = 22.23270606994629 +ymin = 21.649578094482422 [[examples.DivisionArea.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r7247527@3" -update_time = "2020-12-30T18:41:56Z" +license = "ODbL-1.0" +property = "" +record_id = "r166560@15" +update_time = "2025-08-16T14:25:55Z" [examples.DivisionArea.names] -primary = "ʻEua" +primary = "Kauaʻi County" + +[examples.DivisionArea.names.common] +haw = "kalana Kauaʻi" + +[[examples.DivisionArea.names.rules]] +value = "Kauai County" +variant = "alternate" [[examples.DivisionBoundary]] -id = "2bdf68e4-860d-3d8c-a472-ccf439a5302a" -geometry = "LINESTRING (-147.064823 -15.4231537, -147.0519131 -15.2885069, -147.048482 -15.1511701)" -country = "PF" -version = 1 -subtype = "county" -class = "maritime" -is_land = false -is_territorial = true +admin_level = 2 +class = "land" +country = "US" division_ids = [ - "ae266459-63a4-4508-8295-0101e27d039b", - "d4a6873d-885a-4f2a-bc0f-37e9d9e874e4" + "a546a5ef-ba43-44ce-a7c3-1999b005b20f", + "0554d43c-de68-433c-9a6c-db58161c8b4a", ] +geometry = "LINESTRING (-157.0195397035514 21.183903480793326, -157.0201257 21.180663, -157.0084983 21.1786971, -157.0053877 21.1769627, -157.0060747 21.1738084, -157.0003054 21.1722773, -156.9809248 21.1746356, -156.9710241 21.172431, -156.9670723 21.1613703, -156.9550334 21.1531141, -156.9552315 21.1564667, -156.9607515 21.1621168, -156.9584334 21.162083, -156.9598705 21.1636745, -156.958371 21.1708964, -156.9602176 21.1762534, -156.9577173 21.1760764, -156.9556416 21.1715505, -156.9554893 21.157221, -156.9528625 21.1523977, -156.9442066 21.1504038, -156.9389218 21.1568278, -156.938089 21.1530661, -156.9348977 21.15157, -156.9283241 21.1419085, -156.9262384 21.1360023, -156.9186457 21.1279636, -156.9124605 21.1262614, -156.8975854 21.1285336, -156.8979892 21.1337635, -156.8958138 21.1376515, -156.8970853 21.1421243, -156.8929598 21.1451006, -156.8928438 
21.1467406, -156.897116 21.1457141, -156.9036748 21.1485179, -156.9059125 21.1527496, -156.910528 21.1563906, -156.917332 21.1590391, -156.9140393 21.1691869)" +id = "321ed3ec-23c2-376e-a4fd-0df484cb99ca" is_disputed = false +is_land = true +is_territorial = false +region = "US-HI" +subtype = "county" theme = "divisions" type = "division_boundary" +version = 2 [examples.DivisionBoundary.bbox] -xmin = -147.06483459472656 -xmax = -147.04847717285156 -ymin = -15.4231538772583 -ymax = -15.151169776916504 +xmax = -156.89283752441406 +xmin = -157.02012634277344 +ymax = 21.18390655517578 +ymin = 21.12626075744629 [[examples.DivisionBoundary.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r6063055@9" -update_time = "2023-07-20T00:28:40Z" +license = "ODbL-1.0" +property = "" +record_id = "r166564@15" +update_time = "2025-02-21T16:35:23Z" [[examples.DivisionBoundary.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r6063063@12" -update_time = "2023-07-20T00:28:40Z" +license = "ODbL-1.0" +property = "" +record_id = "r166561@26" +update_time = "2025-08-16T14:01:24Z" diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 547b54401..51614e4ae 100644 --- a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -1,6 +1,6 @@ [project] maintainers = [ - {name = "Overture Maps Schema Working Group"}, + { name = "Overture Maps Schema Working Group" }, ] dependencies = [ "overture-schema-common", @@ -41,62 +41,159 @@ segment = "overture.schema.transportation:Segment" [project.entry-points.pytest11] overture_baselines = "overture.schema.system.testing.plugin" +# Connector: Vermooten Street junction, Pretoria, South Africa (2026-02-18.0) [[examples.Connector]] -id = "39542bee-230f-4b91-b7e5-a9b58e0c59b1" -geometry = "POINT (-176.5472979 -43.9679472)" -version = 1 +geometry = "POINT (30.048398 
-25.708697)" +id = "73a46c48-dc5a-4162-b9c8-1643298784c3" theme = "transportation" type = "connector" +version = 1 [examples.Connector.bbox] -xmin = -176.54730224609375 -xmax = -176.54727172851562 -ymin = -43.96794891357422 -ymax = -43.96794128417969 +xmax = 30.04840087890625 +xmin = 30.048397064208984 +ymax = -25.708696365356445 +ymin = -25.70870018005371 [[examples.Connector.sources]] -property = "" dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "n252436807@6" +update_time = "2025-01-06T20:44:06Z" +# Road segment: Vermooten Street / R33, Pretoria, South Africa (2026-02-18.0) +# Populates access_restrictions with when.vehicle, speed_limits with +# when.heading, routes with ref, road_surface, and names. [[examples.Segment]] -id = "1bc62f3b-08b5-42b8-89fe-36f685f60455" -geometry = "LINESTRING (-176.5636191 -43.954404, -176.5643637 -43.9538145, -176.5647264 -43.9535274, -176.5649947 -43.953251)" -version = 1 +class = "primary" +geometry = "LINESTRING (30.048398 -25.708697, 30.0485458 -25.708892, 30.0487074 -25.7090728, 30.0488875 -25.709252, 30.049138 -25.7094697, 30.0493666 -25.7096603, 30.0497209 -25.7099369, 30.0509508 -25.710904, 30.0511567 -25.7110786, 30.0515268 -25.7113855, 30.0518399 -25.711661, 30.052143 -25.7119631, 30.0523513 -25.7121619, 30.0526875 -25.7124827, 30.0531992 -25.7129799, 30.0535874 -25.7133575)" +id = "621e0a00-9466-4c3f-bb4a-64a83cd7a934" subtype = "road" -class = "residential" theme = "transportation" type = "segment" +version = 4 [examples.Segment.bbox] -xmin = -176.5650177001953 -xmax = -176.56361389160156 -ymin = -43.954410552978516 -ymax = -43.953250885009766 +xmax = 30.05359 +xmin = 30.048397 +ymax = -25.708696 +ymin = -25.713358 [[examples.Segment.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "w53435546@6" -update_time = "2021-05-03T06:37:03Z" +license = "ODbL-1.0" +property = "/routes" +record_id = "r1808544@180" + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = 
"ODbL-1.0" +property = "/routes" +record_id = "r1808545@177" + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "w338134264@15" +update_time = "2025-04-21T09:53:35Z" [examples.Segment.names] -primary = "Meteorological Lane" +primary = "Vermooten Street" [[examples.Segment.names.rules]] +value = "Vermooten Street" variant = "common" -value = "Meteorological Lane" [[examples.Segment.connectors]] -connector_id = "15b2c131-9137-4add-88c6-2acd3fa61355" at = 0.0 +connector_id = "73a46c48-dc5a-4162-b9c8-1643298784c3" + +[[examples.Segment.connectors]] +at = 0.154695182 +connector_id = "e81188ed-9b2f-48b4-99f2-d894044d88f5" [[examples.Segment.connectors]] -connector_id = "23ae2702-ef77-4d2e-b39d-77360b696d20" -at = 0.523536154 +at = 0.483463065 +connector_id = "11124794-8830-4aff-bd09-f578d3a196b1" + +[[examples.Segment.connectors]] +at = 0.753014135 +connector_id = "cfda5f80-ffe6-4b5c-b219-4f208e0a3832" [[examples.Segment.connectors]] -connector_id = "8e944ce1-4b81-49eb-a823-7d98779c855c" at = 1.0 +connector_id = "a5871213-947e-4342-b486-f560f0ac22f3" + +[[examples.Segment.access_restrictions]] +access_type = "denied" + +[examples.Segment.access_restrictions.when] + +[[examples.Segment.access_restrictions.when.vehicle]] +comparison = "greater_than" +dimension = "height" +unit = "m" +value = 5.2 + +[[examples.Segment.speed_limits]] + +[examples.Segment.speed_limits.max_speed] +unit = "km/h" +value = 60 + +[examples.Segment.speed_limits.when] +heading = "forward" [[examples.Segment.road_surface]] -value = "gravel" +value = "paved" + +[[examples.Segment.routes]] +name = "R33 (northbound)" +network = "za:regional" +ref = "R33" + +[[examples.Segment.routes]] +name = "R33 (southbound)" +network = "za:regional" +ref = "R33" + +# Rail segment: disused railway, Mpulungu, Zambia (2026-02-18.0) +# Populates rail_flags with values to cover the rail_flags[].values xfail. 
+[[examples.Segment]] +class = "unknown" +geometry = "LINESTRING (30.9844394 -12.7185733, 30.9818611 -12.7207838, 30.9815908 -12.7210751)" +id = "2a9415ed-fa07-4734-9d8e-1d8ff69451c2" +subtype = "rail" +theme = "transportation" +type = "segment" +version = 1 + +[examples.Segment.bbox] +xmax = 30.98444 +xmin = 30.98159 +ymax = -12.718572 +ymin = -12.721077 + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "w414442537@2" +update_time = "2026-02-05T14:25:06Z" + +[[examples.Segment.connectors]] +at = 0.0 +connector_id = "2da12352-29c5-479e-932f-68fbe90c8229" + +[[examples.Segment.connectors]] +at = 0.895049489 +connector_id = "feed87bb-7abf-4254-9e14-efd6bdb3e428" + +[[examples.Segment.connectors]] +at = 1.0 +connector_id = "e37ca4ff-ab09-4c84-8d3c-450c703d7308" + +[[examples.Segment.rail_flags]] +values = ["is_disused"] From 5843a254281a8bc8770cd3e7bc3bc82468146e39 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 12:14:09 -0700 Subject: [PATCH 04/11] feat(pyspark): introduce package with codegen pipeline and CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce overture-schema-pyspark, a runtime PySpark validation package whose per-feature expression modules and conformance tests are generated from the same Pydantic models that define the schema, along with an `overture-validate` CLI. Runtime (overture-schema-pyspark/src/overture/schema/pyspark/): - check.py — Check, CheckShape, FeatureValidation dataclasses. - schema_check.py — write-first comparison of Spark schemas against an expected StructType, with structural type matching and SchemaMismatch reporting. - validate.py — public API: validate_feature(), evaluate_checks(), explain_errors(). The explain stage UNPIVOTs per-row check results into one row per violation, preserving all input columns for downstream join-back. 
- cli.py — `overture-validate PATH` runs the validation pipeline against a path of GeoParquet files. Output is one row per violation: feature ID, theme/type, failing field, check name, offending value. Single-pass evaluation keeps memory bounded for arbitrarily large inputs. - expressions/ — shared runtime utilities (constraint_expressions, column_patterns, _schema_structs). Per-feature expression modules live under expressions/overture/ and are added by the codegen in a follow-up commit. - tests/_support/ — conformance test infrastructure (scenarios, harness, helpers, mutations). The harness builds one DataFrame per feature, applies all scenarios as deterministic-UUID-tagged rows, runs validation once, and indexes violations back to scenario IDs — O(checks) rather than O(checks * scenarios). CLI filtering options: --theme limit to one theme --feature limit to one feature type --skip-schema-check run only constraint checks (no schema comparison) --count-only print violation counts per check rather than rows --suppress suppress specific (feature, field, check) triples per a YAML config Codegen pipeline (overture-schema-codegen/src/.../pyspark/): FeatureSpec | constraint_dispatch.py map constraints to descriptors | check_builder.py walk FieldSpec -> CheckNode IR; resolve array nesting, variant gating | schema_builder.py FieldSpec -> SchemaField list (StructType source) | renderer.py CheckNode -> per-feature expression module test_renderer.py CheckNode -> per-feature conformance test module synthetic.py FeatureSpec -> BASE_ROW + invalid values | pipeline.py orchestrate, return GeneratedModule list The dispatch tables map every supported constraint (Ge/Gt/Le/Lt/Interval, MinLen/MaxLen, StrippedConstraint, PatternConstraint, UniqueItemsConstraint, GeometryTypeConstraint, JsonPointerConstraint, RequireAnyOfConstraint, RadioGroupConstraint, RequireIfConstraint, ForbidIfConstraint, MinFieldsSetConstraint), NewType (CountryCodeAlpha2, LinearlyReferencedRange, RegionCode), 
and base type (HttpUrl, EmailStr) to constraint_expressions check functions. Discriminated unions (segment is the canonical hard case) split into per-arm test files. The codegen handles arm splitting via generate_arm_rows in synthetic.py and _filter_field_nodes_for_arm in test_renderer.py. The Makefile gains a `generate-pyspark` target and gates `check` on it so a stale generation surfaces immediately. The CLI is exposed as a `[project.scripts]` entry point so `overture-validate` becomes available after `pip install` / `uv sync`. Signed-off-by: Seth Fitzsimmons --- Makefile | 19 +- .../overture-schema-codegen/docs/design.md | 326 ++- .../docs/walkthrough.md | 434 ++-- .../overture-schema-codegen/pyproject.toml | 16 + .../src/overture/schema/codegen/cli.py | 53 +- .../codegen/extraction/case_conversion.py | 41 - .../schema/codegen/extraction/field.py | 172 ++ .../codegen/extraction/field_constraints.py | 26 +- .../schema/codegen/extraction/field_walk.py | 215 ++ .../codegen/extraction/length_constraints.py | 47 + .../codegen/extraction/model_extraction.py | 239 ++- .../codegen/extraction/newtype_extraction.py | 29 +- .../codegen/extraction/numeric_extraction.py | 32 +- .../schema/codegen/extraction/specs.py | 106 +- .../codegen/extraction/type_analyzer.py | 712 ++++--- .../codegen/extraction/type_registry.py | 112 +- .../codegen/extraction/union_extraction.py | 143 +- .../schema/codegen/layout/module_layout.py | 19 +- .../schema/codegen/layout/type_collection.py | 144 +- .../codegen/markdown/link_computation.py | 5 +- .../codegen/markdown/path_assignment.py | 13 +- .../schema/codegen/markdown/pipeline.py | 28 +- .../schema/codegen/markdown/renderer.py | 45 +- .../codegen/markdown/reverse_references.py | 131 +- .../schema/codegen/markdown/type_format.py | 400 ++-- .../schema/codegen/pyspark/__init__.py | 1 + .../schema/codegen/pyspark/_render_common.py | 265 +++ .../schema/codegen/pyspark/check_builder.py | 699 ++++++ .../schema/codegen/pyspark/check_ir.py | 83 + 
.../codegen/pyspark/constraint_dispatch.py | 509 +++++ .../schema/codegen/pyspark/pipeline.py | 256 +++ .../schema/codegen/pyspark/renderer.py | 647 ++++++ .../schema/codegen/pyspark/schema_builder.py | 183 ++ .../templates/_check_function.py.jinja2 | 10 + .../templates/feature_module.py.jinja2 | 83 + .../pyspark/templates/test_module.py.jinja2 | 124 ++ .../codegen/pyspark/test_data/__init__.py | 9 + .../codegen/pyspark/test_data/base_row.py | 648 ++++++ .../pyspark/test_data/invalid_value.py | 129 ++ .../codegen/pyspark/test_data/scaffold.py | 264 +++ .../schema/codegen/pyspark/test_renderer.py | 423 ++++ .../tests/codegen_test_support.py | 148 +- .../overture-schema-codegen/tests/test_cli.py | 56 + .../tests/test_constraint_description.py | 32 +- .../tests/test_example_loader.py | 24 +- .../tests/test_field_walk.py | 164 ++ .../tests/test_golden_markdown.py | 16 +- .../tests/test_integration_real_models.py | 47 +- .../tests/test_markdown_renderer.py | 54 +- .../tests/test_markdown_type_format.py | 202 +- .../tests/test_model_extraction.py | 43 + .../tests/test_model_extractor.py | 119 +- .../tests/test_newtype_extraction.py | 21 +- .../tests/test_numeric_extraction.py | 16 +- .../tests/test_pyspark_base_row.py | 319 +++ .../tests/test_pyspark_check_builder.py | 1877 +++++++++++++++++ .../tests/test_pyspark_constraint_dispatch.py | 385 ++++ .../tests/test_pyspark_e2e.py | 206 ++ .../tests/test_pyspark_invalid_value.py | 175 ++ .../tests/test_pyspark_pipeline.py | 391 ++++ .../tests/test_pyspark_renderer.py | 1097 ++++++++++ .../tests/test_pyspark_scaffold.py | 245 +++ .../tests/test_pyspark_schema_builder.py | 213 ++ .../tests/test_pyspark_test_renderer.py | 880 ++++++++ .../tests/test_reverse_references.py | 51 +- .../tests/test_specs.py | 222 +- .../tests/test_type_analyzer.py | 927 ++++---- .../tests/test_type_collection.py | 16 +- .../tests/test_type_placement.py | 6 +- .../tests/test_type_registry.py | 141 +- .../tests/test_union_extraction.py | 53 +- 
packages/overture-schema-pyspark/README.md | 238 +++ .../overture-schema-pyspark/pyproject.toml | 27 + .../src/overture/__init__.py | 1 + .../src/overture/schema/__init__.py | 1 + .../src/overture/schema/pyspark/__about__.py | 1 + .../src/overture/schema/pyspark/__init__.py | 27 + .../src/overture/schema/pyspark/_registry.py | 67 + .../src/overture/schema/pyspark/check.py | 49 + .../src/overture/schema/pyspark/cli.py | 239 +++ .../schema/pyspark/expressions/__init__.py | 1 + .../pyspark/expressions/_schema_structs.py | 22 + .../pyspark/expressions/column_patterns.py | 94 + .../expressions/constraint_expressions.py | 484 +++++ .../src/overture/schema/pyspark/py.typed | 0 .../overture/schema/pyspark/schema_check.py | 109 + .../src/overture/schema/pyspark/validate.py | 334 +++ .../overture-schema-pyspark/tests/__init__.py | 0 .../tests/_support/__init__.py | 0 .../tests/_support/harness.py | 246 +++ .../tests/_support/helpers.py | 135 ++ .../tests/_support/mutations.py | 388 ++++ .../tests/_support/scenarios.py | 34 + .../overture-schema-pyspark/tests/conftest.py | 50 + .../tests/expressions/__init__.py | 0 .../tests/expressions/test_column_patterns.py | 258 +++ .../test_constraint_expressions.py | 1341 ++++++++++++ .../tests/expressions/test_schema_check.py | 268 +++ .../tests/test_check.py | 20 + .../overture-schema-pyspark/tests/test_cli.py | 475 +++++ .../tests/test_harness.py | 361 ++++ .../tests/test_helpers.py | 147 ++ .../tests/test_mutations.py | 263 +++ .../tests/test_validate.py | 516 +++++ .../src/overture/schema/system/case.py | 26 + .../schema/system/discovery/__init__.py | 10 + .../schema/system/discovery/entry_point.py | 119 ++ .../src/overture/schema/system/field_path.py | 301 +++ .../test_string_constraints.py | 2 +- .../tests/test_case.py} | 10 +- .../tests/test_discovery_entry_point.py | 97 + .../tests/test_field_path.py | 376 ++++ .../pyproject.toml | 1 - .../overture/schema/transportation/models.py | 2 +- pyproject.toml | 1 + uv.lock | 1072 
+++++----- 116 files changed, 21308 insertions(+), 2891 deletions(-) delete mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py create mode 100644 
packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py create mode 100644 packages/overture-schema-codegen/tests/test_field_walk.py create mode 100644 packages/overture-schema-codegen/tests/test_model_extraction.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_base_row.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_check_builder.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_e2e.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_pipeline.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_renderer.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_scaffold.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py create mode 100644 packages/overture-schema-pyspark/README.md create mode 100644 packages/overture-schema-pyspark/pyproject.toml create mode 100644 packages/overture-schema-pyspark/src/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py create mode 
100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py create mode 100644 packages/overture-schema-pyspark/tests/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/_support/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/_support/harness.py create mode 100644 packages/overture-schema-pyspark/tests/_support/helpers.py create mode 100644 packages/overture-schema-pyspark/tests/_support/mutations.py create mode 100644 packages/overture-schema-pyspark/tests/_support/scenarios.py create mode 100644 packages/overture-schema-pyspark/tests/conftest.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_schema_check.py create mode 100644 packages/overture-schema-pyspark/tests/test_check.py create mode 100644 packages/overture-schema-pyspark/tests/test_cli.py create mode 100644 packages/overture-schema-pyspark/tests/test_harness.py create mode 100644 packages/overture-schema-pyspark/tests/test_helpers.py create 
mode 100644 packages/overture-schema-pyspark/tests/test_mutations.py create mode 100644 packages/overture-schema-pyspark/tests/test_validate.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/case.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/discovery/entry_point.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/field_path.py rename packages/{overture-schema-codegen/tests/test_naming.py => overture-schema-system/tests/test_case.py} (67%) create mode 100644 packages/overture-schema-system/tests/test_discovery_entry_point.py create mode 100644 packages/overture-schema-system/tests/test_field_path.py diff --git a/Makefile b/Makefile index a88a9917e..4488d0125 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default uv-sync check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines +.PHONY: default uv-sync clean-pyspark generate-pyspark check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines TESTMON ?= --testmon @@ -7,9 +7,22 @@ default: test-all install: uv-sync uv-sync: - @uv sync --all-packages 2> /dev/null + @uv sync --all-packages --all-extras 2> /dev/null -check: uv-sync +PYSPARK_EXPRESSIONS := packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated +PYSPARK_GENERATED_TESTS := packages/overture-schema-pyspark/tests/generated + +clean-pyspark: + @rm -rf $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + +generate-pyspark: uv-sync clean-pyspark + @uv run overture-codegen generate --format pyspark \ + --output-dir $(PYSPARK_EXPRESSIONS) \ + --test-output-dir $(PYSPARK_GENERATED_TESTS) + @uv run ruff check --fix --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + @uv run ruff format --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + +check: uv-sync generate-pyspark @$(MAKE) -j test-only doctest-only lint-only mypy-only 
test-all: uv-sync diff --git a/packages/overture-schema-codegen/docs/design.md b/packages/overture-schema-codegen/docs/design.md index 662d77fc5..67e78892e 100644 --- a/packages/overture-schema-codegen/docs/design.md +++ b/packages/overture-schema-codegen/docs/design.md @@ -30,9 +30,9 @@ definitions regularly nest `Annotated` inside `NewType` inside `Annotated` -- Annotated[int, Field(ge=...)])` -- and constraints at each depth need to be tagged with the NewType that contributed them. -The code generator solves this by extracting type information once into a flat, -navigable representation (`TypeInfo`), then passing that to renderers that produce -output without touching Python's type system. +The code generator solves this by extracting type information once into a tree-shaped +`FieldShape` IR, then passing that to renderers that produce output without touching +Python's type system. ## Inputs and Outputs @@ -41,10 +41,12 @@ points, plus example data from theme `pyproject.toml` files. Examples serve two purposes: rendered examples in documentation pages, and a starting point for generating tests that verify behavior of generated code. -**Current Outputs**: Markdown documentation pages with field tables, cross-page links, -constraint descriptions, and examples. +**Outputs**: -**Planned outputs**: Arrow schemas, PySpark expressions. +- Markdown documentation pages with field tables, cross-page links, constraint + descriptions, and examples. +- PySpark validation modules: per-feature expression builders, StructType schemas, + a feature registry, and generated conformance test modules. ## Architecture @@ -55,15 +57,21 @@ Rendering Output formatting, all presentation decisions ^ Output Layout What to generate, where it goes, how outputs link ^ -Extraction TypeInfo, FieldSpec, ModelSpec, EnumSpec, ... +Extraction FieldShape, FieldSpec, ModelSpec, EnumSpec, ... 
^ Discovery discover_models() from overture-schema-common ``` -`markdown/pipeline.py` orchestrates the pipeline without I/O: it expands feature trees, -collects supplementary types, builds placement registries, computes reverse references, -and calls renderers -- returning `RenderedPage` objects. The CLI (`cli.py`) is a thin -Click wrapper that calls `generate_markdown_pages()` and writes files to disk. +Each output format has its own pipeline module that orchestrates without I/O: + +- `markdown/pipeline.py` expands feature trees, collects supplementary types, builds + placement registries, computes reverse references, and calls renderers -- returning + `RenderedPage` objects. +- `pyspark/pipeline.py` expands feature trees, builds checks and schemas, renders + expression modules and test modules -- returning `GeneratedModule` objects. + +The CLI (`cli.py`) is a thin Click wrapper that dispatches to the appropriate pipeline +and writes files to disk. ```mermaid graph TD @@ -75,24 +83,24 @@ graph TD subgraph Extraction EX["extraction/type_analyzer / extractors"] - EX -->|"ModelSpec, UnionSpec"| TREE["expand_model_tree()"] end - TREE -->|"FeatureSpec[]"| OL + EX -->|"FeatureSpec[]"| OL + EX -->|"FeatureSpec[]"| PS - subgraph "Output Layout" + subgraph "Output Layout (Markdown)" OL["layout/type_collection"] OL -->|"SupplementarySpec{}"| PA["markdown/path_assignment"] PA -->|"dict[str, Path]"| LC["markdown/link_computation"] RR["markdown/reverse_references"] end - subgraph Rendering + subgraph "Markdown Rendering" R["markdown/renderer"] TR["extraction/type_registry"] -.->|"type name resolution"| R end - subgraph Orchestration + subgraph "Markdown Orchestration" MP["markdown/pipeline"] end @@ -101,39 +109,64 @@ graph TD RR --> MP MP --> R R -->|"RenderedPage[]"| MP + + subgraph "PySpark Pipeline" + PS["pyspark/pipeline"] + CD["constraint_dispatch"] -->|"ExpressionDescriptor"| CB + CB["check_builder"] -->|"Check, ModelCheck"| PR + SB["schema_builder"] -->|"SchemaField[]"| 
PR + CB -->|"Check, ModelCheck"| PTR + SY["test_data/"] -->|"BASE_ROW, scaffold, invalid_value"| PTR + PR["renderer"] + PTR["test_renderer"] + end + + PS --> CD + PS --> CB + PS --> SB + PS --> PR + PS --> PTR + MP -->|"list[RenderedPage]"| CLI["cli.py → disk"] + PS -->|"list[GeneratedModule]"| CLI ``` ## Extraction -### `analyze_type` -- iterative type unwrapping - -`analyze_type(annotation)` is a single iterative function that peels type annotation -layers in a fixed order, accumulating information into an `_UnwrapState`: - -1. **NewType**: Records the outermost name (user-facing semantic identity, e.g. - `FeatureVersion`) and updates the "current" name (used for constraint provenance and - as `base_type` at terminal) -2. **Annotated**: Collects constraints from metadata, each tagged with whichever NewType - was most recently entered. Extracts `Field.description` when present -3. **Union**: Filters out `None` (marks optional), `Sentinel`, and `Literal` sentinel - arms. If multiple concrete `BaseModel` arms remain, classifies as `UNION`; otherwise - continues with the single remaining arm -4. **list / dict**: Increments `list_depth` for each `list[...]` layer, sets dict flags, - continues into element types -5. **Terminal**: Classifies as `PRIMITIVE`, `LITERAL`, `ENUM`, `MODEL`, or `UNION` - -The result is `TypeInfo` -- a flat dataclass that fully describes the unwrapped type: -classification (`TypeKind`), optional/dict flags, `list_depth` (count of `list[...]` -layers), `newtype_outer_list_depth` (list layers outside the outermost NewType boundary), -accumulated constraints with provenance, NewType names, source type, literal values, and -(for UNION kind) the tuple of concrete `BaseModel` member types. Dict types carry -recursively analyzed `TypeInfo` for their key and value types. 
- -Multi-depth `Annotated` layers (common in practice, since NewTypes wrap `Annotated` -types that wrap further NewTypes) are handled naturally by the loop -- each iteration -processes the next wrapper. Constraints from each `Annotated` layer are tagged with the -NewType active at that depth. +### `analyze_type` -- recursive type unwrapping + +`analyze_type(annotation)` recurses through a Python type annotation, peeling one layer +per call frame via the internal `_unwrap` function: + +1. **NewType**: Constructs `_NewTypeCtx` with the NewType's name, recurses into + `__supertype__`, then wraps the result in `NewTypeShape`. `_erase_inner_newtypes` + strips every inner `NewTypeShape` reached through `ArrayOf` layers, so each spine + keeps only its outermost `NewTypeShape` (inner NewType names survive on the + terminal `Primitive.base_type`). +2. **Annotated**: Collects constraints from metadata as `ConstraintSource` objects, + each tagged with the active `_NewTypeCtx`. Extracts `Field.description` when present. + Recurses into the inner annotation, then attaches constraints to the result via + `attach_constraints`, which prepends them to the outermost structural layer. +3. **Union**: Delegates to `_peel_union`, which filters `None` (marks optional), + `Sentinel`, and `Literal` sentinel arms. Multiple concrete `BaseModel` arms invoke + `union_resolver`; a single arm continues with `_unwrap`. +4. **list / dict**: `list[X]` recurses into `X` and wraps in `ArrayOf`. Nested lists + produce nested `ArrayOf` instances -- no numeric depth counter. `dict[K, V]` recurses + for key and value independently and returns `MapOf`. +5. **Terminal**: Classifies as `Primitive`, `LiteralScalar`, `AnyScalar`, `ModelRef`, + or `UnionRef`. + +The result is `tuple[FieldShape, bool, str | None]` -- the structural shape describing +the type as a nested tree, whether the field accepts `None`, and the first +`FieldInfo.description` found during unwrapping. 
`FieldShape` is a discriminated union +of eight variants (`Primitive`, `LiteralScalar`, `AnyScalar`, `ModelRef`, `UnionRef`, +`ArrayOf`, `MapOf`, `NewTypeShape`) nested to describe arbitrary collection and NewType +wrapping. + +Constraints from each `Annotated` layer attach to the shape layer they annotate -- +`attach_constraints` walks past any `NewTypeShape` wrappers to prepend constraints on +the first `ArrayOf`, `MapOf`, or scalar node. This means array-level and element-level +constraints land on structurally distinct nodes without any numeric bookkeeping. ### Extractors by domain @@ -145,12 +178,14 @@ Extraction is split by entity kind: - `extraction/newtype_extraction.py`: NewType -> `NewTypeSpec` - `extraction/union_extraction.py`: Discriminated union alias -> `UnionSpec` - `extraction/numeric_extraction.py`: Numeric types -> `NumericSpec` +- `extraction/pydantic_extraction.py`: Pydantic built-in type -> `PydanticTypeSpec` -Each calls `analyze_type()` for field types. Tree expansion (`expand_model_tree()`) -walks MODEL-kind fields to populate nested model references, with a shared cache and -cycle detection (`starts_cycle=True`). +Each calls `analyze_type()` for field types. `extract_model` recurses into sub-models +and sub-unions during extraction, building `ModelRef`/`UnionRef` terminals with their +specs resolved. A shared cache and cycle detection (`starts_cycle=True`) prevent +infinite recursion and duplicate extraction. -### Unions and the FeatureSpec protocol +### Unions and FeatureSpec Discriminated unions (e.g. `Segment = Annotated[Union[RoadSegment, ...], Discriminator(...)]`) are type aliases, not classes. `UnionSpec` captures the union @@ -159,10 +194,10 @@ Fields shared across all variants appear once; fields present in some variants a wrapped in `AnnotatedField` with `variant_sources` indicating which members contribute them. The common base class is identified so shared fields can be deduplicated. 
-`FeatureSpec` is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. Code that -operates on "any top-level feature" -- tree expansion, supplementary type collection, -rendering dispatch -- uses `FeatureSpec` rather than a concrete type, so union and model -features flow through the same pipeline. +`FeatureSpec` is a type alias `ModelSpec | UnionSpec`. Code that operates on "any +top-level feature" -- supplementary type collection, rendering dispatch -- uses +`FeatureSpec` so union and model features flow through the same pipeline. Consumers +narrow with `isinstance` when arm-specific attributes are needed. ### Constraints @@ -180,9 +215,12 @@ reference each other. ### Supplementary type collection -`collect_all_supplementary_types()` walks the expanded field trees of all feature specs, -extracting enums, semantic NewTypes, and sub-models that need their own output. Returns -`dict[str, SupplementarySpec]`. +`collect_all_supplementary_types()` walks the field trees of all feature specs to extract +the supplementary types that need their own output: enums, semantic NewTypes, sub-models, +and Pydantic built-in types (`HttpUrl`, `EmailStr`). Returns `dict[TypeIdentity, +SupplementarySpec]`, where `SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | +PydanticTypeSpec`. `TypeIdentity` pairs a unique Python object with its display name so +registry lookups remain stable when two distinct types share a name. ### Module-mirrored output paths @@ -195,14 +233,14 @@ directory. ### Link computation -`LinkContext` carries the current output's path and the full type-to-path registry. When -a renderer formats a type reference, it looks up the target in the registry and computes -a relative path. Links exist only for types with registry entries, avoiding broken -references to ungenerated outputs. +`LinkContext` carries the current output's path and the full `dict[TypeIdentity, +PurePosixPath]` registry. 
When a renderer formats a type reference, it looks up the +target by `TypeIdentity` and computes a relative path. Links exist only for types with +registry entries, avoiding broken references to ungenerated outputs. ### Reverse references -`compute_reverse_references()` walks feature specs to build `dict[type_name, +`compute_reverse_references()` walks feature specs to build `dict[TypeIdentity, list[UsedByEntry]]` for "Used By" sections. ## Rendering @@ -221,10 +259,10 @@ to registered primitives. ### Markdown renderer Jinja2 templates for feature, enum, NewType, primitives, and geometry pages. -`render_feature()` expands MODEL-kind fields inline with dot-notation (e.g., -`sources[].dataset`), stopping at cycle boundaries. `format_type()` in -`markdown/type_format.py` converts `TypeInfo` into link-aware display strings using -`LinkContext`. +`render_feature()` walks each field's `FieldShape` tree and expands `ModelRef` +terminals inline with dot-notation (e.g., `sources[].dataset`), stopping at +`ModelRef.starts_cycle`. `format_type()` in `markdown/type_format.py` converts a +`FieldShape` into link-aware display strings using `LinkContext`. ### Constraint prose @@ -247,17 +285,157 @@ need for external schema information -- the model instance itself encodes the ty structure. `augment_missing_fields` appends `(name, None)` entries for union cross-arm fields absent from the concrete variant instance. +## PySpark Pipeline + +The PySpark codegen transforms extracted `FeatureSpec` trees into validation expression +modules and generated conformance test modules. `pyspark/pipeline.py` exposes +`generate_pyspark_module` (single spec) and `generate_pyspark_modules` (all specs). + +### Constraint Dispatch + +`pyspark/constraint_dispatch.py` maps constraint objects to expression descriptors. +Four dispatch mechanisms: + +1. **`dispatch_constraint`** -- field constraints (bounds, min/max length, pattern, + stripped, geometry type, unique items, JSON pointer). 
Returns `ExpressionDescriptor` + with function name, args, kwargs. Returns None for skipped constraints (Reference, + Strict). + +2. **`dispatch_newtype`** -- NewType-level overrides: `LinearlyReferencedRange` -> + three range checks. `CountryCodeAlpha2` and `RegionCode` decompose normally + via their `PatternConstraint` subclasses and return None here. + +3. **`dispatch_base_type`** -- base-type overrides for types with no `Annotated` + constraints: `HttpUrl` -> `check_url_format` + `check_url_length`, + `EmailStr` -> `check_email`, `BBox` -> `check_bbox_completeness` + + `check_bbox_lat_ordering` + `check_bbox_lat_range`. + +4. **`dispatch_model_constraint`** -- model constraints: `RequireAnyOfConstraint`, + `RadioGroupConstraint`, `RequireIfConstraint`, `ForbidIfConstraint`, + `MinFieldsSetConstraint`. Returns `ModelConstraintDescriptor`. Returns None for + `NoExtraFieldsConstraint`. + +### Check Builder + +`pyspark/check_builder.py` walks `FieldSpec` trees to produce `Check` and `ModelCheck` +IR. Resolves the mapping from nested field paths to PySpark array iteration patterns, +producing a `FieldPath` (`ScalarPath` or `ArrayPath`) on each `Check`: + +- **Scalar field** -- `ScalarPath`; renders as `F.col("field")` +- **Top-level array** -- `ArrayPath` with one `ArraySegment`; renders as + `array_check("field", lambda el: ...)` +- **Field inside an array element** -- `ArrayPath` with struct navigation after the + array segment; renders as `array_check("array_col", lambda el: el["field"])` +- **Nested array inside an array** -- `ArrayPath` with multiple `ArraySegment`s; + renders as `nested_array_check("outer", lambda el: array_check(el["inner"], ...))` +- **Multiple nesting levels** -- chained `nested_array_check` with struct segments + navigating between array iterations + +Union handling: variant-specific fields are annotated with `ColumnGuard` or +`ElementGuard` discriminator gates. `Check.guards` is AND-composed at render time. 
+Nested unions (a union field within a union) produce a `ColumnGuard` and an +`ElementGuard` in sequence on the same check. + +`COLUMN_LEVEL_FUNCTIONS` (frozenset) selects checks that split into a +separate `Check`; `_COLUMN_LEVEL_SUFFIXES` (dict) supplies the label +suffix for each: `check_required` (no suffix), `check_array_min_length` +(`_min_length`), `check_array_max_length` (`_max_length`), +`check_struct_unique` (`_unique`). + +### Schema Builder + +`pyspark/schema_builder.py` converts `FieldSpec` trees to `SchemaField` lists for +StructType source generation. Maps types to Spark type expressions via the type registry. +`SHARED_TYPE_REFS` reserves a few base-type names for `_schema_structs.py` constants +when the codegen cannot walk the type -- currently just `BBox` -> `BBOX_STRUCT` (BBox +is a plain class, not a Pydantic `BaseModel`). Pydantic models are inlined into the +StructType expression. Union fields are deduplicated by name with type widening (the +wider Spark numeric type wins). + +### Renderer + +`pyspark/renderer.py` emits per-feature Python modules containing: + +- Private `_fieldname_check()` functions returning `Check(field=, name=, expr=, shape=, root_field=)` +- A public `feature_checks() -> list[Check]` function calling all of them +- A per-feature `FEATURENAME_SCHEMA` StructType constant (e.g. `ADDRESS_SCHEMA`, `SEGMENT_SCHEMA`) +- An `ENTRY_POINT` string, a `PARTITIONS` dict describing the feature's Hive partition + layout (empty when not partitioned), and a `FEATURE_VALIDATION` constant pairing the + schema and checks + +The registry is not generated. `_registry.py` lives hand-written in the +`overture-schema-pyspark` package and walks the `expressions.generated` namespace at +import time, collecting every module that exposes `ENTRY_POINT` and `FEATURE_VALIDATION` +into a `dict[str, FeatureValidation]`. Modules that also expose `PARTITIONS` populate a +parallel partition map keyed by entry point. 
+ +Expression rendering handles scalar expressions, array_check/nested_array_check chains, +variant gating (`F.when(discriminator.isin(...))`), nullable parent gating +(`F.when(gate.isNotNull(), ...)`), and nested lambda variable naming for deep nesting. +Output is formatted with ruff. + +### Test Renderer + +`pyspark/test_renderer.py` emits per-feature pytest modules containing: + +- `BASE_ROW_SPARSE` / `BASE_ROW_POPULATED` -- valid synthetic rows +- `SCENARIOS: list[Scenario]` -- generated test cases, each carrying a + `mutate` callable that produces an invalid row from a merged base +- Fixtures: `checks`, `sparse_results`, `populated_results` +- Tests: `test_baseline_sparse`, `test_baseline_populated`, + `test_scenario_sparse`, `test_scenario_populated` (parametrized). + Schema coverage runs inside `run_validation_pipeline` via + `assert_schema_covers_checks`, not in a separate test. + +Union specs with multiple discriminator arms produce one test module per arm. + +### Test Data Generator + +`pyspark/test_data/` is a subpackage with three modules: + +- `base_row.py` -- `generate_base_row` / `generate_populated_row` produce sparse + (required only) and fully populated valid rows from a `FeatureSpec`. Consults field + constraints to produce constraint-satisfying values (country codes, geometry WKT, + bounds-respecting numbers). `generate_arm_rows` / `generate_populated_arm_rows` + produce one row per discriminator arm for union specs. +- `scaffold.py` -- `generate_scaffold` / `generate_model_scaffold` build sparse dicts + that provide nested structure (optional structs, arrays) needed for test scenarios. +- `invalid_value.py` -- `invalid_value` produces a concrete value that violates each + check function. 
+ +### Known Semantic Gaps + +PySpark validation diverges from Pydantic validation in two documented areas: + +- `UniqueItemsConstraint` uses Spark's `array_distinct`, which compares whole + elements with structural equality (struct- and nested-array-aware) on the raw + stored values. Pydantic compares normalized Python objects -- e.g., + `list[HttpUrl]` is compared after URL normalization (trailing slash, lowercased + scheme/host) -- so it catches duplicates that differ only in normalization. The + PySpark check catches exact duplicates only. + +- `require_any_of` checks `isNotNull` as a proxy for Pydantic's `model_fields_set`. + Parquet has no equivalent of "explicitly provided"; `isNotNull` is stricter (it + rejects fields explicitly set to null). + ## Extension Points -**Adding a new output target** (Arrow schemas next, PySpark expressions after): Add a -column to `TypeMapping` in `extraction/type_registry.py` for type-name resolution. Write -a new renderer module that consumes specs and the type registry. The extraction layer and -output layout are target-independent. +**Adding a new output target**: Add a column to `TypeMapping` in +`extraction/type_registry.py` for type-name resolution. Write a pipeline module that +consumes `FeatureSpec` trees and a renderer that produces output. The extraction layer is +target-independent. Register the format in `cli.py`. -**Adding a new type kind**: Add a variant to `TypeKind` in `extraction/type_analyzer.py`. +**Adding a new type kind**: Add a variant to `FieldShape` in `extraction/field.py`. Handle it in the terminal classification of `analyze_type()`. Add an extraction function -and spec dataclass if needed. Update renderers to handle the new kind. - -**Adding a new constraint type**: The iterative unwrapper collects it automatically (any -`Annotated` metadata becomes a `ConstraintSource`). Add a case to -`describe_field_constraint()` for the prose representation. +and spec dataclass if needed. 
Update `extraction/field_walk.py` traversal helpers and +all renderers to handle the new variant. + +**Adding a new constraint type**: `_unwrap` collects it automatically (any `Annotated` +metadata becomes a `ConstraintSource`). Add a case to +`describe_field_constraint()` for prose and to `dispatch_constraint()` for PySpark +expression mapping. + +**Adding a new PySpark check function**: Add a case in `dispatch_constraint`, +`dispatch_newtype`, or `dispatch_base_type` in `constraint_dispatch.py`. Add an +`invalid_value` case in `test_data/invalid_value.py` for test generation. The check builder and +renderer handle the new descriptor automatically. diff --git a/packages/overture-schema-codegen/docs/walkthrough.md b/packages/overture-schema-codegen/docs/walkthrough.md index 397e082f5..2cd1e2b27 100644 --- a/packages/overture-schema-codegen/docs/walkthrough.md +++ b/packages/overture-schema-codegen/docs/walkthrough.md @@ -23,8 +23,8 @@ Documentation needs all of this. The codegen exists to preserve it. Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` wrappers, union filtering, generic resolution -- is complex. The codegen does it once. -`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent -representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +`analyze_type()` unwraps annotations into `FieldShape`, a tree-shaped target-independent +representation. Extractors build specs from these shapes. Renderers consume specs without re-entering the type system. New output targets add renderers, not extraction logic. The solution decomposes into four layers. Discovery finds models. Extraction unwraps @@ -64,28 +64,15 @@ it. The entry point `overture:transportation:segment` maps to The codegen classifies these at the CLI boundary: `is_model_class` identifies concrete `BaseModel` subclasses, `is_union_alias` calls `analyze_type` to identify discriminated -unions. 
From that point forward both model features and union features satisfy the -`FeatureSpec` protocol and flow through the same pipeline. +unions. From that point forward both model features and union features are `FeatureSpec` values +(`ModelSpec | UnionSpec`) and flow through the same pipeline. ## 2. Leaf utilities -Two modules with no internal dependencies. Both serve multiple layers. - -### extraction/case_conversion.py - -Converts PascalCase to snake_case with two compiled regexes. `_ACRONYM_BOUNDARY` inserts -an underscore between an uppercase run and a capitalized word start: `HTMLParser` -becomes `HTML_Parser` becomes `html_parser`. `_CAMEL_BOUNDARY` inserts between -lowercase-or-digit and uppercase: `buildingPart` becomes `building_part`. -`to_snake_case` applies them in sequence and lowercases. - -`slug_filename` composes the conversion with a file extension. Every output file path in -the system passes through this function. - -```python ->>> slug_filename("HexColor") -'hex_color.md' -``` +One module with no internal dependencies, serving multiple layers. PascalCase to +snake_case conversion lives in `overture.schema.system.case` (used by the pyspark +generator and the markdown path assignment); markdown output filenames are +`f"{to_snake_case(name)}.md"` at the call site. ### extraction/docstring.py @@ -113,92 +100,85 @@ summaries. ## 3. Type analysis This is the module the entire package exists to house. `analyze_type` takes a raw type -annotation and returns `TypeInfo` -- a flat dataclass that fully describes the unwrapped -type without any reference to Python's typing machinery. - -### The loop - -The function runs a single `while True` loop that peels layers in fixed order. Each -iteration handles one wrapper: - -**NewType** records names at two levels. The first NewType encountered becomes -`outermost_newtype_name` (the user-facing identity, e.g. 
"FeatureVersion") and snapshots -the current `list_depth` into `newtype_outer_list_depth` -- capturing how many list -layers appeared before the NewType boundary. Subsequent NewTypes update -`last_newtype_name` (the innermost, used for constraint provenance and as the terminal -`base_type`). The loop unwraps via `__supertype__` and continues. - -**Annotated** collects every metadata object as a `ConstraintSource`, tagging each with -whichever NewType was most recently entered. This is how constraint provenance survives: -when `int32`'s `Annotated` layer contributes `Field(ge=0)`, the constraint records -`source="int32"`. If a `FieldInfo` carries a description, the function captures it -- -first description wins, so the outermost NewType's documentation takes precedence. - -**Union** filters out `NoneType` (marks optional), `Sentinel` instances (Pydantic's -`` marker for undeclared defaults), and `Literal` sentinel arms (like -`Literal[""]` used alongside `HttpUrl`). If multiple concrete `BaseModel` subclasses -remain after filtering, the function classifies the type as `UNION` and returns -immediately with the member tuple. Non-BaseModel multi-type unions raise -`UnsupportedUnionError`. A single remaining arm continues the loop. - -The `Literal` filtering has a guard: when a union contains *only* Literal arms (like -`Optional[Literal["x"]]`), the function keeps them rather than filtering everything out. - -**list/dict** increments `list_depth` for each `list[...]` layer (so `list[list[str]]` -records depth 2), sets dict flags, and continues into element types. Dict is the one -case where `analyze_type` recurses -- it calls itself for key and value types, storing -the results as nested `TypeInfo` objects. 
- -**Terminal** classification in `_classify_terminal` handles what remains after all -wrappers are peeled: `Any` becomes a PRIMITIVE, `Literal` returns with the literal value -(single-value only -- multi-value Literals get `literal_value=None`), `Enum` subclasses -become ENUM, `BaseModel` subclasses become MODEL, everything else becomes PRIMITIVE. +annotation and returns `tuple[FieldShape, bool, str | None]` -- the structural shape, +whether the field is optional, and the first description found in the annotation chain. +`FieldShape` is a discriminated union tree that fully describes the type without any +reference to Python's typing machinery. + +### The recursion + +`_unwrap` peels one annotation layer per call frame and returns a `FieldShape` subtree. +Each case handles one wrapper kind: + +**NewType** constructs a `_NewTypeCtx` carrying the NewType's name and callable +reference, then recurses into `__supertype__` with that context active. After the +recursion returns, `_erase_inner_newtypes` strips every `NewTypeShape` reachable through +the recursion result's `ArrayOf` layers so that exactly one `NewTypeShape` remains per +spine. The frame then wraps the (now wrapper-free) inner shape: +`NewTypeShape(name="FeatureVersion", inner=)`. Inner NewType names +survive as the terminal `Primitive.base_type`. + +**Annotated** collects every metadata object in the `args[1:]` slice as a +`ConstraintSource`, tagging each with the active `newtype_ctx`. If a `FieldInfo` is +present, its `metadata` list contributes additional constraint sources (Pydantic unpacks +`Field(min_length=1)` into annotated-types objects there). Descriptions are captured +from `FieldInfo.description` -- first one found wins, so the outermost annotation's +documentation takes precedence. 
The collected constraints are then attached to the +recursion result via `attach_constraints`, which walks any leading `NewTypeShape` +wrappers to prepend the constraints on the first structural layer (`ArrayOf`, `MapOf`, +or scalar terminal) that can hold them. Raw `MinLen` / `MaxLen` constraints are wrapped +into typed `ArrayMinLen` / `ScalarMinLen` (and `MaxLen` variants) matching the attachment +layer, so length-constraint dispatch is type-keyed downstream. + +**Union** delegates to `_peel_union`. That helper filters `NoneType` (marks optional), +`Sentinel` instances, and `Literal` sentinel arms. If multiple concrete `BaseModel` +subclasses remain, it invokes `union_resolver` and returns a `_Resolved` short-circuit. +A single remaining arm returns `_ContinueWith`, and `_unwrap` recurses into it. + +**list** recurses into the element type and wraps the result in `ArrayOf`. Nested lists +(`list[list[str]]`) produce nested `ArrayOf` instances -- there is no numeric depth +counter. Constraints contributed by an `Annotated` wrapper at any particular list level +land on that level's `ArrayOf` node because `attach_constraints` prepends to the +outermost structural layer, which is exactly the `ArrayOf` that was just constructed. + +**dict** recurses separately for key and value types (with `newtype_ctx=None` for both, +since dict keys and values are independent spines) and returns `MapOf`. + +**Terminal** classification in `_terminal` handles the base case: `Any` becomes +`AnyScalar`, `Literal` becomes `LiteralScalar`, `BaseModel` subclasses route through +`model_resolver` (or fall back to `Primitive(source_type=cls)`), everything else becomes +`Primitive(base_type=newtype_ctx.name or annotation.__name__)`. ### Concrete walkthroughs -**Segment (union path).** `analyze_type` receives the `Annotated` alias. Iteration 1 -sees `Annotated` -- collects the `FieldInfo` with discriminator metadata as a -constraint, unwraps to `Union[RoadSegment, RailSegment, WaterSegment]`. 
Iteration 2 sees -the union. No `None` arm, no sentinels. Three concrete `BaseModel` subclasses remain -- -the function classifies the type as `UNION` and returns immediately: `kind=UNION`, -`union_members=(RoadSegment, RailSegment, WaterSegment)`, `base_type="RoadSegment"` (the -first member). Two iterations, done. The union members are raw type objects, not -recursively analyzed -- callers that need field details call `extract_model` on each -member separately. +**Segment (union path).** `_unwrap` receives the `Annotated` alias for Segment. The +`Annotated` case collects discriminator metadata from `FieldInfo`, then sees the inner +annotation is a union. `_peel_union` finds three concrete `BaseModel` arms, invokes +`union_resolver`, and returns `_Resolved(UnionRef(...))` carrying the `UnionSpec` that +the resolver constructed. The `Annotated` handler attaches the discriminator constraints +and returns. Two frames deep, done. **FeatureVersion (NewType chain path).** `FeatureVersion = NewType("FeatureVersion", int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])`. -Iteration 1 sees `FeatureVersion`. It's a NewType -- record -`outermost_newtype_name="FeatureVersion"`, snapshot `newtype_outer_list_depth=0` (no list -layers yet), unwrap to `int32`, continue. Iteration 2 sees -`int32`. Also a NewType -- update `last_newtype_name="int32"`, unwrap to `Annotated[int, -Field(ge=0, ...)]`, continue. Iteration 3 sees `Annotated`. Collect -`ConstraintSource(source="int32", constraint=)`, unwrap to `int`. The -loop breaks on `int` (not a NewType, not Annotated, not a union, not a container). -`_classify_terminal` returns a `TypeInfo` with `base_type="int32"`, -`newtype_name="FeatureVersion"`, `kind=PRIMITIVE`, and a constraint tuple recording the -provenance chain. - -The two paths demonstrate the function's range. Segment exits early on the union branch -with member types for downstream extraction. 
FeatureVersion runs the full loop through -NewType and Annotated layers, accumulating constraint provenance that survives to -rendering. - -### _UnwrapState - -The accumulator dataclass carries state across iterations: optional/dict flags, -`list_depth` (incremented per `list[...]` layer), `newtype_outer_list_depth` (snapshotted -from `list_depth` when the first NewType is entered), the constraint list, both NewType -name slots, and the captured description. Its `build_type_info` method assembles the -final `TypeInfo` from accumulated state, freezing the constraint list into a tuple. - -### walk_type_info - -A shared visitor that recurses into dict key/value `TypeInfo` children. Both type -collection and reverse reference computation use it rather than duplicating the descent -pattern. Union members are raw `type` objects (not `TypeInfo` instances), so callers -handle them directly. +Frame 1 sees `FeatureVersion` -- a NewType. Constructs `_NewTypeCtx("FeatureVersion", +FeatureVersion)`, recurses into `int32`. Frame 2 sees `int32` -- also a NewType. +Constructs `_NewTypeCtx("int32", int32)`, recurses into `Annotated[int, Field(ge=0, +...)]`. Frame 3 sees `Annotated`. Collects `ConstraintSource(source_name="int32", +constraint=)`. Recurses into `int`. Frame 4 hits the terminal +`int`. `newtype_ctx` is still `_NewTypeCtx("int32", int32)` -- frame 3 passed frame 2's +context through unchanged, since `Annotated` does not introduce a NewType -- so +`_terminal` uses `newtype_ctx.name` (`"int32"`) as `base_type`. Returns +`Primitive(base_type="int32")`. Frame 3 attaches the constraints: `Primitive` gets the +`ge=0` / `le=2147483647` sources prepended. Frame 2's `_erase_inner_newtypes` sees a +bare `Primitive` -- no `NewTypeShape` to strip -- and wraps the result in +`NewTypeShape(name="int32", inner=Primitive(...))`. 
Frame 1's `_erase_inner_newtypes` +strips that inner `NewTypeShape`, yielding `Primitive(...)`, and wraps it in +`NewTypeShape(name="FeatureVersion", inner=Primitive(...))`. + +The two paths demonstrate the function's range. Segment exits after two frames via +`union_resolver`. FeatureVersion recurses four frames through a NewType chain, with +constraint provenance tagging surviving to rendering. ## 4. Data structures @@ -206,10 +186,10 @@ handle them directly. a dataclass with no methods beyond field access and, in `UnionSpec`'s case, one cached property. -**FieldSpec** represents one model field: alias-resolved name, `TypeInfo`, description, -required flag. Two fields populated later by tree expansion: `model` (a reference to the -nested `ModelSpec` for MODEL-kind fields) and `starts_cycle` (true when following this -field's model would create a cycle in the ancestor chain). +**FieldSpec** represents one model field: alias-resolved name, `shape: FieldShape`, +description, required flag. `ModelRef` and `UnionRef` shapes carry their resolved specs +(populated during `extract_model` recursion), so consumers can follow the tree without a +separate expansion pass. **ModelSpec** represents one Pydantic model: class name, cleaned docstring, fields in documentation order, source class reference, the entry point string that located it, and @@ -218,33 +198,37 @@ model-level constraints from decorators like `@require_any_of`. **UnionSpec** represents a discriminated union type alias. Segment's `UnionSpec` carries `members=[RoadSegment, RailSegment, WaterSegment]`, `discriminator_field="subtype"`, and `common_base=TransportationSegment`. Its `annotated_fields` list pairs each `FieldSpec` -with `variant_sources` -- a tuple of class names indicating which union members -contribute that field, or `None` for fields from `TransportationSegment` shared across -all members. The `fields` cached property unwraps this for code that doesn't need -provenance. 
`UnionSpec` uses `eq=False` because it contains mutable lists and a -`cached_property` -- dataclass-generated `__eq__` would be unreliable. - -**FeatureSpec** is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. This is -the pipeline's unifying abstraction. Tree expansion, type collection, rendering -dispatch, and example loading all operate on `FeatureSpec` without knowing which -concrete type they hold. +with `variant_sources` -- a tuple of `BaseModel` subclasses indicating which union +members contribute that field, or `None` for fields from `TransportationSegment` shared +across all members. The `fields` cached property unwraps this for code that doesn't need +provenance. Each member also has its already-extracted `ModelSpec` retained in +`member_specs: list[MemberSpec]` so downstream consumers (check builder, base-row +generator) reuse it instead of re-extracting the subtree. `UnionSpec` uses `eq=False` +because it contains mutable lists and a `cached_property` -- dataclass-generated +`__eq__` would be unreliable. + +**FeatureSpec** is the type alias `ModelSpec | UnionSpec`. Type collection, rendering +dispatch, and example loading all operate on `FeatureSpec`. Consumers narrow with +`isinstance` when they need `UnionSpec`-specific attributes like `discriminator_field`. **EnumSpec** and **EnumMemberSpec** serve enums. **NewTypeSpec** serves NewTypes. **NumericSpec** serves numeric primitives with an `Interval` for bounds and optional `float_bits`. -**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec` -- -the set of non-feature types that need their own output pages. `NumericSpec` and -geometry types are excluded because they render on aggregate pages rather than -individual ones. +**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec | +PydanticTypeSpec` -- the set of non-feature types that need their own output pages. 
+`PydanticTypeSpec` covers Pydantic built-ins like `HttpUrl` and `EmailStr` (carrying the +class plus a pointer back to Pydantic's docs). `NumericSpec` and geometry types are +excluded because they render on aggregate pages rather than individual ones. ### Classification functions -Three functions at the bottom of `extraction/specs.py` classify discovery results. `is_model_class` -is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, BaseModel)`. -`is_union_alias` calls `analyze_type` and checks for `UNION` kind -- the only place -outside the type analyzer that touches Python type annotations. `filter_model_classes` -applies the model guard across the discovery dict's values. +Three functions at the bottom of `extraction/specs.py` classify discovery results. +`is_model_class` is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, +BaseModel)`. `is_union_alias` calls `analyze_type` with a sentinel `union_resolver` that +raises immediately on detection -- the only place outside the type analyzer that touches +Python type annotations. `filter_model_classes` applies the model guard across the +discovery dict's values. ## 5. Type registry @@ -291,27 +275,28 @@ classes. One subtlety: Pydantic strips the `Annotated` wrapper from some fields and moves the metadata to `field_info.metadata`. When this happens, `analyze_type` sees a bare type -and misses the constraints. `_merge_field_metadata` patches them back in, tagging them -with `source=None` since they came from the field's own annotation rather than a NewType -chain. +and misses the constraints. `_attach_field_metadata` routes them through +`attach_constraints` -- tagging them with `source_name=None` since they came from the field's +own annotation rather than a NewType chain -- so length-constraint typing happens here +just as it does during normal `Annotated` unwrapping. 
Model-level constraints come from `ModelConstraint.get_model_constraints(model_class)`, which inspects decorators like `@require_any_of` and `@require_if`. -### Tree expansion +### Recursive extraction -`expand_model_tree` is the recursive step that populates `FieldSpec.model` references. -It maintains a shared cache keyed by Python class and an ancestor set for cycle -detection. +`extract_model` recursively resolves sub-models and sub-unions during field extraction, +building `ModelRef`/`UnionRef` shapes with their specs already populated. It maintains a +shared cache keyed by Python class and an ancestor set for cycle detection. The cache insert happens *before* recursion. Without this ordering, a back-edge encounter would find no cached entry and infinite-loop instead of marking -`starts_cycle=True`. The sequence: extract the sub-model, insert it into the cache, then -recurse into its fields. Shared references (the same sub-model used in multiple fields) -reuse the cached `ModelSpec` without marking cycles. +`starts_cycle=True`. The sequence: create the partial `ModelSpec`, insert it into the +cache, then populate its fields. Shared references (the same sub-model used in multiple +fields) reuse the cached `ModelSpec` without marking cycles. -Union-kind fields skip inline expansion -- they appear as a single row in the output, -linking to their members, rather than expanding inline. +`UnionRef` fields resolve via the `union_resolver` callback -- they appear as a single +row in the output, linking to their members, rather than expanding inline. ## 7. Other extractors @@ -326,17 +311,18 @@ per-member check, so members that inherit the class docstring verbatim get ### NewType extraction `extract_newtype` calls `analyze_type` on the NewType callable and extracts the custom -docstring. When the NewType has no explicit docstring, it falls back to -`TypeInfo.description` -- the first `Field.description` found in the `Annotated` +docstring. 
When the NewType has no explicit docstring, it falls back to the description +returned by `analyze_type` -- the first `Field.description` found in the `Annotated` metadata chain. ### Union extraction The most involved extractor. Walk through `Segment` concretely. -`extract_union("Segment", annotation)` calls `analyze_type` on the -`Annotated[Union[RoadSegment, RailSegment, WaterSegment], ...]` alias. The analyzer -returns `kind=UNION` with the three member types. +`extract_union("Segment", annotation)` calls `_union_members`, which runs `analyze_type` +with a capturing `union_resolver` that raises out of the analysis as soon as it sees a +multi-arm union of `BaseModel` subclasses. The captured tuple gives the three member +types plus any description from enclosing `Annotated` layers. Next, `_find_common_base` intersects each member's filtered MRO (BaseModel subclasses only, excluding `BaseModel` itself). All three share `TransportationSegment` in their @@ -348,13 +334,17 @@ The extractor calls `extract_model(TransportationSegment)` to get the shared fie Fields like `id`, `geometry`, `version`, `sources`, and `subtype` appear in the common base. These become shared `AnnotatedField` entries with `variant_sources=None`. -Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Fields not -in the shared set are variant-specific, deduplicated by `(name, type_identity)` where -`type_identity` captures `base_type`, `kind`, `is_optional`, and `list_depth`. If -`RoadSegment` and `WaterSegment` both define a `width` field with the same type -identity, the `AnnotatedField` accumulates both class names: -`variant_sources=("RoadSegment", "WaterSegment")`. Fields unique to one member get a -single-element tuple. +Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Each result +is retained on the `UnionSpec` as a `MemberSpec(member_cls, spec)` so consumers don't +re-extract. 
Fields not in the shared set are variant-specific, deduplicated by +`(name, structural_fingerprint)` where the fingerprint walks the field's `FieldShape` +tree, capturing every wrapper layer plus the terminal type. If `RoadSegment` and +`WaterSegment` both define a `width` field with the same fingerprint, the +`AnnotatedField` accumulates both classes: `variant_sources=(RoadSegment, +WaterSegment)`. Fields unique to one member get a single-element tuple. When two members +declare the same field name with the same structural fingerprint but diverging +constraints, the extractor raises rather than silently dropping one member's +constraints. `extract_discriminator` inspects the `Annotated` metadata for a `FieldInfo` with a discriminator attribute. For Segment, it finds `subtype` and builds the mapping: @@ -435,24 +425,23 @@ discover every referenced type that needs its own output page: enums, semantic N and sub-models. The walk maintains a visited set for models and a feature name set for skip detection. -Types that are themselves top-level features get skipped. For UNION-kind fields, the -function extracts and walks each member's fields. For semantic NewTypes, it walks the -`__supertype__` chain to collect intermediate NewTypes -- `Id` wraps -`NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get their own -pages. The `walk_type_info` visitor handles dict key/value recursion. +Types that are themselves top-level features get skipped. For `UnionRef` fields, the function extracts and walks each member's fields. For +semantic NewTypes, it walks the `__supertype__` chain to collect intermediate NewTypes -- +`Id` wraps `NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get +their own pages. `walk_shape` from `field_walk.py` handles recursion into `ArrayOf`, +`MapOf`, and `NewTypeShape` wrappers. -MODEL-kind fields follow `field_spec.model` references that were populated by -`expand_model_tree`. 
The function raises `RuntimeError` if it encounters a MODEL-kind -field with `model=None` -- a guard against calling collection before tree expansion. +`ModelRef` fields follow their `.model` reference (populated during `extract_model` +recursion) into nested `ModelSpec` trees. A single field matches multiple conditions independently. A semantic NewType wrapping a -MODEL-kind type triggers both NewType extraction and model collection. The checks use +`ModelRef` triggers both NewType extraction and model collection. The checks use independent `if` statements, not `elif`. ## 11. Path assignment -`build_placement_registry` builds the complete mapping from type names to output file -paths. Three tiers: +`build_placement_registry` builds the complete `dict[TypeIdentity, PurePosixPath]` +mapping each type to its output file path. Four tiers: Aggregate pages come first. All numeric primitives point to `system/primitive/primitives.md`. All geometry types point to @@ -460,7 +449,8 @@ Aggregate pages come first. All numeric primitives point to reference page. Feature specs get individual pages. Output directories derive from -`output_dir_for_entry_point`. Filenames use `slug_filename`. +`output_dir_for_entry_point`. Filenames are the snake-case type name with a `.md` +extension. Supplementary specs get module-derived paths from `source_type.__module__`. When a supplementary type's output directory falls under a feature directory, @@ -472,15 +462,20 @@ cluttering feature directories. `_nest_under_types` sorts feature directories by path length (descending) before checking containment, so the most specific match wins. +`PydanticTypeSpec` entries (e.g. `HttpUrl`) bypass module mirroring and land at +`pydantic/<module>/<name>.md`, keeping the generated Pydantic reference set +isolated from theme directories. + ## 12. Links and reverse references ### Link computation -`LinkContext` carries the current page's output path and the full type-to-path registry. 
-When a renderer formats a type reference, it calls `resolve_link` to compute a relative -path from the current page to the target. Types without registry entries return `None`, -telling renderers to show inline code instead of a broken link. `resolve_link_or_slug` -provides a fallback when a link is required regardless. +`LinkContext` carries the current page's output path and the full `dict[TypeIdentity, +PurePosixPath]` registry. When a renderer formats a type reference, it calls +`resolve_link` with the target's `TypeIdentity` to compute a relative path. Identities +without registry entries return `None`, telling renderers to show inline code instead +of a broken link. `resolve_link_or_slug` provides a fallback when a link is required +regardless. `relative_link` computes `../` navigation between any two paths in the output tree. It finds the common prefix of directory components, counts the levels up from the source @@ -490,8 +485,9 @@ rejects `..` components to prevent path traversal surprises. ### Reverse references `compute_reverse_references` walks all feature fields and supplementary specs to build -`dict[str, list[UsedByEntry]]`. Each entry maps a type name to the list of types that -reference it. Entries sort models before NewTypes, alphabetical within each group. +`dict[TypeIdentity, list[UsedByEntry]]`. Each entry maps a target identity to the list +of types that reference it. Entries sort models before NewTypes, alphabetical within +each group. The function tracks references with sets for deduplication, then sorts into lists at the end. It skips self-references and references to types not in the supplementary spec dict @@ -504,29 +500,28 @@ provenance rather than direct field reference. ## 13. Markdown type formatting -`markdown/type_format.py` converts `TypeInfo` into display strings for markdown output. +`markdown/type_format.py` converts a field's `FieldShape` into display strings for +markdown output. 
-`format_type` handles the full range of field types. Single-value Literals render as -`"value"` in backticks. Semantic NewTypes and enums/models get markdown links via -`_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain +`format_type` handles the full range of field types. Single-value `LiteralScalar`s +render as `"value"` in backticks. Semantic NewTypes and enums/models get markdown links +via `_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain code spans. For types with a linked identity (semantic NewTypes, enums, models), list -rendering depends on where the list layers sit relative to the NewType boundary. -`newtype_outer_list_depth > 0` means the list wraps the NewType (`list[PhoneNumber]`) and -renders as `list<PhoneNumber>`. `is_list` with `newtype_name` set means the NewType -wraps a list internally (`Sources` wrapping `list[SourceItem]`) and renders with a -`(list)` qualifier. Non-NewType identities (enums, models) use `list<X>` syntax. Linked -inner types use broken-backtick syntax (`` `list<` `` ... `` `>` ``) built as a single -wrapper to avoid adjacent backticks that CommonMark would interpret as multi-backtick -code span delimiters. Dict types render as `` `map` ``. Qualifiers (optional, list, -map) append in parentheses. - -Union members format independently -- each gets its own link resolution, joined with -pipe separators escaped for table-cell safety. +rendering depends on where the `ArrayOf` layers sit relative to the `NewTypeShape` +boundary. An `ArrayOf` sitting outside the `NewTypeShape` in the shape tree means the +list wraps the NewType (`list[PhoneNumber]`) and renders as `list<PhoneNumber>`. A +`NewTypeShape` with an `ArrayOf` inner means the NewType wraps a list internally +(`Sources` wrapping `list[SourceItem]`) and renders with a `(list)` qualifier. Non-NewType +identities (enums, models) use `list<X>` syntax. Linked inner types use broken-backtick +syntax (`` `list<` `` ... 
`` `>` ``) built as a single wrapper to avoid adjacent backticks +that CommonMark would interpret as multi-backtick code span delimiters. `MapOf` shapes +render as `` `map` ``. Qualifiers (optional, list, map) append in parentheses. + +`UnionRef` members format independently -- each gets its own link resolution, joined +with pipe separators escaped for table-cell safety. `format_underlying_type` handles NewType page headers. It links enums and models that -have their own pages but skips the outermost NewType name to avoid self-referencing. The -function uses `source_type.__name__` rather than `base_type` for link resolution, since -`base_type` may carry the outermost NewType name when only one NewType wraps a class. +have their own pages but skips the outermost NewType name to avoid self-referencing. ## 14. Markdown rendering @@ -631,26 +626,23 @@ pipeline. `generate_markdown_pages` in `markdown/pipeline.py` is the "main" function. It takes feature specs and a schema root, returns rendered pages without touching the filesystem. -Eight steps: - -1. **Expand model trees** with a shared cache across all features, so sub-models - referenced by multiple features extract once. +Seven steps (tree expansion now happens inside `extract_model`): -2. **Partition primitive and geometry names** from the system primitive module's +1. **Partition primitive and geometry names** from the system primitive module's `__all__` exports. -3. **Collect supplementary types** by walking expanded feature trees. +2. **Collect supplementary types** by walking feature trees. -4. **Build the placement registry** mapping every type to its output file path. +3. **Build the placement registry** mapping every type to its output file path. -5. **Compute reverse references** across all features and supplements. +4. **Compute reverse references** across all features and supplements. -6. **Render each feature** with its `LinkContext`, loaded examples, and used-by entries. +5. 
**Render each feature** with its `LinkContext`, loaded examples, and used-by entries. -7. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, - or `render_feature` (for sub-models) based on spec type. +6. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, + `render_feature` (for sub-models), or `render_pydantic_type` based on spec type. -8. **Render aggregate pages** for primitives and geometry. +7. **Render aggregate pages** for primitives and geometry. The return value is `list[RenderedPage]` -- frozen dataclasses carrying content, output path, and a boolean `is_feature` flag. The caller decides what to do with them. @@ -688,36 +680,34 @@ A reader who reached this point has seen every module in isolation. This section entry_point="overture.schema.transportation:Segment")`. **Classification.** The CLI tests each entry. `is_model_class(Segment)` returns false -- -`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type`, which peels -the `Annotated` wrapper and finds three `BaseModel` subclasses in the union. The -analyzer returns `kind=UNION`. The CLI routes Segment to `extract_union`. - -**Extraction.** `extract_union("Segment", annotation)` calls `analyze_type` again (cheap --- the same two-iteration path), gets the three member types, and finds -`TransportationSegment` as the common base via `_find_common_base`. It extracts the -common base's fields as shared, then extracts each member's fields and partitions the -non-shared ones into `AnnotatedField` entries with variant provenance. +`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type` with a sentinel +`union_resolver` that raises on detection. The CLI routes Segment to `extract_union`. + +**Extraction.** `extract_union("Segment", annotation)` calls `_union_members`, which +runs `analyze_type` with a capturing `union_resolver` to grab the three member types +plus the union description. 
`_find_common_base` picks `TransportationSegment` as the +shared parent. The extractor calls `extract_model` on the common base and on each +member -- the results are cached on the `UnionSpec` as `member_specs` -- and partitions +the non-shared fields into `AnnotatedField` entries with variant provenance. `extract_discriminator` finds `subtype` and builds `{"road": RoadSegment, "rail": -RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` satisfying -`FeatureSpec`. +RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` (a `FeatureSpec`). Meanwhile, concrete models like `Building` go through `extract_model`, which calls `analyze_type` on each field annotation. A field typed `FeatureVersion` unwraps through -two NewType layers and an `Annotated` layer, producing a `TypeInfo` with -`base_type="int32"`, `newtype_name="FeatureVersion"`, and constraint provenance linking -`ge=0` back to the `int32` NewType. Both extraction paths produce specs satisfying -`FeatureSpec`. +two NewType layers and an `Annotated` layer, producing a `NewTypeShape(name="FeatureVersion", +inner=Primitive(base_type="int32", constraints=(...)))` shape with constraint provenance +linking `ge=0` back to the `int32` NewType. Both extraction paths produce `FeatureSpec` +values. **Pipeline entry.** The feature specs enter `generate_markdown_pages`. -`expand_model_tree` walks MODEL-kind fields on Segment's `UnionSpec` and populates -`FieldSpec.model` references. The shared cache ensures sub-models referenced by multiple -features (like `Sources`) extract once. Union-kind fields skip inline expansion. +Sub-model `FieldShape` trees are fully resolved -- `ModelRef` nodes already carry their +`ModelSpec` from recursive `extract_model` calls. No separate expansion pass is needed. **Layout.** `partition_numeric_and_geometry_types` reads the system module's exports. 
-`collect_all_supplementary_types` walks Segment's expanded fields and discovers -referenced enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and -sub-models. The walk follows `FieldSpec.model` references down the tree, and for -UNION-kind fields, extracts and walks each member's fields separately. +`collect_all_supplementary_types` walks Segment's field shapes and discovers referenced +enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and sub-models. The +walk follows `ModelRef.model` references down the tree, and for `UnionRef` shapes, +extracts and walks each member's fields separately. `build_placement_registry` assigns Segment's output path from its entry point: `entry_point_module` extracts `overture.schema.transportation`, `compute_output_dir` @@ -732,10 +722,10 @@ populate "Used By" sections: the `Subtype` enum page shows that Segment uses it. full registry. `render_feature` dispatches to `_expand_union_fields` because the spec is a `UnionSpec`. Shared fields from `TransportationSegment` render as plain rows. Variant-specific fields get italic tags: `` `road_class` *(Road)* ``. The renderer -formats each field's type via `format_type`, which resolves links through the +formats each field's `FieldShape` via `format_type`, which resolves links through the `LinkContext` -- `Subtype` gets a relative link to its enum page, `Id` links to its -NewType page. Constraints with `source=None` annotate field rows; constraints with named -sources appear on the source NewType's page instead. +NewType page. Constraints with `source_name=None` annotate field rows; constraints with +named sources appear on the source NewType's page instead. 
The example loader finds `pyproject.toml` in the transportation theme package, reads `[examples.Segment]`, validates each example against the union alias (injecting literal diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml index 3019a6a92..044b592ce 100644 --- a/packages/overture-schema-codegen/pyproject.toml +++ b/packages/overture-schema-codegen/pyproject.toml @@ -20,9 +20,25 @@ name = "overture-schema-codegen" overture-codegen = "overture.schema.codegen.cli:main" [tool.uv.sources] +overture-schema-addresses-theme = { workspace = true } +overture-schema-base-theme = { workspace = true } +overture-schema-buildings-theme = { workspace = true } overture-schema-cli = { workspace = true } overture-schema-common = { workspace = true } +overture-schema-divisions-theme = { workspace = true } +overture-schema-places-theme = { workspace = true } overture-schema-system = { workspace = true } +overture-schema-transportation-theme = { workspace = true } + +[dependency-groups] +test = [ + "overture-schema-addresses-theme", + "overture-schema-base-theme", + "overture-schema-buildings-theme", + "overture-schema-divisions-theme", + "overture-schema-places-theme", + "overture-schema-transportation-theme", +] [tool.hatch.version] path = "src/overture/schema/codegen/__about__.py" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py index 279f22a84..fa2610a04 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -17,6 +17,7 @@ FeatureSpec, is_model_class, is_union_alias, + partitions_from_tags, ) from .extraction.union_extraction import extract_union from .layout.module_layout import ( @@ -26,12 +27,13 @@ entry_point_module, ) from .markdown.pipeline import generate_markdown_pages +from .pyspark.pipeline import 
generate_pyspark_modules log = logging.getLogger(__name__) __all__ = ["cli"] -_OUTPUT_FORMATS = ("markdown",) +_OUTPUT_FORMATS = ("markdown", "pyspark") _FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" @@ -84,7 +86,15 @@ def list_models() -> None: "--output-dir", type=click.Path(path_type=Path), default=None, - help="Write output to directory (default: stdout)", + help="Write output files directly into this directory (default: stdout). " + "For pyspark, writes expression modules (*.py) and a _registry.py. " + "For markdown, writes theme subdirectories.", +) +@click.option( + "--test-output-dir", + type=click.Path(path_type=Path), + default=None, + help="Write test modules (test_*.py) into this directory (pyspark only).", ) def generate( output_format: str, @@ -92,13 +102,13 @@ def generate( filters: tuple[str, ...], excludes: tuple[str, ...], output_dir: Path | None, + test_output_dir: Path | None, ) -> None: """Generate code/docs from discovered models.""" - all_models = discover_models() + if output_format != "pyspark" and test_output_dir is not None: + raise click.UsageError("--test-output-dir is only valid with --format pyspark") - # Schema root from ALL entry points (before tag filters). 
- module_paths = [entry_point_module(k.entry_point) for k in all_models] - schema_root = compute_schema_root(module_paths) + all_models = discover_models() models = filter_models(all_models, build_selector(tags, filters, excludes)) @@ -107,18 +117,27 @@ def generate( feature_specs: list[FeatureSpec] = [] for key, entry in models.items(): + partitions = partitions_from_tags(key.tags) if is_model_class(entry): - feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + feature_specs.append( + extract_model(entry, entry_point=key.entry_point, partitions=partitions) + ) elif is_union_alias(entry): feature_specs.append( extract_union( entry_point_class(key.entry_point), entry, entry_point=key.entry_point, + partitions=partitions, ) ) - _generate_markdown(feature_specs, schema_root, output_dir) + if output_format == "pyspark": + _generate_pyspark(feature_specs, output_dir, test_output_dir) + else: + module_paths = [entry_point_module(k.entry_point) for k in all_models] + schema_root = compute_schema_root(module_paths) + _generate_markdown(feature_specs, schema_root, output_dir) def _generate_markdown( @@ -141,6 +160,24 @@ def _generate_markdown( _write_category_files(output_dir, all_paths, feature_paths) +def _generate_pyspark( + feature_specs: list[FeatureSpec], + output_dir: Path | None, + test_output_dir: Path | None = None, +) -> None: + """Generate PySpark validation modules. + + Output is syntactically valid Python; we assume a code formatter runs + over the written directories afterwards to match existing conventions. 
+ """ + modules = generate_pyspark_modules(feature_specs) + for mod in modules.source: + _write_output(mod.content, output_dir, mod.path) + if test_output_dir is not None: + for mod in modules.test: + _write_output(mod.content, test_output_dir, mod.path) + + def _ancestor_dirs(paths: set[PurePosixPath]) -> set[PurePosixPath]: """Collect all ancestor directories for a set of file paths.""" dirs: set[PurePosixPath] = set() diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py deleted file mode 100644 index 9d06341fb..000000000 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py +++ /dev/null @@ -1,41 +0,0 @@ -"""PascalCase to snake_case conversion for code generation.""" - -import re - -__all__ = ["slug_filename", "to_snake_case"] - -# Insert _ between an acronym run and a capitalized word start (HTML|Parser) -_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])") -# Insert _ between a lowercase/digit and an uppercase letter (building|Part) -_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])") - - -def to_snake_case(name: str) -> str: - """Convert PascalCase to snake_case. - - Handles acronym runs correctly: "HTMLParser" becomes "html_parser", - not "h_t_m_l_parser". - - >>> to_snake_case("HTMLParser") - 'html_parser' - >>> to_snake_case("BuildingPart") - 'building_part' - >>> to_snake_case("simple") - 'simple' - """ - name = _ACRONYM_BOUNDARY.sub(r"\1_\2", name) - name = _CAMEL_BOUNDARY.sub(r"\1_\2", name) - return name.lower() - - -def slug_filename(name: str, ext: str = ".md") -> str: - """Convert a PascalCase type name to a snake_case filename. 
- - >>> slug_filename("HexColor") - 'hex_color.md' - >>> slug_filename("BuildingPart") - 'building_part.md' - >>> slug_filename("BuildingPart", ext=".json") - 'building_part.json' - """ - return f"{to_snake_case(name)}{ext}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py new file mode 100644 index 000000000..1be5d6d7b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py @@ -0,0 +1,172 @@ +"""Tree-shaped IR for model field types. + +`FieldShape` is a discriminated union -- `Primitive`, `LiteralScalar`, +`AnyScalar`, `ModelRef`, `UnionRef`, `ArrayOf`, `MapOf`, `NewTypeShape` +-- nested to describe arbitrary list / dict / NewType wrapping. Each +variant carries its own constraints (where meaningful), and walkers +encounter each constraint at the layer it targets. + +The three terminal scalar variants (`Primitive`, `LiteralScalar`, +`AnyScalar`) are grouped under the `Scalar` type alias for consumers +that only need to ask "is this a leaf?". + +`NewTypeShape` wraps an inner shape, so its position relative to +`ArrayOf` is structural: `NewTypeShape(inner=ArrayOf(...))` is a +NewType over `list[X]`, while `ArrayOf(element=NewTypeShape(...))` +is a list of NewType-wrapped values. Consumers pattern-match on +shape to distinguish the two. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, TypeAlias + +if TYPE_CHECKING: + from .specs import ModelSpec, UnionSpec + +__all__ = [ + "AnyScalar", + "ArrayOf", + "ConstraintSource", + "FieldShape", + "LiteralScalar", + "MapOf", + "ModelRef", + "NewTypeShape", + "Primitive", + "Scalar", + "UnionRef", +] + + +@dataclass(frozen=True, slots=True) +class ConstraintSource: + """A constraint paired with the NewType that contributed it. 
+ + `source_ref` and `source_name` identify the NewType that declared + the constraint; both are `None` for constraints contributed directly + on a field annotation rather than through a NewType. `constraint` + is the raw metadata object from `Annotated[..., constraint]`. + """ + + source_ref: object | None + source_name: str | None + constraint: object + + +@dataclass(frozen=True, slots=True) +class Primitive: + """Terminal type with a registry lookup key. + + Covers primitives (`int32`, `str`), enums, Pydantic built-ins + (`HttpUrl`, `EmailStr`), and `BaseModel` subclasses that weren't + resolved to a `ModelRef` (e.g. when no `model_resolver` was + supplied). + """ + + base_type: str + source_type: type | None = None + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class LiteralScalar: + """`Literal[X, ...]` terminal.""" + + values: tuple[object, ...] + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class AnyScalar: + """`typing.Any` terminal.""" + + constraints: tuple[ConstraintSource, ...] = () + + +Scalar: TypeAlias = Primitive | LiteralScalar | AnyScalar +"""Terminal shape: a value that doesn't wrap another shape. + +Consumers that just need "is this a leaf?" check `isinstance(x, Scalar)`; +consumers that need terminal-specific data narrow to a variant. +""" + + +@dataclass(frozen=True, slots=True) +class ModelRef: + """Reference to a Pydantic sub-model. + + `starts_cycle` marks the back-edge of a cycle in the model graph; + consumers that recurse into models must stop at cycle starts. + """ + + model: ModelSpec + starts_cycle: bool = False + + +@dataclass(frozen=True, slots=True) +class UnionRef: + """Reference to a discriminated union of models.""" + + union: UnionSpec + + +@dataclass(frozen=True, slots=True) +class ArrayOf: + """Sequence of values sharing a single element shape. + + Nested arrays are nested `ArrayOf` instances; there is no numeric + depth field. 
`constraints` carries array-level validation rules + (length, uniqueness). Per-element constraints live on `element` + and its descendants. + """ + + element: FieldShape + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class MapOf: + """Mapping from a key shape to a value shape. + + `constraints` carries map-level validation rules. Per-key and + per-value constraints live on `key` / `value` respectively. + """ + + key: FieldShape + value: FieldShape + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class NewTypeShape: + """A NewType wrapper around an inner shape. + + Position relative to other wrappers is meaningful: + `NewTypeShape(inner=ArrayOf(...))` is a NewType over `list[X]`; + `ArrayOf(element=NewTypeShape(...))` is a list of NewType-wrapped + values. Consumers distinguish the two by pattern, not a numeric + offset. + + Constraints contributed by the NewType chain attach to the + `Scalar` / `ArrayOf` / `MapOf` layer they target, not to the + wrapper itself. `name` and `ref` identify the NewType for linking + without owning constraint state. + """ + + name: str + ref: object + inner: FieldShape + + +FieldShape: TypeAlias = ( + Primitive + | LiteralScalar + | AnyScalar + | ModelRef + | UnionRef + | ArrayOf + | MapOf + | NewTypeShape +) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py index 0db927065..141af58d2 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -1,37 +1,31 @@ """Convert field-level constraints to display text. 
Handles constraints from Annotated metadata and NewType wrappers: -Ge, Gt, Interval, Le, Lt, MaxLen, MinLen, GeometryTypeConstraint, -Reference, and custom constraint classes. +Ge, Gt, Interval, Le, Lt, ArrayMinLen, ArrayMaxLen, ScalarMinLen, +ScalarMaxLen, GeometryTypeConstraint, Reference, and custom constraint +classes. """ from __future__ import annotations from collections.abc import Callable -from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.system.primitive import GeometryTypeConstraint from overture.schema.system.ref import Reference from .docstring import first_docstring_line +from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen from .specs import TypeIdentity from .type_analyzer import ConstraintSource __all__ = [ "constraint_display_text", - "constraint_pattern", "describe_field_constraint", ] -# Bound attribute names paired with display operators. Each entry maps an -# annotated_types constraint attribute (Ge, Gt, Le, Lt, Interval) to its -# mathematical symbol for prose rendering. -# -# numeric_extraction.py has its own _BOUND_ATTRS for numeric extraction. The -# duplication is deliberate: these modules use the same attribute names for -# unrelated purposes (display formatting vs. numeric bound extraction), and -# coupling them for four string literals adds a dependency without value. +# Bound attribute -> mathematical symbol for prose rendering. _BOUND_OPS: tuple[tuple[str, str], ...] 
= ( ("ge", "≥"), ("gt", ">"), @@ -108,9 +102,9 @@ def describe_field_constraint( result = _first_bound(constraint) if result is not None: return result - if isinstance(constraint, MinLen): + if isinstance(constraint, (ArrayMinLen, ScalarMinLen)): return f"Minimum length: {constraint.min_length}" - if isinstance(constraint, MaxLen): + if isinstance(constraint, (ArrayMaxLen, ScalarMaxLen)): return f"Maximum length: {constraint.max_length}" if _is_opaque_constraint(constraint): @@ -130,7 +124,7 @@ def _constraint_class_description(constraint: object) -> str | None: return line or None -def constraint_pattern(constraint: object) -> str | None: +def _constraint_pattern(constraint: object) -> str | None: """Extract the regex pattern string from a constraint, if present. Traverses two levels: constraint.pattern is a compiled re.Pattern @@ -148,7 +142,7 @@ def constraint_display_text( description = _constraint_class_description(cs.constraint) if _is_opaque_constraint(cs.constraint) and description: cls_name = type(cs.constraint).__name__ - pattern = constraint_pattern(cs.constraint) + pattern = _constraint_pattern(cs.constraint) if pattern: return f"{description} (`{cls_name}`, pattern: `{pattern}`)" return f"{description} (`{cls_name}`)" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py new file mode 100644 index 000000000..86d385d60 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py @@ -0,0 +1,215 @@ +"""Generic traversal helpers over `FieldShape` trees. + +`shape_children` (one-level child enumeration) and `walk_shape` +(pre-order DFS) cover open-ended traversals; `terminal_of`, +`terminal_scalar`, `list_depth`, `newtype_name`, and `all_constraints` +cover the most common derived views. 
`ModelRef` and `UnionRef` are +leaves -- the walker does not cross model or union boundaries +automatically; that's a per-consumer decision. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator + +from typing_extensions import assert_never + +from .field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) + +__all__ = [ + "all_constraints", + "has_array_layer", + "list_depth", + "newtype_name", + "shape_children", + "terminal_model_ref", + "terminal_of", + "terminal_primitive", + "terminal_scalar", + "walk_shape", +] + + +def terminal_of(shape: FieldShape) -> FieldShape: + """Unwrap `ArrayOf` and `NewTypeShape` layers to find the terminal shape. + + Returns the innermost shape that isn't a sequence or NewType wrapper. + `Scalar`, `ModelRef`, `UnionRef`, and `MapOf` count as terminals. + """ + while True: + match shape: + case ArrayOf(element=inner) | NewTypeShape(inner=inner): + shape = inner + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return shape + case _: + assert_never(shape) + + +def terminal_scalar(shape: FieldShape) -> Scalar | None: + """Return the terminal `Scalar`, or `None` for non-scalar terminals.""" + terminal = terminal_of(shape) + return terminal if isinstance(terminal, Scalar) else None + + +def terminal_primitive(shape: FieldShape) -> Primitive | None: + """Return the terminal `Primitive`, or `None` for non-primitive terminals. + + Like `terminal_scalar`, but returns `None` for `LiteralScalar` and + `AnyScalar` — use this when the caller needs `base_type` or + `source_type`, which only exist on `Primitive`. 
+ """ + terminal = terminal_of(shape) + return terminal if isinstance(terminal, Primitive) else None + + +def terminal_model_ref(shape: FieldShape) -> ModelRef | None: + """Return the terminal `ModelRef`, or `None` for non-model terminals.""" + terminal = terminal_of(shape) + return terminal if isinstance(terminal, ModelRef) else None + + +def shape_children(shape: FieldShape) -> Iterator[FieldShape]: + """Yield direct child shapes within *shape* (one level deep). + + `Scalar`, `ModelRef`, and `UnionRef` have no children. + """ + match shape: + case ArrayOf(element=element): + yield element + case MapOf(key=key, value=value): + yield key + yield value + case NewTypeShape(inner=inner): + yield inner + case Primitive() | LiteralScalar() | AnyScalar() | ModelRef() | UnionRef(): + return + case _: + assert_never(shape) + + +def walk_shape(shape: FieldShape, visit: Callable[[FieldShape], None]) -> None: + """Pre-order traversal of a `FieldShape` tree. + + Visits *shape*, then descends into each direct child via + `shape_children`. Stops at `ModelRef` / `UnionRef` -- recursion + across model boundaries is the caller's choice. + """ + visit(shape) + for child in shape_children(shape): + walk_shape(child, visit) + + +def list_depth(shape: FieldShape) -> int: + """Total number of `ArrayOf` layers in *shape*, looking through `NewTypeShape`. + + A NewType wrapping a list counts the same as a list wrapping a + NewType. + """ + depth = 0 + cur = shape + while True: + match cur: + case ArrayOf(element=element): + depth += 1 + cur = element + case NewTypeShape(inner=inner): + cur = inner + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return depth + case _: + assert_never(cur) + + +def has_array_layer(shape: FieldShape) -> bool: + """Whether *shape* has any `ArrayOf` layer, looking through `NewTypeShape`. 
+ + Prefer this over `list_depth(shape) > 0` -- callers that only need + "is this array-shaped" don't need to count layers. + """ + cur = shape + while isinstance(cur, NewTypeShape): + cur = cur.inner + return isinstance(cur, ArrayOf) + + +def newtype_name(shape: FieldShape) -> str | None: + """Return the outermost `NewTypeShape` name, looking through `ArrayOf` layers.""" + cur: FieldShape = shape + while isinstance(cur, ArrayOf): + cur = cur.element + match cur: + case NewTypeShape(name=name): + return name + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return None + case _: + assert_never(cur) + + +def all_constraints(shape: FieldShape) -> tuple[ConstraintSource, ...]: + """Concatenate the field's own constraints from every layer of *shape*. + + Walks `NewTypeShape` and `ArrayOf` wrappers to gather constraints + that apply to this field. Stops at `MapOf` (key/value constraints + belong to nested key/value shapes, not to the enclosing field) and + at `ModelRef` / `UnionRef` (which carry no constraints). Constraints + from outer `ArrayOf` layers appear before constraints from inner + layers, matching the structural order of the shape tree. 
+ """ + collected: list[ConstraintSource] = [] + cur = shape + while True: + match cur: + case ArrayOf(element=inner, constraints=cs): + collected.extend(cs) + cur = inner + case NewTypeShape(inner=inner): + cur = inner + case ( + Primitive(constraints=cs) + | LiteralScalar(constraints=cs) + | AnyScalar(constraints=cs) + ): + collected.extend(cs) + return tuple(collected) + case MapOf(constraints=cs): + collected.extend(cs) + return tuple(collected) + case ModelRef() | UnionRef(): + return tuple(collected) + case _: + assert_never(cur) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py new file mode 100644 index 000000000..36e3cfed6 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py @@ -0,0 +1,47 @@ +"""Internal typed length-constraint classes. + +`annotated_types.MaxLen` and `annotated_types.MinLen` are polysemous: +`MaxLen(10)` on a `str` constrains character count, while `MaxLen(10)` +on a `list[X]` constrains cardinality. The codegen extractor splits +them by attachment layer so each variant carries its own dispatch: +`ArrayMinLen` / `ArrayMaxLen` for `ArrayOf` layers, `ScalarMinLen` / +`ScalarMaxLen` for scalar layers. + +These are codegen-internal classes -- Pydantic users continue to write +`Annotated[X, MinLen(n)]` in their schemas; the wrapping happens inside +`type_analyzer.attach_constraints` when the constraint reaches its +target layer. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from annotated_types import MaxLen, MinLen + +__all__ = [ + "ArrayMaxLen", + "ArrayMinLen", + "ScalarMaxLen", + "ScalarMinLen", +] + + +@dataclass(frozen=True) +class ArrayMinLen(MinLen): + """Cardinality lower bound for an `ArrayOf` layer.""" + + +@dataclass(frozen=True) +class ArrayMaxLen(MaxLen): + """Cardinality upper bound for an `ArrayOf` layer.""" + + +@dataclass(frozen=True) +class ScalarMinLen(MinLen): + """Character-count lower bound for a scalar layer.""" + + +@dataclass(frozen=True) +class ScalarMaxLen(MaxLen): + """Character-count upper bound for a scalar layer.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py index 76807e123..d3ef371e9 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -1,8 +1,8 @@ -"""Model extraction and tree expansion.""" +"""Pydantic model extraction into `ModelSpec`.""" from __future__ import annotations -import dataclasses +from collections.abc import Mapping from pydantic import BaseModel from pydantic.fields import FieldInfo @@ -11,11 +11,22 @@ from overture.schema.system.model_constraint import ModelConstraint from .docstring import clean_docstring -from .specs import FeatureSpec, FieldSpec, ModelSpec, is_model_class -from .type_analyzer import ConstraintSource, TypeInfo, TypeKind, analyze_type +from .field import ( + ConstraintSource, + FieldShape, + ModelRef, + UnionRef, +) +from .specs import FieldSpec, ModelSpec, is_model_class +from .type_analyzer import ( + ModelResolver, + UnionResolver, + analyze_type, + attach_constraints, + unwrap_list, +) __all__ = [ - "expand_model_tree", "extract_model", "resolve_field_alias", ] @@ -37,28 +48,30 
@@ def resolve_field_alias(field_name: str, field_info: FieldInfo) -> str: return field_name -def _merge_field_metadata(type_info: TypeInfo, field_info: FieldInfo) -> TypeInfo: - """Merge constraints from field_info.metadata into TypeInfo. - - Pydantic strips the Annotated wrapper from some fields (non-optional, - non-union) and moves the metadata to field_info.metadata. When this - happens, analyze_type sees a bare type and misses the constraints. - The two sets never overlap: field_info.metadata is empty when the - Annotated wrapper survives in the annotation. - """ - if not field_info.metadata: - return type_info - extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) - return dataclasses.replace(type_info, constraints=type_info.constraints + extra) - - -def _is_field_required(field_info: FieldInfo, type_info: TypeInfo) -> bool: +def _is_field_required(field_info: FieldInfo, is_optional: bool) -> bool: """Determine whether a field is required (no default and not Optional).""" has_default = ( field_info.default is not PydanticUndefined or field_info.default_factory is not None ) - return not has_default and not type_info.is_optional + return not has_default and not is_optional + + +def _attach_field_metadata(shape: FieldShape, field_info: FieldInfo) -> FieldShape: + """Merge constraints from `field_info.metadata` onto *shape*. + + Pydantic strips the outermost Annotated wrapper from some fields + (non-optional, non-union) and moves its metadata to + `field_info.metadata`. When that happens `analyze_type` sees a bare + type and misses those constraints. They anchor at the topmost + constraint-bearing layer, so we route them through + `attach_constraints` so that length-constraint wrapping applies here + just as it does during normal annotation unwrapping. 
+ """ + if not field_info.metadata: + return shape + extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) + return attach_constraints(shape, extra) def _basemodel_bases(cls: type) -> list[type[BaseModel]]: @@ -88,13 +101,13 @@ def _class_order(model_class: type[BaseModel]) -> list[type]: def _field_order(model_class: type[BaseModel]) -> list[str]: - """Return model_fields keys in documentation order. + """Return `model_fields` keys in documentation order. Walks the class hierarchy recursively. At each level of multiple - inheritance, the first base is the "primary chain" and the rest - are "mixins." Primary chain and own fields come first, then mixin - fields in declaration order. Single-inheritance levels use - Pydantic's default reversed-MRO order. + inheritance, the first base is the primary chain and the rest are + mixins. Primary chain and own fields come first, then mixin fields + in declaration order. Single-inheritance levels use Pydantic's + default reversed-MRO order. """ valid_names = set(model_class.model_fields.keys()) result: list[str] = [] @@ -111,94 +124,124 @@ def extract_model( model_class: type[BaseModel], *, entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, ) -> ModelSpec: - """Extract model specification from a Pydantic model class.""" - field_info_map = model_class.model_fields - ordered_keys = _field_order(model_class) - - fields: list[FieldSpec] = [] - for field_name in ordered_keys: - field_info = field_info_map[field_name] - output_name = resolve_field_alias(field_name, field_info) - - # Use field_info.annotation (resolved TypeVars) not get_type_hints - annotation = field_info.annotation - if annotation is None: - continue + """Extract a fully-resolved `ModelSpec` from a Pydantic model class. + + Recurses into sub-models and unions, producing `ModelRef` / + `UnionRef` terminals with their specs resolved. 
Cycles in the + model graph (a field whose source type is an ancestor on the + current extraction stack) produce a `ModelRef` pointing at the + in-progress ancestor spec with `starts_cycle=True` so consumers + stop recursion at the back-edge. + """ + return _extract_model_recursive( + model_class, + entry_point=entry_point, + partitions=partitions or {}, + cache={}, + ancestors=frozenset(), + ) - type_info = _merge_field_metadata(analyze_type(annotation), field_info) - fields.append( - FieldSpec( - name=output_name, - type_info=type_info, - description=field_info.description or type_info.description, - is_required=_is_field_required(field_info, type_info), - ) - ) +def _extract_model_recursive( + model_class: type[BaseModel], + *, + entry_point: str | None, + partitions: Mapping[str, str], + cache: dict[type, ModelSpec], + ancestors: frozenset[type], +) -> ModelSpec: + """Inner recursive helper for `extract_model`. - return ModelSpec( + Inserts the (partial) `ModelSpec` into `cache` before populating + its fields so cycles can find it. `ancestors` is the set of types + currently on the recursion stack -- a sub-field whose source type + appears there is a back-edge and gets `starts_cycle=True`. + """ + spec = ModelSpec( name=model_class.__name__, description=clean_docstring(model_class.__doc__), - fields=fields, + fields=[], source_type=model_class, entry_point=entry_point, + partitions=partitions, constraints=ModelConstraint.get_model_constraints(model_class), ) + cache[model_class] = spec + descendant_ancestors = ancestors | {model_class} + model_resolver, union_resolver = _make_resolvers(cache, descendant_ancestors) -def expand_model_tree( - spec: FeatureSpec, - cache: dict[type, ModelSpec] | None = None, -) -> FeatureSpec: - """Populate model references on MODEL-kind fields, recursively. - - Walks *spec*'s fields and sets `field.model` for fields whose type - is a Pydantic model. Uses *cache* to reuse already-extracted ModelSpecs - and detect shared references. 
Marks fields whose model creates a cycle - in the ancestor chain with `starts_cycle=True`. + fields: list[FieldSpec] = [] + for field_name in _field_order(model_class): + field_info = model_class.model_fields[field_name] + annotation = field_info.annotation + if annotation is None: + continue + shape, is_optional, ti_description = analyze_type( + annotation, + model_resolver=model_resolver, + union_resolver=union_resolver, + ) + shape = _attach_field_metadata(shape, field_info) + fields.append( + FieldSpec( + name=resolve_field_alias(field_name, field_info), + shape=shape, + description=field_info.description or ti_description, + is_required=_is_field_required(field_info, is_optional), + is_optional=is_optional, + ) + ) - Mutates *spec* in place and returns it. - """ - if cache is None: - cache = {} - if isinstance(spec, ModelSpec) and spec.source_type is not None: - cache[spec.source_type] = spec - ancestors = frozenset({spec.source_type}) if spec.source_type else frozenset() - _expand_fields(spec.fields, cache, ancestors) + spec.fields = fields return spec -def _expand_fields( - fields: list[FieldSpec], +def _make_resolvers( cache: dict[type, ModelSpec], ancestors: frozenset[type], -) -> None: - """Recursive helper for expand_model_tree. - - Cache insertion happens before recursion — cycle detection depends - on the ancestor's ModelSpec being in the cache when the back-edge - is encountered. +) -> tuple[ModelResolver, UnionResolver]: + """Build the resolvers that recursively extract sub-models / sub-unions. + + `cache` shares already-extracted sub-specs across a single + extraction so sub-models referenced more than once share a + `ModelSpec`. `ancestors` carries the recursion stack for cycle + detection -- a back-edge produces a `ModelRef` pointing at the + in-progress ancestor spec with `starts_cycle=True`. 
""" - for field_spec in fields: - ti = field_spec.type_info - source = ti.source_type - if ti.kind == TypeKind.UNION: - # Union fields have no single model to recurse into. - # The field row appears in the output; skip inline expansion. - continue - if ti.kind != TypeKind.MODEL or source is None: - continue - if source in ancestors: - # Cycle: reuse existing spec, mark the edge - field_spec.model = cache.get(source) - field_spec.starts_cycle = True - elif source in cache: - # Shared reference: reuse, not a cycle - field_spec.model = cache[source] - else: - sub_spec = extract_model(source) - cache[source] = sub_spec # insert BEFORE recursing - field_spec.model = sub_spec - _expand_fields(sub_spec.fields, cache, ancestors | {source}) + def resolve_model(cls: type[BaseModel]) -> ModelRef: + if cls in ancestors: + return ModelRef(model=cache[cls], starts_cycle=True) + cached = cache.get(cls) + if cached is not None: + return ModelRef(model=cached) + sub_spec = _extract_model_recursive( + cls, + entry_point=None, + partitions={}, + cache=cache, + ancestors=ancestors, + ) + return ModelRef(model=sub_spec) + + def resolve_union( + annotation: object, + members: tuple[type[BaseModel], ...], + _description: str | None, + ) -> UnionRef: + # Late import: extract_union calls back into extract_model for + # member classes. A module-level import would be a cycle. + from .union_extraction import extract_union + + # Recover the union alias name: `analyze_type` reaches the + # union via `members[0].__name__` when the alias name is lost + # (plain `Foo = Annotated[...]` doesn't preserve it pre-PEP-695). + # Convention: members extend `Base`. 
+ placeholder = members[0].__name__ if members else "" + sub_union = extract_union(placeholder, unwrap_list(annotation)) + return UnionRef(union=sub_union) + + return resolve_model, resolve_union diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py index ff11c770a..5e074d259 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py @@ -1,6 +1,7 @@ """NewType extraction.""" from .docstring import clean_docstring, is_custom_docstring +from .field import NewTypeShape from .specs import NewTypeSpec from .type_analyzer import analyze_type @@ -8,19 +9,31 @@ def extract_newtype(newtype_callable: object) -> NewTypeSpec: - """Extract NewType specification from a NewType callable.""" - type_info = analyze_type(newtype_callable) - doc = getattr(newtype_callable, "__doc__", None) - name = type_info.newtype_name or getattr(newtype_callable, "__name__", None) + """Extract a `NewTypeSpec` from a NewType callable. + + `analyze_type(newtype_callable)` returns a shape whose outermost + layer is the NewType's own `NewTypeShape`. We strip that wrapper so + `NewTypeSpec.shape` describes the *underlying* type -- the NewType + isn't a self-reference on its own page. 
+ """ + shape, _, ti_description = analyze_type(newtype_callable) + + name = getattr(newtype_callable, "__name__", None) + if isinstance(shape, NewTypeShape) and shape.name == name: + underlying = shape.inner + else: + underlying = shape + if name is None: msg = f"Cannot determine name for NewType: {newtype_callable!r}" raise ValueError(msg) - description = ( - clean_docstring(doc) if is_custom_docstring(doc) else type_info.description - ) + + doc = getattr(newtype_callable, "__doc__", None) + description = clean_docstring(doc) if is_custom_docstring(doc) else ti_description + return NewTypeSpec( name=name, description=description, - type_info=type_info, + shape=underlying, source_type=newtype_callable, ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py index ae899a4e6..7416d42f8 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py @@ -3,9 +3,10 @@ from annotated_types import Interval from .docstring import first_docstring_line +from .field import FieldShape, Scalar +from .field_walk import terminal_of from .newtype_extraction import extract_newtype from .specs import NumericSpec, TypeIdentity -from .type_analyzer import TypeInfo __all__ = [ "extract_numeric_bounds", @@ -13,24 +14,22 @@ ] -# Bound attribute names on annotated_types constraint objects (Ge, Gt, Le, -# Lt, Interval) used for numeric bound extraction. -# -# field_constraints.py has its own _BOUND_OPS for display formatting. -# The duplication is deliberate: these modules use the same attribute names -# for unrelated purposes (numeric extraction vs. prose rendering), and -# coupling them for four string literals adds a dependency without value. 
+# Bound attribute names on annotated_types constraints (Ge, Gt, Le, Lt, Interval). _BOUND_ATTRS = ("ge", "gt", "le", "lt") -def extract_numeric_bounds(type_info: TypeInfo) -> Interval: - """Extract numeric bounds from a TypeInfo's constraints. +def extract_numeric_bounds(shape: FieldShape) -> Interval: + """Extract numeric bounds from the constraints on a shape's terminal scalar. - Checks for ge, gt, le, and lt attributes on constraint objects. - Stops at the first constraint defining each bound. + Walks `NewTypeShape` / `ArrayOf` wrappers to find the terminal + `Scalar`, then scans its constraints for `ge`, `gt`, `le`, and `lt` + attributes. Stops at the first constraint defining each bound. """ + terminal = terminal_of(shape) + if not isinstance(terminal, Scalar): + return Interval() found: dict[str, int | float] = {} - for cs in type_info.constraints: + for cs in terminal.constraints: c = cs.constraint for attr in _BOUND_ATTRS: if attr not in found: @@ -47,7 +46,10 @@ def extract_numerics( specs: list[NumericSpec] = [] for tid in numeric_ids: newtype_spec = extract_newtype(tid.obj) - bounds = extract_numeric_bounds(newtype_spec.type_info) + # extract_newtype strips the outer NewTypeShape, so the spec's + # terminal scalar already carries the constraints the NewType + # contributed -- extract_numeric_bounds walks straight to it. 
+ bounds = extract_numeric_bounds(newtype_spec.shape) description = first_docstring_line(getattr(tid.obj, "__doc__", None)) float_bits = _extract_float_bits(tid.name) specs.append( @@ -68,5 +70,5 @@ def extract_numerics( def _extract_float_bits(name: str) -> int | None: - """Extract bit width from a float type name like 'float32'.""" + """Extract bit width from a float type name like `float32`.""" return _FLOAT_BITS.get(name) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py index acba1577d..3aac1e648 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -3,15 +3,18 @@ from __future__ import annotations import functools +from collections.abc import Mapping from dataclasses import dataclass, field -from typing import Any, Protocol, TypeGuard, runtime_checkable +from typing import Any, TypeAlias, TypeGuard from annotated_types import Interval from pydantic import BaseModel +from overture.schema.system.discovery.tag import get_values_for_key from overture.schema.system.model_constraint import ModelConstraint -from .type_analyzer import TypeInfo, TypeKind, UnsupportedUnionError, analyze_type +from .field import FieldShape +from .type_analyzer import capture_union_members __all__ = [ "AnnotatedField", @@ -19,6 +22,7 @@ "EnumSpec", "FeatureSpec", "FieldSpec", + "MemberSpec", "ModelSpec", "NewTypeSpec", "NumericSpec", @@ -28,11 +32,22 @@ "filter_model_classes", "is_model_class", "is_pydantic_sourced", - "is_pydantic_type", "is_union_alias", + "partitions_from_tags", ] +def partitions_from_tags(tags: frozenset[str]) -> dict[str, str]: + """Map registry tags to Hive partition columns for a feature. 
+ + Today populated only from `overture:theme=`; the value object is + a generic name -> value map so additional partition keys (e.g. release + version) can be added without changing the surrounding pipeline. + """ + theme = next(iter(get_values_for_key(tags, "overture:theme")), None) + return {"theme": theme} if theme is not None else {} + + @dataclass(frozen=True, eq=False) class TypeIdentity: """Unique identity for a type in the codegen system. @@ -106,31 +121,18 @@ class EnumSpec(_SourceTypeIdentityMixin): @dataclass class FieldSpec: - """Specification for a model field.""" - - name: str - type_info: TypeInfo - description: str | None - is_required: bool - model: ModelSpec | None = None - starts_cycle: bool = False + """Specification for a model field: header metadata plus structural shape. - -@runtime_checkable -class FeatureSpec(Protocol): - """Shared interface for feature-level specs (ModelSpec, UnionSpec).""" + `shape` is the full `FieldShape` tree, including any sub-model + (`ModelRef`) and sub-union (`UnionRef`) references already + resolved during extraction. + """ name: str - description: str | None - source_type: type[BaseModel] | None - entry_point: str | None - constraints: tuple[ModelConstraint, ...] - - @property - def fields(self) -> list[FieldSpec]: ... - - @property - def identity(self) -> TypeIdentity: ... + shape: FieldShape + description: str | None = None + is_required: bool = True + is_optional: bool = False @dataclass @@ -142,6 +144,7 @@ class ModelSpec(_SourceTypeIdentityMixin): fields: list[FieldSpec] = field(default_factory=list) source_type: type[BaseModel] | None = None entry_point: str | None = None + partitions: Mapping[str, str] = field(default_factory=dict) constraints: tuple[ModelConstraint, ...] = () @@ -150,12 +153,24 @@ class AnnotatedField: """A FieldSpec paired with union variant provenance.""" field_spec: FieldSpec - variant_sources: tuple[str, ...] | None + variant_sources: tuple[type[BaseModel], ...] 
| None -# eq=False: contains mutable lists and a cached_property, so -# dataclass-generated __eq__ would be unreliable. -@dataclass(eq=False) +@dataclass +class MemberSpec: + """A union member's class paired with its extracted `ModelSpec`. + + `extract_union` already runs `extract_model` on every member to + build the merged `annotated_fields`; retaining the result here lets + consumers (check builder, base-row generator) reuse it instead of + re-extracting the same subtree. + """ + + member_cls: type[BaseModel] + spec: ModelSpec + + +@dataclass class UnionSpec: """Specification for a discriminated union type alias.""" @@ -167,8 +182,10 @@ class UnionSpec: discriminator_mapping: dict[str, type[BaseModel]] | None source_annotation: object common_base: type[BaseModel] + member_specs: list[MemberSpec] = field(default_factory=list) source_type: type[BaseModel] | None = field(default=None, init=False) entry_point: str | None = None + partitions: Mapping[str, str] = field(default_factory=dict) constraints: tuple[ModelConstraint, ...] = () @functools.cached_property @@ -183,11 +200,16 @@ def identity(self) -> TypeIdentity: @dataclass class NewTypeSpec(_SourceTypeIdentityMixin): - """Specification for a NewType.""" + """Specification for a NewType. + + `shape` is the underlying shape -- i.e. the `inner` of the + NewType's own `NewTypeShape` wrapper, with the wrapper stripped + so the NewType isn't a self-reference on its own page. + """ name: str description: str | None - type_info: TypeInfo + shape: FieldShape source_type: object | None = None @@ -219,6 +241,13 @@ def docs_url(self) -> str: ) +FeatureSpec: TypeAlias = ModelSpec | UnionSpec +"""Top-level feature types passed through the extraction pipeline. + +Consumers narrow with `isinstance` when an arm-specific attribute +is needed (e.g. `UnionSpec.discriminator_field`). +""" + SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec """Non-feature types referenced by feature models. 
@@ -232,15 +261,6 @@ def is_pydantic_sourced(source_type: type | None) -> bool: return getattr(source_type, "__module__", "").startswith("pydantic") -def is_pydantic_type(ti: TypeInfo) -> bool: - """Check whether a TypeInfo represents a Pydantic built-in type.""" - return ( - ti.kind == TypeKind.PRIMITIVE - and ti.source_type is not None - and is_pydantic_sourced(ti.source_type) - ) - - def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: """Check whether *obj* is a concrete BaseModel subclass (not a type alias).""" return isinstance(obj, type) and issubclass(obj, BaseModel) @@ -248,11 +268,7 @@ def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: def is_union_alias(obj: object) -> bool: """Check whether *obj* is a discriminated union type alias of BaseModel subclasses.""" - try: - ti = analyze_type(obj) - except (TypeError, UnsupportedUnionError): - return False - return ti.kind == TypeKind.UNION + return capture_union_members(obj) is not None def filter_model_classes(models: dict[Any, Any]) -> list[type[BaseModel]]: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py index a0cd5314f..349f1a375 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -1,344 +1,526 @@ -"""Iterative type unwrapping for Pydantic model annotations.""" +"""Annotation-to-`FieldShape` analysis. + +`analyze_type` recurses through a Python type annotation, peeling +`NewType`, `Annotated`, `Optional`, `list`, and `dict` layers one frame +at a time, and produces a `FieldShape` describing the structure with +constraints attached to the layer they target. 
+ +Each `Annotated` frame attaches its metadata to the shape its inner +annotation unwraps to, so that, e.g., the inner and outer `MinLen` in +`Annotated[list[Annotated[str, MinLen(2)]], MinLen(3)]` land on +different layers as different typed variants: `ArrayMinLen(3)` on the +`ArrayOf`, `ScalarMinLen(2)` on the `Primitive`. + +MODEL and UNION terminals are resolved via optional callbacks. When +no resolver is supplied a MODEL terminal falls back to +`Primitive(source_type=cls)`; a multi-arm UNION raises +`UnsupportedUnionError`. Callers that need to recurse into sub-models +pass resolvers that build a `ModelRef`/`UnionRef` with the resolved +spec. +""" from __future__ import annotations import types from collections.abc import Callable -from dataclasses import dataclass, field -from enum import Enum, auto -from typing import Annotated, Any, Literal, Union, get_args, get_origin +from dataclasses import dataclass, replace +from typing import Annotated, Any, Literal, NoReturn, Union, get_args, get_origin +from annotated_types import MaxLen, MinLen from pydantic import BaseModel from pydantic.fields import FieldInfo -from typing_extensions import Sentinel +from typing_extensions import Sentinel, assert_never from .docstring import clean_docstring +from .field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + NewTypeShape, + Primitive, +) +from .field_walk import terminal_of +from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen + + +@dataclass(frozen=True, slots=True) +class _ContinueWith: + """`_peel_union` result: next annotation to keep peeling.""" + + annotation: object + is_optional: bool + + +@dataclass(frozen=True, slots=True) +class _Resolved: + """`_peel_union` result: finished shape, short-circuit the unwrap.""" + + shape: FieldShape + is_optional: bool + + +@dataclass(frozen=True, slots=True) +class _NewTypeCtx: + """The innermost NewType currently in scope.""" + + name: str + ref: 
object + __all__ = [ "ConstraintSource", - "TypeKind", - "TypeInfo", + "ModelResolver", + "UnionResolver", "UnsupportedUnionError", "analyze_type", + "attach_constraints", + "capture_union_members", "is_newtype", "single_literal_value", - "walk_type_info", + "unwrap_list", ] class UnsupportedUnionError(TypeError): - """Raised when analyze_type encounters a multi-type union it cannot represent.""" + """Raised when `analyze_type` encounters a multi-type union it cannot represent.""" -class TypeKind(Enum): - """Classification of type kinds.""" +ModelResolver = Callable[[type[BaseModel]], FieldShape] +"""Resolver invoked when `analyze_type` reaches a `BaseModel` terminal.""" - PRIMITIVE = auto() - LITERAL = auto() - ENUM = auto() - MODEL = auto() - UNION = auto() +UnionResolver = Callable[[object, tuple[type[BaseModel], ...], str | None], FieldShape] +"""Resolver invoked at a multi-arm union terminal. +Receives the original union annotation, the tuple of member classes, +and the description accumulated from enclosing `Annotated` layers. +""" -@dataclass(slots=True) -class ConstraintSource: - """A constraint paired with the NewType that contributed it.""" - source_ref: object | None - source_name: str | None - constraint: object +def is_newtype(annotation: object) -> bool: + """Check whether *annotation* is a `typing.NewType`. + NewType creates a callable with a `__supertype__` attribute pointing + to the wrapped type. No public API exists for this check. + """ + return callable(annotation) and hasattr(annotation, "__supertype__") -@dataclass(slots=True) -class TypeInfo: - """Information about a type annotation.""" - base_type: str - kind: TypeKind - is_optional: bool = False - list_depth: int = 0 - newtype_outer_list_depth: int = 0 - is_dict: bool = False - dict_key_type: TypeInfo | None = None - dict_value_type: TypeInfo | None = None - constraints: tuple[ConstraintSource, ...] = () - literal_values: tuple[object, ...] 
| None = None - source_type: type | None = None - newtype_name: str | None = None - newtype_ref: object | None = None - union_members: tuple[type[BaseModel], ...] | None = None - description: str | None = None +class _UnionCaptured(Exception): # noqa: N818 - control flow, not a true error + """Raised by the capturing union resolver to short-circuit analyze_type.""" - @property - def is_list(self) -> bool: - """Whether this type has any list wrapping.""" - return self.list_depth > 0 + def __init__( + self, members: tuple[type[BaseModel], ...], description: str | None + ) -> None: + self.members = members + self.description = description -def walk_type_info(ti: TypeInfo, visitor: Callable[[TypeInfo], None]) -> None: - """Call *visitor* on *ti*, then recurse into dict key/value types. +def capture_union_members( + annotation: object, +) -> tuple[tuple[type[BaseModel], ...], str | None] | None: + """Peel wrappers from *annotation* and return its union members. - Captures the shared recursive descent pattern used by type collection - and reverse reference computation. Union members are `type` objects - (not `TypeInfo`), so callers handle them directly. + Returns `(members, description)` when *annotation* (possibly wrapped + in `Annotated`) terminates in a multi-arm union of `BaseModel` + subclasses, otherwise `None`. Internally drives `analyze_type` with + a capturing resolver and unwinds via an exception once the union + terminal is reached. The resolver fires only after every enclosing + `Annotated` layer is peeled, so the captured description matches what + `analyze_type` would return. 
""" - visitor(ti) - if ti.dict_key_type is not None: - walk_type_info(ti.dict_key_type, visitor) - if ti.dict_value_type is not None: - walk_type_info(ti.dict_value_type, visitor) + def _capture( + _ann: object, + members: tuple[type[BaseModel], ...], + description: str | None, + ) -> NoReturn: + raise _UnionCaptured(members, description) -def is_newtype(annotation: object) -> bool: - """Check if annotation is a typing.NewType. - - NewType creates a callable with a __supertype__ attribute pointing - to the wrapped type. No public API exists for this check. - """ - return callable(annotation) and hasattr(annotation, "__supertype__") + try: + analyze_type(annotation, union_resolver=_capture) + except _UnionCaptured as captured: + return captured.members, captured.description + except (TypeError, UnsupportedUnionError): + return None + return None def _is_union(origin: object) -> bool: - """Check if an origin represents a union type (X | Y or Union[X, Y]).""" + """Whether an origin represents a union type (`X | Y` or `Union[X, Y]`).""" return origin in (types.UnionType, Union) -@dataclass(slots=True) -class _UnwrapState: - """Accumulated state from iterative type unwrapping. +def _filter_sentinel_arms(args: tuple[object, ...]) -> list[object]: + """Remove `NoneType` and `Sentinel` arms from union type arguments.""" + return [a for a in args if a is not types.NoneType and not isinstance(a, Sentinel)] + + +def analyze_type( + annotation: object, + *, + model_resolver: ModelResolver | None = None, + union_resolver: UnionResolver | None = None, +) -> tuple[FieldShape, bool, str | None]: + """Analyze an annotation into a `FieldShape` plus field-level metadata. + + Parameters + ---------- + annotation + The annotation to analyze. + model_resolver + Optional callback invoked when the terminal is a `BaseModel` + subclass. Returns the `FieldShape` to use at that position -- + typically a `ModelRef` with a resolved `ModelSpec`. 
Defaults to + a `Scalar` carrying the class as `source_type` for callers that + cannot resolve sub-models (e.g. dict key/value analysis). + union_resolver + Optional callback invoked when the terminal is a multi-arm + union of `BaseModel` subclasses. Returns the `FieldShape` to + use -- typically a `UnionRef` with a resolved `UnionSpec`. + Required to support unions; raises otherwise. + + Returns + ------- + tuple[FieldShape, bool, str | None] + The structural shape, whether the field accepts `None`, and + the first `FieldInfo.description` encountered during unwrapping. + """ + return _unwrap( + annotation, + newtype_ctx=None, + model_resolver=model_resolver, + union_resolver=union_resolver, + ) + - Tracks NewType names and refs during unwrapping: - - `outermost_newtype_name` / `outermost_newtype_ref`: the first - NewType encountered, exposed as `TypeInfo.newtype_name` / `newtype_ref`. - - `last_newtype_name`: the most recently entered NewType name, used - as the resolved `base_type` for the terminal type. - - `last_newtype_ref`: the most recently entered NewType callable, - used as constraint provenance (which NewType contributed each constraint). - - `newtype_outer_list_depth`: list layers accumulated before entering - the outermost NewType boundary. +def _unwrap( + annotation: object, + *, + newtype_ctx: _NewTypeCtx | None, + model_resolver: ModelResolver | None, + union_resolver: UnionResolver | None, +) -> tuple[FieldShape, bool, str | None]: + """Recurse one annotation layer, returning its `FieldShape` subtree. + + Parameters + ---------- + newtype_ctx + The innermost `NewType` currently in scope, or None. Sets the + terminal `Primitive.base_type` and tags constraints with their + contributing `NewType`. + + Returns + ------- + tuple + The shape subtree, whether this layer or any descendant accepts + `None`, and the first `FieldInfo.description` found. 
""" - is_optional: bool = False - list_depth: int = 0 - newtype_outer_list_depth: int = 0 - is_dict: bool = False - dict_key_type: TypeInfo | None = None - dict_value_type: TypeInfo | None = None - constraints: list[ConstraintSource] = field(default_factory=list) - outermost_newtype_name: str | None = None - outermost_newtype_ref: object | None = None - last_newtype_name: str | None = None - last_newtype_ref: object | None = None - description: str | None = None - - def add_constraint(self, constraint: object) -> None: - self.constraints.append( - ConstraintSource(self.last_newtype_ref, self.last_newtype_name, constraint) + def _recurse( + annotation: object, newtype_ctx: _NewTypeCtx | None + ) -> tuple[FieldShape, bool, str | None]: + """Recurse into a child annotation, carrying the invariant resolvers.""" + return _unwrap( + annotation, + newtype_ctx=newtype_ctx, + model_resolver=model_resolver, + union_resolver=union_resolver, ) - def build_type_info( - self, - *, - base_type: str, - kind: TypeKind, - literal_values: tuple[object, ...] | None = None, - source_type: type | None = None, - union_members: tuple[type[BaseModel], ...] 
| None = None, - ) -> TypeInfo: - return TypeInfo( - base_type=base_type, - kind=kind, - is_optional=self.is_optional, - list_depth=self.list_depth, - newtype_outer_list_depth=self.newtype_outer_list_depth, - is_dict=self.is_dict, - dict_key_type=self.dict_key_type, - dict_value_type=self.dict_value_type, - constraints=tuple(self.constraints), - literal_values=literal_values, - source_type=source_type, - newtype_name=self.outermost_newtype_name, - newtype_ref=self.outermost_newtype_ref, - union_members=union_members, - description=self.description, + origin = get_origin(annotation) + + if is_newtype(annotation): + ctx = _NewTypeCtx(annotation.__name__, annotation) # type: ignore[attr-defined] + inner, opt, desc = _recurse(annotation.__supertype__, ctx) # type: ignore[attr-defined] + inner = _erase_inner_newtypes(inner) + return NewTypeShape(name=ctx.name, ref=ctx.ref, inner=inner), opt, desc + + if origin is Annotated: + args = get_args(annotation) + inner_annotation = args[0] + own_desc: str | None = None + collected: list[ConstraintSource] = [] + for c in args[1:]: + if isinstance(c, FieldInfo): + if c.description is not None and own_desc is None: + own_desc = clean_docstring(c.description) + for m in c.metadata: + collected.append(_constraint_source(m, newtype_ctx)) + else: + collected.append(_constraint_source(c, newtype_ctx)) + + # Pick the annotation to recurse into and the optionality this + # Annotated layer contributes. A directly-wrapped union is peeled + # here so the resolver still sees the Annotated form; a `_Resolved` + # union short-circuits with the constraints attached. 
+ next_annotation = inner_annotation + layer_optional = False + if _is_union(get_origin(inner_annotation)): + result = _peel_union( + inner_annotation, + union_resolver, + resolver_annotation=annotation, + description=own_desc, + ) + match result: + case _Resolved(shape): + return ( + attach_constraints(shape, tuple(collected)), + result.is_optional, + own_desc, + ) + case _ContinueWith(next_annotation, layer_optional): + pass + case _: + assert_never(result) + + inner, opt, desc = _recurse(next_annotation, newtype_ctx) + inner = attach_constraints(inner, tuple(collected)) + return ( + inner, + opt or layer_optional, + own_desc if own_desc is not None else desc, ) + if _is_union(origin): + result = _peel_union(annotation, union_resolver) + match result: + case _Resolved(shape): + return shape, result.is_optional, None + case _ContinueWith(next_annotation, is_optional): + inner, opt, desc = _recurse(next_annotation, newtype_ctx) + return inner, opt or is_optional, desc + case _: + assert_never(result) + + if origin is list: + args = get_args(annotation) + if not args: + raise TypeError("Bare list without type argument is not supported") + element, opt, desc = _recurse(args[0], newtype_ctx) + return ArrayOf(element=element, constraints=()), opt, desc + + if origin is dict: + args = get_args(annotation) + if not args: + raise TypeError("Bare dict without type arguments is not supported") + key_shape, _, _ = _recurse(args[0], None) + value_shape, _, _ = _recurse(args[1], None) + return MapOf(key=key_shape, value=value_shape, constraints=()), False, None + + return _terminal(annotation, newtype_ctx, model_resolver), False, None + + +def _constraint_source( + constraint: object, newtype_ctx: _NewTypeCtx | None +) -> ConstraintSource: + return ConstraintSource( + source_ref=newtype_ctx.ref if newtype_ctx else None, + source_name=newtype_ctx.name if newtype_ctx else None, + constraint=constraint, + ) -def analyze_type(annotation: object) -> TypeInfo: - """Analyze a type 
annotation and return TypeInfo. - Iteratively unwraps type wrappers (Annotated, Optional, list, NewType) until - reaching a terminal type. - """ - state = _UnwrapState() - - while True: - origin = get_origin(annotation) - - # Handle NewType (e.g., int32 = NewType("int32", Annotated[int, ...])) - if is_newtype(annotation): - name = annotation.__name__ # type: ignore[attr-defined] - state.last_newtype_name = name - state.last_newtype_ref = annotation - if state.outermost_newtype_name is None: - state.newtype_outer_list_depth = state.list_depth - state.outermost_newtype_name = name - state.outermost_newtype_ref = annotation - annotation = annotation.__supertype__ # type: ignore[attr-defined] - continue - - # Handle Annotated types (Annotated[X, metadata...]) - if origin is Annotated: - args = get_args(annotation) - annotation = args[0] - for c in args[1:]: - if isinstance(c, FieldInfo): - if c.description is not None and state.description is None: - state.description = clean_docstring(c.description) - for m in c.metadata: - state.add_constraint(m) - else: - state.add_constraint(c) - continue - - # Handle union types (X | None or Optional[X]) - if _is_union(origin): - args = get_args(annotation) - # Filter out None, Sentinel types (Pydantic's ), and - # Literal alternatives (e.g., HttpUrl | Literal[""] where the - # Literal is a special-value sentinel, not the primary type). - if any(a is types.NoneType for a in args): - state.is_optional = True - - non_none_args = [ - a - for a in args - if a is not types.NoneType and not isinstance(a, Sentinel) - ] - - # Only filter out Literal arms when a concrete (non-Literal) type - # exists. Without this guard, Optional[Literal["x"]] would lose - # all args because the Literal *is* the primary type. 
- concrete_args = [a for a in non_none_args if get_origin(a) is not Literal] - real_args = concrete_args if concrete_args else non_none_args - - if len(real_args) > 1: - # Check if all real args are BaseModel subclasses - # (unwrap Annotated wrappers to get the actual class) - members: list[type[BaseModel]] = [] - for arg in real_args: - inner = arg - if get_origin(inner) is Annotated: - inner = get_args(inner)[0] - if isinstance(inner, type) and issubclass(inner, BaseModel): - members.append(inner) - else: - raise UnsupportedUnionError( - f"Multi-type unions not supported: {annotation}" - ) - return state.build_type_info( - base_type=members[0].__name__, - kind=TypeKind.UNION, - union_members=tuple(members), - ) +def _erase_inner_newtypes(shape: FieldShape) -> FieldShape: + """Drop every `NewTypeShape` reachable through `ArrayOf` layers. - if not real_args: - raise UnsupportedUnionError( - f"Union with no concrete types: {annotation}" - ) + A `NewType` chain — including NewTypes nested as list elements — + collapses to a single `NewTypeShape` (the outermost), with inner + NewType names surviving only as the terminal `Primitive.base_type`. + Each `NewType` frame calls this on its recursion result so that by + the time the outermost frame returns, exactly one `NewTypeShape` + remains per spine. - annotation = real_args[0] - continue + Recurses through `ArrayOf.element` but stops at `MapOf` — `dict` + key/value are independent spines, each keeping its own outermost + `NewTypeShape` — and at scalar / `ModelRef` / `UnionRef` terminals. + """ + match shape: + case NewTypeShape(inner=inner): + return _erase_inner_newtypes(inner) + case ArrayOf(element=element): + return replace(shape, element=_erase_inner_newtypes(element)) + case _: + return shape + + +def attach_constraints( + shape: FieldShape, constraints: tuple[ConstraintSource, ...] +) -> FieldShape: + """Prepend `constraints` to the outermost non-`NewTypeShape` layer. 
+ + Skips any number of leading `NewTypeShape` wrappers, then prepends + to the `.constraints` of the first `ArrayOf`, `MapOf`, `Primitive`, + `LiteralScalar`, or `AnyScalar` reached. Does not descend into + `ArrayOf.element` or `MapOf.key` / `.value`. `ModelRef` / `UnionRef` + carry no constraints -- constraints destined for a model terminal + are dropped (preserved verbatim from current behavior). + + Length constraints (`annotated_types.MinLen` / `MaxLen`) are wrapped + into the typed `length_constraints` variants matching the + attachment layer: `ArrayMinLen` / `ArrayMaxLen` on `ArrayOf`, + `ScalarMinLen` / `ScalarMaxLen` on scalar layers. `MapOf` raises: + map-length constraints have no current schema use and would + otherwise silently take the scalar path. + """ + if not constraints: + return shape + match shape: + case NewTypeShape(inner=inner): + return replace(shape, inner=attach_constraints(inner, constraints)) + case ArrayOf(): + wrapped = tuple(_wrap_length_for_array(cs) for cs in constraints) + return replace(shape, constraints=wrapped + shape.constraints) + case MapOf(): + _reject_length_on_map(constraints) + return replace(shape, constraints=constraints + shape.constraints) + case Primitive() | LiteralScalar() | AnyScalar(): + wrapped = tuple(_wrap_length_for_scalar(cs) for cs in constraints) + return replace(shape, constraints=wrapped + shape.constraints) + case _: + return shape + + +def _wrap_length_for_array(cs: ConstraintSource) -> ConstraintSource: + """Replace a raw `MinLen`/`MaxLen` with its `ArrayOf`-layer variant. + + Uses exact-type checks so already-wrapped variants (`ArrayMinLen`, + `ScalarMinLen`, etc.) are returned unchanged. 
+ """ + if type(cs.constraint) is MinLen: + return replace(cs, constraint=ArrayMinLen(min_length=cs.constraint.min_length)) + if type(cs.constraint) is MaxLen: + return replace(cs, constraint=ArrayMaxLen(max_length=cs.constraint.max_length)) + return cs - # Handle list types (list[X]) - if origin is list: - args = get_args(annotation) - if not args: - raise TypeError("Bare list without type argument is not supported") - state.list_depth += 1 - annotation = args[0] - continue - - # Handle dict types (dict[K, V]) - if origin is dict: - args = get_args(annotation) - if not args: - raise TypeError("Bare dict without type arguments is not supported") - state.is_dict = True - state.dict_key_type = analyze_type(args[0]) - state.dict_value_type = analyze_type(args[1]) - base_type = state.last_newtype_name or "dict" - return state.build_type_info( - base_type=base_type, - kind=TypeKind.PRIMITIVE, - source_type=dict, - ) - break +def _wrap_length_for_scalar(cs: ConstraintSource) -> ConstraintSource: + """Replace a raw `MinLen`/`MaxLen` with its scalar-layer variant. - return _classify_terminal(annotation, state) + Uses exact-type checks so already-wrapped variants (`ArrayMinLen`, + `ScalarMinLen`, etc.) are returned unchanged. 
+ """ + if type(cs.constraint) is MinLen: + return replace(cs, constraint=ScalarMinLen(min_length=cs.constraint.min_length)) + if type(cs.constraint) is MaxLen: + return replace(cs, constraint=ScalarMaxLen(max_length=cs.constraint.max_length)) + return cs + + +def _reject_length_on_map(constraints: tuple[ConstraintSource, ...]) -> None: + """Raise on `MinLen`/`MaxLen` attached to a `MapOf` layer.""" + for cs in constraints: + if isinstance(cs.constraint, (MinLen, MaxLen)): + raise NotImplementedError( + f"{type(cs.constraint).__name__} on a Map type is not supported" + ) -def _classify_terminal(annotation: object, state: _UnwrapState) -> TypeInfo: - """Classify a fully-unwrapped terminal type into a TypeInfo.""" - # typing.Any -- treat as an opaque primitive +def _terminal( + annotation: object, + newtype_ctx: _NewTypeCtx | None, + model_resolver: ModelResolver | None, +) -> FieldShape: + """Classify a fully-unwrapped terminal annotation into a shape.""" if annotation is Any: - return state.build_type_info( - base_type="Any", - kind=TypeKind.PRIMITIVE, - ) - - # Literal types (e.g., Literal["value"] or Literal["a", "b"]) + return AnyScalar(constraints=()) if get_origin(annotation) is Literal: - args = get_args(annotation) - return state.build_type_info( - base_type="Literal", - kind=TypeKind.LITERAL, - literal_values=tuple(args), - ) - + return LiteralScalar(values=tuple(get_args(annotation)), constraints=()) if not isinstance(annotation, type): raise TypeError(f"Unsupported annotation type: {type(annotation)}") - if issubclass(annotation, list): raise TypeError("Bare list without type argument is not supported") - if issubclass(annotation, dict): raise TypeError("Bare dict without type arguments is not supported") + if issubclass(annotation, BaseModel) and model_resolver is not None: + return model_resolver(annotation) + base_type = newtype_ctx.name if newtype_ctx else annotation.__name__ + return Primitive(base_type=base_type, source_type=annotation, 
constraints=()) + + +def _peel_union( + annotation: object, + union_resolver: UnionResolver | None, + *, + resolver_annotation: object | None = None, + description: str | None = None, +) -> _ContinueWith | _Resolved: + """Process one union layer. + + Filters out `None` / `Sentinel` arms (recording `is_optional`), then + drops `Literal[...]` arms when a concrete (non-Literal) arm exists. + A single remaining arm is returned as `_ContinueWith`; multiple arms + invoke `union_resolver` and the result is returned as `_Resolved` + (raising `UnsupportedUnionError` when no resolver is supplied). + + `resolver_annotation` is passed to `union_resolver` instead of + `annotation` when set. This lets the `Annotated` branch forward the + full `Annotated[X | Y, ...]` form so resolvers can recover + discriminator metadata that the `Annotated` peeling step consumed. + """ + args = get_args(annotation) + is_optional = any(a is types.NoneType for a in args) + + non_none_args = _filter_sentinel_arms(args) + concrete_args = [a for a in non_none_args if get_origin(a) is not Literal] + real_args = concrete_args if concrete_args else non_none_args + + if len(real_args) > 1: + members: list[type[BaseModel]] = [] + for arg in real_args: + inner = arg + if get_origin(inner) is Annotated: + inner = get_args(inner)[0] + if isinstance(inner, type) and issubclass(inner, BaseModel): + members.append(inner) + else: + raise UnsupportedUnionError( + f"Multi-type unions not supported: {annotation}" + ) + if union_resolver is None: + raise UnsupportedUnionError( + f"No union_resolver supplied for multi-arm union: {annotation}" + ) + return _Resolved( + union_resolver( + resolver_annotation or annotation, tuple(members), description + ), + is_optional, + ) - # Determine kind from type hierarchy - if issubclass(annotation, Enum): - kind = TypeKind.ENUM - elif issubclass(annotation, BaseModel): - kind = TypeKind.MODEL - else: - kind = TypeKind.PRIMITIVE + if not real_args: + raise 
UnsupportedUnionError(f"Union with no concrete types: {annotation}") - base_type = state.last_newtype_name or annotation.__name__ + return _ContinueWith(real_args[0], is_optional) - return state.build_type_info( - base_type=base_type, - kind=kind, - source_type=annotation, - ) + +def unwrap_list(annotation: object) -> object: + """Strip `| None`, `Sentinel`, and outermost `list[]` wrappers.""" + if _is_union(get_origin(annotation)): + args = _filter_sentinel_arms(get_args(annotation)) + if len(args) == 1: + annotation = args[0] + + while get_origin(annotation) is list: + annotation = get_args(annotation)[0] + return annotation def single_literal_value(annotation: object) -> object | None: - """Extract a single literal value from a type annotation, or None. + """Extract a single literal value from a type annotation, or `None`. - Delegates to analyze_type for all unwrapping, then checks - whether the result is a single-value Literal. Multi-value - Literals return None — callers needing all values should use - `analyze_type` and read `literal_values` directly. + Returns `None` for multi-value Literals -- callers needing all + values should use `analyze_type` and inspect the terminal + `LiteralScalar`'s `values`. 
""" try: - ti = analyze_type(annotation) + shape, _, _ = analyze_type(annotation) except (TypeError, UnsupportedUnionError): return None - if ( - ti.kind == TypeKind.LITERAL - and ti.literal_values - and len(ti.literal_values) == 1 - ): - return ti.literal_values[0] + terminal = terminal_of(shape) + if isinstance(terminal, LiteralScalar) and len(terminal.values) == 1: + return terminal.values[0] return None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py index 505657866..19a3007e0 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -2,7 +2,8 @@ from dataclasses import dataclass -from .type_analyzer import TypeInfo +from .field import FieldShape +from .field_walk import newtype_name, terminal_primitive __all__ = [ "TypeMapping", @@ -18,96 +19,79 @@ class TypeMapping: """Maps a type to its representation in different targets.""" markdown: str - - def for_target(self, target: str) -> str: - """Get the type representation for a named target.""" - if target != "markdown": - raise ValueError(f"Unknown target {target!r}, expected 'markdown'") - return self.markdown + spark: str | None = None PRIMITIVE_TYPES: dict[str, TypeMapping] = { # Signed integers - "int8": TypeMapping(markdown="int8"), - "int16": TypeMapping(markdown="int16"), - "int32": TypeMapping(markdown="int32"), - "int64": TypeMapping(markdown="int64"), + "int8": TypeMapping(markdown="int8", spark="IntegerType()"), + "int16": TypeMapping(markdown="int16", spark="IntegerType()"), + "int32": TypeMapping(markdown="int32", spark="IntegerType()"), + "int64": TypeMapping(markdown="int64", spark="LongType()"), # Unsigned integers - "uint8": TypeMapping(markdown="uint8"), - "uint16": TypeMapping(markdown="uint16"), - "uint32": 
TypeMapping(markdown="uint32"), + "uint8": TypeMapping(markdown="uint8", spark="IntegerType()"), + "uint16": TypeMapping(markdown="uint16", spark="IntegerType()"), + "uint32": TypeMapping(markdown="uint32", spark="IntegerType()"), # Floating point - "float32": TypeMapping(markdown="float32"), - "float64": TypeMapping(markdown="float64"), + "float32": TypeMapping(markdown="float32", spark="FloatType()"), + "float64": TypeMapping(markdown="float64", spark="DoubleType()"), # Basic types - "str": TypeMapping(markdown="string"), - "bool": TypeMapping(markdown="boolean"), + "str": TypeMapping(markdown="string", spark="StringType()"), + "bool": TypeMapping(markdown="boolean", spark="BooleanType()"), # Python builtins (aliases to their portable equivalents) - "int": TypeMapping(markdown="int64"), - "float": TypeMapping(markdown="float64"), + "int": TypeMapping(markdown="int64", spark="LongType()"), + "float": TypeMapping(markdown="float64", spark="DoubleType()"), # Geometry types - "Geometry": TypeMapping(markdown="geometry"), + "Geometry": TypeMapping(markdown="geometry", spark="BinaryType()"), "BBox": TypeMapping(markdown="bbox"), } -def is_semantic_newtype(type_info: TypeInfo) -> bool: - """Whether a type represents a semantic NewType that should be displayed by name. +def is_semantic_newtype(shape: FieldShape) -> bool: + """Whether a shape's outermost NewType should be displayed by name. - Returns True for unregistered NewTypes (HexColor, Sources) and NewTypes - that wrap a different base type (FeatureVersion wrapping int32, Id wrapping - NoWhitespaceString). Returns False for registered primitives (int32, Geometry). + Returns True for unregistered NewTypes (HexColor, Sources) and + NewTypes that wrap a different base type (FeatureVersion wrapping + int32, Id wrapping NoWhitespaceString). Returns False for + registered primitives (int32, Geometry). 
""" - if type_info.newtype_name is None: + nt_name = newtype_name(shape) + if nt_name is None: return False - if type_info.newtype_name != type_info.base_type: + terminal = terminal_primitive(shape) + if terminal is None: + return True + if nt_name != terminal.base_type: return True - return get_type_mapping(type_info.base_type) is None + return get_type_mapping(terminal.base_type) is None def get_type_mapping(type_name: str) -> TypeMapping | None: """Look up a type mapping by name. - Parameters - ---------- - type_name : str - The type name to look up (e.g., "int32", "str", "Geometry"). - Also accepts Python builtin names ("int" -> int64, "float" -> float64). - - Returns - ------- - TypeMapping or None - The TypeMapping for the type, or None if not found. + Accepts portable type names (`int32`, `str`, `Geometry`) and Python + builtin names (`int` -> int64, `float` -> float64). """ return PRIMITIVE_TYPES.get(type_name) -def resolve_type_name(type_info: TypeInfo, target: str) -> str: - """Resolve a TypeInfo to the base type string for a given target. - - Looks up the type in the registry first (trying source_type if base_type - has no mapping). Falls back to the base_type name as-is. - - Parameters - ---------- - type_info : TypeInfo - The analyzed type information. - target : str - The output target ("markdown"). +def resolve_type_name(shape: FieldShape) -> str: + """Resolve a shape to its markdown base type name string. - Returns - ------- - str - The resolved base type name string for the target. + Looks up the terminal scalar's `base_type` in the registry first, + falling back to `source_type.__name__`. Semantic NewTypes wrapping + unregistered types resolve to the underlying class name (e.g. + `Sources` wrapping `SourceItem` -> `SourceItem`). 
""" - mapping = get_type_mapping(type_info.base_type) - if mapping is None and type_info.source_type is not None: - mapping = get_type_mapping(type_info.source_type.__name__) + terminal = terminal_primitive(shape) + if terminal is None: + return "?" + mapping = get_type_mapping(terminal.base_type) + if mapping is None and terminal.source_type is not None: + mapping = get_type_mapping(terminal.source_type.__name__) if mapping is not None: - return mapping.for_target(target) + return mapping.markdown - # Semantic NewType wrapping an unregistered type (e.g., Sources wrapping - # SourceItem): use the underlying class name rather than the NewType alias. - if type_info.newtype_name and type_info.source_type is not None: - return type_info.source_type.__name__ - return type_info.base_type + if newtype_name(shape) and terminal.source_type is not None: + return terminal.source_type.__name__ + return terminal.base_type diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py index c555fdba0..cd3870a5e 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py @@ -2,6 +2,8 @@ from __future__ import annotations +from collections.abc import Mapping +from enum import Enum from typing import Annotated, get_args, get_origin from pydantic import BaseModel @@ -9,9 +11,24 @@ from overture.schema.system.feature import resolve_discriminator_field_name +from .field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from .field_walk import list_depth, terminal_of, walk_shape from .model_extraction import extract_model, resolve_field_alias -from .specs import AnnotatedField, UnionSpec, is_model_class -from .type_analyzer import 
TypeInfo, TypeKind, analyze_type, single_literal_value +from .specs import AnnotatedField, FieldSpec, MemberSpec, UnionSpec, is_model_class +from .type_analyzer import ( + capture_union_members, + single_literal_value, +) __all__ = ["extract_discriminator", "extract_union"] @@ -39,7 +56,7 @@ def max_mro_index(cls: type) -> int: def _find_field_by_alias(model: type[BaseModel], alias: str) -> FieldInfo | None: - """Find a field in model_fields by alias-resolved name.""" + """Find a field in `model_fields` by alias-resolved name.""" direct = model.model_fields.get(alias) if direct is not None: return direct @@ -73,18 +90,76 @@ def extract_discriminator( if field_info and field_info.annotation is not None: lit_val = single_literal_value(field_info.annotation) if lit_val is not None: - mapping[str(lit_val)] = member + key = lit_val.value if isinstance(lit_val, Enum) else str(lit_val) + mapping[key] = member return disc_field_name, mapping or None -_TypeShape = tuple[str, TypeKind, bool, int] +_TypeShape = tuple[object, ...] _FieldKey = tuple[str, _TypeShape] -def _type_shape(ti: TypeInfo) -> _TypeShape: - """Structural shape for dedup -- excludes source_type which varies across members.""" - return (ti.base_type, ti.kind, ti.is_optional, ti.list_depth) +def _structural_fingerprint(spec: FieldSpec) -> _TypeShape: + """Structural shape for dedup: ignores per-variant source_type variation. + + Two fields with the same name and same `(terminal_base_type, + terminal_kind, is_optional, list_depth)` collapse to a single + `AnnotatedField` whose `variant_sources` lists the contributing + members. + + `terminal_of` unwraps `ArrayOf` / `NewTypeShape`, so the terminal is + always one of the six leaf variants below; an unrecognized one + raises instead of silently collapsing into a shared fingerprint. 
+ """ + depth = list_depth(spec.shape) + base_type: object + terminal = terminal_of(spec.shape) + match terminal: + case Primitive(base_type=bt): + base_type, kind = bt, "scalar" + case LiteralScalar(values=values): + base_type, kind = ("Literal", values), "scalar" + case AnyScalar(): + base_type, kind = "Any", "scalar" + case ModelRef(model=model): + base_type, kind = model.name, "model" + case UnionRef(union=union): + base_type, kind = union.name, "union" + case MapOf(): + base_type, kind = "dict", "map" + case _: + raise TypeError(f"Unexpected terminal shape: {terminal!r}") + return (base_type, kind, spec.is_optional, depth) + + +def _constraints_fingerprint(spec: FieldSpec) -> frozenset[str]: + """Constraints declared anywhere in *spec*'s shape tree, as a comparable set. + + `_structural_fingerprint` deliberately ignores constraints so that + members declaring the same field with per-variant `Annotated` + metadata still collapse to one `AnnotatedField`. This captures what + that ignores, so collisions with diverging constraints fail loudly + instead of silently keeping the last member's `FieldSpec`. 
+ """ + constraints: list[str] = [] + + def collect(shape: FieldShape) -> None: + match shape: + case ( + Primitive(constraints=cs) + | LiteralScalar(constraints=cs) + | AnyScalar(constraints=cs) + | ArrayOf(constraints=cs) + | MapOf(constraints=cs) + ): + for source in cs: + constraints.append(repr(source.constraint)) + case ModelRef() | UnionRef() | NewTypeShape(): + pass + + walk_shape(spec.shape, collect) + return frozenset(constraints) def extract_union( @@ -92,39 +167,65 @@ def extract_union( annotation: object, *, entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, ) -> UnionSpec: - """Extract a UnionSpec from a discriminated union type alias.""" - ti = analyze_type(annotation) - if ti.kind != TypeKind.UNION or ti.union_members is None: + """Extract a `UnionSpec` from a discriminated union type alias.""" + extracted = capture_union_members(annotation) + if extracted is None: raise TypeError(f"{name} is not a union type alias") + member_tuple, description = extracted + members = list(member_tuple) - members = list(ti.union_members) common_base = _find_common_base(members) + # Plain Python type aliases (`Foo = Annotated[...]`) don't preserve + # the alias name in the annotation. The nested-union path (called + # from extract_model for UNION-kind fields) passes `members[0].__name__` + # as the placeholder name. Recover the alias by convention: members + # extend `Base`, so stripping that suffix yields the alias. + # Top-level unions go through the CLI, which supplies the real name + # and skips this fallback. + # + # PEP 695 (`type Foo = Annotated[...]`) preserves `__name__` as + # `"Foo"` on 3.12+; after migrating, the placeholder hack can go. 
+ member_names = {m.__name__ for m in members} + if name in member_names: + base_name = common_base.__name__ + name = ( + base_name.removesuffix("Base") if base_name.endswith("Base") else base_name + ) + base_spec = extract_model(common_base) shared_field_names = {f.name for f in base_spec.fields} - member_specs = [(m, extract_model(m)) for m in members] + member_specs = [MemberSpec(m, extract_model(m)) for m in members] annotated_fields: list[AnnotatedField] = [] - # Shared fields first (from common base) for fs in base_spec.fields: annotated_fields.append(AnnotatedField(field_spec=fs, variant_sources=None)) - # Variant-specific fields: collect by (name, type identity) for dedup seen: dict[_FieldKey, AnnotatedField] = {} - for member_cls, member_spec in member_specs: - for fs in member_spec.fields: + for member in member_specs: + member_cls = member.member_cls + for fs in member.spec.fields: if fs.name in shared_field_names: continue - key = (fs.name, _type_shape(fs.type_info)) + key = (fs.name, _structural_fingerprint(fs)) existing = seen.get(key) + if existing is not None: + existing_constraints = _constraints_fingerprint(existing.field_spec) + if _constraints_fingerprint(fs) != existing_constraints: + raise ValueError( + f"Union {name!r} field {fs.name!r} has the same structural " + f"shape across members but diverging constraints; dedup " + f"would silently drop one member's constraints" + ) prior_sources = existing.variant_sources or () if existing else () seen[key] = AnnotatedField( field_spec=fs, - variant_sources=(*prior_sources, member_cls.__name__), + variant_sources=(*prior_sources, member_cls), ) annotated_fields.extend(seen.values()) @@ -133,12 +234,14 @@ def extract_union( return UnionSpec( name=name, - description=ti.description, + description=description, annotated_fields=annotated_fields, members=members, + member_specs=member_specs, discriminator_field=disc_field, discriminator_mapping=disc_mapping, source_annotation=annotation, 
common_base=common_base, entry_point=entry_point, + partitions=partitions or {}, ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py index bb6b92379..f15bb0120 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py @@ -10,6 +10,8 @@ from collections.abc import Iterable, Mapping from pathlib import PurePosixPath +from overture.schema.system.discovery import split_entry_point + __all__ = [ "OUTPUT_ROOT", "compute_output_dir", @@ -24,26 +26,13 @@ OUTPUT_ROOT = PurePosixPath(".") -def _split_entry_point(entry_point_path: str) -> tuple[str, str]: - """Split `"module.path:ClassName"` into its two parts. - - >>> _split_entry_point("overture.schema.buildings:Building") - ('overture.schema.buildings', 'Building') - """ - if ":" not in entry_point_path: - msg = f"Expected 'module:Class' format, got {entry_point_path!r}" - raise ValueError(msg) - module, cls = entry_point_path.split(":", 1) - return module, cls - - def entry_point_module(entry_point_path: str) -> str: """Extract module path from entry-point-style path. 
>>> entry_point_module("overture.schema.buildings:Building") 'overture.schema.buildings' """ - return _split_entry_point(entry_point_path)[0] + return split_entry_point(entry_point_path)[0] def entry_point_class(entry_point_path: str) -> str: @@ -52,7 +41,7 @@ def entry_point_class(entry_point_path: str) -> str: >>> entry_point_class("overture.schema.buildings:Building") 'Building' """ - return _split_entry_point(entry_point_path)[1] + return split_entry_point(entry_point_path)[1] def compute_schema_root(module_paths: Iterable[str]) -> str: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py index b9072da64..621249ec1 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py @@ -1,14 +1,26 @@ -"""Supplementary type discovery by walking expanded feature trees. +"""Supplementary type discovery by walking feature trees. -Walks FieldSpec.model references for sub-models (already extracted), -and extracts enums and NewTypes on first encounter. +Walks `FieldShape` trees to extract referenced enums, NewTypes, +Pydantic built-ins, and union member sub-models. `ModelRef` and +`UnionRef` carry their resolved specs structurally, so recursion +follows the shape directly. 
""" from collections.abc import Sequence +from enum import Enum from typing import Annotated, get_args, get_origin +from pydantic import BaseModel + from ..extraction.enum_extraction import extract_enum -from ..extraction.model_extraction import expand_model_tree, extract_model +from ..extraction.field import ( + FieldShape, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from ..extraction.field_walk import walk_shape from ..extraction.newtype_extraction import extract_newtype from ..extraction.pydantic_extraction import extract_pydantic_type from ..extraction.specs import ( @@ -17,15 +29,9 @@ ModelSpec, SupplementarySpec, TypeIdentity, - is_pydantic_type, -) -from ..extraction.type_analyzer import ( - TypeInfo, - TypeKind, - analyze_type, - is_newtype, - walk_type_info, + is_pydantic_sourced, ) +from ..extraction.type_analyzer import analyze_type, is_newtype from ..extraction.type_registry import is_semantic_newtype __all__ = ["collect_all_supplementary_types"] @@ -36,19 +42,16 @@ def collect_all_supplementary_types( ) -> dict[TypeIdentity, SupplementarySpec]: """Collect supplementary types by walking expanded feature trees. - Requires that expand_model_tree has been called on all feature specs - first. Walks FieldSpec.model references for sub-models (already - extracted), and extracts enums and NewTypes on first encounter. - - Returns a dict mapping TypeIdentity to extracted specs. Two types - with the same class name from different modules are keyed separately. + Walks `ModelRef` references for sub-models (already extracted), + and extracts enums and NewTypes on first encounter. Two types + with the same class name from different modules are keyed + separately. """ feature_objs: set[object] = {spec.identity.obj for spec in feature_specs} all_specs: dict[TypeIdentity, SupplementarySpec] = {} visited_models: set[object] = set() def _register_newtype(newtype_ref: object, name: str) -> bool: - """Register a NewType if not already present. 
Returns True if registered.""" nt_id = TypeIdentity(newtype_ref, name) if nt_id in all_specs: return False @@ -66,91 +69,52 @@ def _collect_from_model(model_spec: ModelSpec) -> None: _collect_from_fields(model_spec.fields) def _collect_inner_newtypes(newtype_ref: object) -> None: - """Walk a NewType's __supertype__ chain for intermediate semantic NewTypes.""" + """Walk a NewType's `__supertype__` chain for nested semantic NewTypes.""" annotation = getattr(newtype_ref, "__supertype__", None) while annotation is not None: if get_origin(annotation) is Annotated: annotation = get_args(annotation)[0] continue if is_newtype(annotation): - inner_ti = analyze_type(annotation) - if ( - inner_ti.newtype_ref is not None - and inner_ti.newtype_name is not None - and is_semantic_newtype(inner_ti) + inner_shape, _, _ = analyze_type(annotation) + if isinstance(inner_shape, NewTypeShape) and is_semantic_newtype( + inner_shape ): - _register_newtype(inner_ti.newtype_ref, inner_ti.newtype_name) + _register_newtype(inner_shape.ref, inner_shape.name) annotation = getattr(annotation, "__supertype__", None) continue break - def _collect_from_type_info(ti: TypeInfo) -> None: - """Collect supplementary types from a single TypeInfo. - - Uses walk_type_info for dict key/value recursion. Handles all - TypeKind variants without early returns so newtype extraction - and dict recursion apply regardless of kind. - """ - - def _visit(node: TypeInfo) -> None: - # UNION, ENUM, and pydantic (PRIMITIVE) are mutually exclusive - # by TypeKind. NewType extraction is orthogonal -- a node can be - # a NewType-wrapped ENUM, for instance. - if node.kind == TypeKind.UNION and node.union_members: - # Walk each member's fields for supplementary types. - # Members that are also top-level feature specs are skipped - # by the feature_objs guard in _collect_from_model. 
- for member_cls in node.union_members: - member_spec = extract_model(member_cls) - expand_model_tree(member_spec) - _collect_from_model(member_spec) - elif node.kind == TypeKind.ENUM and node.source_type is not None: - enum_id = TypeIdentity.of(node.source_type) - if enum_id not in all_specs: - all_specs[enum_id] = extract_enum(node.source_type) - elif is_pydantic_type(node): - if node.source_type is None: - raise TypeError( - "is_pydantic_type returned True but source_type is None" - ) - pid = TypeIdentity.of(node.source_type) - if pid not in all_specs: - all_specs[pid] = extract_pydantic_type(node.source_type) - - # Semantic NewTypes always get extracted, including intermediate - # NewTypes in the wrapping chain (e.g., Id wraps NoWhitespaceString - # wraps str -- both Id and NoWhitespaceString get pages). - if ( - node.newtype_ref is not None - and node.newtype_name is not None - and is_semantic_newtype(node) - ): - newly_registered = _register_newtype( - node.newtype_ref, node.newtype_name - ) - if newly_registered: - _collect_inner_newtypes(node.newtype_ref) - - walk_type_info(ti, _visit) + def _collect_from_shape(shape: FieldShape) -> None: + """Walk *shape* and register every supplementary type it touches.""" + + def _visit(node: FieldShape) -> None: + match node: + case NewTypeShape(name=name, ref=ref): + if _register_newtype(ref, name): + _collect_inner_newtypes(ref) + case UnionRef(union=u): + for member in u.member_specs: + _collect_from_model(member.spec) + case ModelRef(model=m, starts_cycle=False): + _collect_from_model(m) + case Primitive(source_type=cls) if cls is not None and isinstance( + cls, type + ): + if issubclass(cls, Enum): + eid = TypeIdentity.of(cls) + if eid not in all_specs: + all_specs[eid] = extract_enum(cls) + elif is_pydantic_sourced(cls) and not issubclass(cls, BaseModel): + pid = TypeIdentity.of(cls) + if pid not in all_specs: + all_specs[pid] = extract_pydantic_type(cls) + + walk_shape(shape, _visit) def 
_collect_from_fields(fields: list[FieldSpec]) -> None: - # A single field can match multiple conditions (e.g., Sources is both - # a semantic NewType and wraps a MODEL-kind type), so checks are - # independent `if` statements, not `elif`. for field_spec in fields: - ti = field_spec.type_info - _collect_from_type_info(ti) - - # MODEL-kind fields (whether direct or via NewType wrapper) get expanded - if ti.kind == TypeKind.MODEL and ti.source_type is not None: - if field_spec.model is None: - msg = ( - f"MODEL-kind field {field_spec.name!r} has source_type " - f"but model=None — call expand_model_tree first" - ) - raise RuntimeError(msg) - if not field_spec.starts_cycle: - _collect_from_model(field_spec.model) + _collect_from_shape(field_spec.shape) for spec in feature_specs: _collect_from_fields(spec.fields) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py index bf09950c4..a5c34fef7 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py @@ -3,7 +3,8 @@ from dataclasses import dataclass from pathlib import PurePosixPath -from ..extraction.case_conversion import slug_filename +from overture.schema.system.case import to_snake_case + from ..extraction.specs import TypeIdentity __all__ = ["LinkContext", "relative_link"] @@ -28,7 +29,7 @@ def resolve_link_or_slug(self, identity: TypeIdentity) -> str: Always returns a usable link string. Use when the caller needs a link regardless of whether the type has a registered page. 
""" - return self.resolve_link(identity) or slug_filename(identity.name) + return self.resolve_link(identity) or f"{to_snake_case(identity.name)}.md" def _is_normalized(path: PurePosixPath) -> bool: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py index f0d224ee4..9f38f63a1 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py @@ -7,7 +7,8 @@ from collections.abc import Sequence from pathlib import PurePosixPath -from ..extraction.case_conversion import slug_filename +from overture.schema.system.case import to_snake_case + from ..extraction.specs import ( FeatureSpec, PydanticTypeSpec, @@ -54,10 +55,8 @@ def build_placement_registry( if tid in registry: continue if isinstance(supp_spec, PydanticTypeSpec): - registry[tid] = ( - PurePosixPath("pydantic") - / supp_spec.source_module - / slug_filename(tid.name) + registry[tid] = _md_path( + PurePosixPath("pydantic") / supp_spec.source_module, tid.name ) continue source_module = getattr(supp_spec.source_type, "__module__", None) @@ -77,7 +76,7 @@ def resolve_output_path( """Look up a type's output path from the registry, with flat-file fallback.""" if registry is not None and identity in registry: return registry[identity] - return PurePosixPath(slug_filename(identity.name)) + return _md_path(PurePosixPath(""), identity.name) def _aggregate_page_entries( @@ -112,4 +111,4 @@ def _nest_under_types( def _md_path(directory: PurePosixPath, name: str) -> PurePosixPath: """Build a .md file path from a directory and a PascalCase type name.""" - return directory / slug_filename(name) + return directory / f"{to_snake_case(name)}.md" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py 
b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py index f7c676c06..8a6bb8348 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py @@ -14,7 +14,6 @@ from overture.schema.system.primitive import GeometryType from ..extraction.examples import ExampleRecord, load_examples -from ..extraction.model_extraction import expand_model_tree from ..extraction.numeric_extraction import extract_numerics from ..extraction.specs import ( EnumSpec, @@ -97,16 +96,19 @@ def _render_supplement( ctx = LinkContext(output_path, registry) used_by = reverse_refs.get(tid) - if isinstance(spec, EnumSpec): - content = render_enum(spec, link_ctx=ctx, used_by=used_by) - elif isinstance(spec, NewTypeSpec): - content = render_newtype(spec, ctx, used_by=used_by) - elif isinstance(spec, ModelSpec): - content = render_feature(spec, ctx, used_by=used_by) - elif isinstance(spec, PydanticTypeSpec): - content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) - else: - raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}") + match spec: + case EnumSpec(): + content = render_enum(spec, link_ctx=ctx, used_by=used_by) + case NewTypeSpec(): + content = render_newtype(spec, ctx, used_by=used_by) + case ModelSpec(): + content = render_feature(spec, ctx, used_by=used_by) + case PydanticTypeSpec(): + content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) + case _: + raise TypeError( + f"Unhandled SupplementarySpec variant: {type(spec).__name__}" + ) return RenderedPage(content=content, path=output_path) @@ -143,10 +145,6 @@ def generate_markdown_pages( I/O, frontmatter injection, and any output-format-specific concerns (like Docusaurus category files). 
""" - cache: dict[type, ModelSpec] = {} - for spec in feature_specs: - expand_model_tree(spec, cache) - numeric_names, geometry_names = partition_numeric_and_geometry_types( _system_primitive ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py index 0e829d1f4..0a5c9d08f 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py @@ -14,7 +14,9 @@ from typing_extensions import NotRequired from ..extraction.examples import ExampleRecord +from ..extraction.field import ConstraintSource from ..extraction.field_constraints import constraint_display_text +from ..extraction.field_walk import all_constraints, list_depth, terminal_model_ref from ..extraction.model_constraints import analyze_model_constraints from ..extraction.specs import ( AnnotatedField, @@ -28,9 +30,6 @@ TypeIdentity, UnionSpec, ) -from ..extraction.type_analyzer import ( - ConstraintSource, -) from .link_computation import LinkContext from .reverse_references import UsedByEntry from .type_format import ( @@ -237,14 +236,14 @@ def _annotate_field_constraints( ) -> None: """Annotate a field row with constraints from the field's own annotation. - Shows constraints where source is None — those applied directly to + Shows constraints where source is None -- those applied directly to the field, not inherited from NewType chains. NewType-inherited constraints appear on the NewType's own page instead. 
""" link_fn = _link_fn_from_ctx(ctx) notes = [ constraint_display_text(cs, link_fn=link_fn) - for cs in field.type_info.constraints + for cs in all_constraints(field.shape) if cs.source_ref is None ] if notes: @@ -253,13 +252,11 @@ def _annotate_field_constraints( def _expandable_list_suffix(field_spec: FieldSpec) -> str: """Return `"[]"` per nesting level for list-of-model fields expanded inline.""" - if ( - field_spec.type_info.is_list - and field_spec.model - and not field_spec.starts_cycle - ): - return "[]" * field_spec.type_info.list_depth - return "" + model_ref = terminal_model_ref(field_spec.shape) + if model_ref is None or model_ref.starts_cycle: + return "" + depth = list_depth(field_spec.shape) + return "[]" * depth if depth > 0 else "" def _expand_sub_model( @@ -269,10 +266,13 @@ def _expand_sub_model( result: list[_FieldRow], ) -> None: """Expand sub-model fields inline, appending child rows to *result*.""" - sub = field_spec.model if not field_spec.starts_cycle else None - if sub is not None: - child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." - result.extend(_expand_model_fields(sub.fields, ctx, prefix=child_prefix)) + model_ref = terminal_model_ref(field_spec.shape) + if model_ref is None or model_ref.starts_cycle: + return + child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." 
+ result.extend( + _expand_model_fields(model_ref.model.fields, ctx, prefix=child_prefix) + ) def _annotate_top_level_constraints( @@ -341,7 +341,7 @@ def _variant_tag(annotated: AnnotatedField, union_name: str) -> str | None: if annotated.variant_sources is None: return None short_names = [ - _short_variant_name(v, union_name) for v in annotated.variant_sources + _short_variant_name(v.__name__, union_name) for v in annotated.variant_sources ] return f" *({', '.join(short_names)})*" @@ -385,9 +385,8 @@ def render_feature( examples: list[ExampleRecord] | None = None, used_by: list[UsedByEntry] | None = None, ) -> str: - """Render a FeatureSpec (ModelSpec or UnionSpec) as Markdown documentation. + """Render a feature spec as Markdown documentation. - For ModelSpec, requires expand_model_tree to have been called first. For UnionSpec, adds inline variant tags to variant-specific fields. """ template = _get_jinja_env().get_template("feature.md.jinja2") @@ -491,13 +490,13 @@ def render_newtype( link_ctx: LinkContext | None = None, used_by: list[UsedByEntry] | None = None, ) -> str: - """Render a NewTypeSpec as Markdown documentation.""" + """Render a `NewTypeSpec` as Markdown documentation.""" template = _get_jinja_env().get_template("newtype.md.jinja2") - ti = newtype_spec.type_info - underlying = format_underlying_type(ti, link_ctx) + shape = newtype_spec.shape + underlying = format_underlying_type(shape, link_ctx) constraints = [ _format_constraint(cs, newtype_spec.source_type, link_ctx) - for cs in ti.constraints + for cs in all_constraints(shape) ] return template.render( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py index 2ad471fc1..39f841345 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py +++ 
b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -6,6 +6,17 @@ from dataclasses import dataclass from enum import Enum +from pydantic import BaseModel + +from ..extraction.field import ( + FieldShape, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_of, walk_shape from ..extraction.specs import ( FeatureSpec, FieldSpec, @@ -14,9 +25,8 @@ SupplementarySpec, TypeIdentity, UnionSpec, - is_pydantic_type, + is_pydantic_sourced, ) -from ..extraction.type_analyzer import TypeInfo, TypeKind, walk_type_info __all__ = [ "UsedByEntry", @@ -51,98 +61,84 @@ def compute_reverse_references( Parameters ---------- - feature_specs : Sequence[FeatureSpec] + feature_specs Feature-level specs (ModelSpec or UnionSpec). - all_specs : Mapping[TypeIdentity, SupplementarySpec] + all_specs Supplementary types (enums, newtypes, sub-models). - - Returns - ------- - dict[TypeIdentity, list[UsedByEntry]] - Dict mapping TypeIdentity to sorted lists of UsedByEntry. 
""" - # Track references with sets to deduplicate references: dict[TypeIdentity, set[UsedByEntry]] = {} def add_reference( target: TypeIdentity, referrer: TypeIdentity, kind: UsedByKind ) -> None: - """Add a reference from referrer to target, with deduplication.""" if target == referrer or target not in all_specs: return references.setdefault(target, set()).add(UsedByEntry(referrer, kind)) - def collect_from_type_info( - ti: TypeInfo, referrer: TypeIdentity, referrer_kind: UsedByKind + def collect_from_shape( + shape: FieldShape, + referrer: TypeIdentity, + referrer_kind: UsedByKind, ) -> None: - """Collect references from a TypeInfo.""" - - def _visit(node: TypeInfo) -> None: - if node.newtype_ref is not None and node.newtype_name is not None: - add_reference( - TypeIdentity(node.newtype_ref, node.newtype_name), - referrer, - referrer_kind, - ) - - # ENUM, MODEL, pydantic (PRIMITIVE), and UNION are mutually - # exclusive by TypeKind. - if ( - node.kind in (TypeKind.ENUM, TypeKind.MODEL) - and node.source_type is not None - ): - add_reference( - TypeIdentity.of(node.source_type), - referrer, - referrer_kind, - ) - elif is_pydantic_type(node): - add_reference( - TypeIdentity.of(node.source_type), referrer, referrer_kind - ) - elif node.union_members is not None: - for member_cls in node.union_members: + """Walk a shape and add references for every type it touches.""" + + def _visit(node: FieldShape) -> None: + match node: + case NewTypeShape(name=name, ref=ref): + add_reference(TypeIdentity(ref, name), referrer, referrer_kind) + case ModelRef(model=m) if m.source_type is not None: add_reference( - TypeIdentity.of(member_cls), - referrer, - referrer_kind, + TypeIdentity.of(m.source_type), referrer, referrer_kind ) - - walk_type_info(ti, _visit) + case UnionRef(union=u): + for member_cls in u.members: + add_reference( + TypeIdentity.of(member_cls), referrer, referrer_kind + ) + case Primitive(source_type=cls) if cls is not None: + if isinstance(cls, type) and ( + 
issubclass(cls, Enum) + or issubclass(cls, BaseModel) + or is_pydantic_sourced(cls) + ): + add_reference(TypeIdentity.of(cls), referrer, referrer_kind) + + walk_shape(shape, _visit) def collect_from_fields( - fields: list[FieldSpec], referrer: TypeIdentity, referrer_kind: UsedByKind + fields: list[FieldSpec], + referrer: TypeIdentity, + referrer_kind: UsedByKind, ) -> None: - """Collect references from model fields.""" + """Collect references from each field's shape.""" for field_spec in fields: - collect_from_type_info(field_spec.type_info, referrer, referrer_kind) + collect_from_shape(field_spec.shape, referrer, referrer_kind) def collect_from_model_spec(spec: ModelSpec, referrer: TypeIdentity) -> None: - """Collect references from a ModelSpec.""" collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) def collect_from_union_spec(spec: UnionSpec) -> None: - """Collect references from a UnionSpec.""" referrer = spec.identity # Union features reference their members for member_cls in spec.members: - add_reference( - TypeIdentity.of(member_cls), - referrer, - UsedByKind.MODEL, - ) - # Also walk fields for other supplementary types + add_reference(TypeIdentity.of(member_cls), referrer, UsedByKind.MODEL) collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None: - """Collect references from a NewTypeSpec.""" - collect_from_type_info(spec.type_info, referrer, UsedByKind.NEWTYPE) - - # Collect inherited NewTypes from constraint sources - for cs in spec.type_info.constraints: - if cs.source_ref is not None and cs.source_name is not None: - ref_id = TypeIdentity(cs.source_ref, cs.source_name) - add_reference(ref_id, referrer, UsedByKind.NEWTYPE) + # The NewType's own identity isn't added here (self-reference). + # spec.shape already has the outer NewTypeShape stripped. 
+ collect_from_shape(spec.shape, referrer, UsedByKind.NEWTYPE) + + # Inherited NewTypes from constraint sources (constraint chains). + terminal = terminal_of(spec.shape) + if isinstance(terminal, Scalar): + for cs in terminal.constraints: + if cs.source_ref is not None and cs.source_name is not None: + add_reference( + TypeIdentity(cs.source_ref, cs.source_name), + referrer, + UsedByKind.NEWTYPE, + ) # Collect from features for spec in feature_specs: @@ -151,17 +147,14 @@ def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None elif isinstance(spec, UnionSpec): collect_from_union_spec(spec) - # Collect from supplementary specs (NewTypes and sub-models reference - # other types; enums do not, so they need no processing here) + # Collect from supplementary specs (enums have no outgoing references) for tid, supp_spec in all_specs.items(): if isinstance(supp_spec, NewTypeSpec): collect_from_newtype_spec(supp_spec, tid) elif isinstance(supp_spec, ModelSpec): collect_from_model_spec(supp_spec, tid) - # Sort into deterministic lists. (kind, name) handles the common case; - # module breaks ties when two referrers share the same display name - # (e.g. identically-named types from different themes/modules). + # Sort into deterministic lists. 
result: dict[TypeIdentity, list[UsedByEntry]] = {} for target, ref_set in references.items(): entries = sorted( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py index b6bd7a6ec..baaff8668 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -1,16 +1,33 @@ -"""Format TypeInfo as markdown type strings with cross-page links.""" +"""Format `FieldShape` trees as markdown type strings with cross-page links.""" from __future__ import annotations +from collections.abc import Sequence +from enum import Enum + from pydantic import BaseModel -from ..extraction.specs import FieldSpec, TypeIdentity -from ..extraction.type_analyzer import TypeInfo, TypeKind -from ..extraction.type_registry import is_semantic_newtype, resolve_type_name +from ..extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.specs import FieldSpec, TypeIdentity, is_pydantic_sourced +from ..extraction.type_registry import ( + get_type_mapping, + is_semantic_newtype, + resolve_type_name, +) from .link_computation import LinkContext __all__ = [ - "format_dict_type", "format_type", "format_underlying_type", "resolve_type_link", @@ -18,17 +35,17 @@ def _code_link(name: str, href: str) -> str: - """Format a markdown link with inline-code text: [`name`](href).""" + """Format a markdown link with inline-code text: `[``name``](href)`.""" return f"[`{name}`]({href})" def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> str: - """Resolve a TypeIdentity to a linked code span or plain code span. + """Resolve a `TypeIdentity` to a linked or plain code span. 
- When *ctx* is provided, links only to types in the registry (types - without pages render as inline code). Without context, renders as - inline code -- producing a link requires a placement registry to - compute correct relative paths. + With `ctx`, links only to types in the registry (types without + pages render as inline code). Without context, renders as inline + code -- producing a link requires a placement registry to compute + correct relative paths. """ if ctx: href = ctx.resolve_link(identity) @@ -40,9 +57,9 @@ def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> def _wrap_list_n(inner: str, depth: int) -> str: """Wrap an inner type string in `list<...>` markdown syntax *depth* times. - Builds a single broken-backtick wrapper rather than nesting iteratively. - Iterative nesting creates adjacent backticks that CommonMark - interprets as multi-backtick code span delimiters. + Builds a single broken-backtick wrapper rather than nesting + iteratively, since iterative nesting creates adjacent backticks + that CommonMark interprets as multi-backtick code span delimiters. """ return f"`{'list<' * depth}`{inner}`{'>' * depth}`" @@ -52,165 +69,238 @@ def _plain_list_type(base: str, depth: int) -> str: return f"`{'list<' * depth}{base}{'>' * depth}`" -def _linked_type_identity(ti: TypeInfo) -> TypeIdentity | None: - """Return the TypeIdentity to use for a markdown link, or None for non-linked types.""" - if ( - is_semantic_newtype(ti) - and ti.newtype_ref is not None - and ti.newtype_name is not None - ): - return TypeIdentity(ti.newtype_ref, ti.newtype_name) - if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type is not None: - return TypeIdentity(ti.source_type, ti.base_type) - return None - - -def _try_primitive_link( - ti: TypeInfo, display_name: str, ctx: LinkContext | None -) -> str | None: - """Try to link a PRIMITIVE type to its page via registry lookup. 
+def _peel_arrays(shape: FieldShape) -> tuple[int, FieldShape]: + """Strip outer `ArrayOf` layers; return (count, inner).""" + depth = 0 + while isinstance(shape, ArrayOf): + depth += 1 + shape = shape.element + return depth, shape - Registered primitives (int32, Geometry) and Pydantic types (HttpUrl) - can have pages in the registry. Uses the type registry display name - (e.g. `geometry` not `Geometry`) for the link text. - """ - if ti.kind != TypeKind.PRIMITIVE or not ctx: - return None - candidate = ti.newtype_ref or ti.source_type - if candidate is None: - return None - href = ctx.resolve_link(TypeIdentity(candidate, display_name)) - if href: - return _code_link(display_name, href) - return None - -def _markdown_type_name(ti: TypeInfo) -> str: - """Return the markdown display name for a type. - - Uses the semantic NewType name when present (e.g. `LanguageTag`), - otherwise falls back to the resolved markdown type (e.g. `string`). - """ - name = ti.newtype_name if is_semantic_newtype(ti) else None - return name or resolve_type_name(ti, "markdown") - - -def format_dict_type(ti: TypeInfo) -> str: - """Format a dict TypeInfo as bare `map` using resolved markdown names.""" - if ti.dict_key_type is None or ti.dict_value_type is None: - msg = f"format_dict_type requires dict key/value types, got {ti}" - raise ValueError(msg) - key = _markdown_type_name(ti.dict_key_type) - value = _markdown_type_name(ti.dict_value_type) - return f"map<{key}, {value}>" +def _format_literal(values: tuple[object, ...]) -> str: + """Format Literal values for display.""" + if len(values) == 1: + return f'`"{values[0]}"`' + return r" \| ".join(f'`"{v}"`' for v in values) def _format_union_members( - members: tuple[type[BaseModel], ...], + members: Sequence[type[BaseModel]], ctx: LinkContext | None, separator: str = r" \| ", ) -> str: - r"""Format union members as individually linked/backticked names. + r"""Format union members as individually linked / backticked names. 
- Each member is resolved independently so members with pages get linked - while others render as plain code spans. *separator* is inserted between - members (default is `\|` for table-cell safety). + Each member is resolved independently so members with pages get + linked while others render as plain code spans. `separator` is + inserted between members (default is `\|` for table-cell safety). """ return separator.join(resolve_type_link(TypeIdentity.of(m), ctx) for m in members) -def format_type( - field: FieldSpec, - ctx: LinkContext | None = None, -) -> str: - """Format a field's type for markdown display, with links and qualifiers.""" - ti = field.type_info - qualifiers: list[str] = [] +def _model_link(model_ref: ModelRef, ctx: LinkContext | None) -> str: + """Resolve a `ModelRef` to a markdown link or fallback code span.""" + src = model_ref.model.source_type + if src is not None: + return resolve_type_link(TypeIdentity(src, model_ref.model.name), ctx) + return f"`{model_ref.model.name}`" - if ti.kind == TypeKind.LITERAL and ti.literal_values: - if len(ti.literal_values) == 1: - return f'`"{ti.literal_values[0]}"`' - return r" \| ".join(f'`"{v}"`' for v in ti.literal_values) - - identity = _linked_type_identity(ti) - - if ti.kind == TypeKind.UNION and ti.union_members: - display = _format_union_members(ti.union_members, ctx) - if ti.is_list: - qualifiers.append("list") - elif ti.is_dict: - if identity: - display = resolve_type_link(identity, ctx) - qualifiers.append("map") - else: - display = f"`{format_dict_type(ti)}`" - elif identity: - display = resolve_type_link(identity, ctx) - # List layers outside a NewType wrap with list<> syntax (e.g., list[PhoneNumber] - # renders as list). List layers inside a NewType use a (list) - # qualifier instead (e.g., Sources wrapping list[SourceItem] renders as - # Sources (list)), since the list-ness is an implementation detail of the type. 
- if ti.newtype_outer_list_depth > 0: - display = _wrap_list_n(display, ti.newtype_outer_list_depth) - elif ti.is_list and ti.newtype_name is not None: # list is inside the NewType - qualifiers.append("list") - elif ti.is_list: - display = _wrap_list_n(display, ti.list_depth) - else: - # Fallback: types without a linked identity. Registered primitives (int32, - # Geometry) and Pydantic types (HttpUrl) may still link to aggregate pages - # via the placement registry. Unregistered primitives render as plain code. - base = resolve_type_name(ti, "markdown") - link = _try_primitive_link(ti, base, ctx) - if link and ti.is_list: - display = _wrap_list_n(link, ti.list_depth) - elif link: - display = link - elif ti.is_list: - display = _plain_list_type(base, ti.list_depth) - else: - display = f"`{base}`" +def _scalar_identity(scalar: Primitive) -> TypeIdentity | None: + """Return a linkable identity for a `Primitive`'s `source_type`, if any.""" + src = scalar.source_type + if src is None: + return None + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) or is_pydantic_sourced(src) + ): + return TypeIdentity.of(src) + return None + + +def _scalar_display(scalar: Scalar, ctx: LinkContext | None) -> tuple[str, bool]: + """Render a `Scalar` variant as a markdown string; second value is True if linked. + + Linked when the scalar is a `Primitive` with an Enum / BaseModel / + Pydantic-sourced `source_type` whose identity resolves to a page. + Otherwise renders as the registry-resolved markdown name. + """ + if isinstance(scalar, Primitive): + identity = _scalar_identity(scalar) + if identity is not None and ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href), True + if identity is not None: + return f"`{identity.name}`", False + return f"`{_registry_name(scalar)}`", False + + +def _registry_name(scalar: Scalar) -> str: + """Resolve a scalar to its markdown registry name (e.g. 
`int64`).""" + if isinstance(scalar, LiteralScalar): + return "Literal" + if isinstance(scalar, AnyScalar): + return "Any" + mapping = get_type_mapping(scalar.base_type) + if mapping is None and scalar.source_type is not None: + mapping = get_type_mapping(scalar.source_type.__name__) + if mapping is not None: + return mapping.markdown + return scalar.base_type + + +def _format_map(shape: MapOf, ctx: LinkContext | None) -> str: + """Format a `MapOf` as a bare `map<K, V>` code span (no outer wrappers).""" + key = _markdown_name_for_shape(shape.key) + value = _markdown_name_for_shape(shape.value) + return f"`map<{key}, {value}>`" + + +def _markdown_name_for_shape(shape: FieldShape) -> str: + """Return a bare markdown name (no link, no backticks) for a shape. + + Used inside `map<K, V>` rendering. Picks the semantic NewType name + when wrapping a registered primitive, otherwise the registry name + of the terminal scalar. + """ + if isinstance(shape, NewTypeShape): + return shape.name + if isinstance(shape, Scalar): + return _registry_name(shape) + if isinstance(shape, ModelRef): + return shape.model.name + if isinstance(shape, ArrayOf): + inner = _markdown_name_for_shape(shape.element) + return f"list<{inner}>" + if isinstance(shape, MapOf): + return ( + f"map<{_markdown_name_for_shape(shape.key)}, " + f"{_markdown_name_for_shape(shape.value)}>" + ) + return "?" + + +def format_type(field: FieldSpec, ctx: LinkContext | None = None) -> str: + """Format a field's type for markdown display, with links and qualifiers.""" + qualifiers: list[str] = [] + display = _format_shape(field.shape, ctx, qualifiers) if not field.is_required: qualifiers.append("optional") - if qualifiers: return f"{display} ({', '.join(qualifiers)})" return display -def _linked_or_backticked(ti: TypeInfo, ctx: LinkContext | None) -> tuple[str, bool]: - """Return (formatted_string, has_link) for a TypeInfo component.
+def _format_shape( + shape: FieldShape, ctx: LinkContext | None, qualifiers: list[str] +) -> str: + """Format a `FieldShape`, possibly appending qualifiers like `list`, `map`.""" + outer_depth, inner = _peel_arrays(shape) + + match inner: + case LiteralScalar(values=values): + if outer_depth > 0: + inside = " | ".join(f'"{v}"' for v in values) + return _plain_list_type(inside, outer_depth) + return _format_literal(values) + + case UnionRef(union=u): + if outer_depth > 0: + qualifiers.append("list") + return _format_union_members(u.members, ctx) + + case MapOf() as m: + map_str = _format_map(m, ctx) + if outer_depth > 0: + return _wrap_list_n(map_str.strip("`"), outer_depth) + return map_str + + case ModelRef() as m: + link = _model_link(m, ctx) + if outer_depth > 0: + return _wrap_list_n(link, outer_depth) + return link + + case NewTypeShape(name=name, ref=ref, inner=nt_inner): + link = resolve_type_link(TypeIdentity(ref, name), ctx) + if outer_depth > 0: + return _wrap_list_n(link, outer_depth) + if isinstance(nt_inner, ArrayOf): + qualifiers.append("list") + elif isinstance(nt_inner, MapOf): + qualifiers.append("map") + return link + + case Primitive() | AnyScalar() as s: + text, linked = _scalar_display(s, ctx) + if outer_depth > 0: + if linked: + return _wrap_list_n(text, outer_depth) + return _plain_list_type(text.strip("`"), outer_depth) + return text + + raise TypeError(f"Unhandled FieldShape: {shape!r}") + + +# ---- Underlying-type rendering for NewType pages ---- + + +def _peel_to_terminal(shape: FieldShape) -> FieldShape: + """Strip `NewTypeShape` / `ArrayOf` layers to find the terminal shape.""" + while True: + if isinstance(shape, NewTypeShape): + shape = shape.inner + elif isinstance(shape, ArrayOf): + shape = shape.element + else: + return shape + - Used by format_underlying_type to decide whether container types - need broken-backtick formatting (interleaving backtick runs with - linked text). 
+def _linked_or_backticked( + shape: FieldShape, ctx: LinkContext | None +) -> tuple[str, bool]: + """Return (formatted_string, has_link) for a shape component. - When `has_link` is True, `formatted_string` is a markdown link - ready for broken-backtick container syntax. When False, it is a raw - name that the caller embeds inside backticks. + Used by NewType page rendering to format the underlying type with + a link to its source page when one exists. """ - identity = _linked_type_identity(ti) + identity: TypeIdentity | None = None + _, cur = _peel_arrays(shape) + if isinstance(cur, NewTypeShape) and is_semantic_newtype(shape): + identity = TypeIdentity(cur.ref, cur.name) + elif isinstance(cur, Primitive) and cur.source_type is not None: + src = cur.source_type + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) + ): + identity = TypeIdentity(src, cur.base_type) if identity and ctx: href = ctx.resolve_link(identity) if href: return _code_link(identity.name, href), True - return _markdown_type_name(ti), False + return _markdown_name_for_underlying(shape), False -def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: - """Format a NewType's underlying type for the page header, with links. +def _markdown_name_for_underlying(shape: FieldShape) -> str: + """Bare markdown display name for a NewType's underlying type.""" + if is_semantic_newtype(shape): + _, cur = _peel_arrays(shape) + if isinstance(cur, NewTypeShape): + return cur.name + return resolve_type_name(shape) - Links enums and models that have their own pages. Does not link the - outermost NewType (which would self-reference). Dict key/value types - use full link resolution since they reference other types. 
- """ - if ti.kind == TypeKind.UNION and ti.union_members: - return _format_union_members(ti.union_members, ctx, separator=" | ") - if ti.is_dict and ti.dict_key_type and ti.dict_value_type: - key_str, key_linked = _linked_or_backticked(ti.dict_key_type, ctx) - val_str, val_linked = _linked_or_backticked(ti.dict_value_type, ctx) +def format_underlying_type(shape: FieldShape, ctx: LinkContext | None = None) -> str: + """Format a NewType's underlying type for the page header, with links.""" + terminal = _peel_to_terminal(shape) + if isinstance(terminal, UnionRef): + return _format_union_members(terminal.union.members, ctx, separator=" | ") + + if isinstance(terminal, MapOf): + key_str, key_linked = _linked_or_backticked(terminal.key, ctx) + val_str, val_linked = _linked_or_backticked(terminal.value, ctx) if key_linked or val_linked: if not key_linked: key_str = f"`{key_str}`" @@ -219,22 +309,28 @@ def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: return f"`map<`{key_str}`,`{val_str}`>`" return f"`map<{key_str}, {val_str}>`" - # Only link enums and models -- skip is_semantic_newtype to avoid - # self-linking (this TypeInfo belongs to the NewType being rendered). - identity = ( - TypeIdentity.of(ti.source_type) - if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type - else None - ) + # For underlying-type rendering on a NewType's own page, skip the + # is_semantic_newtype path to avoid self-linking: this shape + # belongs to the NewType being rendered. 
+ identity: TypeIdentity | None = None + if isinstance(terminal, Primitive) and terminal.source_type is not None: + src = terminal.source_type + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) + ): + identity = TypeIdentity.of(src) + + depth, _ = _peel_arrays(shape) + if identity and ctx: href = ctx.resolve_link(identity) if href: linked = _code_link(identity.name, href) - if ti.is_list: - return _wrap_list_n(linked, ti.list_depth) + if depth > 0: + return _wrap_list_n(linked, depth) return linked - base = identity.name if identity else resolve_type_name(ti, "markdown") - if ti.is_list: - return _plain_list_type(base, ti.list_depth) + base = identity.name if identity else resolve_type_name(shape) + if depth > 0: + return _plain_list_type(base, depth) return f"`{base}`" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py new file mode 100644 index 000000000..13a0e841a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py @@ -0,0 +1 @@ +"""PySpark codegen pipeline: FeatureSpec to expression and test modules.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py new file mode 100644 index 000000000..fc1f68e57 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py @@ -0,0 +1,265 @@ +"""Shared rendering primitives used by `renderer` and `test_renderer`. + +Concerns: + +- `jinja_env` -- the cached Jinja2 environment. +- `py_literal` / `tuple_literal` -- render Python values back to source code. +- `parse_field_eq` -- unwrap a `FieldEqCondition` / `Not(FieldEqCondition)`. 
+- check/label naming -- `check_name`, `field_label`, `column_level_suffix`, + `model_constraint_field_label`, `COLUMN_LEVEL_FUNCTIONS` (membership), + and `_COLUMN_LEVEL_SUFFIXES` (label suffix lookup). +- collision disambiguation -- `disambiguate` (function names) and + `compute_label_suffixes` (violation labels). +""" + +from __future__ import annotations + +import functools +from collections import Counter +from collections.abc import Hashable, Iterable +from enum import Enum +from pathlib import Path +from typing import NamedTuple, TypeVar + +from jinja2 import Environment, FileSystemLoader + +from overture.schema.system.field_path import ArrayPath +from overture.schema.system.model_constraint import ( + Condition, + FieldEqCondition, + Not, +) + +from .check_ir import Check, ModelCheck +from .constraint_dispatch import ForbidIf, RequireIf, model_constraint_function + +__all__ = [ + "COLUMN_LEVEL_FUNCTIONS", + "FieldEq", + "check_name", + "column_level_suffix", + "compute_label_suffixes", + "disambiguate", + "field_label", + "jinja_env", + "model_constraint_field_label", + "parse_field_eq", + "py_literal", + "tuple_literal", +] + +_K = TypeVar("_K", bound=Hashable) + +# Constraint functions that emit a column-level check (one per field +# rather than per element), used by the check builder to split them +# into their own `Check` IR nodes. +COLUMN_LEVEL_FUNCTIONS: frozenset[str] = frozenset( + { + "check_required", + "check_array_min_length", + "check_array_max_length", + "check_struct_unique", + } +) + +# Violation label suffix per column-level check that shares its +# field's structural path. `check_required` lands on its field's own +# path, so it stays absent from this table. 
+_COLUMN_LEVEL_SUFFIXES: dict[str, str] = { + "check_array_min_length": "_min_length", + "check_array_max_length": "_max_length", + "check_struct_unique": "_unique", +} + +_TEMPLATES_DIR = Path(__file__).parent / "templates" + + +@functools.lru_cache(maxsize=1) +def jinja_env() -> Environment: + """Return the shared Jinja2 environment for PySpark code generation templates.""" + env = Environment( + loader=FileSystemLoader(_TEMPLATES_DIR), + trim_blocks=True, + lstrip_blocks=True, + keep_trailing_newline=True, + autoescape=False, + ) + env.filters["py_literal"] = py_literal + return env + + +_CHECK_PREFIX = "check_" + + +def tuple_literal(rendered_items: Iterable[str]) -> str: + """Wrap pre-rendered items as a Python tuple literal source. + + A single-element tuple needs a trailing comma; this helper applies + that rule so callers rendering enum-like values that don't fit + `py_literal` can still share its tuple-formatting behaviour. + """ + items = list(rendered_items) + joined = ", ".join(items) + return f"({joined},)" if len(items) == 1 else f"({joined})" + + +def py_literal(value: object) -> str: + """Render a Python value as source code. + + Recurses into containers to extract `Enum.value` (since `repr()` of + an Enum member is not valid Python). Quote style and line wrapping + are left to `ruff format`. 
+ """ + if isinstance(value, Enum): + return py_literal(value.value) + if isinstance(value, dict): + items = ", ".join(f"{py_literal(k)}: {py_literal(v)}" for k, v in value.items()) + return "{" + items + "}" + if isinstance(value, list): + return "[" + ", ".join(py_literal(v) for v in value) + "]" + if isinstance(value, tuple): + return tuple_literal(py_literal(v) for v in value) + return repr(value) + + +class FieldEq(NamedTuple): + """An unwrapped `FieldEqCondition`, with `negated` set when wrapped in `Not`.""" + + field_name: str + value: object + negated: bool + + +def parse_field_eq(condition: Condition) -> FieldEq | None: + """Unwrap a `FieldEqCondition` or `Not(FieldEqCondition)`. + + Returns a `FieldEq` triple for either shape, or `None` for any + other condition. `negated` is True iff the condition was wrapped + in `Not`. + """ + match condition: + case Not(inner=FieldEqCondition(field_name=fn, value=v)): + return FieldEq(fn, v, True) + case FieldEqCondition(field_name=fn, value=v): + return FieldEq(fn, v, False) + case _: + return None + + +def check_name(function: str, override: str | None = None) -> str: + """Strip the `check_` prefix to produce a human-readable check name.""" + if override is not None: + return override + return function.removeprefix(_CHECK_PREFIX) + + +def column_level_suffix(check: Check) -> str: + """Return the column-level label suffix for `check`, or empty string. + + Column-level checks (`check_array_min_length`, `check_struct_unique`, + etc.) share their structural path with the field they constrain; the + suffix differentiates the violation label so each check reports a + distinct `Check.field`. + """ + if not check.descriptors: + return "" + return _COLUMN_LEVEL_SUFFIXES.get(check.descriptors[0].function, "") + + +def field_label(check: Check) -> str: + """Render the violation label for a Check. + + Combines the structural field path with any column-level suffix + (`_min_length`, `_unique`, etc.) 
so each check reports a distinct + `Check.field` even when several share a structural path. + """ + return f"{check.target}{column_level_suffix(check)}" + + +def _model_check_base_label(check: ModelCheck) -> str: + """Compute the violation field label sans collision suffix. + + - `require_if` / `forbid_if` produce a per-target label + (`field_required` / `path.field_forbidden`) since each descriptor + now carries a single target field (multi-field decorators split + at dispatch time). + - Other kinds (`require_any_of`, `radio_group`, `min_fields_set`) + name the whole constraint; on `ArrayPath` targets they use the + path itself so anchors are distinguishable across nestings. + """ + match check.descriptor: + case RequireIf(): + kind_suffix = "_required" + case ForbidIf(): + kind_suffix = "_forbidden" + case _: + if isinstance(check.target, ArrayPath): + return str(check.target) + return check_name(model_constraint_function(check.descriptor)) + target = check.descriptor.field_names[0] + if not isinstance(check.target, ArrayPath): + return f"{target}{kind_suffix}" + return f"{check.target}.{target}{kind_suffix}" + + +def model_constraint_field_label(check: ModelCheck, label_suffix: str) -> str: + """Compute the field label for a model constraint check. + + `label_suffix` (from `compute_label_suffixes`) disambiguates labels + that would otherwise collide -- e.g. two `@require_any_of` on the + same model, or two `@require_if(["x"], ...)` with different + conditions. + """ + return f"{_model_check_base_label(check)}{label_suffix}" + + +def _occurrence_indices(keys: list[_K]) -> list[tuple[int, int]]: + """Pair each key with `(occurrence_index, total_count)`. + + `occurrence_index` is the 0-based position of the key among its + equal siblings; `total_count` is how many times the key appears in + `keys`. Both `disambiguate` and `compute_label_suffixes` need this + "where am I within my collision group" view. 
+ """ + counts: Counter[_K] = Counter(keys) + seen: Counter[_K] = Counter() + result: list[tuple[int, int]] = [] + for key in keys: + result.append((seen[key], counts[key])) + seen[key] += 1 + return result + + +def disambiguate(names: list[str]) -> list[str]: + """Make a list of names unique by appending `_N` to repeated entries. + + The first occurrence of a name is left bare; the second becomes + `name_1`, the third `name_2`, and so on. Names that appear once are + untouched. + + Assumes no input name already matches a generated `name_N` form; a + collision there would reintroduce a duplicate. Field names in + practice never carry that suffix, so the assumption holds. + """ + return [ + f"{name}_{idx}" if total > 1 and idx > 0 else name + for name, (idx, total) in zip(names, _occurrence_indices(names), strict=True) + ] + + +def compute_label_suffixes(model_checks: list[ModelCheck]) -> list[str]: + """Pre-compute field label suffixes, adding counters only for collisions. + + Unlike `disambiguate`, every colliding entry receives a `_N` suffix + including the first one (`_0`, `_1`, ...). This is symmetric on + purpose: violation labels for a colliding group all share the same + base name, so each needs an explicit collision index to stay + distinct. `disambiguate` operates on Python function names where + leaving the first occurrence bare preserves readable identifiers + for the common no-collision case. 
+ """ + base_labels = [_model_check_base_label(check) for check in model_checks] + return [ + f"_{idx}" if total > 1 else "" + for idx, total in _occurrence_indices(base_labels) + ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py new file mode 100644 index 000000000..9e736a67c --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -0,0 +1,699 @@ +"""Walk FieldSpec trees to produce Check/ModelCheck IR for rendering. + +Consults the constraint dispatch table to map each constraint to a +descriptor, then applies composition rules the dispatch table can't see: + +- Coalesce ordering: gather descriptors for the same field into one + `Check` (required first, then enum, then dispatched constraints), + deduplicate, and split column-level checks into separate suffixed checks. +- Target resolution: a shape walker descends each field's `FieldShape` + tree, building the `ScalarPath` or `ArrayPath` target by appending + segments as it goes -- so the path read in the code is the path that + lands in the IR. Entering a `list[...]` layer promotes the path's + terminal struct segment to an iterated `ArraySegment`. +- Subtype gating: annotate variant-specific fields with discriminator + `Guard`s, synthesize forbid_if/require_if for absent or required + variants, and gate check_required under nullable struct ancestors. 
+""" + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, replace +from enum import Enum + +from pydantic import BaseModel +from typing_extensions import assert_never + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + ScalarPath, + promote_terminal_array, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ModelConstraint, + Not, +) + +from ..extraction.field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_primitive +from ..extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ..extraction.type_registry import PRIMITIVE_TYPES +from ._render_common import COLUMN_LEVEL_FUNCTIONS +from .check_ir import ( + Check, + ColumnGuard, + ElementGuard, + Guard, + ModelCheck, +) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + RequireIf, + dispatch_base_type, + dispatch_constraint, + dispatch_model_constraint, + dispatch_newtype, + forbid_if_field_shapes, +) + +__all__ = [ + "build_checks", +] + + +def _dispatch_layer_constraints( + constraints: tuple[ConstraintSource, ...], + base_type: str | None, +) -> list[ExpressionDescriptor]: + """Dispatch one shape layer's constraints, skipping primitive-inherent ones.""" + descriptors: list[ExpressionDescriptor] = [] + for cs in constraints: + if cs.source_name is not None and cs.source_name in PRIMITIVE_TYPES: + continue + desc = dispatch_constraint(cs.constraint, base_type=base_type) + if desc is not None: + descriptors.append(desc) + return descriptors + + +def _enum_values(scalar: Scalar) -> list[object] | None: + """Return enum/literal values for a terminal `Scalar`, or `None`.""" + if isinstance(scalar, LiteralScalar): + return list(scalar.values) + if isinstance(scalar, Primitive): + src = 
scalar.source_type + if isinstance(src, type) and issubclass(src, Enum): + return [m.value for m in src] + return None + + +def _required_descriptor(gate: FieldPath | None) -> ExpressionDescriptor: + return ExpressionDescriptor(function="check_required", gate=gate) + + +@dataclass(frozen=True, slots=True) +class _ShapeTerminal: + """A `ModelRef`/`UnionRef` terminal and the path the walker reached it at. + + The `FieldSpec` recursion uses `path` directly as the prefix for the + sub-model's or sub-union's fields. The walker returns `None` instead + of a `_ShapeTerminal` for terminals it fully handles itself (scalars, + maps, and NewTypes with a dispatch override). + """ + + ref: ModelRef | UnionRef + path: FieldPath + + +def _walk_field_shape( + shape: FieldShape, + path: FieldPath, + *, + base_type: str | None, + required: bool, + required_gate: FieldPath | None, + carried_element: list[ExpressionDescriptor], +) -> tuple[list[Check], _ShapeTerminal | None]: + """Descend a `FieldShape`, emitting the field's own Checks. + + Builds the `FieldPath` target structurally: `ArrayOf` promotes the + path's terminal segment, `NewTypeShape` passes the path through, + terminals emit at the path reached. Returns the emitted Checks plus, + at a `ModelRef`/`UnionRef` terminal, a `_ShapeTerminal` for the + `FieldSpec` recursion (`None` for terminals the walker fully handles). + + Parameters + ---------- + path + The path reached so far, promoted once per `ArrayOf` layer + crossed. `required` and `path` move together: a field's path + starts as a plain struct path and is promoted exactly when the + first `ArrayOf` clears `required`, so while `required` holds the + path is still the plain struct path -- a standalone + `check_required` always lands there. + required + Whether the field still needs a `check_required`. Cleared by the + first `ArrayOf`: before it, `check_required` merges into the + terminal Check; from it on, it is a standalone column-level Check. 
+ carried_element + Element-level descriptors from `ArrayOf` layers above, prepended + to the terminal's own element-level descriptors. + """ + match shape: + case NewTypeShape(name=name, inner=inner): + nt_descriptors = dispatch_newtype(name) + if nt_descriptors is not None: + if isinstance(path.segments[-1], ArraySegment): + # A NewType with a dispatch override nested under a list + # layer has no schema field; raise to keep the gap loud + # rather than emit an untested target (cf. list[list[Union]]). + raise NotImplementedError( + f"NewType with a dispatch override ({name}) nested " + "under a list layer is not supported" + ) + descriptors = list(nt_descriptors) + if required: + descriptors.insert(0, _required_descriptor(required_gate)) + return [Check(descriptors=tuple(descriptors), target=path)], None + return _walk_field_shape( + inner, + path, + base_type=base_type, + required=required, + required_gate=required_gate, + carried_element=carried_element, + ) + + case ArrayOf(element=element, constraints=constraints): + layer_descriptors = _dispatch_layer_constraints( + constraints, + base_type, + ) + column_descriptors = list( + dict.fromkeys( + d for d in layer_descriptors if d.function in COLUMN_LEVEL_FUNCTIONS + ) + ) + element_descriptors = [ + d for d in layer_descriptors if d.function not in COLUMN_LEVEL_FUNCTIONS + ] + checks: list[Check] = [] + if required: + checks.append( + Check( + descriptors=(_required_descriptor(required_gate),), + target=path, + ) + ) + checks.extend( + Check(descriptors=(d,), target=path) for d in column_descriptors + ) + sub_checks, terminal = _walk_field_shape( + element, + promote_terminal_array(path), + base_type=base_type, + required=False, + required_gate=required_gate, + carried_element=[*carried_element, *element_descriptors], + ) + return [*checks, *sub_checks], terminal + + case UnionRef(): + terminal_seg = path.segments[-1] + if isinstance(terminal_seg, ArraySegment) and terminal_seg.iter_count > 1: + # 
`list[list[Union]]` would build a multi-iter union target, + # but no schema field has that shape. The walker raises to + # keep the gap loud rather than silently emit one. + raise NotImplementedError( + "Union nested under multiple list layers " + "(list[list[Union]]) is not supported" + ) + return _ref_terminal_checks(shape, path, required, required_gate) + + case ModelRef(): + return _ref_terminal_checks(shape, path, required, required_gate) + + case Primitive() | LiteralScalar() | AnyScalar() | MapOf(): + # `MapOf` shares this arm: a map is a terminal the walker + # does not descend into. Length constraints on a MapOf are + # rejected at extraction (`attach_constraints` raises). + # No schema field exercises map-level constraints today. + constraints = shape.constraints + element_descriptors = list(carried_element) + enum_values = _enum_values(shape) if isinstance(shape, Scalar) else None + if enum_values is not None: + element_descriptors.append( + ExpressionDescriptor( + function="check_enum", + args=(tuple(enum_values),), + ) + ) + element_descriptors.extend( + _dispatch_layer_constraints(constraints, base_type) + ) + if base_type is not None: + base_descriptors = dispatch_base_type(base_type) + if base_descriptors is not None: + element_descriptors.extend(base_descriptors) + element_descriptors = list(dict.fromkeys(element_descriptors)) + + if required: + return [ + Check( + descriptors=( + _required_descriptor(required_gate), + *element_descriptors, + ), + target=path, + ) + ], None + if element_descriptors: + return [ + Check(descriptors=tuple(element_descriptors), target=path) + ], None + return [], None + + assert_never(shape) + + +def _ref_terminal_checks( + ref: ModelRef | UnionRef, + path: FieldPath, + required: bool, + required_gate: FieldPath | None, +) -> tuple[list[Check], _ShapeTerminal]: + """Handle a `ModelRef`/`UnionRef` terminal: emit `check_required`, hand back the ref. 
+ + A required model or union field always gets a standalone + `check_required` Check; `required` holds only before any `ArrayOf`, + so `path` is the field's plain struct path. The sub-fields are the + caller's job, reached via the returned `_ShapeTerminal`. + """ + checks: list[Check] = [] + if required: + checks.append( + Check( + descriptors=(_required_descriptor(required_gate),), + target=path, + ) + ) + return checks, _ShapeTerminal(ref=ref, path=path) + + +def _build_field_checks( + field_spec: FieldSpec, + prefix: FieldPath = ScalarPath(), + *, + nullable_gate: FieldPath | None = None, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Build Checks for a single field by walking its shape tree. + + `arm` is the singleton union-arm discriminator value the field belongs + to (when it lives in exactly one arm), or `None` when the field is + shared. It propagates to any model constraints discovered through this + field's sub-models so per-arm test modules can filter them correctly. 
+ """ + path = prefix.append_struct(field_spec.name) + checks, terminal = _walk_field_shape( + field_spec.shape, + path, + base_type=( + p.base_type + if (p := terminal_primitive(field_spec.shape)) is not None + else None + ), + required=field_spec.is_required, + required_gate=nullable_gate, + carried_element=[], + ) + + model_checks: list[ModelCheck] = [] + match terminal: + case None: + pass + case _ShapeTerminal(ref=UnionRef(union=union_spec), path=terminal_path): + sub_field_checks, sub_model_checks = _recurse_into_union( + union_spec, terminal_path, arm=arm + ) + checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + case _ShapeTerminal(ref=ModelRef(model=model_spec), path=terminal_path): + sub_field_checks, sub_model_checks = _recurse_into_model( + model_spec, + terminal_path, + field_spec.is_optional, + nullable_gate, + arm=arm, + ) + checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + case _ShapeTerminal(ref=ref): + raise AssertionError( + f"unhandled _ShapeTerminal.ref variant: {type(ref).__name__}" + ) + + return checks, model_checks + + +def _recurse_into_model( + model_spec: ModelSpec, + prefix: FieldPath = ScalarPath(), + is_optional: bool = False, + nullable_gate: FieldPath | None = None, + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Walk a MODEL-kind field's children plus its model-level constraints. + + `prefix` is the terminal path the shape walker reached the `ModelRef` + at, defaulting to the empty `ScalarPath()` at the row root. Its terminal + segment is an `ArraySegment` exactly when the field is itself a list, + which resets the nullable gate (array iteration handles element + nullability). + + `arm` propagates from the union arm whose variant-specific field led + here, so model constraints declared on the sub-model are tagged with + that arm rather than `None` (which would route them to every per-arm + test). 
+ """ + last_seg = prefix.segments[-1] if prefix.segments else None + field_is_list = isinstance(last_seg, ArraySegment) + if field_is_list: + child_gate: FieldPath | None = None + else: + child_gate = prefix if is_optional else nullable_gate + + field_checks: list[Check] = [] + model_checks: list[ModelCheck] = [] + for sub_field in model_spec.fields: + sub_field_checks, sub_model_checks = _build_field_checks( + sub_field, + prefix=prefix, + nullable_gate=child_gate, + arm=arm, + ) + field_checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + + if model_spec.constraints: + sub_model_constraint_checks = _dispatch_model_constraints( + model_spec.constraints, + model_spec.fields, + target=_model_constraint_target(prefix), + arm=arm, + ) + if sub_model_constraint_checks: + _guard_struct_nested_anchor(prefix, model_spec.name) + model_checks.extend(sub_model_constraint_checks) + return field_checks, model_checks + + +def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: + """Raise when emitting a model constraint at a struct-only prefix. + + See `_model_constraint_target`: in that case the constraint's target + collapses to the row root, which is wrong for any non-skipped + constraint. Today only `NoExtraFieldsConstraint` reaches here (and + dispatches to None); a real descriptor at this depth is a renderer + gap, not a normal case. + """ + if not isinstance(prefix, ArrayPath) and prefix.segments: + raise NotImplementedError( + f"Model constraint on struct-nested {name!r} " + f"(reached at {prefix!r}) -- the renderer has no anchor " + "for nested-struct model constraints." + ) + + +def _recurse_into_union( + union_spec: UnionSpec, + prefix: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Walk a UNION-kind field's variants, gathering Checks and ModelChecks. 
+ + `prefix` is the terminal path the shape walker reached the `UnionRef` + at; the union's variant fields live directly under it. An `ArrayPath` + prefix means the union is reached through array iteration, so variant + gates are element-level and model constraints target that path. + + `arm` is the outer union arm whose variant-specific field reached this + inner union. It tags any model constraints discovered here so they + aren't propagated to other arms' test modules. + """ + mapping = union_spec.discriminator_mapping or {} + value_by_class = {cls: value for value, cls in mapping.items()} + union_target = _model_constraint_target(prefix) + + field_checks, field_model_checks = _field_checks_for_union( + union_spec, value_by_class, prefix=prefix, arm=arm + ) + union_level_checks = _model_checks_for_union( + union_spec, value_by_class, union_target, arm=arm + ) + exclusivity_checks = _exclusivity_checks_for_union( + union_spec, value_by_class, union_target, arm=arm + ) + if union_level_checks or exclusivity_checks: + _guard_struct_nested_anchor(prefix, union_spec.name) + return field_checks, union_level_checks + field_model_checks + exclusivity_checks + + +def _model_constraint_target(prefix: FieldPath) -> FieldPath: + """Where a model constraint's check should be anchored. + + Two supported cases: + + - `ArrayPath` -- constraints on a sub-model reached through array + iteration target the array path (so the renderer wraps the check + in `array_check`). + - Empty or struct-only `ScalarPath` -- constraints anchor at the row + root. Pure struct nesting (e.g. `Names` reached at + `ScalarPath('names')`) collapses here because the renderer has no + anchor for nested-struct model constraints. 
The only constraint kind + currently reachable through pure struct nesting is + `NoExtraFieldsConstraint`, which `dispatch_model_constraint` + discards before the target is consulted, so the collapse is + observationally inert today; a non-skipped constraint at this depth + would surface as a wrong-anchor bug. + """ + return prefix if isinstance(prefix, ArrayPath) else ScalarPath() + + +def _dispatch_model_constraints( + constraints: tuple[ModelConstraint, ...], + fields: list[FieldSpec], + *, + target: FieldPath = ScalarPath(), + arm: str | None = None, +) -> list[ModelCheck]: + """Dispatch model constraints to ModelChecks.""" + return [ + ModelCheck(descriptor=desc, target=target, arm=arm) + for mc in constraints + for desc in dispatch_model_constraint(mc, fields) + ] + + +def _singleton_arm(values: tuple[str, ...]) -> str | None: + """Return the sole arm in `values`, or None when there isn't exactly one. + + No real schema today has a variant-specific field belonging to a + proper subset of arms (2-of-N): every variant-specific field is + declared on exactly one arm. If a future schema introduces a 2-of-N + field whose sub-model declares model constraints, this collapse + would broadcast those constraints to every arm (including the ones + the field doesn't belong to). `TestMultiArmVariantSourcesPolicy` + pins the current behaviour as a tombstone. + """ + return values[0] if len(values) == 1 else None + + +def _field_checks_for_union( + spec: UnionSpec, + value_by_class: dict[type[BaseModel], str], + prefix: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Build field checks for a union spec's annotated fields. + + `arm` is the outer-union arm threaded through from an enclosing + `_recurse_into_union`. 
When present, every sub-model constraint + reached from here inherits that arm -- the inner union's own + discriminator is irrelevant to per-arm test filtering, which always + keys on the outermost union's discriminator. + """ + guard_cls: type[Guard] = ( + ElementGuard if isinstance(prefix, ArrayPath) else ColumnGuard + ) + field_checks: list[Check] = [] + model_checks: list[ModelCheck] = [] + discriminator = spec.discriminator_field + for af in spec.annotated_fields: + values: tuple[str, ...] = () + if af.variant_sources is not None and discriminator is not None: + values = tuple( + value_by_class[src] + for src in af.variant_sources + if src in value_by_class + ) + # Outer arm dominates: when this is a nested union, every sub-model + # constraint discovered here belongs to the outer arm. Only the + # outermost union picks a `field_arm` from its own variant sources, + # and only when the field is variant-specific to a single arm. + field_arm = arm if arm is not None else _singleton_arm(values) + checks, sub_model_checks = _build_field_checks( + af.field_spec, prefix=prefix, arm=field_arm + ) + model_checks.extend(sub_model_checks) + if values and discriminator is not None: + # Outer guards land first so the renderer composes + # outer-then-inner (e.g. a `ColumnGuard` from a parent union, + # then an `ElementGuard` from the nested union the field + # lives in). + guard: Guard = guard_cls(discriminator=discriminator, values=values) + checks = [replace(ck, guards=(guard, *ck.guards)) for ck in checks] + field_checks.extend(checks) + return field_checks, model_checks + + +def _model_checks_for_union( + spec: UnionSpec, + arm_by_class: dict[type[BaseModel], str], + target: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> list[ModelCheck]: + """Build ModelChecks for the union itself plus each member's own constraints. + + When `arm` is None (top-level union): union-level constraints carry + `arm=None` because they apply regardless of which arm matches. 
+ Member-class constraints (e.g. `@radio_group` on `RoadSegment`) are + tagged with the discriminator value mapped to that class so the test + renderer can confine them to the right per-arm test module. + + When `arm` is set (nested union reached from an outer arm): every + check produced -- union-level and member-level -- inherits that outer + arm. The inner union's own discriminator is irrelevant to per-arm + test filtering, which always keys on the outermost union's + discriminator. + """ + model_checks = _dispatch_model_constraints( + spec.constraints, + spec.fields, + target=target, + arm=arm, + ) + for member in spec.member_specs: + member_constraints = ModelConstraint.get_model_constraints(member.member_cls) + member_arm = arm if arm is not None else arm_by_class.get(member.member_cls) + model_checks.extend( + _dispatch_model_constraints( + member_constraints, + member.spec.fields, + target=target, + arm=member_arm, + ) + ) + return model_checks + + +def _exclusivity_checks_for_union( + spec: UnionSpec, + value_by_class: dict[type[BaseModel], str], + target: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> list[ModelCheck]: + """Generate forbid_if/require_if checks from union variant structure. + + Unlike `dispatch_model_constraint` (which maps user-declared + `ModelConstraint` objects to descriptors), this synthesizes + `ForbidIf`/`RequireIf` descriptors directly from the union's variant + grouping. The input is a structural property of the union, not a + declared constraint, so there is no source `ModelConstraint` to + dispatch from. + + `arm` is the outer-union arm threaded through when this union is + nested inside another. Inner exclusivity checks belong to that outer + arm rather than being broadcast to every arm. 
+ """ + if spec.discriminator_mapping is None or spec.discriminator_field is None: + return [] + + all_values = set(spec.discriminator_mapping) + + grouped: dict[str, set[type[BaseModel]]] = defaultdict(set) + required_by_field: dict[str, set[type[BaseModel]]] = defaultdict(set) + shape_by_field: dict[str, FieldShape] = {} + for af in spec.annotated_fields: + if af.variant_sources is None: + continue + name = af.field_spec.name + shape_by_field[name] = af.field_spec.shape + for src in af.variant_sources: + if src in value_by_class: + grouped[name].add(src) + if af.field_spec.is_required: + required_by_field[name].add(src) + + def forbid_check(field_name: str, condition: FieldEqCondition | Not) -> ModelCheck: + return ModelCheck( + descriptor=ForbidIf( + field_names=(field_name,), + condition=condition, + field_shapes=forbid_if_field_shapes((field_name,), shape_by_field), + ), + target=target, + arm=arm, + ) + + def require_check(field_name: str, condition: FieldEqCondition | Not) -> ModelCheck: + return ModelCheck( + descriptor=RequireIf(field_names=(field_name,), condition=condition), + target=target, + arm=arm, + ) + + checks: list[ModelCheck] = [] + disc_field = spec.discriminator_field + for field_name, variant_classes in grouped.items(): + variant_values = {value_by_class[cls] for cls in variant_classes} + excluded_values = all_values - variant_values + if not excluded_values: + continue + + if len(variant_values) == 1 and len(excluded_values) > 1: + (sole_value,) = variant_values + checks.append( + forbid_check(field_name, Not(FieldEqCondition(disc_field, sole_value))) + ) + else: + for exc_val in sorted(excluded_values): + checks.append( + forbid_check(field_name, FieldEqCondition(disc_field, exc_val)) + ) + + required_classes = required_by_field[field_name] + required_values = {value_by_class[cls] for cls in required_classes} + for req_val in sorted(required_values): + checks.append( + require_check(field_name, FieldEqCondition(disc_field, req_val)) + ) + + 
return checks + + +def build_checks( + spec: FeatureSpec, +) -> tuple[list[Check], list[ModelCheck]]: + """Build all check IR for a feature spec. + + Roots the walk at the empty `ScalarPath()` and delegates to the same + helpers used at every nested level (`_recurse_into_union` for unions, + `_recurse_into_model` for models), so the row-root and nested cases + share one path. + """ + if isinstance(spec, UnionSpec): + return _recurse_into_union(spec) + return _recurse_into_model(spec) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py new file mode 100644 index 000000000..e9029c632 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py @@ -0,0 +1,83 @@ +"""Tree-shaped IR for PySpark check expressions. + +Sum types describe each check's structural placement: + +- `Check.target: FieldPath` -- a `ScalarPath` or `ArrayPath` locating + where the descriptor's expression is evaluated. The choice of variant + signals whether the renderer wraps the expression in `array_check` / + `nested_array_check`. +- `Guard` -- a single discriminator gate. `Check.guards` is a tuple + of `Guard`s AND-composed at render time; nested-union gating + composes one `ColumnGuard` with one `ElementGuard`. + +The check_builder produces these types and the renderer consumes them. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TypeAlias + +from overture.schema.system.field_path import FieldPath, ScalarPath + +from .constraint_dispatch import ExpressionDescriptor, ModelConstraintDescriptor + +__all__ = [ + "Check", + "ColumnGuard", + "ElementGuard", + "Guard", + "ModelCheck", +] + + +@dataclass(frozen=True, slots=True) +class ColumnGuard: + """Discriminator gate where the discriminator is a top-level row column.""" + + discriminator: str + values: tuple[str, ...] 
+ + +@dataclass(frozen=True, slots=True) +class ElementGuard: + """Discriminator gate where the discriminator is a struct field inside an array element.""" + + discriminator: str + values: tuple[str, ...] + + +Guard: TypeAlias = ColumnGuard | ElementGuard + + +@dataclass(frozen=True, slots=True) +class Check: + """A field-level validation check.""" + + descriptors: tuple[ExpressionDescriptor, ...] + target: FieldPath + guards: tuple[Guard, ...] = () + + +@dataclass(frozen=True, slots=True) +class ModelCheck: + """A model-level validation check (cross-field constraint). + + `target` locates the model the constraint applies to: an empty + `ScalarPath()` for row-root constraints, or an `ArrayPath` when the + constrained model is reached by iterating one or more arrays. The + default `ScalarPath()` makes the row-root case ergonomic at + construction sites and is the common case; `Check.target` has no + sensible default and is required. + + `arm` records the discriminator value of the union member that + contributed the constraint, or `None` when the constraint applies to + every arm. The test renderer filters per-arm test modules by this + value. Constraints discovered through a variant-specific field's + sub-model or sub-union inherit the contributing outer arm, so they + land only in that arm's test module. + """ + + descriptor: ModelConstraintDescriptor + target: FieldPath = ScalarPath() + arm: str | None = None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py new file mode 100644 index 000000000..b02f5b735 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py @@ -0,0 +1,509 @@ +"""Constraint type to PySpark expression descriptor dispatch. + +Pure mapping from constraint objects to expression descriptors. 
+No awareness of field paths, list depth, or struct nesting -- +those are composition concerns handled by check_builder. +""" + +from __future__ import annotations + +import re +from collections.abc import Callable, Mapping +from dataclasses import dataclass +from typing import Any, TypeAlias + +from annotated_types import Ge, Gt, Interval, Le, Lt +from pydantic import Strict + +from overture.schema.system.case import to_snake_case +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_constraint.string import ( + JsonPointerConstraint, + PatternConstraint, + StrippedConstraint, +) +from overture.schema.system.field_path import FieldPath +from overture.schema.system.model_constraint import ( + Condition, + ForbidIfConstraint, + MinFieldsSetConstraint, + NoExtraFieldsConstraint, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryTypeConstraint +from overture.schema.system.ref import Reference + +from ..extraction.field import FieldShape, ModelRef +from ..extraction.field_walk import has_array_layer, terminal_of +from ..extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) +from ..extraction.specs import FieldSpec + +__all__ = [ + "ExpressionDescriptor", + "ForbidIf", + "MinFieldsSet", + "ModelConstraintDescriptor", + "RadioGroup", + "RequireAnyOf", + "RequireIf", + "dispatch_base_type", + "dispatch_constraint", + "dispatch_model_constraint", + "dispatch_newtype", + "forbid_if_field_shapes", + "model_constraint_function", + "model_mutation_function", +] + + +@dataclass(frozen=True, slots=True) +class ExpressionDescriptor: + """Describes a constraint_expressions function call. + + `function` names the function (e.g., `"check_bounds"`). + `args` are positional arguments after `col` and `field`. 
+ `kwargs` are keyword arguments, stored as a tuple of `(name, value)` + pairs so the descriptor is hashable -- consumers convert with `dict()` + when they need mapping access. + `constraint_type` is the Python class of the constraint that + produced this descriptor (e.g., `NoWhitespaceConstraint`), + used by test generators to pick pattern-appropriate mutation values. + `gate` is the structural path to a nullable ancestor struct; when set, + the renderer wraps the expression in `F.when(gate.isNotNull(), ...)`. + `label` is a human-readable description used in error messages + (e.g., `"ISO 3166-1 alpha-2 country code"`). + `check_name` overrides the Check.name derivation in error_key; + when None, the renderer strips the `check_` prefix from `function`. + """ + + function: str + args: tuple[object, ...] = () + kwargs: tuple[tuple[str, object], ...] = () + constraint_type: type | None = None + gate: FieldPath | None = None + label: str | None = None + check_name: str | None = None + + +_BASE_TYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { + "HttpUrl": ( + ExpressionDescriptor(function="check_url_format"), + ExpressionDescriptor(function="check_url_length"), + ), + "EmailStr": (ExpressionDescriptor(function="check_email"),), + "BBox": ( + ExpressionDescriptor(function="check_bbox_completeness"), + ExpressionDescriptor(function="check_bbox_lat_ordering"), + ExpressionDescriptor(function="check_bbox_lat_range"), + ), +} + +_NEWTYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { + "LinearlyReferencedRange": ( + ExpressionDescriptor(function="check_linear_range_length"), + ExpressionDescriptor(function="check_linear_range_bounds"), + ExpressionDescriptor(function="check_linear_range_order"), + ), +} + + +def _normalize_anchor(pattern: str) -> str: + """Replace trailing `$` with `\\z` for Java/Spark regex compatibility. + + Leaves an escaped trailing `\\$` (literal dollar match) untouched. 
+ """ + if pattern.endswith("$") and not pattern.endswith(r"\$"): + return pattern[:-1] + r"\z" + return pattern + + +def _pattern_check_name(constraint: PatternConstraint) -> str: + """Derive a snake_case check name from the constraint class name.""" + if type(constraint) is PatternConstraint: + return "pattern" + return to_snake_case(type(constraint).__name__.removesuffix("Constraint")) + + +def _pattern_label(constraint: PatternConstraint) -> str: + """Extract a human-readable label from a PatternConstraint.""" + if constraint.description: + return constraint.description + doc = type(constraint).__doc__ + if doc: + return doc.strip().split("\n")[0].rstrip(".") + name = type(constraint).__name__.removesuffix("Constraint") + return re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", name).lower() + + +_ConstraintHandler = Callable[[Any, str | None], ExpressionDescriptor | None] + + +_BOUND_ATTRS = ("ge", "gt", "le", "lt") + +_FLOAT_BASE_TYPES = frozenset({"float", "float32", "float64"}) + + +def _dispatch_bounds( + constraint: Ge | Gt | Le | Lt | Interval, + base_type: str | None, +) -> ExpressionDescriptor: + """Extract bound kwargs from an annotated_types constraint. + + Coerces integer bound values to float on float-typed columns so + that generated test mutations match the Spark DoubleType column. 
+ """ + is_float = base_type in _FLOAT_BASE_TYPES + kwargs: list[tuple[str, object]] = [] + for attr in _BOUND_ATTRS: + value = getattr(constraint, attr, None) + if value is not None: + if is_float and isinstance(value, int) and not isinstance(value, bool): + value = float(value) + kwargs.append((attr, value)) + return ExpressionDescriptor(function="check_bounds", kwargs=tuple(kwargs)) + + +def _dispatch_pattern( + constraint: PatternConstraint, + _base_type: str | None, +) -> ExpressionDescriptor: + """Map a PatternConstraint (or subclass) to a check_pattern descriptor.""" + return ExpressionDescriptor( + function="check_pattern", + args=(_normalize_anchor(constraint.pattern.pattern),), + constraint_type=type(constraint), + label=_pattern_label(constraint), + check_name=_pattern_check_name(constraint), + ) + + +# Ordered: the first matching entry wins, so any subclass relationship +# between keys must place the more-specific class first. StrippedConstraint +# subclasses PatternConstraint, so it must appear before the PatternConstraint +# fallback entry. 
+_CONSTRAINT_DISPATCH: list[tuple[type | tuple[type, ...], _ConstraintHandler]] = [ + ((Reference, Strict), lambda _c, _bt: None), + ((Ge, Gt, Le, Lt, Interval), _dispatch_bounds), + ( + ArrayMinLen, + lambda c, _bt: ExpressionDescriptor( + function="check_array_min_length", args=(c.min_length,) + ), + ), + ( + ArrayMaxLen, + lambda c, _bt: ExpressionDescriptor( + function="check_array_max_length", args=(c.max_length,) + ), + ), + ( + ScalarMinLen, + lambda c, _bt: ExpressionDescriptor( + function="check_string_min_length", args=(c.min_length,) + ), + ), + ( + ScalarMaxLen, + lambda c, _bt: ExpressionDescriptor( + function="check_string_max_length", args=(c.max_length,) + ), + ), + ( + StrippedConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_stripped"), + ), + ( + JsonPointerConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_json_pointer"), + ), + (PatternConstraint, _dispatch_pattern), + # check_struct_unique uses Spark's array_distinct: structural equality on + # whole elements, against the raw stored values. Pydantic's + # UniqueItemsConstraint on list[HttpUrl] compares *normalized* URLs + # (trailing-slash, lowercase host/scheme), so it catches duplicates that + # differ only in normalization. We accept that difference -- the PySpark + # check catches exact duplicates only. + ( + UniqueItemsConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_struct_unique"), + ), + ( + GeometryTypeConstraint, + lambda c, _bt: ExpressionDescriptor( + function="check_geometry_type", args=tuple(c.allowed_types) + ), + ), +] + + +def dispatch_constraint( + constraint: object, + *, + base_type: str | None = None, +) -> ExpressionDescriptor | None: + """Map a constraint object to an expression descriptor. + + Parameters + ---------- + constraint + The constraint object from `ConstraintSource.constraint`. 
Length + constraints arrive as `ArrayMinLen` / `ArrayMaxLen` / + `ScalarMinLen` / `ScalarMaxLen` -- the typed variants emitted + by `extraction.type_analyzer.attach_constraints`. + base_type + The field's terminal-scalar base type, used to detect float + bounds. + + Returns + ------- + ExpressionDescriptor or None + `None` for explicitly skipped constraints (Reference, Strict). + + Raises + ------ + TypeError + For unrecognized constraint types. + """ + for key_types, handler in _CONSTRAINT_DISPATCH: + if isinstance(constraint, key_types): + return handler(constraint, base_type) + raise TypeError(f"Unhandled constraint type: {type(constraint).__name__}") + + +def dispatch_newtype(newtype_name: str) -> tuple[ExpressionDescriptor, ...] | None: + """Look up a NewType-level expression override. + + Returns None when the NewType decomposes normally into + individual constraint dispatches. + """ + return _NEWTYPE_DISPATCH.get(newtype_name) + + +def dispatch_base_type(base_type: str) -> tuple[ExpressionDescriptor, ...] | None: + """Look up a base-type-level expression override. + + Handles primitive types like HttpUrl and EmailStr that carry no + Annotated constraints but need semantic validation functions. + """ + return _BASE_TYPE_DISPATCH.get(base_type) + + +@dataclass(frozen=True, slots=True) +class RequireAnyOf: + """Descriptor for `check_require_any_of`: at least one field must be set.""" + + field_names: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class RadioGroup: + """Descriptor for `check_radio_group`: exactly one boolean field must be True.""" + + field_names: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class RequireIf: + """Descriptor for `check_require_if`: field required when condition holds.""" + + field_names: tuple[str, ...] + condition: Condition + + +@dataclass(frozen=True, slots=True) +class ForbidIf: + """Descriptor for `check_forbid_if`: field must be absent when condition holds. 
+ + `field_shapes` pairs non-string field names with their `FieldShape` so + the test renderer can emit type-appropriate `fill_values` literals. + Stored as a tuple of `(name, shape)` pairs so the descriptor is + hashable; consumers convert with `dict()` when they need mapping + access. String fields are omitted because the renderer defaults to + `""` for them without needing the shape. + """ + + field_names: tuple[str, ...] + condition: Condition + field_shapes: tuple[tuple[str, FieldShape], ...] + + +@dataclass(frozen=True, slots=True) +class MinFieldsSet: + """Descriptor for `check_min_fields_set`: at least `count` fields set. + + Matches Pydantic's `model_fields_set` semantics: required fields are + always set (the constructor requires them) and contribute to the count + alongside any explicitly-set optional fields. Both kinds are passed to + the runtime check. + """ + + field_names: tuple[str, ...] + count: int + + +ModelConstraintDescriptor: TypeAlias = ( + RequireAnyOf | RadioGroup | RequireIf | ForbidIf | MinFieldsSet +) +"""One variant per model-constraint kind. + +Each variant carries only the fields meaningful for that constraint; +`ForbidIf` adds `field_shapes` for non-string targets so the test +renderer can emit type-appropriate `fill_values` literals. +""" + + +def _first_required_leaf(field_spec: FieldSpec) -> str | None: + """Return the name of the first required field in a MODEL-kind `FieldSpec`. + + Returns `None` for fields whose terminal is anything but a + `ModelRef` (scalars, arrays, `UnionRef`s, etc.). The + `RequireAnyOf` unwrapping uses this to drill into a struct's + required leaf when one exists; non-struct terminals leave the + field name unwrapped, which is the correct behavior for scalars + and arrays. `UnionRef` returns `None` because picking one arm's + required leaf would silently bias the constraint to that arm. 
+ """ + if has_array_layer(field_spec.shape): + return None + terminal = terminal_of(field_spec.shape) + if not isinstance(terminal, ModelRef): + return None + for sub in terminal.model.fields: + if sub.is_required: + return sub.name + return None + + +def _unwrap_require_any_of_names( + field_names: tuple[str, ...], + by_name: dict[str, FieldSpec], +) -> tuple[str, ...]: + """Replace struct field names with their first required leaf path.""" + result = [] + for name in field_names: + field_spec = by_name.get(name) + leaf = _first_required_leaf(field_spec) if field_spec is not None else None + result.append(f"{name}.{leaf}" if leaf is not None else name) + return tuple(result) + + +def _is_compound_shape(shape: FieldShape) -> bool: + """Whether `shape` needs a non-`{}` fill value in mutation helpers.""" + if has_array_layer(shape): + return True + return isinstance(terminal_of(shape), ModelRef) + + +def forbid_if_field_shapes( + field_names: tuple[str, ...], + shape_by_name: Mapping[str, FieldShape], +) -> tuple[tuple[str, FieldShape], ...]: + """Build the `field_shapes` pairs for non-string ForbidIf targets. + + Keeps only fields whose shape is compound (an array or a model + reference); string fields are omitted because the test renderer + defaults their fill value to `""` without needing the shape. + """ + return tuple( + (name, shape) + for name in field_names + if (shape := shape_by_name.get(name)) is not None and _is_compound_shape(shape) + ) + + +def dispatch_model_constraint( + constraint: object, + fields: list[FieldSpec], +) -> tuple[ModelConstraintDescriptor, ...]: + """Map a model-level constraint to fully constructed typed descriptors. + + Parameters + ---------- + constraint + The model constraint object. + fields + All fields of the model. Branches consult them as needed -- + `RequireAnyOf` and `ForbidIf` index by name, `MinFieldsSet` + enumerates every field (required and optional). 
+ + Returns + ------- + tuple of ModelConstraintDescriptor + Empty tuple for explicitly skipped constraints (NoExtraFields). + Most kinds return a single-element tuple. Multi-field + `@require_if` / `@forbid_if` split into one descriptor per + target field because the runtime check functions take a single + target column each. + + Raises + ------ + TypeError + For unrecognized constraint types. + """ + match constraint: + case NoExtraFieldsConstraint(): + return () + case RequireAnyOfConstraint(): + unwrapped = _unwrap_require_any_of_names( + constraint.field_names, {f.name: f for f in fields} + ) + return (RequireAnyOf(field_names=unwrapped),) + case RadioGroupConstraint(): + return (RadioGroup(field_names=constraint.field_names),) + case RequireIfConstraint(): + # `@require_if(["a", "b"], cond)` means "all of a, b required when + # cond" -- one runtime check per field, since check_require_if + # takes a single target column. + return tuple( + RequireIf(field_names=(name,), condition=constraint.condition) + for name in constraint.field_names + ) + case ForbidIfConstraint(): + shapes_by_field = forbid_if_field_shapes( + constraint.field_names, + {f.name: f.shape for f in fields}, + ) + per_field_shapes = dict(shapes_by_field) + return tuple( + ForbidIf( + field_names=(name,), + condition=constraint.condition, + field_shapes=( + ((name, per_field_shapes[name]),) + if name in per_field_shapes + else () + ), + ) + for name in constraint.field_names + ) + case MinFieldsSetConstraint(): + all_names = tuple(f.name for f in fields) + return (MinFieldsSet(field_names=all_names, count=constraint.count),) + case _: + raise TypeError(f"Unhandled model constraint: {type(constraint).__name__}") + + +_MODEL_CONSTRAINT_DISPATCH: dict[type[ModelConstraintDescriptor], tuple[str, str]] = { + RequireAnyOf: ("check_require_any_of", "mutate_require_any_of"), + RadioGroup: ("check_radio_group", "mutate_radio_group"), + RequireIf: ("check_require_if", "mutate_require_if"), + ForbidIf: 
("check_forbid_if", "mutate_forbid_if"), + MinFieldsSet: ("check_min_fields_set", "mutate_min_fields_set"), +} + + +def model_constraint_function(d: ModelConstraintDescriptor) -> str: + """Map a `ModelConstraintDescriptor` variant to its runtime function name.""" + return _MODEL_CONSTRAINT_DISPATCH[type(d)][0] + + +def model_mutation_function(d: ModelConstraintDescriptor) -> str: + """Map a `ModelConstraintDescriptor` variant to its test mutation helper.""" + return _MODEL_CONSTRAINT_DISPATCH[type(d)][1] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py new file mode 100644 index 000000000..a6033c0db --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py @@ -0,0 +1,256 @@ +"""PySpark generation pipeline: produce modules without I/O. + +Orchestrates check building, schema building, and rendering into +GeneratedModule objects. The caller decides what to do with them (write +to disk, stream to stdout, etc.). 
+""" + +from __future__ import annotations + +from collections.abc import Iterable, Sequence +from dataclasses import dataclass +from pathlib import PurePosixPath + +from overture.schema.system.case import to_snake_case +from overture.schema.system.discovery import entry_point_to_path +from overture.schema.system.primitive import GeometryType + +from ..extraction.specs import FeatureSpec, UnionSpec +from .check_builder import build_checks +from .check_ir import Check, ModelCheck +from .renderer import render_feature_module +from .schema_builder import build_schema +from .test_data.base_row import ( + generate_arm_rows, + generate_base_row, + generate_populated_arm_rows, + generate_populated_row, +) +from .test_renderer import render_test_module + +__all__ = [ + "GeneratedModule", + "PipelineOutput", + "generate_pyspark_module", + "generate_pyspark_modules", +] + + +@dataclass(frozen=True, slots=True) +class GeneratedModule: + """A generated Python module with its content and output path.""" + + content: str + path: PurePosixPath + + +@dataclass(frozen=True, slots=True) +class PipelineOutput: + """PySpark modules emitted by the pipeline, split by output tree. + + The source and test trees are written to separate directories and + mirror the same relative layout, so a path is meaningful only + relative to its tree. Splitting at the boundary keeps each tree + self-contained -- in practice the overlap today is just + `__init__.py`, but any path duplicated between trees would be + ambiguous in a single flat list. + """ + + source: list[GeneratedModule] + test: list[GeneratedModule] + + +_OUTPUT_PACKAGE = "overture.schema.pyspark.expressions.generated" + +# Dots in `from ...x import y` from a generated test module to reach +# `tests/`: one to leave the file's package, one to leave `generated/`. +# Each additional directory component under `generated/` adds another. 
+_DOTS_FROM_TEST_TO_TESTS_ROOT = 2 + + +def _support_prefix(directory: PurePosixPath) -> str: + """Relative-import prefix used by generated test modules to reach `_support`. + + Each leading dot climbs one package level; the first two dots step + out of `tests/generated/` to `tests/`, and an extra dot is appended + for every component of *directory* under `generated/`. + """ + return "." * (len(directory.parts) + _DOTS_FROM_TEST_TO_TESTS_ROOT) + + +def _require_entry_point(spec: FeatureSpec) -> str: + """Return *spec*'s entry point or raise if it's missing.""" + if spec.entry_point is None: + msg = f"FeatureSpec {spec.name!r} has no entry_point." + raise ValueError(msg) + return spec.entry_point + + +def _directory_and_feature_name(spec: FeatureSpec) -> tuple[PurePosixPath, str]: + """Return the output directory and snake_case feature name for a spec. + + Both halves derive from the entry-point's class name so filenames + and symbol names stay in sync with what the runtime registry + discovers. + """ + directory, cls_name = entry_point_to_path(_require_entry_point(spec)) + return directory, to_snake_case(cls_name) + + +def _extract_geometry_types( + field_checks: list[Check], +) -> tuple[GeometryType, ...]: + """Collect allowed geometry types from every `check_geometry_type` descriptor. + + A feature may carry multiple `check_geometry_type` descriptors -- e.g. + one per union arm with a distinct allowed-types set. The result is the + union of all of them, sorted by name for deterministic output. + """ + seen: set[GeometryType] = set() + for check in field_checks: + for desc in check.descriptors: + if desc.function != "check_geometry_type": + continue + for arg in desc.args: + if isinstance(arg, GeometryType): + seen.add(arg) + return tuple(sorted(seen, key=lambda g: g.name)) + + +def _init_modules(paths: Iterable[PurePosixPath]) -> list[GeneratedModule]: + """Emit empty `__init__.py` for every directory of `paths`. 
+ + Includes the output root so the top-level package exists after a + full `rm -rf` of the generated tree. + """ + paths = list(paths) + if not paths: + return [] + dirs: set[PurePosixPath] = set() + for path in paths: + dirs.update(path.parents) + return [GeneratedModule(content="", path=d / "__init__.py") for d in sorted(dirs)] + + +def generate_pyspark_module(spec: FeatureSpec) -> GeneratedModule: + """Generate a PySpark validation module from a feature spec. + + Parameters + ---------- + spec + The extracted feature spec to generate from. + + Returns + ------- + GeneratedModule + Module content and a relative output path mirroring the + feature's entry-point package layout. + """ + return _render_module(spec, build_checks(spec)) + + +def generate_pyspark_modules( + feature_specs: Sequence[FeatureSpec], +) -> PipelineOutput: + """Generate PySpark validation modules for all features. + + Parameters + ---------- + feature_specs + Extracted feature specs to generate from. + + Returns + ------- + PipelineOutput + Source-tree feature modules and test-tree modules. Each tree + includes the `__init__.py` files needed for its package layout. 
+ """ + items = [(spec, build_checks(spec)) for spec in feature_specs] + source = [_render_module(spec, checks) for spec, checks in items] + test: list[GeneratedModule] = [] + for spec, checks in items: + test.extend(_render_test_modules(spec, checks)) + source.extend(_init_modules(m.path for m in source)) + test.extend(_init_modules(m.path for m in test)) + return PipelineOutput(source=source, test=test) + + +def _render_module( + spec: FeatureSpec, + checks: tuple[list[Check], list[ModelCheck]], +) -> GeneratedModule: + """Build checks, schema, and render for a feature spec.""" + field_checks, model_checks = checks + schema_fields = build_schema(spec) + geometry_types = _extract_geometry_types(field_checks) + directory, feature_name = _directory_and_feature_name(spec) + content = render_feature_module( + feature_name, + field_checks, + model_checks, + schema_fields, + geometry_types, + entry_point=_require_entry_point(spec), + partitions=spec.partitions, + ) + return GeneratedModule( + content=content, + path=directory / f"{feature_name}.py", + ) + + +def _select_arm_rows( + spec: FeatureSpec, +) -> dict[str | None, tuple[dict[str, object], dict[str, object]]]: + """Map each test module's arm key to its (sparse, populated) base rows. + + Multi-arm unions key by discriminator value (one entry per arm); other + specs use a single `None` key. Either way the caller iterates the dict + to emit one test module per entry. + """ + if isinstance(spec, UnionSpec) and spec.discriminator_field: + sparse_arm_rows = generate_arm_rows(spec) + populated_arm_rows = generate_populated_arm_rows(spec) + return { + arm: (sparse_arm_rows[arm], populated_arm_rows[arm]) + for arm in sparse_arm_rows + } + return {None: (generate_base_row(spec), generate_populated_row(spec))} + + +def _render_test_modules( + spec: FeatureSpec, + checks: tuple[list[Check], list[ModelCheck]], +) -> list[GeneratedModule]: + """Render test modules for a feature spec. 
+ + For union specs with multiple discriminator arms, produces one + test module per arm. Each arm's test includes the field and + model checks tagged with that arm (or untagged), filtered by + `render_test_module`. + """ + field_checks, model_checks = checks + directory, feature_name = _directory_and_feature_name(spec) + expression_import = ".".join([_OUTPUT_PACKAGE, *directory.parts, feature_name]) + support_prefix = _support_prefix(directory) + + modules: list[GeneratedModule] = [] + for arm, (base_row_sparse, base_row_populated) in _select_arm_rows(spec).items(): + suffix = f"_{arm}" if arm else "" + modules.append( + GeneratedModule( + content=render_test_module( + feature_name, + field_checks, + model_checks, + base_row_sparse=base_row_sparse, + base_row_populated=base_row_populated, + arm=arm, + spec=spec, + expression_import=expression_import, + support_prefix=support_prefix, + ), + path=directory / f"test_{feature_name}{suffix}.py", + ) + ) + return modules diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py new file mode 100644 index 000000000..9728a499a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -0,0 +1,647 @@ +"""Render Check / ModelCheck IR into complete Python modules.""" + +from __future__ import annotations + +import re +from collections.abc import Mapping +from enum import Enum + +from overture.schema.system.field_path import ( + ArrayPath, + FieldPath, + ScalarPath, +) +from overture.schema.system.model_constraint import Condition +from overture.schema.system.primitive import GeometryType + +from ._render_common import ( + check_name, + compute_label_suffixes, + disambiguate, + field_label, + jinja_env, + model_constraint_field_label, + parse_field_eq, + py_literal, + tuple_literal, +) +from .check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, 
+) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, + model_constraint_function, +) +from .schema_builder import SHARED_TYPE_REFS, SchemaField + +__all__ = [ + "render_feature_module", +] + +# Descriptor function names that resolve to helpers from the +# `column_patterns` runtime module (rather than `constraint_expressions`). +# Used to route imports to the correct module. Distinct from +# `_render_common.COLUMN_LEVEL_FUNCTIONS`, which classifies checks that +# emit one Check per field rather than per array element. +_COLUMN_PATTERN_HELPERS = frozenset( + {"array_check", "nested_array_check", "check_struct_unique"} +) + +_SHARED_STRUCT_REFS = frozenset(SHARED_TYPE_REFS.values()) + +_SPARK_TYPES = frozenset( + { + "ArrayType", + "BinaryType", + "BooleanType", + "ByteType", + "DateType", + "DoubleType", + "FloatType", + "IntegerType", + "LongType", + "MapType", + "ShortType", + "StringType", + "StructField", + "StructType", + "TimestampType", + } +) + + +# Collapses runs of `.`, `[`, `]`, `_` to a single `_` for identifier sanitization. 
+_PATH_SEPARATOR_RUN = re.compile(r"[.\[\]_]+") + + +def _sanitize_field_name(field: str) -> str: + """Convert an encoded field-path string to a valid Python identifier fragment.""" + return _PATH_SEPARATOR_RUN.sub("_", field).strip("_") + + +def _render_condition_desc(condition: Condition) -> str: + """Render a Condition to a human-readable description string for error messages.""" + parsed = parse_field_eq(condition) + if parsed is None: + raise TypeError(f"Unhandled condition type: {type(condition).__name__}") + display = repr( + parsed.value.value if isinstance(parsed.value, Enum) else parsed.value + ) + op = "!=" if parsed.negated else "=" + return f"{parsed.field_name} {op} {display}" + + +def _render_condition( + condition: Condition, *, in_array: bool = False, var: str = "el" +) -> str: + """Render a Condition to a PySpark Column expression string.""" + parsed = parse_field_eq(condition) + if parsed is None: + raise TypeError(f"Unhandled condition type: {type(condition).__name__}") + ref = _render_field_ref(parsed.field_name, in_array=in_array, var=var) + op = "!=" if parsed.negated else "==" + return f"{ref} {op} {py_literal(parsed.value)}" + + +def _render_field_ref( + field_name: str, + *, + in_array: bool, + struct_path: tuple[str, ...] = (), + var: str = "el", +) -> str: + """Render a field reference as F.col("x"), el["x"], or el["struct"]["x"]. + + `F.col` accepts dotted names directly so the top-level form keeps + `field_name` intact. The in-array form descends a struct via + `el[...]`, which requires the dotted name to be split into segments + before applying `struct_path` and the field's own parts. + """ + if not in_array: + return f'F.col("{field_name}")' + parts = (*struct_path, *field_name.split(".")) + return _element_accessor(var, parts) + + +def _geometry_type_literal(g: GeometryType) -> str: + """Spell out `GeometryType.NAME` as valid Python source. + + `repr(g)` yields ``, which is not a valid + expression. 
+ """ + return f"GeometryType.{g.name}" + + +# Check functions whose first positional arg is a list of allowed values. +# Descriptors store the values as a tuple for hashability; the renderer +# unwraps that one position to a list literal so the generated call matches +# the runtime signature. +_LIST_FIRST_ARG_FUNCTIONS = frozenset({"check_enum"}) + + +def _render_arg(arg: object) -> str: + """Render a descriptor arg as a valid Python expression string.""" + if isinstance(arg, GeometryType): + return _geometry_type_literal(arg) + return py_literal(arg) + + +def _render_expr_call( + desc: ExpressionDescriptor, + col_expr: str, +) -> str: + """Render a single ExpressionDescriptor call with col injected.""" + parts = [col_expr] + for idx, arg in enumerate(desc.args): + if ( + idx == 0 + and desc.function in _LIST_FIRST_ARG_FUNCTIONS + and isinstance(arg, tuple) + ): + parts.append(py_literal(list(arg))) + else: + parts.append(_render_arg(arg)) + for k, v in desc.kwargs: + parts.append(f"{k}={py_literal(v)}") + if desc.label is not None: + parts.append(f"label={py_literal(desc.label)}") + return f"{desc.function}({', '.join(parts)})" + + +def _element_accessor(var: str, path: tuple[str, ...]) -> str: + """Build bracket-notation accessor like `el["foo"]["bar"]`.""" + return var + "".join(f'["{p}"]' for p in path) + + +def _iter_var_name(idx: int, total: int) -> str: + """Lambda variable name at iteration depth `idx` (0..total-1) of `total`. + + Single-iteration cases (`total == 1`) return `"el"` from the first + branch; the innermost frame of a nested iteration uses `"inner"`, + intermediate frames `"el2"`, `"el3"`, ... + """ + if idx == 0: + return "el" + if idx == total - 1: + return "inner" + return f"el{idx + 1}" + + +def _wrap_element_gate(body: str, var: str, gate_parts: tuple[str, ...]) -> str: + """Wrap a lambda body in F.when(var[gate].isNotNull(), ...) 
for nullable parent gating.""" + gate_accessor = _element_accessor(var, gate_parts) + return f"F.when({gate_accessor}.isNotNull(), {body})" + + +def _wrap_in_array_iteration( + column_path: str, + inner_struct_paths: tuple[tuple[str, ...], ...], + body: str, + *, + gate_parts: tuple[str, ...] = (), +) -> str: + """Wrap `body` in nested array_check / nested_array_check frames. + + One frame per iteration: `column_path` names the outermost array + column, `inner_struct_paths` gives the struct accessor from each + iteration's element to the next array (its length plus one is the + iteration count). `body` is the innermost lambda body. `gate_parts`, + when set, wraps the outermost lambda body in a nullable-parent + element gate. + + The recursion descends one frame per call, carrying the frame index + and its lambda variable; the innermost frame is `array_check`, every + outer frame `nested_array_check`. + """ + total = 1 + len(inner_struct_paths) + + def frame(idx: int, accessor: str, var: str) -> str: + if idx == total - 1: + inner = body + fn = "array_check" + else: + child_var = _iter_var_name(idx + 1, total) + child_accessor = _element_accessor(var, inner_struct_paths[idx]) + inner = frame(idx + 1, child_accessor, child_var) + fn = "nested_array_check" + if idx == 0 and gate_parts: + inner = _wrap_element_gate(inner, var, gate_parts) + return f"{fn}({accessor}, lambda {var}: {inner})" + + return frame(0, f'"{column_path}"', "el") + + +def _render_array_check_expr( + target: ArrayPath, + desc: ExpressionDescriptor, + *, + element_guards: tuple[ElementGuard, ...] = (), + gate_parts: tuple[str, ...] = (), +) -> str: + """Render an ArrayPath target to an array_check / nested_array_check expression. + + Element guards are applied at the innermost iteration variable. 
This + assumes each guard's discriminator lives on the same struct level as + the leaf accessor -- which is true today because `ElementGuard`s only + arise from a union variant whose discriminator field is the + immediately enclosing array element. A future case where a check is + reached through further iteration *inside* a discriminated union + element would need per-guard depth info to apply the guard at the + correct frame. + """ + inner_struct_paths = target.iter_struct_paths + iteration_count = 1 + len(inner_struct_paths) + + innermost_var = _iter_var_name(iteration_count - 1, iteration_count) + leaf_accessor = _element_accessor(innermost_var, target.leaf) + body = _render_expr_call(desc, leaf_accessor) + + for guard in reversed(element_guards): + body = _render_variant_expr( + body, guard.values, guard.discriminator, in_array=True, var=innermost_var + ) + + return _wrap_in_array_iteration( + target.column_path, inner_struct_paths, body, gate_parts=gate_parts + ) + + +def _render_variant_expr( + inner_expr: str, + variant_values: tuple[str, ...], + discriminator_field: str, + *, + in_array: bool = False, + var: str = "el", +) -> str: + """Wrap an expression in F.when(...).isin() gating for union variant fields.""" + values_repr = py_literal(list(variant_values)) + disc_ref = ( + f'{var}["{discriminator_field}"]' + if in_array + else f'F.col("{discriminator_field}")' + ) + return f"F.when({disc_ref}.isin({values_repr}), {inner_expr})" + + +def _render_column_gate(expr: str, gate: FieldPath) -> str: + """Wrap an expression in F.when(gate.isNotNull(), ...) for nullable parent gating.""" + return f'F.when(F.col("{gate}").isNotNull(), {expr})' + + +def _model_check_func_name(check: ModelCheck, idx: int) -> str: + """Build the private function name for a model-constraint check. + + Non-array targets emit `_{fn}_{idx}_check`. 
Array targets prefix the + column path -- using the full encoded `FieldPath` when the check is + reached via inner iteration or leaf struct navigation, otherwise the + outer column name alone -- so collisions across nested contexts get + distinct identifiers. + """ + fn = model_constraint_function(check.descriptor) + match check.target: + case ArrayPath() as target: + has_nested_path = bool(target.iter_struct_paths) or bool(target.leaf) + prefix_source = str(target) if has_nested_path else target.column_path + prefix = _sanitize_field_name(prefix_source) + return f"_{prefix}_{fn}_{idx}_check" + case _: + return f"_{fn}_{idx}_check" + + +def _root_field_for_target(target: FieldPath) -> str | None: + """Top-level schema column for a Check/ModelCheck target. + + Returns the first segment's name, or `None` for an empty path. + """ + return target.segments[0].name if target.segments else None + + +def _check_shape_token(target: FieldPath) -> str: + """Token naming the runtime `CheckShape` member for a target path. + + Mirrors the member names of `overture.schema.pyspark.check.CheckShape`; + the check-function template prefixes `CheckShape.` to the result. An + `ArrayPath` target renders to an `array` expression, every + other path to a nullable string. + """ + return "ARRAY" if isinstance(target, ArrayPath) else "SCALAR" + + +def _render_check_expr(check: Check, descriptor_idx: int) -> str: + """Render the PySpark expression for one descriptor of `check`.""" + desc = check.descriptors[descriptor_idx] + column_guards = tuple(g for g in check.guards if isinstance(g, ColumnGuard)) + element_guards = tuple(g for g in check.guards if isinstance(g, ElementGuard)) + + match check.target: + case ScalarPath(): + expr = _render_expr_call(desc, f'F.col("{check.target}")') + if desc.gate: + expr = _render_column_gate(expr, desc.gate) + case ArrayPath(): + gate_parts: tuple[str, ...] 
= () + if desc.gate is not None: + # check_builder zeros the nullable gate when descending into + # a list (see `_recurse_into_model`), so a gate paired with + # an ArrayPath target should never occur today. If it does, + # the column-level fallback below would silently hide a + # codegen bug -- raise instead. + element_relative = check.target.element_relative_gate(desc.gate) + if element_relative is None: + raise AssertionError( + f"ArrayPath target with column-level gate is not " + f"produced by check_builder (gate={desc.gate!r}, " + f"target={check.target!r})" + ) + gate_parts = element_relative + expr = _render_array_check_expr( + check.target, + desc, + element_guards=element_guards, + gate_parts=gate_parts, + ) + case _: + raise TypeError( + f"Unhandled FieldPath variant: {type(check.target).__name__}" + ) + + for guard in reversed(column_guards): + expr = _render_variant_expr(expr, guard.values, guard.discriminator) + return expr + + +def _check_function_context( + *, target: FieldPath, func_name: str, field: str, name: str, expr: str +) -> dict[str, object]: + """Assemble the template context dict for one check function.""" + return { + "func_name": func_name, + "field": field, + "check_name": name, + "expr": expr, + "shape": _check_shape_token(target), + "root_field": _root_field_for_target(target), + } + + +def _render_check_function_context( + check: Check, func_name: str, descriptor_idx: int = 0 +) -> dict[str, object]: + """Build the template context for a per-field check function from a Check.""" + desc = check.descriptors[descriptor_idx] + return _check_function_context( + target=check.target, + func_name=func_name, + field=field_label(check), + name=check_name(desc.function, desc.check_name), + expr=_render_check_expr(check, descriptor_idx), + ) + + +def _render_model_constraint_function_context( + check: ModelCheck, idx: int, label_suffix: str +) -> dict[str, object]: + """Build the template context for a model-constraint check function.""" + desc 
= check.descriptor + target = check.target + match target: + case ArrayPath(): + in_array = True + var = "inner" if target.iter_struct_paths else "el" + struct_path: tuple[str, ...] = target.leaf + case _: + in_array = False + var, struct_path = "el", () + + def _field_ref(field_name: str) -> str: + return _render_field_ref( + field_name, in_array=in_array, struct_path=struct_path, var=var + ) + + fn = model_constraint_function(desc) + + def _cols_and_names() -> tuple[str, str]: + cols_list = "[" + ", ".join(_field_ref(f) for f in desc.field_names) + "]" + names_list = py_literal(list(desc.field_names)) + return cols_list, names_list + + match desc: + case RequireAnyOf() | RadioGroup(): + cols_list, names_list = _cols_and_names() + inner_expr = f"{fn}({cols_list}, {names_list})" + case RequireIf() | ForbidIf(): + target_name = desc.field_names[0] + condition_expr = _render_condition( + desc.condition, in_array=in_array, var=var + ) + condition_desc = _render_condition_desc(desc.condition) + target_ref = _field_ref(target_name) + inner_expr = ( + f"{fn}({target_ref}, {condition_expr}, {py_literal(condition_desc)})" + ) + case MinFieldsSet(): + cols_list, names_list = _cols_and_names() + inner_expr = f"{fn}({cols_list}, {names_list}, {desc.count})" + case _: + raise TypeError(f"Unhandled model constraint descriptor: {desc!r}") + + if isinstance(target, ArrayPath): + expr = _wrap_in_array_iteration( + target.column_path, target.iter_struct_paths, inner_expr + ) + else: + expr = inner_expr + + return _check_function_context( + target=target, + func_name=_model_check_func_name(check, idx), + field=model_constraint_field_label(check, label_suffix), + name=check_name(fn), + expr=expr, + ) + + +def _collect_constraint_expr_imports( + field_checks: list[Check], + model_checks: list[ModelCheck], +) -> set[str]: + """Collect all constraint_expressions function names needed. 
+ + Field-descriptor names go through a `_COLUMN_PATTERN_HELPERS` + filter so column-pattern helpers route to their own import bucket. + Model-constraint function names (`check_require_any_of`, + `check_radio_group`, ...) are disjoint from that set, so they pass + through unfiltered. + """ + names: set[str] = { + desc.function + for check in field_checks + for desc in check.descriptors + if desc.function not in _COLUMN_PATTERN_HELPERS + } + for mc in model_checks: + names.add(model_constraint_function(mc.descriptor)) + return names + + +def _needs_geometry_type_import(field_checks: list[Check]) -> bool: + """Return True when any descriptor arg is a GeometryType instance.""" + for check in field_checks: + for desc in check.descriptors: + if any(isinstance(a, GeometryType) for a in desc.args): + return True + return False + + +def _pattern_imports_for(target: FieldPath) -> set[str]: + """Column-pattern helpers needed to iterate `target`.""" + match target: + case ArrayPath(): + names = {"array_check"} + if target.iter_struct_paths: + names.add("nested_array_check") + return names + case _: + return set() + + +def _collect_column_pattern_imports( + field_checks: list[Check], + model_checks: list[ModelCheck], +) -> set[str]: + """Collect column_patterns function names needed.""" + names: set[str] = set() + for check in field_checks: + names |= _pattern_imports_for(check.target) + for desc in check.descriptors: + if desc.function in _COLUMN_PATTERN_HELPERS: + names.add(desc.function) + for mc in model_checks: + names |= _pattern_imports_for(mc.target) + return names + + +_IDENTIFIER_TOKEN = re.compile(r"[A-Z][A-Za-z0-9_]*") + + +def _identifier_tokens(expr: str) -> set[str]: + """Tokenize a Spark type expression into capitalized identifiers.""" + return set(_IDENTIFIER_TOKEN.findall(expr)) + + +def _collect_spark_type_imports(schema_fields: list[SchemaField]) -> set[str]: + """Collect Spark type class names from schema field type expressions.""" + if not schema_fields: 
+ return set() + used: set[str] = {"StructType", "StructField"} + for sf in schema_fields: + used |= _identifier_tokens(sf.type_expr) & _SPARK_TYPES + return used + + +def _collect_schema_struct_imports(schema_fields: list[SchemaField]) -> set[str]: + """Collect _schema_structs constant names referenced in field type expressions.""" + refs: set[str] = set() + for sf in schema_fields: + refs |= _identifier_tokens(sf.type_expr) & _SHARED_STRUCT_REFS + return refs + + +def _field_check_function_entries( + field_checks: list[Check], +) -> list[dict[str, object]]: + """Build template contexts for field-level checks.""" + descriptor_refs: list[tuple[Check, int]] = [] + raw_names: list[str] = [] + for check in field_checks: + labeled = field_label(check) + multi = len(check.descriptors) > 1 + for desc_idx, desc in enumerate(check.descriptors): + suffix = f"_{check_name(desc.function, desc.check_name)}" if multi else "" + raw_names.append(f"_{_sanitize_field_name(labeled)}{suffix}_check") + descriptor_refs.append((check, desc_idx)) + + func_names = disambiguate(raw_names) + return [ + _render_check_function_context(check, func_name, desc_idx) + for (check, desc_idx), func_name in zip( + descriptor_refs, func_names, strict=True + ) + ] + + +def _model_check_function_entries( + model_checks: list[ModelCheck], +) -> list[dict[str, object]]: + """Build template contexts for model-level checks.""" + label_suffixes = compute_label_suffixes(model_checks) + return [ + _render_model_constraint_function_context(mc, idx, label_suffixes[idx]) + for idx, mc in enumerate(model_checks) + ] + + +def render_feature_module( + feature_name: str, + field_checks: list[Check], + model_checks: list[ModelCheck], + schema_fields: list[SchemaField], + geometry_types: tuple[GeometryType, ...] 
= (), + *, + entry_point: str = "tests.placeholder:Placeholder", + partitions: Mapping[str, str] | None = None, +) -> str: + """Render a complete Python module for a feature's checks and schema.""" + constraint_expr_fns = sorted( + _collect_constraint_expr_imports(field_checks, model_checks) + ) + column_pattern_fns = sorted( + _collect_column_pattern_imports(field_checks, model_checks) + ) + spark_types = sorted(_collect_spark_type_imports(schema_fields)) + schema_struct_refs = sorted(_collect_schema_struct_imports(schema_fields)) + geometry_type = _needs_geometry_type_import(field_checks) or bool(geometry_types) + geometry_types_literal = ( + _render_geometry_types(geometry_types) if geometry_types else None + ) + + check_functions = _field_check_function_entries( + field_checks + ) + _model_check_function_entries(model_checks) + + feature_title = feature_name.replace("_", " ").title() + + template = jinja_env().get_template("feature_module.py.jinja2") + return template.render( + feature_name=feature_name, + feature_title=feature_title, + constraint_expr_fns=constraint_expr_fns, + column_pattern_fns=column_pattern_fns, + spark_types=spark_types, + schema_struct_refs=schema_struct_refs, + geometry_type=geometry_type, + check_functions=check_functions, + schema_const_name=f"{feature_name.upper()}_SCHEMA", + schema_fields=schema_fields, + geometry_types_literal=geometry_types_literal, + entry_point=entry_point, + partitions=dict(partitions) if partitions else {}, + ) + + +def _render_geometry_types(geo: tuple[GeometryType, ...]) -> str: + """Render a `geometry_types` tuple literal. + + `GeometryType` is an Enum, so `repr()` does not produce a valid + expression -- members need explicit `GeometryType.NAME` source. 
+ """ + return tuple_literal(_geometry_type_literal(g) for g in geo) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py new file mode 100644 index 000000000..194119145 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py @@ -0,0 +1,183 @@ +"""Build StructType schema source from FeatureSpec field trees.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + +from ..extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_scalar +from ..extraction.specs import FeatureSpec, FieldSpec, UnionSpec +from ..extraction.type_registry import get_type_mapping + +__all__ = [ + "SHARED_TYPE_REFS", + "SchemaField", + "build_schema", + "spark_type_rank", +] + +# Types whose base_type name maps to a _schema_structs.py StructType constant. +# Reserved for types the codegen cannot walk (BBox is a plain class, not a +# Pydantic BaseModel). Pydantic BaseModels are inlined. +SHARED_TYPE_REFS: dict[str, str] = { + "BBox": "BBOX_STRUCT", +} + +# Literal and Enum fields both serialize as strings in Parquet. +_STRING_FALLBACK = "StringType()" + + +@dataclass(frozen=True, slots=True) +class SchemaField: + """One field in the generated StructType. + + Parameters + ---------- + name + Column name. + type_expr + Spark type expression string (e.g. `"StringType()"`) or + a `_schema_structs.py` constant name. + """ + + name: str + type_expr: str + + +def _spark_for_base(base_type: str, source_type: type | None) -> str: + """Return a Spark type expression for a primitive base type. + + Tries `base_type` first, then falls back to `source_type.__name__`. + Returns `StringType()` when neither maps to a known Spark type. 
+ """ + mapping = get_type_mapping(base_type) + if mapping is not None and mapping.spark is not None: + return mapping.spark + if source_type is not None: + fallback = get_type_mapping(source_type.__name__) + if fallback is not None and fallback.spark is not None: + return fallback.spark + return _STRING_FALLBACK + + +def _spark_for_scalar(scalar: Scalar) -> str: + """Map a `Scalar` variant to a Spark type expression. + + `LiteralScalar` and `AnyScalar` serialize as strings. `Primitive` + scalars look up the type registry; enum primitives and BBox short- + circuit to strings / shared constants before the registry. + """ + if isinstance(scalar, (LiteralScalar, AnyScalar)): + return _STRING_FALLBACK + if scalar.base_type in SHARED_TYPE_REFS: + return SHARED_TYPE_REFS[scalar.base_type] + if ( + scalar.source_type is not None + and isinstance(scalar.source_type, type) + and issubclass(scalar.source_type, Enum) + ): + return _STRING_FALLBACK + return _spark_for_base(scalar.base_type, scalar.source_type) + + +# Spark numeric type widening precedence (higher rank = wider type). +_SPARK_TYPE_WIDENING: dict[str, int] = { + "IntegerType()": 0, + "LongType()": 1, + "DoubleType()": 2, +} + + +def spark_type_rank(field_spec: FieldSpec) -> int: + """Return a widening rank for the field's resolved Spark type. + + Fields with a higher rank are preferred when deduplicating union + members by name. Non-numeric types return -1 (no widening). + """ + scalar = terminal_scalar(field_spec.shape) + if not isinstance(scalar, Primitive): + return -1 + expr = _spark_for_base(scalar.base_type, scalar.source_type) + return _SPARK_TYPE_WIDENING.get(expr, -1) + + +def _deduplicate_by_name(fields: list[FieldSpec]) -> list[FieldSpec]: + """Keep one FieldSpec per name, widening the Spark type on conflict. + + Union annotated_fields may contain the same field name with different + type shapes (e.g. `value` as uint8 in one variant and float64 in + another). 
Parquet stores one column per name, so the schema needs + exactly one entry. When two fields share a name, the one with the + wider Spark type wins (matching Parquet's type-widening behavior). + """ + seen: dict[str, FieldSpec] = {} + for f in fields: + existing = seen.get(f.name) + if existing is None or spark_type_rank(f) > spark_type_rank(existing): + seen[f.name] = f + return list(seen.values()) + + +def _struct_type_expr(fields: list[FieldSpec]) -> str: + """Build an inline `StructType([...])` expression from a list of fields.""" + parts = [ + f'StructField("{f.name}", {_shape_to_spark(f.shape)}, True)' for f in fields + ] + return f"StructType([{', '.join(parts)}])" + + +def _shape_to_spark(shape: FieldShape) -> str: + """Convert a FieldShape to a Spark type expression string.""" + match shape: + case ArrayOf(element=element): + return f"ArrayType({_shape_to_spark(element)}, True)" + case NewTypeShape(inner=inner): + return _shape_to_spark(inner) + case ModelRef(model=m): + return _struct_type_expr(m.fields) + case UnionRef(union=u): + return _struct_type_expr(_deduplicate_by_name(u.fields)) + case MapOf(key=k, value=v): + return f"MapType({_shape_to_spark(k)}, {_shape_to_spark(v)}, True)" + case Primitive() | LiteralScalar() | AnyScalar() as s: + return _spark_for_scalar(s) + raise TypeError(f"Unhandled FieldShape: {shape!r}") + + +def build_schema(spec: FeatureSpec) -> list[SchemaField]: + """Build schema fields for a feature spec. + + Walks the field tree and maps types to Spark type expressions. + Recognizes shared types and emits fields in model order. + + Parameters + ---------- + spec + The feature spec to build schema fields for. + + Returns + ------- + list[SchemaField] + One entry per schema column in model order. 
+ """ + source_fields = ( + _deduplicate_by_name(spec.fields) + if isinstance(spec, UnionSpec) + else spec.fields + ) + return [ + SchemaField(name=f.name, type_expr=_shape_to_spark(f.shape)) + for f in source_fields + ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 new file mode 100644 index 000000000..8c15ed9d9 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 @@ -0,0 +1,10 @@ +{%- macro check_function(c) -%} +def {{ c.func_name }}() -> Check: + return Check( + field="{{ c.field }}", + name="{{ c.check_name }}", + expr={{ c.expr }}, + shape=CheckShape.{{ c.shape }}, + root_field={{ c.root_field | py_literal }}, + ) +{% endmacro %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 new file mode 100644 index 000000000..1a28d39b2 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 @@ -0,0 +1,83 @@ +{% from '_check_function.py.jinja2' import check_function -%} +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""{{ feature_title }} validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +{% if spark_types %} +from pyspark.sql.types import ( +{% for t in spark_types %} + {{ t }}, +{% endfor %} +) +{% endif %} +{% if geometry_type %} +from overture.schema.system.primitive import GeometryType + +{% endif %} +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +{% if schema_struct_refs %} +from overture.schema.pyspark.expressions._schema_structs import ( +{% for r in schema_struct_refs %} + {{ r }}, +{% endfor %} +) +{% endif %} +{% if column_pattern_fns %} +from overture.schema.pyspark.expressions.column_patterns import ( +{% for f in column_pattern_fns %} + {{ f }}, +{% endfor %} +) +{% endif %} +{% if constraint_expr_fns %} +from overture.schema.pyspark.expressions.constraint_expressions import ( +{% for f in constraint_expr_fns %} + {{ f }}, +{% endfor %} +) +{% endif %} + + +{% for c in check_functions %} +{{ check_function(c) }} +{% endfor %} + +def {{ feature_name }}_checks() -> list[Check]: + """All validation checks for {{ feature_name }}.""" +{% if check_functions %} + return [ +{% for c in check_functions %} + {{ c.func_name }}(), +{% endfor %} + ] +{% else %} + return [] +{% endif %} + + +{{ schema_const_name }} = StructType( + [ +{%- for sf in schema_fields %} + StructField("{{ sf.name }}", {{ sf.type_expr }}, True), +{%- endfor %} + ] +) +{% if geometry_types_literal %} + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= {{ geometry_types_literal }} +{% endif %} + +ENTRY_POINT = "{{ entry_point }}" + +PARTITIONS: dict[str, str] = {{ partitions | py_literal }} + +FEATURE_VALIDATION = FeatureValidation( + schema={{ schema_const_name }}, + checks={{ feature_name }}_checks, +{%- if geometry_types_literal %} + geometry_types=GEOMETRY_TYPES, +{%- endif %} +) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 new file mode 100644 index 000000000..c69f146a8 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 @@ -0,0 +1,124 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for {{ feature_name }}.""" + +from __future__ import annotations + +import pytest +from {{ expression_import }} import ( + {{ feature_name | upper }}_SCHEMA, + {{ feature_name }}_checks, +) +from pyspark.sql import SparkSession + +from {{ support_prefix }}_support.harness import ( + ValidationResults, + run_validation_pipeline, +) +{% if mutation_imports %} +from {{ support_prefix }}_support.mutations import {{ mutation_imports | join(", ") }} +{% endif %} +{% if needs_set_at_path %} +from {{ support_prefix }}_support.helpers import set_at_path +{% endif %} +from {{ support_prefix }}_support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = {{ base_row_sparse }} + + +BASE_ROW_POPULATED: dict = {{ base_row_populated }} + + +SCENARIOS: list[Scenario] = [ +{% for entry in scenarios %} + Scenario( + {% for k, v in entry %} + {{ k }}={{ v }}, + {% endfor %} + ), +{% endfor %} +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return {{ feature_name }}_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + {{ schema_name }}, + checks, + 
BASE_ROW_SPARSE, + SCENARIOS, + feature_name="{{ feature_name }}", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + {{ schema_name }}, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="{{ feature_name }}", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("{{ feature_name }}::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("{{ feature_name }}::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py new file mode 100644 index 000000000..fa271d7b8 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py @@ -0,0 +1,9 @@ +"""Test-data generation for the rendered PySpark conformance tests. + +Three modules cover three flavors of data: + +- `invalid_value`: constraint-violating values for triggering each check. +- `base_row`: minimal and fully populated valid rows. +- `scaffold`: sparse path scaffolds that supply the nested intermediates + (optional structs, arrays) a check's field path requires. 
+""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py new file mode 100644 index 000000000..6af5b0855 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py @@ -0,0 +1,648 @@ +"""Generate valid base rows for the rendered conformance tests. + +`generate_base_row` produces a minimal valid row (required fields only) +from a `FeatureSpec`. `generate_populated_row` produces a fully +populated row including optional fields. `generate_arm_rows` and +`generate_populated_arm_rows` do the same for each arm of a discriminated +union. +""" + +from __future__ import annotations + +import uuid +from collections.abc import Callable +from enum import Enum +from typing import Any + +from overture.schema.common.scoping.lr import LinearReferenceRangeConstraint +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + JsonPointerConstraint, + LanguageTagConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + StrippedConstraint, + WikidataIdConstraint, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive.geom import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) + +from ...extraction.field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from ...extraction.field_walk import has_array_layer, terminal_primitive +from ...extraction.length_constraints import ArrayMinLen +from ...extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ..constraint_dispatch import ExpressionDescriptor, 
dispatch_constraint +from ..schema_builder import spark_type_rank + +__all__ = [ + "generate_arm_rows", + "generate_base_row", + "generate_populated_arm_rows", + "generate_populated_row", + "value_for_field", +] + +_BASE_ROW_NAMESPACE = uuid.uuid5( + uuid.NAMESPACE_DNS, "overturemaps.org/codegen/base_row" +) + + +# WKT strings for each allowed geometry type (valid side) +_VALID_GEOMETRY_WKT: dict[GeometryType, str] = { + GeometryType.POINT: "POINT (0 0)", + GeometryType.LINE_STRING: "LINESTRING (0 0, 1 1)", + GeometryType.POLYGON: "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + GeometryType.MULTI_POLYGON: "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + GeometryType.MULTI_LINE_STRING: "MULTILINESTRING ((0 0, 1 1))", +} + + +_PRIMITIVE_DEFAULTS: dict[str, object] = { + "str": "", + "NoWhitespaceString": "", + "HttpUrl": "https://example.com/", + "EmailStr": "user@example.com", + "bool": False, + "bytes": b"", + "datetime": "2024-01-01T00:00:00Z", + "date": "2024-01-01", +} + + +def _bbox_value() -> dict[str, float]: + return {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0} + + +# Field-name overrides applied before any shape-based value generation in +# `value_for_field`. Each builder receives `(field_spec, spec_name)`. +_SPECIAL_FIELD_VALUES: dict[str, Callable[[FieldSpec, str], object]] = { + "id": lambda _f, spec_name: str(uuid.uuid5(_BASE_ROW_NAMESPACE, spec_name)), + "bbox": lambda _f, _spec_name: _bbox_value(), +} + + +def _is_geometry_terminal(terminal: Primitive) -> bool: + """Whether this terminal represents a geometry value. + + Only fires for the `Geometry` source class. Fields wanting a + geometry value must declare `Geometry`; ad-hoc forms like + `Annotated[bytes, GeometryTypeConstraint(...)]` aren't recognized. + """ + return terminal.source_type is Geometry + + +def generate_base_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: + """Produce a minimal valid row from a feature spec (required fields only). 
+ + The row passes `TypeAdapter(validation_type).validate_python()`. + + Parameters + ---------- + spec + An expanded feature spec. + index + Position within a parent list. Non-zero values suffix string fields + to ensure uniqueness across list items. + """ + return _build_row(spec, index=index, populate_optional=False) + + +def generate_populated_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: + """Produce a fully populated valid row (all fields, including optional). + + Sub-models are recursively populated. + + Parameters + ---------- + spec + An expanded feature spec. + index + Position within a parent list. Non-zero values suffix string fields + to ensure uniqueness across list items. + """ + return _build_row(spec, index=index, populate_optional=True) + + +def generate_arm_rows(spec: FeatureSpec) -> dict[str, dict[str, Any]]: + """Produce one minimal valid row per discriminator arm of a union. + + Returns `{arm_value: row}` where each row passes TypeAdapter + validation against the union's source annotation. + + Parameters + ---------- + spec + An expanded union spec. + """ + return _build_arm_rows(_require_union(spec), populate_optional=False) + + +def generate_populated_arm_rows( + spec: FeatureSpec, +) -> dict[str, dict[str, Any]]: + """Produce one fully populated valid row per discriminator arm. + + Returns `{arm_value: row}` where each row passes TypeAdapter + validation and includes all optional fields with valid values. + + Parameters + ---------- + spec + An expanded union spec. 
+ """ + return _build_arm_rows(_require_union(spec), populate_optional=True) + + +def _require_union(spec: FeatureSpec) -> UnionSpec: + if not isinstance(spec, UnionSpec): + raise TypeError( + f"Expected a UnionSpec, got {type(spec).__name__}: {spec.name!r}" + ) + return spec + + +def _build_row( + spec: FeatureSpec, + *, + index: int = 0, + populate_optional: bool, + name_override: str | None = None, +) -> dict[str, Any]: + row: dict[str, Any] = {} + name = name_override or spec.name + for field in spec.fields: + if not populate_optional and not field.is_required: + continue + row[field.name] = value_for_field( + field, name, index=index, populate_optional=populate_optional + ) + _satisfy_model_constraints(row, spec) + return row + + +def _build_arm_rows( + spec: UnionSpec, + *, + populate_optional: bool, +) -> dict[str, dict[str, Any]]: + if spec.discriminator_field is None or spec.discriminator_mapping is None: + raise ValueError(f"UnionSpec {spec.name!r} has no discriminator") + if spec.constraints: + # Per-arm rows are built from member specs only; union-level + # constraints (e.g. radio_group on the union itself) would need + # `_satisfy_model_constraints` applied with the union's field + # list. No schema exercises this today; raise so a future union + # that adds one fails loudly rather than producing invalid rows. 
+ raise NotImplementedError( + f"UnionSpec {spec.name!r} has {len(spec.constraints)} model " + "constraint(s); per-arm row generation does not enforce them" + ) + spec_by_class = {ms.member_cls: ms.spec for ms in spec.member_specs} + result: dict[str, dict[str, Any]] = {} + for arm_val, member_cls in spec.discriminator_mapping.items(): + row = _build_row( + spec_by_class[member_cls], + populate_optional=populate_optional, + name_override=spec.name, + ) + row[spec.discriminator_field] = arm_val + result[arm_val] = row + return result + + +def _row_satisfies_condition(row: dict[str, Any], condition: object) -> bool: + """Check whether a FieldEqCondition is satisfied by the row's current values.""" + if not isinstance(condition, FieldEqCondition): + return False + cond_value = condition.value + if isinstance(cond_value, Enum): + cond_value = cond_value.value + return row.get(condition.field_name) == cond_value + + +def _satisfy_model_constraints(row: dict[str, Any], spec: FeatureSpec) -> None: + """Adjust *row* so each model constraint is satisfied. + + `require_if`/`radio_group`/`require_any_of`/`min_fields_set` fill in + optional fields the constraint makes mandatory. `forbid_if` removes + fields the constraint excludes. Constraints whose guard predicate is + false (e.g. a `RequireIf` whose condition does not hold for the + current row) need no adjustment and pass through; any constraint type + not matched by an arm here is silently skipped, intentionally -- new + constraint kinds surface via `dispatch_model_constraint` (which + raises) rather than here. 
+ """ + fields_by_name = {f.name: f for f in spec.fields} + for constraint in spec.constraints: + match constraint: + case RequireIfConstraint() if _row_satisfies_condition( + row, constraint.condition + ): + for field_name in constraint.field_names: + if field_name in row: + continue + field_spec = fields_by_name.get(field_name) + if field_spec is not None: + row[field_name] = value_for_field(field_spec, spec.name) + case RadioGroupConstraint() if not any( + row.get(fn) is True for fn in constraint.field_names + ): + for field_name in constraint.field_names: + if field_name in fields_by_name: + row[field_name] = True + break + case RequireAnyOfConstraint() if not any( + fn in row for fn in constraint.field_names + ): + for field_name in constraint.field_names: + field_spec = fields_by_name.get(field_name) + if field_spec is not None: + row[field_name] = value_for_field(field_spec, spec.name) + break + case ForbidIfConstraint() if _row_satisfies_condition( + row, constraint.condition + ): + for field_name in constraint.field_names: + row.pop(field_name, None) + case MinFieldsSetConstraint(count=count): + # Mirror Pydantic's `model_fields_set` semantics: every + # required field is "set" by the constructor, and counts + # alongside any non-null optional field. Required fields + # are always populated by the time we reach this branch, + # so satisfying `count` may need extra optional fills. + missing = count - sum(1 for f in spec.fields if f.name in row) + for opt_field in (f for f in spec.fields if not f.is_required): + if missing <= 0: + break + if opt_field.name in row: + continue + row[opt_field.name] = value_for_field(opt_field, spec.name) + missing -= 1 + + +def value_for_field( + field: FieldSpec, + spec_name: str, + *, + index: int = 0, + populate_optional: bool = False, +) -> object: + """Produce a valid value for a single field. 
+ + Consults field constraints via `dispatch_constraint` to produce + constraint-satisfying values (e.g., a valid country code instead of + an empty string). + + Parameters + ---------- + field + The field spec to produce a value for. + spec_name + The name of the containing spec, used for deterministic UUID generation. + index + Position within a parent list. Non-zero values suffix string fields + to ensure uniqueness across list items. + populate_optional + When True, MODEL and UNION sub-rows include optional fields via + `generate_populated_row`. When False (default), sub-rows are sparse + via `generate_base_row`. + """ + special = _SPECIAL_FIELD_VALUES.get(field.name) + if special is not None: + return special(field, spec_name) + + shape = field.shape + + # Geometry fields short-circuit to a WKT literal. PySpark's Geometry + # validator parses WKT via `from_wkt`; the field is stored as + # BinaryType (WKB) downstream. + terminal = terminal_primitive(shape) + if terminal is not None and _is_geometry_terminal(terminal): + return _geometry_wkt_from_shape_constraints(terminal.constraints) + + # Non-list fields: try a constraint-driven value (e.g. CountryCode -> "US") + # before falling back to type defaults. The terminal scalar carries the + # constraints directly in the no-list case. Lists go through the recursive + # shape walk so array-level constraints and per-element constraints both + # get a chance to drive value generation. + if not has_array_layer(shape) and terminal is not None: + constraint_val = _value_from_scalar_constraints(terminal) + if constraint_val is not None: + if index > 0 and isinstance(constraint_val, str): + return f"{constraint_val}{index}" + return constraint_val + + return _value_for_shape( + shape, + index=index, + check_constraints=False, + populate_optional=populate_optional, + ) + + +def _widest_union_member(union: UnionSpec) -> ModelSpec: + """Pick the union member whose fields have the highest cumulative Spark type rank. 
+ + When multiple union members share a field name with different numeric + types (e.g. `value: uint8` in one variant and `value: float64` in + another), PySpark widens the column to the broadest type (DoubleType). + Generating a row from the narrower member produces Python `int` values + that PySpark silently converts to null in `DoubleType` columns. + + By selecting the member with the widest field types, the generated row + uses Python `float` values that PySpark accepts in `DoubleType` columns. + """ + best_spec = union.member_specs[0].spec + best_rank = -1 + for member in union.member_specs: + field_ranks = [spark_type_rank(f) for f in member.spec.fields] + rank = sum(r for r in field_ranks if r >= 0) + if rank > best_rank: + best_rank = rank + best_spec = member.spec + return best_spec + + +def _row_from_model_spec( + spec: ModelSpec, + *, + index: int = 0, + populate_optional: bool = False, +) -> dict[str, Any]: + """Generate a row dict from an already-extracted model spec.""" + if populate_optional: + return generate_populated_row(spec, index=index) + return generate_base_row(spec, index=index) + + +def _value_for_shape( + shape: FieldShape, + *, + index: int = 0, + check_constraints: bool = True, + populate_optional: bool = False, +) -> object: + """Produce a valid value from a `FieldShape`. + + Each shape layer carries its own constraints: `ArrayOf`'s + constraints drive list-length decisions; the element shape's + constraints (visible after descending into `element`) drive + per-item value generation. + + Parameters + ---------- + shape + The field shape to produce a value for. + index + Array element index, used to suffix strings for uniqueness. + check_constraints + When True, attempt constraint-driven value generation at the + terminal Scalar before falling back to a primitive default. + populate_optional + When True, MODEL and UNION sub-rows include optional fields via + `generate_populated_row`. 
When False (default), sub-rows are + sparse via `generate_base_row`. + """ + match shape: + case ArrayOf(element=element, constraints=array_constraints): + list_val = _list_value_from_shape_constraints(array_constraints) + if list_val is not None: + return list_val + count = _min_length_from_shape_constraints(array_constraints) + return [ + _value_for_shape(element, index=i, populate_optional=populate_optional) + for i in range(count) + ] + + case NewTypeShape(inner=inner): + return _value_for_shape( + inner, + index=index, + check_constraints=check_constraints, + populate_optional=populate_optional, + ) + + case MapOf(): + return {} + + case LiteralScalar(values=values): + val = values[0] + return val.value if isinstance(val, Enum) else val + + case Primitive(source_type=cls) if ( + cls is not None and isinstance(cls, type) and issubclass(cls, Enum) + ): + return list(cls)[0].value # type: ignore[call-overload] + + case ModelRef(model=m): + return _row_from_model_spec( + m, index=index, populate_optional=populate_optional + ) + + case UnionRef(union=u): + # The selected member's discriminator field is a `Literal[X] = "x"` + # with a default, so it has `is_required=False`. In the populated + # case the LiteralScalar branch writes the literal explicitly; in + # the sparse case the field is omitted from the dict and Pydantic + # supplies the default during `TypeAdapter.validate_python()`. + return _row_from_model_spec( + _widest_union_member(u), + index=index, + populate_optional=populate_optional, + ) + + case AnyScalar(): + # Unreachable today: the only `AnyScalar` is a `MapOf` value + # type, and the `MapOf` case returns `{}` without descending. 
+ raise TypeError( + "AnyScalar reached base-row generation; no value strategy exists" + ) + + case Primitive() as scalar: + constraint_val: object | None = None + if check_constraints: + constraint_val = _value_from_scalar_constraints(scalar) + val = ( + constraint_val + if constraint_val is not None + else _primitive_default(scalar.base_type) + ) + if index > 0 and isinstance(val, str): + val = f"{val}{index}" + return val + + raise TypeError(f"Unhandled FieldShape: {shape!r}") + + +def _value_from_check_bounds( + desc: ExpressionDescriptor, scalar: Primitive, cs: ConstraintSource +) -> object | None: + # Skip structural bounds from numeric primitive NewTypes (int32, uint8, ...). + # Those bounds match Spark/Parquet types structurally -- the type system + # already enforces the range. Only semantic bounds (from field-level + # constraints or semantic NewTypes like FeatureVersion) produce values. + if cs.source_name == scalar.base_type: + return None + return _valid_bound_for_base_row(desc) + + +def _value_from_check_enum( + desc: ExpressionDescriptor, _scalar: Primitive, _cs: ConstraintSource +) -> object: + """Return the first allowed value from a `check_enum` descriptor.""" + return desc.args[0][0] # type: ignore[index,no-any-return] + + +def _value_from_check_string_min_length( + _desc: ExpressionDescriptor, _scalar: Primitive, _cs: ConstraintSource +) -> str: + """Return any single character; satisfies `min_length>=1` for every schema today.""" + return "a" + + +# Builders for descriptor-driven values, keyed by `ExpressionDescriptor.function`. +# Functions absent from this table are intentionally skipped -- notably +# `check_pattern`, since matching strings can't be generated generically. 
+_DESCRIPTOR_VALUE_BUILDERS: dict[ + str, Callable[[ExpressionDescriptor, Primitive, ConstraintSource], object | None] +] = { + "check_enum": _value_from_check_enum, + "check_bounds": _value_from_check_bounds, + "check_string_min_length": _value_from_check_string_min_length, +} + + +_CONSTRAINT_VALID_VALUES: dict[type, object] = { + CountryCodeAlpha2Constraint: "US", + HexColorConstraint: "#aabbcc", + JsonPointerConstraint: "/valid/pointer", + LanguageTagConstraint: "en", + PhoneNumberConstraint: "+1 555-555-5555", + RegionCodeConstraint: "US-CA", + SnakeCaseConstraint: "snake_case", + StrippedConstraint: "clean", + WikidataIdConstraint: "Q42", +} + +_CONSTRAINT_VALID_LIST_VALUES: dict[type, list[object]] = { + LinearReferenceRangeConstraint: [0.0, 1.0], +} + + +def _value_from_scalar_constraints(scalar: Primitive) -> object | None: + """Return a value satisfying the first dispatched constraint. + + Maps known constraint types to valid values directly, then dispatches + remaining constraints through `_DESCRIPTOR_VALUE_BUILDERS` keyed on + the `ExpressionDescriptor` function name. Assumes constraints on a + single field don't conflict; no schema today mixes constraints in a + way that would expose a conflict. 
+ """ + for cs in scalar.constraints: + constraint_type = type(cs.constraint) + if constraint_type in _CONSTRAINT_VALID_VALUES: + return _CONSTRAINT_VALID_VALUES[constraint_type] + desc = dispatch_constraint(cs.constraint, base_type=scalar.base_type) + if desc is None: + continue + builder = _DESCRIPTOR_VALUE_BUILDERS.get(desc.function) + if builder is None: + continue + val = builder(desc, scalar, cs) + if val is not None: + return val + return None + + +def _list_value_from_shape_constraints( + constraints: tuple[ConstraintSource, ...], +) -> list[object] | None: + """Return a fixed valid list value if a list-level constraint requires it.""" + for cs in constraints: + val = _CONSTRAINT_VALID_LIST_VALUES.get(type(cs.constraint)) + if val is not None: + return val + return None + + +def _min_length_from_shape_constraints( + constraints: tuple[ConstraintSource, ...], +) -> int: + """Extract the array min_length from constraints anchored at this layer. + + Constraints sit on the `ArrayOf` whose iteration they govern, so any + `ArrayMinLen` we see here applies to this list level directly -- no + anchor arithmetic is required. 
+ """ + for cs in constraints: + if isinstance(cs.constraint, ArrayMinLen): + return max(cs.constraint.min_length, 1) + return 1 + + +def _valid_bound_for_base_row(desc: ExpressionDescriptor) -> object: + """Produce a value satisfying a bounds check for base row generation.""" + kwargs = dict(desc.kwargs) + if "ge" in kwargs: + return kwargs["ge"] + if "gt" in kwargs: + return kwargs["gt"] + 1 # type: ignore[operator] + if "le" in kwargs: + return kwargs["le"] + if "lt" in kwargs: + return kwargs["lt"] - 1 # type: ignore[operator] + return 0 + + +def _primitive_default(base_type: str) -> object: + """Return a type-appropriate default for a primitive base_type.""" + explicit = _PRIMITIVE_DEFAULTS.get(base_type) + if explicit is not None: + return explicit + # Numeric types: match prefixes like int32, uint8, float64, double + lower = base_type.lower() + if lower.startswith(("float", "double")): + return 0.0 + if lower.startswith(("int", "uint")): + return 0 + # Fallback for string-like types + return "" + + +def _geometry_wkt_from_shape_constraints( + constraints: tuple[ConstraintSource, ...], +) -> str: + """Extract the allowed geometry type from constraints and return valid WKT.""" + for cs in constraints: + if isinstance(cs.constraint, GeometryTypeConstraint): + geom_type = cs.constraint.allowed_types[0] + wkt = _VALID_GEOMETRY_WKT.get(geom_type) + if wkt is not None: + return wkt + raise ValueError(f"No WKT defined for geometry type: {geom_type!r}") + # No constraint — default to POINT + return _VALID_GEOMETRY_WKT[GeometryType.POINT] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py new file mode 100644 index 000000000..055cb2c51 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py @@ -0,0 +1,129 @@ +"""Generate constraint-violating values for the 
rendered conformance tests. + +`invalid_value` returns a concrete value that violates a given check. The +generated tests inject these into otherwise-valid rows to confirm that +each constraint produces the expected violation. +""" + +from __future__ import annotations + +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + LanguageTagConstraint, + NoWhitespaceConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + WikidataIdConstraint, +) +from overture.schema.system.primitive.geom import GeometryType + +from ..constraint_dispatch import ExpressionDescriptor + +__all__ = ["invalid_value"] + +# Ordered candidates for the invalid geometry side (first not in allowed set wins) +_INVALID_GEOMETRY_CANDIDATES: tuple[tuple[GeometryType, str], ...] = ( + (GeometryType.POINT, "POINT (0 0)"), + (GeometryType.LINE_STRING, "LINESTRING (0 0, 1 1)"), + (GeometryType.GEOMETRY_COLLECTION, "GEOMETRYCOLLECTION EMPTY"), +) + + +# Pattern-constraint -> sample value that violates the pattern. +# Used by `check_pattern` whose constraint_type identifies which validator. +_INVALID_PATTERN_VALUES: dict[type, str] = { + NoWhitespaceConstraint: "has whitespace", + CountryCodeAlpha2Constraint: "99", + RegionCodeConstraint: "99-999", + SnakeCaseConstraint: "HAS SPACES", + PhoneNumberConstraint: "1234567890", + WikidataIdConstraint: "P999", + HexColorConstraint: "not-hex", + LanguageTagConstraint: "123", +} + +# Direct lookup: check function name -> invalid value (no descriptor inspection). 
+_INVALID_LITERALS: dict[str, object] = { + "check_required": None, + "check_enum": "__INVALID__", + "check_url_format": "not-a-url", + "check_url_length": "https://" + "x" * 2076, + "check_email": "not-an-email", + "check_stripped": " has spaces ", + "check_json_pointer": "no-slash", + "check_array_min_length": [], + "check_string_min_length": "", + "check_linear_range_length": [0.5], + "check_linear_range_bounds": [1.5, 2.0], + "check_linear_range_order": [0.8, 0.2], + "check_bbox_completeness": {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0}, + "check_bbox_lat_ordering": {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0}, + "check_bbox_lat_range": {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0}, +} + + +def invalid_value(desc: ExpressionDescriptor) -> object: + """Return a Python value that violates `desc`'s check function. + + Parameters + ---------- + desc + The expression descriptor to produce an invalid value for. + + Raises + ------ + ValueError + For unrecognised check function names or when all geometry candidates + are in the allowed set. + """ + fn = desc.function + if fn in _INVALID_LITERALS: + return _INVALID_LITERALS[fn] + if fn == "check_bounds": + return _invalid_bound(desc) + if fn == "check_pattern": + return _INVALID_PATTERN_VALUES.get(desc.constraint_type, "!!!INVALID!!!") # type: ignore[arg-type] + if fn == "check_array_max_length": + max_len = int(desc.args[0]) # type: ignore[call-overload] + return [{}] * (max_len + 1) + if fn == "check_string_max_length": + max_len = int(desc.args[0]) # type: ignore[call-overload] + return "x" * (max_len + 1) + if fn == "check_geometry_type": + return _invalid_geometry(desc) + raise ValueError(f"No invalid value defined for check function: {fn!r}") + + +def _invalid_bound(desc: ExpressionDescriptor) -> object: + """Produce a value violating a bounds check for invalid-value generation. + + The `ge` / `le` branches return one below / above the bound. 
For + `ge=0` this returns `-1`, which violates the bound but would also + underflow an unsigned base type. No schema today combines `ge=0` with + an unsigned terminal -- if that ever changes, the caller will need to + consult the base type and pick a sentinel (e.g. a string or null) for + the violating value. + """ + kwargs = dict(desc.kwargs) + if "ge" in kwargs: + return kwargs["ge"] - 1 # type: ignore[operator] + if "gt" in kwargs: + return kwargs["gt"] + if "le" in kwargs: + return kwargs["le"] + 1 # type: ignore[operator] + if "lt" in kwargs: + return kwargs["lt"] + raise ValueError(f"No recognised bound key in kwargs: {kwargs!r}") + + +def _invalid_geometry(desc: ExpressionDescriptor) -> str: + allowed = set(desc.args) + for geom_type, wkt in _INVALID_GEOMETRY_CANDIDATES: + if geom_type not in allowed: + return wkt + raise ValueError( + f"All geometry candidates are in the allowed set: {allowed!r}. " + "Cannot produce an invalid geometry value." + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py new file mode 100644 index 000000000..d78cf3c43 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py @@ -0,0 +1,264 @@ +"""Generate sparse path scaffolds for the rendered conformance tests. + +`generate_scaffold` builds a sparse dict that, when merged with a base +row, supplies the nested intermediates (optional structs, arrays) the +base row lacks but a check's field path requires. +`generate_model_scaffold` does the same for model-level constraints. +`leaf_list_depth` reports unaccounted-for list depth on a target field. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + PathSegment, +) + +from ...extraction.field_walk import has_array_layer, list_depth, terminal_model_ref +from ...extraction.specs import FeatureSpec, FieldSpec +from ..check_ir import ( + Check, + ElementGuard, + ModelCheck, +) +from .base_row import value_for_field + +__all__ = [ + "generate_model_scaffold", + "generate_scaffold", + "leaf_list_depth", +] + + +@dataclass(frozen=True, slots=True) +class _ElementDiscriminator: + """Discriminator value to seed at one nesting depth of the scaffold.""" + + field: str + value: str + depth: int + + +def _find_field_spec(fields: list[FieldSpec], name: str) -> FieldSpec | None: + """Find a FieldSpec by name in a list.""" + for f in fields: + if f.name == name: + return f + return None + + +def leaf_list_depth(field_path: FieldPath, spec: FeatureSpec) -> int: + """Return the unaccounted-for list depth of the leaf field. + + Walks the spec's field tree along *field_path* and returns the + leaf's `list_depth(shape)` minus any `iter_count` on the terminal + path segment. Paths whose terminal segment is itself an array + target the array's elements, so the mutation already operates one + level deep. Returns 0 when *field_path* is empty or when any + segment fails to resolve against *spec* (e.g. union arms that + don't share the path's intermediate fields). 
+ """ + segments = field_path.segments + if not segments: + return 0 + fields = list(spec.fields) + for seg in segments[:-1]: + field = _find_field_spec(fields, seg.name) + if field is None: + return 0 + model_ref = terminal_model_ref(field.shape) + if model_ref is None: + return 0 + fields = model_ref.model.fields + leaf_seg = segments[-1] + leaf = _find_field_spec(fields, leaf_seg.name) + if leaf is None: + return 0 + terminal_iter = leaf_seg.iter_count if isinstance(leaf_seg, ArraySegment) else 0 + return max(0, list_depth(leaf.shape) - terminal_iter) + + +def _required_siblings( + fields: list[FieldSpec], exclude: str, spec_name: str +) -> dict[str, Any]: + """Populate required siblings at one nesting level, excluding the target.""" + result: dict[str, Any] = {} + for f in fields: + if f.name == exclude or not f.is_required: + continue + result[f.name] = value_for_field(f, spec_name) + return result + + +def _walk_to_target( + segments: tuple[PathSegment, ...], + fields: list[FieldSpec], + spec_name: str, + *, + discriminator: _ElementDiscriminator | None, + current_depth: int = 0, +) -> dict[str, Any]: + """Recursively build the scaffold dict along the path segments.""" + if not segments: + return {} + + seg = segments[0] + remaining = segments[1:] + field_spec = _find_field_spec(fields, seg.name) + + inner: Any + child_model = ( + terminal_model_ref(field_spec.shape) if field_spec is not None else None + ) + if remaining and child_model is not None: + child_fields = child_model.model.fields + inner = _walk_to_target( + remaining, + child_fields, + spec_name, + discriminator=discriminator, + current_depth=current_depth + 1, + ) + siblings = _required_siblings(child_fields, remaining[0].name, spec_name) + inner = {**siblings, **inner} + elif not remaining and field_spec is not None: + inner = value_for_field(field_spec, spec_name) + else: + inner = {} + + if ( + isinstance(inner, dict) + and discriminator is not None + and current_depth == discriminator.depth + 
): + inner[discriminator.field] = discriminator.value + + # When the terminal segment is an array and the field itself is a list, + # `value_for_field` already wrapped the value -- skip extra wrapping. + if isinstance(seg, ArraySegment): + if ( + not remaining + and field_spec is not None + and has_array_layer(field_spec.shape) + ): + return {seg.name: inner} + wrapped: Any = inner + for _ in range(seg.iter_count): + wrapped = [wrapped] + return {seg.name: wrapped} + if remaining and field_spec is not None and has_array_layer(field_spec.shape): + return {seg.name: [inner]} + return {seg.name: inner} + + +def _element_discriminator(check: Check) -> _ElementDiscriminator | None: + """Return the element-level discriminator for a Check, or None. + + Bundles the discriminator field, the value to seed, and the depth at + which to seed it (the innermost array segment in the target path). + The check_ir invariant is that nested-union gating composes at most + one `ElementGuard` per Check; more than one would mean the gate + composition rule changed without updating the scaffold, so raise to + surface the gap rather than silently dropping guards. + """ + element_guards = [g for g in check.guards if isinstance(g, ElementGuard)] + if len(element_guards) > 1: + raise NotImplementedError( + f"Check carries {len(element_guards)} ElementGuards " + f"({element_guards!r}); the scaffold only seeds one. Update " + "the scaffold builder when the gate composition rule changes." 
+ ) + if not element_guards or not element_guards[0].values: + return None + guard = element_guards[0] + segments = check.target.segments + for i in range(len(segments) - 1, -1, -1): + if isinstance(segments[i], ArraySegment): + return _ElementDiscriminator( + field=guard.discriminator, value=guard.values[0], depth=i + ) + return None + + +def generate_scaffold(check: Check, spec: FeatureSpec) -> dict[str, Any]: + """Build a sparse dict from null to the target field of a Check.""" + segments = check.target.segments + if not segments: + return {} + + if len(segments) == 1: + seg0 = segments[0] + field_spec = _find_field_spec(spec.fields, seg0.name) + if field_spec is None or field_spec.is_required: + return {} + return {seg0.name: value_for_field(field_spec, spec.name)} + + return _walk_to_target( + segments, + spec.fields, + spec.name, + discriminator=_element_discriminator(check), + ) + + +def generate_model_scaffold(check: ModelCheck, spec: FeatureSpec) -> dict[str, Any]: + """Build a sparse dict for a model-level check's nesting structure. + + Only top-level array columns are supported -- a `ScalarPath` target + returns `{}` (no scaffold needed at row root) and an `ArrayPath` + whose column lives inside a struct raises `NotImplementedError`. + No schema today places a list of model-constrained models inside a + struct field, so the case has no test coverage. 
+ """ + match check.target: + case ArrayPath() as target: + pass + case _: + return {} + column_prefix = target.column_prefix + if column_prefix.segments: + raise NotImplementedError( + "Multi-segment column paths (struct fields containing arrays) " + "require walking the parent tree from the root to the array " + f"column; got {target!r}" + ) + + field_spec = _find_field_spec(spec.fields, target.column_path) + if field_spec is None: + return {} + + inner_levels = target.iter_struct_paths + leaf_path = target.leaf + + inner: dict[str, Any] = {} + root_model = terminal_model_ref(field_spec.shape) + current_fields: list[FieldSpec] = root_model.model.fields if root_model else [] + nested = inner + + for level in inner_levels: + for part in level: + child_spec = _find_field_spec(current_fields, part) + child_is_list = child_spec is not None and has_array_layer(child_spec.shape) + child_model = ( + terminal_model_ref(child_spec.shape) if child_spec is not None else None + ) + if child_is_list: + nested[part] = [{}] + nested = nested[part][0] + else: + nested[part] = {} + nested = nested[part] + current_fields = child_model.model.fields if child_model else [] + + for part in leaf_path: + nested[part] = {} + nested = nested[part] + + if has_array_layer(field_spec.shape): + return {target.column_path: [inner]} + return {target.column_path: inner} if inner else {} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py new file mode 100644 index 000000000..bd933fb20 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py @@ -0,0 +1,423 @@ +"""Render Check / ModelCheck IR into generated conformance test modules.""" + +from __future__ import annotations + +from typing import Any, NamedTuple + +from typing_extensions import assert_never + +from overture.schema.system.field_path import ArrayPath + 
+from ..extraction.field import FieldShape +from ..extraction.field_walk import has_array_layer +from ..extraction.specs import FeatureSpec +from ._render_common import ( + check_name, + compute_label_suffixes, + disambiguate, + field_label, + jinja_env, + model_constraint_field_label, + parse_field_eq, + py_literal, +) +from .check_ir import ( + Check, + ColumnGuard, + ModelCheck, +) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + ModelConstraintDescriptor, + RadioGroup, + RequireAnyOf, + RequireIf, + model_constraint_function, + model_mutation_function, +) +from .test_data.invalid_value import invalid_value +from .test_data.scaffold import ( + generate_model_scaffold, + generate_scaffold, + leaf_list_depth, +) + +__all__ = ["render_test_module"] + + +def _check_belongs_to_arm(check: Check, arm: str) -> bool: + """Return True when a Check applies to a given union arm. + + The outermost union's discriminator surfaces as `ColumnGuard`s; inner + unions use `ElementGuard`s on a different discriminator field and are + irrelevant to arm filtering. A check belongs to *arm* when every + `ColumnGuard` admits it (guards are AND-composed). + """ + column_guards = [g for g in check.guards if isinstance(g, ColumnGuard)] + return all(arm in g.values for g in column_guards) + + +def _model_check_belongs_to_arm(check: ModelCheck, arm: str) -> bool: + """Return True when a ModelCheck applies to a given union arm. + + `ModelCheck.arm` is `None` for union-level constraints (which apply + regardless of discriminator) and set to a discriminator value for + constraints contributed by one specific member class. 
+ """ + return check.arm is None or check.arm == arm + + +def render_test_module( + feature_name: str, + field_checks: list[Check], + model_checks: list[ModelCheck], + *, + expression_import: str, + support_prefix: str, + base_row_sparse: dict[str, Any] | None = None, + base_row_populated: dict[str, Any] | None = None, + arm: str | None = None, + spec: FeatureSpec | None = None, +) -> str: + """Render a complete pytest test file for a feature's validation checks. + + Arm filtering uses two complementary signals. A field check's + `ColumnGuard`s identify the arms it belongs to. A model check's `arm` + attribute is set for member-specific constraints and `None` for + union-level constraints (which apply to every arm). + """ + if arm is not None: + field_checks = [c for c in field_checks if _check_belongs_to_arm(c, arm)] + model_checks = [c for c in model_checks if _model_check_belongs_to_arm(c, arm)] + + model_scenarios, used_mutation_fns = _render_model_scenarios( + feature_name, model_checks, spec + ) + field_scenarios, field_helpers = _render_field_check_scenarios( + feature_name, field_checks, spec + ) + used_mutation_fns |= field_helpers - {"set_at_path"} + + sparse_repr = py_literal(base_row_sparse) if base_row_sparse is not None else "{}" + populated_repr = ( + py_literal(base_row_populated) if base_row_populated is not None else "{}" + ) + + all_scenarios = field_scenarios + model_scenarios + + template = jinja_env().get_template("test_module.py.jinja2") + return template.render( + feature_name=feature_name, + schema_name=f"{feature_name.upper()}_SCHEMA", + mutation_imports=sorted(used_mutation_fns), + needs_set_at_path="set_at_path" in field_helpers, + base_row_sparse=sparse_repr, + base_row_populated=populated_repr, + scenarios=all_scenarios, + expression_import=expression_import, + support_prefix=support_prefix, + ) + + +def _scenario_entry( + *, + scenario_id: str, + scaffold: dict[str, Any], + mutate_expr: str, + expected_field: str, + expected_check: 
str, +) -> list[tuple[str, str]]: + """Build a rendered Scenario kwargs list for the test_module template.""" + return [ + ("id", py_literal(scenario_id)), + ("scaffold", py_literal(scaffold)), + ("mutate", mutate_expr), + ("expected_field", py_literal(expected_field)), + ("expected_check", py_literal(expected_check)), + ] + + +class _MutateExpr(NamedTuple): + """One rendered `mutate=` expression and the helper it imports. + + `helper` is `None` when the expression is a literal `set_at_path` + call (the default), and otherwise names a `mutate_*` helper from + `tests/_support/mutations.py` to import. + """ + + expr: str + helper: str | None + + +def _field_mutate_expr( + check: Check, desc: ExpressionDescriptor, spec: FeatureSpec | None +) -> _MutateExpr: + """Render the `mutate=` expression for one field-check descriptor. + + `check_struct_unique` calls the `mutate_unique_items` helper at the + target path; every other descriptor injects a constraint-violating + literal via `set_at_path`. + """ + target_repr = py_literal(str(check.target)) + if desc.function == "check_struct_unique": + return _MutateExpr( + f"lambda row: mutate_unique_items(row, {target_repr})", + "mutate_unique_items", + ) + iv_val = _wrap_for_list_leaf(invalid_value(desc), check, spec) + return _MutateExpr(f"set_at_path({target_repr}, {py_literal(iv_val)})", None) + + +def _render_field_check_scenarios( + feature_name: str, + field_checks: list[Check], + spec: FeatureSpec | None, +) -> tuple[list[list[tuple[str, str]]], set[str]]: + """Render Scenario entries for field-level checks. + + Returns the entries and the set of mutation helper names referenced + by them, mirroring `_render_model_scenarios`. 
+ """ + rows: list[tuple[Check, ExpressionDescriptor, str, str]] = [] + for check in field_checks: + label = field_label(check) + for desc in check.descriptors: + name = check_name(desc.function, desc.check_name) + rows.append((check, desc, label, name)) + + scenario_ids = disambiguate( + [f"{feature_name}::{label}:{name}" for _check, _desc, label, name in rows] + ) + + entries: list[list[tuple[str, str]]] = [] + used_helpers: set[str] = set() + for (check, desc, label, name), scenario_id in zip(rows, scenario_ids, strict=True): + scaffold = generate_scaffold(check, spec) if spec is not None else {} + try: + mutate = _field_mutate_expr(check, desc, spec) + except ValueError as exc: + raise ValueError( + f"Cannot render mutate expression for {scenario_id}: {exc}" + ) from exc + used_helpers.add(mutate.helper or "set_at_path") + entries.append( + _scenario_entry( + scenario_id=scenario_id, + scaffold=scaffold, + mutate_expr=mutate.expr, + expected_field=label, + expected_check=name, + ) + ) + + return entries, used_helpers + + +def _checks_array_element(check: Check) -> bool: + """True when the check fires on each element of an `ArrayPath` directly. + + The check target ends at the array (`leaf=()`), so the mutation + replaces an array element rather than a struct field on one. For + these checks, a `None` invalid value still needs list wrapping; for + nested struct fields, `None` already sits at the right level. 
+ """ + return isinstance(check.target, ArrayPath) and not check.target.leaf + + +def _wrap_for_list_leaf( + value: object, + check: Check, + spec: FeatureSpec | None, +) -> object: + """Wrap a scalar invalid value to match the field's list nesting depth.""" + if spec is None or isinstance(value, list): + return value + if value is None and not _checks_array_element(check): + return value + depth = leaf_list_depth(check.target, spec) + for _ in range(depth): + value = [value] + return value + + +def _render_model_scenarios( + feature_name: str, + model_checks: list[ModelCheck], + spec: FeatureSpec | None, +) -> tuple[list[list[tuple[str, str]]], set[str]]: + """Render Scenario entries for model-level checks. + + Returns the entries and the set of mutation helper names referenced + by them, so the caller can scope the test module's imports. + """ + entries: list[list[tuple[str, str]]] = [] + used_mutation_fns: set[str] = set() + label_suffixes = compute_label_suffixes(model_checks) + + for idx, mc in enumerate(model_checks): + desc = mc.descriptor + fn = model_constraint_function(desc) + mutation_fn = model_mutation_function(desc) + name = check_name(fn) + scenario_id = f"{feature_name}::model:{name}:{idx}" + label = model_constraint_field_label(mc, label_suffixes[idx]) + scaffold = generate_model_scaffold(mc, spec) if spec is not None else {} + + try: + call = _render_mutation_call(mutation_fn, desc, mc) + except ValueError as exc: + raise ValueError( + f"Cannot render mutation call for {scenario_id}: {exc}" + ) from exc + mutate_expr = f"lambda row: {call}" + used_mutation_fns.add(mutation_fn) + entries.append( + _scenario_entry( + scenario_id=scenario_id, + scaffold=scaffold, + mutate_expr=mutate_expr, + expected_field=label, + expected_check=name, + ) + ) + + return entries, used_mutation_fns + + +def _render_mutation_call( + mutation_fn: str, + desc: ModelConstraintDescriptor, + check: ModelCheck, +) -> str: + """Render a model mutation helper function call.""" 
+ fields_repr = py_literal(list(desc.field_names)) + + match desc: + case RequireIf() | ForbidIf(): + return _render_conditional_mutation_call( + mutation_fn, desc, check, fields_repr + ) + case RadioGroup(): + if isinstance(check.target, ArrayPath): + raise ValueError( + "mutate_radio_group does not accept array_path " + f"(target={check.target!r})" + ) + return f"{mutation_fn}(row, {fields_repr})" + case RequireAnyOf() | MinFieldsSet(): + parts = _array_kwargs_leaf(check, mutation_fn) + suffix = ", " + ", ".join(parts) if parts else "" + return f"{mutation_fn}(row, {fields_repr}{suffix})" + assert_never(desc) + + +def _render_conditional_mutation_call( + mutation_fn: str, + desc: RequireIf | ForbidIf, + check: ModelCheck, + fields_repr: str, +) -> str: + """Render a mutate_require_if or mutate_forbid_if call.""" + parsed = parse_field_eq(desc.condition) + fn = model_constraint_function(desc) + if parsed is None: + raise ValueError( + f"{fn} condition {desc.condition!r} is not a " + "FieldEqCondition or Not(FieldEqCondition); cannot render " + f"{mutation_fn} call" + ) + fill = _render_fill_values(desc) if isinstance(desc, ForbidIf) else None + kwarg_parts: list[str] = [] + if parsed.negated: + kwarg_parts.append("negate=True") + if fill: + kwarg_parts.append(f"fill_values={fill}") + kwarg_parts.extend(_array_kwargs_inner(check, mutation_fn)) + suffix = ", " + ", ".join(kwarg_parts) if kwarg_parts else "" + return ( + f"{mutation_fn}(row, {fields_repr}, " + f"{py_literal(parsed.field_name)}, {py_literal(parsed.value)}{suffix})" + ) + + +def _fill_value_literal(shape: FieldShape) -> str: + """Return a Python source literal for a type-appropriate non-null fill value.""" + if has_array_layer(shape): + return "[{}]" + return "{}" + + +def _render_fill_values(desc: ForbidIf) -> str | None: + """Render a `fill_values` dict literal for non-string ForbidIf targets.""" + if not desc.field_shapes: + return None + items = [ + f"{py_literal(name)}: 
{_fill_value_literal(shape)}" + for name, shape in desc.field_shapes + ] + return "{" + ", ".join(items) + "}" + + +def _array_kwargs_leaf(check: ModelCheck, mutation_fn: str) -> list[str]: + """Array kwargs for mutations accepting `struct_path` (a trailing leaf). + + Yields `array_path=...` and optionally `struct_path=...`. Inner array + iteration is rejected -- these mutations consume only the outermost + array level. + """ + if not isinstance(check.target, ArrayPath): + return [] + inner_struct_paths = check.target.iter_struct_paths + leaf_path = check.target.leaf + + if inner_struct_paths: + raise ValueError( + f"{mutation_fn} does not accept inner_array_path " + f"(inner struct paths={inner_struct_paths!r})" + ) + + kwargs = [f'array_path="{check.target.column_path}"'] + if leaf_path: + if len(leaf_path) > 1: + raise ValueError( + f"multi-segment leaf_path {leaf_path!r} not supported by " + f"{mutation_fn} (struct_path must be a single segment)" + ) + kwargs.append(f'struct_path="{leaf_path[0]}"') + return kwargs + + +def _array_kwargs_inner(check: ModelCheck, mutation_fn: str) -> list[str]: + """Array kwargs for mutations accepting `inner_array_path`. + + Yields `array_path=...` and optionally `inner_array_path=...`. A + trailing leaf path is rejected -- these mutations target an inner + array directly, not a struct field on its elements. 
+ """ + if not isinstance(check.target, ArrayPath): + return [] + inner_struct_paths = check.target.iter_struct_paths + leaf_path = check.target.leaf + + if leaf_path: + raise ValueError( + f"{mutation_fn} does not accept struct_path (leaf_path={leaf_path!r})" + ) + + kwargs = [f'array_path="{check.target.column_path}"'] + if inner_struct_paths: + if len(inner_struct_paths) > 1: + raise ValueError( + f"multi-level inner struct paths {inner_struct_paths!r} not supported by " + f"{mutation_fn} (inner_array_path consumes one iteration)" + ) + if not inner_struct_paths[0]: + raise ValueError( + f"empty inner struct path not supported by {mutation_fn} " + f"(target={check.target!r}); nested-iteration arrays without " + f"intermediate struct fields cannot be addressed via inner_array_path" + ) + kwargs.append(f'inner_array_path="{".".join(inner_struct_paths[0])}"') + return kwargs diff --git a/packages/overture-schema-codegen/tests/codegen_test_support.py b/packages/overture-schema-codegen/tests/codegen_test_support.py index 2a18faf13..4bdea9f62 100644 --- a/packages/overture-schema-codegen/tests/codegen_test_support.py +++ b/packages/overture-schema-codegen/tests/codegen_test_support.py @@ -12,19 +12,27 @@ from typing import Annotated, Generic, Literal, NewType, TypeVar import pytest +from annotated_types import MinLen +from overture.schema.codegen.extraction.field import LiteralScalar, Primitive +from overture.schema.codegen.extraction.field_walk import terminal_of from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type from overture.schema.codegen.extraction.specs import ( AnnotatedField, EnumMemberSpec, EnumSpec, + FeatureSpec, FieldSpec, + MemberSpec, ModelSpec, TypeIdentity, UnionSpec, is_model_class, + is_union_alias, + partitions_from_tags, ) -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from 
overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.system.discovery import ( TagSelector, discover_models, @@ -33,7 +41,7 @@ from overture.schema.system.discovery.tag import get_values_for_key from overture.schema.system.doc import DocumentedEnum from overture.schema.system.field_constraint import UniqueItemsConstraint -from overture.schema.system.model_constraint import require_any_of +from overture.schema.system.model_constraint import radio_group, require_any_of from overture.schema.system.primitive import ( Geometry, GeometryType, @@ -45,7 +53,7 @@ from overture.schema.system.string import HexColor, LanguageTag, StrippedString from pydantic import BaseModel, EmailStr, Field, HttpUrl -STR_TYPE = TypeInfo(base_type="str", kind=TypeKind.PRIMITIVE) +STR_TYPE = Primitive(base_type="str") ThemeT = TypeVar("ThemeT") TypeT = TypeVar("TypeT") @@ -210,6 +218,20 @@ class FeatureWithUrl(FeatureBase[Literal["test"], Literal["linked"]]): emails: list[EmailStr] | None = None +class DatasetEntry(BaseModel): + """A dataset with required URL fields.""" + + name: str = Field(description="Dataset name") + url: HttpUrl + download_urls: list[HttpUrl] | None = None + + +class FeatureWithRequiredUrl(FeatureBase[Literal["test"], Literal["urlreq"]]): + """A feature with required URL fields at multiple nesting levels.""" + + datasets: list[DatasetEntry] + + HTTP_URL_SPEC = extract_pydantic_type(HttpUrl) EMAIL_STR_SPEC = extract_pydantic_type(EmailStr) @@ -243,6 +265,49 @@ class WaterSegment(SegmentBase): ] +class ShortNamesSegment(SegmentBase): + """Segment variant whose `aliases` requires at least one entry.""" + + subtype: Literal["short"] + aliases: Annotated[list[str], Field(min_length=1)] | None = None + + +class LongNamesSegment(SegmentBase): + """Segment variant whose `aliases` requires at least five entries.""" + + subtype: Literal["long"] + aliases: 
Annotated[list[str], Field(min_length=5)] | None = None + + +TestSegmentDivergingConstraints = Annotated[ + ShortNamesSegment | LongNamesSegment, + Field(description="Union whose members declare diverging field constraints"), +] + + +class VehicleKind(str, Enum): + """Vehicle classification.""" + + CAR = "car" + BIKE = "bike" + + +class CarVariant(SegmentBase): + subtype: Literal[VehicleKind.CAR] + doors: int | None = None + + +class BikeVariant(SegmentBase): + subtype: Literal[VehicleKind.BIKE] + has_basket: bool | None = None + + +TestEnumDiscriminatorUnion = Annotated[ + CarVariant | BikeVariant, + Field(description="Union with enum-valued discriminator", discriminator="subtype"), +] + + class ContactInfo(BaseModel): """Contact information for a venue.""" @@ -273,16 +338,23 @@ def make_union_spec( common_base: type[BaseModel] | None = None, entry_point: str | None = None, ) -> UnionSpec: - """Build a UnionSpec with sensible defaults for tests.""" + """Build a UnionSpec with sensible defaults for tests. + + `member_specs` is derived from `members` via `extract_model`, matching + what `extract_union` produces, so specs built here behave the same + through `_model_checks_for_union` and the base-row generators. 
+ """ + members = members or [] return UnionSpec( name=name, description=description, annotated_fields=annotated_fields or [], - members=members or [], + members=members, discriminator_field=None, discriminator_mapping=None, source_annotation=source_annotation, common_base=common_base or BaseModel, + member_specs=[MemberSpec(m, extract_model(m)) for m in members], entry_point=entry_point, ) @@ -333,8 +405,9 @@ def assert_literal_field( ) -> None: """Assert a field is a single-value Literal with the expected value.""" field = find_field(spec, field_name) - assert field.type_info.kind == TypeKind.LITERAL - assert field.type_info.literal_values == (expected_value,) + terminal = terminal_of(field.shape) + assert isinstance(terminal, LiteralScalar) + assert terminal.values == (expected_value,) def flat_specs_from_discovery( @@ -354,6 +427,67 @@ def flat_specs_from_discovery( return result +class LiteralSubtypeModel(BaseModel): + """Model with a required Literal field and an optional string.""" + + subtype: Literal["a", "b", "c"] + name: str | None = None + + +class TripleInnerModel(BaseModel): + tag: Annotated[str, MinLen(1)] + + +class TripleNestedArrayModel(BaseModel): + deep: list[list[list[TripleInnerModel]]] + + +@radio_group("a", "b") +class RadioModel(BaseModel): + a: bool = False + b: bool = False + + +@require_any_of("x", "y") +class RequireAnyModel(BaseModel): + x: str | None = None + y: str | None = None + + +def discover_feature(class_name: str) -> FeatureSpec: + """Discover and extract a feature spec by class name.""" + models = discover_models() + for key, entry in models.items(): + partitions = partitions_from_tags(key.tags) + if is_model_class(entry) and entry.__name__ == class_name: + return extract_model( + entry, entry_point=key.entry_point, partitions=partitions + ) + if is_union_alias(entry) and entry_point_class(key.entry_point) == class_name: + return extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + 
partitions=partitions, + ) + raise LookupError(f"{class_name} not found in discovered models") + + +def feature_spec_for_model( + cls: type[BaseModel], + *, + entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, +) -> ModelSpec: + """Extract a model class for tests; sub-specs are populated by extract_model.""" + return extract_model(cls, entry_point=entry_point, partitions=partitions) + + +def union_spec_for(name: str, union_type: object) -> UnionSpec: + """Extract a discriminated-union annotation for tests.""" + return extract_union(name, union_type) + + def assert_golden(actual: str, golden_path: Path, *, update: bool) -> None: """Compare rendered output against a golden file. diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py index 606e1837f..d81843027 100644 --- a/packages/overture-schema-codegen/tests/test_cli.py +++ b/packages/overture-schema-codegen/tests/test_cli.py @@ -438,6 +438,62 @@ def test_segment_appears_in_markdown_output( assert "subtype" in content +class TestCliGeneratePyspark: + def test_pyspark_format_accepted(self, cli_runner: CliRunner) -> None: + """pyspark format should be a valid --format choice.""" + result = cli_runner.invoke(cli, ["generate", "--format", "pyspark"]) + assert "Invalid value" not in (result.output or "") + assert result.exit_code == 0 + + def test_pyspark_to_output_dir(self, cli_runner: CliRunner, tmp_path: Path) -> None: + """pyspark format with --output-dir should create expression files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "pyspark", + "--tag", + "overture:theme=divisions", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + py_files = list(tmp_path.rglob("*.py")) + assert len(py_files) > 0 + names = {f.stem for f in py_files} + assert "division_area" in names + + def test_pyspark_writes_under_entry_point_namespace( + self, cli_runner: CliRunner, tmp_path: Path + ) 
-> None: + """Expression modules land under the entry-point namespace, no extra `expressions/` wrapper.""" + output_dir = tmp_path / "expressions" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "pyspark", + "--tag", + "overture:theme=divisions", + "--output-dir", + str(output_dir), + ], + ) + assert result.exit_code == 0 + + # Modules land under the entry-point namespace. + assert (output_dir / "overture" / "schema" / "divisions").is_dir() + + # No nested expressions/ subdirectory. + nested = output_dir / "expressions" + assert not nested.exists(), ( + f"Nested expressions/ directory found: {list(nested.iterdir())}" + ) + + class TestReverseReferences: """Integration test: Reverse references appear in generated markdown.""" diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py index cd31f2554..4ae9f2dff 100644 --- a/packages/overture-schema-codegen/tests/test_constraint_description.py +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -1,10 +1,16 @@ """Tests for constraint description (model-level and field-level).""" -from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.codegen.extraction.field_constraints import ( constraint_display_text, describe_field_constraint, ) +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) from overture.schema.codegen.extraction.model_constraints import ( analyze_model_constraints, ) @@ -339,11 +345,27 @@ def test_gt(self) -> None: def test_lt(self) -> None: assert describe_field_constraint(Lt(lt=100)) == "`< 100`" - def test_min_len(self) -> None: - assert describe_field_constraint(MinLen(min_length=1)) == "Minimum length: 1" + def test_scalar_min_len(self) -> None: + assert ( + 
describe_field_constraint(ScalarMinLen(min_length=1)) == "Minimum length: 1" + ) + + def test_array_min_len(self) -> None: + assert ( + describe_field_constraint(ArrayMinLen(min_length=1)) == "Minimum length: 1" + ) - def test_max_len(self) -> None: - assert describe_field_constraint(MaxLen(max_length=10)) == "Maximum length: 10" + def test_scalar_max_len(self) -> None: + assert ( + describe_field_constraint(ScalarMaxLen(max_length=10)) + == "Maximum length: 10" + ) + + def test_array_max_len(self) -> None: + assert ( + describe_field_constraint(ArrayMaxLen(max_length=10)) + == "Maximum length: 10" + ) def test_interval_closed(self) -> None: assert describe_field_constraint(Interval(ge=0, le=100)) == "`0 ≤ x ≤ 100`" diff --git a/packages/overture-schema-codegen/tests/test_example_loader.py b/packages/overture-schema-codegen/tests/test_example_loader.py index 1f94bc06d..541b81282 100644 --- a/packages/overture-schema-codegen/tests/test_example_loader.py +++ b/packages/overture-schema-codegen/tests/test_example_loader.py @@ -9,6 +9,7 @@ from typing import Annotated, Literal import pytest +from overture.schema.buildings.building import Building from overture.schema.codegen.extraction.examples import ( ExampleRecord, _inject_literal_fields, @@ -20,7 +21,14 @@ resolve_pyproject_path, validate_example, ) +from overture.schema.system.primitive import BBox, Geometry +from overture.schema.transportation import Segment +from overture.schema.transportation.segment.models import ( + RoadSegment, + TransportationSegment, +) from pydantic import BaseModel, ConfigDict, Field, Tag, ValidationError +from shapely.geometry import Point class TestOrderExampleRows: @@ -639,10 +647,6 @@ class TestIntegration: def test_real_building_examples_validate(self) -> None: """Validate real Building examples from the schema package.""" - pytest.importorskip("overture.schema.buildings.building") - - from overture.schema.buildings.building import Building # noqa: PLC0415 - pyproject_path = 
resolve_pyproject_path(Building) assert pyproject_path is not None, "Could not find pyproject.toml for Building" @@ -657,14 +661,6 @@ def test_real_building_examples_validate(self) -> None: def test_real_segment_examples_validate(self) -> None: """Validate real Segment examples (discriminated union with cross-arm fields).""" - pytest.importorskip("overture.schema.transportation") - - from overture.schema.transportation import Segment # noqa: PLC0415 - from overture.schema.transportation.segment.models import ( # noqa: PLC0415 - RoadSegment, - TransportationSegment, - ) - pyproject_path = resolve_pyproject_path(RoadSegment) assert pyproject_path is not None @@ -889,7 +885,6 @@ class Aliased(BaseModel): def test_slots_based_field_flattened(self) -> None: """Non-BaseModel types with __slots__ and properties are flattened.""" - from overture.schema.system.primitive import BBox # noqa: PLC0415 class WithBBox(BaseModel): id: str @@ -907,7 +902,6 @@ class WithBBox(BaseModel): def test_none_slots_based_field_is_leaf(self) -> None: """A slots-based field with None value is a leaf.""" - from overture.schema.system.primitive import BBox # noqa: PLC0415 class WithBBox(BaseModel): id: str @@ -919,8 +913,6 @@ class WithBBox(BaseModel): def test_single_slot_wrapper_is_leaf(self) -> None: """Single-slot types (wrappers like Geometry) are leaf values.""" - from overture.schema.system.primitive import Geometry # noqa: PLC0415 - from shapely.geometry import Point # noqa: PLC0415 class WithGeom(BaseModel): id: str diff --git a/packages/overture-schema-codegen/tests/test_field_walk.py b/packages/overture-schema-codegen/tests/test_field_walk.py new file mode 100644 index 000000000..d0d493cf9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_field_walk.py @@ -0,0 +1,164 @@ +"""Tests for the `FieldShape` walker and structural helpers.""" + +import pytest +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + LiteralScalar, + MapOf, + ModelRef, + 
NewTypeShape, + Primitive, + UnionRef, +) +from overture.schema.codegen.extraction.field_walk import ( + shape_children, + terminal_model_ref, + terminal_of, + terminal_primitive, + terminal_scalar, + walk_shape, +) + + +class TestShapeChildren: + """Direct child enumeration over `FieldShape`.""" + + def test_scalar_has_no_children(self) -> None: + assert list(shape_children(Primitive(base_type="str"))) == [] + + def test_array_yields_element(self) -> None: + inner = Primitive(base_type="int32") + assert list(shape_children(ArrayOf(element=inner))) == [inner] + + def test_map_yields_key_then_value(self) -> None: + k = Primitive(base_type="str") + v = Primitive(base_type="int32") + assert list(shape_children(MapOf(key=k, value=v))) == [k, v] + + def test_model_ref_has_no_children(self) -> None: + sentinel = object() + assert list(shape_children(ModelRef(model=sentinel))) == [] # type: ignore[arg-type] + + def test_union_ref_has_no_children(self) -> None: + sentinel = object() + assert list(shape_children(UnionRef(union=sentinel))) == [] # type: ignore[arg-type] + + def test_newtype_shape_yields_inner(self) -> None: + inner = Primitive(base_type="int32") + nt = NewTypeShape(name="N", ref=object(), inner=inner) + assert list(shape_children(nt)) == [inner] + + +class TestWalkShape: + """Pre-order traversal over `FieldShape` trees.""" + + @staticmethod + def _collect(root: object) -> list[object]: + seen: list[object] = [] + walk_shape(root, seen.append) # type: ignore[arg-type] + return seen + + def test_scalar_visits_once(self) -> None: + root = Primitive(base_type="str") + assert self._collect(root) == [root] + + def test_nested_arrays(self) -> None: + leaf = Primitive(base_type="int32") + middle = ArrayOf(element=leaf) + root = ArrayOf(element=middle) + assert self._collect(root) == [root, middle, leaf] + + def test_map_visits_self_key_value(self) -> None: + k = Primitive(base_type="str") + v = Primitive(base_type="int32") + root = MapOf(key=k, value=v) + assert 
self._collect(root) == [root, k, v] + + def test_model_ref_is_boundary(self) -> None: + sentinel = object() + root = ModelRef(model=sentinel) # type: ignore[arg-type] + assert self._collect(root) == [root] + + def test_union_ref_is_boundary(self) -> None: + sentinel = object() + root = UnionRef(union=sentinel) # type: ignore[arg-type] + assert self._collect(root) == [root] + + def test_array_of_newtype_walks_through(self) -> None: + leaf = Primitive(base_type="str") + nt = NewTypeShape(name="N", ref=object(), inner=leaf) + root = ArrayOf(element=nt) + assert self._collect(root) == [root, nt, leaf] + + +_STR = Primitive(base_type="str") +_INT = Primitive(base_type="int32") +_LITERAL = LiteralScalar(values=("a",)) +_ANY = AnyScalar() +_MODEL = ModelRef(model=object()) # type: ignore[arg-type] +_MAP = MapOf(key=_STR, value=_INT) +_NEWTYPE_STR = NewTypeShape(name="N", ref=object(), inner=_STR) +_ARRAY_NEWTYPE_STR = ArrayOf(element=_NEWTYPE_STR) + + +class TestTerminalFilters: + """`terminal_of` and the three typed `terminal_*` narrowing helpers.""" + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (ArrayOf(element=ArrayOf(element=_INT)), _INT), + (_NEWTYPE_STR, _STR), + (_ARRAY_NEWTYPE_STR, _STR), + (ArrayOf(element=_MODEL), _MODEL), + (_MAP, _MAP), + ], + ) + def test_terminal_of_unwraps_to_innermost( + self, shape: object, expected: object + ) -> None: + assert terminal_of(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (ArrayOf(element=_INT), _INT), + (_NEWTYPE_STR, _STR), + (_LITERAL, None), + (_ANY, None), + (_MODEL, None), + ], + ) + def test_terminal_primitive(self, shape: object, expected: object) -> None: + assert terminal_primitive(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (_LITERAL, _LITERAL), + (_ANY, _ANY), + (ArrayOf(element=_LITERAL), _LITERAL), + (_MODEL, None), + (_MAP, None), 
+ ], + ) + def test_terminal_scalar(self, shape: object, expected: object) -> None: + assert terminal_scalar(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_MODEL, _MODEL), + (ArrayOf(element=_MODEL), _MODEL), + (NewTypeShape(name="N", ref=object(), inner=_MODEL), _MODEL), + (_STR, None), + (_LITERAL, None), + (_ANY, None), + ], + ) + def test_terminal_model_ref(self, shape: object, expected: object) -> None: + assert terminal_model_ref(shape) is expected # type: ignore[arg-type] diff --git a/packages/overture-schema-codegen/tests/test_golden_markdown.py b/packages/overture-schema-codegen/tests/test_golden_markdown.py index 42320ee69..e75eddcc5 100644 --- a/packages/overture-schema-codegen/tests/test_golden_markdown.py +++ b/packages/overture-schema-codegen/tests/test_golden_markdown.py @@ -18,14 +18,11 @@ Venue, Widget, assert_golden, + feature_spec_for_model, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.newtype_extraction import extract_newtype -from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.extraction.specs import FeatureSpec, TypeIdentity from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) @@ -67,12 +64,10 @@ @pytest.fixture(scope="module") def reverse_refs() -> dict[TypeIdentity, list[UsedByEntry]]: """Compute reverse references for all test models.""" - feature_specs = [] + feature_specs: list[FeatureSpec] = [] for model_class, _ in FEATURE_CASES: assert isinstance(model_class, type) and issubclass(model_class, BaseModel) - spec = extract_model(model_class) - expand_model_tree(spec) - feature_specs.append(spec) + feature_specs.append(feature_spec_for_model(model_class)) all_specs = collect_all_supplementary_types(feature_specs) 
return compute_reverse_references(feature_specs, all_specs) @@ -89,8 +84,7 @@ def test_feature_golden( update_golden: bool, reverse_refs: dict[TypeIdentity, list[UsedByEntry]], ) -> None: - spec = extract_model(model_class) - expand_model_tree(spec) + spec = feature_spec_for_model(model_class) used_by = reverse_refs.get(spec.identity) actual = render_feature(spec, used_by=used_by) assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_integration_real_models.py b/packages/overture-schema-codegen/tests/test_integration_real_models.py index 9ed20d112..b0f90e266 100644 --- a/packages/overture-schema-codegen/tests/test_integration_real_models.py +++ b/packages/overture-schema-codegen/tests/test_integration_real_models.py @@ -5,7 +5,7 @@ """ import pytest -from codegen_test_support import assert_literal_field +from codegen_test_support import assert_literal_field, feature_spec_for_model from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( FeatureSpec, @@ -15,7 +15,6 @@ is_model_class, is_union_alias, ) -from overture.schema.codegen.extraction.type_analyzer import TypeKind from overture.schema.codegen.extraction.union_extraction import extract_union from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.codegen.markdown.pipeline import generate_markdown_pages @@ -29,21 +28,6 @@ class TestDiscoverModels: """Tests for model discovery.""" - def test_discover_models_returns_dict(self) -> None: - """discover_models() should return a dictionary.""" - models = discover_models() - assert isinstance(models, dict) - - def test_discover_models_finds_building( - self, building_class: type[BaseModel] - ) -> None: - """Should discover the Building model.""" - assert issubclass(building_class, BaseModel) - - def test_discover_models_finds_place(self, place_class: type[BaseModel]) -> None: - 
"""Should discover the Place model.""" - assert issubclass(place_class, BaseModel) - def test_discover_models_returns_multiple_themes(self) -> None: """Should discover models from multiple themes.""" models = discover_models() @@ -68,11 +52,10 @@ def test_extract_building_has_fields(self, building_spec: ModelSpec) -> None: field_names = {f.name for f in building_spec.fields} assert "id" in field_names - def test_building_field_types_are_valid(self, building_spec: ModelSpec) -> None: - """All Building fields should have valid TypeInfo.""" + def test_building_field_shapes_are_present(self, building_spec: ModelSpec) -> None: + """Every Building field has a `FieldShape`.""" for field in building_spec.fields: - assert field.type_info is not None - assert field.type_info.kind in TypeKind + assert field.shape is not None class TestExtractPlaceModel: @@ -109,17 +92,12 @@ def test_no_analyze_type_crashes(self, all_discovered_models: dict) -> None: spec = extract_model(model_class) assert spec.name == model_class.__name__ - def test_all_field_types_resolved(self, all_discovered_models: dict) -> None: - """All fields should have resolved TypeInfo.""" + def test_all_field_shapes_resolved(self, all_discovered_models: dict) -> None: + """Every field of every discovered model carries a `FieldShape`.""" for model_class in filter_model_classes(all_discovered_models): spec = extract_model(model_class) for field in spec.fields: - assert field.type_info.base_type, ( - f"No base_type for {spec.name}.{field.name}" - ) - assert field.type_info.kind in TypeKind, ( - f"Invalid kind for {spec.name}.{field.name}" - ) + assert field.shape is not None, f"No shape for {spec.name}.{field.name}" class TestMarkdownRenderingRealModels: @@ -127,7 +105,7 @@ class TestMarkdownRenderingRealModels: def test_render_building_content(self, building_class: type[BaseModel]) -> None: """Building renders with title, field table, and expected fields.""" - markdown = render_feature(extract_model(building_class)) 
+ markdown = render_feature(feature_spec_for_model(building_class)) assert "# Building" in markdown assert "| Name |" in markdown @@ -138,9 +116,7 @@ def test_render_building_content(self, building_class: type[BaseModel]) -> None: def test_render_all_models_without_crash(self, all_discovered_models: dict) -> None: """render_feature should not crash on any discovered model.""" for model_class in filter_model_classes(all_discovered_models): - markdown = render_feature(extract_model(model_class)) - assert isinstance(markdown, str) - assert len(markdown) > 0 + render_feature(feature_spec_for_model(model_class)) class TestDiscriminatedUnions: @@ -221,9 +197,8 @@ def test_segment_discriminator_extracted_from_callable( assert segment_spec.discriminator_field == "subtype" assert segment_spec.discriminator_mapping is not None assert len(segment_spec.discriminator_mapping) == 3 - # Keys are str(enum_member), e.g. "Subtype.ROAD" - road_key = next(k for k in segment_spec.discriminator_mapping if "ROAD" in k) - assert segment_spec.discriminator_mapping[road_key] is RoadSegment + # Keys are runtime string values, e.g. 
"road" + assert segment_spec.discriminator_mapping["road"] is RoadSegment def test_segment_common_base_is_base_model(self, segment_spec: UnionSpec) -> None: """Segment common_base is the shared base class.""" diff --git a/packages/overture-schema-codegen/tests/test_markdown_renderer.py b/packages/overture-schema-codegen/tests/test_markdown_renderer.py index 698f9d70a..92f1d0ac1 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_renderer.py +++ b/packages/overture-schema-codegen/tests/test_markdown_renderer.py @@ -21,13 +21,11 @@ Sources, TreeNode, Venue, + feature_spec_for_model, make_union_spec, ) from overture.schema.codegen.extraction.examples import ExampleRecord -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import ( AnnotatedField, @@ -365,9 +363,7 @@ class ModelWithSources(BaseModel): sources: TestSources | None = None - spec = extract_model(ModelWithSources) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(ModelWithSources)) assert "`TestSources`" in result assert "(list, optional)" in result @@ -441,9 +437,7 @@ class Outer(BaseModel): inner: Inner - spec = extract_model(Outer) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Outer)) assert "| `Inner` |" in result @@ -453,9 +447,7 @@ class TestRenderFeatureInlineExpansion: def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: """Direct model field expands sub-fields with dot notation.""" - spec = extract_model(FeatureWithAddress) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithAddress)) assert "| `address.street` |" in result assert 
"| `address.city` |" in result @@ -463,18 +455,14 @@ def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: def test_list_of_model_fields_expanded_with_bracket_dot_prefix(self) -> None: """List-of-model field expands sub-fields with []. notation.""" - spec = extract_model(FeatureWithSources) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithSources)) assert "| `sources[]` |" in result assert "| `sources[].dataset` |" in result def test_cycle_detection_prevents_infinite_recursion(self) -> None: """Recursive model emits parent row but does not recurse.""" - spec = extract_model(TreeNode) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(TreeNode)) # The parent field row appears assert "| `parent` |" in result @@ -491,9 +479,7 @@ def test_primitive_field_unchanged(self) -> None: def test_parent_row_preserved_before_expansion(self) -> None: """The parent field row still appears before expanded sub-fields.""" - spec = extract_model(FeatureWithAddress) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithAddress)) # Parent row for 'address' itself appears assert "| `address` |" in result @@ -589,13 +575,11 @@ def test_field_with_no_description_gets_constraint_note(self) -> None: class TestRenderFeatureFieldConstraints: - """Tests for field-level constraint annotation from TypeInfo.""" + """Tests for field-level constraint annotation from the field's shape.""" def test_venue_geometry_shows_allowed_types(self) -> None: """Venue's geometry field shows GeometryTypeConstraint as a note.""" - spec = extract_model(Venue) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Venue)) lines = result.splitlines() geo_line = next(line for line in lines if "| `geometry` |" in line) @@ -603,8 +587,6 @@ def 
test_venue_geometry_shows_allowed_types(self) -> None: def test_venue_reference_links_when_context_available(self) -> None: """Reference constraint links the target type when LinkContext has the page.""" - spec = extract_model(Venue) - expand_model_tree(spec) ctx = LinkContext( page_path=PurePosixPath("music/venue.md"), registry={ @@ -613,7 +595,7 @@ def test_venue_reference_links_when_context_available(self) -> None: ) }, ) - result = render_feature(spec, link_ctx=ctx) + result = render_feature(feature_spec_for_model(Venue), link_ctx=ctx) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -622,9 +604,7 @@ def test_venue_reference_links_when_context_available(self) -> None: def test_venue_reference_unlinked_without_context(self) -> None: """Reference constraint renders as plain code when no LinkContext.""" - spec = extract_model(Venue) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Venue)) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -1203,7 +1183,7 @@ def test_shared_fields_have_no_variant_tag(self) -> None: AnnotatedField( field_spec=FieldSpec( name="id", - type_info=STR_TYPE, + shape=STR_TYPE, description="ID", is_required=True, ), @@ -1217,17 +1197,21 @@ def test_shared_fields_have_no_variant_tag(self) -> None: def test_variant_fields_have_inline_tag(self) -> None: """Variant-specific fields get *(Variant)* tag.""" + + class RoadSegment(BaseModel): + pass + spec = make_union_spec( name="Segment", annotated_fields=[ AnnotatedField( field_spec=FieldSpec( name="speed_limit", - type_info=STR_TYPE, + shape=STR_TYPE, description=None, is_required=False, ), - variant_sources=("RoadSegment",), + variant_sources=(RoadSegment,), ), ], ) diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py index 
e54426f5f..fc1b946a2 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_type_format.py +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -4,15 +4,18 @@ from pathlib import PurePosixPath from typing import Literal, NewType -from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity -from overture.schema.codegen.extraction.type_analyzer import ( - TypeInfo, - TypeKind, - analyze_type, +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + LiteralScalar, + Scalar, + UnionRef, ) +from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import analyze_type from overture.schema.codegen.markdown.link_computation import LinkContext from overture.schema.codegen.markdown.type_format import ( - format_dict_type, + _registry_name, format_type, format_underlying_type, ) @@ -32,34 +35,34 @@ class TestFormatType: """Tests for format_type.""" def test_plain_str_renders_as_string(self) -> None: - ti = analyze_type(str) - assert format_type(_make_field(ti)) == "`string`" + assert format_type(_make_field(str)) == "`string`" def test_optional_adds_qualifier(self) -> None: - ti = analyze_type(str | None) - assert format_type(_make_field(ti, is_required=False)) == "`string` (optional)" + assert ( + format_type(_make_field(str | None, is_required=False)) + == "`string` (optional)" + ) def test_literal_renders_as_quoted_value(self) -> None: - ti = analyze_type(Literal["places"]) - assert format_type(_make_field(ti)) == '`"places"`' + assert format_type(_make_field(Literal["places"])) == '`"places"`' def test_multi_value_literal_renders_comma_separated(self) -> None: - ti = analyze_type(Literal["a", "b", "c"]) - assert format_type(_make_field(ti)) == '`"a"` \\| `"b"` \\| `"c"`' + assert ( + format_type(_make_field(Literal["a", "b", "c"])) + == '`"a"` \\| `"b"` \\| `"c"`' + ) def test_enum_without_context_renders_as_code(self) -> 
None: class Color(str, Enum): RED = "red" - ti = analyze_type(Color) - assert format_type(_make_field(ti)) == "`Color`" + assert format_type(_make_field(Color)) == "`Color`" def test_enum_with_link_context(self) -> None: class Color(str, Enum): RED = "red" - ti = analyze_type(Color) - field = _make_field(ti) + field = _make_field(Color) ctx = LinkContext( page_path=PurePosixPath("buildings/building/building.md"), registry={ @@ -69,55 +72,70 @@ class Color(str, Enum): assert format_type(field, ctx) == "[`Color`](../../types/enums/color.md)" def test_list_of_primitives(self) -> None: - ti = analyze_type(list[str]) - assert format_type(_make_field(ti)) == "`list`" + assert format_type(_make_field(list[str])) == "`list`" def test_nested_list_of_primitives(self) -> None: - ti = analyze_type(list[list[str]]) - assert format_type(_make_field(ti)) == "`list>`" + assert format_type(_make_field(list[list[str]])) == "`list>`" def test_registered_primitive_not_linked(self) -> None: - ti = analyze_type(int32) - result = format_type(_make_field(ti)) + result = format_type(_make_field(int32)) assert result == "`int32`" assert "](int32.md)" not in result -class TestFormatDictType: - """Tests for format_dict_type.""" - - def test_simple_dict_renders_as_map(self) -> None: - ti = analyze_type(dict[str, int]) - result = format_dict_type(ti) - assert result == "map" - - def test_dict_with_newtype_shows_semantic_name(self) -> None: - MyKey = NewType("MyKey", str) - ti = analyze_type(dict[MyKey, int]) - result = format_dict_type(ti) - assert result == "map" - - def _make_field( - ti: TypeInfo, *, name: str = "x", is_required: bool = True + annotation: object, + *, + name: str = "x", + is_required: bool = True, + is_optional: bool = False, ) -> FieldSpec: - """Build a FieldSpec for test convenience.""" - return FieldSpec(name=name, type_info=ti, description=None, is_required=is_required) + """Build a FieldSpec from an annotation for test convenience.""" + from 
overture.schema.codegen.extraction.field import FieldShape + + if isinstance(annotation, (Scalar, ArrayOf, UnionRef)): + shape: FieldShape = annotation # type: ignore[assignment] + else: + shape, resolved_optional, _ = analyze_type(annotation) + is_optional = is_optional or resolved_optional + return FieldSpec( + name=name, + shape=shape, + description=None, + is_required=is_required, + is_optional=is_optional, + ) + + +def _union_ref(members: list[type]) -> UnionRef: + """Build a UnionRef for tests without running through extract_union.""" + from overture.schema.codegen.extraction.specs import UnionSpec + from pydantic import BaseModel + + union_spec = UnionSpec( + name=members[0].__name__, + description=None, + annotated_fields=[], + members=members, # type: ignore[arg-type] + discriminator_field=None, + discriminator_mapping=None, + source_annotation=object(), + common_base=BaseModel, + ) + return UnionRef(union=union_spec) class TestFormatUnionType: - """Tests for UNION-kind TypeInfo in format_type.""" + """Tests for union FieldShape in format_type.""" def test_union_renders_all_members(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_type(_make_field(ti)) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB]))) assert "`_ModelA`" in result assert "`_ModelB`" in result # Pipe separator escaped for table cells assert r"\|" in result def test_union_with_link_context_links_each_member(self) -> None: - ti = analyze_type(_ModelA | _ModelB) ctx = LinkContext( page_path=PurePosixPath("theme/feature/feature.md"), registry={ @@ -129,39 +147,36 @@ def test_union_with_link_context_links_each_member(self) -> None: ), }, ) - result = format_type(_make_field(ti), ctx) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB])), ctx) assert "[`_ModelA`](types/model_a.md)" in result assert "[`_ModelB`](types/model_b.md)" in result def test_optional_union_adds_qualifier(self) -> None: - ti = analyze_type(_ModelA | _ModelB | None) - 
result = format_type(_make_field(ti, is_required=False)) + result = format_type( + _make_field( + _union_ref([_ModelA, _ModelB]), is_required=False, is_optional=True + ) + ) assert "(optional)" in result assert "`_ModelA`" in result assert "`_ModelB`" in result def test_list_of_union_adds_qualifier(self) -> None: - ti = TypeInfo( - base_type="_ModelA", - kind=TypeKind.UNION, - list_depth=1, - union_members=(_ModelA, _ModelB), - ) - result = format_type(_make_field(ti)) + """list[union] renders with (list) qualifier.""" + shape = ArrayOf(element=_union_ref([_ModelA, _ModelB])) + result = format_type(_make_field(shape)) assert "(list)" in result assert "`_ModelA`" in result assert "`_ModelB`" in result def test_union_members_unlinked_without_context(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_type(_make_field(ti)) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB]))) # No markdown links without context assert "]()" not in result assert "[`" not in result def test_union_partial_links(self) -> None: """Members with pages get linked; members without don't.""" - ti = analyze_type(_ModelA | _ModelB) ctx = LinkContext( page_path=PurePosixPath("theme/feature/feature.md"), registry={ @@ -170,19 +185,45 @@ def test_union_partial_links(self) -> None: ) }, ) - result = format_type(_make_field(ti), ctx) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB])), ctx) assert "[`_ModelA`](types/model_a.md)" in result assert "`_ModelB`" in result # _ModelB should NOT be linked assert "[`_ModelB`]" not in result +class TestScalarVariantRendering: + """format_type and _registry_name handle all three Scalar variants correctly.""" + + def test_registry_name_any_scalar(self) -> None: + assert _registry_name(AnyScalar()) == "Any" + + def test_registry_name_literal_scalar(self) -> None: + assert _registry_name(LiteralScalar(values=("road",))) == "Literal" + + def test_any_scalar_renders_as_Any(self) -> None: + assert 
format_type(_make_field(AnyScalar())) == "`Any`" + + def test_literal_scalar_renders_first_value_quoted(self) -> None: + # LiteralScalar goes through the Literal path in format_type, not _registry_name + assert format_type(_make_field(LiteralScalar(values=("road",)))) == '`"road"`' + + def test_literal_scalar_multi_value(self) -> None: + result = format_type(_make_field(LiteralScalar(values=("a", "b")))) + assert '`"a"`' in result + assert '`"b"`' in result + + def test_list_of_literal_single_value(self) -> None: + assert format_type(_make_field(list[Literal["road"]])) == '`list<"road">`' + + def test_list_of_literal_multi_value(self) -> None: + assert format_type(_make_field(list[Literal["a", "b"]])) == '`list<"a" | "b">`' + + class TestPydanticTypeLinking: """Tests for PRIMITIVE types with pages getting linked.""" def test_pydantic_type_linked_when_in_registry(self) -> None: - ti = analyze_type(HttpUrl) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -191,24 +232,20 @@ def test_pydantic_type_linked_when_in_registry(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(HttpUrl), ctx) assert "[`HttpUrl`]" in result assert "pydantic/networks/http_url.md" in result def test_pydantic_type_unlinked_without_registry_entry(self) -> None: - ti = analyze_type(HttpUrl) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={}, ) - result = format_type(field, ctx) + result = format_type(_make_field(HttpUrl), ctx) assert result == "`HttpUrl`" assert "[" not in result def test_list_of_pydantic_type_linked(self) -> None: - ti = analyze_type(list[HttpUrl]) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -217,14 +254,12 @@ def test_list_of_pydantic_type_linked(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(list[HttpUrl]), ctx) 
assert "HttpUrl" in result assert "pydantic/networks/http_url.md" in result def test_registered_primitive_links_to_aggregate_page(self) -> None: """int32 links to the primitives aggregate page when in registry.""" - ti = analyze_type(int32) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -233,7 +268,7 @@ def test_registered_primitive_links_to_aggregate_page(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(int32), ctx) assert "[`int32`]" in result assert "system/primitive/primitives.md" in result @@ -249,8 +284,7 @@ class TestListOfSemanticNewtype: def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: """list[ScalarNewType] renders as list, not Name (list).""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[ScalarNT]) - result = format_type(_make_field(ti)) + result = format_type(_make_field(list[ScalarNT])) assert "list<" in result assert "ScalarNT" in result assert "(list)" not in result @@ -258,23 +292,20 @@ def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: def test_newtype_wrapping_list_renders_qualifier(self) -> None: """NewType wrapping list[X] renders as Name (list).""" ListNT = NewType("ListNT", list[str]) - ti = analyze_type(ListNT) - result = format_type(_make_field(ti)) + result = format_type(_make_field(ListNT)) assert "(list)" in result assert "ListNT" in result def test_list_of_scalar_newtype_with_link(self) -> None: """list[ScalarNewType] with link context renders linked list.""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[ScalarNT]) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ TypeIdentity(ScalarNT, "ScalarNT"): PurePosixPath("system/scalar_nt.md") }, ) - result = format_type(field, ctx) + result = format_type(_make_field(list[ScalarNT]), ctx) assert "list<" in result assert "ScalarNT" in result assert "system/scalar_nt.md" in 
result @@ -283,8 +314,7 @@ def test_list_of_scalar_newtype_with_link(self) -> None: def test_nested_list_of_scalar_newtype_renders_nested_list_syntax(self) -> None: """list[list[ScalarNewType]] renders as list>.""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[list[ScalarNT]]) - result = format_type(_make_field(ti)) + result = format_type(_make_field(list[list[ScalarNT]])) assert "list<" in result assert "list<`" in result or "`list None: class TestFormatUnderlyingUnionType: - """Tests for UNION-kind TypeInfo in format_underlying_type.""" + """Tests for union FieldShape in format_underlying_type.""" def test_union_renders_all_members(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_underlying_type(ti) + shape = _union_ref([_ModelA, _ModelB]) + result = format_underlying_type(shape) assert result == "`_ModelA` | `_ModelB`" def test_union_with_link_context(self) -> None: - ti = analyze_type(_ModelA | _ModelB) + shape = _union_ref([_ModelA, _ModelB]) ctx = LinkContext( page_path=PurePosixPath("types/my_union.md"), registry={ @@ -312,6 +342,6 @@ def test_union_with_link_context(self) -> None: ), }, ) - result = format_underlying_type(ti, ctx) + result = format_underlying_type(shape, ctx) assert "[`_ModelA`](../theme/feature/types/model_a.md)" in result assert "[`_ModelB`](../theme/feature/types/model_b.md)" in result diff --git a/packages/overture-schema-codegen/tests/test_model_extraction.py b/packages/overture-schema-codegen/tests/test_model_extraction.py new file mode 100644 index 000000000..d5791ee61 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_model_extraction.py @@ -0,0 +1,43 @@ +"""Tests for `extract_model`.""" + +from overture.schema.codegen.extraction.field import ArrayOf, UnionRef +from overture.schema.codegen.extraction.field_walk import terminal_of +from overture.schema.codegen.extraction.length_constraints import ArrayMinLen +from overture.schema.codegen.extraction.model_extraction import 
extract_model +from overture.schema.common.scoping.vehicle import VehicleSelector +from pydantic import BaseModel, Field + + +def test_extract_model_populates_union_terminal() -> None: + """`extract_model` resolves UNION terminals to a `UnionRef` carrying a `UnionSpec`.""" + + class Container(BaseModel): + items: list[VehicleSelector] + + spec = extract_model(Container) + items_field = next(f for f in spec.fields if f.name == "items") + + terminal = terminal_of(items_field.shape) + assert isinstance(terminal, UnionRef) + assert terminal.union.discriminator_field == "dimension" + + +def test_field_metadata_minlen_wrapped_as_array_min_len() -> None: + """MinLen in field_info.metadata is wrapped to ArrayMinLen, not left as raw MinLen. + + Pydantic strips the outermost Annotated wrapper from non-optional, + non-union list fields and moves MinLen to field_info.metadata. Without + routing through attach_constraints, the raw MinLen would survive into + the constraint table untyped, causing dispatch to raise TypeError at + codegen time. 
+ """ + + class M(BaseModel): + items: list[str] = Field(min_length=2) + + spec = extract_model(M) + items_field = next(f for f in spec.fields if f.name == "items") + + assert isinstance(items_field.shape, ArrayOf) + constraints = [cs.constraint for cs in items_field.shape.constraints] + assert ArrayMinLen(min_length=2) in constraints diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py index f2b2bd257..c033a19cf 100644 --- a/packages/overture-schema-codegen/tests/test_model_extractor.py +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -12,11 +12,9 @@ assert_literal_field, find_field, ) -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) -from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.extraction.field import ModelRef, Primitive +from overture.schema.codegen.extraction.field_walk import has_array_layer, terminal_of +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.system.field_constraint import UniqueItemsConstraint from overture.schema.system.model_constraint import ( FieldEqCondition, @@ -90,7 +88,9 @@ class SimpleModel(BaseModel): assert result.description == "A simple test model." 
assert len(result.fields) == 1 assert result.fields[0].name == "name" - assert result.fields[0].type_info.base_type == "str" + scalar = terminal_of(result.fields[0].shape) + assert isinstance(scalar, Primitive) + assert scalar.base_type == "str" assert result.fields[0].is_required is True def test_extract_model_does_not_set_entry_point(self) -> None: @@ -118,7 +118,7 @@ class ModelWithOptional(BaseModel): nickname_field = find_field(result, "nickname") assert nickname_field.is_required is False - assert nickname_field.type_info.is_optional is True + assert nickname_field.is_optional is True def test_extract_model_with_field_description(self) -> None: """Should extract field descriptions from Field().""" @@ -144,8 +144,10 @@ class ModelWithList(BaseModel): tags_field = result.fields[0] assert tags_field.name == "tags" - assert tags_field.type_info.is_list is True - assert tags_field.type_info.base_type == "str" + assert has_array_layer(tags_field.shape) + scalar = terminal_of(tags_field.shape) + assert isinstance(scalar, Primitive) + assert scalar.base_type == "str" class TestExtractModelWithThemeType: @@ -365,115 +367,115 @@ class Child(Parent, ChildMixin): assert field_names == ["core", "p", "pm", "own", "cm"] -class TestExpandModelTree: - """Tests for expand_model_tree.""" +class TestSubModelExpansion: + """Sub-model resolution at extract_model time.""" def test_model_without_sub_models_unchanged(self) -> None: - """Fields without MODEL kind get model=None.""" + """Fields without MODEL kind have no ModelRef in their shape.""" class Simple(BaseModel): name: str count: int spec = extract_model(Simple) - expand_model_tree(spec) for f in spec.fields: - assert f.model is None - assert f.starts_cycle is False + assert not isinstance(terminal_of(f.shape), ModelRef) def test_nested_model_gets_expanded(self) -> None: - """MODEL-kind fields get their model populated.""" + """MODEL-kind fields resolve to a ModelRef in the shape.""" spec = extract_model(FeatureWithAddress) - 
expand_model_tree(spec) addr_field = find_field(spec, "address") - assert addr_field.model is not None - assert addr_field.model.name == "Address" - assert addr_field.starts_cycle is False + terminal = terminal_of(addr_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model.name == "Address" + assert terminal.starts_cycle is False # Sub-model fields should exist - sub_names = [f.name for f in addr_field.model.fields] + sub_names = [f.name for f in terminal.model.fields] assert "street" in sub_names assert "city" in sub_names def test_cycle_detected_and_marked(self) -> None: - """Self-referential model gets starts_cycle=True.""" + """Self-referential model gets starts_cycle=True on the ModelRef.""" spec = extract_model(TreeNode) - expand_model_tree(spec) parent_field = find_field(spec, "parent") - assert parent_field.model is not None - assert parent_field.model is spec # Same object -- cycle - assert parent_field.starts_cycle is True + terminal = terminal_of(parent_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model is spec # Same object -- cycle + assert terminal.starts_cycle is True - def test_shared_reference_not_marked_as_cycle(self) -> None: - """Two models referencing the same sub-model share it without cycle.""" + def test_shared_reference_within_one_extraction(self) -> None: + """Two fields referencing the same sub-model share the ModelSpec.""" class Shared(BaseModel): value: str - class ModelA(BaseModel): - ref: Shared - - class ModelB(BaseModel): - ref: Shared - - cache: dict[type, ModelSpec] = {} - spec_a = extract_model(ModelA) - expand_model_tree(spec_a, cache) - - spec_b = extract_model(ModelB) - expand_model_tree(spec_b, cache) + class Container(BaseModel): + first: Shared + second: Shared - ref_a = find_field(spec_a, "ref") - ref_b = find_field(spec_b, "ref") + spec = extract_model(Container) + first = find_field(spec, "first") + second = find_field(spec, "second") - # Same ModelSpec object, neither is a 
cycle - assert ref_a.model is ref_b.model - assert ref_a.starts_cycle is False - assert ref_b.starts_cycle is False + first_ref = terminal_of(first.shape) + second_ref = terminal_of(second.shape) + assert isinstance(first_ref, ModelRef) + assert isinstance(second_ref, ModelRef) + # Within one extract_model call, the cache ensures the same + # ModelSpec is reused for both references; neither is a cycle. + assert first_ref.model is second_ref.model + assert first_ref.starts_cycle is False + assert second_ref.starts_cycle is False def test_list_of_model_gets_expanded(self) -> None: - """list[Model] fields also get their model populated.""" + """list[Model] fields also get their model populated via ModelRef.""" class HasList(BaseModel): items: list[SourceItem] spec = extract_model(HasList) - expand_model_tree(spec) items_field = find_field(spec, "items") - assert items_field.model is not None - assert items_field.model.name == "SourceItem" + terminal = terminal_of(items_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model.name == "SourceItem" class TestFieldInfoMetadataConstraints: - """Constraints from field_info.metadata are merged into TypeInfo. + """Constraints from `field_info.metadata` attach to the field's shape. Pydantic strips the Annotated wrapper from some fields and moves the - metadata to field_info.metadata. extract_model merges these back into - TypeInfo.constraints so they aren't silently dropped. + metadata to `field_info.metadata`. `extract_model` attaches these + constraints to the appropriate `FieldShape` layer so they aren't + silently dropped. 
""" def test_geometry_type_constraint_extracted(self) -> None: """GeometryTypeConstraint on geometry field should appear in constraints.""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") constraint_types = [ - type(cs.constraint) for cs in geometry_field.type_info.constraints + type(cs.constraint) for cs in all_constraints(geometry_field.shape) ] assert GeometryTypeConstraint in constraint_types def test_geometry_type_constraint_has_null_source(self) -> None: """Constraints from field_info.metadata have source_ref=None (not from a NewType).""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") geo_constraints = [ cs - for cs in geometry_field.type_info.constraints + for cs in all_constraints(geometry_field.shape) if isinstance(cs.constraint, GeometryTypeConstraint) ] assert len(geo_constraints) == 1 @@ -485,12 +487,14 @@ def test_metadata_constraints_not_duplicated(self) -> None: When field_info.metadata is empty (Pydantic kept the Annotated wrapper), no extra constraints are added. 
""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Instrument) tags_field = find_field(spec, "tags") unique_constraints = [ cs - for cs in tags_field.type_info.constraints + for cs in all_constraints(tags_field.shape) if isinstance(cs.constraint, UniqueItemsConstraint) ] assert len(unique_constraints) == 1 @@ -498,6 +502,7 @@ def test_metadata_constraints_not_duplicated(self) -> None: def test_standalone_annotated_field_extracts_metadata(self) -> None: """Direct Annotated[Type, constraint] fields (non-optional, non-union) get their constraints from field_info.metadata.""" + from overture.schema.codegen.extraction.field_walk import all_constraints class Model(BaseModel): geo: Annotated[ @@ -509,7 +514,7 @@ class Model(BaseModel): geo_field = find_field(spec, "geo") constraint_types = [ - type(cs.constraint) for cs in geo_field.type_info.constraints + type(cs.constraint) for cs in all_constraints(geo_field.shape) ] assert GeometryTypeConstraint in constraint_types diff --git a/packages/overture-schema-codegen/tests/test_newtype_extraction.py b/packages/overture-schema-codegen/tests/test_newtype_extraction.py index 6cd73c5c2..150198668 100644 --- a/packages/overture-schema-codegen/tests/test_newtype_extraction.py +++ b/packages/overture-schema-codegen/tests/test_newtype_extraction.py @@ -3,6 +3,7 @@ from typing import Annotated, NewType from codegen_test_support import STR_TYPE +from overture.schema.codegen.extraction.field import ArrayOf from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import NewTypeSpec from overture.schema.system.field_constraint import UniqueItemsConstraint @@ -19,15 +20,23 @@ def test_extract_hex_color(self) -> None: spec = extract_newtype(HexColor) assert spec.name == "HexColor" - assert spec.type_info.newtype_name == "HexColor" + # Outermost NewTypeShape stripped; shape is the underlying scalar. 
+ from overture.schema.codegen.extraction.field_walk import terminal_scalar + + assert terminal_scalar(spec.shape) is not None def test_extract_id(self) -> None: """Should extract Id NewType with nested chain.""" spec = extract_newtype(Id) assert spec.name == "Id" - assert spec.type_info.newtype_name == "Id" - assert spec.type_info.base_type == "NoWhitespaceString" + # Id wraps NoWhitespaceString, which is a registered semantic newtype + # resolving to a Scalar. After stripping "Id", shape is Scalar with + # base_type "NoWhitespaceString". + from overture.schema.codegen.extraction.field import Primitive + + assert isinstance(spec.shape, Primitive) + assert spec.shape.base_type == "NoWhitespaceString" def test_extract_newtype_wrapping_list(self) -> None: """Should extract a list-wrapping NewType.""" @@ -41,8 +50,8 @@ class Item(BaseModel): spec = extract_newtype(TestSources) assert spec.name == "TestSources" - assert spec.type_info.is_list is True - assert spec.type_info.newtype_name == "TestSources" + # After stripping the outer NewTypeShape("TestSources"), shape is ArrayOf. + assert isinstance(spec.shape, ArrayOf) def test_extract_newtype_without_doc_uses_field_description(self) -> None: """NewType with Field(description=...) 
but no __doc__ uses Field description.""" @@ -66,7 +75,7 @@ class TestNewTypeSpecSourceType: """Tests for source_type on NewTypeSpec.""" def test_newtype_spec_source_type_defaults_to_none(self) -> None: - spec = NewTypeSpec(name="Test", description=None, type_info=STR_TYPE) + spec = NewTypeSpec(name="Test", description=None, shape=STR_TYPE) assert spec.source_type is None def test_extract_newtype_sets_source_type(self) -> None: diff --git a/packages/overture-schema-codegen/tests/test_numeric_extraction.py b/packages/overture-schema-codegen/tests/test_numeric_extraction.py index ee604ba75..6f3a5767f 100644 --- a/packages/overture-schema-codegen/tests/test_numeric_extraction.py +++ b/packages/overture-schema-codegen/tests/test_numeric_extraction.py @@ -55,7 +55,7 @@ class TestExtractNumericBounds: def test_signed_integer_bounds(self) -> None: """Should extract ge/le from a constrained integer NewType.""" spec = extract_newtype(int32) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == -(2**31) assert bounds.le == 2**31 - 1 @@ -63,7 +63,7 @@ def test_signed_integer_bounds(self) -> None: def test_unsigned_integer_bounds(self) -> None: """Should extract 0-based bounds from unsigned NewType.""" spec = extract_newtype(uint8) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == 0 assert bounds.le == 255 @@ -71,7 +71,7 @@ def test_unsigned_integer_bounds(self) -> None: def test_int64_bounds(self) -> None: """Should extract large bounds from int64.""" spec = extract_newtype(int64) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == -(2**63) assert bounds.le == 2**63 - 1 @@ -79,7 +79,7 @@ def test_int64_bounds(self) -> None: def test_unconstrained_type(self) -> None: """Should return empty Interval for types without numeric constraints.""" spec = extract_newtype(float32) - bounds = 
extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge is None assert bounds.gt is None @@ -91,8 +91,8 @@ def test_exclusive_bounds(self) -> None: ExclusiveBounded = NewType( "ExclusiveBounded", Annotated[int, Field(gt=0, lt=100)] ) - type_info = analyze_type(ExclusiveBounded) - bounds = extract_numeric_bounds(type_info) + shape, _, _ = analyze_type(ExclusiveBounded) + bounds = extract_numeric_bounds(shape) assert bounds.gt == 0 assert bounds.lt == 100 @@ -102,8 +102,8 @@ def test_exclusive_bounds(self) -> None: def test_mixed_bounds(self) -> None: """Should extract a mix of inclusive and exclusive bounds.""" MixedBounded = NewType("MixedBounded", Annotated[int, Field(ge=0, lt=256)]) - type_info = analyze_type(MixedBounded) - bounds = extract_numeric_bounds(type_info) + shape, _, _ = analyze_type(MixedBounded) + bounds = extract_numeric_bounds(shape) assert bounds.ge == 0 assert bounds.lt == 256 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py new file mode 100644 index 000000000..fadcd94fe --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py @@ -0,0 +1,319 @@ +"""Tests for valid-row generation from FeatureSpecs.""" + +import uuid + +import pytest +from codegen_test_support import ( + FeatureWithRequiredUrl, + discover_feature, + feature_spec_for_model, +) +from overture.schema.codegen.extraction.field import AnyScalar, LiteralScalar, ModelRef +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + FieldSpec, + UnionSpec, +) +from overture.schema.codegen.pyspark.test_data.base_row import ( + _primitive_default, + generate_arm_rows, + generate_base_row, + generate_populated_arm_rows, + generate_populated_row, + value_for_field, +) +from pydantic import HttpUrl, TypeAdapter + + +@pytest.fixture(scope="module") +def connector_spec() -> FeatureSpec: + return 
discover_feature("Connector") + + +@pytest.fixture(scope="module") +def segment_spec() -> FeatureSpec: + return discover_feature("Segment") + + +@pytest.fixture(scope="module") +def segment_union(segment_spec: FeatureSpec) -> UnionSpec: + assert isinstance(segment_spec, UnionSpec) + return segment_spec + + +class TestPrimitiveDefault: + """Primitive defaults for string-like types that need valid placeholders.""" + + def test_http_url_is_valid(self) -> None: + val = _primitive_default("HttpUrl") + TypeAdapter(HttpUrl).validate_python(val) + + def test_email_str_contains_at(self) -> None: + val = _primitive_default("EmailStr") + assert isinstance(val, str) + assert "@" in val + + +class TestBaseRowUrlFields: + """Base rows with URL-typed fields produce Pydantic-valid values.""" + + def test_required_url_field_passes_validation(self) -> None: + spec = feature_spec_for_model(FeatureWithRequiredUrl) + row = generate_base_row(spec) + TypeAdapter(FeatureWithRequiredUrl).validate_python(row) + + +class TestGenerateBaseRow: + def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert connector_spec.source_type is not None + TypeAdapter(connector_spec.source_type).validate_python(row) + + def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + required_names = {f.name for f in connector_spec.fields if f.is_required} + assert required_names <= set(row.keys()) + + def test_optional_fields_absent(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + optional_names = {f.name for f in connector_spec.fields if not f.is_required} + assert optional_names.isdisjoint(set(row.keys())) + + def test_id_is_deterministic_uuid(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert "id" in row + parsed = uuid.UUID(row["id"]) + assert parsed.version == 5 + + def 
test_geometry_is_valid_wkt(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert "geometry" in row + assert row["geometry"].startswith("POINT") + + +class TestGenerateArmRows: + def test_returns_dict_per_arm( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + assert segment_union.discriminator_mapping is not None + assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) + + def test_each_row_passes_validation( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) + for _arm_val, row in rows.items(): + adapter.validate_python(row) + + def test_discriminator_field_set( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + assert segment_union.discriminator_field is not None + for arm_val, row in rows.items(): + assert row[segment_union.discriminator_field] == arm_val + + def test_arm_specific_required_fields_present( + self, segment_spec: FeatureSpec + ) -> None: + """Road arm requires 'class' field; water arm does not.""" + rows = generate_arm_rows(segment_spec) + assert "class" in rows["road"] + assert "class" not in rows["water"] + + +class TestPopulateOptionalFlag: + """populate_optional flag controls recursion depth.""" + + def test_value_for_field_default_skips_optional_children( + self, connector_spec: FeatureSpec + ) -> None: + """Default (`populate_optional=False`) yields sparse sub-models.""" + field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(field.shape) + val = value_for_field(field, "Connector") + assert isinstance(val, list) + elem = val[0] + assert isinstance(elem, dict) + optional_names = {f.name for f in model_ref.model.fields if not f.is_required} + assert not (optional_names & 
set(elem.keys())) + + def test_value_for_field_populate_includes_optional_children( + self, connector_spec: FeatureSpec + ) -> None: + """`populate_optional=True` yields sub-models that include optional fields.""" + field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(field.shape) + val = value_for_field(field, "Connector", populate_optional=True) + assert isinstance(val, list) + elem = val[0] + assert isinstance(elem, dict) + optional_names = {f.name for f in model_ref.model.fields if not f.is_required} + assert optional_names & set(elem.keys()) == optional_names + + +def _list_of_model(shape: object) -> ModelRef: + """Peel `ArrayOf` / `NewTypeShape` layers to reach the inner `ModelRef`.""" + from overture.schema.codegen.extraction.field_walk import terminal_of + + terminal = terminal_of(shape) # type: ignore[arg-type] + assert isinstance(terminal, ModelRef), ( + f"Expected ModelRef terminal, got {type(terminal).__name__}" + ) + return terminal + + +class TestGeneratePopulatedRow: + def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + assert connector_spec.source_type is not None + TypeAdapter(connector_spec.source_type).validate_python(row) + + def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + required_names = {f.name for f in connector_spec.fields if f.is_required} + assert required_names <= set(row.keys()) + + def test_optional_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + optional_names = {f.name for f in connector_spec.fields if not f.is_required} + assert optional_names <= set(row.keys()) + + def test_id_matches_sparse_row(self, connector_spec: FeatureSpec) -> None: + sparse = generate_base_row(connector_spec) + populated = generate_populated_row(connector_spec) + assert populated["id"] == sparse["id"] 
+ + def test_nested_structs_populated(self, connector_spec: FeatureSpec) -> None: + """Optional struct fields contain populated sub-dicts, not empty.""" + row = generate_populated_row(connector_spec) + assert "sources" in row + elem = row["sources"][0] + sources_field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(sources_field.shape) + optional_source_fields = { + f.name for f in model_ref.model.fields if not f.is_required + } + present = optional_source_fields & set(elem.keys()) + assert present == optional_source_fields + + +class TestGeneratePopulatedArmRows: + def test_returns_dict_per_arm( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + assert segment_union.discriminator_mapping is not None + assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) + + def test_each_row_passes_validation( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) + for _arm_val, row in rows.items(): + adapter.validate_python(row) + + def test_discriminator_field_set( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + assert segment_union.discriminator_field is not None + for arm_val, row in rows.items(): + assert row[segment_union.discriminator_field] == arm_val + + def test_optional_fields_present(self, segment_spec: FeatureSpec) -> None: + """Populated arm rows include optional fields.""" + rows = generate_populated_arm_rows(segment_spec) + # Road arm has optional speed_limits + road_row = rows["road"] + assert "speed_limits" in road_row + + +class TestValueForShapeScalarVariants: + """_value_for_shape handles the Scalar variants it can reach.""" + + def test_any_scalar_raises(self) -> None: + # `AnyScalar` only appears 
as a `MapOf` value type in feature + # models; `_value_for_shape` returns `{}` for `MapOf` without + # descending, so reaching `AnyScalar` directly is a bug. + field = FieldSpec(name="x", shape=AnyScalar()) + with pytest.raises(TypeError, match="AnyScalar reached base-row generation"): + value_for_field(field, "Foo") + + def test_literal_scalar_returns_first_value(self) -> None: + field = FieldSpec(name="x", shape=LiteralScalar(values=("road",))) + assert value_for_field(field, "Foo") == "road" + + +class TestMinFieldsSetSatisfied: + """`_satisfy_model_constraints` populates optional fields for `min_fields_set`.""" + + def test_min_fields_set_populates_optional_fields(self) -> None: + from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class MinTwoModel(BaseModel): + a: str | None = None + b: str | None = None + c: str | None = None + + spec = extract_model(MinTwoModel) + row = generate_base_row(spec) + present = [name for name in ("a", "b", "c") if name in row] + assert len(present) >= 2 + + def test_min_fields_set_counts_required_fields(self) -> None: + # Required fields are always present in the sparse base row, and they + # count against `min_fields_set(N)` -- matching Pydantic's + # `model_fields_set` semantics. With one required + three optional + # and `min_fields_set(2)`, the required field plus one optional + # already satisfy the constraint, so the sparse row only needs + # one additional optional fill. 
+ from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class MixedMinModel(BaseModel): + required_field: str + opt_a: str | None = None + opt_b: str | None = None + opt_c: str | None = None + + spec = extract_model(MixedMinModel) + row = generate_base_row(spec) + assert "required_field" in row + present_optional = [n for n in ("opt_a", "opt_b", "opt_c") if n in row] + assert len(present_optional) >= 1 + assert ( + sum( + 1 + for name in row + if name in {"required_field", "opt_a", "opt_b", "opt_c"} + ) + >= 2 + ) + + def test_min_fields_set_all_required_needs_no_optional_fill(self) -> None: + # When required fields alone satisfy `count`, no optional fills are + # needed -- matching Pydantic, which counts required fields toward + # `model_fields_set`. + from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class AllRequiredModel(BaseModel): + req_a: str + req_b: str + opt_a: str | None = None + + spec = extract_model(AllRequiredModel) + row = generate_base_row(spec) + assert "req_a" in row and "req_b" in row + assert "opt_a" not in row diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py new file mode 100644 index 000000000..0c89a0367 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -0,0 +1,1877 @@ +"""Tests for check_builder -- scalar fields, struct recursion, and model constraints.""" + +from dataclasses import replace +from enum import Enum +from typing import Annotated, Literal, NewType, Union + +import pytest +from annotated_types import Ge, Le, MinLen +from codegen_test_support import ( + LiteralSubtypeModel, + RadioModel, 
+ RequireAnyModel, + TripleNestedArrayModel, + discover_feature, + feature_spec_for_model, + union_spec_for, +) +from overture.schema.codegen.extraction.field import ConstraintSource, Primitive +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + FieldSpec, + ModelSpec, +) +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.pyspark._render_common import column_level_suffix +from overture.schema.codegen.pyspark.check_builder import ( + build_checks, +) +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + RequireIf, + model_constraint_function, +) +from overture.schema.common.scoping.lr import LinearlyReferencedRange +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + ScalarPath, + parse, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + Not, + forbid_if, + require_any_of, +) +from overture.schema.system.string import CountryCodeAlpha2 +from pydantic import BaseModel, Field +from pydantic.fields import FieldInfo +from pydantic.networks import HttpUrl + +_path = parse + + +def _column_guard(check: Check) -> ColumnGuard | None: + """Return the first ColumnGuard, or None.""" + for g in check.guards: + if isinstance(g, ColumnGuard): + return g + return None + + +def _element_guard(check: Check) -> ElementGuard | None: + """Return the first ElementGuard, or None.""" + for g in check.guards: + if isinstance(g, ElementGuard): + return g + return None + + +def _checks_for( + model_cls: type[BaseModel], +) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(feature_spec_for_model(model_cls)) + + +def _condition_of(check: ModelCheck) -> object: + """Return the condition of a 
RequireIf or ForbidIf descriptor.""" + desc = check.descriptor + assert isinstance(desc, (RequireIf, ForbidIf)), ( + f"Expected RequireIf or ForbidIf, got {type(desc).__name__}" + ) + return desc.condition + + +def _filter_nodes( + nodes: list[ModelCheck], + function: str | tuple[str, ...], + field_names: tuple[str, ...] | None = None, +) -> list[ModelCheck]: + functions = (function,) if isinstance(function, str) else function + return [ + n + for n in nodes + if model_constraint_function(n.descriptor) in functions + and (field_names is None or n.descriptor.field_names == field_names) + ] + + +def _union_checks( + name: str, union_type: object +) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(union_spec_for(name, union_type)) + + +def _union_model_nodes(name: str, union_type: object) -> list[ModelCheck]: + _, model_nodes = _union_checks(name, union_type) + return model_nodes + + +class TestScalarChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(LiteralSubtypeModel) + return nodes + + def test_literal_produces_enum_check(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + assert len(enum_nodes) == 1 + node = enum_nodes[0] + descriptors = node.descriptors + funcs = [d.function for d in descriptors] + assert "check_required" in funcs + assert "check_enum" in funcs + + def test_optional_field_no_required_check(self, nodes: list[Check]) -> None: + name_nodes = [n for n in nodes if n.target == _path("name")] + for node in name_nodes: + funcs = [d.function for d in node.descriptors] + assert "check_required" not in funcs + + def test_required_comes_first_in_coalesce(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + node = enum_nodes[0] + funcs = [d.function for d in node.descriptors] + req_idx = funcs.index("check_required") + enum_idx = funcs.index("check_enum") + assert req_idx < enum_idx + + def 
test_enum_args_contain_literal_values(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + node = enum_nodes[0] + enum_desc = next(d for d in node.descriptors if d.function == "check_enum") + assert enum_desc.args == (("a", "b", "c"),) + + def test_optional_str_field_no_checks(self, nodes: list[Check]) -> None: + # name: str | None = None has no constraints, so no check node + name_nodes = [n for n in nodes if n.target == _path("name")] + assert len(name_nodes) == 0 + + +class _RequiredNewtypeModel(BaseModel): + country: CountryCodeAlpha2 + + +class TestRequiredNewtypeChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_RequiredNewtypeModel) + return nodes + + def test_required_newtype_includes_check_required(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + assert len(country_nodes) == 1 + funcs = [d.function for d in country_nodes[0].descriptors] + assert "check_required" in funcs + + def test_required_newtype_includes_newtype_function( + self, nodes: list[Check] + ) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + funcs = [d.function for d in country_nodes[0].descriptors] + assert "check_pattern" in funcs + + def test_required_precedes_newtype_function(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + funcs = [d.function for d in country_nodes[0].descriptors] + assert funcs.index("check_required") < funcs.index("check_pattern") + + +class _Color(str, Enum): + RED = "red" + GREEN = "green" + BLUE = "blue" + + +class EnumFieldModel(BaseModel): + color: _Color + + +class TestEnumKindChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(EnumFieldModel) + return nodes + + def test_enum_field_produces_check_enum(self, nodes: list[Check]) -> None: + enum_descs = [ + d for n in nodes for d in n.descriptors if 
d.function == "check_enum" + ] + assert len(enum_descs) == 1 + + def test_enum_field_uses_member_values(self, nodes: list[Check]) -> None: + enum_descs = [ + d for n in nodes for d in n.descriptors if d.function == "check_enum" + ] + assert enum_descs[0].args == (("red", "green", "blue"),) + + +class InnerModel(BaseModel): + value: str + count: int = Field(ge=0) + + +class OuterModel(BaseModel): + inner: InnerModel | None = None + + +class _ArrayElement(BaseModel): + tag: str + + +class _NullableWithArray(BaseModel): + items: list[_ArrayElement] | None = None + + +class _NullableArrayGrandparent(BaseModel): + parent: _NullableWithArray | None = None + + +class TestNullableParentGating: + """Required fields within optional struct parents get gated check_required.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(OuterModel) + return nodes + + def test_required_field_has_gated_check_required(self, nodes: list[Check]) -> None: + value_nodes = [n for n in nodes if n.target == _path("inner.value")] + req_descs = [ + d + for n in value_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate == _path("inner") + + def test_non_check_required_descriptors_have_no_gate( + self, nodes: list[Check] + ) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + for node in count_nodes: + for desc in node.descriptors: + if desc.function != "check_required": + assert desc.gate is None + + def test_other_checks_still_present(self, nodes: list[Check]) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + assert len(count_nodes) >= 1 + funcs = [d.function for d in count_nodes[0].descriptors] + assert "check_bounds" in funcs + + +class TestArrayBoundaryResetsNullable: + """nullable_gate resets at array boundaries.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_NullableArrayGrandparent) + return 
nodes + + def test_required_field_in_array_element_has_check_required( + self, nodes: list[Check] + ) -> None: + tag_nodes = [n for n in nodes if n.target == _path("parent.items[].tag")] + assert len(tag_nodes) >= 1 + funcs = [d.function for n in tag_nodes for d in n.descriptors] + assert "check_required" in funcs + + def test_array_element_required_has_no_gate(self, nodes: list[Check]) -> None: + tag_nodes = [n for n in nodes if n.target == _path("parent.items[].tag")] + req_descs = [ + d + for n in tag_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate is None + + +class _OptionalNested(BaseModel): + mode: str + + +class _ElementWithOptional(BaseModel): + nested: _OptionalNested | None = None + + +class _ArrayWithOptionalNested(BaseModel): + items: list[_ElementWithOptional] + + +class TestArrayElementConditionalGate: + """Optional structs within array elements get gated check_required.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ArrayWithOptionalNested) + return nodes + + def test_required_in_optional_element_struct_has_gate( + self, nodes: list[Check] + ) -> None: + mode_nodes = [n for n in nodes if n.target == _path("items[].nested.mode")] + req_descs = [ + d + for n in mode_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate == _path("items[].nested") + + +class TestStructRecursion: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(OuterModel) + return nodes + + def test_recurses_into_model_fields(self, nodes: list[Check]) -> None: + paths = {n.target for n in nodes} + assert _path("inner.count") in paths + + def test_nested_field_uses_dot_path(self, nodes: list[Check]) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + assert len(count_nodes) == 1 + + +class ItemModel(BaseModel): + value: str + + +class 
ArrayModel(BaseModel): + items: Annotated[list[ItemModel], MinLen(1)] + + +class TestArrayChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(ArrayModel) + return nodes + + def test_array_min_length_is_scalar_shape(self, nodes: list[Check]) -> None: + length_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert len(length_nodes) == 1 + assert isinstance(length_nodes[0].target, ScalarPath) + + def test_array_element_field_uses_bracket_notation( + self, nodes: list[Check] + ) -> None: + paths = {n.target for n in nodes} + assert any(isinstance(p, ArrayPath) for p in paths) + + def test_array_element_subfield_path(self, nodes: list[Check]) -> None: + # ItemModel.value is required, so a check node for items[].value must exist + paths = {n.target for n in nodes} + assert _path("items[].value") in paths + + def test_array_level_check_has_no_inner_levels(self, nodes: list[Check]) -> None: + length_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert length_nodes[0].target == _path("items") + + def test_required_array_field_has_required_check(self, nodes: list[Check]) -> None: + # check_required on an array field is a column-level null check; its + # target is the scalar `items` column, not an element path. 
+ required_nodes = [ + n + for n in nodes + if n.target == _path("items") + and any(d.function == "check_required" for d in n.descriptors) + ] + assert len(required_nodes) == 1 + + def test_array_element_subfield_has_single_check(self, nodes: list[Check]) -> None: + value_nodes = [n for n in nodes if n.target == _path("items[].value")] + assert len(value_nodes) == 1 + + +class _StringListModel(BaseModel): + tags: Annotated[list[str], MinLen(1)] + + +class _NestedListModel(BaseModel): + """list[list[ItemModel]] — both layers contribute MinLen + UniqueItems.""" + + items: Annotated[ + list[Annotated[list[InnerModel], MinLen(1), UniqueItemsConstraint()]], + MinLen(1), + UniqueItemsConstraint(), + ] + + +class _StringInListModel(BaseModel): + """list[Annotated[str, MinLen]] with outer list MinLen — inner is string MinLen.""" + + tags: Annotated[list[Annotated[str, MinLen(1)]], MinLen(1)] + + +_HierarchyItemList = NewType( + "_HierarchyItemList", + Annotated[list[InnerModel], MinLen(1), UniqueItemsConstraint()], +) + + +class _HierarchyLikeModel(BaseModel): + """Mirror of Division.hierarchies: inner list lives inside a NewType.""" + + hierarchies: Annotated[ + list[_HierarchyItemList], + MinLen(1), + UniqueItemsConstraint(), + ] + + +class TestListFieldNameSplitting: + """Column-level and element-level checks for list fields get distinct field names.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_StringListModel) + return nodes + + def test_unique_labels_for_different_shapes(self, nodes: list[Check]) -> None: + labels = [(n.target, column_level_suffix(n)) for n in nodes] + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + + def test_min_length_check_carries_min_length_suffix( + self, nodes: list[Check] + ) -> None: + min_len_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert len(min_len_nodes) == 1 + assert min_len_nodes[0].target == _path("tags") + 
assert column_level_suffix(min_len_nodes[0]) == "_min_length" + + +def _node_for(nodes: list[Check], field: str, function: str) -> Check: + field_path = _path(field) + matching = [ + n + for n in nodes + if n.target == field_path and any(d.function == function for d in n.descriptors) + ] + assert len(matching) == 1, ( + f"expected exactly one node for field={field!r} function={function!r}, " + f"got {len(matching)}" + ) + return matching[0] + + +@pytest.mark.parametrize( + ("model_cls", "field"), + [ + (_NestedListModel, "items"), + (_HierarchyLikeModel, "hierarchies"), + ], + ids=["nested_list", "hierarchy_newtype"], +) +class TestPerLevelListConstraints: + """Each layer of `list[list[X]]` emits its own column-level check. + + Covers both raw nested lists (`_NestedListModel`) and the + NewType-wrapped variant (`_HierarchyLikeModel`, mirroring + `Division.hierarchies`). + """ + + def test_no_duplicate_labels(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + labels = [(n.target, column_level_suffix(n)) for n in nodes] + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + + def test_outer_min_length_check( + self, model_cls: type[BaseModel], field: str + ) -> None: + nodes, _ = _checks_for(model_cls) + outer = _node_for(nodes, field, "check_array_min_length") + assert outer.target == _path(field) + + def test_inner_min_length_check( + self, model_cls: type[BaseModel], field: str + ) -> None: + nodes, _ = _checks_for(model_cls) + inner = _node_for(nodes, f"{field}[]", "check_array_min_length") + assert inner.target == _path(f"{field}[]") + + def test_outer_unique_check(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + outer = _node_for(nodes, field, "check_struct_unique") + assert outer.target == _path(field) + + def test_inner_unique_check(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + inner = _node_for(nodes, 
f"{field}[]", "check_struct_unique") + assert inner.target == _path(f"{field}[]") + + +class TestPerLevelScalarMinLen: + """list[Annotated[str, MinLen]] with outer list MinLen splits cleanly.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_StringInListModel) + return nodes + + def test_outer_array_min_length(self, nodes: list[Check]) -> None: + outer = _node_for(nodes, "tags", "check_array_min_length") + assert outer.target == _path("tags") + + def test_inner_string_min_length(self, nodes: list[Check]) -> None: + inner = _node_for(nodes, "tags[]", "check_string_min_length") + assert inner.target == _path("tags[]") + + +class TestDescriptorDedupKey: + """Descriptor equality drives layer-level dedup via `dict.fromkeys`.""" + + def test_identical_descriptors_collapse(self) -> None: + desc = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + assert list(dict.fromkeys([desc, desc])) == [desc] + + def test_distinct_descriptors_preserve_order(self) -> None: + first = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + second = ExpressionDescriptor(function="check_struct_unique") + assert list(dict.fromkeys([first, second, first])) == [first, second] + + def test_different_args_are_distinct(self) -> None: + one = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + two = ExpressionDescriptor(function="check_array_min_length", args=(2,)) + assert list(dict.fromkeys([one, two])) == [one, two] + + def test_different_gates_are_distinct(self) -> None: + ungated = ExpressionDescriptor(function="check_required") + gated = ExpressionDescriptor(function="check_required", gate=_path("parent")) + assert list(dict.fromkeys([ungated, gated])) == [ungated, gated] + + +class TestListOfNewtypeConstraintDispatch: + """Element-level MinLen from NewType inside a list dispatches as string check.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + MyId = NewType("MyId", Annotated[str, MinLen(1)]) + + 
class ListOfIdModel(BaseModel): + ids: list[MyId] + + nodes, _ = _checks_for(ListOfIdModel) + return nodes + + def test_element_min_length_dispatches_as_string_check( + self, nodes: list[Check] + ) -> None: + """MinLen from the element NewType should produce check_string_min_length, not check_array_min_length.""" + all_funcs = [d.function for n in nodes for d in n.descriptors] + assert "check_string_min_length" in all_funcs + # check_array_min_length should NOT appear — there's no list-level MinLen + assert "check_array_min_length" not in all_funcs + + +class _InternalListNewtypeModel(BaseModel): + """Model with a NewType that wraps list[float] (list is inside the NewType).""" + + between: list[CountryCodeAlpha2] | None = None # outer list wrapping + + +class TestNewtypeWithInternalList: + """When a NewType IS a list, the check function handles the whole array.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + class InternalListModel(BaseModel): + between: LinearlyReferencedRange | None = None + + nodes, _ = _checks_for(InternalListModel) + return nodes + + def test_internal_list_newtype_has_single_check(self, nodes: list[Check]) -> None: + between_nodes = [n for n in nodes if n.target == _path("between")] + assert len(between_nodes) == 1 + + def test_internal_list_newtype_has_three_descriptors( + self, nodes: list[Check] + ) -> None: + between_nodes = [n for n in nodes if n.target == _path("between")] + fns = [d.function for d in between_nodes[0].descriptors] + assert "check_linear_range_length" in fns + assert "check_linear_range_bounds" in fns + assert "check_linear_range_order" in fns + + +class TestBaseTypeDispatchInCheckBuilder: + """Base type dispatch generates element-level checks for HttpUrl/EmailStr.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + class HttpUrlListModel(BaseModel): + websites: list[HttpUrl] | None = None + + nodes, _ = _checks_for(HttpUrlListModel) + return nodes + + def test_http_url_produces_check_url_format(self, 
nodes: list[Check]) -> None: + url_nodes = [ + n + for n in nodes + if any(d.function == "check_url_format" for d in n.descriptors) + ] + assert len(url_nodes) == 1 + + def test_http_url_element_check_is_array_shape(self, nodes: list[Check]) -> None: + url_nodes = [ + n + for n in nodes + if any(d.function == "check_url_format" for d in n.descriptors) + ] + assert isinstance(url_nodes[0].target, ArrayPath) + + +class _DeepInner(BaseModel): + field: str + + +class _ArrayElementWithNestedStruct(BaseModel): + nested: _DeepInner + + +class _DeepNestedArrayModel(BaseModel): + items: list[_ArrayElementWithNestedStruct] + + +class TestArrayElementNestedStructChecks: + """Struct fields inside array elements produce array-shaped checks.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_DeepNestedArrayModel) + return nodes + + def test_nested_struct_field_path(self, nodes: list[Check]) -> None: + paths = {n.target for n in nodes} + assert _path("items[].nested.field") in paths + + +class _ArrayElementWithList(BaseModel): + tags: list[CountryCodeAlpha2] + + +class _ListInArrayModel(BaseModel): + items: list[_ArrayElementWithList] + + +class TestArrayElementListChecks: + """List fields inside array elements need nested iteration.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ListInArrayModel) + return nodes + + def test_list_subfield_element_checks_have_inner_levels( + self, nodes: list[Check] + ) -> None: + # Element-level check on a list field inside an outer array: target + # encodes both iterations explicitly as `items[].tags[]`. 
+ element_nodes = [n for n in nodes if n.target == _path("items[].tags[]")] + assert len(element_nodes) >= 1 + + def test_list_subfield_column_path_is_enclosing_array( + self, nodes: list[Check] + ) -> None: + tag_nodes = [n for n in nodes if str(n.target).startswith("items[].tags")] + for node in tag_nodes: + assert isinstance(node.target, ArrayPath) + # the outermost iterated column is `items`, not the inner `tags` list + assert node.target.array_chunks[0] == ((), "items", 1) + + +class _ArrayElementWithNewtype(BaseModel): + country: CountryCodeAlpha2 + + +class _NewtypeInArrayModel(BaseModel): + items: list[_ArrayElementWithNewtype] + + +class TestArrayElementNewtypeChecks: + """Newtype fields inside array elements: shape=ARRAY, no inner_levels.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_NewtypeInArrayModel) + return nodes + + def test_newtype_subfield_has_single_check(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("items[].country")] + assert len(country_nodes) == 1 + + +class TestModelLevelConstraints: + @pytest.fixture + def radio_model_nodes(self) -> list[ModelCheck]: + _, model_nodes = _checks_for(RadioModel) + return model_nodes + + @pytest.fixture + def require_any_model_nodes(self) -> list[ModelCheck]: + _, model_nodes = _checks_for(RequireAnyModel) + return model_nodes + + def test_radio_group_produces_model_check( + self, radio_model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(radio_model_nodes, "check_radio_group")) == 1 + + def test_radio_group_field_names(self, radio_model_nodes: list[ModelCheck]) -> None: + radio = _filter_nodes(radio_model_nodes, "check_radio_group")[0] + assert set(radio.descriptor.field_names) == {"a", "b"} + + def test_require_any_of_produces_model_check( + self, require_any_model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(require_any_model_nodes, "check_require_any_of")) == 1 + + def 
test_require_any_of_field_names( + self, require_any_model_nodes: list[ModelCheck] + ) -> None: + node = _filter_nodes(require_any_model_nodes, "check_require_any_of")[0] + assert set(node.descriptor.field_names) == {"x", "y"} + + def test_no_constraints_returns_empty_model_nodes(self) -> None: + _, model_nodes = _checks_for(LiteralSubtypeModel) + assert model_nodes == [] + + +class _SpeedStruct(BaseModel): + value: int + unit: str + + +@require_any_of("fast", "slow") +class _RequireAnyOfStructFields(BaseModel): + fast: _SpeedStruct | None = None + slow: _SpeedStruct | None = None + + +class TestRequireAnyOfStructUnwrapping: + """require_any_of on struct fields must reference the leaf scalar, not the struct.""" + + @pytest.fixture + def node(self) -> ModelCheck: + _, model_nodes = _checks_for(_RequireAnyOfStructFields) + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + return nodes[0] + + def test_field_names_use_leaf_path(self, node: ModelCheck) -> None: + assert set(node.descriptor.field_names) == {"fast.value", "slow.value"} + + +class _SyntheticUnionFixtures: + """Discriminated-union models exercising union check generation.""" + + class Base(BaseModel): + kind: str + + class TypeA(Base): + kind: Literal["a"] = "a" + a_field: Literal["x", "y"] | None = None + + class TypeB(Base): + kind: Literal["b"] = "b" + b_field: Literal["p", "q"] | None = None + + SyntheticUnion = Annotated[ + Union[TypeA, TypeB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + @require_any_of("p", "q") + class ConstrainedMember(Base): + kind: Literal["c"] = "c" + p: str | None = None + q: str | None = None + + ConstrainedUnion = Annotated[ + Union[TypeA, ConstrainedMember], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class MemberX(Base): + kind: Literal["x"] = "x" + shared_name: Literal["x1", "x2"] + + class MemberY(Base): + kind: Literal["y"] = "y" + shared_name: Literal["y1", "y2"] + + class MemberZ(Base): + kind: Literal["z"] = 
"z" + + ThreeWayUnion = Annotated[ + Union[MemberX, MemberY, MemberZ], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class MixedRequired(Base): + kind: Literal["r"] = "r" + mixed_field: str + + class MixedOptional(Base): + kind: Literal["o"] = "o" + mixed_field: str | None = None + + class MixedAbsent(Base): + kind: Literal["a"] = "a" + + MixedRequirednessUnion = Annotated[ + Union[MixedRequired, MixedOptional, MixedAbsent], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class AllVarA(Base): + kind: Literal["a"] = "a" + everywhere: str | None = None + + class AllVarB(Base): + kind: Literal["b"] = "b" + everywhere: str | None = None + + AllVariantsUnion = Annotated[ + Union[AllVarA, AllVarB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + @require_any_of("fast", "slow") + @forbid_if(["restrictions"], FieldEqCondition("gated", True)) + class MemberWithModelConstraints(Base): + """Union member carrying model constraints over struct/compound fields.""" + + kind: Literal["m"] = "m" + gated: bool = False + fast: _SpeedStruct | None = None + slow: _SpeedStruct | None = None + restrictions: list[str] | None = None + + MemberConstraintUnion = Annotated[ + Union[TypeA, MemberWithModelConstraints], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class PlainMember(Base): + kind: Literal["p"] = "p" + + +class TestSyntheticUnionChecks: + @pytest.fixture + def field_nodes(self) -> list[Check]: + nodes, _ = _union_checks("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + return nodes + + def test_variant_field_gets_variant_values(self, field_nodes: list[Check]) -> None: + a_nodes = [n for n in field_nodes if n.target == _path("a_field")] + assert len(a_nodes) > 0 + for node in a_nodes: + assert node.guards == (ColumnGuard(discriminator="kind", values=("a",)),) + + def test_shared_field_has_no_variant_values(self, field_nodes: list[Check]) -> None: + kind_nodes = [n for n in field_nodes if n.target == _path("kind")] + for node in 
kind_nodes: + assert node.guards == () + + def test_b_field_gets_b_variant_value(self, field_nodes: list[Check]) -> None: + b_nodes = [n for n in field_nodes if n.target == _path("b_field")] + assert len(b_nodes) > 0 + for node in b_nodes: + assert node.guards == (ColumnGuard(discriminator="kind", values=("b",)),) + + def test_variant_nodes_carry_discriminator_field( + self, field_nodes: list[Check] + ) -> None: + variant_nodes = [n for n in field_nodes if n.guards] + for node in variant_nodes: + for guard in node.guards: + assert guard.discriminator == "kind" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + + @pytest.mark.parametrize( + ("field_name", "expected_value"), + [("a_field", "b"), ("b_field", "a")], + ) + def test_variant_field_gets_forbid_if( + self, + model_nodes: list[ModelCheck], + field_name: str, + expected_value: str, + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", (field_name,)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "kind" + assert condition.value == expected_value + + def test_forbid_if_nodes_are_top_level(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) == 2 + for node in forbid_nodes: + assert node.target == ScalarPath() + + +class TestUnionMemberModelConstraints: + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "Constrained", _SyntheticUnionFixtures.ConstrainedUnion + ) + + def test_member_model_constraints_collected( + self, model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(model_nodes, "check_require_any_of")) == 1 + + def test_member_constraint_tagged_with_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + """Constraint from 
ConstrainedMember carries that member's discriminator value.""" + require_any_of_nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(require_any_of_nodes) == 1 + assert require_any_of_nodes[0].arm == "c" + + def test_exclusivity_checks_have_no_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + """Synthesized forbid_if/require_if checks apply to every arm.""" + exclusivity_nodes = _filter_nodes( + model_nodes, ("check_forbid_if", "check_require_if") + ) + assert exclusivity_nodes + for node in exclusivity_nodes: + assert node.arm is None + + +class TestUnionMemberStructAndCompoundConstraints: + """Member-level constraints on struct/compound fields dispatch with real shapes. + + `@require_any_of` over struct fields must unwrap to the first required + leaf scalar; `@forbid_if` over a compound field must populate + `field_shapes`. Both depend on the member being run through real + extraction rather than stubbed proxies -- a latent gap, since no real + schema member currently carries a model-level constraint decorator. + """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "MemberConstraint", _SyntheticUnionFixtures.MemberConstraintUnion + ) + + def test_require_any_of_unwraps_struct_leaf( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + assert set(nodes[0].descriptor.field_names) == {"fast.value", "slow.value"} + + def test_forbid_if_populates_compound_field_shapes( + self, model_nodes: list[ModelCheck] + ) -> None: + # Exclusivity logic also emits a forbid_if for `restrictions`, but + # gated on the discriminator; the member-level constraint is the + # one whose condition references `gated`. 
+ forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("restrictions",)) + member_level = [ + n + for n in forbid_nodes + if isinstance((cond := _condition_of(n)), FieldEqCondition) + and cond.field_name == "gated" + ] + assert len(member_level) == 1 + descriptor = member_level[0].descriptor + assert isinstance(descriptor, ForbidIf) + assert "restrictions" in dict(descriptor.field_shapes) + + +@require_any_of("max_speed", "min_speed") +class _SpeedLimitElement(BaseModel): + """Element model with its own @require_any_of constraint.""" + + max_speed: int | None = None + min_speed: int | None = None + + +class _VariantWithConstrainedList(_SyntheticUnionFixtures.Base): + """Union member with a variant-specific list of constrained sub-models.""" + + kind: Literal["v"] = "v" + speed_limits: list[_SpeedLimitElement] | None = None + + +_VariantFieldConstraintUnion = Annotated[ + Union[_VariantWithConstrainedList, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestVariantSpecificFieldDiscoveredModelConstraints: + """Model constraints discovered through a variant-specific field carry the contributing arm. + + A `@require_any_of` declared on an element model of a list field that + appears only in one union arm must be tagged with that arm, not + propagated to every arm. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "VariantFieldConstraint", _VariantFieldConstraintUnion + ) + + def test_field_discovered_constraint_tagged_with_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[]") + ] + assert len(nodes) == 1 + assert nodes[0].arm == "v" + + +class _VariantWithConstrainedModelRef(_SyntheticUnionFixtures.Base): + """Variant-specific direct (non-list) model ref with model-level constraint.""" + + kind: Literal["d"] = "d" + speed: _SpeedLimitElement | None = None + + +_DirectModelRefConstraintUnion = Annotated[ + Union[_VariantWithConstrainedModelRef, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestVariantSpecificDirectModelRefConstraint: + """Variant-specific non-list `ModelRef` with a constrained sub-model is unsupported. + + The direct-ref path routes through `_recurse_into_model` rather + than the array branch of `_walk_field_shape`, and pure struct + nesting can't anchor a real model constraint -- the dispatch + raises `NotImplementedError`. Distinct from the `list[Model]` + case in `TestVariantSpecificFieldDiscoveredModelConstraints`, + which is supported. + """ + + def test_direct_modelref_constraint_raises(self) -> None: + # Pure struct nesting can't anchor a real model constraint; today + # the only constraint kind that survives struct nesting raises. 
+ with pytest.raises( + NotImplementedError, match="Model constraint on struct-nested" + ): + _union_model_nodes( + "DirectModelRefConstraint", _DirectModelRefConstraintUnion + ) + + +class _OuterWithStructNestedUnion(BaseModel): + """Non-list `UnionRef` field reaches a union with a constrained member.""" + + nested: _SyntheticUnionFixtures.ConstrainedUnion + + +class TestStructNestedUnionWithConstraint: + """Non-list `UnionRef` reaching a union with model checks is unsupported. + + `_recurse_into_union` mirrors `_recurse_into_model`'s guard: when + the prefix is struct-nested (no `ArrayPath` segment) and the union + would emit either union-level constraints or synthesized + exclusivity checks (`check_forbid_if`/`check_require_if`), the + dispatch raises because `_model_constraint_target` would collapse + the anchor to the row root with field names that don't exist + there. This fixture exercises the union-level branch; the + exclusivity branch isn't covered by a synthetic fixture today + because the dual-trigger raise body is one statement. 
+ """ + + def test_struct_nested_union_constraint_raises(self) -> None: + with pytest.raises( + NotImplementedError, match="Model constraint on struct-nested" + ): + build_checks(feature_spec_for_model(_OuterWithStructNestedUnion)) + + +class _NestedInnerBase(BaseModel): + inner_kind: str + + +class _NestedInnerArmA(_NestedInnerBase): + inner_kind: Literal["i_a"] = "i_a" + a_only: str | None = None + + +@require_any_of("first", "second") +class _NestedInnerArmB(_NestedInnerBase): + """Inner-union arm with its own model-level constraint.""" + + inner_kind: Literal["i_b"] = "i_b" + first: str | None = None + second: str | None = None + + +_NestedInnerUnion = Annotated[ + Union[_NestedInnerArmA, _NestedInnerArmB], # noqa: UP007 + FieldInfo(discriminator="inner_kind"), +] + + +class _OuterArmWithInnerUnion(_SyntheticUnionFixtures.Base): + """Outer-union arm that wraps a nested union via a list field.""" + + kind: Literal["n"] = "n" + inners: list[_NestedInnerUnion] | None = None + + +_NestedUnionViaVariantField = Annotated[ + Union[_OuterArmWithInnerUnion, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestNestedUnionThroughVariantField: + """Inner-union member constraints inherit the outer-union arm. + + Reached through a variant-specific field carrying a nested union, + the inner member's `@require_any_of` must be tagged with the outer + arm ('n'), not the inner discriminator value ('i_b'). The outermost + union's discriminator is the only one per-arm test filtering keys + on. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "NestedUnionViaVariantField", _NestedUnionViaVariantField + ) + + def test_inner_member_constraint_tagged_with_outer_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + require_any_of_nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(require_any_of_nodes) == 1 + assert require_any_of_nodes[0].arm == "n" + + +class _MultiArmContributorA(_SyntheticUnionFixtures.Base): + kind: Literal["a"] = "a" + shared_limits: list[_SpeedLimitElement] | None = None + + +class _MultiArmContributorB(_SyntheticUnionFixtures.Base): + kind: Literal["b"] = "b" + shared_limits: list[_SpeedLimitElement] | None = None + + +class _MultiArmThirdMember(_SyntheticUnionFixtures.Base): + """Third arm that does NOT contribute the shared field.""" + + kind: Literal["c"] = "c" + + +_MultiArmVariantSourcesUnion = Annotated[ + Union[ # noqa: UP007 + _MultiArmContributorA, _MultiArmContributorB, _MultiArmThirdMember + ], + FieldInfo(discriminator="kind"), +] + + +class TestMultiArmVariantSourcesPolicy: + """Tombstone: a 2-of-N variant-specific field collapses to `arm=None`. + + No real schema today declares a variant-specific field on a proper + subset of arms (2-of-N). When/if that pattern surfaces with a + sub-model carrying its own model constraint, the current policy + routes the constraint to every arm rather than the intersection -- + including arms the field doesn't belong to. This pins the + behaviour explicitly so the gap surfaces if anyone treats it as + correct or relies on it. See `_singleton_arm` in `check_builder.py`. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "MultiArmVariantSources", _MultiArmVariantSourcesUnion + ) + + def test_multi_arm_field_discovered_constraint_has_no_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("shared_limits[]") + ] + assert len(nodes) == 1 + # The 2-of-N case can't pick a single arm, so the constraint + # carries arm=None -- broadcasting to every arm, including the + # third member that doesn't declare shared_limits at all. + # Tracked for resolution if/when a real schema surfaces this. + assert nodes[0].arm is None + + +class TestGroupedExclusivityChecks: + """A required field with the same name in 2 of 3 variants (different types) groups correctly.""" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("ThreeWay", _SyntheticUnionFixtures.ThreeWayUnion) + + def test_grouped_field_forbid_if_for_excluded_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("shared_name",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "kind" + assert condition.value == "z" + + def test_grouped_field_require_if_per_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("shared_name",)) + assert len(require_nodes) == 2 + conditions = set() + for node in require_nodes: + cond = _condition_of(node) + assert isinstance(cond, FieldEqCondition) + conditions.add(cond.value) + assert conditions == {"x", "y"} + + +class TestMixedRequirednessExclusivity: + """Same-named field required in one variant, optional in another.""" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + 
"Mixed", _SyntheticUnionFixtures.MixedRequirednessUnion + ) + + def test_require_if_only_for_required_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("mixed_field",)) + assert len(require_nodes) == 1 + condition = _condition_of(require_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.value == "r" + + def test_forbid_if_for_absent_variant(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("mixed_field",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.value == "a" + + +class TestExclusivityEdgeCases: + def test_no_discriminator_produces_zero_exclusivity_nodes(self) -> None: + """Union without discriminator_mapping produces no exclusivity checks.""" + spec = replace( + extract_union("Synthetic", _SyntheticUnionFixtures.SyntheticUnion), + discriminator_mapping=None, + discriminator_field=None, + ) + _, model_nodes = build_checks(spec) + forbid = _filter_nodes(model_nodes, "check_forbid_if") + require = _filter_nodes(model_nodes, "check_require_if") + assert len(forbid) + len(require) == 0 + + def test_field_in_all_variants_no_exclusivity(self) -> None: + """Field present in every variant via variant_sources produces no exclusivity checks.""" + _, model_nodes = _union_checks( + "AllVariants", _SyntheticUnionFixtures.AllVariantsUnion + ) + forbid = _filter_nodes(model_nodes, "check_forbid_if") + require = _filter_nodes(model_nodes, "check_require_if") + assert len(forbid) + len(require) == 0 + + +@require_any_of("x", "y") +class _ArrayElementWithConstraint(BaseModel): + x: str | None = None + y: str | None = None + + +class _ArrayOfConstrainedModel(BaseModel): + items: list[_ArrayElementWithConstraint] + + +@require_any_of("a", "b") +class _NestedConstrainedStruct(BaseModel): + a: str | None = None + b: str | 
None = None + + +class _ArrayElementWithConstrainedNested(BaseModel): + nested: _NestedConstrainedStruct + + +class _ArrayOfNestedConstrained(BaseModel): + items: list[_ArrayElementWithConstrainedNested] + + +@require_any_of("a", "b") +class _InnerConstrainedElement(BaseModel): + a: str | None = None + b: str | None = None + + +class _OuterElementWithConstrainedList(BaseModel): + things: list[_InnerConstrainedElement] + + +class _DoubleNestedConstrained(BaseModel): + items: list[_OuterElementWithConstrainedList] + + +def _require_any_node_for(model_cls: type[BaseModel]) -> ModelCheck: + _, model_nodes = _checks_for(model_cls) + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + return nodes[0] + + +@pytest.mark.parametrize( + ("model_cls", "expected_target"), + [ + pytest.param(_ArrayOfConstrainedModel, _path("items[]"), id="direct_element"), + pytest.param( + _ArrayOfNestedConstrained, _path("items[].nested"), id="nested_struct" + ), + ], +) +class TestArrayContextModelConstraints: + """Model constraints on array-element (or nested struct) models produce array-context ModelChecks.""" + + def test_produces_model_check_node( + self, model_cls: type[BaseModel], expected_target: FieldPath + ) -> None: + node = _require_any_node_for(model_cls) + assert model_constraint_function(node.descriptor) == "check_require_any_of" + + def test_target( + self, model_cls: type[BaseModel], expected_target: FieldPath + ) -> None: + node = _require_any_node_for(model_cls) + assert node.target == expected_target + + +class TestDoubleNestedArrayModelConstraints: + """Model constraints on list[] elements nested inside another array use nested geometry.""" + + def test_target_is_nested_inner_array(self) -> None: + # `things` is itself an ArraySegment, so the constraint's target + # iterates items[] then things[] with no struct nav between. 
+ node = _require_any_node_for(_DoubleNestedConstrained) + assert node.target == _path("items[].things[]") + + +class TestSegmentUnionChecks: + @pytest.fixture(scope="class") + def segment_spec(self) -> FeatureSpec: + return discover_feature("Segment") + + @pytest.fixture(scope="class") + def segment_checks( + self, segment_spec: FeatureSpec + ) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(segment_spec) + + @pytest.fixture(scope="class") + def field_nodes( + self, segment_checks: tuple[list[Check], list[ModelCheck]] + ) -> list[Check]: + return segment_checks[0] + + @pytest.fixture(scope="class") + def model_nodes( + self, segment_checks: tuple[list[Check], list[ModelCheck]] + ) -> list[ModelCheck]: + return segment_checks[1] + + def test_produces_variant_gated_checks(self, field_nodes: list[Check]) -> None: + variant_nodes = [n for n in field_nodes if n.guards] + assert len(variant_nodes) > 0 + + def test_shared_fields_have_no_variant_values( + self, field_nodes: list[Check] + ) -> None: + subtype_nodes = [n for n in field_nodes if n.target == _path("subtype")] + for node in subtype_nodes: + assert node.guards == () + + def test_speed_limits_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + speed_limit_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[]") + ] + assert len(speed_limit_nodes) >= 1 + + def test_destinations_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + dest_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("destinations[]") + ] + assert len(dest_nodes) >= 1 + + def test_speed_limits_when_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + when_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[].when") + ] + assert len(when_nodes) >= 1 + + 
@pytest.mark.parametrize( + ("field_name", "expected_subtype"), + [("road_surface", "road"), ("rail_flags", "rail")], + ) + def test_single_variant_field_forbid_if( + self, + model_nodes: list[ModelCheck], + field_name: str, + expected_subtype: str, + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", (field_name,)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, Not) + assert isinstance(condition.inner, FieldEqCondition) + assert condition.inner.field_name == "subtype" + assert condition.inner.value == expected_subtype + + def test_class_forbid_if_for_water(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("class",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "subtype" + assert condition.value == "water" + + def test_class_require_if_for_road_and_rail( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("class",)) + assert len(require_nodes) == 2 + conditions = [_condition_of(n) for n in require_nodes] + assert all(isinstance(c, FieldEqCondition) for c in conditions) + values = {c.value for c in conditions if isinstance(c, FieldEqCondition)} + assert values == {"road", "rail"} + + def test_nested_union_discriminator_preserved( + self, field_nodes: list[Check] + ) -> None: + """Inner VehicleSelector discriminator survives outer Segment annotation. + + Vehicle unit checks inside variant-specific fields (speed_limits, + prohibited_transitions) need both the outer subtype guard and the + inner dimension discriminator. The outer annotation must not + clobber the inner one. 
+ """ + unit_nodes = [ + n for n in field_nodes if "vehicle[].unit" in str(n.target) and n.guards + ] + assert len(unit_nodes) > 0, "Expected variant-gated vehicle unit nodes" + + for node in unit_nodes: + inner = _element_guard(node) + assert inner is not None, ( + f"{node.target}: inner discriminator should be element-level, " + f"got guards {node.guards}" + ) + assert inner.discriminator == "dimension", ( + f"{node.target}: inner discriminator should be 'dimension', " + f"got {inner.discriminator!r}" + ) + + # Variant-specific fields (speed_limits, prohibited_transitions) + # also need the outer subtype guard. + speed_unit_nodes = [n for n in unit_nodes if "speed_limits" in str(n.target)] + for node in speed_unit_nodes: + outer = _column_guard(node) + assert outer is not None, f"{node.target}: missing outer subtype guard" + assert outer.discriminator == "subtype", ( + f"{node.target}: outer discriminator should be 'subtype', " + f"got {outer.discriminator!r}" + ) + + def test_segment_vehicle_selector_field_checks( + self, field_nodes: list[Check] + ) -> None: + """VehicleSelector fields appear with correct nesting.""" + vehicle_nodes = [n for n in field_nodes if "vehicle[]" in str(n.target)] + assert len(vehicle_nodes) > 0 + + dim_nodes = [n for n in vehicle_nodes if "dimension" in str(n.target)] + assert any("speed_limits" in str(n.target) for n in dim_nodes) + assert any("access_restrictions" in str(n.target) for n in dim_nodes) + + for node in dim_nodes: + assert isinstance(node.target, ArrayPath) + # vehicle[] is nested inside an outer array (speed_limits, etc.), + # so the struct nav to `dimension` lands in the target's leaf. 
+ assert len(node.target.leaf) >= 1 + + def test_segment_vehicle_selector_exclusivity( + self, model_nodes: list[ModelCheck] + ) -> None: + """VehicleSelector produces forbid_if/require_if for unit field.""" + vehicle_forbid = [ + n + for n in _filter_nodes(model_nodes, "check_forbid_if") + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + assert len(vehicle_forbid) > 0 + + vehicle_require = [ + n + for n in _filter_nodes(model_nodes, "check_require_if") + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + assert len(vehicle_require) > 0 + + def test_segment_vehicle_selector_exclusivity_has_inner_levels( + self, model_nodes: list[ModelCheck] + ) -> None: + """VehicleSelector exclusivity checks use nested geometry to reach vehicle[].""" + vehicle_constraint_nodes = [ + n + for n in _filter_nodes(model_nodes, ("check_forbid_if", "check_require_if")) + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + for node in vehicle_constraint_nodes: + assert isinstance(node.target, ArrayPath) + # The target reaches the inner vehicle[] via a second iteration: + # one inner level navigating `when` to the `vehicle` array. 
+ iter_paths = node.target.iter_struct_paths + assert len(iter_paths) == 1 + assert "when" in iter_paths[0] + assert "vehicle" in iter_paths[0] + + +class _InnerBase(BaseModel): + kind: str + + +class _InnerA(_InnerBase): + kind: Literal["a"] = "a" + a_field: str + + +class _InnerB(_InnerBase): + kind: Literal["b"] = "b" + b_field: int = Field(ge=0) + + +_InnerUnion = Annotated[ + _InnerA | _InnerB, + Field(discriminator="kind"), +] + + +class _Wrapper(BaseModel): + items: list[_InnerUnion] + + +class TestUnionInsideArray: + """UNION-kind fields nested inside list[] produce variant-gated checks.""" + + @pytest.fixture(scope="class") + def results(self) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(feature_spec_for_model(_Wrapper)) + + @pytest.fixture(scope="class") + def field_nodes(self, results: tuple[list[Check], list[ModelCheck]]) -> list[Check]: + return results[0] + + @pytest.fixture(scope="class") + def model_nodes( + self, results: tuple[list[Check], list[ModelCheck]] + ) -> list[ModelCheck]: + return results[1] + + @pytest.fixture(scope="class") + def a_nodes(self, field_nodes: list[Check]) -> list[Check]: + return [n for n in field_nodes if n.target == _path("items[].a_field")] + + @pytest.fixture(scope="class") + def b_nodes(self, field_nodes: list[Check]) -> list[Check]: + return [n for n in field_nodes if n.target == _path("items[].b_field")] + + def test_a_field_check_produced(self, a_nodes: list[Check]) -> None: + assert len(a_nodes) >= 1 + + def test_a_field_is_array_shape(self, a_nodes: list[Check]) -> None: + assert isinstance(a_nodes[0].target, ArrayPath) + + def test_a_field_target_is_items(self, a_nodes: list[Check]) -> None: + assert a_nodes[0].target == _path("items[].a_field") + + def test_a_field_guard(self, a_nodes: list[Check]) -> None: + assert a_nodes[0].guards == (ElementGuard(discriminator="kind", values=("a",)),) + + def test_a_nodes_have_array_shape(self, a_nodes: list[Check]) -> None: + assert 
all(isinstance(n.target, ArrayPath) for n in a_nodes) + + def test_b_field_check_produced(self, b_nodes: list[Check]) -> None: + assert len(b_nodes) >= 1 + + def test_b_field_guard(self, b_nodes: list[Check]) -> None: + assert b_nodes[0].guards == (ElementGuard(discriminator="kind", values=("b",)),) + + def test_b_nodes_have_array_shape(self, b_nodes: list[Check]) -> None: + assert all(isinstance(n.target, ArrayPath) for n in b_nodes) + + def test_forbid_nodes_produced(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) > 0 + + def test_forbid_nodes_have_array_column_path( + self, model_nodes: list[ModelCheck] + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + for node in forbid_nodes: + assert node.target == _path("items[]") + + def test_require_if_model_nodes_have_array_column_path( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if") + for node in require_nodes: + assert node.target == _path("items[]") + + +class TestTopLevelUnionColumnPath: + """Top-level union (not inside array) exclusivity nodes have column_path=None.""" + + @pytest.fixture(scope="class") + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + + def test_forbid_if_column_path_is_none(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) > 0 + for node in forbid_nodes: + assert node.target == ScalarPath() + + def test_require_if_column_path_is_none( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if") + for node in require_nodes: + assert node.target == ScalarPath() + + +class _ListUnionContainer(BaseModel): + """Top-level list of a discriminated union. 
+ + The variant fields live inside each list element, so variant gating + must reference the element-level discriminator (`el["kind"]`), not a + top-level column (`F.col("kind")`). + """ + + items: list[_SyntheticUnionFixtures.SyntheticUnion] + + +class TestTopLevelListUnion: + """Field-level checks for `list[DiscriminatedUnion]` at the feature root. + + Regression test: the discriminator must be flagged as element-level so + the renderer accesses `el["kind"]` rather than `F.col("kind")`. + """ + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ListUnionContainer) + return nodes + + def test_variant_field_uses_element_level_discriminator( + self, nodes: list[Check] + ) -> None: + for variant_field in ("a_field", "b_field"): + variant_nodes = [n for n in nodes if variant_field in str(n.target)] + assert variant_nodes, f"Expected variant-gated {variant_field} nodes" + for node in variant_nodes: + guard = _element_guard(node) + assert guard is not None, ( + f"{node.target}: list[Union] descendants must use the " + "element-level discriminator" + ) + assert guard.discriminator == "kind", ( + f"{node.target}: discriminator should be 'kind'" + ) + + +class _NestedListUnionContainer(BaseModel): + """Top-level `list[list[DiscriminatedUnion]]` with a constrained member. + + A union nested under multiple list layers would need the union + target to record `list_depth` iterations, but the rebase in + `_recurse_into_union` records only one. No real schema exercises + this path; `build_checks` raises rather than emit a target that + silently drops iterations. 
+ """ + + nested: list[list[_SyntheticUnionFixtures.ConstrainedUnion]] + + +class TestNestedListUnionModelConstraints: + """`list[list[Union]]` raises rather than emit a collapsed target.""" + + def test_build_checks_raises_not_implemented(self) -> None: + with pytest.raises(NotImplementedError, match="multiple list layers"): + _checks_for(_NestedListUnionContainer) + + +class _DeepInnerModel(BaseModel): + value: Annotated[str, Field(min_length=1)] + + +class _DoubleNestedArrayModel(BaseModel): + items: list[list[_DeepInnerModel]] + + +class TestDoubleNestedArrayFieldChecks: + """Sub-field validation for list[list[Model]] (list_depth=2).""" + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_DoubleNestedArrayModel) + return nodes + + def test_subfield_target_encodes_both_array_levels( + self, nodes: list[Check] + ) -> None: + # A `list[list[Model]]` sub-field reaches `value` through a single + # ArraySegment with iter_count=2; the target pins the full geometry. + assert any(n.target == _path("items[][].value") for n in nodes) + + +class TestTripleNestedArrayFieldChecks: + """Verify depth=3 nesting generates correct geometry.""" + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(TripleNestedArrayModel) + return nodes + + def test_subfield_target_shows_three_brackets(self, nodes: list[Check]) -> None: + assert any(n.target == _path("deep[][][].tag") for n in nodes) + + +class _NestedScalarListModel(BaseModel): + """list[list[scalar]] terminating directly in a constrained scalar. + + Exercises the one nested-array geometry the other tests miss: an + element-level check whose target's terminal ArraySegment carries + iter_count > 1 with no struct leaf after it (`grid[][]`, not + `grid[][].field`). 
+ """ + + grid: list[list[Annotated[str, MinLen(1)]]] + + +class TestNestedScalarListTarget: + """Element-level check on list[list[scalar]] targets a bare `field[][]`.""" + + def test_terminal_target_carries_iter_count_two(self) -> None: + nodes, _ = _checks_for(_NestedScalarListModel) + node = _node_for(nodes, "grid[][]", "check_string_min_length") + target = node.target + assert isinstance(target, ArrayPath) + last = target.segments[-1] + assert isinstance(last, ArraySegment) + assert last.name == "grid" + assert last.iter_count == 2 + + +class TestPrimitiveBoundsFiltered: + """Constraints inherent to primitive numeric types are filtered out.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + """Field with int32-inherent and layered bounds.""" + shape = Primitive( + base_type="int32", + constraints=( + # Layered by schema author + ConstraintSource( + source_ref=None, source_name="FeatureVersion", constraint=Ge(ge=0) + ), + # Inherent to int32 + ConstraintSource( + source_ref=None, source_name="int32", constraint=Ge(ge=-(2**31)) + ), + ConstraintSource( + source_ref=None, source_name="int32", constraint=Le(le=2**31 - 1) + ), + ), + ) + field = FieldSpec( + name="version", shape=shape, description=None, is_required=True + ) + spec = ModelSpec(name="Test", description=None, fields=[field]) + nodes, _ = build_checks(spec) + return nodes + + def test_layered_bound_survives(self, nodes: list[Check]) -> None: + descs = nodes[0].descriptors + bounds = [d for d in descs if d.function == "check_bounds"] + assert len(bounds) == 1 + assert dict(bounds[0].kwargs) == {"ge": 0} + + def test_primitive_bounds_excluded(self, nodes: list[Check]) -> None: + descs = nodes[0].descriptors + bounds = [d for d in descs if d.function == "check_bounds"] + for b in bounds: + d = dict(b.kwargs) + assert d.get("ge") != -(2**31) + assert d.get("le") != 2**31 - 1 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py 
b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py new file mode 100644 index 000000000..2cfd6a676 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py @@ -0,0 +1,385 @@ +"""Tests for pyspark constraint dispatch.""" + +import pytest +from annotated_types import Ge, Gt, Interval, Le, Lt +from overture.schema.codegen.extraction.field import Primitive +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) +from overture.schema.codegen.extraction.specs import FieldSpec +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, + dispatch_base_type, + dispatch_constraint, + dispatch_model_constraint, + dispatch_newtype, + model_constraint_function, +) +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + JsonPointerConstraint, + PatternConstraint, + SnakeCaseConstraint, + StrippedConstraint, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryType, GeometryTypeConstraint +from overture.schema.system.ref import Identified, Reference, Relationship +from pydantic import Strict + + +class _Stub(Identified): + pass + + +class TestBoundsDispatch: + @pytest.mark.parametrize( + ("constraint", "expected_kwargs"), + [ + (Ge(ge=0), (("ge", 0),)), + (Gt(gt=0), (("gt", 0),)), + (Le(le=100), (("le", 100),)), + (Lt(lt=100), (("lt", 100),)), + (Interval(ge=0, le=1), (("ge", 0), ("le", 1))), + (Interval(ge=0), (("ge", 0),)), + ], + ) + def test_bound_dispatches_to_check_bounds( + 
self, constraint: object, expected_kwargs: tuple[tuple[str, object], ...] + ) -> None: + desc = dispatch_constraint(constraint) + assert desc is not None + assert desc.function == "check_bounds" + assert desc.kwargs == expected_kwargs + + def test_int_bounds_coerced_to_float_for_float_type(self) -> None: + """Integer bound values become float when the field is a float type.""" + desc = dispatch_constraint(Ge(ge=0), base_type="float64") + assert desc is not None + assert desc.kwargs == (("ge", 0.0),) + assert isinstance(dict(desc.kwargs)["ge"], float) + + def test_int_bounds_preserved_for_int_type(self) -> None: + desc = dispatch_constraint(Ge(ge=0), base_type="int32") + assert desc is not None + assert desc.kwargs == (("ge", 0),) + assert isinstance(dict(desc.kwargs)["ge"], int) + + def test_float_bounds_unchanged_for_float_type(self) -> None: + desc = dispatch_constraint(Ge(ge=0.0), base_type="float64") + assert desc is not None + assert desc.kwargs == (("ge", 0.0),) + assert isinstance(dict(desc.kwargs)["ge"], float) + + +class TestLengthDispatch: + def test_min_len_on_array(self) -> None: + desc = dispatch_constraint(ArrayMinLen(min_length=2)) + assert desc == ExpressionDescriptor( + function="check_array_min_length", args=(2,) + ) + + def test_min_len_on_scalar(self) -> None: + desc = dispatch_constraint(ScalarMinLen(min_length=1)) + assert desc == ExpressionDescriptor( + function="check_string_min_length", args=(1,) + ) + + def test_max_len_on_array(self) -> None: + desc = dispatch_constraint(ArrayMaxLen(max_length=10)) + assert desc == ExpressionDescriptor( + function="check_array_max_length", args=(10,) + ) + + def test_max_len_on_scalar(self) -> None: + desc = dispatch_constraint(ScalarMaxLen(max_length=10)) + assert desc == ExpressionDescriptor( + function="check_string_max_length", args=(10,) + ) + + +class TestStringConstraintDispatch: + def test_stripped(self) -> None: + desc = dispatch_constraint(StrippedConstraint()) + assert desc is not None + assert 
desc.function == "check_stripped" + + def test_json_pointer(self) -> None: + desc = dispatch_constraint(JsonPointerConstraint()) + assert desc is not None + assert desc.function == "check_json_pointer" + + def test_pattern_constraint_base(self) -> None: + c = PatternConstraint(r"^[A-Z]{2}$", "test error") + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[A-Z]{2}\z",) # anchor normalized + + def test_country_code_dispatches_as_pattern(self) -> None: + c = CountryCodeAlpha2Constraint() + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[A-Z]{2}\z",) # anchor normalized + assert desc.label == "ISO 3166-1 alpha-2 country code" + assert desc.check_name == "country_code_alpha2" + + def test_snake_case_dispatches_as_pattern(self) -> None: + c = SnakeCaseConstraint() + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[a-z0-9]+(_[a-z0-9]+)*\z",) # anchor normalized + assert desc.label == "Category in snake_case format" + assert desc.check_name == "snake_case" + + +class TestPatternConstraintDispatch: + def test_pattern_constraint_label_fallback_to_docstring(self) -> None: + """PatternConstraint with no description falls back to docstring, period stripped.""" + c = PatternConstraint(r"^test$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + # Base PatternConstraint has docstring "Generic pattern-based string constraint." 
+ assert desc.label == "Generic pattern-based string constraint" + + def test_pattern_constraint_check_name_base_class(self) -> None: + c = PatternConstraint(r"^test$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + assert desc.check_name == "pattern" + + def test_anchor_normalized_dollar_to_backslash_z(self) -> None: + c = CountryCodeAlpha2Constraint() # pattern ends with $ + desc = dispatch_constraint(c) + assert desc is not None + pattern = str(desc.args[0]) + assert pattern.endswith(r"\z") + assert not pattern.endswith("$") + + def test_anchor_normalization_replaces_only_trailing_dollar(self) -> None: + """Dollar signs inside character classes are not end-anchors.""" + c = PatternConstraint(r"^[\$]+$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + pattern = str(desc.args[0]) + # The trailing $ is replaced; the \$ inside the class is preserved + assert pattern == r"^[\$]+\z" + + +class TestStructuralConstraintDispatch: + def test_unique_items(self) -> None: + desc = dispatch_constraint(UniqueItemsConstraint()) + assert desc is not None + assert desc.function == "check_struct_unique" + + def test_geometry_type(self) -> None: + c = GeometryTypeConstraint(GeometryType.POINT) + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_geometry_type" + assert GeometryType.POINT in desc.args + + +class TestSkippedConstraints: + def test_reference_returns_none(self) -> None: + r = Reference(Relationship.AGGREGATION, _Stub) + desc = dispatch_constraint(r) + assert desc is None + + def test_strict_returns_none(self) -> None: + desc = dispatch_constraint(Strict()) + assert desc is None + + +class TestBaseTypeDispatch: + def test_http_url_dispatches_to_check_url_format_and_length(self) -> None: + descs = dispatch_base_type("HttpUrl") + assert descs is not None + assert len(descs) == 2 + assert descs[0].function == "check_url_format" + assert descs[1].function == "check_url_length" 
+
+    def test_email_str_dispatches_to_check_email(self) -> None:
+        descs = dispatch_base_type("EmailStr")
+        assert descs is not None
+        assert len(descs) == 1
+        assert descs[0].function == "check_email"
+
+    def test_bbox_dispatches_to_three_checks(self) -> None:
+        descs = dispatch_base_type("BBox")
+        assert descs is not None
+        assert len(descs) == 3  # the index asserts below also pin the order
+        assert descs[0].function == "check_bbox_completeness"
+        assert descs[1].function == "check_bbox_lat_ordering"
+        assert descs[2].function == "check_bbox_lat_range"
+
+    def test_unknown_base_type_returns_none(self) -> None:
+        descs = dispatch_base_type("str")  # plain "str" yields no descriptors
+        assert descs is None
+
+
+class TestNewtypeDispatch:  # dispatch_newtype(name) -> descriptors or None
+    def test_linear_range(self) -> None:
+        descs = dispatch_newtype("LinearlyReferencedRange")
+        assert descs is not None
+        assert len(descs) == 3  # the index asserts below also pin the order
+        assert descs[0].function == "check_linear_range_length"
+        assert descs[1].function == "check_linear_range_bounds"
+        assert descs[2].function == "check_linear_range_order"
+
+    def test_country_code_alpha2_returns_none(self) -> None:
+        # NOTE(review): presumably validated via its pattern constraint -- confirm
+        descs = dispatch_newtype("CountryCodeAlpha2")
+        assert descs is None
+
+    def test_region_code_returns_none(self) -> None:
+        descs = dispatch_newtype("RegionCode")
+        assert descs is None
+
+    def test_unknown_newtype_returns_none(self) -> None:
+        desc = dispatch_newtype("FeatureVersion")
+        assert desc is None
+
+
+class TestUnknownConstraintFails:  # unrecognized input raises TypeError
+    def test_unknown_constraint_raises(self) -> None:
+        with pytest.raises(TypeError, match="Unhandled constraint"):
+            dispatch_constraint(object())  # bare object() matches no handler
+
+
+class TestModelConstraintDispatch:  # dispatch_model_constraint(constraint, fields)
+    def test_require_any_of(self) -> None:
+        c = RequireAnyOfConstraint("a", "b")
+        (desc,) = dispatch_model_constraint(c, [])  # unpack pins exactly one result
+        assert isinstance(desc, RequireAnyOf)
+        assert model_constraint_function(desc) == "check_require_any_of"
+        assert desc.field_names == ("a", "b")
+
+    def test_radio_group(self) -> None:
+        c = RadioGroupConstraint("is_land", "is_territorial")
+        (desc,) = dispatch_model_constraint(c, [])  # unpack pins exactly one result
+        assert isinstance(desc, RadioGroup)
+        assert model_constraint_function(desc) == "check_radio_group"
+        assert desc.field_names == ("is_land", "is_territorial")
+
+    def test_require_if(self) -> None:
+        c = RequireIfConstraint(
+            field_names=("class",),
+            condition=FieldEqCondition(field_name="subtype", value="road"),
+        )
+        (desc,) = dispatch_model_constraint(c, [])  # single field -> one descriptor
+        assert isinstance(desc, RequireIf)
+        assert model_constraint_function(desc) == "check_require_if"
+        assert desc.field_names == ("class",)
+        assert desc.condition is c.condition  # same object, not a copy
+
+    def test_require_if_multi_field_splits(self) -> None:
+        """Multi-field `@require_if(["a", "b"], cond)` splits into one descriptor per field.
+
+        Each runtime `check_require_if` call takes a single target
+        column, so the descriptor mirrors that: one per field, sharing
+        the same condition.
+        """
+        condition = FieldEqCondition(field_name="subtype", value="road")
+        c = RequireIfConstraint(field_names=("a", "b"), condition=condition)
+        descs = dispatch_model_constraint(c, [])
+        assert len(descs) == 2
+        assert all(isinstance(d, RequireIf) for d in descs)
+        assert [d.field_names for d in descs] == [("a",), ("b",)]
+        assert all(d.condition is condition for d in descs)  # type: ignore[union-attr]
+
+    def test_forbid_if(self) -> None:
+        c = ForbidIfConstraint(
+            field_names=("class",),
+            condition=FieldEqCondition(field_name="subtype", value="water"),
+        )
+        (desc,) = dispatch_model_constraint(c, [])  # single field -> one descriptor
+        assert isinstance(desc, ForbidIf)
+        assert model_constraint_function(desc) == "check_forbid_if"
+        assert desc.field_names == ("class",)
+        assert desc.field_shapes == ()  # presumably empty since fields=[] -- confirm
+
+    def test_forbid_if_negated(self) -> None:
+        c = ForbidIfConstraint(
+            field_names=("parent_division_id",),
+            condition=Not(FieldEqCondition(field_name="subtype", value="country")),
+        )
+        (desc,) = dispatch_model_constraint(c, [])  # single field -> one descriptor
+        assert isinstance(desc, ForbidIf)
+        assert model_constraint_function(desc) == "check_forbid_if"
+        assert desc.condition is c.condition  # the Not(...) object itself is kept
+
+    
def test_forbid_if_multi_field_splits(self) -> None: + """Multi-field `@forbid_if` splits into one descriptor per field, each with its own shape.""" + condition = FieldEqCondition(field_name="subtype", value="road") + c = ForbidIfConstraint(field_names=("a", "b"), condition=condition) + descs = dispatch_model_constraint(c, []) + assert len(descs) == 2 + assert all(isinstance(d, ForbidIf) for d in descs) + assert [d.field_names for d in descs] == [("a",), ("b",)] + + def test_min_fields_set(self) -> None: + c = MinFieldsSetConstraint(count=1) + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, MinFieldsSet) + assert model_constraint_function(desc) == "check_min_fields_set" + assert desc.count == 1 + assert desc.field_names == () + + def test_min_fields_set_enumerates_all_fields(self) -> None: + """`field_names` holds every field -- required and optional alike. + + Matches Pydantic's `model_fields_set` semantics, where required + fields are always set by the constructor and contribute to the + count alongside any explicitly-set optional fields. 
+ """ + fields = [ + FieldSpec(name="required_a", shape=Primitive(base_type="str")), + FieldSpec( + name="optional_b", + shape=Primitive(base_type="str"), + is_required=False, + ), + FieldSpec(name="required_c", shape=Primitive(base_type="str")), + FieldSpec( + name="optional_d", + shape=Primitive(base_type="str"), + is_required=False, + ), + ] + c = MinFieldsSetConstraint(count=1) + (desc,) = dispatch_model_constraint(c, fields) + assert isinstance(desc, MinFieldsSet) + assert desc.field_names == ( + "required_a", + "optional_b", + "required_c", + "optional_d", + ) + + def test_no_extra_fields_skipped(self) -> None: + c = NoExtraFieldsConstraint() + assert dispatch_model_constraint(c, []) == () + + def test_unknown_model_constraint_raises(self) -> None: + with pytest.raises(TypeError, match="Unhandled model constraint"): + dispatch_model_constraint(object(), []) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py new file mode 100644 index 000000000..6b7629ea1 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py @@ -0,0 +1,206 @@ +"""End-to-end generation tests: verify generated modules match hand-written references.""" + +import ast +from pathlib import Path +from typing import Annotated, Literal + +import pytest +from annotated_types import Ge +from codegen_test_support import discover_feature +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.pyspark.pipeline import ( + GeneratedModule, + generate_pyspark_module, +) +from pydantic import BaseModel + + +class SimpleModel(BaseModel): + subtype: Literal["a", "b"] + score: Annotated[float, Ge(0.0)] | None = None + + +class TestDivisionAreaGeneration: + @pytest.fixture + def generated(self) -> GeneratedModule: + spec = discover_feature("DivisionArea") + return generate_pyspark_module(spec) + + def test_generates_valid_python(self, generated: 
GeneratedModule) -> None: + ast.parse(generated.content) + + def test_has_builder_function(self, generated: GeneratedModule) -> None: + assert "def division_area_checks()" in generated.content + + def test_has_schema_constant(self, generated: GeneratedModule) -> None: + assert "DIVISION_AREA_SCHEMA" in generated.content + + def test_output_path(self, generated: GeneratedModule) -> None: + assert generated.path.name == "division_area.py" + + def test_checks_cover_expected_fields(self, generated: GeneratedModule) -> None: + """Generated checks should cover the fields from the hand-written module.""" + content = generated.content + # Hand-written checks: subtype, class, country, region, radio_group (is_land, is_territorial), admin_level + for field in ["subtype", "class", "country", "region"]: + assert f'field="{field}"' in content, f"Missing check for {field}" + + def test_schema_has_expected_fields(self, generated: GeneratedModule) -> None: + """Schema should contain all expected DivisionArea fields.""" + content = generated.content + expected_fields = [ + "id", + "geometry", + "bbox", + "country", + "version", + "subtype", + "class", + "names", + "is_land", + "is_territorial", + "region", + "admin_level", + "division_id", + "theme", + "type", + ] + for field in expected_fields: + assert f'"{field}"' in content, f"Missing schema field: {field}" + + def test_uses_bbox_shared_struct(self, generated: GeneratedModule) -> None: + """Should reference BBOX_STRUCT from _schema_structs (BBox is not a BaseModel).""" + assert "BBOX_STRUCT" in generated.content + + def test_imports_constraint_expressions(self, generated: GeneratedModule) -> None: + """Should import constraint expression functions.""" + content = generated.content + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in content + ) + + def test_radio_group_constraint(self, generated: GeneratedModule) -> None: + """Should have a radio_group check for is_land/is_territorial.""" + 
content = generated.content + assert "check_radio_group" in content + assert "is_land" in content + assert "is_territorial" in content + + def test_subtype_has_check_enum(self, generated: GeneratedModule) -> None: + """Subtype (ENUM-kind field) should produce a check_enum with member values.""" + assert "check_enum" in generated.content + + def test_country_uses_check_pattern(self, generated: GeneratedModule) -> None: + """Country field (required newtype) produces both check_required and check_pattern.""" + assert "check_pattern" in generated.content + # Bug #1 regression: check_required must not be skipped for required newtype fields. + # With split checks, each descriptor produces its own function; both must appear. + assert "check_required" in generated.content + + def test_region_uses_check_pattern(self, generated: GeneratedModule) -> None: + """Region field produces check_pattern with the region-code label.""" + assert "ISO 3166-2 subdivision code" in generated.content + + +@pytest.mark.parametrize( + "class_name,builder_name,schema_name", + [ + ("DivisionArea", "division_area_checks", "DIVISION_AREA_SCHEMA"), + ("Division", "division_checks", "DIVISION_SCHEMA"), + ("DivisionBoundary", "division_boundary_checks", "DIVISION_BOUNDARY_SCHEMA"), + ("Place", "place_checks", "PLACE_SCHEMA"), + ], +) +class TestModelFeatureGeneration: + @pytest.fixture + def generated(self, class_name: str) -> GeneratedModule: + spec = discover_feature(class_name) + return generate_pyspark_module(spec) + + def test_generates_valid_python( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + ast.parse(generated.content) + + def test_has_builder_function( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + assert f"def {builder_name}()" in generated.content + + def test_has_schema_constant( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, 
+ schema_name: str, + ) -> None: + assert schema_name in generated.content + + def test_has_shared_bbox_struct( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + assert "BBOX_STRUCT" in generated.content + + +class TestSegmentGeneration: + @pytest.fixture + def generated(self) -> GeneratedModule: + spec = discover_feature("Segment") + return generate_pyspark_module(spec) + + def test_generates_valid_python(self, generated: GeneratedModule) -> None: + ast.parse(generated.content) + + def test_has_builder_and_schema(self, generated: GeneratedModule) -> None: + assert "def segment_checks()" in generated.content + assert "SEGMENT_SCHEMA" in generated.content + + def test_has_shared_bbox_struct(self, generated: GeneratedModule) -> None: + assert "BBOX_STRUCT" in generated.content + + def test_has_variant_conditional_checks(self, generated: GeneratedModule) -> None: + """Segment has subtype-gated fields using runtime values like 'road'.""" + assert "F.when" in generated.content + assert "isin" in generated.content + # Variant values must use the runtime string value, not the enum repr + assert '"road"' in generated.content or "'road'" in generated.content + assert "Subtype.ROAD" not in generated.content + + def test_array_discriminator_outside_lambda( + self, generated: GeneratedModule + ) -> None: + """Top-level discriminator must wrap array_check, not appear inside the lambda.""" + # el["subtype"] must never appear — subtype is a top-level column, not an element field + assert 'el["subtype"]' not in generated.content, ( + 'el["subtype"] found — top-level discriminator placed inside array lambda' + ) + # F.col("subtype") must appear as the discriminator reference + assert 'F.col("subtype")' in generated.content + + +def test_cli_writes_init_modules(tmp_path: Path) -> None: + from overture.schema.codegen.cli import _generate_pyspark + + spec = extract_model(SimpleModel, 
entry_point="overture.schema.simple:SimpleModel") + out = tmp_path / "src" + test_out = tmp_path / "tests" + _generate_pyspark([spec], out, test_out) + assert (out / "overture" / "schema" / "simple" / "__init__.py").exists() + assert (out / "overture" / "schema" / "simple" / "simple_model.py").exists() + assert (test_out / "overture" / "schema" / "simple" / "__init__.py").exists() + assert ( + test_out / "overture" / "schema" / "simple" / "test_simple_model.py" + ).exists() diff --git a/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py new file mode 100644 index 000000000..d2a7811d1 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py @@ -0,0 +1,175 @@ +"""Tests for constraint-violating value generation.""" + +import pytest +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor +from overture.schema.codegen.pyspark.test_data.invalid_value import invalid_value +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + NoWhitespaceConstraint, + RegionCodeConstraint, +) +from overture.schema.system.primitive.geom import GeometryType + + +class TestInvalidValueRequired: + def test_returns_none(self) -> None: + desc = ExpressionDescriptor(function="check_required") + assert invalid_value(desc) is None + + +class TestInvalidValueEnum: + def test_returns_invalid_sentinel(self) -> None: + desc = ExpressionDescriptor(function="check_enum", args=(["a", "b"],)) + assert invalid_value(desc) == "__INVALID__" + + +class TestInvalidValueBounds: + def test_ge(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)) + assert invalid_value(desc) == -1 + + def test_ge_float(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0.0),)) + assert invalid_value(desc) == -1.0 + assert isinstance(invalid_value(desc), float) + + def 
test_gt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("gt", 0),)) + assert invalid_value(desc) == 0 + + def test_le(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("le", 100),)) + assert invalid_value(desc) == 101 + + def test_lt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("lt", 100),)) + assert invalid_value(desc) == 100 + + def test_unknown_bound_raises(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("unknown", 5),)) + with pytest.raises(ValueError): + invalid_value(desc) + + +class TestInvalidValuePattern: + def test_default_pattern(self) -> None: + desc = ExpressionDescriptor(function="check_pattern", args=(r"^[A-Z]+$",)) + assert invalid_value(desc) == "!!!INVALID!!!" + + def test_no_whitespace_pattern(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + args=(r"^\S+$",), + constraint_type=NoWhitespaceConstraint, + ) + assert invalid_value(desc) == "has whitespace" + + +class TestInvalidValueStringTypes: + def test_country_code(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + constraint_type=CountryCodeAlpha2Constraint, + ) + assert invalid_value(desc) == "99" + + def test_region_code(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + constraint_type=RegionCodeConstraint, + ) + assert invalid_value(desc) == "99-999" + + def test_url_format(self) -> None: + desc = ExpressionDescriptor(function="check_url_format") + assert invalid_value(desc) == "not-a-url" + + def test_url_length(self) -> None: + desc = ExpressionDescriptor(function="check_url_length") + assert invalid_value(desc) == "https://" + "x" * 2076 + + def test_email(self) -> None: + desc = ExpressionDescriptor(function="check_email") + assert invalid_value(desc) == "not-an-email" + + def test_stripped(self) -> None: + desc = ExpressionDescriptor(function="check_stripped") + assert invalid_value(desc) 
== " has spaces " + + def test_json_pointer(self) -> None: + desc = ExpressionDescriptor(function="check_json_pointer") + assert invalid_value(desc) == "no-slash" + + +class TestInvalidValueCollections: + def test_min_length_empty_list(self) -> None: + desc = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + assert invalid_value(desc) == [] + + def test_max_length_oversized(self) -> None: + desc = ExpressionDescriptor(function="check_array_max_length", args=(3,)) + assert invalid_value(desc) == [{}] * 4 + + def test_string_min_length_empty_string(self) -> None: + desc = ExpressionDescriptor(function="check_string_min_length", args=(1,)) + assert invalid_value(desc) == "" + + def test_string_max_length_oversized_string(self) -> None: + desc = ExpressionDescriptor(function="check_string_max_length", args=(3,)) + assert invalid_value(desc) == "x" * 4 + + +class TestInvalidValueLinearRange: + def test_linear_range_length(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_length") + assert invalid_value(desc) == [0.5] + + def test_linear_range_bounds(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_bounds") + assert invalid_value(desc) == [1.5, 2.0] + + def test_linear_range_order(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_order") + assert invalid_value(desc) == [0.8, 0.2] + + +class TestInvalidValueGeometry: + def test_point_not_allowed_picks_point(self) -> None: + # Allowed: polygon only → first candidate (POINT) not in allowed set + desc = ExpressionDescriptor( + function="check_geometry_type", args=(GeometryType.POLYGON,) + ) + assert invalid_value(desc) == "POINT (0 0)" + + def test_point_allowed_picks_linestring(self) -> None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT, GeometryType.POLYGON), + ) + assert invalid_value(desc) == "LINESTRING (0 0, 1 1)" + + def test_point_and_linestring_allowed_picks_collection(self) -> 
None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT, GeometryType.LINE_STRING), + ) + assert invalid_value(desc) == "GEOMETRYCOLLECTION EMPTY" + + def test_all_candidates_allowed_raises(self) -> None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.GEOMETRY_COLLECTION, + ), + ) + with pytest.raises(ValueError): + invalid_value(desc) + + +class TestInvalidValueUnknown: + def test_unknown_function_raises(self) -> None: + desc = ExpressionDescriptor(function="check_something_unknown") + with pytest.raises(ValueError): + invalid_value(desc) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py new file mode 100644 index 000000000..95201a09b --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py @@ -0,0 +1,391 @@ +"""Tests for the PySpark generation pipeline.""" + +import ast +from pathlib import PurePosixPath +from typing import Annotated, Literal + +import pytest +from annotated_types import Ge +from codegen_test_support import find_theme, partitions_from_tags +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + is_model_class, + is_union_alias, +) +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.pyspark.check_ir import Check +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor +from overture.schema.codegen.pyspark.pipeline import ( + GeneratedModule, + PipelineOutput, + _extract_geometry_types, + generate_pyspark_module, + generate_pyspark_modules, +) +from overture.schema.system.field_path import ScalarPath +from overture.schema.system.primitive import 
GeometryType +from pydantic import BaseModel + + +class SimpleModel(BaseModel): + subtype: Literal["a", "b"] + score: Annotated[float, Ge(0.0)] | None = None + + +class BoundsModel(BaseModel): + value: Annotated[float, Ge(0.0)] + + +class TestGeneratePysparkModule: + @pytest.fixture + def simple_module(self) -> GeneratedModule: + return generate_pyspark_module( + extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel") + ) + + def test_returns_generated_module(self, simple_module: GeneratedModule) -> None: + assert isinstance(simple_module, GeneratedModule) + + def test_content_is_nonempty(self, simple_module: GeneratedModule) -> None: + assert simple_module.content + + def test_content_is_valid_python(self, simple_module: GeneratedModule) -> None: + ast.parse(simple_module.content) + + def test_path_uses_snake_case_feature_name( + self, simple_module: GeneratedModule + ) -> None: + assert simple_module.path == PurePosixPath( + "overture/schema/simple/simple_model.py" + ) + + def test_path_for_bounds_model(self) -> None: + result = generate_pyspark_module( + extract_model(BoundsModel, entry_point="overture.schema.bounds:BoundsModel") + ) + assert result.path == PurePosixPath("overture/schema/bounds/bounds_model.py") + + def test_content_contains_checks_function( + self, simple_module: GeneratedModule + ) -> None: + assert "simple_model_checks" in simple_module.content + + def test_content_contains_schema_constant( + self, simple_module: GeneratedModule + ) -> None: + assert "SIMPLE_MODEL_SCHEMA" in simple_module.content + + +def _two_specs() -> list[FeatureSpec]: + return [ + extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel"), + extract_model(BoundsModel, entry_point="overture.schema.bounds:BoundsModel"), + ] + + +def _features(modules: list[GeneratedModule]) -> list[GeneratedModule]: + return [m for m in modules if m.path.name != "__init__.py"] + + +class TestGeneratePysparkModules: + @pytest.fixture + def 
two_spec_modules(self) -> PipelineOutput: + return generate_pyspark_modules(_two_specs()) + + def test_empty_specs_returns_no_modules(self) -> None: + result = generate_pyspark_modules([]) + assert result.source == [] + assert result.test == [] + + def test_one_module_per_spec(self, two_spec_modules: PipelineOutput) -> None: + assert len(_features(two_spec_modules.source)) == 2 + + def test_paths_unique_per_tree(self, two_spec_modules: PipelineOutput) -> None: + # source and test trees mirror the same dirs; uniqueness is + # only required within each tree, not across them. + for tree in (two_spec_modules.source, two_spec_modules.test): + paths = [m.path for m in tree] + assert len(paths) == len(set(paths)) + + def test_all_content_is_valid_python( + self, two_spec_modules: PipelineOutput + ) -> None: + for mod in (*two_spec_modules.source, *two_spec_modules.test): + ast.parse(mod.content) + + def test_divisions_theme_produces_division_area( + self, all_discovered_models: dict + ) -> None: + """divisions theme should produce a division_area.py module.""" + division_specs: list[FeatureSpec] = [] + for key, entry in all_discovered_models.items(): + if find_theme(key.tags) != "divisions": + continue + partitions = partitions_from_tags(key.tags) + if is_model_class(entry): + division_specs.append( + extract_model( + entry, entry_point=key.entry_point, partitions=partitions + ) + ) + elif is_union_alias(entry): + division_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + partitions=partitions, + ) + ) + + results = generate_pyspark_modules(division_specs) + names = {r.path.stem for r in results.source} + assert "division_area" in names + + +class TestTestModuleGeneration: + @pytest.fixture + def all_modules(self) -> PipelineOutput: + return generate_pyspark_modules(_two_specs()) + + def test_generates_test_modules(self, all_modules: PipelineOutput) -> None: + assert len(_features(all_modules.test)) == 2 # one 
per feature spec + + def test_test_module_paths(self, all_modules: PipelineOutput) -> None: + paths = {m.path.name for m in _features(all_modules.test)} + assert "test_simple_model.py" in paths + assert "test_bounds_model.py" in paths + + def test_test_modules_are_valid_python(self, all_modules: PipelineOutput) -> None: + for mod in all_modules.test: + ast.parse(mod.content) + + def test_test_module_contains_imports(self, all_modules: PipelineOutput) -> None: + for mod in _features(all_modules.test): + assert "_support.harness import" in mod.content + assert "_support.scenarios import" in mod.content + + +def _extract_scenarios_block(content: str) -> str: + """Extract the SCENARIOS list literal from generated test source.""" + start = content.index("SCENARIOS:") + end = content.index("]", start) + 1 + return content[start:end] + + +class TestPerArmTestGeneration: + """Union features with multiple examples produce per-arm test modules.""" + + @pytest.fixture + def segment_modules(self, all_discovered_models: dict) -> PipelineOutput: + specs: list[FeatureSpec] = [] + for key, entry in all_discovered_models.items(): + if key.name != "segment": + continue + if is_union_alias(entry): + specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + partitions=partitions_from_tags(key.tags), + ) + ) + return generate_pyspark_modules(specs) + + def test_produces_per_arm_test_files(self, segment_modules: PipelineOutput) -> None: + paths = {m.path.name for m in _features(segment_modules.test)} + assert "test_segment_road.py" in paths + assert "test_segment_rail.py" in paths + + def test_no_monolithic_test_file(self, segment_modules: PipelineOutput) -> None: + """When per-arm tests exist, no undifferentiated test_segment.py.""" + paths = {m.path.name for m in _features(segment_modules.test)} + assert "test_segment.py" not in paths + + def test_per_arm_modules_are_valid_python( + self, segment_modules: PipelineOutput + ) -> None: 
+ for mod in segment_modules.test: + ast.parse(mod.content) + + def test_road_module_has_road_checks(self, segment_modules: PipelineOutput) -> None: + road = next( + m for m in segment_modules.test if m.path.name == "test_segment_road.py" + ) + assert "road_surface" in road.content + + def test_rail_module_has_rail_checks(self, segment_modules: PipelineOutput) -> None: + rail = next( + m for m in segment_modules.test if m.path.name == "test_segment_rail.py" + ) + assert "rail_flags" in rail.content + + def test_road_module_no_rail_field_scenarios( + self, segment_modules: PipelineOutput + ) -> None: + road = next( + m for m in segment_modules.test if m.path.name == "test_segment_road.py" + ) + scenarios = _extract_scenarios_block(road.content) + assert "rail_flags[].values" not in scenarios + + def test_rail_module_no_road_field_scenarios( + self, segment_modules: PipelineOutput + ) -> None: + rail = next( + m for m in segment_modules.test if m.path.name == "test_segment_rail.py" + ) + scenarios = _extract_scenarios_block(rail.content) + assert "road_surface" not in scenarios + + def test_non_union_still_gets_single_test(self) -> None: + """Non-union features produce a single test module (unchanged).""" + modules = generate_pyspark_modules( + [ + extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + ] + ) + tests = _features(modules.test) + assert len(tests) == 1 + assert tests[0].path.name == "test_simple_model.py" + + +class TestNestedSourcePaths: + def test_module_path_mirrors_entry_point(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + features = _features(modules.source) + assert len(features) == 1 + assert features[0].path == PurePosixPath( + "overture/schema/simple/simple_model.py" + ) + + def test_two_packages_no_collision(self) -> None: + a = extract_model(SimpleModel, entry_point="overture.schema.places:Place") + b = 
extract_model(SimpleModel, entry_point="annex.schema.places:Place") + modules = generate_pyspark_modules([a, b]) + paths = {m.path for m in _features(modules.source)} + assert PurePosixPath("overture/schema/places/place.py") in paths + assert PurePosixPath("annex/schema/places/place.py") in paths + + +_EXPECTED_INIT_PATHS = { + PurePosixPath("__init__.py"), + PurePosixPath("overture/__init__.py"), + PurePosixPath("overture/schema/__init__.py"), + PurePosixPath("overture/schema/simple/__init__.py"), +} + + +def _init_paths(modules: list[GeneratedModule]) -> set[PurePosixPath]: + return {m.path for m in modules if m.path.name == "__init__.py"} + + +class TestInitModuleEmission: + def test_intermediate_dirs_get_init_modules(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + assert _init_paths(modules.source) == _EXPECTED_INIT_PATHS + + def test_init_modules_are_empty(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + init = next(m for m in modules.source if m.path.name == "__init__.py") + assert init.content == "" + + def test_shared_dirs_emitted_once(self) -> None: + a = extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel") + b = extract_model(BoundsModel, entry_point="overture.schema.simple:BoundsModel") + modules = generate_pyspark_modules([a, b]) + init_paths = [m.path for m in modules.source if m.path.name == "__init__.py"] + assert len(init_paths) == len(set(init_paths)) + + +class TestNoRegistryEmitted: + def test_registry_module_is_no_longer_generated(self) -> None: + # The runtime builds the registry via entry-point discovery; codegen + # must not emit `_registry.py`. 
+ spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + for tree in (modules.source, modules.test): + assert all(m.path.name != "_registry.py" for m in tree) + + +class TestNestedTestPaths: + def test_test_module_path_mirrors_source(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + tests = _features(modules.test) + assert len(tests) == 1 + assert tests[0].path == PurePosixPath( + "overture/schema/simple/test_simple_model.py" + ) + + def test_test_module_imports_nested_expression(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + test_mod = next(iter(_features(modules.test))) + assert ( + "from overture.schema.pyspark.expressions.generated.overture.schema.simple.simple_model import" + in test_mod.content + ) + + def test_test_dirs_get_init_modules(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + # Source-tree init modules already covered in TestInitModuleEmission. + # The test tree must mirror the same package layout. 
+ assert _init_paths(modules.test) == _EXPECTED_INIT_PATHS + + +class TestExtractGeometryTypes: + """`_extract_geometry_types` aggregates across descriptors and checks.""" + + def test_aggregates_across_descriptors(self) -> None: + checks = [ + Check( + descriptors=( + ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT,), + ), + ), + target=ScalarPath(), + ), + Check( + descriptors=( + ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POLYGON, GeometryType.LINE_STRING), + ), + ), + target=ScalarPath(), + ), + ] + assert _extract_geometry_types(checks) == ( + GeometryType.LINE_STRING, + GeometryType.POINT, + GeometryType.POLYGON, + ) + + def test_returns_empty_when_absent(self) -> None: + assert _extract_geometry_types([]) == () diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py new file mode 100644 index 000000000..42775be41 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -0,0 +1,1097 @@ +"""Tests for pyspark feature module renderer.""" + +import ast +import re +from enum import Enum +from typing import Annotated, Literal, Union + +import pytest +from annotated_types import Ge, MinLen +from codegen_test_support import ( + LiteralSubtypeModel, + RadioModel, + RequireAnyModel, + TripleNestedArrayModel, + feature_spec_for_model, +) +from overture.schema.codegen.pyspark._render_common import jinja_env +from overture.schema.codegen.pyspark.check_builder import build_checks +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + RequireAnyOf, +) +from overture.schema.codegen.pyspark.renderer import ( + _render_check_function_context, + _render_model_constraint_function_context, + render_feature_module, +) +from 
overture.schema.codegen.pyspark.schema_builder import build_schema +from overture.schema.system.field_path import ( + parse, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + Not, + forbid_if, + require_any_of, + require_if, +) +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) +from overture.schema.system.string import CountryCodeAlpha2 +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +_path = parse + + +class BoundsModel(BaseModel): + score: Annotated[float, Ge(0.0)] + + +class ArrayModel(BaseModel): + tags: Annotated[list[str], MinLen(1)] + + +class InnerModel(BaseModel): + value: str + + +class NestedArrayModel(BaseModel): + items: list[InnerModel] | None = None + + +# list[Annotated[float, Ge(0.0)]] produces ARRAY-shape nodes because +# check_bounds is an element-level function (not in _COLUMN_LEVEL_FUNCTIONS). +class FloatListModel(BaseModel): + scores: list[Annotated[float, Ge(0.0)]] | None = None + + +def _render(model_cls: type[BaseModel], name: str = "simple") -> str: + spec = feature_spec_for_model(model_cls) + field_checks, model_checks = build_checks(spec) + schema_fields = build_schema(spec) + return render_feature_module(name, field_checks, model_checks, schema_fields) + + +def _render_check_function_string(ctx: dict[str, object]) -> str: + """Render a single check function context to source via the Jinja macro.""" + template = jinja_env().get_template("_check_function.py.jinja2") + return str(template.module.check_function(c=ctx)) # type: ignore[attr-defined] + + +def _render_check_function( + check: Check, func_name: str, descriptor_idx: int = 0 +) -> str: + """Render a per-field check function source from a Check.""" + ctx = _render_check_function_context(check, func_name, descriptor_idx) + return _render_check_function_string(ctx) + + +def _render_node(check: Check) -> str: + """Render a single Check to its function source.""" + return 
_render_check_function(check, "_test_check", descriptor_idx=0) + + +def _render_model_node(check: ModelCheck) -> str: + """Render a single ModelCheck to its function source.""" + ctx = _render_model_constraint_function_context(check, 0, "") + return _render_check_function_string(ctx) + + +@pytest.fixture(scope="module") +def literal_subtype_source() -> str: + """Rendered `LiteralSubtypeModel` source (default `simple` feature name). + + Module-scoped so the extraction+render cost is paid once for all + consumers in this file. + """ + return _render(LiteralSubtypeModel) + + +class TestParseable: + def test_renders_parseable_python(self, literal_subtype_source: str) -> None: + ast.parse(literal_subtype_source) + + def test_bounds_model_parseable(self) -> None: + source = _render(BoundsModel) + ast.parse(source) + + def test_array_model_parseable(self) -> None: + source = _render(ArrayModel) + ast.parse(source) + + def test_nested_array_model_parseable(self) -> None: + source = _render(NestedArrayModel) + ast.parse(source) + + def test_radio_model_parseable(self) -> None: + source = _render(RadioModel, "radio") + ast.parse(source) + + def test_require_any_model_parseable(self) -> None: + source = _render(RequireAnyModel, "require_any") + ast.parse(source) + + def test_depth_3_renders_valid_python(self) -> None: + source = _render(TripleNestedArrayModel, "triple") + ast.parse(source) + assert "nested_array_check(" in source + + +class TestBuilderFunction: + def test_contains_builder_function(self, literal_subtype_source: str) -> None: + assert "def simple_checks()" in literal_subtype_source + + def test_builder_returns_list_check(self, literal_subtype_source: str) -> None: + assert "list[Check]" in literal_subtype_source + + def test_builder_name_uses_feature_name(self) -> None: + source = _render(LiteralSubtypeModel, "my_feature") + assert "def my_feature_checks()" in source + + +class TestSchemaConstant: + def test_contains_schema_constant(self, 
literal_subtype_source: str) -> None: + assert "SIMPLE_SCHEMA" in literal_subtype_source + + def test_schema_constant_name_uppercased(self) -> None: + source = _render(LiteralSubtypeModel, "my_feature") + assert "MY_FEATURE_SCHEMA" in source + + def test_contains_struct_type(self, literal_subtype_source: str) -> None: + assert "StructType" in literal_subtype_source + + def test_contains_struct_field(self, literal_subtype_source: str) -> None: + assert "StructField" in literal_subtype_source + + def test_shared_struct_ref_emits_struct_field(self) -> None: + """Shared struct refs (BBOX_STRUCT) render as the type of a StructField.""" + from overture.schema.codegen.pyspark.schema_builder import SchemaField + + schema_fields = [SchemaField(name="bbox", type_expr="BBOX_STRUCT")] + source = render_feature_module("simple", [], [], schema_fields) + assert 'StructField("bbox", BBOX_STRUCT, True)' in source + + +class TestGeometryTypes: + """`GEOMETRY_TYPES` constant emission for runtime discovery.""" + + def test_omitted_when_empty(self, literal_subtype_source: str) -> None: + assert "GEOMETRY_TYPES" not in literal_subtype_source + + def test_emitted_when_provided(self) -> None: + spec = feature_spec_for_model(LiteralSubtypeModel) + field_nodes, model_nodes = build_checks(spec) + schema_fields = build_schema(spec) + source = render_feature_module( + "simple", + field_nodes, + model_nodes, + schema_fields, + geometry_types=(GeometryType.POINT,), + ) + assert ( + "GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,)" in source + ) + + def test_geometry_type_imported_when_only_constant_needs_it(self) -> None: + # LiteralSubtypeModel has no check_geometry_type constraint, so the + # import is only required because GEOMETRY_TYPES references it. 
+ spec = feature_spec_for_model(LiteralSubtypeModel) + field_nodes, model_nodes = build_checks(spec) + schema_fields = build_schema(spec) + source = render_feature_module( + "simple", + field_nodes, + model_nodes, + schema_fields, + geometry_types=(GeometryType.POINT,), + ) + assert "from overture.schema.system.primitive import GeometryType" in source + + +class TestImports: + def test_imports_pyspark_functions(self, literal_subtype_source: str) -> None: + assert "from pyspark.sql import functions as F" in literal_subtype_source + + def test_imports_check_classes(self, literal_subtype_source: str) -> None: + assert ( + "from overture.schema.pyspark.check import Check, CheckShape" + in literal_subtype_source + ) + + def test_imports_constraint_expressions(self, literal_subtype_source: str) -> None: + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in literal_subtype_source + ) + + def test_imports_schema_types(self, literal_subtype_source: str) -> None: + # StructType and StructField must appear in the import section (before first def) + first_def = literal_subtype_source.index("\ndef ") + import_section = literal_subtype_source[:first_def] + assert "pyspark.sql.types" in import_section + assert "StructType" in import_section + assert "StructField" in import_section + + def test_imports_array_check_when_needed(self) -> None: + source = _render(FloatListModel, "float_list") + assert "array_check" in source + + def test_no_unused_column_patterns_import_for_simple( + self, literal_subtype_source: str + ) -> None: + # LiteralSubtypeModel has no array fields -- column_patterns import not needed + assert "column_patterns" not in literal_subtype_source + + +class TestPerFieldFunctions: + def test_per_field_function_exists(self, literal_subtype_source: str) -> None: + # With split checks, compound fields produce suffixed names + assert ( + "_subtype_required_check" in literal_subtype_source + or "_subtype_enum_check" in 
literal_subtype_source + ) + + def test_check_has_name_field(self, literal_subtype_source: str) -> None: + """Rendered Check includes name= derived from constraint function.""" + assert 'name="required"' in literal_subtype_source + assert 'name="enum"' in literal_subtype_source + + def test_no_field_in_check_calls(self, literal_subtype_source: str) -> None: + """check_* calls should not include field string as second arg.""" + # Match pattern: check_xxx(F.col("yyy"), "yyy", ...) — field as 2nd arg + field_arg_pattern = re.compile(r'check_\w+\(F\.col\("[^"]+"\),\s*"[^"]+"') + assert not field_arg_pattern.search(literal_subtype_source) + + def test_scalar_single_descriptor_no_coalesce(self) -> None: + class OptionalBounds(BaseModel): + value: Annotated[float, Ge(0.0)] | None = None + + source = _render(OptionalBounds, "opt") + assert "check_bounds" in source + assert "F.coalesce" not in source + + def test_scalar_multi_descriptor_produces_separate_checks( + self, literal_subtype_source: str + ) -> None: + """SimpleModel.subtype has check_required + check_enum -> two separate functions.""" + assert "F.coalesce" not in literal_subtype_source + assert 'name="required"' in literal_subtype_source + assert 'name="enum"' in literal_subtype_source + + def test_compound_checks_split(self, literal_subtype_source: str) -> None: + """A field with required + enum produces two Check functions, not one coalesced.""" + assert "F.coalesce" not in literal_subtype_source + + def test_array_shape_uses_array_check(self) -> None: + source = _render(FloatListModel, "float_list") + assert "array_check" in source + + def test_field_function_name_sanitized(self) -> None: + # nested field like "items[].value" -> _items_value_check + source = _render(NestedArrayModel) + assert "_items_value_check" in source + + def test_builder_collects_all_checks(self, literal_subtype_source: str) -> None: + # With split checks, both descriptors appear in the builder + assert "_subtype_required_check()" in 
literal_subtype_source + assert "_subtype_enum_check()" in literal_subtype_source + + +class TestModelConstraintFunctions: + def test_radio_group_check_rendered(self) -> None: + source = _render(RadioModel, "radio") + assert "check_radio_group" in source + + def test_require_any_of_rendered(self) -> None: + source = _render(RequireAnyModel, "require_any") + assert "check_require_any_of" in source + + def test_radio_group_no_context_arg(self) -> None: + """check_radio_group must not receive a context string argument.""" + source = _render(RadioModel, "radio") + # Context arg was the model name, e.g. "RadioModel" — must not appear + assert "'RadioModel'" not in source + + def test_require_any_of_no_context_arg(self) -> None: + """check_require_any_of must not receive a context string argument.""" + source = _render(RequireAnyModel, "require_any") + assert "'RequireAnyModel'" not in source + + def test_model_constraint_imports_function(self) -> None: + source = _render(RadioModel, "radio") + assert "check_radio_group" in source + # imported from constraint_expressions + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in source + ) + + def test_model_constraint_included_in_builder(self) -> None: + source = _render(RadioModel, "radio") + # some check function for radio_group should appear in builder return + lines = source.splitlines() + builder_lines = [] + in_builder = False + for line in lines: + if "def radio_checks()" in line: + in_builder = True + if in_builder: + builder_lines.append(line) + builder_src = "\n".join(builder_lines) + assert "check" in builder_src.lower() + + +class TestEnumConstants: + def test_enum_values_appear_as_list(self, literal_subtype_source: str) -> None: + for value in ("a", "b", "c"): + assert f"'{value}'" in literal_subtype_source + + def test_check_enum_called_with_values(self, literal_subtype_source: str) -> None: + assert "check_enum" in literal_subtype_source + + +class GeomModel(BaseModel): + 
geometry: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POLYGON, GeometryType.MULTI_POLYGON), + ] + + +class TestGeometryTypeRendering: + def test_geometry_type_renders_valid_python(self) -> None: + source = _render(GeomModel, "geom") + ast.parse(source) + + def test_geometry_type_uses_qualified_name(self) -> None: + source = _render(GeomModel, "geom") + assert "GeometryType.POLYGON" in source + assert "GeometryType.MULTI_POLYGON" in source + + def test_geometry_type_import_present(self) -> None: + source = _render(GeomModel, "geom") + assert "from overture.schema.system.primitive import GeometryType" in source + + def test_no_geometry_type_import_without_geometry_field( + self, literal_subtype_source: str + ) -> None: + assert "GeometryType" not in literal_subtype_source + + +class _DeepInner(BaseModel): + field: str + + +class _ArrayElementWithNestedStruct(BaseModel): + nested: _DeepInner + + +class DeepNestedArrayModel(BaseModel): + items: list[_ArrayElementWithNestedStruct] + + +class _ArrayElementWithList(BaseModel): + countries: list[CountryCodeAlpha2] + + +class ListInArrayModel(BaseModel): + items: list[_ArrayElementWithList] + + +class _ArrayElementWithNewtype(BaseModel): + country: Annotated[str, Ge(0)] # stand-in for a constrained field + + +class TestArrayElementSubfieldRendering: + """Scalar sub-fields of array elements render as array_check with el[...] 
accessors.""" + + def test_scalar_subfield_uses_array_check(self) -> None: + source = _render(NestedArrayModel, "nested") + assert "array_check(" in source + + def test_scalar_subfield_uses_element_accessor(self) -> None: + source = _render(NestedArrayModel, "nested") + assert 'el["value"]' in source + + def test_scalar_subfield_no_f_col_with_brackets(self) -> None: + source = _render(NestedArrayModel, "nested") + assert 'F.col("items[].value")' not in source + + def test_nested_struct_subfield_chained_brackets(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + assert 'el["nested"]["field"]' in source + + def test_nested_struct_subfield_no_dot_in_brackets(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + assert 'el["nested.field"]' not in source + + def test_list_subfield_uses_nested_array_check(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + assert "nested_array_check(" in source + + def test_list_subfield_has_inner_array_check(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + # nested_array_check outer + array_check inner + assert "nested_array_check(" in source + assert "array_check(" in source + + def test_list_subfield_parseable(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + ast.parse(source) + + def test_deep_nested_parseable(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + ast.parse(source) + + +class TestNoFunctionNameCollisions: + def test_list_field_produces_unique_function_names(self) -> None: + source = _render(ArrayModel, "arr") + # Each "def _" function name should appear exactly once + func_defs = re.findall(r"^def (_\w+_check)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + def test_list_field_renders_parseable(self) -> None: + source = _render(ArrayModel, "arr") + ast.parse(source) + + +class PlaceSubtype(str): + COUNTRY = "country" + REGION = "region" + + 
def __new__(cls, value: str) -> "PlaceSubtype": + return str.__new__(cls, value) + + +class _SubtypeEnum(str, Enum): + COUNTRY = "country" + REGION = "region" + + +@require_if(["admin_level"], FieldEqCondition("subtype", _SubtypeEnum.COUNTRY)) +class RequireIfEnumModel(BaseModel): + subtype: str + admin_level: int | None = None + + +class TestModelConstraintNoRedundantArgs: + """Model constraints must not embed context or target_name strings.""" + + def test_require_if_no_target_name_arg(self) -> None: + """check_require_if must not pass the field name as a string arg.""" + source = _render(RequireIfEnumModel, "require_if_enum") + # Was: check_require_if(F.col("admin_level"), "admin_level", condition, desc) + # Now: check_require_if(F.col("admin_level"), condition, desc) + # Pattern: check_require_if(col_expr, "field_name", ... + pattern = re.compile(r'check_require_if\([^,]+,\s*"[^"]+",\s*F\.') + assert not pattern.search(source), ( + "check_require_if still passes field name as string arg" + ) + + def test_forbid_if_no_target_name_arg(self) -> None: + """check_forbid_if must not pass the field name as a string arg.""" + source = _render(RequireForbidModel, "rf") + pattern = re.compile(r'check_forbid_if\([^,]+,\s*"[^"]+",\s*F\.') + assert not pattern.search(source), ( + "check_forbid_if still passes field name as string arg" + ) + + +class TestEnumValueInCondition: + def test_renders_valid_python(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + ast.parse(source) + + def test_enum_value_rendered_as_string_literal_in_column_expr(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # The column expression (F.col == ...) must use the plain string value, + # not the non-parseable enum repr <_SubtypeEnum.COUNTRY: 'country'>. + # The condition description string may still contain the enum repr since + # it's only displayed in error messages (inside a quoted string literal). 
+ assert "'country'" in source + + +class TestConditionDescriptionRendering: + """Model constraint condition descriptions are human-readable, not Python repr.""" + + def test_condition_desc_no_enum_repr(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # The condition_desc string (4th arg to check_require_if) must not contain + # the non-parseable enum repr like <_SubtypeEnum.COUNTRY: 'country'> + assert "<_SubtypeEnum" not in source + + def test_condition_desc_uses_field_eq_format(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # Should render as "subtype = 'country'" style (value quoted) + assert "subtype = 'country'" in source + + def test_condition_desc_with_double_quote_in_value_parseable(self) -> None: + """Condition values containing double-quotes must produce parseable output.""" + + @require_if(["admin_level"], FieldEqCondition("subtype", 'say "hi"')) + class DoubleQuoteCondModel(BaseModel): + subtype: str + admin_level: int | None = None + + source = _render(DoubleQuoteCondModel, "dq_cond") + ast.parse(source) + + +@forbid_if(["admin_level"], FieldEqCondition("subtype", "country")) +@require_if(["admin_level"], Not(FieldEqCondition("subtype", "country"))) +class RequireForbidModel(BaseModel): + subtype: str + admin_level: int | None = None + + +class TestModelConstraintFieldLabels: + """require_if/forbid_if field labels: no suffix when unique, per-field counter on collision.""" + + def test_require_if_single_constraint_no_suffix(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + assert 'field="admin_level_required"' in source + + def test_forbid_if_single_constraint_no_suffix(self) -> None: + source = _render(RequireForbidModel, "rf") + assert 'field="admin_level_forbidden"' in source + + def test_require_and_forbid_have_distinct_labels(self) -> None: + source = _render(RequireForbidModel, "rf") + assert 'field="admin_level_required"' in source + assert 
'field="admin_level_forbidden"' in source + + def test_multiple_require_if_same_target_disambiguated(self) -> None: + """Multiple require_if on the same target get per-field numeric suffixes.""" + + @require_if(["level"], FieldEqCondition("kind", "a")) + @require_if(["level"], FieldEqCondition("kind", "b")) + class MultiRequireModel(BaseModel): + kind: str + level: int | None = None + + source = _render(MultiRequireModel, "multi_req") + labels = re.findall(r'field="(level_required[^"]*)"', source) + assert len(labels) >= 2, f"Expected >=2 unique labels, got {labels}" + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + assert all(re.search(r"_\d+$", lbl) for lbl in labels), ( + f"Expected numeric suffixes on collision labels: {labels}" + ) + + +class TestDuplicateFunctionNames: + def test_column_and_element_level_get_unique_names(self) -> None: + """division_ids and division_ids[] should produce distinct function names.""" + col_check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items"), + ) + elem_check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[]"), + ) + source = render_feature_module("dup", [col_check, elem_check], [], []) + ast.parse(source) + func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + def test_same_field_different_variants_get_unique_names(self) -> None: + """class for road and class for rail should produce distinct function names.""" + road_check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["a", "b"],)), + ), + target=_path("class"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ) + rail_check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["x", "y"],)), + ), + target=_path("class"), + 
guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), + ) + source = render_feature_module("dup", [road_check, rail_check], [], []) + ast.parse(source) + func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + +@require_any_of("x", "y") +class _ArrayElementConstrained(BaseModel): + x: str | None = None + y: str | None = None + + +class ArrayOfConstrained(BaseModel): + items: list[_ArrayElementConstrained] + + +class TestArrayModelConstraintRendering: + """Model constraints on array elements render inside array_check.""" + + def test_renders_parseable_python(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + ast.parse(source) + + def test_renders_array_check(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "array_check(" in source + + def test_renders_el_field_refs(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert 'el["x"]' in source + assert 'el["y"]' in source + + def test_no_f_col_for_array_element_constraint(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + # The array-element model constraint should not use F.col for its field refs + assert 'F.col("x")' not in source + assert 'F.col("y")' not in source + + def test_shape_is_array(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "CheckShape.ARRAY" in source + + def test_field_label_uses_prefix(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert 'field="items[]' in source + + def test_imports_array_check(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "array_check" in source + + +class TestVariantDiscriminatorField: + def test_variant_uses_check_discriminator_field(self) -> None: + """Variant gating should use the Guard's discriminator field, not hardcoded 'subtype'.""" + check = 
Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["x", "y"],)), + ), + target=_path("a_field"), + guards=(ColumnGuard(discriminator="kind", values=("a",)),), + ) + source = render_feature_module("test_variant", [check], [], []) + ast.parse(source) + assert 'F.col("kind")' in source + assert 'F.col("subtype")' not in source + + +@require_any_of("a", "b") +class _NestedConstrainedStruct(BaseModel): + a: str | None = None + b: str | None = None + + +class _ArrayElementWithNestedConstraint(BaseModel): + nested: _NestedConstrainedStruct + + +class ArrayOfNestedConstrained(BaseModel): + items: list[_ArrayElementWithNestedConstraint] + + +class TestVariantGatedArrayLambdaScope: + """Variant gating for ARRAY-shaped nodes must be inside the lambda, not wrapping it.""" + + @pytest.fixture(scope="class") + def rendered_source(self) -> str: + class _Base(BaseModel): + kind: str + + class _TypeA(_Base): + kind: Literal["a"] = "a" + a_field: str + + class _TypeB(_Base): + kind: Literal["b"] = "b" + + _Union = Annotated[ + Union[_TypeA, _TypeB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class _Wrapper(BaseModel): + items: list[_Union] + + return _render(_Wrapper, "wrapper") + + def test_parseable(self, rendered_source: str) -> None: + ast.parse(rendered_source) + + def test_variant_gating_inside_lambda(self, rendered_source: str) -> None: + """el['kind'] must appear inside the lambda body, not outside array_check.""" + lines = rendered_source.splitlines() + for i, line in enumerate(lines): + if "array_check(" in line and i > 0: + preceding = lines[i - 1].strip() + assert not preceding.startswith("F.when("), ( + f"array_check wrapped by F.when at line {i}: {lines[i - 1]!r}" + ) + + lambda_found = False + el_kind_inside_lambda = False + for line in lines: + if "lambda el:" in line: + lambda_found = True + if lambda_found and 'el["kind"]' in line: + el_kind_inside_lambda = True + break + + assert lambda_found, "No lambda el: found in 
generated source" + assert el_kind_inside_lambda, ( + 'el["kind"] never appears after lambda el: — variant gating is outside lambda scope' + ) + + +class TestTopLevelVariantGatedArray: + """When the array column itself is variant-conditional, discriminator wraps array_check.""" + + @pytest.fixture(scope="class") + def surface_check(self) -> Check: + """ARRAY check with top-level discriminator -- surface only exists for subtype='a'.""" + return Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("surface[]"), + guards=(ColumnGuard(discriminator="subtype", values=("a",)),), + ) + + @pytest.fixture(scope="class") + def surface_value_check(self) -> Check: + """ARRAY check with leaf path and top-level discriminator.""" + return Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("surface[].value"), + guards=(ColumnGuard(discriminator="subtype", values=("a",)),), + ) + + def test_parseable(self, surface_check: Check) -> None: + source = render_feature_module("test", [surface_check], [], []) + ast.parse(source) + + def test_discriminator_uses_f_col(self, surface_check: Check) -> None: + """Top-level discriminator must reference F.col, not el[...].""" + source = render_feature_module("test", [surface_check], [], []) + assert 'F.col("subtype")' in source, ( + "Top-level discriminator must use F.col, not el[...]" + ) + assert 'el["subtype"]' not in source, ( + 'el["subtype"] found -- discriminator placed inside lambda' + ) + + def test_f_when_wraps_array_check(self, surface_check: Check) -> None: + """F.when must wrap the array_check call, not the lambda body.""" + source = _render_check_function(surface_check, "_surface_check") + # F.when must appear before array_check in the expression. 
+ f_when_pos = source.find("F.when(") + array_check_pos = source.find("array_check(") + assert f_when_pos != -1, "F.when not found in output" + assert array_check_pos != -1, "array_check not found in output" + assert f_when_pos < array_check_pos, ( + f"F.when (pos {f_when_pos}) must appear before array_check (pos {array_check_pos})" + ) + + def test_no_el_discriminator_in_lambda(self, surface_value_check: Check) -> None: + """el['subtype'] must not appear even with leaf path -- subtype is top-level.""" + source = render_feature_module("test", [surface_value_check], [], []) + assert 'el["subtype"]' not in source, ( + 'el["subtype"] found -- top-level discriminator must not appear inside lambda' + ) + + def test_leaf_path_check_parseable(self, surface_value_check: Check) -> None: + source = render_feature_module("test", [surface_value_check], [], []) + ast.parse(source) + + +class TestNestedStructModelConstraintRendering: + """Nested struct model constraints inside array elements use chained el accessors.""" + + def test_renders_parseable_python(self) -> None: + source = _render(ArrayOfNestedConstrained, "nested_constrained") + ast.parse(source) + + def test_chained_struct_accessor(self) -> None: + source = _render(ArrayOfNestedConstrained, "nested_constrained") + assert 'el["nested"]["a"]' in source + assert 'el["nested"]["b"]' in source + + def test_no_direct_el_access(self) -> None: + """Should NOT produce el["a"] — must go through nested struct.""" + source = _render(ArrayOfNestedConstrained, "nested_constrained") + # el["a"] without ["nested"] prefix should not appear + lines = source.split("\n") + for line in lines: + if 'el["a"]' in line and '["nested"]' not in line: + pytest.fail(f'Found bare el["a"] without struct prefix: {line}') + + +class TestRenderNestedArrayCheckStructure: + """_render_check_function emits correct nested_array_check / lambda structure.""" + + def test_render_nested_array_check(self) -> None: + check = Check( + descriptors=( + 
ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)), + ), + target=_path("items[].things[].value"), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert "lambda el" in source + assert "lambda inner" in source + assert 'el["things"]' in source + assert 'check_bounds(inner["value"],' in source + + def test_render_variant_expr_in_nested_array_top_level_disc(self) -> None: + """Top-level discriminator wraps nested_array_check in F.when(F.col(...)).""" + check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["m", "km"],)), + ), + target=_path("items[].things[].unit"), + guards=(ColumnGuard(discriminator="kind", values=("a", "b")),), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert 'F.col("kind").isin(' in source + + def test_render_variant_expr_in_nested_array_element_disc(self) -> None: + """Element-level discriminator gates inside the inner lambda.""" + check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["m", "km"],)), + ), + target=_path("items[].things[].unit"), + guards=(ElementGuard(discriminator="kind", values=("a", "b")),), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert 'F.col("kind")' not in source + assert 'inner["kind"]' in source + + +@require_any_of("a", "b") +class _DoubleNestedConstrainedElement(BaseModel): + a: str | None = None + b: str | None = None + + +class _OuterArrayElement(BaseModel): + things: list[_DoubleNestedConstrainedElement] + + +class _DoubleNestedModel(BaseModel): + items: list[_OuterArrayElement] + + +class TestDoubleNestedArrayModelConstraintRendering: + """Model constraints on list[] inside another array render nested_array_check.""" + + def test_renders_parseable_python(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + ast.parse(source) + + def 
test_uses_nested_array_check(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert "nested_array_check" in source + + def test_inner_lambda_uses_inner_variable(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert 'inner["a"]' in source + assert 'inner["b"]' in source + + def test_outer_lambda_navigates_to_inner_array(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert 'el["things"]' in source + + +class TestMultiLevelNestedArrayRendering: + """Rendering of deeply nested array checks (2+ inner levels).""" + + def test_two_inner_levels_produces_double_nesting(self) -> None: + """list[list[list[Struct]]].field -> nested(nested(array_check)).""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[][][].value"), + ) + source = _render_node(check) + # Three IterateArrays -> 1 outer nested_array_check + 1 intermediate + # nested_array_check + 1 innermost array_check = 2 nested_array_check calls. 
+ assert source.count("nested_array_check(") == 2 + assert "lambda el:" in source + assert "lambda el2:" in source # intermediate level + assert "lambda inner:" in source # innermost + assert 'check_required(inner["value"])' in source + + def test_two_inner_levels_with_struct_path(self) -> None: + """Intermediate level with struct navigation.""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("outer[].mid[][].leaf"), + ) + source = _render_node(check) + assert 'el["mid"]' in source + assert source.count("nested_array_check(") == 2 + + def test_model_constraint_with_two_inner_levels(self) -> None: + """Model constraint at depth 3 uses double-nested wrapping.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[][][]"), + ) + source = _render_model_node(check) + assert source.count("nested_array_check(") == 2 + assert "lambda el:" in source + assert "lambda el2:" in source + assert "array_check(" in source + + def test_variant_gating_only_at_innermost_level(self) -> None: + """Variant values on a multi-level check with element guard apply at innermost.""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[][][].value"), + guards=(ElementGuard(discriminator="kind", values=("type_a",)),), + ) + source = _render_node(check) + # Variant gating appears at the innermost level. 
+ assert 'inner["kind"]' in source + + +class TestGatedScalarRendering: + """Gated check_required wraps expression in F.when(gate.isNotNull(), ...).""" + + @pytest.fixture + def gated_check(self) -> Check: + return Check( + descriptors=( + ExpressionDescriptor(function="check_required", gate=_path("inner")), + ), + target=_path("inner.value"), + ) + + def test_gated_scalar_has_when_wrapping(self, gated_check: Check) -> None: + source = _render_node(gated_check) + assert 'F.col("inner").isNotNull()' in source + assert "check_required" in source + assert "F.when(" in source + + def test_gated_scalar_is_parseable(self, gated_check: Check) -> None: + source = _render_node(gated_check) + ast.parse(source) + + def test_ungated_scalar_unchanged(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + source = _render_node(check) + assert "isNotNull" not in source + assert "check_required" in source + + +class _NullableNestedElement(BaseModel): + value: str + + +class _ElementWithNullableStruct(BaseModel): + nested: _NullableNestedElement | None = None + + +class _ArrayWithNullableStruct(BaseModel): + items: list[_ElementWithNullableStruct] + + +class TestGatedFullModelRendering: + def test_gated_array_descriptor_is_parseable(self) -> None: + source = _render(_ArrayWithNullableStruct, "arr") + ast.parse(source) + + def test_gated_array_descriptor_has_element_gate(self) -> None: + source = _render(_ArrayWithNullableStruct, "arr") + assert 'el["nested"].isNotNull()' in source + assert "check_required" in source + + def test_model_with_nullable_parent_is_parseable(self) -> None: + class Inner(BaseModel): + value: str + + class Outer(BaseModel): + inner: Inner | None = None + + source = _render(Outer, "outer") + ast.parse(source) + assert "isNotNull" in source + assert "check_required" in source + + +class TestGatedArrayRendering: + """Gated check_required in array context uses element accessor for 
gate.""" + + @pytest.fixture + def element_gated_check(self) -> Check: + return Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("items[].nested") + ), + ), + target=_path("items[].nested.mode"), + ) + + def test_gated_array_has_element_gate(self, element_gated_check: Check) -> None: + source = _render_node(element_gated_check) + assert 'el["nested"].isNotNull()' in source + assert "check_required" in source + assert "F.when(" in source + + def test_gated_array_is_parseable(self, element_gated_check: Check) -> None: + source = _render_node(element_gated_check) + ast.parse(source) + + def test_column_level_gate_on_array_target_raises(self) -> None: + """A column-level gate on an ArrayPath target is not produced by check_builder.""" + check = Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("perspectives") + ), + ), + target=_path("perspectives.countries[]"), + ) + with pytest.raises(AssertionError, match="column-level gate"): + _render_node(check) + + def test_nested_array_gate_applied_at_outermost_lambda(self) -> None: + """Gate on a nested_array_check wraps the el lambda body, not inner.""" + check = Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("rules[].perspectives") + ), + ), + target=_path("rules[].perspectives.countries[]"), + ) + source = _render_node(check) + ast.parse(source) + assert "nested_array_check(" in source + # Gate must be on el (the rule struct), not inner (the country string). 
+ assert 'el["perspectives"].isNotNull()' in source + assert "inner[" not in source diff --git a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py new file mode 100644 index 000000000..aba025cda --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py @@ -0,0 +1,245 @@ +"""Tests for sparse path scaffold generation.""" + +from dataclasses import replace + +import pytest +from codegen_test_support import ( + FeatureWithRequiredUrl, + discover_feature, + feature_spec_for_model, +) +from overture.schema.codegen.extraction.specs import FeatureSpec +from overture.schema.codegen.pyspark.check_builder import build_checks +from overture.schema.codegen.pyspark.check_ir import ElementGuard +from overture.schema.codegen.pyspark.test_data.scaffold import ( + generate_model_scaffold, + generate_scaffold, + leaf_list_depth, +) +from overture.schema.system.field_path import ArrayPath, parse + +_path = parse + + +@pytest.fixture(scope="module") +def connector_spec() -> FeatureSpec: + return discover_feature("Connector") + + +@pytest.fixture(scope="module") +def division_area_spec() -> FeatureSpec: + return discover_feature("DivisionArea") + + +@pytest.fixture(scope="module") +def segment_spec() -> FeatureSpec: + return discover_feature("Segment") + + +class TestLeafListDepth: + def test_leaf_list_depth(self) -> None: + """leaf_list_depth returns unaccounted-for list depth.""" + spec = feature_spec_for_model(FeatureWithRequiredUrl) + # Scalar field inside array struct — no extra wrapping + assert leaf_list_depth(_path("datasets[].url"), spec) == 0 + # List field without trailing array marker — needs wrapping + assert leaf_list_depth(_path("datasets[].download_urls"), spec) == 1 + # List field with array marker means element-level access — no wrapping + assert leaf_list_depth(_path("datasets[].download_urls[]"), spec) == 0 + + +class TestNestedListUrlField: + """Scaffold for 
FeatureWithRequiredUrl handles nested list[HttpUrl] fields.""" + + def test_nested_list_url_field_single_depth(self) -> None: + """list[HttpUrl] scaffold should be single-depth, not double-wrapped.""" + spec = feature_spec_for_model(FeatureWithRequiredUrl) + field_nodes, _ = build_checks(spec) + url_nodes = [n for n in field_nodes if "download_urls" in str(n.target)] + assert url_nodes, "Expected check nodes for download_urls" + for node in url_nodes: + scaffold = generate_scaffold(node, spec) + if "datasets" in scaffold: + entry = scaffold["datasets"][0] + if "download_urls" in entry: + val = entry["download_urls"] + assert isinstance(val, list) + assert all(isinstance(v, str) for v in val), ( + f"Expected list[str], got nested structure: {val!r}" + ) + + +class TestGenerateScaffoldConnector: + """Scaffold for Connector — simple top-level and one-level-nested fields.""" + + def test_required_top_level_field_produces_empty_scaffold( + self, connector_spec: FeatureSpec + ) -> None: + """Required top-level fields exist in base row; scaffold adds nothing.""" + field_nodes, _ = build_checks(connector_spec) + id_node = next(n for n in field_nodes if n.target == _path("id")) + scaffold = generate_scaffold(id_node, connector_spec) + assert scaffold == {} + + def test_optional_top_level_field_produces_scaffold( + self, connector_spec: FeatureSpec + ) -> None: + """Optional fields absent from base row get a valid scaffold value.""" + field_nodes, _ = build_checks(connector_spec) + node = next( + n + for n in field_nodes + if n.target == _path("sources") + and any(d.function == "check_array_min_length" for d in n.descriptors) + ) + scaffold = generate_scaffold(node, connector_spec) + assert "sources" in scaffold + assert isinstance(scaffold["sources"], list) + assert len(scaffold["sources"]) >= 1 + + def test_array_nested_field_builds_path(self, connector_spec: FeatureSpec) -> None: + """sources[].property needs a sources array with one element.""" + field_nodes, _ = 
build_checks(connector_spec) + node = next(n for n in field_nodes if n.target == _path("sources[].property")) + scaffold = generate_scaffold(node, connector_spec) + assert "sources" in scaffold + assert isinstance(scaffold["sources"], list) + assert len(scaffold["sources"]) == 1 + elem = scaffold["sources"][0] + # Required sibling 'dataset' populated + assert "dataset" in elem + + def test_scaffold_is_dict(self, connector_spec: FeatureSpec) -> None: + field_nodes, _ = build_checks(connector_spec) + for node in field_nodes: + scaffold = generate_scaffold(node, connector_spec) + assert isinstance(scaffold, dict) + + +class TestGenerateScaffoldSegment: + """Scaffold for Segment — deeply nested arrays and discriminators.""" + + def test_suffixed_nested_leaf_uses_actual_field_name( + self, segment_spec: FeatureSpec + ) -> None: + """Column-level checks share the structural path with the real field.""" + field_nodes, _ = build_checks(segment_spec) + node = next( + n + for n in field_nodes + if n.target == _path("access_restrictions[].when.mode") + and any(d.function == "check_array_min_length" for d in n.descriptors) + ) + scaffold = generate_scaffold(node, segment_spec) + assert "access_restrictions" in scaffold + when = scaffold["access_restrictions"][0]["when"] + assert "mode" in when, f"Expected 'mode', got keys: {list(when.keys())}" + assert "mode_min_length" not in when + + def test_deeply_nested_array_path(self, segment_spec: FeatureSpec) -> None: + """speed_limits[].when.vehicle[].dimension builds full nesting.""" + field_nodes, _ = build_checks(segment_spec) + node = next( + n + for n in field_nodes + if n.target == _path("speed_limits[].when.vehicle[].dimension") + ) + scaffold = generate_scaffold(node, segment_spec) + assert "speed_limits" in scaffold + sl_elem = scaffold["speed_limits"][0] + assert "when" in sl_elem + when = sl_elem["when"] + assert "vehicle" in when + assert isinstance(when["vehicle"], list) + assert len(when["vehicle"]) == 1 + + def 
test_element_guard_discriminator_set(self, segment_spec: FeatureSpec) -> None: + """Checks with an `ElementGuard` set the discriminator value in the scaffold.""" + field_checks, _ = build_checks(segment_spec) + # Find a speed_limits check with an ElementGuard. + check = next( + c + for c in field_checks + if any(isinstance(g, ElementGuard) for g in c.guards) + and "speed_limits" in str(c.target) + ) + scaffold = generate_scaffold(check, segment_spec) + # Walk to the innermost array element where the discriminator lives. + assert "speed_limits" in scaffold + sl_elem = scaffold["speed_limits"][0] + when = sl_elem["when"] + vehicle_elem = when["vehicle"][0] + element_guard = next(g for g in check.guards if isinstance(g, ElementGuard)) + assert element_guard.discriminator in vehicle_elem + assert vehicle_elem[element_guard.discriminator] == element_guard.values[0] + + def test_column_variant_does_not_appear_inside_scaffold( + self, segment_spec: FeatureSpec + ) -> None: + """`ColumnGuard`s don't set discriminator inside the scaffold dict.""" + field_checks, _ = build_checks(segment_spec) + # Find a check whose only guard is a ColumnGuard (no ElementGuard). + check = next( + c + for c in field_checks + if c.guards + and not any(isinstance(g, ElementGuard) for g in c.guards) + and "speed_limits[]." in str(c.target) + ) + scaffold = generate_scaffold(check, segment_spec) + # The column-level discriminator is NOT set in the scaffold -- + # it belongs at the row level, which the base row handles. + assert isinstance(scaffold, dict) + + def test_multiple_element_guards_raises(self, segment_spec: FeatureSpec) -> None: + """The check_ir invariant allows at most one `ElementGuard` per Check. + + Multiple guards would indicate the gate composition rule changed + without updating the scaffold, so the scaffold raises rather than + silently dropping all but the first. 
+ """ + field_checks, _ = build_checks(segment_spec) + check = next( + c + for c in field_checks + if any(isinstance(g, ElementGuard) for g in c.guards) + ) + bogus = replace( + check, + guards=( + *check.guards, + ElementGuard(discriminator="other_field", values=("other_value",)), + ), + ) + with pytest.raises(NotImplementedError, match="ElementGuards"): + generate_scaffold(bogus, segment_spec) + + +class TestGenerateModelScaffold: + def test_top_level_model_constraint_produces_empty_scaffold( + self, division_area_spec: FeatureSpec + ) -> None: + """Model constraints at the top level need no nesting.""" + _, model_nodes = build_checks(division_area_spec) + assert model_nodes, "DivisionArea should have model constraints" + node = model_nodes[0] + scaffold = generate_model_scaffold(node, division_area_spec) + assert isinstance(scaffold, dict) + + def test_array_nested_model_constraint_builds_path( + self, segment_spec: FeatureSpec + ) -> None: + """Model constraints inside arrays build the array path.""" + _, model_checks = build_checks(segment_spec) + if not model_checks: + pytest.skip("Segment has no model constraints") + # Find one with an array target. + nested = [c for c in model_checks if isinstance(c.target, ArrayPath)] + if not nested: + pytest.skip("No nested model constraints found") + check = nested[0] + scaffold = generate_model_scaffold(check, segment_spec) + assert isinstance(scaffold, dict) + # The scaffold should contain the column root (top-level column name). 
+ assert isinstance(check.target, ArrayPath) + assert check.target.array_chunks[0][1] in scaffold diff --git a/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py new file mode 100644 index 000000000..26dcdff30 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py @@ -0,0 +1,213 @@ +"""Tests for schema_builder.""" + +from enum import Enum + +import pytest +from codegen_test_support import feature_spec_for_model +from overture.schema.codegen.extraction.field import Primitive +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + FieldSpec, + UnionSpec, +) +from overture.schema.codegen.pyspark.schema_builder import SchemaField, build_schema +from overture.schema.divisions import DivisionArea +from pydantic import BaseModel, Field + + +class SimpleModel(BaseModel): + name: str + count: int = Field(ge=0) + + +class TestPrimitiveFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(SimpleModel)) + + def test_string_field_maps_to_string_type(self, fields: list[SchemaField]) -> None: + name_field = next(f for f in fields if f.name == "name") + assert name_field.type_expr == "StringType()" + + def test_int_field_maps_to_long_type(self, fields: list[SchemaField]) -> None: + count_field = next(f for f in fields if f.name == "count") + assert count_field.type_expr == "LongType()" + + +class NestedModel(BaseModel): + value: str + count: int + + +class ContainerModel(BaseModel): + item: NestedModel | None = None + + +class TestNestedModel: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(ContainerModel)) + + def test_nested_model_emits_struct_type(self, fields: list[SchemaField]) -> None: + item_field = next(f for f in fields if f.name == "item") + assert item_field.type_expr.startswith("StructType([") + + def 
test_nested_struct_contains_subfields(self, fields: list[SchemaField]) -> None: + item_field = next(f for f in fields if f.name == "item") + assert 'StructField("value"' in item_field.type_expr + assert 'StructField("count"' in item_field.type_expr + + +class ListModel(BaseModel): + tags: list[str] + counts: list[int] | None = None + + +class TestListFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(ListModel)) + + def test_list_str_maps_to_array_string(self, fields: list[SchemaField]) -> None: + tags_field = next(f for f in fields if f.name == "tags") + assert tags_field.type_expr == "ArrayType(StringType(), True)" + + def test_optional_list_int_maps_to_array_long( + self, fields: list[SchemaField] + ) -> None: + counts_field = next(f for f in fields if f.name == "counts") + assert counts_field.type_expr == "ArrayType(LongType(), True)" + + +class DictModel(BaseModel): + labels: dict[str, str] | None = None + + +class TestDictFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(DictModel)) + + def test_dict_str_str_maps_to_map_type(self, fields: list[SchemaField]) -> None: + labels_field = next(f for f in fields if f.name == "labels") + assert labels_field.type_expr == "MapType(StringType(), StringType(), True)" + + +class TestDivisionAreaSchema: + @pytest.fixture(scope="class") + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(DivisionArea)) + + def test_id_field_is_string_type(self, fields: list[SchemaField]) -> None: + id_field = next(f for f in fields if f.name == "id") + assert id_field.type_expr == "StringType()" + + def test_geometry_field_is_binary_type(self, fields: list[SchemaField]) -> None: + geom_field = next(f for f in fields if f.name == "geometry") + assert geom_field.type_expr == "BinaryType()" + + def test_bbox_emits_shared_struct_ref(self, fields: list[SchemaField]) -> None: + bbox_field = 
next(f for f in fields if f.name == "bbox") + assert bbox_field.type_expr == "BBOX_STRUCT" + + def test_version_is_integer_type(self, fields: list[SchemaField]) -> None: + ver_field = next(f for f in fields if f.name == "version") + assert ver_field.type_expr == "IntegerType()" + + def test_is_land_is_boolean_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "is_land") + assert field.type_expr == "BooleanType()" + + def test_country_is_string_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "country") + assert field.type_expr == "StringType()" + + def test_admin_level_is_integer_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "admin_level") + assert field.type_expr == "IntegerType()" + + def test_subtype_enum_is_string_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "subtype") + assert field.type_expr == "StringType()" + + def test_theme_appears_once_at_model_position( + self, fields: list[SchemaField] + ) -> None: + theme_fields = [f for f in fields if f.name == "theme"] + assert len(theme_fields) == 1 + + def test_theme_and_type_present(self, fields: list[SchemaField]) -> None: + names = [f.name for f in fields] + assert "theme" in names + assert "type" in names + + +class _ColorA(Enum): + RED = "red" + GREEN = "green" + + +class _ColorB(Enum): + BLUE = "blue" + YELLOW = "yellow" + + +class _VariantA(BaseModel): + pass + + +class _VariantB(BaseModel): + pass + + +class TestUnionSchemaDeduplicate: + """build_schema deduplicates same-name fields from different union variants.""" + + @pytest.fixture + def fields(self) -> list[SchemaField]: + af_shared = AnnotatedField( + field_spec=FieldSpec( + name="id", + shape=Primitive(base_type="str"), + description=None, + is_required=True, + ), + variant_sources=None, + ) + af_color_a = AnnotatedField( + field_spec=FieldSpec( + name="color", + 
shape=Primitive(base_type="ColorA", source_type=_ColorA), + description=None, + is_required=True, + ), + variant_sources=(_VariantA,), + ) + af_color_b = AnnotatedField( + field_spec=FieldSpec( + name="color", + shape=Primitive(base_type="ColorB", source_type=_ColorB), + description=None, + is_required=True, + ), + variant_sources=(_VariantB,), + ) + spec = UnionSpec( + name="TestUnion", + description=None, + annotated_fields=[af_shared, af_color_a, af_color_b], + members=[], + discriminator_field=None, + discriminator_mapping=None, + source_annotation=object(), + common_base=BaseModel, + ) + return build_schema(spec) + + def test_one_schema_field_per_name(self, fields: list[SchemaField]) -> None: + color_fields = [f for f in fields if f.name == "color"] + assert len(color_fields) == 1 + + def test_color_field_is_string_type(self, fields: list[SchemaField]) -> None: + color_field = next(f for f in fields if f.name == "color") + assert color_field.type_expr == "StringType()" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py new file mode 100644 index 000000000..64537c9b5 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py @@ -0,0 +1,880 @@ +"""Tests for the generated conformance test module renderer.""" + +import ast +import re +from enum import Enum + +import pytest +from overture.schema.codegen.extraction.field import ArrayOf, Primitive +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, +) +from overture.schema.codegen.pyspark.test_renderer import ( + render_test_module as _real_render_test_module, +) +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + 
NoWhitespaceConstraint, +) +from overture.schema.system.field_path import ArrayPath, ScalarPath, parse +from overture.schema.system.model_constraint import FieldEqCondition, Not +from overture.schema.system.primitive.geom import GeometryType + +_path = parse + +# Placeholder expression import path -- tests parse the rendered source +# rather than executing it, so the import target need not be real. +_TEST_EXPRESSION_IMPORT = "_placeholder.expression_module" + + +def render_test_module(*args: object, **kwargs: object) -> str: + """Invoke the renderer with placeholder `expression_import`/`support_prefix`. + + Tests parse the rendered source rather than executing it, so neither + the expression import target nor the relative `_support` package depth + needs to match a real layout. Defining this as a free function (rather + than a fixture) keeps test bodies terse. + """ + kwargs.setdefault("expression_import", _TEST_EXPRESSION_IMPORT) + kwargs.setdefault("support_prefix", "..") + return _real_render_test_module(*args, **kwargs) # type: ignore[arg-type] + + +def make_check( + function: str, + target: object, + *, + args: tuple[object, ...] = (), + kwargs: tuple[tuple[str, object], ...] = (), + constraint_type: object = None, + label: str | None = None, + check_name: str | None = None, + guards: tuple[object, ...] 
= (), +) -> Check: + """Build a single-descriptor Check; defaults match Check/ExpressionDescriptor.""" + descriptor_kwargs: dict[str, object] = {"function": function} + if args: + descriptor_kwargs["args"] = args + if kwargs: + descriptor_kwargs["kwargs"] = kwargs + if constraint_type is not None: + descriptor_kwargs["constraint_type"] = constraint_type + if label is not None: + descriptor_kwargs["label"] = label + if check_name is not None: + descriptor_kwargs["check_name"] = check_name + return Check( + descriptors=(ExpressionDescriptor(**descriptor_kwargs),), # type: ignore[arg-type] + target=target, # type: ignore[arg-type] + guards=guards, # type: ignore[arg-type] + ) + + +def _array( + column: str, + inner_struct_paths: tuple[tuple[str, ...], ...] = (), + leaf_path: tuple[str, ...] = (), +) -> ArrayPath: + """Build an ArrayPath from a column name, inner struct paths, and a leaf path. + + Each entry in `inner_struct_paths` is `(prefix_structs..., inner_array_name)`: + the prefix names become struct segments and the last name becomes an + inner ArraySegment. 
+ """ + column_path = _path(column) + if isinstance(column_path, ScalarPath): + prefix_structs = column_path.segments[:-1] + outer_name = column_path.segments[-1].name + prefix = ScalarPath(segments=prefix_structs) + path = prefix.append_array(outer_name, iter_count=1) + else: + path = column_path + for sp in inner_struct_paths: + for n in sp[:-1]: + path = path.append_struct(n) + path = path.append_array(sp[-1], iter_count=1) + for n in leaf_path: + path = path.append_struct(n) + return path + + +class TestRenderTestModuleParseable: + def test_renders_valid_python_with_nodes(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("division_area", nodes, []) + ast.parse(source) + + def test_empty_nodes_renders_valid_python(self) -> None: + source = render_test_module("empty", [], []) + ast.parse(source) + + +class TestBaseRow: + def test_default_base_rows_are_empty(self) -> None: + source = render_test_module("test", [], []) + assert "BASE_ROW_SPARSE: dict = {}" in source + assert "BASE_ROW_POPULATED: dict = {}" in source + + def test_provided_sparse_row_rendered(self) -> None: + source = render_test_module("test", [], [], base_row_sparse={"id": "abc"}) + assert "BASE_ROW_SPARSE: dict = " in source + assert "'id': 'abc'" in source + + def test_provided_populated_row_rendered(self) -> None: + source = render_test_module( + "test", + [], + [], + base_row_sparse={"id": "abc"}, + base_row_populated={"id": "abc", "names": {"primary": ""}}, + ) + assert "BASE_ROW_POPULATED: dict = " in source + assert "'names'" in source + + +class TestFieldScenarios: + def test_required_produces_none_value(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "Scenario(" in source + assert "set_at_path('country', None)" in source + assert "'country'" in source + assert "'required'" in source + + def test_enum_produces_invalid_string(self) -> None: + nodes = [ + 
make_check("check_enum", _path("subtype"), args=(["a", "b", "c"],)), + ] + source = render_test_module("test", nodes, []) + assert "__INVALID__" in source + assert "'enum'" in source + + def test_bounds_produces_out_of_range(self) -> None: + nodes = [ + make_check("check_bounds", _path("score"), kwargs=(("ge", 0.0),)), + ] + source = render_test_module("test", nodes, []) + assert "-1" in source or "-1.0" in source + assert "'bounds'" in source + + def test_bounds_preserves_int_type(self) -> None: + """Integer bound kwargs emit integer literals for IntegerType fields.""" + nodes = [ + make_check("check_bounds", _path("version"), kwargs=(("ge", 0),)), + ] + source = render_test_module("test", nodes, []) + assert "set_at_path('version', -1)" in source + + def test_bounds_preserves_float_type(self) -> None: + """Float bound kwargs emit float literals for DoubleType fields.""" + nodes = [ + make_check("check_bounds", _path("height"), kwargs=(("ge", 0.0),)), + ] + source = render_test_module("test", nodes, []) + assert "-1.0" in source + + def test_unknown_constraint_raises(self) -> None: + nodes = [make_check("check_something_unknown", _path("geom"))] + with pytest.raises(ValueError, match="Cannot render mutate expression"): + render_test_module("test", nodes, []) + + def test_pattern_produces_invalid_string(self) -> None: + nodes = [ + make_check("check_pattern", _path("wikidata.value"), args=(r"^Q\d+$",)), + ] + source = render_test_module("test", nodes, []) + assert "'pattern'" in source + + def test_no_whitespace_pattern_mutation_contains_whitespace(self) -> None: + """Mutation for NoWhitespaceConstraint must contain whitespace to violate ^\\S+$.""" + nodes = [ + make_check( + "check_pattern", + _path("id"), + args=(r"^\S+$",), + constraint_type=NoWhitespaceConstraint, + ), + ] + source = render_test_module("test", nodes, []) + match = re.search( + r"set_at_path\('id',\s*(.+?)\)", + source, + re.DOTALL, + ) + assert match, f"no id:pattern set_at_path found 
in:\n{source}" + mutation_value = match.group(1).strip() + assert re.search(r"\\s|\s", mutation_value.strip("'")), ( + f"mutation {mutation_value} does not contain whitespace" + ) + + def test_country_code_uses_invalid_value(self) -> None: + nodes = [ + make_check( + "check_pattern", + _path("country.value"), + constraint_type=CountryCodeAlpha2Constraint, + label="ISO 3166-1 alpha-2 country code", + check_name="country_code_alpha2", + ), + ] + source = render_test_module("test", nodes, []) + assert "'99'" in source + + def test_multiple_descriptors_produce_multiple_entries(self) -> None: + """A field with required + enum produces two scenario entries.""" + nodes = [ + Check( + descriptors=( + ExpressionDescriptor(function="check_required"), + ExpressionDescriptor(function="check_enum", args=(["a"],)), + ), + target=_path("subtype"), + ), + ] + source = render_test_module("test", nodes, []) + assert "'required'" in source + assert "'enum'" in source + + def test_min_length_produces_empty_list(self) -> None: + nodes = [ + make_check("check_array_min_length", _path("sources"), args=(1,)), + ] + source = render_test_module("test", nodes, []) + assert "set_at_path('sources', [])" in source + assert "expected_field='sources_min_length'" in source + + def test_max_length_produces_oversized_list(self) -> None: + nodes = [ + make_check("check_array_max_length", _path("connectors"), args=(3,)), + ] + source = render_test_module("test", nodes, []) + assert "[{}, {}, {}, {}]" in source or "[{}] * 4" in source + assert "expected_field='connectors_max_length'" in source + + def test_scenario_id_includes_feature_name(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("division_area", nodes, []) + assert "division_area::country:required" in source + + def test_scenario_has_scaffold(self) -> None: + """Scenario includes a scaffold dict (empty when spec is None).""" + nodes = [make_check("check_required", _path("country"))] + 
source = render_test_module("test", nodes, []) + assert "scaffold={}" in source + + +class TestModelScenarios: + def test_radio_group_imports_mutation(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("is_land", "is_territorial")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_radio_group" in source + assert "radio_group" in source + + def test_require_any_of_imports_mutation(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_require_any_of" in source + + def test_require_if_includes_condition(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_require_if" in source + assert "'country'" in source + + def test_model_scenario_uses_contains_assertion(self) -> None: + """Model-level tests use 'in' not '==' to check violation membership.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "assert expected in invalid_violations" in source + + def test_renders_valid_python(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + + def test_enum_condition_value_renders_valid_python(self) -> None: + """Enum condition values must render as their string payload, not repr.""" + + class PlaceType(str, Enum): + COUNTY = "county" + + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", PlaceType.COUNTY), + ), + ), + ] + source = 
render_test_module("test", [], model_nodes) + ast.parse(source) + assert "'county'" in source + + def test_forbid_if_array_field_generates_fill_values(self) -> None: + """forbid_if targeting an array field emits fill_values with [{}].""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("destinations",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=( + ( + "destinations", + ArrayOf(element=Primitive(base_type="Destination")), + ), + ), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "[{}]" in source + + def test_forbid_if_struct_field_generates_fill_values(self) -> None: + """forbid_if targeting a struct field emits fill_values with {}.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("road_surface",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=( + ("road_surface", Primitive(base_type="RoadSurface")), + ), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "'road_surface': {}" in source + + def test_forbid_if_string_field_no_fill_values(self) -> None: + """forbid_if targeting a string field does not emit fill_values.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("class",), + condition=FieldEqCondition("subtype", "water"), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" not in source + + def test_forbid_if_not_condition_uses_negate(self) -> None: + """forbid_if with Not(FieldEqCondition) passes negate=True to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("destinations",), + condition=Not(FieldEqCondition("subtype", "road")), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "negate=True" in source + assert 
"'road'" in source + + def test_require_any_of_nested_uses_array_path(self) -> None: + """require_any_of in an array element passes array_path to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("labels", "symbols")), + target=_array("destinations"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'array_path="destinations"' in source + + def test_require_any_of_nested_with_leaf_path(self) -> None: + """require_any_of nested in struct within array passes struct_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("heading", "during")), + target=_array("access_restrictions", leaf_path=("when",)), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'array_path="access_restrictions"' in source + assert 'struct_path="when"' in source + + def test_require_any_of_top_level_no_array_path(self) -> None: + """Top-level require_any_of does not emit array_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "array_path" not in source + + def test_require_if_not_condition_uses_negate(self) -> None: + """require_if with Not(FieldEqCondition) passes negate=True to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("class",), + condition=Not(FieldEqCondition("subtype", "road")), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "negate=True" in source + + def test_model_scenario_uses_inline_lambda(self) -> None: + """Model scenarios emit mutate=lambda row: ... 
directly.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate=lambda row:" in source + assert "mutate_radio_group(" in source + + def test_model_scenario_has_scaffold(self) -> None: + """Scenario includes a scaffold dict (empty when spec is None).""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "Scenario(" in source + assert "scaffold={}" in source + + def test_min_fields_set_renders_mutation_call(self) -> None: + """MinFieldsSet dispatches to `mutate_min_fields_set`.""" + model_nodes = [ + ModelCheck( + descriptor=MinFieldsSet(field_names=("x", "y"), count=1), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_min_fields_set(row, ['x', 'y'])" in source + import_match = re.search( + r"from \.\._support\.mutations\s+import\s+(.+?)(?:\n\n|\Z)", + source, + re.DOTALL, + ) + assert import_match is not None + assert "mutate_min_fields_set" in import_match.group(1) + + def test_require_any_of_with_inner_levels_raises(self) -> None: + """require_any_of does not accept inner_array_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_array("outer", inner_struct_paths=(("inner",),)), + ), + ] + with pytest.raises(ValueError, match="inner_array_path"): + render_test_module("test", [], model_nodes) + + def test_radio_group_with_array_path_raises(self) -> None: + """radio_group takes no array kwargs; nodes with column_path raise.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + target=_array("outer"), + ), + ] + with pytest.raises(ValueError, match="array_path"): + render_test_module("test", [], model_nodes) + + def test_require_if_with_leaf_path_raises(self) -> None: + """require_if does not accept struct_path; nodes with leaf_path raise.""" + 
model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + target=_array("outer", leaf_path=("when",)), + ), + ] + with pytest.raises(ValueError, match="struct_path"): + render_test_module("test", [], model_nodes) + + def test_require_if_with_multi_inner_levels_raises(self) -> None: + """require_if only consumes one inner iteration; multi-level is rejected.""" + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + target=_array("outer", inner_struct_paths=(("middle",), ("inner",))), + ), + ] + with pytest.raises(ValueError, match="multi-level inner struct paths"): + render_test_module("test", [], model_nodes) + + +class TestTestLayer: + @pytest.fixture(scope="class") + def empty_source(self) -> str: + return render_test_module("test", [], []) + + def test_test_scenario_sparse_present(self, empty_source: str) -> None: + assert "def test_scenario_sparse(" in empty_source + + def test_test_scenario_populated_present(self, empty_source: str) -> None: + assert "def test_scenario_populated(" in empty_source + + def test_test_baseline_sparse_present(self, empty_source: str) -> None: + assert "def test_baseline_sparse(" in empty_source + + def test_test_baseline_populated_present(self, empty_source: str) -> None: + assert "def test_baseline_populated(" in empty_source + + def test_sparse_results_fixture_present(self, empty_source: str) -> None: + assert "def sparse_results(" in empty_source + + def test_populated_results_fixture_present(self, empty_source: str) -> None: + assert "def populated_results(" in empty_source + + def test_assert_scenario_helper_present(self, empty_source: str) -> None: + assert "def _assert_scenario(" in empty_source + + def test_imports_scenario(self, empty_source: str) -> None: + assert "Scenario" in empty_source + + def test_uses_harness_imports(self, empty_source: 
str) -> None: + assert "from .._support.harness import" in empty_source + + def test_imports_set_at_path_only_when_field_scenarios_present(self) -> None: + # No field checks -> no set_at_path scenarios -> no import + empty = render_test_module("test", [], []) + assert "from .._support.helpers import set_at_path" not in empty + + # Field check -> set_at_path used -> import emitted + with_field = render_test_module( + "test", + [make_check("check_required", _path("country"))], + [], + ) + assert "from .._support.helpers import set_at_path" in with_field + + def test_scenario_checks_valid_and_invalid(self, empty_source: str) -> None: + assert "::valid" in empty_source + assert "::invalid" in empty_source + + def test_scenarios_list_type_annotation(self, empty_source: str) -> None: + assert "list[Scenario]" in empty_source + + def test_populated_tests_not_marked_skip(self, empty_source: str) -> None: + assert "pytest.mark.skip" not in empty_source + + +class TestStructUniqueCheckScenarios: + @pytest.fixture() + def sources_unique_output(self) -> str: + nodes = [make_check("check_struct_unique", _path("sources"))] + return render_test_module("test", nodes, []) + + def test_struct_unique_emits_scenario(self, sources_unique_output: str) -> None: + """struct_unique_check produces Scenario with scaffold and inline lambda.""" + assert "Scenario(" in sources_unique_output + assert "expected_field='sources_unique'" in sources_unique_output + assert "expected_check='struct_unique'" in sources_unique_output + + def test_struct_unique_imports_mutate_unique_items( + self, sources_unique_output: str + ) -> None: + assert ( + "from .._support.mutations import mutate_unique_items" + in sources_unique_output + ) + + def test_no_struct_unique_does_not_import_mutate_unique_items(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "mutate_unique_items" not in source + + def 
test_struct_unique_inline_lambda(self, sources_unique_output: str) -> None: + """struct_unique_check emits mutate=lambda row: mutate_unique_items(...).""" + assert "mutate=lambda row: mutate_unique_items(" in sources_unique_output + assert "'sources'" in sources_unique_output + + def test_struct_unique_nested_path_strips_suffix(self) -> None: + """Nested bracket path uses the structural field for mutation.""" + nodes = [ + make_check("check_struct_unique", _path("access_restrictions[].when.mode")), + ] + source = render_test_module("test", nodes, []) + # Black may wrap the long lambda — check parts separately + assert "mutate_unique_items(" in source + assert "'access_restrictions[].when.mode'" in source + assert "expected_field='access_restrictions[].when.mode_unique'" in source + + def test_struct_unique_renders_valid_python( + self, sources_unique_output: str + ) -> None: + ast.parse(sources_unique_output) + + def test_struct_unique_mixed_with_field_scenarios(self) -> None: + """struct_unique_check alongside normal field checks renders valid Python.""" + nodes = [ + make_check("check_required", _path("sources")), + make_check("check_struct_unique", _path("sources")), + ] + source = render_test_module("test", nodes, []) + ast.parse(source) + assert source.count("Scenario(") == 2 + + def test_struct_unique_has_scaffold(self, sources_unique_output: str) -> None: + """struct_unique_check Scenario includes scaffold dict.""" + assert "scaffold={}" in sources_unique_output + + +class TestArmFiltering: + """Per-arm test generation filters field checks by discriminator value.""" + + def _common_node(self) -> Check: + return make_check("check_required", _path("id")) + + def _road_node(self) -> Check: + return make_check( + "check_required", + _array("road_surface"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ) + + def _rail_node(self) -> Check: + return make_check( + "check_required", + _array("rail_flags"), + 
guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), + ) + + def _inner_disc_node(self) -> Check: + """Road-arm check with in-element discriminator (vehicle dimension).""" + return make_check( + "check_required", + _path("speed_limits[].when.vehicle[].value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + ElementGuard(discriminator="dimension", values=("height", "length")), + ), + ) + + def test_arm_road_includes_common_and_road_checks(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="road") + assert "set_at_path('id'" in source + assert "road_surface" in source + assert "rail_flags" not in source + + def test_arm_rail_includes_common_and_rail_checks(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="rail") + assert "set_at_path('id'" in source + assert "rail_flags" in source + assert "road_surface" not in source + + def test_arm_includes_inner_disc_by_outer_variant(self) -> None: + """In-element discriminator checks emit when the outer Guard matches the arm.""" + nodes = [self._inner_disc_node()] + source = render_test_module("test", nodes, [], arm="road") + assert "vehicle" in source + + def test_arm_excludes_inner_disc_wrong_outer(self) -> None: + nodes = [self._inner_disc_node()] + source = render_test_module("test", nodes, [], arm="rail") + assert "vehicle" not in source + + def test_no_arm_includes_all_checks(self) -> None: + """Without arm filtering, all checks are included.""" + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, []) + assert "set_at_path('id'" in source + assert "road_surface" in source + assert "rail_flags" in source + + def test_arm_includes_model_checks(self) -> None: + """Arm-agnostic ModelChecks (arm=None) reach every arm test.""" + model_nodes = [ + ModelCheck( + 
descriptor=ForbidIf( + field_names=("rail_flags",), + condition=Not(FieldEqCondition("subtype", "rail")), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes, arm="road") + assert "mutate_forbid_if" in source + + def test_arm_excludes_other_arms_model_checks(self) -> None: + """A ModelCheck tagged for one arm does not appear in another arm's tests.""" + road_only = ModelCheck( + descriptor=RadioGroup(field_names=("road_flag_a", "road_flag_b")), + arm="road", + ) + road_source = render_test_module("test", [], [road_only], arm="road") + assert "mutate_radio_group" in road_source + rail_source = render_test_module("test", [], [road_only], arm="rail") + assert "mutate_radio_group" not in rail_source + + def test_arm_renders_valid_python(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="road") + ast.parse(source) + + def test_arm_filtering_ignores_inner_element_discriminator(self) -> None: + """Element guards on inner-union discriminators don't gate arm filtering. + + The inner `ElementGuard` discriminator (`dimension`) is unrelated + to the outer union arm (`subtype`). When an `ElementGuard` value + happens to coincide with an arm name, an `any(...)` filter would + wrongly include the check in that arm; the correct filter + consults only `ColumnGuard`s. + """ + check = make_check( + "check_required", + _path("speed_limits[].when.vehicle[].value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + # ElementGuard values include "rail" by coincidence -- it's + # a vehicle dimension, not a segment subtype. Filtering by + # `any(...)` would let arm="rail" include the check. 
+ ElementGuard(discriminator="dimension", values=("rail",)), + ), + ) + rail = render_test_module("test", [check], [], arm="rail") + assert "speed_limits" not in rail + road = render_test_module("test", [check], [], arm="road") + assert "speed_limits" in road + + +class TestLinearRangeMutations: + @pytest.mark.parametrize( + ("function", "expected_value"), + [ + ("check_linear_range_length", "[0.5]"), + ("check_linear_range_bounds", "[1.5, 2.0]"), + ("check_linear_range_order", "[0.8, 0.2]"), + ], + ) + def test_mutation_renders(self, function: str, expected_value: str) -> None: + nodes = [make_check(function, _path("between"))] + source = render_test_module("test", nodes, []) + assert expected_value in source + + +class TestGeometryTypeMutations: + def test_point_allowed_emits_linestring(self) -> None: + """When Point is allowed, inject LineString as the wrong type.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + assert "LineString" in source or "LINESTRING" in source + + def test_polygon_allowed_emits_point(self) -> None: + """When Point is not allowed, inject Point as the wrong type.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POLYGON, GeometryType.MULTI_POLYGON), + ), + ] + source = render_test_module("test", nodes, []) + assert "POINT" in source or "Point" in source + + def test_geometry_type_renders_valid_python(self) -> None: + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + ast.parse(source) + + def test_geometry_type_uses_wkt_strings(self) -> None: + """Geometry scenarios use WKT strings, not shapely constructor calls.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + 
assert "shapely" not in source + assert "LINESTRING" in source or "LineString" in source + + def test_all_candidates_allowed_raises(self) -> None: + """When all geometry candidates are allowed, scenario generation raises.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.GEOMETRY_COLLECTION, + ), + ), + ] + with pytest.raises(ValueError, match="Cannot render mutate expression"): + render_test_module("test", nodes, []) + + def test_no_geometry_type_no_shapely_imports(self) -> None: + """Shapely imports are absent when no geometry type scenario exists.""" + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "shapely" not in source diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py index fb8e1e41a..7897a8256 100644 --- a/packages/overture-schema-codegen/tests/test_reverse_references.py +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -11,17 +11,18 @@ RoadSegment, TreeNode, Venue, + feature_spec_for_model, has_name, lookup_by_name, make_union_spec, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.newtype_extraction import extract_newtype -from overture.schema.codegen.extraction.specs import PydanticTypeSpec, TypeIdentity +from overture.schema.codegen.extraction.specs import ( + ModelSpec, + PydanticTypeSpec, + TypeIdentity, +) from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) @@ -49,13 +50,12 @@ def test_model_referencing_type_produces_used_by_entry( target_name: str, ) -> None: """Model referencing a type produces a 'used by' entry on that type.""" - model_spec = 
extract_model(model_class, entry_point=model_name) - expand_model_tree(model_spec) - all_specs = collect_all_supplementary_types([model_spec]) + expanded = feature_spec_for_model(model_class, entry_point=model_name) + all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, target_name) - result = compute_reverse_references([model_spec], all_specs) + result = compute_reverse_references([expanded], all_specs) entries = lookup_by_name(result, target_name) assert len(entries) == 1 @@ -95,8 +95,8 @@ def test_union_members_have_used_by_entries() -> None: ) # Extract the member - road_spec = extract_model(RoadSegment) - expand_model_tree(road_spec) + road_spec = feature_spec_for_model(RoadSegment) + assert isinstance(road_spec, ModelSpec) all_specs = {TypeIdentity(RoadSegment, "RoadSegment"): road_spec} result = compute_reverse_references([union_spec], all_specs) @@ -109,8 +109,8 @@ def test_union_members_have_used_by_entries() -> None: def test_self_references_filtered_out() -> None: """Self-references are filtered out (handles recursive types).""" - tree_spec = extract_model(TreeNode, entry_point="TreeNode") - expand_model_tree(tree_spec) + tree_spec = feature_spec_for_model(TreeNode, entry_point="TreeNode") + assert isinstance(tree_spec, ModelSpec) # Manually add TreeNode to all_specs to test self-reference filtering all_specs = {TypeIdentity(TreeNode, "TreeNode"): tree_spec} @@ -124,10 +124,8 @@ def test_self_references_filtered_out() -> None: def test_deduplication_same_type_multiple_fields() -> None: """Deduplication works when same type is referenced via multiple fields.""" - instrument_spec = extract_model(Instrument, entry_point="Instrument") - venue_spec = extract_model(Venue, entry_point="Venue") - expand_model_tree(instrument_spec) - expand_model_tree(venue_spec) + instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") + venue_spec = feature_spec_for_model(Venue, entry_point="Venue") all_specs = 
collect_all_supplementary_types([instrument_spec, venue_spec]) assert has_name(all_specs, "Id") @@ -145,14 +143,13 @@ def test_deduplication_same_type_multiple_fields() -> None: def test_pydantic_type_has_used_by_from_feature() -> None: """Pydantic type in all_specs gets used-by entries from features referencing it.""" - model_spec = extract_model(FeatureWithUrl, entry_point="FeatureWithUrl") - expand_model_tree(model_spec) - all_specs = collect_all_supplementary_types([model_spec]) + expanded = feature_spec_for_model(FeatureWithUrl, entry_point="FeatureWithUrl") + all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, "HttpUrl") assert isinstance(lookup_by_name(all_specs, "HttpUrl"), PydanticTypeSpec) - result = compute_reverse_references([model_spec], all_specs) + result = compute_reverse_references([expanded], all_specs) entries = lookup_by_name(result, "HttpUrl") assert any(e.identity.name == "FeatureWithUrl" for e in entries) @@ -176,10 +173,8 @@ class FeatureBeta(BaseModel): FeatureBeta.__name__ = "Feature" FeatureBeta.__module__ = "beta.models" - spec_a = extract_model(FeatureAlpha, entry_point="Feature") - spec_b = extract_model(FeatureBeta, entry_point="Feature") - expand_model_tree(spec_a) - expand_model_tree(spec_b) + spec_a = feature_spec_for_model(FeatureAlpha, entry_point="Feature") + spec_b = feature_spec_for_model(FeatureBeta, entry_point="Feature") enum_id = TypeIdentity(SharedEnum, "SharedEnum") all_specs = {enum_id: extract_enum(SharedEnum)} @@ -201,10 +196,8 @@ def test_sorting_models_before_newtypes() -> None: # Create a synthetic NewType that wraps Id CustomId = NewType("CustomId", Id) - instrument_spec = extract_model(Instrument, entry_point="Instrument") - venue_spec = extract_model(Venue, entry_point="Venue") - expand_model_tree(instrument_spec) - expand_model_tree(venue_spec) + instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") + venue_spec = feature_spec_for_model(Venue, 
entry_point="Venue") all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) # Add the CustomId NewType which references Id diff --git a/packages/overture-schema-codegen/tests/test_specs.py b/packages/overture-schema-codegen/tests/test_specs.py index 0780e2fda..550af18b7 100644 --- a/packages/overture-schema-codegen/tests/test_specs.py +++ b/packages/overture-schema-codegen/tests/test_specs.py @@ -20,215 +20,86 @@ TypeIdentity, is_union_alias, ) -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.system.primitive import int32 from pydantic import BaseModel, Field -class TestFeatureSpecProtocol: - """Tests for FeatureSpec protocol compliance.""" - - def test_model_spec_satisfies_feature_spec(self) -> None: - """ModelSpec satisfies the FeatureSpec protocol.""" - +class TestFeatureSpec: + def test_model_spec_is_feature_spec(self) -> None: class Simple(BaseModel): name: str - spec = extract_model(Simple) - # Protocol compliance check - assert isinstance(spec, FeatureSpec) - # Verify protocol attributes + spec: FeatureSpec = extract_model(Simple) assert spec.name == "Simple" assert isinstance(spec.fields, list) assert spec.source_type is Simple class TestFieldSpec: - """Tests for FieldSpec dataclass.""" - - def test_fieldspec_stores_basic_attributes(self) -> None: - """FieldSpec should store name, type_info, description, is_required.""" - field_spec = FieldSpec( - name="test_field", - type_info=STR_TYPE, - description="A test field", - is_required=True, - ) - - assert field_spec.name == "test_field" - assert field_spec.type_info == STR_TYPE - assert field_spec.description == "A test field" - assert field_spec.is_required is True - - def test_fieldspec_optional_field(self) -> None: - """FieldSpec should handle optional fields.""" - optional_str = TypeInfo( - base_type="str", kind=TypeKind.PRIMITIVE, is_optional=True - ) - - field_spec = FieldSpec( + def test_carries_shape_and_optional_flag(self) -> 
None: + fs = FieldSpec( name="optional_field", - type_info=optional_str, + shape=STR_TYPE, description=None, is_required=False, + is_optional=True, ) - - assert field_spec.is_required is False - assert field_spec.description is None - - -class TestModelSpec: - """Tests for ModelSpec dataclass.""" - - def test_modelspec_stores_basic_attributes(self) -> None: - """ModelSpec should store name, description, fields.""" - field = FieldSpec( - name="id", - type_info=STR_TYPE, - description="Unique identifier", - is_required=True, - ) - - model_spec = ModelSpec( - name="TestModel", - description="A test model", - fields=[field], - ) - - assert model_spec.name == "TestModel" - assert model_spec.description == "A test model" - assert len(model_spec.fields) == 1 - assert model_spec.fields[0].name == "id" - - def test_entry_point_defaults_to_none(self) -> None: - spec = ModelSpec(name="M", description=None) - assert spec.entry_point is None + assert fs.name == "optional_field" + assert fs.shape is STR_TYPE + assert fs.is_required is False + assert fs.is_optional is True class TestAnnotatedField: - """Tests for AnnotatedField wrapper.""" - def test_stores_field_and_variant_sources(self) -> None: - """AnnotatedField pairs a FieldSpec with variant provenance.""" - fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) - af = AnnotatedField(field_spec=fs, variant_sources=("RoadSegment",)) + class RoadSegment(BaseModel): + pass + + fs = FieldSpec(name="x", shape=STR_TYPE) + af = AnnotatedField(field_spec=fs, variant_sources=(RoadSegment,)) assert af.field_spec is fs - assert af.variant_sources == ("RoadSegment",) + assert af.variant_sources == (RoadSegment,) def test_none_variant_sources_means_shared(self) -> None: - """variant_sources=None indicates a shared field.""" - fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) + fs = FieldSpec(name="x", shape=STR_TYPE) af = AnnotatedField(field_spec=fs, variant_sources=None) assert 
af.variant_sources is None -class TestFieldSpecModelTree: - """Tests for FieldSpec model and starts_cycle fields.""" - - def test_model_defaults_to_none(self) -> None: - field_spec = FieldSpec( - name="test", type_info=STR_TYPE, description=None, is_required=True - ) - assert field_spec.model is None - - def test_starts_cycle_defaults_to_false(self) -> None: - field_spec = FieldSpec( - name="test", type_info=STR_TYPE, description=None, is_required=True - ) - assert field_spec.starts_cycle is False - - def test_model_can_hold_model_spec(self) -> None: - type_info = TypeInfo(base_type="Address", kind=TypeKind.MODEL) - sub = ModelSpec(name="Address", description=None) - field_spec = FieldSpec( - name="address", - type_info=type_info, - description=None, - is_required=True, - model=sub, - ) - assert field_spec.model is sub - - def test_starts_cycle_can_be_set(self) -> None: - type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) - sub = ModelSpec(name="Node", description=None) - field_spec = FieldSpec( - name="parent", - type_info=type_info, - description=None, - is_required=False, - model=sub, - starts_cycle=True, - ) - assert field_spec.starts_cycle is True - assert field_spec.model is sub - - def test_starts_cycle_without_model_is_nonsensical(self) -> None: - """starts_cycle=True with model=None is expressible but invalid. - - expand_model_tree never produces this combination -- starts_cycle - is only set when model points to the cycle-causing ModelSpec. - Document the invariant so violations stand out. 
- """ - type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) - field_spec = FieldSpec( - name="parent", - type_info=type_info, - description=None, - is_required=False, - starts_cycle=True, - ) - # Expressible but meaningless: cycle to nowhere - assert field_spec.starts_cycle is True - assert field_spec.model is None - - class TestIsUnionAlias: - """Tests for is_union_alias predicate.""" - def test_annotated_union_of_models_returns_true(self) -> None: - """Annotated[Union of BaseModels] is a union alias.""" - class A(BaseModel): x: int class B(BaseModel): y: str - union_type = Annotated[A | B, Field(description="test")] - assert is_union_alias(union_type) is True + assert is_union_alias(Annotated[A | B, Field(description="test")]) is True def test_model_class_returns_false(self) -> None: - """A concrete BaseModel class is not a union alias.""" - class A(BaseModel): x: int assert is_union_alias(A) is False def test_plain_string_returns_false(self) -> None: - """A plain string is not a union alias.""" assert is_union_alias("not a type") is False def test_non_model_union_returns_false(self) -> None: - """A union of non-model types is not a union alias.""" assert is_union_alias(str | int) is False class TestUnionSpec: - """Tests for UnionSpec data structure.""" - def test_fields_property_returns_plain_field_specs(self) -> None: - """UnionSpec.fields property returns list[FieldSpec] from annotated_fields.""" - fs1 = FieldSpec( - name="a", type_info=STR_TYPE, description=None, is_required=True - ) - fs2 = FieldSpec( - name="b", type_info=STR_TYPE, description=None, is_required=False - ) + class X(BaseModel): + pass + + fs1 = FieldSpec(name="a", shape=STR_TYPE) + fs2 = FieldSpec(name="b", shape=STR_TYPE, is_required=False) spec = make_union_spec( annotated_fields=[ AnnotatedField(field_spec=fs1, variant_sources=None), - AnnotatedField(field_spec=fs2, variant_sources=("X",)), + AnnotatedField(field_spec=fs2, variant_sources=(X,)), ], ) assert spec.fields == [fs1, 
fs2] @@ -240,20 +111,13 @@ def test_frozen(self) -> None: with pytest.raises(AttributeError): ti.obj = str # type: ignore[misc] - def test_same_obj_equal(self) -> None: + def test_equality_by_obj_identity(self) -> None: a = TypeIdentity(obj=int, name="int") b = TypeIdentity(obj=int, name="integer") + c = TypeIdentity(obj=str, name="int") assert a == b - - def test_same_obj_same_hash(self) -> None: - a = TypeIdentity(obj=int, name="int") - b = TypeIdentity(obj=int, name="integer") assert hash(a) == hash(b) - - def test_different_obj_not_equal(self) -> None: - a = TypeIdentity(obj=int, name="int") - b = TypeIdentity(obj=str, name="int") - assert a != b + assert a != c def test_works_as_dict_key(self) -> None: ti = TypeIdentity(obj=int, name="int") @@ -271,35 +135,21 @@ def test_not_equal_to_non_identity(self) -> None: class TestSpecIdentity: def test_model_spec_identity(self) -> None: spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) - ident = spec.identity - assert isinstance(ident, TypeIdentity) - assert ident.obj is SimpleModel - assert ident.name == "Foo" + assert spec.identity.obj is SimpleModel + assert spec.identity.name == "Foo" def test_enum_spec_identity(self) -> None: spec = EnumSpec(name="Color", description=None, source_type=InstrumentFamily) - ident = spec.identity - assert ident.obj is InstrumentFamily - assert ident.name == "Color" + assert spec.identity.obj is InstrumentFamily def test_newtype_spec_identity(self) -> None: - from overture.schema.system.primitive import int32 - spec = NewTypeSpec( - name="int32", description=None, type_info=STR_TYPE, source_type=int32 + name="int32", description=None, shape=STR_TYPE, source_type=int32 ) - ident = spec.identity - assert ident.obj is int32 - assert ident.name == "int32" + assert spec.identity.obj is int32 def test_union_spec_identity(self) -> None: sentinel = object() spec = make_union_spec("TestUnion", source_annotation=sentinel) - ident = spec.identity - assert ident.obj is 
sentinel - assert ident.name == "TestUnion" - - def test_model_spec_satisfies_feature_protocol_with_identity(self) -> None: - spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) - feature: FeatureSpec = spec - assert feature.identity.obj is SimpleModel + assert spec.identity.obj is sentinel + assert spec.identity.name == "TestUnion" diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py index bbf8373fd..f8ccf88f0 100644 --- a/packages/overture-schema-codegen/tests/test_type_analyzer.py +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -1,18 +1,34 @@ -"""Tests for type analysis.""" +"""Tests for `analyze_type`: annotation -> `FieldShape` analysis.""" from enum import Enum from typing import Annotated, Any, Literal, NewType, Optional import pytest -from annotated_types import Ge +from annotated_types import Ge, MaxLen, MinLen +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + NewTypeShape, + Primitive, +) +from overture.schema.codegen.extraction.field_walk import ( + all_constraints, + list_depth, +) +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMinLen, + ScalarMinLen, +) from overture.schema.codegen.extraction.type_analyzer import ( - TypeInfo, - TypeKind, UnsupportedUnionError, analyze_type, single_literal_value, + unwrap_list, ) -from overture.schema.system.primitive import float64, int32 +from overture.schema.system.primitive import int32 from overture.schema.system.ref import Id from overture.schema.system.string import ( HexColor, @@ -24,568 +40,290 @@ from typing_extensions import Sentinel -@pytest.fixture() -def id_type_info() -> TypeInfo: - return analyze_type(Id) +def _shape(annotation: object) -> FieldShape: + shape, _, _ = analyze_type(annotation) + return shape -@pytest.fixture() -def hex_color_type_info() -> TypeInfo: - return 
analyze_type(HexColor) +def _is_optional(annotation: object) -> bool: + _, is_optional, _ = analyze_type(annotation) + return is_optional -class TestAnalyzeTypePrimitives: - """Tests for primitive type analysis.""" +def _description(annotation: object) -> str | None: + _, _, description = analyze_type(annotation) + return description + +class TestPrimitives: @pytest.mark.parametrize("annotation", [str, int, float, bool]) - def test_builtin_returns_primitive_type_info(self, annotation: type) -> None: - """Builtin type annotations return PRIMITIVE TypeInfo with matching base_type.""" - result = analyze_type(annotation) + def test_builtin_emits_primitive(self, annotation: type) -> None: + shape = _shape(annotation) + assert isinstance(shape, Primitive) + assert shape.base_type == annotation.__name__ + assert shape.source_type is annotation - assert result.base_type == annotation.__name__ - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - assert result.is_list is False + def test_any_emits_any_scalar(self) -> None: + shape = _shape(Any) + assert isinstance(shape, AnyScalar) -class TestAnalyzeTypeSentinel: - """Tests for Sentinel type filtering in unions. - - Pydantic uses `typing_extensions.Sentinel` instances (like ``) - in union types for optional fields. The type analyzer filters these out - alongside `None` when processing unions. 
- """ +class TestSentinel: + """`Sentinel` arms in unions are filtered alongside `None`.""" @pytest.fixture() - def missing_sentinel(self) -> object: + def missing(self) -> object: return Sentinel("MISSING") - def test_sentinel_filtered_from_union(self, missing_sentinel: object) -> None: - """Sentinel is filtered out, leaving the concrete type.""" - result = analyze_type(str | missing_sentinel) # type: ignore[arg-type] - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - - def test_sentinel_with_none_sets_optional(self, missing_sentinel: object) -> None: - """Sentinel + None both filtered; None triggers is_optional.""" - result = analyze_type(str | missing_sentinel | None) # type: ignore[arg-type] - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - - -class TestAnalyzeTypeOptional: - """Tests for Optional type analysis.""" - - def test_pipe_none_sets_is_optional(self) -> None: - """str | None returns TypeInfo with is_optional=True.""" - result = analyze_type(str | None) - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - assert result.is_list is False - - def test_type_with_literal_and_none(self) -> None: - """str | Literal[""] | None filters Literal and marks optional.""" - result = analyze_type(str | Literal[""] | None) - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - - def test_typing_optional_sets_is_optional(self) -> None: - """Optional[str] from typing module returns TypeInfo with is_optional=True.""" - result = analyze_type(Optional[str]) # noqa: UP045 - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - assert result.is_list is False + def test_filtered_leaves_concrete_type(self, missing: object) -> None: + shape = _shape(str | 
missing) # type: ignore[arg-type] + assert isinstance(shape, Primitive) + assert shape.base_type == "str" + assert _is_optional(str | missing) is False # type: ignore[arg-type] + def test_with_none_sets_optional(self, missing: object) -> None: + assert _is_optional(str | missing | None) is True # type: ignore[arg-type] -class TestAnalyzeTypeUnionLiteralFiltering: - """Tests for filtering Literal arms out of unions.""" - def test_type_with_literal_alternative(self) -> None: - """str | Literal[""] filters out the Literal and analyzes the concrete type.""" - result = analyze_type(str | Literal[""]) +class TestOptional: + def test_pipe_none(self) -> None: + assert _is_optional(str | None) is True - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False + def test_typing_optional(self) -> None: + assert _is_optional(Optional[str]) is True # noqa: UP045 + def test_literal_arm_filtered_with_concrete(self) -> None: + shape, optional, _ = analyze_type(str | Literal[""] | None) + assert isinstance(shape, Primitive) and shape.base_type == "str" + assert optional is True -class TestAnalyzeTypeList: - """Tests for list type analysis.""" - def test_list_str_sets_is_list(self) -> None: - """list[str] returns TypeInfo with is_list=True.""" - result = analyze_type(list[str]) +class TestList: + def test_simple_list(self) -> None: + shape = _shape(list[str]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, Primitive) + assert shape.element.base_type == "str" - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - assert result.is_list is True + def test_nested_list_records_depth(self) -> None: + shape = _shape(list[list[str]]) + assert list_depth(shape) == 2 - def test_nested_list_sets_depth_2(self) -> None: - """list[list[str]] records two levels of nesting.""" - result = analyze_type(list[list[str]]) + def test_optional_list(self) -> None: + 
shape, optional, _ = analyze_type(list[str] | None) + assert isinstance(shape, ArrayOf) + assert optional is True - assert result.list_depth == 2 - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE + def test_list_optional_element(self) -> None: + shape, optional, _ = analyze_type(list[str | None]) + assert isinstance(shape, ArrayOf) + # `is_optional` reflects the field accepting None; element-level + # `| None` propagates the same way. + assert optional is True -class TestAnalyzeTypeComposite: - """Tests for composite/nested type analysis.""" - - def test_list_optional_str(self) -> None: - """list[str | None] sets both is_list and is_optional.""" - result = analyze_type(list[str | None]) - - assert result.base_type == "str" - assert result.is_list is True - assert result.is_optional is True - - def test_optional_list_str(self) -> None: - """list[str] | None sets both is_list and is_optional.""" - result = analyze_type(list[str] | None) - - assert result.base_type == "str" - assert result.is_list is True - assert result.is_optional is True - - def test_annotated_optional_str(self) -> None: - """Annotated[str | None, ...] extracts constraints and sets is_optional.""" - result = analyze_type(Annotated[str | None, "description"]) - - assert result.base_type == "str" - assert result.is_optional is True - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "description" - - def test_annotated_list_str(self) -> None: - """Annotated[list[str], ...] 
extracts constraints and sets is_list.""" - result = analyze_type(Annotated[list[str], Field(min_length=1)]) - - assert result.base_type == "str" - assert result.is_list is True - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - - -class TestAnalyzeTypeAnnotated: - """Tests for Annotated type analysis.""" - - def test_annotated_int_with_ge_extracts_constraint(self) -> None: - """Annotated[int, Field(ge=0)] unpacks FieldInfo to extract Ge constraint.""" - result = analyze_type(Annotated[int, Field(ge=0)]) - - assert result.base_type == "int" - assert result.kind == TypeKind.PRIMITIVE - assert len(result.constraints) == 1 - cs = result.constraints[0] - assert cs.source_ref is None +class TestAnnotated: + def test_ge_collected_on_terminal(self) -> None: + shape = _shape(Annotated[int, Field(ge=0)]) + assert isinstance(shape, Primitive) + assert len(shape.constraints) == 1 + cs = shape.constraints[0] assert isinstance(cs.constraint, Ge) - assert cs.constraint.ge == 0 - - def test_annotated_without_constraints(self) -> None: - """Annotated[str, 'description'] extracts non-Field metadata.""" - result = analyze_type(Annotated[str, "just a description"]) - - assert result.base_type == "str" - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "just a description" - - -class TestAnalyzeTypeLiteral: - """Tests for Literal type analysis.""" - - def test_literal_string_extracts_values(self) -> None: - """Literal["active"] stores the value in literal_values tuple.""" - result = analyze_type(Literal["active"]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("active",) - - def test_literal_int_extracts_values(self) -> None: - """Literal[42] stores the value in literal_values tuple.""" - result = analyze_type(Literal[42]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == (42,) - - def 
test_multi_value_literal_stores_all_args(self) -> None: - """Literal["a", "b"] stores all args in literal_values tuple.""" - result = analyze_type(Literal["a", "b"]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("a", "b") - - def test_optional_literal_extracts_values(self) -> None: - """Optional[Literal["x"]] unwraps to Literal with is_optional set.""" - result = analyze_type(Literal["x"] | None) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("x",) - assert result.is_optional is True - - -class TestAnalyzeTypeEnum: - """Tests for Enum type analysis.""" - - def test_enum_subclass_returns_kind_enum(self) -> None: - """Enum subclass returns TypeInfo with kind=ENUM.""" + assert cs.source_ref is None + def test_non_field_metadata_collected(self) -> None: + shape = _shape(Annotated[str, "just a description"]) + assert isinstance(shape, Primitive) + assert shape.constraints[0].constraint == "just a description" + + def test_list_level_minlen_lands_on_arrayof(self) -> None: + shape = _shape(Annotated[list[str], Field(min_length=1)]) + assert isinstance(shape, ArrayOf) + assert len(shape.constraints) == 1 + assert isinstance(shape.element, Primitive) + assert shape.element.constraints == () + + def test_layered_constraints_anchor_separately(self) -> None: + shape = _shape(Annotated[list[Annotated[str, MinLen(2)]], MinLen(3)]) + assert isinstance(shape, ArrayOf) + outer = shape.constraints + assert len(outer) == 1 + assert outer[0].constraint == ArrayMinLen(min_length=3) + assert isinstance(shape.element, Primitive) + inner = shape.element.constraints + assert len(inner) == 1 + assert inner[0].constraint == ScalarMinLen(min_length=2) + + +class TestLiteral: + def test_single_value(self) -> None: + shape = _shape(Literal["active"]) + assert isinstance(shape, LiteralScalar) + assert shape.values == ("active",) + + def test_multi_value(self) -> None: + shape = _shape(Literal["a", "b"]) + assert isinstance(shape, 
LiteralScalar) + assert shape.values == ("a", "b") + + def test_optional_literal(self) -> None: + shape, optional, _ = analyze_type(Literal["x"] | None) + assert isinstance(shape, LiteralScalar) + assert shape.values == ("x",) + assert optional is True + + +class TestEnumAndModel: + def test_enum_emits_primitive_with_source(self) -> None: class Color(Enum): RED = "red" - GREEN = "green" - - result = analyze_type(Color) - - assert result.base_type == "Color" - assert result.kind == TypeKind.ENUM - - -class TestAnalyzeTypeModel: - """Tests for BaseModel type analysis.""" - def test_basemodel_subclass_returns_kind_model(self) -> None: - """BaseModel subclass returns TypeInfo with kind=MODEL.""" + shape = _shape(Color) + assert isinstance(shape, Primitive) + assert shape.source_type is Color + def test_model_without_resolver_falls_back_to_primitive(self) -> None: class Person(BaseModel): name: str - result = analyze_type(Person) - - assert result.base_type == "Person" - assert result.kind == TypeKind.MODEL - - -class TestAnalyzeTypeNewType: - """Tests for NewType primitive analysis.""" - - def test_int32_returns_newtype_name(self) -> None: - """int32 NewType returns TypeInfo with base_type='int32'.""" - result = analyze_type(int32) - - assert result.base_type == "int32" - assert result.kind == TypeKind.PRIMITIVE - - def test_float64_returns_newtype_name(self) -> None: - """float64 NewType returns TypeInfo with base_type='float64'.""" - result = analyze_type(float64) - - assert result.base_type == "float64" - assert result.kind == TypeKind.PRIMITIVE - - def test_optional_int32(self) -> None: - """int32 | None sets is_optional and preserves base_type.""" - result = analyze_type(int32 | None) - - assert result.base_type == "int32" - assert result.is_optional is True - - -class TestNewtypeName: - """Tests for outermost NewType name tracking.""" - - def test_single_layer_newtype(self) -> None: - """Single NewType like int32 sets newtype_name to its name.""" - result = 
analyze_type(int32) - - assert result.newtype_name == "int32" - assert result.base_type == "int32" - - def test_nested_newtype_preserves_outermost(self, id_type_info: TypeInfo) -> None: - """Nested NewType chain uses outermost name for newtype_name.""" - assert id_type_info.newtype_name == "Id" - assert id_type_info.base_type == "NoWhitespaceString" - - def test_plain_type_has_no_newtype_name(self) -> None: - """Plain types without NewType wrapping have newtype_name=None.""" - result = analyze_type(str) - - assert result.newtype_name is None + shape = _shape(Person) + assert isinstance(shape, Primitive) + assert shape.source_type is Person + assert shape.base_type == "Person" - def test_newtype_ref_set_for_newtype(self, id_type_info: TypeInfo) -> None: - """newtype_ref points to the outermost NewType callable.""" - assert id_type_info.newtype_ref is Id - def test_newtype_ref_none_for_plain_type(self) -> None: - """Plain types have newtype_ref=None.""" - result = analyze_type(str) +class TestNewType: + def test_simple_newtype(self) -> None: + shape = _shape(int32) + assert isinstance(shape, NewTypeShape) + assert shape.name == "int32" + assert isinstance(shape.inner, Primitive) + assert shape.inner.base_type == "int32" - assert result.newtype_ref is None + def test_outermost_newtype_is_outer_wrapper(self) -> None: + shape = _shape(Id) + assert isinstance(shape, NewTypeShape) + assert shape.name == "Id" + def test_optional_newtype(self) -> None: + assert _is_optional(int32 | None) is True -class TestNewtypeWrappingList: - """Tests for NewType wrapping a list type.""" - def test_newtype_wrapping_list(self) -> None: - """NewType wrapping a list sets is_list and preserves newtype_name.""" +class TestNewTypeWrappingList: + def test_newtype_around_list(self) -> None: TestSources = NewType("TestSources", Annotated[list[str], Field(min_length=1)]) - result = analyze_type(TestSources) + shape = _shape(TestSources) + assert isinstance(shape, NewTypeShape) and shape.name == 
"TestSources" + assert isinstance(shape.inner, ArrayOf) - assert result.is_list is True - assert result.newtype_name == "TestSources" - - def test_scalar_newtype_is_not_list(self) -> None: - """Scalar NewType like int32 has is_list=False.""" - result = analyze_type(int32) - - assert result.is_list is False - - def test_plain_list_has_no_newtype_name(self) -> None: - """Plain list[str] without NewType has newtype_name=None.""" - result = analyze_type(list[str]) - - assert result.newtype_name is None - assert result.is_list is True - - def test_newtype_wrapping_list_of_models(self) -> None: - """list[NewType wrapping list[Model]] records depth 2, outer depth 1.""" - - class _Item(BaseModel): - name: str - - Inner = NewType("Inner", Annotated[list[_Item], Field(min_length=1)]) - result = analyze_type(list[Inner]) - - assert result.list_depth == 2 - assert result.newtype_outer_list_depth == 1 - assert result.base_type == "Inner" - assert result.kind == TypeKind.MODEL - assert result.source_type is _Item + def test_list_around_scalar_newtype(self) -> None: + ScalarNT = NewType("ScalarNT", str) + shape = _shape(list[ScalarNT]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, NewTypeShape) -class TestNewtypeOuterListDepth: - """Tests for newtype_outer_list_depth tracking.""" +class TestConstraintProvenance: + """Constraints carry the NewType that contributed them.""" - def test_list_of_scalar_newtype_has_outer_depth(self) -> None: - """list[ScalarNewType] records the list layer as outside the NewType.""" - ScalarNT = NewType("ScalarNT", str) - result = analyze_type(list[ScalarNT]) - - assert result.newtype_outer_list_depth == 1 - assert result.list_depth == 1 - - def test_newtype_wrapping_list_has_zero_outer_depth(self) -> None: - """NewType wrapping list[X] records no list layers outside the NewType.""" - ListNT = NewType("ListNT", Annotated[list[str], Field(min_length=1)]) - result = analyze_type(ListNT) - - assert result.newtype_outer_list_depth 
== 0 - assert result.list_depth == 1 - - @pytest.mark.parametrize( - "annotation", - [ - list[str], # list without NewType - int32, # scalar NewType - str, # plain type - ], - ids=["plain_list", "scalar_newtype", "plain_type"], - ) - def test_zero_outer_depth_without_newtype_boundary( - self, annotation: object - ) -> None: - """Types without a NewType inside a list have newtype_outer_list_depth=0.""" - result = analyze_type(annotation) - - assert result.newtype_outer_list_depth == 0 - - def test_nested_list_of_scalar_newtype_has_outer_depth_2(self) -> None: - """list[list[ScalarNewType]] records two outer list layers.""" - ScalarNT = NewType("ScalarNT", str) - result = analyze_type(list[list[ScalarNT]]) + @pytest.fixture() + def id_shape(self) -> FieldShape: + return _shape(Id) - assert result.newtype_outer_list_depth == 2 - assert result.list_depth == 2 + @pytest.fixture() + def hex_shape(self) -> FieldShape: + return _shape(HexColor) + def test_nested_newtype_flattens_with_sources(self, id_shape: FieldShape) -> None: + sources = {cs.source_name for cs in all_constraints(id_shape)} + assert "Id" in sources + assert "NoWhitespaceString" in sources -class TestConstraintProvenance: - """Tests for flattened constraints with provenance tracking.""" - - def test_nested_newtype_flattens_constraints(self, id_type_info: TypeInfo) -> None: - """Id -> NoWhitespaceString -> str flattens all constraints with sources.""" - source_names = { - cs.source_name for cs in id_type_info.constraints if cs.source_name - } - assert "Id" in source_names - assert "NoWhitespaceString" in source_names - - def test_nested_newtype_includes_inner_constraints( - self, id_type_info: TypeInfo - ) -> None: - """Inner NewType constraints are collected with provenance.""" - nws_constraints = [ - cs for cs in id_type_info.constraints if cs.source_ref is NoWhitespaceString + def test_inner_newtype_constraints_preserved(self, id_shape: FieldShape) -> None: + nws = [ + cs + for cs in 
all_constraints(id_shape) + if cs.source_ref is NoWhitespaceString ] - constraint_types = {type(cs.constraint) for cs in nws_constraints} - assert NoWhitespaceConstraint in constraint_types + assert NoWhitespaceConstraint in {type(cs.constraint) for cs in nws} def test_direct_annotation_has_none_source(self) -> None: - """Constraints from direct Annotated (no NewType) have source_ref=None.""" - result = analyze_type(Annotated[str, "direct"]) - - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "direct" - - def test_single_newtype_constraints_attributed( - self, hex_color_type_info: TypeInfo - ) -> None: - """HexColor constraints are attributed to the HexColor callable.""" - assert all(cs.source_ref is HexColor for cs in hex_color_type_info.constraints) - assert len(hex_color_type_info.constraints) > 0 - - def test_source_ref_is_newtype_callable( - self, hex_color_type_info: TypeInfo - ) -> None: - """source_ref is the actual NewType callable, not a string.""" - cs = hex_color_type_info.constraints[0] - assert cs.source_ref is HexColor - - def test_constraint_preserves_original_object( - self, hex_color_type_info: TypeInfo - ) -> None: - """ConstraintSource.constraint holds the original constraint object.""" - hcc = next( - cs - for cs in hex_color_type_info.constraints - if type(cs.constraint).__name__ == "HexColorConstraint" - ) - assert hcc.constraint.__class__.__name__ == "HexColorConstraint" + shape = _shape(Annotated[str, "direct"]) + cs = all_constraints(shape) + assert len(cs) == 1 + assert cs[0].source_ref is None + def test_single_newtype_attributed_to_itself(self, hex_shape: FieldShape) -> None: + cs = all_constraints(hex_shape) + assert cs and all(c.source_ref is HexColor for c in cs) -class TestTypeInfoDescription: - """Tests for TypeInfo.description from Field(description=...) 
metadata.""" - def test_newtype_with_field_description( - self, hex_color_type_info: TypeInfo - ) -> None: - """Should extract Field description from HexColor.""" - assert hex_color_type_info.description is not None - assert "color" in hex_color_type_info.description.lower() +class TestDescription: + def test_newtype_field_description(self) -> None: + desc = _description(HexColor) + assert desc is not None and "color" in desc.lower() - def test_newtype_without_field_description(self) -> None: - """Should have None description for types without Field(description=...).""" - result = analyze_type(int) - assert result.description is None + def test_plain_type_has_no_description(self) -> None: + assert _description(int) is None - def test_plain_annotated_with_field_description(self) -> None: - """Should extract description from Annotated with Field(description=...).""" + def test_annotated_field_description(self) -> None: MyType = Annotated[str, Field(description="A test description")] - result = analyze_type(MyType) - assert result.description == "A test description" - - def test_outermost_description_wins(self, id_type_info: TypeInfo) -> None: - """Outermost FieldInfo.description takes precedence in nested NewTypes.""" - assert id_type_info.description is not None - assert "unique identifier" in id_type_info.description.lower() - - def test_newtype_without_field_has_none_description(self) -> None: - """NewType with constraints but no Field(description=...) 
has None.""" - result = analyze_type(SnakeCaseString) - assert result.description is None - - -class TestAnalyzeTypeAny: - """Tests for typing.Any analysis.""" - - def test_any_returns_primitive(self) -> None: - """Any annotation returns TypeInfo with base_type='Any' and kind=PRIMITIVE.""" - result = analyze_type(Any) + assert _description(MyType) == "A test description" - assert result.base_type == "Any" - assert result.kind == TypeKind.PRIMITIVE - - def test_dict_with_any_value(self) -> None: - """dict[str, Any] analyzes without error.""" - result = analyze_type(dict[str, Any]) - - assert result.is_dict is True - assert result.dict_value_type is not None - assert result.dict_value_type.base_type == "Any" + def test_outermost_description_wins(self) -> None: + desc = _description(Id) + assert desc is not None and "unique identifier" in desc.lower() + def test_newtype_without_field_description(self) -> None: + assert _description(SnakeCaseString) is None -class TestAnalyzeTypeDict: - """Tests for dict type analysis.""" - @pytest.fixture() - def dict_str_int(self) -> TypeInfo: - return analyze_type(dict[str, int]) - - def test_dict_str_int_sets_is_dict(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] returns TypeInfo with is_dict=True.""" - assert dict_str_int.is_dict is True - assert dict_str_int.is_optional is False - assert dict_str_int.is_list is False - - def test_dict_key_type_analyzed(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] has dict_key_type describing the key.""" - assert dict_str_int.dict_key_type is not None - assert dict_str_int.dict_key_type.base_type == "str" - assert dict_str_int.dict_key_type.kind == TypeKind.PRIMITIVE - - def test_dict_value_type_analyzed(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] has dict_value_type describing the value.""" - assert dict_str_int.dict_value_type is not None - assert dict_str_int.dict_value_type.base_type == "int" - assert dict_str_int.dict_value_type.kind == 
TypeKind.PRIMITIVE +class TestDict: + def test_simple_dict(self) -> None: + shape = _shape(dict[str, int]) + assert isinstance(shape, MapOf) + assert isinstance(shape.key, Primitive) and shape.key.base_type == "str" + assert isinstance(shape.value, Primitive) and shape.value.base_type == "int" def test_optional_dict(self) -> None: - """dict[str, str] | None sets is_dict and is_optional.""" - result = analyze_type(dict[str, str] | None) - - assert result.is_dict is True - assert result.is_optional is True + shape, optional, _ = analyze_type(dict[str, str] | None) + assert isinstance(shape, MapOf) + assert optional is True - def test_newtype_wrapping_dict(self) -> None: - """NewType wrapping dict preserves newtype_name and sets is_dict.""" + def test_newtype_around_dict(self) -> None: TestMapping = NewType("TestMapping", dict[str, str]) - result = analyze_type(TestMapping) + shape = _shape(TestMapping) + assert isinstance(shape, NewTypeShape) and shape.name == "TestMapping" + assert isinstance(shape.inner, MapOf) - assert result.is_dict is True - assert result.newtype_name == "TestMapping" + def test_dict_with_any_value(self) -> None: + shape = _shape(dict[str, Any]) + assert isinstance(shape, MapOf) + assert isinstance(shape.value, AnyScalar) - def test_bare_dict_raises_type_error(self) -> None: - """Bare dict without type arguments raises TypeError.""" + def test_bare_dict_raises(self) -> None: with pytest.raises(TypeError, match="Bare dict"): analyze_type(dict) + def test_minlen_on_map_raises(self) -> None: + with pytest.raises(NotImplementedError, match="MinLen on a Map"): + _shape(Annotated[dict[str, int], MinLen(1)]) + + def test_maxlen_on_map_raises(self) -> None: + with pytest.raises(NotImplementedError, match="MaxLen on a Map"): + _shape(Annotated[dict[str, int], MaxLen(10)]) -class TestAnalyzeTypeErrors: - """Tests for error handling.""" - def test_unsupported_annotation_raises_type_error(self) -> None: - """Unsupported annotation type raises TypeError.""" 
+class TestErrors: + def test_unsupported_annotation(self) -> None: with pytest.raises(TypeError, match="Unsupported annotation type"): analyze_type("not a type") - def test_multi_type_union_raises_clear_error(self) -> None: - """Multi-type unions like str | int raise UnsupportedUnionError.""" - with pytest.raises( - UnsupportedUnionError, match="Multi-type unions not supported" - ): + def test_multi_type_union_without_resolver(self) -> None: + with pytest.raises(UnsupportedUnionError): analyze_type(str | int) - def test_multi_type_union_with_none_raises_clear_error(self) -> None: - """Multi-type optional unions like str | int | None raise UnsupportedUnionError.""" - with pytest.raises( - UnsupportedUnionError, match="Multi-type unions not supported" - ): - analyze_type(str | int | None) - - def test_bare_list_raises_type_error(self) -> None: - """Bare list without type argument raises TypeError.""" + def test_bare_list(self) -> None: with pytest.raises(TypeError, match="Bare list without type argument"): analyze_type(list) @@ -598,79 +336,184 @@ class UnionModelB(BaseModel): y: str -class TestAnalyzeTypeUnion: - """Tests for discriminated union analysis.""" +class TestUnionResolver: + """Multi-arm unions of models go through the resolver callback.""" + + def test_resolver_receives_annotation_members_and_description(self) -> None: + captured: list[tuple[object, tuple[type[BaseModel], ...], str | None]] = [] - def test_all_model_union_returns_union_kind(self) -> None: - """Annotated[Union of BaseModel subclasses] returns TypeKind.UNION.""" - union_type = Annotated[UnionModelA | UnionModelB, Field(description="test")] - result = analyze_type(union_type) + def resolver( + annotation: object, + members: tuple[type[BaseModel], ...], + description: str | None, + ) -> Primitive: + captured.append((annotation, members, description)) + return Primitive(base_type="__captured__") - assert result.kind == TypeKind.UNION - assert result.union_members is not None - assert 
len(result.union_members) == 2 - assert UnionModelA in result.union_members - assert UnionModelB in result.union_members + union_type = Annotated[UnionModelA | UnionModelB, Field(description="x")] + shape, _, _ = analyze_type(union_type, union_resolver=resolver) + + assert isinstance(shape, Primitive) + assert shape.base_type == "__captured__" + _ann, members, description = captured[0] + expected: set[type[BaseModel]] = {UnionModelA, UnionModelB} + assert set(members) == expected + assert description == "x" + + def test_no_resolver_raises_on_multi_arm(self) -> None: + union_type = Annotated[UnionModelA | UnionModelB, Field(description="x")] + with pytest.raises(UnsupportedUnionError): + analyze_type(union_type) def test_annotated_wrapped_members_unwrapped(self) -> None: - """Union members wrapped in Annotated[X, Tag(...)] are unwrapped.""" + from overture.schema.codegen.extraction.type_analyzer import analyze_type as at + + captured_members: list[tuple[type[BaseModel], ...]] = [] + + def resolver( + _ann: object, + members: tuple[type[BaseModel], ...], + _description: str | None, + ) -> Primitive: + captured_members.append(members) + return Primitive(base_type="x") + union_type = Annotated[ Annotated[UnionModelA, Tag("a")] | Annotated[UnionModelB, Tag("b")], Field(description="disc"), ] - result = analyze_type(union_type) + at(union_type, union_resolver=resolver) + expected: set[type[BaseModel]] = {UnionModelA, UnionModelB} + assert set(captured_members[0]) == expected - assert result.kind == TypeKind.UNION - assert result.union_members is not None - assert len(result.union_members) == 2 - assert UnionModelA in result.union_members - assert UnionModelB in result.union_members - - def test_mixed_model_nonmodel_union_still_raises(self) -> None: - """Union of model + non-model types still raises UnsupportedUnionError.""" + def test_mixed_model_nonmodel_raises(self) -> None: with pytest.raises(UnsupportedUnionError): analyze_type(UnionModelA | str) - def 
test_non_model_multi_union_still_raises(self) -> None: - """Multi-type union of non-models still raises UnsupportedUnionError.""" - with pytest.raises(UnsupportedUnionError): - analyze_type(str | int) - - def test_union_base_type_is_first_member_name(self) -> None: - """UNION TypeInfo base_type is the first member's class name.""" - result = analyze_type( - Annotated[UnionModelA | UnionModelB, Field(description="test")] - ) - assert result.base_type == "UnionModelA" - - def test_optional_union_sets_is_optional(self) -> None: - """Union with None among model members sets is_optional.""" - result = analyze_type( - Annotated[UnionModelA | UnionModelB, Field(description="test")] | None - ) - assert result.kind == TypeKind.UNION - assert result.is_optional is True - class TestSingleLiteralValue: - """Tests for single_literal_value convenience accessor.""" - - def test_single_value_literal(self) -> None: - """Literal["x"] returns the literal value.""" + def test_single_string(self) -> None: assert single_literal_value(Literal["x"]) == "x" - def test_single_int_literal(self) -> None: - """Literal[42] returns the integer value.""" + def test_single_int(self) -> None: assert single_literal_value(Literal[42]) == 42 - def test_multi_value_literal_returns_none(self) -> None: - """Multi-value Literal returns None (no single default).""" + def test_multi_value_returns_none(self) -> None: assert single_literal_value(Literal["a", "b"]) is None def test_non_literal_returns_none(self) -> None: - """Non-Literal types return None.""" assert single_literal_value(str) is None - def test_unsupported_type_returns_none(self) -> None: - """Types that raise during analysis return None.""" + def test_unsupported_returns_none(self) -> None: assert single_literal_value("not a type") is None + + +class TestUnwrapList: + def test_plain_list(self) -> None: + assert unwrap_list(list[int]) is int + + def test_nested_list(self) -> None: + assert unwrap_list(list[list[str]]) is str + + def 
test_non_list_passthrough(self) -> None: + assert unwrap_list(int) is int + + def test_optional_list(self) -> None: + assert unwrap_list(list[int] | None) is int + + def test_optional_list_preserves_annotated(self) -> None: + from overture.schema.common.scoping.vehicle import VehicleSelector + + assert unwrap_list(list[VehicleSelector] | None) is VehicleSelector + + +class TestNestedArrayCharacterization: + """Pin analyze_type behavior on consecutive-list and NewType-chain shapes. + + The schema has no genuine `list[list[X]]` field, so these are the only + coverage of the path the recursive _unwrap rewrite must preserve. + """ + + def test_list_of_list_nests_two_arrayofs(self) -> None: + shape = _shape(list[list[str]]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, ArrayOf) + assert isinstance(shape.element.element, Primitive) + assert shape.element.element.base_type == "str" + + def test_list_of_list_constraints_anchor_to_their_layer(self) -> None: + # Each MinLen lands on the ArrayOf layer it annotates, not flattened. + # Outer Annotated[..., Field(min_length=3)] targets the outer list. + # Inner Annotated[list[str], Field(min_length=2)] targets the inner list. 
+ shape = _shape( + Annotated[ + list[Annotated[list[str], Field(min_length=2)]], Field(min_length=3) + ] + ) + assert isinstance(shape, ArrayOf) + inner = shape.element + assert isinstance(inner, ArrayOf) + outer_min_lens = [ + cs.constraint.min_length + for cs in shape.constraints + if isinstance(cs.constraint, ArrayMinLen) + ] + inner_min_lens = [ + cs.constraint.min_length + for cs in inner.constraints + if isinstance(cs.constraint, ArrayMinLen) + ] + assert outer_min_lens == [3] + assert inner_min_lens == [2] + + def test_nested_newtype_chain_flattens_to_one_wrapper(self) -> None: + # Id = NewType("Id", Annotated[NoWhitespaceString, Field(min_length=1)]) + shape = _shape(Id) + assert isinstance(shape, NewTypeShape) + assert shape.name == "Id" + # exactly one NewTypeShape -- the inner NoWhitespaceString does not nest + assert not isinstance(shape.inner, NewTypeShape) + assert isinstance(shape.inner, Primitive) + assert shape.inner.base_type == "NoWhitespaceString" + + def test_nested_newtype_constraint_order_outer_first(self) -> None: + shape = _shape(Id) + names = [cs.source_name for cs in all_constraints(shape)] + # Id's own constraint precedes NoWhitespaceString's + assert names == ["Id", "NoWhitespaceString"] + + def test_newtype_nested_as_list_element_flattens_under_outer_newtype(self) -> None: + # A NewType chain collapses to one NewTypeShape (the outermost) even + # when an inner NewType is nested across a list boundary -- the inner + # name survives only as the terminal `base_type`. 
+ InnerElem = NewType("InnerElem", str) + OuterList = NewType("OuterList", list[InnerElem]) + shape = _shape(OuterList) + assert isinstance(shape, NewTypeShape) + assert shape.name == "OuterList" + assert isinstance(shape.inner, ArrayOf) + # the InnerElem NewType does NOT produce its own NewTypeShape + assert isinstance(shape.inner.element, Primitive) + assert shape.inner.element.base_type == "InnerElem" + + def test_sole_list_element_newtype_keeps_its_wrapper(self) -> None: + # With no outer NewType, a list-element NewType IS the outermost -- + # it keeps its NewTypeShape (guards against over-erasing). + ElemOnly = NewType("ElemOnly", str) + shape = _shape(list[ElemOnly]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, NewTypeShape) + assert shape.element.name == "ElemOnly" + + def test_newtype_inside_dict_value_is_an_independent_spine(self) -> None: + # `dict` key/value are independent spines: a NewType in the value + # keeps its wrapper even under an outer NewType, because erasure + # stops at MapOf. 
+ DictValue = NewType("DictValue", str) + DictWrap = NewType("DictWrap", dict[str, DictValue]) + shape = _shape(DictWrap) + assert isinstance(shape, NewTypeShape) + assert shape.name == "DictWrap" + assert isinstance(shape.inner, MapOf) + assert isinstance(shape.inner.value, NewTypeShape) + assert shape.inner.value.name == "DictValue" diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py index 154b39e2c..2df73cf2f 100644 --- a/packages/overture-schema-codegen/tests/test_type_collection.py +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -6,13 +6,10 @@ FeatureWithUrl, Instrument, TestSegmentWithSubModel, + feature_spec_for_model, has_name, lookup_by_name, ) -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.specs import ( EnumSpec, ModelSpec, @@ -37,9 +34,7 @@ def _make_feature_with_sub_model(sub_model: type) -> type[BaseModel]: def _expanded_supplementary(model_class: type) -> dict[TypeIdentity, SupplementarySpec]: - spec = extract_model(model_class) - expand_model_tree(spec) - return collect_all_supplementary_types([spec]) + return collect_all_supplementary_types([feature_spec_for_model(model_class)]) class TestCollectAllSupplementarySpecs: @@ -77,11 +72,8 @@ def test_same_name_different_types_both_collected(self) -> None: ModelA = type("Address", (BaseModel,), {"__annotations__": {"x": str}}) ModelB = type("Address", (BaseModel,), {"__annotations__": {"y": int}}) - outer_a = extract_model(_make_feature_with_sub_model(ModelA)) - expand_model_tree(outer_a) - - outer_b = extract_model(_make_feature_with_sub_model(ModelB)) - expand_model_tree(outer_b) + outer_a = feature_spec_for_model(_make_feature_with_sub_model(ModelA)) + outer_b = feature_spec_for_model(_make_feature_with_sub_model(ModelB)) result = collect_all_supplementary_types([outer_a, outer_b]) diff 
--git a/packages/overture-schema-codegen/tests/test_type_placement.py b/packages/overture-schema-codegen/tests/test_type_placement.py index 63e26457c..8550a7319 100644 --- a/packages/overture-schema-codegen/tests/test_type_placement.py +++ b/packages/overture-schema-codegen/tests/test_type_placement.py @@ -11,7 +11,6 @@ lookup_by_name, make_union_spec, ) -from overture.schema.codegen.extraction.model_extraction import expand_model_tree from overture.schema.codegen.extraction.specs import ( AnnotatedField, FeatureSpec, @@ -45,9 +44,6 @@ def _build_registry( feature_specs: list[ModelSpec], ) -> tuple[dict[TypeIdentity, PurePosixPath], dict[TypeIdentity, SupplementarySpec]]: """Build placement registry with standard aggregate names.""" - cache: dict[type, ModelSpec] = {} - for spec in feature_specs: - expand_model_tree(spec, cache) all_specs = collect_all_supplementary_types(feature_specs) registry = build_placement_registry( feature_specs, all_specs, _NUMERIC_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT @@ -162,7 +158,7 @@ class A(Base): AnnotatedField( field_spec=FieldSpec( name="name", - type_info=STR_TYPE, + shape=STR_TYPE, description=None, is_required=True, ), diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py index b9d02d2ac..b2a4b45dc 100644 --- a/packages/overture-schema-codegen/tests/test_type_registry.py +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -1,7 +1,10 @@ """Tests for type registry.""" -import pytest -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.codegen.extraction.field import ( + ArrayOf, + NewTypeShape, + Primitive, +) from overture.schema.codegen.extraction.type_registry import ( PRIMITIVE_TYPES, TypeMapping, @@ -11,33 +14,32 @@ class TestTypeMapping: - """Tests for TypeMapping dataclass.""" - - def test_typemapping_accepts_markdown(self) -> None: - """TypeMapping should construct with 
markdown field.""" - mapping = TypeMapping(markdown="int32") - - assert mapping.markdown == "int32" - - def test_for_target_returns_markdown(self) -> None: - """for_target should return markdown representation for markdown target.""" - mapping = TypeMapping(markdown="int32") - - assert mapping.for_target("markdown") == "int32" - - def test_for_target_rejects_unknown_target(self) -> None: - """for_target should raise ValueError for unknown targets.""" - mapping = TypeMapping(markdown="int32") - - with pytest.raises(ValueError, match="Unknown target 'scala'"): - mapping.for_target("scala") + def test_markdown_field(self) -> None: + assert TypeMapping(markdown="int32").markdown == "int32" + + def test_spark_type_mapping(self) -> None: + cases = [ + ("str", "StringType()"), + ("int32", "IntegerType()"), + ("int64", "LongType()"), + ("float64", "DoubleType()"), + ("bool", "BooleanType()"), + ("Geometry", "BinaryType()"), + ("float32", "FloatType()"), + ] + for type_name, expected in cases: + mapping = get_type_mapping(type_name) + assert mapping is not None, f"No mapping for {type_name!r}" + assert mapping.spark == expected + + def test_bbox_has_no_spark_mapping(self) -> None: + mapping = get_type_mapping("BBox") + assert mapping is not None + assert mapping.spark is None class TestPrimitiveTypes: - """Tests for PRIMITIVE_TYPES registry.""" - def test_registry_contains_expected_types(self) -> None: - """Registry should contain all expected primitive types.""" expected_types = { "int8", "int16", @@ -55,89 +57,48 @@ def test_registry_contains_expected_types(self) -> None: "Geometry", "BBox", } - assert set(PRIMITIVE_TYPES.keys()) == expected_types def test_bbox_mapping(self) -> None: - """BBox should map to bbox.""" bbox = PRIMITIVE_TYPES["BBox"] - assert bbox.markdown == "bbox" + assert bbox.spark is None class TestGetTypeMapping: - """Tests for get_type_mapping function.""" - def test_returns_mapping_for_known_type(self) -> None: - """Should return TypeMapping for known 
primitive type.""" - result = get_type_mapping("int32") - - assert result is not None - assert result.markdown == "int32" + assert get_type_mapping("int32").markdown == "int32" # type: ignore[union-attr] def test_returns_none_for_unknown_type(self) -> None: - """Should return None for unknown type names.""" - result = get_type_mapping("unknown_type") - - assert result is None + assert get_type_mapping("unknown_type") is None def test_returns_mapping_for_builtin_int(self) -> None: - """Should map Python int to int64.""" - result = get_type_mapping("int") - - assert result is not None - assert result.markdown == "int64" - - def test_returns_mapping_for_builtin_float(self) -> None: - """Should map Python float to float64.""" - result = get_type_mapping("float") - - assert result is not None - assert result.markdown == "float64" - + assert get_type_mapping("int").markdown == "int64" # type: ignore[union-attr] -class TestResolveTypeNameNewTypeFallback: - """Tests for resolve_type_name with unregistered NewTypes.""" +class TestResolveTypeName: def test_unregistered_newtype_falls_back_to_source_type(self) -> None: - """Unregistered NewType resolves to source_type name.""" - ti = TypeInfo( - base_type="Sources", - kind=TypeKind.MODEL, - newtype_name="Sources", - source_type=type("SourceItem", (), {}), + cls = type("SourceItem", (), {}) + shape = NewTypeShape( + name="Sources", + ref=object(), + inner=Primitive(base_type="Sources", source_type=cls), ) - result = resolve_type_name(ti, "markdown") - - assert result == "SourceItem" + assert resolve_type_name(shape) == "SourceItem" - def test_registered_newtype_unaffected(self) -> None: - """Registered NewType (int32) still resolves through the registry.""" - ti = TypeInfo( - base_type="int32", - kind=TypeKind.PRIMITIVE, - newtype_name="int32", - source_type=int, + def test_registered_newtype_resolves_via_registry(self) -> None: + shape = NewTypeShape( + name="int32", + ref=object(), + inner=Primitive(base_type="int32", 
source_type=int), ) - result = resolve_type_name(ti, "markdown") - - assert result == "int32" + assert resolve_type_name(shape) == "int32" + def test_plain_scalar(self) -> None: + assert ( + resolve_type_name(Primitive(base_type="str", source_type=str)) == "string" + ) -class TestResolveTypeName: - """Tests for resolve_type_name with list/optional flags.""" - - def _make_type_info(self, **kwargs: object) -> TypeInfo: - defaults = {"base_type": "str", "kind": TypeKind.PRIMITIVE} - defaults.update(kwargs) - return TypeInfo(**defaults) # type: ignore[arg-type] - - def test_ignores_list_depth(self) -> None: - """resolve_type_name returns the base type regardless of list_depth.""" - ti = self._make_type_info(list_depth=1) - assert resolve_type_name(ti, "markdown") == "string" - - def test_ignores_is_optional(self) -> None: - """resolve_type_name returns the base type regardless of is_optional.""" - ti = self._make_type_info(is_optional=True) - assert resolve_type_name(ti, "markdown") == "string" + def test_array_of_scalar_resolves_terminal(self) -> None: + shape = ArrayOf(element=Primitive(base_type="str", source_type=str)) + assert resolve_type_name(shape) == "string" diff --git a/packages/overture-schema-codegen/tests/test_union_extraction.py b/packages/overture-schema-codegen/tests/test_union_extraction.py index a8b685c48..42b5e0c43 100644 --- a/packages/overture-schema-codegen/tests/test_union_extraction.py +++ b/packages/overture-schema-codegen/tests/test_union_extraction.py @@ -5,11 +5,14 @@ RailSegment, RoadSegment, SegmentBase, + TestEnumDiscriminatorUnion, TestSegment, + TestSegmentDivergingConstraints, WaterSegment, ) from overture.schema.codegen.extraction.specs import FieldSpec, UnionSpec from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.common.scoping.vehicle import VehicleSelector class TestExtractUnion: @@ -51,19 +54,19 @@ def test_shared_fields_first(self, segment_spec: UnionSpec) -> None: def 
test_variant_specific_fields_have_sources( self, segment_spec: UnionSpec ) -> None: - """Variant-only fields carry their source class names.""" + """Variant-only fields carry their source classes.""" speed = next( af for af in segment_spec.annotated_fields if af.field_spec.name == "speed_limit" ) - assert speed.variant_sources == ("RoadSegment",) + assert speed.variant_sources == (RoadSegment,) gauge = next( af for af in segment_spec.annotated_fields if af.field_spec.name == "rail_gauge" ) - assert gauge.variant_sources == ("RailSegment",) + assert gauge.variant_sources == (RailSegment,) def test_heterogeneous_same_name_produces_separate_rows( self, segment_spec: UnionSpec @@ -74,8 +77,8 @@ def test_heterogeneous_same_name_produces_separate_rows( ] assert len(class_fields) == 2 sources = {af.variant_sources for af in class_fields} - assert ("RoadSegment",) in sources - assert ("RailSegment",) in sources + assert (RoadSegment,) in sources + assert (RailSegment,) in sources def test_members_lists_all_member_classes(self, segment_spec: UnionSpec) -> None: """UnionSpec.members contains all union member classes.""" @@ -89,3 +92,43 @@ def test_fields_property_returns_plain_list(self, segment_spec: UnionSpec) -> No """spec.fields returns list[FieldSpec] without provenance.""" for f in segment_spec.fields: assert isinstance(f, FieldSpec) + + +class TestExtractDiscriminatorWithEnumLiterals: + """Discriminator mapping uses runtime string values for enum literals.""" + + @pytest.fixture + def spec(self) -> UnionSpec: + return extract_union("TestEnumDiscriminatorUnion", TestEnumDiscriminatorUnion) + + def test_discriminator_mapping_uses_enum_values(self, spec: UnionSpec) -> None: + """Mapping keys must be the Parquet-serialized string values, not enum repr.""" + assert spec.discriminator_mapping is not None + assert set(spec.discriminator_mapping.keys()) == {"car", "bike"} + + +class TestDivergingConstraints: + """Same-named fields with matching shape but diverging constraints 
fail loudly.""" + + def test_diverging_constraints_raise(self) -> None: + """A field shared by structure but not by constraints raises ValueError. + + `ShortNamesSegment` and `LongNamesSegment` both declare `aliases` + as `list[str] | None`, so the structural fingerprint collapses + them — but the `min_length` constraints differ. Dedup would + silently keep one member's `FieldSpec`, so extraction raises + instead. + """ + with pytest.raises(ValueError, match="diverging constraints"): + extract_union( + "TestSegmentDivergingConstraints", TestSegmentDivergingConstraints + ) + + +class TestUnionNameDerivation: + """Union name fallback when the caller passes a member class name.""" + + def test_name_derived_from_common_base(self) -> None: + """When name matches a member class, derive from common base minus 'Base' suffix.""" + spec = extract_union("VehicleAxleCountSelector", VehicleSelector) + assert spec.name == "VehicleSelector" diff --git a/packages/overture-schema-pyspark/README.md b/packages/overture-schema-pyspark/README.md new file mode 100644 index 000000000..ef13ce9e9 --- /dev/null +++ b/packages/overture-schema-pyspark/README.md @@ -0,0 +1,238 @@ +# overture-schema-pyspark + +PySpark validation expressions for Overture Maps data. Translates schema +constraints into composable PySpark Column expressions that validate +DataFrames and produce per-row, per-field error messages. + +Expression modules and the registry are generated by +[overture-schema-codegen](../overture-schema-codegen/). Regenerate after +schema changes rather than editing the generated output. 
+ +## Usage + +### Python API + +```python +from pyspark.sql import SparkSession + +from overture.schema.pyspark import validate_feature, explain_errors + +spark = SparkSession.builder.getOrCreate() +df = spark.read.parquet("samples/segment.parquet") + +result = validate_feature(df, "segment") + +result.evaluated.cache() +total_rows = result.evaluated.count() +error_count = result.error_rows().count() +print(f"{error_count} / {total_rows} rows with errors") + +if error_count > 0: + violations = explain_errors(result.evaluated, result.checks) + violations.select("id", "field", "check", "message").show(truncate=False) +``` + +`validate_feature()` looks up the feature type in the registry, compares +schemas, and evaluates all checks in a single pass. It returns a +`ValidationResult` with the evaluated DataFrame, the checks that ran, +any schema mismatches, and suppressed checks. + +| Function | Returns | Description | +| --- | --- | --- | +| `validate_feature(df, type)` | `ValidationResult` | Registry lookup, schema comparison, check evaluation. | +| `result.error_rows()` | `DataFrame` | Rows with at least one violation. Original columns only. | +| `explain_errors(evaluated, checks)` | `DataFrame` | One row per violation. Adds `field`, `check`, `message` columns. | +| `feature_names()` | `list[str]` | Available feature type names, sorted. | + +Lower-level helpers (`evaluate_checks`, `filter_errors`) are available +for consumers needing finer control. All public symbols are re-exported +from `overture.schema.pyspark`. 
+ +### CLI + +```bash +# Validate and show first 20 error rows (default) +overture-validate segment samples/segment.parquet + +# Custom output path, show first 50 violations +overture-validate segment samples/segment.parquet -o errors.parquet --head 50 + +# Count errors only (skip unpivot/explain) +overture-validate segment samples/segment.parquet --count-only + +# Pass Spark config +overture-validate segment samples/segment.parquet \ + --conf spark.master=local[4] + +# Continue past schema mismatches (e.g. Float vs Double on bbox) +overture-validate place s3a://overturemaps-us-west-2/release/2026-02-18.0 \ + --skip-schema-check + +# Skip checks for a column absent from the data +overture-validate segment data.parquet --skip-columns connector_ids + +# Ignore extra columns in the data that aren't in the schema +overture-validate segment data.parquet --ignore-extra-columns my_custom_col + +# Suppress all checks on a field +overture-validate segment data.parquet --suppress sources + +# Suppress a specific check (FIELD:CHECK) +overture-validate segment data.parquet --suppress version:bounds +``` + +The output Parquet contains one row per violation with the original columns +(minus geometry, if present) plus `field`, `check`, and `message`. Summary and the +first N violations print to the terminal; the full set is in the Parquet +file for further analysis. + +| Option | Description | +| --- | --- | +| `--skip-schema-check` | Warn on schema mismatches instead of aborting. | +| `--skip-columns COL` | Declare a column absent from the data; skips its checks and schema comparison. Repeatable. | +| `--ignore-extra-columns COL` | Ignore an extra data column not in the expected schema. Repeatable. | +| `--suppress SPEC` | Suppress checks. `FIELD` suppresses all checks on that root field; `FIELD:CHECK` suppresses one specific check. Repeatable. | +| `--count-only` | Report error count only; skip the explain/unpivot step. | +| `--conf KEY=VALUE` | Spark config pair. Repeatable. 
Overrides S3A defaults. | +| `-o`, `--output PATH` | Write violations to a Parquet file. | +| `--head N` | Number of error rows to display (default: 20). | + +### Path resolution + +The CLI resolves the input path to a Parquet read plan based on its +structure: + +| Path shape | Example | Behavior | +| --- | --- | --- | +| Hive partition path (contains `/theme=`) | `.../theme=transportation/type=segment/` | Reads directly; derives `basePath` so Spark discovers partition columns. | +| Individual file | `segment.parquet` | Reads directly; data already contains `theme`/`type` columns. | +| Release root | `s3a://overturemaps-us-west-2/release/2026-02-18.0` | Appends `theme={theme}/type={type}` using the schema's theme mapping; sets `basePath` to the original path. | + +This means you can point the CLI at a release root and it constructs the +full Hive path automatically: + +```bash +# These are equivalent: +overture-validate segment s3a://overturemaps-us-west-2/release/2026-02-18.0 +overture-validate segment s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ +``` + +### Reading from S3 + +Paths starting with `s3a://` are detected automatically. The CLI +configures `hadoop-aws`, the S3A filesystem implementation, and +anonymous credentials -- no setup required for public buckets like +the Overture release bucket: + +```bash +overture-validate segment \ + s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ +``` + +To use named AWS credentials instead of anonymous access: + +```bash +overture-validate segment \ + s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ \ + --conf spark.hadoop.fs.s3a.aws.credentials.provider=software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider +``` + +Any `--conf` values override the S3A defaults. 
+ +## Architecture + +```text +validate_feature() Entry point -- registry lookup, schema check, evaluation + | + list[Check] Interface -- frozen (field, name, expr, shape, root_field) records + | + expression builders Translation -- schema constraints to Column expressions + (generated by registered in REGISTRY + overture-schema-codegen) + | + column_patterns / Reusable PySpark building blocks + constraint_expressions +``` + +**Check** is the interface between expression builders and composition. +Each `Check` carries a PySpark `Column` expression (unevaluated), a `field` +name for error grouping, a `name` identifying the check type (e.g. +`"required"`, `"bounds"`, `"enum"`), a `shape` tag (`SCALAR` or `ARRAY`) +that tells `evaluate_checks()` how to normalize the result, and a `root_field` naming the top-level column the check belongs to (`None` for model-level checks). + +Expression builders (like `connector_checks()`) are generated by +`overture-schema-codegen` from Pydantic schema models and registered in +`REGISTRY` by feature type name, paired with an expected `StructType` +schema via `FeatureValidation`. + +## Generated expression builders + +Expression builders return `list[Check]`. The generated code uses constraint +expressions for common patterns and column patterns for structural wrappers. 
+Here's what the generated output looks like, using connector as an example: + +```python +from pyspark.sql import functions as F + +from overture.schema.pyspark.check import Check, CheckShape +from overture.schema.pyspark.expressions.column_patterns import array_check +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_bounds, + check_enum, + check_array_min_length, + check_required, +) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check( + "sources", + lambda el: check_required(el["dataset"]), + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) +``` + +The registry maps feature type names to `FeatureValidation` pairs: + +```python +from overture.schema.pyspark.check import FeatureValidation +from overture.schema.pyspark._registry import REGISTRY + +# REGISTRY is auto-generated: +# REGISTRY["connector"] = FeatureValidation(schema=CONNECTOR_SCHEMA, checks=connector_checks) +``` diff --git a/packages/overture-schema-pyspark/pyproject.toml b/packages/overture-schema-pyspark/pyproject.toml new file mode 100644 index 000000000..1206ee7ad --- /dev/null +++ b/packages/overture-schema-pyspark/pyproject.toml @@ -0,0 +1,27 @@ +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling"] + +[project] +dependencies = [ + "click>=8.0", + 
"overture-schema-system", + "pyspark>=3.4", +] +description = "PySpark validation expressions for Overture Maps data" +dynamic = ["version"] +license = "MIT" +name = "overture-schema-pyspark" +requires-python = ">=3.10" + +[project.scripts] +overture-validate = "overture.schema.pyspark.cli:validate_cli" + +[tool.hatch.build.targets.wheel] +packages = ["src/overture"] + +[tool.hatch.version] +path = "src/overture/schema/pyspark/__about__.py" + +[tool.uv.sources] +overture-schema-system = { workspace = true } diff --git a/packages/overture-schema-pyspark/src/overture/__init__.py b/packages/overture-schema-pyspark/src/overture/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-pyspark/src/overture/schema/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py new file mode 100644 index 000000000..3dc1f76bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py new file mode 100644 index 000000000..cb262b3d7 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py @@ -0,0 +1,27 @@ +"""PySpark validation expressions for Overture Maps data.""" + +from .check import Check, CheckShape +from .schema_check import 
SchemaMismatch, compare_schemas +from .validate import ( + ValidationResult, + evaluate_checks, + explain_errors, + feature_keys, + feature_names, + filter_errors, + validate_feature, +) + +__all__ = [ + "Check", + "CheckShape", + "SchemaMismatch", + "ValidationResult", + "compare_schemas", + "evaluate_checks", + "explain_errors", + "feature_keys", + "feature_names", + "filter_errors", + "validate_feature", +] diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py new file mode 100644 index 000000000..85158fb79 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py @@ -0,0 +1,67 @@ +"""Runtime registry of feature validations. + +Built at import time by walking the generated `expressions.generated` +namespace and collecting every module that exposes the +codegen-emitted `ENTRY_POINT` and `FEATURE_VALIDATION` constants. + +The generated tree on disk is the runtime source of truth: the +registry contains exactly what was generated, regardless of which +theme packages are installed alongside the pyspark package. A missing +`expressions/generated/` subtree simply yields an empty registry -- +the package still imports cleanly. +""" + +from __future__ import annotations + +import importlib +import logging +import pkgutil + +from .check import FeatureValidation + +logger = logging.getLogger(__name__) + +_GENERATED_ROOT = "overture.schema.pyspark.expressions.generated" + + +def _walk() -> tuple[dict[str, FeatureValidation], dict[str, dict[str, str]]]: + """Walk the generated tree and collect registry + partition map. + + Returns a `(registry, partition_map)` pair: + + * `registry` keys every feature by its `ENTRY_POINT` value. + * `partition_map` keys partitioned features by entry-point, mapping + to a Hive partition dict (e.g. `{"theme": "places", "type": + "place"}`) for path construction. 
Features with no `PARTITIONS` + data (empty dict) are omitted; the codegen only sets `PARTITIONS` + when the data lake organizes the feature by Hive partitions. + `type` is appended here from the module file name so consumers + get a complete partition path without the codegen having to + duplicate the type value. + """ + registry: dict[str, FeatureValidation] = {} + partition_map: dict[str, dict[str, str]] = {} + + try: + root = importlib.import_module(_GENERATED_ROOT) + except ImportError: + return registry, partition_map + + for info in pkgutil.walk_packages(root.__path__, prefix=root.__name__ + "."): + if info.ispkg: + continue + module = importlib.import_module(info.name) + entry_point = getattr(module, "ENTRY_POINT", None) + validation = getattr(module, "FEATURE_VALIDATION", None) + if entry_point is None or validation is None: + continue + registry[entry_point] = validation + partitions = getattr(module, "PARTITIONS", None) or {} + if partitions: + feature_type = info.name.rsplit(".", 1)[-1] + partition_map[entry_point] = {**partitions, "type": feature_type} + + return registry, partition_map + + +REGISTRY, PARTITION_MAP = _walk() diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py new file mode 100644 index 000000000..de6e5f955 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py @@ -0,0 +1,49 @@ +"""Check dataclass — interface between expression builders and composition.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from enum import Enum + +from pyspark.sql import Column +from pyspark.sql.types import StructType + +from overture.schema.system.primitive import GeometryType + + +class CheckShape(Enum): + """How the composition layer handles a check expression.""" + + SCALAR = "scalar" # expression returns nullable string + ARRAY = "array" # expression 
returns array + + +@dataclass(frozen=True) +class Check: + """One validation check. + + `field` identifies what the check is about (for error column naming + and report grouping), not how to access the data. The expression in + `expr` already encodes the access pattern. + + `root_field` is the top-level schema column the check belongs to, + or None for synthetic model-level checks (radio_group, require_any_of) + that don't correspond to a single column. Used by `validate_feature` + to suppress or skip checks by column name. + """ + + field: str + name: str + expr: Column + shape: CheckShape + root_field: str | None + + +@dataclass(frozen=True) +class FeatureValidation: + """Pairs an expected schema with check builders for a feature type.""" + + schema: StructType + checks: Callable[[], list[Check]] + geometry_types: tuple[GeometryType, ...] = () diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py new file mode 100644 index 000000000..1a8ada445 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py @@ -0,0 +1,239 @@ +"""CLI entry point for validation.""" + +from __future__ import annotations + +import sys +from collections.abc import Mapping +from dataclasses import dataclass + +import click +from pyspark.sql import DataFrame, SparkSession + +from overture.schema.system.discovery import resolve_entry_point_key +from overture.schema.system.primitive import GeometryType + +from ._registry import PARTITION_MAP, REGISTRY +from .validate import ( + explain_errors, + feature_names, + validate_feature, +) + + +@dataclass(frozen=True) +class ReadSpec: + """Parquet read plan. + + `data_path` selects the files to read; `base_path`, when set, tells + Spark where to start discovering Hive partition columns. 
+ """ + + data_path: str + base_path: str | None = None + + +def resolve_read(path: str, partitions: Mapping[str, str] | None) -> ReadSpec: + """Determine read strategy from path structure. + + Three cases: + + 1. **Hive partition path** (contains `/{key}=` for some key in + `partitions`) -- derive `basePath` so Spark discovers partition + columns. + 2. **Individual file** (`*.parquet`) or no partitions -- read + directly; data already contains the partition columns inline. + 3. **Release root** -- append the partition path + (`key1=v1/key2=v2/...`) and set `basePath` to the original path. + """ + stripped = path.rstrip("/") + + # Path already contains Hive partition directories + for key in partitions or (): + idx = stripped.find(f"/{key}=") + if idx >= 0: + return ReadSpec(data_path=path, base_path=stripped[:idx]) + + # Individual file or no partition mapping — data has partition columns inline + if stripped.endswith(".parquet") or not partitions: + return ReadSpec(data_path=path) + + # Release root — construct leaf path from partition map + partition_path = "/".join(f"{k}={v}" for k, v in partitions.items()) + return ReadSpec( + data_path=f"{stripped}/{partition_path}", + base_path=stripped, + ) + + +def read_feature(spark: SparkSession, spec: ReadSpec) -> DataFrame: + """Read a DataFrame according to a ReadSpec.""" + reader = spark.read + if spec.base_path: + reader = reader.option("basePath", spec.base_path) + return reader.parquet(spec.data_path) + + +_S3A_DEFAULTS: dict[str, str] = { + "spark.jars.packages": "org.apache.hadoop:hadoop-aws:3.4.1", + "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "spark.hadoop.fs.s3a.aws.credentials.provider": ( + "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" + ), +} + +_LARGE_GEOMETRY_TYPES = frozenset( + { + GeometryType.LINE_STRING, + GeometryType.MULTI_LINE_STRING, + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + GeometryType.GEOMETRY_COLLECTION, + } +) + + +def 
_may_have_large_geometry(feature_key: str) -> bool: + """Whether a registered feature's geometries may be large. + + Returns True when the registered geometry types include + (multi)linestrings, (multi)polygons, or geometry collections, + or when geometry types are unspecified (safe default). + """ + validation = REGISTRY[feature_key] + if not validation.geometry_types: + return True + return bool(set(validation.geometry_types) & _LARGE_GEOMETRY_TYPES) + + +def _spark_config(path: str, conf: tuple[str, ...], feature_key: str) -> dict[str, str]: + """Build Spark config dict with safe defaults. + + Disables the vectorized Parquet reader for features with large + geometries (polygons, linestrings) to avoid OOM on WKB binary + columns. Adds S3A credentials for `s3a://` paths. User-supplied + `--conf` values override any defaults. + """ + config: dict[str, str] = {} + if _may_have_large_geometry(feature_key): + config["spark.sql.parquet.enableVectorizedReader"] = "false" + if path.startswith("s3a://"): + config.update(_S3A_DEFAULTS) + for pair in conf: + key, _, value = pair.partition("=") + config[key] = value + return config + + +@click.command("overture-validate") +@click.argument("feature_type") +@click.argument("path") +@click.option("-o", "--output", default=None, help="Output path for validated Parquet.") +@click.option( + "--head", + "head_n", + default=20, + type=int, + show_default=True, + help="Error rows to display.", +) +@click.option("--conf", multiple=True, help="Spark config key=value pairs.") +@click.option( + "--count-only", + is_flag=True, + default=False, + help="Report error count only; skip explain/unpivot.", +) +@click.option( + "--skip-schema-check", + is_flag=True, + default=False, + help="Warn on schema mismatches instead of aborting.", +) +@click.option( + "--skip-columns", + multiple=True, + help="Columns declared absent from data; skips their checks.", +) +@click.option( + "--ignore-extra-columns", + multiple=True, + help="Extra data columns 
to ignore in schema comparison.", +) +@click.option( + "--suppress", + "suppress_specs", + multiple=True, + help="Suppress checks: FIELD (all checks) or FIELD:CHECK (specific).", +) +def validate_cli( + feature_type: str, + path: str, + output: str | None, + head_n: int, + conf: tuple[str, ...], + count_only: bool, + skip_schema_check: bool, + skip_columns: tuple[str, ...], + ignore_extra_columns: tuple[str, ...], + suppress_specs: tuple[str, ...], +) -> None: + """Validate Overture data at PATH and write annotated Parquet.""" + try: + resolved = resolve_entry_point_key(feature_type, REGISTRY) + except ValueError: + click.echo( + f"Unknown type '{feature_type}'. Known: {', '.join(feature_names())}", + err=True, + ) + sys.exit(1) + + builder = SparkSession.builder + for key, value in _spark_config(path, conf, resolved).items(): + builder = builder.config(key, value) + spark = builder.getOrCreate() + spark.sparkContext.setLogLevel("ERROR") + + spec = resolve_read(path, PARTITION_MAP.get(resolved)) + df = read_feature(spark, spec) + + suppress: list[str | tuple[str, str]] = [] + for s in suppress_specs: + if ":" in s: + field, name = s.split(":", 1) + suppress.append((field, name)) + else: + suppress.append(s) + + try: + result = validate_feature( + df, + resolved, + skip_columns=skip_columns, + ignore_extra_columns=ignore_extra_columns, + suppress=suppress, + ) + except ValueError as e: + click.echo(str(e), err=True) + sys.exit(1) + + if result.schema_mismatches: + click.echo(f"Schema mismatches for {resolved}:", err=True) + for m in result.schema_mismatches: + click.echo(f" {m.path}: expected {m.expected}, got {m.actual}", err=True) + if not skip_schema_check: + sys.exit(1) + + total_rows, error_count = result.row_counts() + click.echo(f"{error_count} / {total_rows} rows with errors", err=True) + + if error_count > 0: + if not count_only: + explained = explain_errors(result.evaluated, result.checks).drop("geometry") + if output and head_n > 0: + explained = 
explained.cache() + if output: + explained.write.mode("overwrite").parquet(output) + click.echo(f"Written to {output}", err=True) + if head_n > 0: + explained.show(head_n, truncate=False) + sys.exit(1) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py new file mode 100644 index 000000000..572e57314 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py @@ -0,0 +1 @@ +"""Expression builders and reusable PySpark column patterns.""" diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py new file mode 100644 index 000000000..3bc8ea809 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py @@ -0,0 +1,22 @@ +"""Hand-written Spark StructType fragments for types the codegen can't generate. + +The codegen builds feature schemas by walking Pydantic `BaseModel` +subclasses. `BBox` is a plain class, not a `BaseModel`, so extraction +can't reach it -- `BBOX_STRUCT` is hand-written here to fill the gap. +Every other nested type is a `BaseModel` and gets generated directly +into each feature module, which is why this file holds only the one +struct. 
+""" + +from __future__ import annotations + +from pyspark.sql.types import DoubleType, StructField, StructType + +BBOX_STRUCT = StructType( + [ + StructField("xmin", DoubleType(), True), + StructField("xmax", DoubleType(), True), + StructField("ymin", DoubleType(), True), + StructField("ymax", DoubleType(), True), + ] +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py new file mode 100644 index 000000000..c6d274790 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py @@ -0,0 +1,94 @@ +"""Structural PySpark column patterns for validation expression composition. + +These functions provide reusable wrappers for array iteration, null +guarding, and error message construction. Expression builders and +constraint translators compose them; codegen calls them rather than +reimplementing the patterns. +""" + +from __future__ import annotations + +from collections.abc import Callable + +from pyspark.sql import Column +from pyspark.sql import functions as F + + +def error_msg(prefix: str, *value_cols: Column) -> Column: + """Build an error message: literal prefix followed by interpolated values.""" + return F.concat(F.lit(prefix), *value_cols) + + +def _resolve_column(column: str | Column) -> Column: + """Resolve a string column name to a Column, passing Column through.""" + return F.col(column) if isinstance(column, str) else column + + +def _null_guarded_transform( + col: Column, + check_fn: Callable[[Column], Column], + flatten: bool = False, +) -> Column: + """Null-guard, transform, optionally flatten, and compact. + + When `flatten=True`, null inner arrays are coalesced to empty before + flattening. 
`F.flatten` returns NULL whenever any inner array is + NULL, which would silently drop sibling errors -- inner `array_check` + legitimately returns NULL when its column is null (e.g. an optional + nested array that's absent on some elements but populated on others). + """ + transformed = F.transform(col, check_fn) + if flatten: + empty = F.array().cast("array<string>") + transformed = F.flatten( + F.transform(transformed, lambda inner: F.coalesce(inner, empty)) + ) + return F.when(col.isNotNull(), F.array_compact(transformed)) + + +def array_check(column: str | Column, check_fn: Callable[[Column], Column]) -> Column: + """Null-guard a column, transform its elements, compact out nulls. + + *check_fn* receives each array element and returns a string Column + (error message) or null. + """ + return _null_guarded_transform(_resolve_column(column), check_fn) + + +def nested_array_check( + column: str | Column, check_fn: Callable[[Column], Column] +) -> Column: + """Like `array_check` but flattens nested error arrays. + + Use when *check_fn* itself returns an `array<string>` (e.g. an + inner `array_check`). The outer transform produces + `array<array<string>>`; this function flattens to `array<string>` + before compacting nulls. + """ + return _null_guarded_transform(_resolve_column(column), check_fn, flatten=True) + + +def check_struct_unique(column: str | Column) -> Column: + """Check that an array has no duplicate items by whole-element comparison. + + Compares `size(col)` against `size(array_distinct(col))`. + `array_distinct` handles struct and nested-array elements natively + in Spark 3.4+. + + For string arrays (e.g. websites, socials), this compares raw values. + Pydantic's UniqueItemsConstraint on `list[HttpUrl]` compares + *normalized* URLs (adds trailing slash, lowercases host and scheme), + so it catches duplicates that differ only in normalization. This + check catches exact duplicates only — the difference is accepted.
+ """ + col = _resolve_column(column) + has_duplicates = F.size(col) > F.size(F.array_distinct(col)) + return F.when( + col.isNotNull(), + F.when(has_duplicates, F.lit("contains duplicate items")), + ) + + +def coalesce_errors(check: Column) -> Column: + """Wrap an array-producing check so nulls become empty arrays.""" + return F.coalesce(check, F.array().cast("array<string>")) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py new file mode 100644 index 000000000..9982b1486 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py @@ -0,0 +1,484 @@ +"""Constraint type to PySpark Column expression translation. + +Semantic translation layer: maps constraint parameters to Column +expressions that detect violations. Analogous to +`field_constraint_description.py` in overture-schema-codegen +(which maps constraints to prose). + +Each function takes a column accessor (`F.col("x")` or +`el["field"]`) and constraint parameters. Returns a Column that +evaluates to an error string on violation or null on success. Field +identity is carried structurally by `Check.field`, not embedded in +error messages.
+""" + +from __future__ import annotations + +from collections.abc import Callable +from functools import reduce +from typing import Literal + +from pyspark.sql import Column +from pyspark.sql import functions as F + +from overture.schema.system.primitive import GeometryType + +from .column_patterns import error_msg + +_WKB_TYPE_HEX: dict[GeometryType, str] = { + GeometryType.POINT: "01", + GeometryType.LINE_STRING: "02", + GeometryType.POLYGON: "03", + GeometryType.MULTI_POINT: "04", + GeometryType.MULTI_LINE_STRING: "05", + GeometryType.MULTI_POLYGON: "06", + GeometryType.GEOMETRY_COLLECTION: "07", +} + + +_BOUND_OPS: dict[str, tuple[str, Callable[[Column, float | int], Column]]] = { + "ge": (">=", lambda c, v: c < v), + "gt": (">", lambda c, v: c <= v), + "le": ("<=", lambda c, v: c > v), + "lt": ("<", lambda c, v: c >= v), +} + + +def check_bounds( + col: Column, + *, + ge: float | int | None = None, + gt: float | int | None = None, + le: float | int | None = None, + lt: float | int | None = None, +) -> Column: + """Numeric bounds check. Returns error string or null.""" + checks: list[Column] = [] + for key, value in (("ge", ge), ("gt", gt), ("le", le), ("lt", lt)): + if value is None: + continue + symbol, violates = _BOUND_OPS[key] + checks.append( + F.when( + violates(col, value), + error_msg( + f"must be {symbol} {value}, got ", + col.cast("string"), + ), + ) + ) + if not checks: + return F.lit(None).cast("string") + # null col -> all F.when checks return null (no false positive) + return F.coalesce(*checks) + + +def check_enum( + col: Column, + allowed: list[str], +) -> Column: + """Enum membership check. Returns error string or null.""" + return F.when( + col.isNotNull() & ~col.isin(allowed), + error_msg("invalid value '", col.cast("string"), F.lit("'")), + ) + + +def check_required(col: Column) -> Column: + """Null check for required fields. 
Returns error string or null.""" + return F.when(col.isNull(), F.lit("missing (null)")) + + +def check_pattern(col: Column, pattern: str, *, label: str) -> Column: + """Regex pattern check via rlike. Returns error string or null. + + Parameters + ---------- + col + Column to validate. + pattern + Java regex pattern (use `\\z` for absolute end-of-input). + label + Human-readable description used in error messages: + `"invalid {label}: got '...'"` + """ + msg = error_msg(f"invalid {label}: got '", col.cast("string"), F.lit("'")) + return F.when(col.isNotNull() & ~col.rlike(pattern), msg) + + +def check_url_format(col: Column) -> Column: + """HTTP/HTTPS URL format check via pattern match. Returns error string or null. + + Pydantic's `HttpUrl` additionally normalizes values (adds trailing + slash, lowercases host and scheme) before validation and comparison. + This check validates the raw string without normalization — format + acceptance is broader, and downstream uniqueness checks compare + un-normalized values. + """ + return check_pattern(col, r"^https?://[^\s]+\z", label="HTTP/HTTPS URL") + + +def check_url_length(col: Column) -> Column: + """URL length check: must not exceed 2083 characters. Returns error string or null.""" + return F.when( + col.isNotNull() & (F.length(col) > 2083), + error_msg("URL exceeds 2083 characters: length ", F.length(col).cast("string")), + ) + + +def check_email(col: Column) -> Column: + """Email address format check. Returns error string or null.""" + return check_pattern( + col, + r"^[^\s@.]+(\.[^\s@.]+)*@([^\s@.]+\.)+[^\s@.]+\z", + label="email address", + ) + + +def _check_length( + col: Column, + measure: Column, + limit: int, + *, + direction: Literal["minimum", "maximum"], +) -> Column: + """Shared length-check logic for arrays and strings. + + *measure* is the pre-computed size/length column. + *direction* is `"minimum"` or `"maximum"`, controlling the + comparison operator and error label. 
+ """ + violation = measure < limit if direction == "minimum" else measure > limit + return F.when( + col.isNotNull() & violation, + error_msg(f"{direction} length {limit}, got ", measure.cast("string")), + ) + + +def check_array_min_length(col: Column, min_len: int) -> Column: + """Array minimum length check. Returns error string or null.""" + return _check_length(col, F.size(col), min_len, direction="minimum") + + +def check_array_max_length(col: Column, max_len: int) -> Column: + """Array maximum length check. Returns error string or null.""" + return _check_length(col, F.size(col), max_len, direction="maximum") + + +def check_string_min_length(col: Column, min_len: int) -> Column: + """String minimum character length check. Returns error string or null.""" + return _check_length(col, F.length(col), min_len, direction="minimum") + + +def check_string_max_length(col: Column, max_len: int) -> Column: + """String maximum character length check. Returns error string or null.""" + return _check_length(col, F.length(col), max_len, direction="maximum") + + +_STRIPPED_PATTERN = r"(?sU)^[^\s\p{Cc}](.*[^\s\p{Cc}])?\z" +r"""Java regex: reject whitespace AND control characters at string boundaries. + +Boundary class `[^\s\p{Cc}]` rejects two categories at the first and +last character positions: + +1. **Whitespace** (`\s` with `(?U)`): Unicode `White_Space` property + — space, tab, newline, NBSP, em-space, etc. +2. **Control characters** (`\p{Cc}`): Unicode "Control" category — + C0 (U+0000-001F), DEL (U+007F), and C1 (U+0080-009F). + +Why both are needed: Python's `\s` (and `str.strip()`) treats +U+001C-001F (file/group/record/unit separators) as whitespace. Java's +`\s` with `(?U)` follows the Unicode `White_Space` property, which +excludes those four characters. Using `\S` alone in Java misses them, +allowing strings like `"Main St \x1f"` to pass. 
Adding `\p{Cc}` +closes that gap and also rejects other control characters (NUL, SOH, +DEL, C1 controls) that have no place at string boundaries. + +Interior control characters (middle of the string) are NOT rejected — +the `.*` in the middle position still matches anything. Policing +interior content is a separate concern. + +Flags: `(?s)` (DOTALL) lets `.*` cross newlines. `(?U)` +(UNICODE_CHARACTER_CLASS) gives `\s` full Unicode coverage. `\z` +(absolute end-of-input) avoids `$` matching before a trailing newline. +""" + + +def check_stripped(col: Column) -> Column: + """No leading/trailing whitespace or control characters. Returns error string or null.""" + return F.when( + col.isNotNull() & (F.length(col) > 0) & ~col.rlike(_STRIPPED_PATTERN), + error_msg("leading/trailing whitespace"), + ) + + +def check_json_pointer(col: Column) -> Column: + """JSON Pointer (RFC 6901) format check. + + Valid pointers start with `/` or are the empty string (which + references the whole document). + """ + return F.when( + col.isNotNull() & (col != "") & ~col.startswith("/"), + error_msg( + "invalid JSON pointer, must start with /, got '", + col.cast("string"), + F.lit("'"), + ), + ) + + +def check_linear_range_length(col: Column) -> Column: + """Linear reference range length check: exactly 2 elements.""" + size = F.size(col) + return F.when( + col.isNotNull() & (size != 2), + error_msg("must have exactly 2 elements, got ", size.cast("string")), + ) + + +def check_linear_range_bounds(col: Column) -> Column: + """Linear reference range bounds check: both values in [0.0, 1.0]. + + The `F.size(col) == 2` guard skips wrong-length arrays so this + check only fires when exactly two elements are present. Length + validation is `check_linear_range_length`'s responsibility. 
+ """ + size = F.size(col) + v0, v1 = F.get(col, 0), F.get(col, 1) + return F.when( + col.isNotNull() + & (size == 2) + & ((v0 < 0.0) | (v0 > 1.0) | (v1 < 0.0) | (v1 > 1.0)), + error_msg( + "values must be in [0.0, 1.0], got [", + v0.cast("string"), + F.lit(", "), + v1.cast("string"), + F.lit("]"), + ), + ) + + +def check_linear_range_order(col: Column) -> Column: + """Linear reference range ordering check: start < end. + + The `F.size(col) == 2` guard skips wrong-length arrays so this + check only fires when exactly two elements are present. Length + validation is `check_linear_range_length`'s responsibility. + """ + size = F.size(col) + return F.when( + col.isNotNull() & (size == 2) & (F.get(col, 0) >= F.get(col, 1)), + error_msg("start must be < end"), + ) + + +def check_radio_group( + cols: list[Column], + field_names: list[str], +) -> Column: + """Exactly one of the given boolean columns must be True.""" + true_count = reduce( + lambda a, b: a + b, + (F.when(c, 1).otherwise(0) for c in cols), + ) + names = ", ".join(field_names) + return F.when( + true_count != 1, + error_msg( + f"exactly one of {names} must be true, got ", + true_count.cast("string"), + F.lit(" true"), + ), + ) + + +def _count_non_null(cols: list[Column]) -> Column: + """Sum of non-null indicators across *cols*.""" + return reduce( + lambda a, b: a + b, + (F.when(c.isNotNull(), 1).otherwise(0) for c in cols), + ) + + +def check_require_any_of( + cols: list[Column], + field_names: list[str], +) -> Column: + """At least one of the given columns must be non-null.""" + all_null = reduce(lambda a, b: a & b, (c.isNull() for c in cols)) + names = ", ".join(field_names) + return F.when(all_null, F.lit(f"requires at least one of {names}")) + + +def check_min_fields_set( + cols: list[Column], + field_names: list[str], + count: int, +) -> Column: + """At least *count* of the given columns must be non-null. + + Parameters + ---------- + cols + Column expressions to test for non-null. 
+ field_names + Human-readable names for each column, used in the error message. + count + Minimum number of non-null columns required. + + Returns + ------- + Column + Error string on violation, null on success. + """ + non_null_count = _count_non_null(cols) + names = ", ".join(field_names) + return F.when( + non_null_count < count, + error_msg( + f"at least {count} of {names} required, got ", + non_null_count.cast("string"), + F.lit(" non-null"), + ), + ) + + +def _check_conditional_presence( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, + expect_present: bool, +) -> Column: + """Shared logic for require_if / forbid_if. + + *expect_present=True* means target must be non-null when condition + holds (require); *False* means target must be null (forbid). + """ + word = "required" if expect_present else "forbidden" + target_test = target.isNull() if expect_present else target.isNotNull() + prefix = f"{word} when {condition_desc}" + if condition_value_cols: + interleaved = [ + p + for vc in condition_value_cols + for p in (F.lit(", got "), vc.cast("string")) + ] + msg = error_msg(prefix, *interleaved) + else: + msg = F.lit(prefix) + return F.when(condition & target_test, msg) + + +def check_require_if( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, +) -> Column: + """Target must be non-null when condition is true.""" + return _check_conditional_presence( + target, + condition, + condition_desc, + *condition_value_cols, + expect_present=True, + ) + + +def check_forbid_if( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, +) -> Column: + """Target must be null when condition is true.""" + return _check_conditional_presence( + target, + condition, + condition_desc, + *condition_value_cols, + expect_present=False, + ) + + +def check_geometry_type( + col: Column, + *allowed: GeometryType, +) -> Column: + """Geometry type check via 
WKB header byte parsing. + + Reads the endianness indicator and type uint32 from the WKB binary + without deserializing coordinates. O(1) per row regardless of + geometry complexity. + + Extracts only the low byte of the type uint32, which is safe for + OGC types 1-7 and immune to Z/M/ZM flag bits (those modify high + bytes only). + """ + hex_geom = F.hex(col) + byte_order = F.substring(hex_geom, 1, 2) + # LE: type LSB at hex positions 3-4 + # BE: type LSB at hex positions 9-10 + type_hex = F.when( + byte_order == "01", + F.substring(hex_geom, 3, 2), + ).otherwise( + F.substring(hex_geom, 9, 2), + ) + allowed_hex = [_WKB_TYPE_HEX[t] for t in allowed] + names = " | ".join(t.geo_json_type for t in allowed) + if len(allowed_hex) == 1: + violation = type_hex != allowed_hex[0] + else: + violation = ~type_hex.isin(allowed_hex) + return F.when( + col.isNotNull() & violation, + error_msg(f"expected {names} geometry"), + ) + + +def check_bbox_completeness(col: Column) -> Column: + """Check that all bbox sub-fields are present when bbox is non-null.""" + return F.when( + col.isNotNull() + & ( + col["xmin"].isNull() + | col["ymin"].isNull() + | col["xmax"].isNull() + | col["ymax"].isNull() + ), + error_msg("bbox sub-fields must all be present"), + ) + + +def check_bbox_lat_ordering(col: Column) -> Column: + """Check that ymin does not exceed ymax.""" + return F.when( + col.isNotNull() & (col["ymin"] > col["ymax"]), + error_msg("expected ymin <= ymax"), + ) + + +def check_bbox_lat_range(col: Column) -> Column: + """Check that latitude values fall within [-90, 90].""" + return F.when( + col.isNotNull() + & ( + (col["ymin"] < -90) + | (col["ymin"] > 90) + | (col["ymax"] < -90) + | (col["ymax"] > 90) + ), + error_msg("latitude values must be in [-90, 90]"), + ) + + +# TODO: check_bbox_lon_ordering -- deferred pending antimeridian crossing +# policy. RFC 7946 section 5.2 allows xmin > xmax for bboxes that cross +# the antimeridian. 
+ +# TODO: check_bbox_lon_range -- deferred pending decision on whether +# coordinates can wrap beyond [-180, 180]. diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed b/packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py new file mode 100644 index 000000000..8376ff5b0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py @@ -0,0 +1,109 @@ +"""Schema comparison for structural validation. + +Recursively diffs two `StructType` objects and reports mismatches +as a flat list with dot-notation paths. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from pyspark.sql.types import ( + ArrayType, + DataType, + MapType, + StructType, +) + + +@dataclass(frozen=True) +class SchemaMismatch: + """One structural difference between actual and expected schemas. + + Parameters + ---------- + path + Dot-notation path to the field (e.g. `"bbox.xmin"`). + actual + Actual type name, or `"missing"` if the field is absent. + expected + Expected type name, or `"missing"` if the field is unexpected. + """ + + path: str + actual: str + expected: str + + +def _type_name(dt: DataType) -> str: + """Short display name for a DataType (e.g. 
`"StringType"`).""" + return type(dt).__name__ + + +def _compare( + actual: DataType, + expected: DataType, + prefix: str, + out: list[SchemaMismatch], +) -> None: + """Recursively compare two DataType trees.""" + if isinstance(expected, StructType) and isinstance(actual, StructType): + _compare_structs(actual, expected, prefix, out) + return + + if isinstance(expected, ArrayType) and isinstance(actual, ArrayType): + _compare(actual.elementType, expected.elementType, f"{prefix}[]", out) + return + + if isinstance(expected, MapType) and isinstance(actual, MapType): + _compare(actual.keyType, expected.keyType, f"{prefix}{{key}}", out) + _compare(actual.valueType, expected.valueType, f"{prefix}{{value}}", out) + return + + if type(actual) is not type(expected): + out.append(SchemaMismatch(prefix, _type_name(actual), _type_name(expected))) + + +def _compare_structs( + actual: StructType, + expected: StructType, + prefix: str, + out: list[SchemaMismatch], +) -> None: + """Compare two StructTypes field by field.""" + actual_fields = {f.name: f for f in actual.fields} + expected_fields = {f.name: f for f in expected.fields} + + # Ordered union: actual fields first, then any expected-only fields appended. + all_names = dict.fromkeys([*actual_fields, *expected_fields]) + for name in all_names: + path = f"{prefix}.{name}" if prefix else name + a = actual_fields.get(name) + e = expected_fields.get(name) + if a is None and e is not None: + out.append(SchemaMismatch(path, "missing", _type_name(e.dataType))) + elif e is None and a is not None: + out.append(SchemaMismatch(path, _type_name(a.dataType), "missing")) + elif a is not None and e is not None: + _compare(a.dataType, e.dataType, path, out) + + +def compare_schemas(actual: StructType, expected: StructType) -> list[SchemaMismatch]: + """Compare two Spark schemas and return all mismatches. + + Parameters + ---------- + actual + Schema inferred from the data (e.g. `df.schema`). 
+ expected + Declared expected schema for the feature type. + + Returns + ------- + list[SchemaMismatch] + Empty when schemas match. Each mismatch identifies the + dot-notation path and what differs. + """ + out: list[SchemaMismatch] = [] + _compare_structs(actual, expected, "", out) + return out diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py new file mode 100644 index 000000000..9b03ed34b --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py @@ -0,0 +1,334 @@ +"""Validation pipeline for Overture feature data. + +`validate_feature()` is the primary entry point: it looks up the +feature type in the registry, compares schemas, filters checks, and +evaluates them in a single pass. Returns a `ValidationResult` +carrying the evaluated DataFrame and metadata. + +Lower-level helpers (`evaluate_checks`, `filter_errors`, +`explain_errors`) are available for consumers needing finer control. +""" + +from __future__ import annotations + +import re +from collections import Counter +from collections.abc import Iterable +from dataclasses import dataclass + +from pyspark.sql import DataFrame +from pyspark.sql import functions as F +from pyspark.sql.types import StringType, StructField, StructType + +from overture.schema.system.discovery import ( + entry_point_class_alias, + resolve_entry_point_key, +) + +from ._registry import REGISTRY +from .check import Check, CheckShape +from .expressions.column_patterns import coalesce_errors +from .schema_check import SchemaMismatch, compare_schemas + + +def feature_keys() -> list[str]: + """Canonical entry-point keys registered in the validation registry.""" + return sorted(REGISTRY) + + +def feature_names() -> list[str]: + """All names `validate_feature` accepts. 
+ + Includes canonical entry-point keys and the snake-case class-name + aliases the resolver recognizes (only when an alias is unambiguous). + """ + aliases = { + name + for name, count in Counter(entry_point_class_alias(k) for k in REGISTRY).items() + if count == 1 + } + return sorted(set(REGISTRY) | aliases) + + +def _normalize_suppress( + suppress: Iterable[str | tuple[str, str] | Check], +) -> tuple[set[str], set[tuple[str, str]]]: + """Partition suppress entries into root field names and (field, name) pairs. + + Parameters + ---------- + suppress + Mix of bare field name strings, `(field, name)` tuples, and + `Check` objects. + + Returns + ------- + tuple[set[str], set[tuple[str, str]]] + `(root_fields, pairs)` where `root_fields` is bare field names + and `pairs` is `(field, name)` pairs extracted from tuples and + Check objects. + """ + root_fields: set[str] = set() + pairs: set[tuple[str, str]] = set() + for entry in suppress: + if isinstance(entry, str): + root_fields.add(entry) + elif isinstance(entry, Check): + pairs.add((entry.field, entry.name)) + else: + pairs.add(entry) + return root_fields, pairs + + +# Matches the `_err_<N>` columns `evaluate_checks` appends; ordinary +# user columns starting with `_err_` (but not followed by digits only) +# are preserved. +_ERR_COLUMN = re.compile(r"^_err_\d+$") + + +def _non_error_columns(evaluated: DataFrame) -> list[str]: + """Column names excluding `_err_N` error columns appended by `evaluate_checks`.""" + return [c for c in evaluated.columns if not _ERR_COLUMN.match(c)] + + +def evaluate_checks(df: DataFrame, checks: list[Check]) -> DataFrame: + """Append `_err_N` columns for each check. + + Returns the input DataFrame with one `array<string>` column per check, + containing error messages (non-empty) or null/empty (no error).
def filter_errors(evaluated: DataFrame, checks: list[Check]) -> DataFrame:
    """Filter an evaluated DataFrame to rows with at least one error.

    Parameters
    ----------
    evaluated
        DataFrame produced by `evaluate_checks()`.
    checks
        Same check list passed to `evaluate_checks()`. May be empty, in
        which case no row can carry an error and the result is empty.

    Returns
    -------
    DataFrame
        Original columns only (`_err_N` columns stripped).
    """
    original_columns = _non_error_columns(evaluated)
    if not checks:
        # `_max_error_size` requires n >= 1 (with n == 0 it would call
        # `F.greatest()` with no columns). With no checks there is nothing
        # to violate, so return an empty frame, matching what
        # `ValidationResult.error_rows()` does for the same case.
        return evaluated.limit(0).select(*original_columns)
    return evaluated.filter(_max_error_size(len(checks)) > 0).select(
        *original_columns
    )
+ """ + orig_cols = _non_error_columns(evaluated) + n = len(checks) + if n == 0: + empty_schema = StructType( + [ + *evaluated.select(*orig_cols).schema.fields, + StructField("field", StringType(), True), + StructField("check", StringType(), True), + StructField("message", StringType(), True), + ] + ) + return evaluated.sparkSession.createDataFrame([], empty_schema) + stack_args = ", ".join(f"{i}, `_err_{i}`" for i in range(n)) + unpivoted = evaluated.select( + *orig_cols, + F.expr(f"stack({n}, {stack_args}) as (_idx, _errors)"), + ).filter(F.col("_errors").isNotNull() & (F.size("_errors") > 0)) + + exploded = unpivoted.select( + *orig_cols, + "_idx", + F.explode("_errors").alias("message"), + ) + + meta_df = evaluated.sparkSession.createDataFrame( + [(i, c.field, c.name) for i, c in enumerate(checks)], + ["_idx", "field", "check"], + ) + + return exploded.join(F.broadcast(meta_df), "_idx").select( + *orig_cols, "field", "check", "message" + ) + + +@dataclass(frozen=True) +class ValidationResult: + """Result of validate_feature(). + + Consumer owns caching of `evaluated`. Call `error_rows()` for + the filtered view; use `explain_errors(result.evaluated, + result.checks)` for the opt-in UNPIVOT. + """ + + evaluated: DataFrame + checks: list[Check] + schema_mismatches: list[SchemaMismatch] + suppressed_checks: list[Check] + + def error_rows(self) -> DataFrame: + """Rows with at least one violation. Original columns only.""" + if not self.checks: + return self.evaluated.limit(0) + return filter_errors(self.evaluated, self.checks) + + def row_counts(self) -> tuple[int, int]: + """Count total and error rows in a single pass. + + Computes both counts with one aggregation over the evaluated + DataFrame, avoiding the need to cache before counting. + + Returns + ------- + tuple[int, int] + `(total_rows, error_rows)`. 
def validate_feature(
    df: DataFrame,
    feature_type: str,
    *,
    skip_columns: Iterable[str] = (),
    ignore_extra_columns: Iterable[str] = (),
    suppress: Iterable[str | tuple[str, str] | Check] = (),
) -> ValidationResult:
    """Validate a DataFrame against a registered feature type.

    Resolves `feature_type` in the registry, compares `df.schema` with the
    registered schema, drops skipped/suppressed checks, and evaluates the
    remaining checks with `evaluate_checks()`.

    Parameters
    ----------
    df
        Input DataFrame to validate.
    feature_type
        Registered feature type name (e.g. `"building"`).
    skip_columns
        Columns declared absent from the data. Raises `ValueError`
        if any are present in `df.columns`. Schema mismatches and
        checks rooted at these columns are dropped.
    ignore_extra_columns
        Columns that may be present in the data but absent from the
        expected schema.
    suppress
        Checks to remove before evaluation. Bare strings suppress by
        root field; tuples by exact `(field, name)`; Check objects
        by extracting `(field, name)`. Raises `ValueError` if any
        entry doesn't match a registered check.

    Returns
    -------
    ValidationResult
        Carries the evaluated DataFrame (input plus `_err_N` columns),
        the checks that were evaluated, the filtered schema mismatches,
        and the checks removed via `suppress`. Checks dropped because
        their root field is in `skip_columns` are not reported.

    Raises
    ------
    ValueError
        If `feature_type` isn't registered. Message includes the
        sorted list of known types.
        NOTE(review): this originates in `resolve_entry_point_key`,
        which is not visible here -- confirm.
    ValueError
        If any `skip_columns` entry is present in `df.columns`.
    ValueError
        If any `suppress` entry matches no check of the feature type.
    """
    feature_type = resolve_entry_point_key(feature_type, REGISTRY)
    validation = REGISTRY[feature_type]
    skip = frozenset(skip_columns)
    ignore_extra = frozenset(ignore_extra_columns)
    suppress_roots, suppress_pairs = _normalize_suppress(suppress)

    # Validate skip_columns are actually absent
    present = skip & set(df.columns)
    if present:
        raise ValueError(
            f"skip_columns {sorted(present)} are present in the "
            f"DataFrame; remove them from skip_columns or drop them "
            f"from the data"
        )

    # Schema comparison with filtering
    raw_mismatches = compare_schemas(df.schema, validation.schema)
    mismatches = []
    for m in raw_mismatches:
        # Mismatch paths use dot notation; the first segment is the
        # top-level column the mismatch belongs to.
        root = m.path.split(".", 1)[0]
        if root in skip:
            continue
        # Presumably `expected == "missing"` marks a path the expected
        # schema does not declare (an extra column) -- verify against
        # `SchemaMismatch`.
        if m.expected == "missing" and root in ignore_extra:
            continue
        mismatches.append(m)

    # Validate suppress entries match real checks before filtering
    all_checks = validation.checks()
    valid_roots = {c.root_field for c in all_checks if c.root_field is not None}
    valid_pairs = {(c.field, c.name) for c in all_checks}
    unmatched_roots = suppress_roots - valid_roots
    unmatched_pairs = suppress_pairs - valid_pairs
    if unmatched_roots or unmatched_pairs:
        parts = []
        if unmatched_roots:
            parts.append(f"unknown root fields {sorted(unmatched_roots)}")
        if unmatched_pairs:
            parts.append(f"unknown (field, name) pairs {sorted(unmatched_pairs)}")
        raise ValueError(
            f"suppress entries don't match any check for {feature_type!r}: "
            + "; ".join(parts)
        )

    # Check filtering
    # Precedence: skip_columns wins over suppress; root-level suppression
    # is tested before exact (field, name) suppression.
    kept: list[Check] = []
    suppressed: list[Check] = []
    for chk in all_checks:
        if chk.root_field is not None and chk.root_field in skip:
            continue  # structurally absent, not tracked in suppressed
        if chk.root_field is not None and chk.root_field in suppress_roots:
            suppressed.append(chk)
            continue
        if (chk.field, chk.name) in suppress_pairs:
            suppressed.append(chk)
            continue
        kept.append(chk)

    evaluated = evaluate_checks(df, kept)
    return ValidationResult(
        evaluated=evaluated,
        checks=kept,
        schema_mismatches=mismatches,
        suppressed_checks=suppressed,
    )
def build_scenario_rows(
    base_row: dict[str, Any],
    scenarios: Sequence[Scenario],
    *,
    feature_name: str,
) -> tuple[list[dict[str, Any]], dict[str, str], dict[str, str]]:
    """Build the baseline, valid, and invalid rows for a set of scenarios.

    Parameters
    ----------
    base_row
        Valid base row dict from the example loader.
    scenarios
        Scenarios to apply.
    feature_name
        Feature name used to label the baseline row.

    Returns
    -------
    tuple
        `(rows, scenario_map, skipped)`: the row dicts (baseline first,
        then a valid/invalid pair per buildable scenario), the mapping
        from `_scenario_id` values to scenario IDs, and a mapping from
        scenario ID to the reason it was skipped.
    """
    id_to_label = build_scenario_map(scenarios, feature_name=feature_name)
    clean_base = sanitize_row(base_row)

    def tagged_base(label: str) -> dict[str, Any]:
        # Every row gets its own deep copy so nested structures are never
        # aliased between rows (or with the sanitized base row).
        return {**copy.deepcopy(clean_base), "_scenario_id": scenario_uuid(label)}

    rows: list[dict[str, Any]] = [tagged_base(f"{feature_name}::baseline")]
    skipped: dict[str, str] = {}

    for scenario in scenarios:
        try:
            broken = sanitize_row(
                scenario.mutate(deep_merge(clean_base, scenario.scaffold))
            )
        except PathTraversalError as exc:
            # The mutation could not reach its target path in this row;
            # record why and emit neither row of the pair.
            skipped[scenario.id] = str(exc)
            continue
        broken["_scenario_id"] = scenario_uuid(f"{scenario.id}::invalid")
        rows.append(tagged_base(f"{scenario.id}::valid"))
        rows.append(broken)

    return rows, id_to_label, skipped
+ """ + return _sanitize_in_place(copy.deepcopy(row)) + + +def _sanitize_in_place(d: dict[str, Any]) -> dict[str, Any]: + for key, value in d.items(): + if isinstance(value, dict): + d[key] = _sanitize_in_place(value) + elif isinstance(value, list): + d[key] = [ + _sanitize_in_place(item) if isinstance(item, dict) else item + for item in value + ] + elif ( + key == _GEOMETRY_FIELD + and isinstance(value, str) + and value.upper().startswith(_WKT_PREFIXES) + ): + d[key] = wkb.dumps(wkt.loads(value)) + return d + + +def assert_schema_covers_checks(schema: StructType, checks: list[Check]) -> None: + """Assert every check's root field exists in the schema. + + Synthetic model-level checks (`root_field=None`) pass + unconditionally. Otherwise the root must be a top-level schema + column. This is a fast sanity check; deeper field paths are the + codegen's responsibility and surface at Spark execution time. + """ + top_level = {f.name for f in schema.fields} + for chk in checks: + if chk.root_field is None or chk.root_field in top_level: + continue + raise AssertionError( + f"Check references root field {chk.root_field!r} " + f"not found in schema. Available: {sorted(top_level)}" + ) + + +def run_validation_pipeline( + spark: SparkSession, + schema: StructType, + checks: list[Check], + base_row: dict[str, Any], + scenarios: Sequence[Scenario], + feature_name: str, +) -> ValidationResults: + """Run the full validation pipeline. + + Returns a ValidationResults with violations indexed by scenario ID and + a skipped dict for scenarios that could not be built due to path + traversal errors. 
def index_violations(
    violation_rows: list[Any],
    scenario_map: dict[str, str],
) -> dict[str, set[tuple[str, str]]]:
    """Group collected violation rows by human-readable scenario ID.

    Parameters
    ----------
    violation_rows
        Collected rows of the `explain_errors()` output. Each row must
        support lookup of `"_scenario_id"`, `"field"` and `"check"`.
    scenario_map
        Mapping from `_scenario_id` values to scenario IDs.

    Returns
    -------
    dict[str, set[tuple[str, str]]]
        Scenario ID -> set of `(field, check)` pairs. Rows whose
        `_scenario_id` is not in `scenario_map` are ignored.
    """
    indexed: dict[str, set[tuple[str, str]]] = {}
    for violation in violation_rows:
        label = scenario_map.get(violation["_scenario_id"])
        if label is not None:
            bucket = indexed.get(label)
            if bucket is None:
                bucket = indexed[label] = set()
            bucket.add((violation["field"], violation["check"]))
    return indexed
All other values (including lists) + in scaffold replace the corresponding base values; scaffold values + are deep-copied so callers cannot accidentally share state with + the merged result. Keys present in base but absent from scaffold + are preserved. + """ + result = copy.deepcopy(base) + for key, value in scaffold.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = deep_merge(result[key], value) + else: + result[key] = copy.deepcopy(value) + return result + + +class PathTraversalError(Exception): + """Raised when set_at_path cannot traverse a path in the row dict.""" + + +def _scaffold_struct(target: dict, name: str) -> dict: + """Return target[name] as a dict, scaffolding `{}` when missing or None.""" + child = target.get(name) if isinstance(target, dict) else None + if child is None: + child = {} + target[name] = child + return child + + +def _scaffold_array(target: dict, name: str, path: FieldPath | str) -> list: + """Return target[name] as a list, scaffolding `[{}]` when None. + + Empty arrays raise — there is no element to mutate. + """ + child = target.get(name) if isinstance(target, dict) else None + if child is None: + child = [{}] + target[name] = child + if not isinstance(child, list): + raise PathTraversalError( + f"Expected list at '{name}' in path '{path}', got {type(child).__name__}" + ) + if len(child) == 0: + raise PathTraversalError(f"Empty array at '{name}' in path '{path}'") + return child + + +def _descend_through_array( + segment: ArraySegment, target: dict, path: FieldPath | str +) -> list: + """Enter an array segment and walk through its iter_count. + + Scaffolds `[{}]` at the outer level when None; deeper levels + (`iter_count > 1`) must already be lists -- scaffolding into + nested-list shapes isn't supported because no current schema + needs it. + + Returns the innermost list. For terminal use, write to `[0]`; + for intermediate use, the next segment lives in `[0]`. 
def set_at_path(path: FieldPath | str, value: object) -> Callable[[dict], dict]:
    """Return a mutator that sets *value* at *path* in a deep copy of the row.

    Array segments always descend into element 0. A None intermediate
    struct is scaffolded as `{}` and a None intermediate array as `[{}]`;
    empty arrays cannot be descended into and raise when the mutator runs.

    Parameters
    ----------
    path
        A `FieldPath` or its canonical encoded form (`"rules[].tags[].v"`).
    value
        The value to set at the resolved path. The same object is stored
        on every call of the mutator (it is not copied).

    Returns
    -------
    Callable[[dict], dict]
        Takes a row dict and returns a deep copy with the value at `path`
        replaced; the input row is left untouched.

    Raises
    ------
    PathTraversalError
        Raised by the mutator (not by this factory) when the path is
        empty or an array segment on the way is empty.
    """
    segments = coerce(path).segments

    def mutator(row_dict: dict) -> dict:
        if not segments:
            raise PathTraversalError(f"Empty path: {path!r}")
        *parents, leaf = segments
        updated = copy.deepcopy(row_dict)
        cursor: Any = updated
        for step in parents:
            if isinstance(step, ArraySegment):
                # Continue inside the first element of the innermost list.
                cursor = _descend_through_array(step, cursor, path)[0]
            else:
                cursor = _scaffold_struct(cursor, step.name)
        if isinstance(leaf, ArraySegment):
            # Terminal array segment: overwrite element 0 itself.
            _descend_through_array(leaf, cursor, path)[0] = value
        else:
            cursor[leaf.name] = value
        return updated

    return mutator
def mutate_radio_group(row_dict: dict, field_names: list[FieldPath | str]) -> dict:
    """Return a copy of the row with the first two named fields set to True.

    Two fields being True at once is what makes `radio_group` fire. The
    input row is not modified.
    """
    mutated = copy.deepcopy(row_dict)
    first_two = field_names[:2]
    for selected in first_two:
        _set_nested(mutated, selected, True)
    return mutated
def mutate_forbid_if(
    row_dict: dict,
    field_names: list[str],
    condition_field: FieldPath | str,
    condition_value: object,
    *,
    negate: bool = False,
    fill_values: dict[str, object] | None = None,
    array_path: FieldPath | str | None = None,
    inner_array_path: FieldPath | str | None = None,
) -> dict:
    """Make the forbid_if condition hold and the forbidden fields non-null.

    Works on a deep copy of *row_dict*. On each target dict selected by
    `array_path` / `inner_array_path` (the row itself when both are None)
    the condition field is set so the condition evaluates true, then every
    name in `field_names` that is currently None receives
    `fill_values[name]`, or a sentinel string when no fill is supplied.
    Fields that already hold a value are left as they are.

    `field_names` are flat scalar field names -- model-level forbid_if
    references fields by name on the enclosing model. `fill_values` is
    keyed by the same names.
    """
    mutated = copy.deepcopy(row_dict)
    replacements = fill_values if fill_values else {}

    def _force_present(element: dict) -> None:
        _ensure_condition(element, condition_field, condition_value, negate=negate)
        for field in field_names:
            if _get_nested(element, field) is not None:
                continue
            _set_nested(element, field, replacements.get(field, _SENTINEL))

    _apply_to_targets(mutated, _force_present, array_path, inner_array_path)
    return mutated
+ container: Any = parent + key: int | str = last.name + iter_count = last.iter_count if isinstance(last, ArraySegment) else 0 + for depth in range(iter_count): + inner = container[key] + _require_non_empty_array(inner, f"{last.name}{'[]' * depth}", path) + container, key = inner, 0 + arr = container[key] + if not isinstance(arr, list): + raise PathTraversalError( + f"Expected list at terminal of path '{path}', got {type(arr).__name__}" + ) + _duplicate_first(container, key, arr) + return result + + +def _walk_strict( + target: Any, segments: tuple[PathSegment, ...], path: FieldPath | str +) -> Any: + """Walk segments without scaffolding. + + Raises `PathTraversalError` on missing or null struct intermediates, + and on empty arrays encountered at array intermediates (each `[]` in + a segment's `iter_count` descends one element, which requires a + non-empty list). + """ + for segment in segments: + if not isinstance(target, dict) or target.get(segment.name) is None: + raise PathTraversalError( + f"Missing or null key '{segment.name}' in path '{path}'" + ) + target = target[segment.name] + if isinstance(segment, ArraySegment): + for _ in range(segment.iter_count): + _require_non_empty_array(target, segment.name, path) + target = target[0] + return target + + +def _require_non_empty_array(value: Any, name: str, path: FieldPath | str) -> None: + """Raise PathTraversalError unless *value* is a non-empty list.""" + if not isinstance(value, list) or len(value) == 0: + raise PathTraversalError(f"Empty or missing array at '{name}' in path '{path}'") + + +def _duplicate_first(container: Any, key: int | str, arr: list) -> None: + """Replace `container[key]` with `arr` having its first element duplicated. + + No-op when `arr` is empty. Both elements are deep-copied so callers + cannot accidentally share state between the duplicates. 
+ """ + if not arr: + return + dup = copy.deepcopy(arr[0]) + container[key] = [dup, copy.deepcopy(dup)] + list(arr[1:]) + + +_Applicator = Callable[[dict], None] + + +def _apply_to_targets( + row: dict, + fn: _Applicator, + array_path: FieldPath | str | None, + inner_array_path: FieldPath | str | None, +) -> None: + """Apply a mutation function to target dicts at the appropriate nesting level. + + Without array paths, applies directly to the row. With `array_path`, + iterates over elements of that array. With both `array_path` and + `inner_array_path`, iterates over outer elements, navigates the + inner struct path to a nested array, then iterates those elements. + + Creates stub array elements when the arrays are null so the mutation + can populate them. + """ + if array_path is None: + fn(row) + return + outer_arr: list[dict] | None = _get_nested(row, array_path) # type: ignore[assignment] + if outer_arr is None: + outer_stub: dict = {} + _stub_apply(outer_stub, inner_array_path, fn) + _set_nested(row, array_path, [outer_stub]) + return + if inner_array_path is None: + for element in outer_arr: + fn(element) + else: + for element in outer_arr: + inner_arr: list[dict] | None = _get_nested(element, inner_array_path) # type: ignore[assignment] + if inner_arr is not None: + for inner_element in inner_arr: + fn(inner_element) + else: + _stub_apply(element, inner_array_path, fn) + + +def _stub_apply( + parent: dict, + inner_array_path: FieldPath | str | None, + fn: _Applicator, +) -> None: + """Build a stub element at `inner_array_path` inside *parent* and run `fn`. + + When `inner_array_path` is None, *parent* itself is the stub that + `fn` mutates. Otherwise an empty stub is inserted as the sole + element of `[stub]` at `inner_array_path` inside *parent* + (scaffolding intermediate dicts), and `fn` mutates the stub. 
def _ensure_condition(
    d: dict,
    condition_field: FieldPath | str,
    condition_value: object,
    *,
    negate: bool,
) -> None:
    """Set `condition_field` in *d* so the constraint's condition is true.

    With `negate=False` the condition is `field == value`, so the field is
    overwritten with *condition_value*. With `negate=True` the condition
    is `field != value`: the field is only touched when it currently
    equals *condition_value*, and is then replaced by a placeholder string
    derived from that value.
    """
    if not negate:
        _set_nested(d, condition_field, condition_value)
        return
    if _get_nested(d, condition_field) == condition_value:
        placeholder = f"{_NOT_EQUAL_PREFIX}{condition_value}__"
        _set_nested(d, condition_field, placeholder)
def _get_nested(d: dict, path: FieldPath | str) -> object:
    """Look up the value at a struct-field path in a nested dict.

    Returns None as soon as a step lands on something that is not a dict
    or that lacks the next key; otherwise returns whatever is stored at
    the end of the path (which may itself be None).
    """
    cursor: object = d
    for name in (segment.name for segment in _as_scalar_path(path).segments):
        if isinstance(cursor, dict) and name in cursor:
            cursor = cursor[name]
        else:
            return None
    return cursor
+ """ + + id: str + scaffold: dict[str, Any] + mutate: Callable[[dict], dict] + expected_field: str + expected_check: str diff --git a/packages/overture-schema-pyspark/tests/conftest.py b/packages/overture-schema-pyspark/tests/conftest.py new file mode 100644 index 000000000..ccacb1aac --- /dev/null +++ b/packages/overture-schema-pyspark/tests/conftest.py @@ -0,0 +1,50 @@ +"""Shared pytest fixtures for overture-schema-pyspark tests.""" + +import os +import socket +import sys +from collections.abc import Callable +from typing import Any + +import pytest +from pyspark.sql import SparkSession + +# Ensure PySpark workers use the same Python as the driver to avoid +# version mismatch errors when a different system Python is on PATH. +os.environ.setdefault("PYSPARK_PYTHON", sys.executable) +os.environ.setdefault("PYSPARK_DRIVER_PYTHON", sys.executable) + + +def pytest_configure(config: pytest.Config) -> None: + """Suppress ResourceWarning from PySpark's unclosed py4j sockets. + + PySpark uses py4j to communicate with the JVM. py4j socket proxies + are GC'd between tests and their __del__ fires ResourceWarning via + sys.unraisablehook. With -W error this becomes a test failure. + + The original hook is preserved for all other unraisable exceptions. 
+ """ + original_hook: Callable[[Any], None] = sys.unraisablehook + + def _hook(unraisable: Any) -> None: + if isinstance(unraisable.exc_value, ResourceWarning) and isinstance( + unraisable.object, socket.socket + ): + return + original_hook(unraisable) + + sys.unraisablehook = _hook + + +@pytest.fixture(scope="session") +def spark() -> SparkSession: + """Provide a local SparkSession for testing.""" + session = ( + SparkSession.builder.master("local[1]") + .appName("overture-pyspark-tests") + .config("spark.ui.enabled", "false") + .config("spark.sql.shuffle.partitions", "1") + .getOrCreate() + ) + session.sparkContext.setLogLevel("ERROR") + return session diff --git a/packages/overture-schema-pyspark/tests/expressions/__init__.py b/packages/overture-schema-pyspark/tests/expressions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py new file mode 100644 index 000000000..6720da35e --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py @@ -0,0 +1,258 @@ +"""Tests for column_patterns — structural PySpark composition helpers.""" + +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + coalesce_errors, + error_msg, + nested_array_check, +) +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F + + +def test_error_msg_concatenates(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="bad")]) + result = df.select(error_msg("field: got ", F.col("val")).alias("msg")).collect() + assert result[0]["msg"] == "field: got bad" + + +def test_error_msg_multiple_values(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a="x", b="y")]) + result = df.select( + error_msg("prefix ", F.col("a"), F.lit(" and "), F.col("b")).alias("msg") + ).collect() + assert 
result[0]["msg"] == "prefix x and y" + + +def test_array_check_null_column_returns_null(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=None)], + schema="items array>", + ) + result = df.select( + array_check("items", lambda el: F.lit("err")).alias("errs") + ).collect() + assert result[0]["errs"] is None + + +def test_array_check_filters_nulls(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(val="ok"), Row(val="bad")])], + schema="items array>", + ) + result = df.select( + array_check( + "items", + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["error"] + + +def test_array_check_empty_when_all_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(val="ok")])], + schema="items array>", + ) + result = df.select( + array_check( + "items", + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_struct_unique_no_duplicates(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="b")])], + schema="items array>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_with_duplicates(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="a")])], + schema="items array>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is not None + assert "duplicate" in result[0]["err"] + + +def test_struct_unique_null_column(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=None)], + schema="items array>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_repeated_value_different_fields(spark: SparkSession) -> None: + """Structs with 
same value subfield but different other fields are unique.""" + df = spark.createDataFrame( + [ + Row( + items=[ + Row(value="a", pos=0.0), + Row(value="b", pos=0.5), + Row(value="a", pos=0.7), + ] + ) + ] + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_single_element(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a")])], + schema="items array>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_array_check_accepts_column(spark: SparkSession) -> None: + """array_check works when passed a Column instead of a string name.""" + df = spark.createDataFrame( + [Row(items=[Row(val="ok"), Row(val="bad")])], + schema="items array>", + ) + result = df.select( + array_check( + F.col("items"), + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["error"] + + +def test_check_struct_unique_accepts_column(spark: SparkSession) -> None: + """check_struct_unique works when passed a Column instead of a string name.""" + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="a")])], + schema="items array>", + ) + result = df.select(check_struct_unique(F.col("items")).alias("err")).collect() + assert result[0]["err"] is not None + assert "duplicate" in result[0]["err"] + + +def test_check_struct_unique_column_null(spark: SparkSession) -> None: + """check_struct_unique with Column input handles null.""" + df = spark.createDataFrame( + [Row(items=None)], schema="items array>" + ) + result = df.select(check_struct_unique(F.col("items")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_nested_array_check_flattens(spark: SparkSession) -> None: + """Inner array_check per outer element produces flat error list.""" + schema = "items array>>" + df = spark.createDataFrame( + [ + Row( + items=[ + 
Row(tags=["good", "bad"]), + Row(tags=["worse"]), + ] + ) + ], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.concat(F.lit("bad: "), tag)), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + errors = result[0]["errs"] + assert len(errors) == 2 + assert all(isinstance(e, str) for e in errors) + + +def test_nested_array_check_null_outer(spark: SparkSession) -> None: + schema = "items array>>" + df = spark.createDataFrame([Row(items=None)], schema=schema) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.lit("bad")), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == [] + + +def test_nested_array_check_mixed_null_inner_with_sibling_errors( + spark: SparkSession, +) -> None: + """A null inner array must not nullify sibling errors during flatten. + + `F.flatten` returns NULL whenever any sub-array is NULL. Without + guarding inner nulls, the outer transform produces NULL and every + sibling error is silently dropped. 
+ """ + schema = "items array>>" + df = spark.createDataFrame( + [ + Row( + items=[ + Row(tags=["good"]), + Row(tags=None), + Row(tags=["bad"]), + ] + ) + ], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.concat(F.lit("bad: "), tag)), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == ["bad: bad"] + + +def test_nested_array_check_no_errors(spark: SparkSession) -> None: + schema = "items array>>" + df = spark.createDataFrame( + [Row(items=[Row(tags=["good"])])], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.lit("bad")), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == [] + + +def test_coalesce_errors_null_becomes_empty(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(x=1)]) + result = df.select( + coalesce_errors(F.lit(None).cast("array")).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_coalesce_errors_preserves_array(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(x=1)]) + result = df.select(coalesce_errors(F.array(F.lit("err"))).alias("errs")).collect() + assert result[0]["errs"] == ["err"] diff --git a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py new file mode 100644 index 000000000..6a4289e35 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py @@ -0,0 +1,1341 @@ +"""Tests for constraint_expressions — constraint type to Column translation.""" + +import struct + +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, + 
check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_email, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_min_fields_set, + check_pattern, + check_radio_group, + check_require_any_of, + check_require_if, + check_required, + check_string_max_length, + check_string_min_length, + check_stripped, + check_url_format, + check_url_length, +) +from overture.schema.system.primitive import GeometryType +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import DoubleType, StructField, StructType +from shapely.geometry import LineString, MultiPolygon, Point, Polygon + + +def test_check_bounds_ge_le_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=5)]) + result = df.select(check_bounds(F.col("val"), ge=1, le=10).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bounds_ge_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=0)]) + result = df.select(check_bounds(F.col("val"), ge=1).alias("err")).collect() + assert result[0]["err"] is not None + assert ">= 1" in result[0]["err"] + + +def test_check_bounds_gt_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=0)]) + result = df.select(check_bounds(F.col("val"), gt=0).alias("err")).collect() + assert result[0]["err"] is not None + assert "> 0" in result[0]["err"] + + +def test_check_bounds_le_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=100)]) + result = df.select(check_bounds(F.col("val"), le=50).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bounds_null_passthrough(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val int") + result = df.select(check_bounds(F.col("val"), ge=1).alias("err")).collect() + assert result[0]["err"] is 
None + + +def test_check_enum_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="road")]) + result = df.select( + check_enum(F.col("val"), ["road", "rail", "water"]).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_enum_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="sky")]) + result = df.select( + check_enum(F.col("val"), ["road", "rail", "water"]).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "sky" in result[0]["err"] + + +class TestCheckPattern: + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("AB",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("abc",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + err = result.collect()[0]["e"] + assert "invalid test pattern" in err + assert "abc" in err + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + assert result.collect()[0]["e"] is None + + +class TestCheckMinLength: + def test_at_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array" + ) + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=["a"])], schema="items array") + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "minimum length 2" in result[0]["err"] + + def 
test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=None)], schema="items array") + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckMaxLength: + def test_within_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array" + ) + result = df.select( + check_array_max_length(F.col("items"), 3).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_at_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array" + ) + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exceeds_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b", "c"])], schema="items array" + ) + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "maximum length 2" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=None)], schema="items array") + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_require_any_of_satisfied(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a=1, b=None)], schema="a int, b int") + result = df.select( + check_require_any_of([F.col("a"), F.col("b")], ["a", "b"]).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_require_any_of_all_null(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a=None, b=None)], schema="a int, b int") + result = df.select( + check_require_any_of([F.col("a"), F.col("b")], ["a", "b"]).alias("err") + ).collect() + assert result[0]["err"] is not 
None + assert "a" in result[0]["err"] + assert "b" in result[0]["err"] + + +class TestCheckRequireIf: + def test_required_present(self, spark: SparkSession) -> None: + """Target is present when condition is true -> no error.""" + df = spark.createDataFrame( + [("road", "primary")], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_required_absent(self, spark: SparkSession) -> None: + """Target is null when condition is true -> error.""" + df = spark.createDataFrame( + [("road", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "required" in result[0]["err"] + + def test_condition_false_skips(self, spark: SparkSession) -> None: + """Target is null but condition is false -> no error.""" + df = spark.createDataFrame( + [("water", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_with_value_cols(self, spark: SparkSession) -> None: + """Error message includes actual discriminator value.""" + df = spark.createDataFrame( + [("road", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + F.col("subtype"), + ).alias("err") + ).collect() + assert "road" in result[0]["err"] + + +class TestCheckForbidIf: + def test_forbidden_absent(self, spark: SparkSession) -> None: + """Target is null when condition 
is true -> no error.""" + df = spark.createDataFrame( + [Row(subtype="country", parent=None)], + schema="subtype string, parent string", + ) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_forbidden_present(self, spark: SparkSession) -> None: + """Target is present when condition is true -> error.""" + df = spark.createDataFrame([Row(subtype="country", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "forbidden" in result[0]["err"] + + def test_condition_false_skips(self, spark: SparkSession) -> None: + """Target is present but condition is false -> no error.""" + df = spark.createDataFrame([Row(subtype="region", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_with_value_cols(self, spark: SparkSession) -> None: + """Error message includes actual discriminator value.""" + df = spark.createDataFrame([Row(subtype="country", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + F.col("subtype"), + ).alias("err") + ).collect() + assert "country" in result[0]["err"] + + +class TestCheckStringMinLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abc")]) + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_empty_string_violation(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="")]) + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + 
).collect() + assert result[0]["err"] is not None + assert "minimum length" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exact_min_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="ab")]) + result = df.select( + check_string_min_length(F.col("val"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_min_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="a")]) + result = df.select( + check_string_min_length(F.col("val"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + + +class TestCheckStringMaxLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abc")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_above_max_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abcdef")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "maximum length" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exact_max_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abcde")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckRadioGroup: + def test_exactly_one_true(self, spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(is_land=True, is_territorial=False)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_none_true(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(is_land=False, is_territorial=False)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "exactly one" in result[0]["err"] + assert "0" in result[0]["err"] + + def test_both_true(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(is_land=True, is_territorial=True)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2" in result[0]["err"] + + def test_null_treated_as_false(self, spark: SparkSession) -> None: + """Null booleans count as not-true (0 toward the count).""" + df = spark.createDataFrame( + [Row(is_land=True, is_territorial=None)], + schema="is_land boolean, is_territorial boolean", + ) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckGeometryType: + def test_point_matches(self, spark: SparkSession) -> None: + wkb_bytes = Point(0, 0).wkb + df = spark.createDataFrame( + [Row(geometry=bytearray(wkb_bytes))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_point_rejects_linestring(self, spark: SparkSession) -> None: + wkb_bytes = LineString([(0, 0), (1, 1)]).wkb + df = spark.createDataFrame( + 
[Row(geometry=bytearray(wkb_bytes))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "Point" in result[0]["err"] + + def test_multiple_allowed_types(self, spark: SparkSession) -> None: + wkb_polygon = Polygon([(0, 0), (1, 0), (1, 1), (0, 0)]).wkb + wkb_multi = MultiPolygon([Polygon([(0, 0), (1, 0), (1, 1), (0, 0)])]).wkb + df = spark.createDataFrame( + [ + Row(geometry=bytearray(wkb_polygon)), + Row(geometry=bytearray(wkb_multi)), + ], + schema="geometry binary", + ) + result = df.select( + check_geometry_type( + F.col("geometry"), + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + ).alias("err") + ).collect() + assert all(r["err"] is None for r in result) + + def test_multiple_allowed_rejects_wrong_type(self, spark: SparkSession) -> None: + wkb_point = Point(0, 0).wkb + df = spark.createDataFrame( + [Row(geometry=bytearray(wkb_point))], schema="geometry binary" + ) + result = df.select( + check_geometry_type( + F.col("geometry"), + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + ).alias("err") + ).collect() + assert result[0]["err"] is not None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(geometry=None)], schema="geometry binary") + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_big_endian_wkb(self, spark: SparkSession) -> None: + """Verify BE byte order handling. + + Shapely writes LE by default. Construct BE WKB for a Point + manually: byte_order=0x00, type=0x00000001, x=0.0, y=0.0. 
+ """ + be_point = struct.pack(">bIdd", 0, 1, 0.0, 0.0) + df = spark.createDataFrame( + [Row(geometry=bytearray(be_point))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckStripped: + def test_clean_string(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello world")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_single_char(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="x")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_leading_space(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=" hello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "whitespace" in result[0]["err"] + + def test_trailing_space(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello ")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "whitespace" in result[0]["err"] + + def test_leading_tab(self, spark: SparkSession) -> None: + """Tab is Unicode whitespace -- must be caught (not just ASCII space).""" + df = spark.createDataFrame([Row(val="\thello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_newline(self, spark: SparkSession) -> None: + """Trailing newline requires \\z anchor -- $ matches before it in Java regex.""" + df = spark.createDataFrame([Row(val="hello\n")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_empty_string(self, spark: SparkSession) -> None: + """Empty string has no leading/trailing whitespace -- passes.""" + df = spark.createDataFrame([Row(val="")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_trailing_unit_separator(self, spark: SparkSession) -> None: + """U+001F (unit separator) -- Python strips it, Java \\S with (?U) does not.""" + df = spark.createDataFrame([Row(val="Main St \x1f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_leading_file_separator(self, spark: SparkSession) -> None: + """U+001C (file separator) -- C0 control char Python treats as whitespace.""" + df = spark.createDataFrame([Row(val="\x1chello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_soh(self, spark: SparkSession) -> None: + """U+0001 (SOH) -- C0 control char that even Python's strip() misses.""" + df = spark.createDataFrame([Row(val="hello\x01")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_del(self, spark: SparkSession) -> None: + """U+007F (DEL) -- control char outside C0 range.""" + df = spark.createDataFrame([Row(val="hello\x7f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_c1_control(self, spark: SparkSession) -> None: + """U+009F (APC) -- C1 control char.""" + df = spark.createDataFrame([Row(val="hello\x9f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_control_char_in_middle_passes(self, 
spark: SparkSession) -> None: + """Control chars in the middle of a string are not a stripped concern.""" + df = spark.createDataFrame([Row(val="hel\x1flo")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +class TestCheckJsonPointer: + def test_valid_pointer(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="/properties/name")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_root_pointer(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="/")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_empty_string_valid(self, spark: SparkSession) -> None: + """Empty string is valid per RFC 6901 (references whole document).""" + df = spark.createDataFrame([Row(val="")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_missing_leading_slash(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="properties/name")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "JSON pointer" in result[0]["err"] + assert "properties/name" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 1.0])], schema="between array" + ) + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_wrong_length_one(self, spark: 
SparkSession) -> None: + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_wrong_length_three(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 0.5, 1.0])], schema="between array" + ) + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_empty_array(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=[])], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeBounds: + def test_valid_bounds(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.2, 0.8])], schema="between array" + ) + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_value_below_zero(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[-0.1, 0.5])], schema="between array" + ) + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "[0.0, 1.0]" in result[0]["err"] + + def test_value_above_one(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 1.1])], schema="between array" + ) + result = df.select( 
+ check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "[0.0, 1.0]" in result[0]["err"] + + def test_wrong_length_passthrough(self, spark: SparkSession) -> None: + """Wrong-length arrays are not this function's concern.""" + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeOrder: + def test_valid_order(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.2, 0.8])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_start_equals_end(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.5, 0.5])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "start must be < end" in result[0]["err"] + + def test_start_after_end(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.8, 0.2])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "start must be < end" in result[0]["err"] + + def test_wrong_length_passthrough(self, spark: SparkSession) -> None: + """Wrong-length arrays are not this function's concern.""" + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + 
check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_required_null_is_error(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_required(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "missing" in result[0]["err"] + + +def test_check_required_non_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello")]) + result = df.select(check_required(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_required_composes_with_enum(spark: SparkSession) -> None: + """check_required + check_enum via F.coalesce catches both null and invalid.""" + df = spark.createDataFrame([Row(val=None)], schema="val string") + expr = F.coalesce( + check_required(F.col("val")), + check_enum(F.col("val"), ["a", "b"]), + ) + result = df.select(expr.alias("err")).collect() + assert result[0]["err"] is not None + assert "missing" in result[0]["err"] + + +_COUNTRY_CODE_PATTERN = r"^[A-Z]{2}\z" +_COUNTRY_CODE_LABEL = "ISO 3166-1 alpha-2 country code" + + +class TestCheckCountryCodeViaPattern: + """Country code validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("US",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_lowercase_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("us",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), 
_COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_COUNTRY_CODE_LABEL}" in err + assert "us" in err + + def test_three_chars_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("USA",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_REGION_CODE_PATTERN = r"^[A-Z]{2}-[A-Z0-9]{1,3}\z" +_REGION_CODE_LABEL = "ISO 3166-2 subdivision code" + + +class TestCheckRegionCodeViaPattern: + """Region code validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("US-NY",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_valid_numeric(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("CN-11",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_no_dash_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("USNY",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_REGION_CODE_LABEL}" in err + assert "USNY" in err + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + 
F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_SNAKE_CASE_PATTERN = r"^[a-z0-9]+(_[a-z0-9]+)*\z" +_SNAKE_CASE_LABEL = "Category in snake_case format" + + +class TestCheckSnakeCaseViaPattern: + """Snake_case validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello_world",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_single_word(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_with_numbers(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello_123",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_uppercase_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Hello_World",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_SNAKE_CASE_LABEL}" in err + + def test_spaces_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello world",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert 
result.collect()[0]["e"] is None + + +def test_check_url_format_http_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="http://example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_format_https_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="https://example.com/path?q=1")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_format_no_scheme_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_format_ftp_scheme_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="ftp://example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_format_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_length_exceeds_2083_chars_invalid(spark: SparkSession) -> None: + long_url = "https://example.com/" + "a" * 2064 # 2084 chars + df = spark.createDataFrame([Row(val=long_url)]) + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_length_exactly_2083_chars_valid(spark: SparkSession) -> None: + url = "https://example.com/" + "a" * 2063 # 2083 chars + df = spark.createDataFrame([Row(val=url)]) + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_length_null_passes(spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_no_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="userexample.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_no_domain_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_spaces_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user @example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_trailing_period_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example.com.")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_leading_period_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=".user@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_period_before_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user.@example.com")]) + result = 
df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_period_after_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@.example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_double_period_domain_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example..com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_dotted_local_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user.name@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_subdomain_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@mail.example.co.uk")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +_PHONE_PATTERN = r"^\+\d{1,3}[\s\-\(\)0-9]+\z" +_PHONE_LABEL = "International phone number (+ followed by country code and number)" + + +class TestCheckPhoneViaPattern: + """Phone number validation through check_pattern with label.""" + + def test_valid_us(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+1 555-555-5555",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_valid_international(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+44 20 7946 0958",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_no_plus_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("555-555-5555",)], ["v"]) + result = 
df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_PHONE_LABEL}" in err + + def test_letters_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+1 abc-defg",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_WIKIDATA_PATTERN = r"^Q\d+\z" +_WIKIDATA_LABEL = "Wikidata identifier (Q followed by digits)" + + +class TestCheckWikidataIdViaPattern: + """Wikidata ID validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + def test_large_number(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q123456789",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + def test_lowercase_q_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("q42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + err = result.collect()[0]["e"] + assert f"invalid {_WIKIDATA_LABEL}" in err + + def test_no_digits_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert 
result.collect()[0]["e"] is not None + + def test_p_prefix_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("P42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + +class TestCheckMinFieldsSet: + def test_meets_threshold(self, spark: SparkSession) -> None: + """Count at threshold -> no error.""" + df = spark.createDataFrame( + [Row(a=1, b=2, c=None)], schema="a int, b int, c int" + ) + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exceeds_threshold(self, spark: SparkSession) -> None: + """Count above threshold -> no error.""" + df = spark.createDataFrame([Row(a=1, b=2, c=3)], schema="a int, b int, c int") + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_threshold(self, spark: SparkSession) -> None: + """Count below threshold -> error with field names and actual count.""" + df = spark.createDataFrame( + [Row(a=1, b=None, c=None)], schema="a int, b int, c int" + ) + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err is not None + assert "at least 2" in err + assert "a, b, c" in err + assert "1" in err + + def test_all_null_below_threshold(self, spark: SparkSession) -> None: + """All null -> error showing 0 non-null.""" + df = 
spark.createDataFrame([Row(a=None, b=None)], schema="a int, b int") + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b")], + ["a", "b"], + 1, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err is not None + assert "0" in err + + def test_error_message_format(self, spark: SparkSession) -> None: + """Error message matches expected format exactly.""" + df = spark.createDataFrame([Row(x=None, y=None)], schema="x int, y int") + result = df.select( + check_min_fields_set( + [F.col("x"), F.col("y")], + ["x", "y"], + 1, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err == "at least 1 of x, y required, got 0 non-null" + + +_BBOX_SCHEMA = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + StructField("xmax", DoubleType(), True), + StructField("ymin", DoubleType(), True), + StructField("ymax", DoubleType(), True), + ] + ), + True, + ), + ] +) + + +def test_check_bbox_completeness_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=0.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_completeness_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_completeness_null_subfield_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=None, xmax=1.0, ymin=0.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_ordering_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, 
ymin=-10.0, ymax=10.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_ordering_equal_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=5.0, ymax=5.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_ordering_inverted_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=10.0, ymax=-10.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_ordering_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_range_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=-90.0, ymax=90.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_range_ymin_below_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=-91.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_range_ymax_above_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=0.0, ymax=91.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert 
result[0]["err"] is not None + + +def test_check_bbox_lat_range_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None diff --git a/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py new file mode 100644 index 000000000..937b8862d --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py @@ -0,0 +1,268 @@ +"""Tests for schema comparison.""" + +from overture.schema.pyspark.schema_check import ( + SchemaMismatch, + compare_schemas, +) +from pyspark.sql.types import ( + ArrayType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + + +class TestIdenticalSchemas: + def test_empty_schemas(self) -> None: + assert compare_schemas(StructType(), StructType()) == [] + + def test_flat_schema(self) -> None: + schema = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + assert compare_schemas(schema, schema) == [] + + def test_nested_struct(self) -> None: + schema = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + ] + ), + True, + ), + ] + ) + assert compare_schemas(schema, schema) == [] + + def test_array_of_structs(self) -> None: + schema = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + ] + ) + ), + True, + ), + ] + ) + assert compare_schemas(schema, schema) == [] + + +class TestMissingFields: + def test_missing_in_actual(self) -> None: + actual = StructType([StructField("id", StringType(), True)]) + expected = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + result = compare_schemas(actual, 
expected) + assert result == [SchemaMismatch("version", "missing", "IntegerType")] + + def test_extra_in_actual(self) -> None: + actual = StructType( + [ + StructField("id", StringType(), True), + StructField("extra", StringType(), True), + ] + ) + expected = StructType([StructField("id", StringType(), True)]) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("extra", "StringType", "missing")] + + +class TestTypeMismatches: + def test_top_level_type_mismatch(self) -> None: + actual = StructType([StructField("version", StringType(), True)]) + expected = StructType([StructField("version", IntegerType(), True)]) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("version", "StringType", "IntegerType")] + + def test_nested_struct_mismatch(self) -> None: + actual = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", IntegerType(), True), + ] + ), + True, + ), + ] + ) + expected = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + ] + ), + True, + ), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("bbox.xmin", "IntegerType", "DoubleType")] + + def test_array_element_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", ArrayType(IntegerType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", ArrayType(StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags[]", "IntegerType", "StringType")] + + def test_array_struct_field_mismatch(self) -> None: + actual = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", IntegerType(), True), + ] + ) + ), + True, + ), + ] + ) + expected = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + ] + ) + ), + True, + ), + ] + ) + result = 
compare_schemas(actual, expected) + assert result == [SchemaMismatch("items[].name", "IntegerType", "StringType")] + + def test_map_key_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", MapType(IntegerType(), StringType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", MapType(StringType(), StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags{key}", "IntegerType", "StringType")] + + def test_map_value_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", MapType(StringType(), IntegerType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", MapType(StringType(), StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags{value}", "IntegerType", "StringType")] + + +class TestFieldOrdering: + def test_different_order_is_ok(self) -> None: + actual = StructType( + [ + StructField("b", StringType(), True), + StructField("a", IntegerType(), True), + ] + ) + expected = StructType( + [ + StructField("a", IntegerType(), True), + StructField("b", StringType(), True), + ] + ) + assert compare_schemas(actual, expected) == [] + + +class TestMultipleMismatches: + def test_missing_and_extra_and_wrong_type(self) -> None: + actual = StructType( + [ + StructField("id", IntegerType(), True), + StructField("extra", StringType(), True), + ] + ) + expected = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + result = compare_schemas(actual, expected) + assert SchemaMismatch("id", "IntegerType", "StringType") in result + assert SchemaMismatch("extra", "StringType", "missing") in result + assert SchemaMismatch("version", "missing", "IntegerType") in result + + +class TestKindMismatch: + def test_struct_vs_primitive(self) -> None: + actual = StructType([StructField("x", StringType(), True)]) + expected = StructType( + [ + 
StructField( + "x", + StructType( + [ + StructField("y", StringType(), True), + ] + ), + True, + ), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("x", "StringType", "StructType")] + + def test_array_vs_primitive(self) -> None: + actual = StructType([StructField("x", StringType(), True)]) + expected = StructType( + [ + StructField("x", ArrayType(StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("x", "StringType", "ArrayType")] diff --git a/packages/overture-schema-pyspark/tests/test_check.py b/packages/overture-schema-pyspark/tests/test_check.py new file mode 100644 index 000000000..681add76b --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_check.py @@ -0,0 +1,20 @@ +"""Tests for Check dataclass and CheckShape enum.""" + +import dataclasses + +import pytest +from overture.schema.pyspark.check import Check, CheckShape +from pyspark.sql import SparkSession +from pyspark.sql import functions as F + + +def test_check_is_frozen(spark: SparkSession) -> None: + check = Check( + field="subtype", + name="required", + expr=F.lit("error"), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + with pytest.raises(dataclasses.FrozenInstanceError): + check.field = "other" # type: ignore[misc] diff --git a/packages/overture-schema-pyspark/tests/test_cli.py b/packages/overture-schema-pyspark/tests/test_cli.py new file mode 100644 index 000000000..037d6aaeb --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_cli.py @@ -0,0 +1,475 @@ +"""Tests for CLI entry points.""" + +from collections.abc import Iterator +from pathlib import Path + +import pytest +from click.testing import CliRunner +from overture.schema.pyspark._registry import REGISTRY +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.cli import ( + ReadSpec, + _spark_config, + read_feature, + resolve_read, + validate_cli, +) +from 
pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import StringType, StructField, StructType + +_TEST_TYPE = "_test_cli" + +# Tests that branch on registered geometry types require the runtime registry +# to be populated (i.e. generated expression modules present). +_requires_generated = pytest.mark.skipif( + not REGISTRY, reason="requires generated expression modules" +) + + +class TestSparkConfig: + """Tests for S3A auto-configuration.""" + + @_requires_generated + def test_large_geometry_disables_vectorized_reader(self) -> None: + config = _spark_config( + "samples/segment.parquet", (), "overture.schema.transportation:Segment" + ) + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + @_requires_generated + def test_point_geometry_keeps_vectorized_reader(self) -> None: + config = _spark_config( + "samples/place.parquet", (), "overture.schema.places:Place" + ) + assert "spark.sql.parquet.enableVectorizedReader" not in config + + def test_unspecified_geometry_disables_vectorized_reader(self) -> None: + # _TEST_TYPE registers no geometry_types -- safe default disables the reader + config = _spark_config("samples/test.parquet", (), _TEST_TYPE) + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + def test_s3a_path_applies_defaults(self) -> None: + config = _spark_config("s3a://bucket/path", (), _TEST_TYPE) + assert "org.apache.hadoop:hadoop-aws" in config["spark.jars.packages"] + assert "S3AFileSystem" in config["spark.hadoop.fs.s3a.impl"] + assert ( + "AnonymousAWSCredentialsProvider" + in config["spark.hadoop.fs.s3a.aws.credentials.provider"] + ) + + def test_user_conf_overrides_s3a_defaults(self) -> None: + config = _spark_config( + "s3a://bucket/path", + ( + "spark.hadoop.fs.s3a.aws.credentials.provider=" + "software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider", + ), + _TEST_TYPE, + ) + assert ( + "ProfileCredentialsProvider" + in 
config["spark.hadoop.fs.s3a.aws.credentials.provider"] + ) + + def test_user_conf_merges_with_s3a_defaults(self) -> None: + config = _spark_config( + "s3a://bucket/path", ("spark.master=local[4]",), _TEST_TYPE + ) + assert config["spark.master"] == "local[4]" + assert "spark.jars.packages" in config + + def test_local_path_passes_user_conf(self) -> None: + config = _spark_config( + "samples/test.parquet", ("spark.master=local[4]",), _TEST_TYPE + ) + assert config["spark.master"] == "local[4]" + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + +def _test_checks() -> list[Check]: + """Minimal checks for CLI testing: value must be 'good'.""" + return [ + Check( + field="value", + name="enum", + expr=F.when(F.col("value") != "good", F.lit("not good")), + shape=CheckShape.SCALAR, + root_field="value", + ), + ] + + +@pytest.fixture(autouse=True) +def _register_test_checks() -> Iterator[None]: + REGISTRY[_TEST_TYPE] = FeatureValidation( + schema=StructType( + [ + StructField("id", StringType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("value", StringType(), True), + ] + ), + checks=_test_checks, + ) + yield + del REGISTRY[_TEST_TYPE] + + +def test_validate_missing_args() -> None: + runner = CliRunner() + result = runner.invoke(validate_cli, []) + assert result.exit_code != 0 + + +def test_validate_unknown_type() -> None: + runner = CliRunner() + result = runner.invoke(validate_cli, ["nonexistent", "/dev/null"]) + assert result.exit_code != 0 + assert "nonexistent" in result.output + + +def test_validate_clean_data(spark: SparkSession, tmp_path: Path) -> None: + """Valid data exits 0, no output file written.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good")] + ).write.parquet(input_path) + + runner = CliRunner() + result = 
runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + assert not Path(output_path).exists() + + +def test_validate_error_count(spark: SparkSession, tmp_path: Path) -> None: + """Rows with errors are counted in summary.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "1 / 1 rows with errors" in result.output + + +def test_validate_shows_error_rows(spark: SparkSession, tmp_path: Path) -> None: + """Error rows are displayed with violation columns.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="row1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "row1" in result.output + assert "value" in result.output + + +def test_validate_head_zero(spark: SparkSession, tmp_path: Path) -> None: + """--head 0 suppresses the error row table.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="row1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "-o", output_path, "--head", "0"] + ) + assert result.exit_code != 0 + assert "1 / 1 rows with errors" in result.output + assert "row1" not in result.output + + +def test_validate_schema_mismatch_exits(spark: SparkSession, tmp_path: Path) -> None: + 
"""Schema mismatch prints diff and exits before validation.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + # Write data with wrong schema (IntegerType where StringType expected) + spark.createDataFrame( + [Row(id="r1", value=42)], schema="id string, value int" + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + assert "value" in result.output + + +def test_validate_skip_schema_check(spark: SparkSession, tmp_path: Path) -> None: + """--skip-schema-check warns on mismatches but continues validation.""" + input_path = str(tmp_path / "input.parquet") + + # Extra column causes a mismatch but doesn't break check evaluation + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good", extra="x")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--skip-schema-check"] + ) + assert "Schema mismatch" in result.output + assert "rows with errors" in result.output + + +def test_validate_skip_columns(spark: SparkSession, tmp_path: Path) -> None: + """--skip-columns skips checks for absent columns.""" + input_path = str(tmp_path / "input.parquet") + + # Data missing 'value' column — declare it absent via --skip-columns + spark.createDataFrame([Row(id="r1", theme="test", type="test_cli")]).write.parquet( + input_path + ) + + runner = CliRunner() + result = runner.invoke( + validate_cli, + [_TEST_TYPE, input_path, "--skip-columns", "value", "--skip-schema-check"], + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_ignore_extra_columns(spark: SparkSession, tmp_path: Path) -> None: + """--ignore-extra-columns suppresses 'expected missing' schema mismatches.""" + input_path = str(tmp_path / 
"input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good", extra="x")] + ).write.parquet(input_path) + + runner = CliRunner() + # Without the flag, schema mismatch exits + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + + # With the flag, extra column is tolerated + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--ignore-extra-columns", "extra"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_suppress_field(spark: SparkSession, tmp_path: Path) -> None: + """--suppress FIELD removes all checks on that field.""" + input_path = str(tmp_path / "input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--suppress", "value"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_suppress_field_check(spark: SparkSession, tmp_path: Path) -> None: + """--suppress FIELD:CHECK removes a specific check.""" + input_path = str(tmp_path / "input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--suppress", "value:enum"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_output_contains_explained_violations( + spark: SparkSession, tmp_path: Path +) -> None: + """Output Parquet contains explain() violations with field/check/message.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [ + Row(id="r1", 
theme="test", type="test_cli", value="good"), + Row(id="r2", theme="test", type="test_cli", value="bad"), + ] + ).write.parquet(input_path) + + runner = CliRunner() + runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + + result_df = spark.read.parquet(output_path) + assert {"field", "check", "message"} <= set(result_df.columns) + assert result_df.count() == 1 # one violation from r2 + + +_BATHYMETRY_PARTITIONS = {"theme": "base", "type": "bathymetry"} +_SEGMENT_PARTITIONS = {"theme": "transportation", "type": "segment"} + + +class TestResolveRead: + """Pure-function tests for path resolution logic.""" + + def test_release_root(self) -> None: + spec = resolve_read("/data/release/2026-02-18.0/", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry", + base_path="/data/release/2026-02-18.0", + ) + + def test_release_root_no_trailing_slash(self) -> None: + spec = resolve_read("/data/release/2026-02-18.0", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry", + base_path="/data/release/2026-02-18.0", + ) + + def test_leaf_partition(self) -> None: + spec = resolve_read( + "/data/release/2026-02-18.0/theme=base/type=bathymetry/", + _BATHYMETRY_PARTITIONS, + ) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry/", + base_path="/data/release/2026-02-18.0", + ) + + def test_theme_partition_without_type(self) -> None: + spec = resolve_read( + "/data/release/2026-02-18.0/theme=base/", _BATHYMETRY_PARTITIONS + ) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/", + base_path="/data/release/2026-02-18.0", + ) + + def test_individual_file(self) -> None: + spec = resolve_read("/tmp/bathymetry.parquet", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec(data_path="/tmp/bathymetry.parquet") + + def test_individual_file_no_partitions(self) -> None: + spec = 
resolve_read("/tmp/data.parquet", None) + assert spec == ReadSpec(data_path="/tmp/data.parquet") + + def test_plain_directory_no_partitions(self) -> None: + spec = resolve_read("/tmp/data/", None) + assert spec == ReadSpec(data_path="/tmp/data/") + + def test_s3a_release_root(self) -> None: + spec = resolve_read("s3a://bucket/release/2026-02-18.0/", _SEGMENT_PARTITIONS) + assert spec == ReadSpec( + data_path="s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment", + base_path="s3a://bucket/release/2026-02-18.0", + ) + + def test_s3a_leaf_partition(self) -> None: + spec = resolve_read( + "s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment/", + _SEGMENT_PARTITIONS, + ) + assert spec == ReadSpec( + data_path="s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment/", + base_path="s3a://bucket/release/2026-02-18.0", + ) + + +def _write_partitioned(spark: SparkSession, base_dir: Path, rows: list[Row]) -> None: + """Write test rows as Hive-partitioned Parquet under *base_dir*.""" + spark.createDataFrame(rows).write.partitionBy("theme", "type").parquet( + str(base_dir) + ) + + +class TestReadFeature: + """Integration tests: resolve_read + read_feature against local Parquet.""" + + def test_read_from_release_root(self, spark: SparkSession, tmp_path: Path) -> None: + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + spec = resolve_read(str(base), {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_read_from_leaf_partition( + self, spark: SparkSession, tmp_path: Path + ) -> None: + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + leaf = str(base / f"theme=test/type={_TEST_TYPE}") + spec = resolve_read(leaf, {"theme": "test", "type": _TEST_TYPE}) 
+ df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_read_from_individual_file( + self, spark: SparkSession, tmp_path: Path + ) -> None: + file_path = str(tmp_path / "data.parquet") + spark.createDataFrame( + [Row(id="r1", theme="test", type=_TEST_TYPE, value="good")] + ).write.parquet(file_path) + spec = resolve_read(file_path, {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_release_root_filters_to_type( + self, spark: SparkSession, tmp_path: Path + ) -> None: + """Only the target type's rows are returned from a multi-type release.""" + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [ + Row(id="r1", value="good", theme="test", type=_TEST_TYPE), + Row(id="r2", value="good", theme="test", type="other"), + ], + ) + spec = resolve_read(str(base), {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert df.collect()[0]["id"] == "r1" + + +def test_validate_from_partitioned_release(spark: SparkSession, tmp_path: Path) -> None: + """Full CLI round-trip reading from a Hive-partitioned release root.""" + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, str(base)]) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output diff --git a/packages/overture-schema-pyspark/tests/test_harness.py b/packages/overture-schema-pyspark/tests/test_harness.py new file mode 100644 index 000000000..188bdc3ac --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_harness.py @@ -0,0 +1,361 @@ +"""Tests for the conformance test harness.""" + +from __future__ import annotations + +import re + +import pytest +from 
overture.schema.pyspark.check import Check, CheckShape +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + IntegerType, + StringType, + StructField, + StructType, +) + +from ._support.harness import ( + assert_schema_covers_checks, + build_scenario_map, + build_scenario_rows, + index_violations, + sanitize_row, + scenario_uuid, +) +from ._support.helpers import PathTraversalError, set_at_path +from ._support.scenarios import Scenario + + +class TestScenarioUuid: + def test_deterministic(self) -> None: + """Same ID produces same UUID.""" + assert scenario_uuid("building::id:required") == scenario_uuid( + "building::id:required" + ) + + def test_different_ids_different_uuids(self) -> None: + assert scenario_uuid("a::b:c") != scenario_uuid("d::e:f") + + def test_valid_uuid_format(self) -> None: + uuid_str = scenario_uuid("test::x:y") + assert re.match( + r"^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + uuid_str, + ) + + +class TestBuildScenarioMap: + def test_scenarios_get_valid_and_invalid_entries(self) -> None: + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + scenario_map = build_scenario_map(scenarios, feature_name="f") + assert scenario_uuid("f::x:required::valid") in scenario_map + assert ( + scenario_map[scenario_uuid("f::x:required::valid")] + == "f::x:required::valid" + ) + assert scenario_uuid("f::x:required::invalid") in scenario_map + assert ( + scenario_map[scenario_uuid("f::x:required::invalid")] + == "f::x:required::invalid" + ) + + def test_baseline_plus_two_entries_per_scenario(self) -> None: + scenarios = [ + Scenario( + id="f::x:check", + scaffold={}, + mutate=set_at_path("x", 0), + expected_field="x", + expected_check="check", + ), + ] + scenario_map = build_scenario_map(scenarios, feature_name="f") + # baseline + (::valid, ::invalid) 
for the one scenario + assert len(scenario_map) == 3 + + def test_duplicate_id_values_raises(self) -> None: + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + with pytest.raises(ValueError, match="Duplicate"): + build_scenario_map(scenarios, feature_name="f") + + +class TestBuildScenarioRows: + def test_baseline_row_included(self) -> None: + base = {"id": "original-uuid", "theme": "buildings", "type": "building", "x": 1} + rows, scenario_map, skipped = build_scenario_rows( + base, [], feature_name="building" + ) + assert len(rows) == 1 + assert rows[0]["theme"] == "buildings" + assert "_scenario_id" in rows[0] + + def test_path_traversal_error_skips(self) -> None: + """Mutation functions that raise PathTraversalError produce skips.""" + base = {"theme": "t", "type": "ty"} + + def bad_mutation(row: dict) -> dict: + raise PathTraversalError("cannot traverse") + + scenarios = [ + Scenario( + id="f::x:check", + scaffold={}, + mutate=bad_mutation, + expected_field="x", + expected_check="check", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + assert len(rows) == 1 + assert "f::x:check" in skipped + + def test_scenario_creates_valid_and_invalid_rows(self) -> None: + """Each Scenario produces both a valid and an invalid row.""" + base = {"id": "orig", "theme": "t", "type": "ty", "x": 1} + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + # baseline + valid + invalid + assert len(rows) == 3 + assert rows[1]["x"] == 1 # valid row is a copy of base_row + assert rows[2]["x"] is None 
+ assert rows[1]["_scenario_id"] == scenario_uuid("f::x:required::valid") + assert rows[2]["_scenario_id"] == scenario_uuid("f::x:required::invalid") + + def test_valid_row_uses_base_row_not_scaffold(self) -> None: + """Valid row is a copy of base_row, not the scaffold-merged row.""" + base = {"id": "orig", "theme": "t", "type": "ty", "items": [{"a": 1, "b": 2}]} + scenarios = [ + Scenario( + id="f::items[].a:required", + scaffold={"items": [{"a": 0}]}, + mutate=set_at_path("items[].a", None), + expected_field="items[].a", + expected_check="required", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + assert len(rows) == 3 + # Valid row uses base_row (preserves all fields in items element) + assert rows[1]["items"] == [{"a": 1, "b": 2}] + # Invalid row uses scaffold-merged row + assert rows[2]["items"][0]["a"] is None + + def test_scaffold_merged_onto_invalid_row(self) -> None: + base_row = {"id": "x", "a": 1} + s = Scenario( + id="test::b:check", + scaffold={"b": 10}, + mutate=set_at_path("b", 0), + expected_field="b", + expected_check="check", + ) + rows, scenario_map, skipped = build_scenario_rows( + base_row, [s], feature_name="test" + ) + invalid_id = scenario_uuid("test::b:check::invalid") + invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) + # base field preserved, scaffold provides b, path overrides b + assert invalid_row["a"] == 1 + assert invalid_row["b"] == 0 + + def test_applies_scaffold_then_mutation(self) -> None: + base_row = {"id": "x", "a": 1} + s = Scenario( + id="test::model:check", + scaffold={"b": 10}, + mutate=lambda row: {**row, "a": None}, + expected_field="a", + expected_check="required", + ) + rows, scenario_map, skipped = build_scenario_rows( + base_row, [s], feature_name="test" + ) + assert len(rows) == 3 + assert not skipped + invalid_id = scenario_uuid("test::model:check::invalid") + invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) + # 
scaffold merged: b exists + assert invalid_row["b"] == 10 + # mutation applied: a is None + assert invalid_row["a"] is None + + +class TestSanitizeRow: + def test_nested_geometry_converted(self) -> None: + row = { + "id": "x", + "nested": {"geometry": "POINT (1 2)"}, + } + result = sanitize_row(row) + assert isinstance(result["nested"]["geometry"], bytes) + + def test_top_level_geometry_converted(self) -> None: + row = {"id": "x", "geometry": "POINT (1 2)"} + result = sanitize_row(row) + assert isinstance(result["geometry"], bytes) + + def test_non_wkt_string_at_geometry_key_unchanged(self) -> None: + row = {"id": "x", "geometry": "not-a-geometry"} + result = sanitize_row(row) + assert result["geometry"] == "not-a-geometry" + + def test_non_geometry_keys_unchanged(self) -> None: + row = {"id": "x", "name": "POINT (1 2)"} + result = sanitize_row(row) + assert result["name"] == "POINT (1 2)" + + +class TestSchemaAssertions: + def test_assert_schema_covers_checks_passes(self, spark: SparkSession) -> None: + schema = StructType( + [ + StructField("id", StringType()), + StructField("x", IntegerType()), + ] + ) + checks = [ + Check( + field="id", + name="required", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="id", + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + def test_assert_schema_covers_synthetic_field(self, spark: SparkSession) -> None: + schema = StructType([StructField("sources", ArrayType(StringType()))]) + checks = [ + Check( + field="sources_min_length", + name="min_length", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="sources", + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + def test_assert_schema_covers_checks_missing_field( + self, spark: SparkSession + ) -> None: + schema = StructType([StructField("id", StringType())]) + checks = [ + Check( + field="missing", + name="required", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="missing", + ) + ] + with 
pytest.raises(AssertionError, match="missing"): + assert_schema_covers_checks(schema, checks) + + def test_assert_schema_covers_synthetic_model_check( + self, spark: SparkSession + ) -> None: + """root_field=None passes regardless of schema (radio_group, etc.).""" + schema = StructType([StructField("id", StringType())]) + checks = [ + Check( + field="radio_group", + name="radio_group", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field=None, + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + +class TestIndexViolations: + def test_groups_by_scenario_id(self) -> None: + uuid_a = scenario_uuid("f::a:required") + uuid_b = scenario_uuid("f::b:enum") + scenario_map = {uuid_a: "f::a:required", uuid_b: "f::b:enum"} + violation_rows = [ + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="required", + message="missing", + ), + Row( + _scenario_id=uuid_b, + x=2, + field="b", + check="enum", + message="invalid", + ), + ] + result = index_violations(violation_rows, scenario_map) + assert result["f::a:required"] == {("a", "required")} + assert result["f::b:enum"] == {("b", "enum")} + + def test_multiple_violations_per_scenario(self) -> None: + uuid_a = scenario_uuid("f::a:r") + scenario_map = {uuid_a: "f::a:r"} + violation_rows = [ + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="required", + message="m1", + ), + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="bounds", + message="m2", + ), + ] + result = index_violations(violation_rows, scenario_map) + assert result["f::a:r"] == {("a", "required"), ("a", "bounds")} + + def test_empty_violations(self) -> None: + result = index_violations([], {}) + assert result == {} diff --git a/packages/overture-schema-pyspark/tests/test_helpers.py b/packages/overture-schema-pyspark/tests/test_helpers.py new file mode 100644 index 000000000..b202ce3d1 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_helpers.py @@ -0,0 +1,147 @@ +"""Tests for the conformance test 
helpers.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from ._support.helpers import PathTraversalError, deep_merge, set_at_path + + +class TestSetAtPath: + def test_simple_field(self) -> None: + row = {"name": "Alice"} + result = set_at_path("name", "Bob")(row) + assert result["name"] == "Bob" + + def test_does_not_mutate_original(self) -> None: + row = {"name": "Alice"} + set_at_path("name", "Bob")(row) + assert row["name"] == "Alice" + + def test_nested_field(self) -> None: + row = {"outer": {"inner": "old"}} + result = set_at_path("outer.inner", "new")(row) + assert result["outer"]["inner"] == "new" + + def test_array_index_zero(self) -> None: + row = {"items": [{"value": 1}, {"value": 2}]} + result = set_at_path("items[].value", 99)(row) + assert result["items"][0]["value"] == 99 + assert result["items"][1]["value"] == 2 # untouched + + def test_set_to_none(self) -> None: + row = {"country": "US"} + result = set_at_path("country", None)(row) + assert result["country"] is None + + def test_nested_array(self) -> None: + row = {"rules": [{"tags": [{"v": "x"}]}]} + result = set_at_path("rules[].tags[].v", "y")(row) + assert result["rules"][0]["tags"][0]["v"] == "y" + + def test_deep_nested(self) -> None: + row = {"a": {"b": {"c": {"d": "old"}}}} + result = set_at_path("a.b.c.d", "new")(row) + assert result["a"]["b"]["c"]["d"] == "new" + + def test_returns_callable(self) -> None: + mutate = set_at_path("a.b", 1) + assert callable(mutate) + assert mutate({"a": {"b": 0}}) == {"a": {"b": 1}} + + +class TestSetAtPathTraversalErrors: + def test_raises_on_empty_array(self) -> None: + row: dict[str, Any] = {"items": []} + with pytest.raises(PathTraversalError): + set_at_path("items[].value", "x")(row) + + def test_raises_on_empty_nested_array(self) -> None: + row: dict[str, Any] = {"names": {"rules": []}} + with pytest.raises(PathTraversalError): + set_at_path("names.rules[].value", "x")(row) + + def 
test_error_message_empty_array_names_path(self) -> None: + row: dict[str, Any] = {"names": {"rules": []}} + with pytest.raises(PathTraversalError, match="rules"): + set_at_path("names.rules[].value", "x")(row) + + def test_raises_on_empty_path(self) -> None: + mutator = set_at_path("", "x") + with pytest.raises(PathTraversalError, match="Empty path"): + mutator({}) + + +class TestSetAtPathScaffolding: + def test_null_struct_intermediate_scaffolded(self) -> None: + row = {"id": "x", "names": None} + result = set_at_path("names.primary", "test")(row) + assert result["names"]["primary"] == "test" + + def test_null_array_intermediate_scaffolded(self) -> None: + row = {"id": "x", "rules": None} + result = set_at_path("rules[].value", "test")(row) + assert result["rules"][0]["value"] == "test" + + def test_null_nested_struct_in_array_scaffolded(self) -> None: + row = {"id": "x", "items": [{"nested": None}]} + result = set_at_path("items[].nested.field", "test")(row) + assert result["items"][0]["nested"]["field"] == "test" + + def test_deep_null_chain_scaffolded(self) -> None: + row = {"id": "x", "a": None} + result = set_at_path("a.b[].c", "test")(row) + assert result["a"]["b"][0]["c"] == "test" + + def test_chained_calls_preserve_prior_content(self) -> None: + """Chaining set_at_path preserves values set by prior calls.""" + row = {"items": None} + with_kind = set_at_path("items[].kind", "height")(row) + with_both = set_at_path("items[].value", 5.2)(with_kind) + assert with_both["items"][0]["kind"] == "height" + assert with_both["items"][0]["value"] == 5.2 + + def test_chained_calls_through_deep_null_path(self) -> None: + """Chained calls scaffold and preserve through deeply nested nulls.""" + row = {"outer": None} + with_disc = set_at_path("outer[].inner[].dimension", "height")(row) + with_value = set_at_path("outer[].inner[].value", None)(with_disc) + assert with_value["outer"][0]["inner"][0]["dimension"] == "height" + assert 
with_value["outer"][0]["inner"][0]["value"] is None + + +class TestDeepMerge: + def test_flat_merge(self) -> None: + base = {"a": 1, "b": 2} + scaffold = {"b": 3, "c": 4} + assert deep_merge(base, scaffold) == {"a": 1, "b": 3, "c": 4} + + def test_nested_dict_merge(self) -> None: + base = {"a": {"x": 1, "y": 2}} + scaffold = {"a": {"y": 3, "z": 4}} + assert deep_merge(base, scaffold) == {"a": {"x": 1, "y": 3, "z": 4}} + + def test_array_replace(self) -> None: + base = {"items": [{"a": 1}]} + scaffold = {"items": [{"b": 2}]} + assert deep_merge(base, scaffold) == {"items": [{"b": 2}]} + + def test_does_not_mutate_base(self) -> None: + base = {"a": {"x": 1}} + scaffold = {"a": {"y": 2}} + result = deep_merge(base, scaffold) + assert "y" not in base["a"] + assert result == {"a": {"x": 1, "y": 2}} + + def test_empty_scaffold(self) -> None: + base = {"a": 1} + assert deep_merge(base, {}) == {"a": 1} + + def test_scaffold_adds_new_key(self) -> None: + base = {"a": 1} + scaffold = {"speed_limits": [{"max_speed": {"value": 60}}]} + result = deep_merge(base, scaffold) + assert result["a"] == 1 + assert result["speed_limits"] == [{"max_speed": {"value": 60}}] diff --git a/packages/overture-schema-pyspark/tests/test_mutations.py b/packages/overture-schema-pyspark/tests/test_mutations.py new file mode 100644 index 000000000..f4e233dad --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_mutations.py @@ -0,0 +1,263 @@ +"""Tests for model-level mutation functions.""" + +import pytest + +from ._support.helpers import PathTraversalError +from ._support.mutations import ( + mutate_forbid_if, + mutate_min_fields_set, + mutate_radio_group, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) + + +class TestMutateRequireAnyOf: + def test_nulls_all_named_fields(self) -> None: + row = {"a": 1, "b": 2, "c": 3} + result = mutate_require_any_of(row, ["a", "b"]) + assert result["a"] is None + assert result["b"] is None + assert result["c"] == 3 + + def 
test_does_not_mutate_original(self) -> None: + row = {"a": 1, "b": 2} + mutate_require_any_of(row, ["a"]) + assert row["a"] == 1 + + +class TestMutateRadioGroup: + def test_sets_two_fields_to_true(self) -> None: + row = {"is_land": True, "is_territorial": False, "other": "x"} + result = mutate_radio_group(row, ["is_land", "is_territorial"]) + assert result["is_land"] is True + assert result["is_territorial"] is True + + def test_does_not_mutate_original(self) -> None: + row = {"a": False, "b": False} + mutate_radio_group(row, ["a", "b"]) + assert row["a"] is False + + +class TestMutateMinFieldsSet: + def test_nulls_all_named_fields(self) -> None: + row = {"a": 1, "b": 2, "c": 3} + result = mutate_min_fields_set(row, ["a", "b", "c"]) + assert result["a"] is None + assert result["b"] is None + assert result["c"] is None + + def test_leaves_unlisted_fields_alone(self) -> None: + row = {"a": 1, "b": 2, "other": "keep"} + result = mutate_min_fields_set(row, ["a", "b"]) + assert result["other"] == "keep" + + def test_does_not_mutate_original(self) -> None: + row = {"a": 1, "b": 2} + mutate_min_fields_set(row, ["a", "b"]) + assert row["a"] == 1 + + def test_with_array_path_nulls_inside_each_element(self) -> None: + row = {"items": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]} + result = mutate_min_fields_set(row, ["a", "b"], array_path="items") + assert result["items"] == [{"a": None, "b": None}, {"a": None, "b": None}] + + +class TestMutateRequireIf: + def test_sets_condition_and_nulls_targets(self) -> None: + row = {"subtype": "other", "admin_level": 5} + result = mutate_require_if(row, ["admin_level"], "subtype", "country") + assert result["subtype"] == "country" + assert result["admin_level"] is None + + def test_does_not_mutate_original(self) -> None: + row = {"subtype": "other", "admin_level": 5} + mutate_require_if(row, ["admin_level"], "subtype", "country") + assert row["subtype"] == "other" + + +class TestMutateForbidIf: + def 
class TestMutateRequireAnyOfNested:
    """Exercises `mutate_require_any_of` when the fields sit under an array path."""

    def test_nulls_fields_within_array_elements(self) -> None:
        """Every element loses `a` and `b` but keeps a non-null `c`."""
        source = {
            "items": [
                {"a": 1, "b": 2, "c": 3},
                {"a": 4, "b": 5, "c": 6},
            ]
        }
        mutated = mutate_require_any_of(source, ["a", "b"], array_path="items")
        for element in mutated["items"]:
            assert element["a"] is None
            assert element["b"] is None
            assert element["c"] is not None

    def test_nulls_fields_within_nested_struct(self) -> None:
        """With `struct_path`, the fields are nulled inside the nested struct."""
        source = {
            "items": [
                {"when": {"a": 1, "b": 2}},
            ]
        }
        mutated = mutate_require_any_of(
            source, ["a", "b"], array_path="items", struct_path="when"
        )
        nested = mutated["items"][0]["when"]
        assert nested["a"] is None
        assert nested["b"] is None

    def test_creates_stub_element_when_array_is_null(self) -> None:
        """A null array is replaced by a one-element list with nulled fields."""
        source = {"items": None}
        mutated = mutate_require_any_of(source, ["a", "b"], array_path="items")
        items = mutated["items"]
        assert isinstance(items, list)
        assert len(items) == 1
        stub = items[0]
        assert stub["a"] is None
        assert stub["b"] is None

    def test_creates_stub_with_struct_path_when_null(self) -> None:
        """A null array plus `struct_path` yields a stub holding the nested struct."""
        source = {"items": None}
        mutated = mutate_require_any_of(
            source, ["a", "b"], array_path="items", struct_path="when"
        )
        nested = mutated["items"][0]["when"]
        assert nested["a"] is None
        assert nested["b"] is None

    def test_does_not_mutate_original(self) -> None:
        """Elements of the input array are left untouched."""
        source = {"items": [{"a": 1, "b": 2}]}
        mutate_require_any_of(source, ["a", "b"], array_path="items")
        assert source["items"][0]["a"] == 1


class TestMutateForbidIfNegate:
    """Exercises `mutate_forbid_if` with `negate=True`."""

    def test_negate_changes_condition_value(self) -> None:
        """The condition field ends up different from the given condition value."""
        source = {"subtype": "road", "destinations": [{"id": "x"}]}
        mutated = mutate_forbid_if(
            source, ["destinations"], "subtype", "road", negate=True
        )
        assert mutated["subtype"] != "road"
        assert mutated["destinations"] is not None

    def test_negate_preserves_non_matching_value(self) -> None:
        """A condition field that already differs is left as it was."""
        source = {"subtype": "water", "class": "canal"}
        mutated = mutate_forbid_if(source, ["class"], "subtype", "road", negate=True)
        assert mutated["subtype"] == "water"
class TestMutateUniqueItems:
    """Exercises `mutate_unique_items` over plain, dotted and bracketed paths."""

    def test_duplicates_first_element(self) -> None:
        """The first two items are equal and the list grows from 2 to 3."""
        source = {"ids": [{"value": "a"}, {"value": "b"}]}
        ids = mutate_unique_items(source, "ids")["ids"]
        assert ids[0] == ids[1]
        assert len(ids) == 3

    def test_nested_path(self) -> None:
        """A dotted path reaches a list nested inside a struct."""
        source = {"outer": {"ids": [{"v": 1}, {"v": 2}]}}
        ids = mutate_unique_items(source, "outer.ids")["outer"]["ids"]
        assert ids[0] == ids[1]

    def test_does_not_mutate_original(self) -> None:
        """The input list keeps its original length."""
        source = {"ids": [{"value": "a"}, {"value": "b"}]}
        mutate_unique_items(source, "ids")
        assert len(source["ids"]) == 2

    def test_bracket_path_enters_array_element(self) -> None:
        """A `[]` in the path descends into element 0 of that array."""
        source = {
            "restrictions": [{"when": {"mode": [{"type": "car"}, {"type": "bus"}]}}]
        }
        mutated = mutate_unique_items(source, "restrictions[].when.mode")
        mode = mutated["restrictions"][0]["when"]["mode"]
        assert mode[0] == mode[1]
        assert len(mode) == 3

    def test_empty_array_returns_unchanged(self) -> None:
        """An empty target list stays empty."""
        source: dict = {"items": []}
        mutated = mutate_unique_items(source, "items")
        assert mutated["items"] == []

    def test_none_array_raises_traversal_error(self) -> None:
        """A None value at the target path raises PathTraversalError."""
        source: dict = {"ids": None}
        with pytest.raises(PathTraversalError):
            mutate_unique_items(source, "ids")

    def test_missing_key_raises_traversal_error(self) -> None:
        """A path through a key that is absent raises PathTraversalError."""
        source: dict = {"other": "x"}
        with pytest.raises(PathTraversalError):
            mutate_unique_items(source, "missing.nested")

    def test_nested_bracket_deep(self) -> None:
        """Two levels of bracket nesting."""
        source: dict = {"outer": [{"inner": [{"vals": [{"x": 1}]}]}]}
        mutated = mutate_unique_items(source, "outer[].inner[].vals")
        vals = mutated["outer"][0]["inner"][0]["vals"]
        assert vals[0] == vals[1]

    def test_terminal_bracket_duplicates_inner_list(self) -> None:
        """Terminal `[]` targets the inner list at element 0 of the named field."""
        source: dict = {"hierarchies": [[{"a": 1}]]}
        inner = mutate_unique_items(source, "hierarchies[]")["hierarchies"][0]
        assert inner[0] == inner[1]
        assert len(inner) == 2

    def test_terminal_bracket_non_list_inner_raises(self) -> None:
        """Terminal `[]` with non-list content at element 0 raises."""
        source: dict = {"hierarchies": [{"a": 1}]}
        with pytest.raises(PathTraversalError):
            mutate_unique_items(source, "hierarchies[]")
class TestEvaluateChecks:
    """Tests for evaluate_checks()."""

    def test_appends_error_columns(self, spark: SparkSession) -> None:
        """One check adds `_err_0` on top of the input columns."""
        frame = spark.createDataFrame([_row()])
        failing = _scalar_check("value", "required", F.lit("fail"))
        evaluated = evaluate_checks(frame, [failing])
        assert "_err_0" in evaluated.columns
        # Strict subset: the evaluated frame has every input column plus more.
        assert set(frame.columns) < set(evaluated.columns)

    def test_multiple_checks(self, spark: SparkSession) -> None:
        """Two checks produce `_err_0` and `_err_1`."""
        frame = spark.createDataFrame([_row()])
        both = [
            _scalar_check("a", "c1", F.lit("e1")),
            _scalar_check("b", "c2", F.lit("e2")),
        ]
        evaluated = evaluate_checks(frame, both)
        for column in ("_err_0", "_err_1"):
            assert column in evaluated.columns

    def test_error_column_is_array_string(self, spark: SparkSession) -> None:
        """A failing scalar check is collected as a one-item list of messages."""
        frame = spark.createDataFrame([_row()])
        failing = _scalar_check("value", "required", F.lit("fail"))
        collected = evaluate_checks(frame, [failing]).collect()
        assert collected[0]["_err_0"] == ["fail"]

    def test_null_error_for_passing_check(self, spark: SparkSession) -> None:
        """A check whose expression is null is collected as an empty list."""
        frame = spark.createDataFrame([_row()])
        passing = _scalar_check("value", "ok", F.lit(None).cast("string"))
        collected = evaluate_checks(frame, [passing]).collect()
        assert collected[0]["_err_0"] == []
class TestExplainErrors:
    """Tests for explain_errors()."""

    def test_scalar_violation(self, spark: SparkSession) -> None:
        """A failing scalar check yields one row naming field, check and message."""
        frame = spark.createDataFrame([_row()])
        checks = [_scalar_check("value", "required", F.lit("missing"))]
        explained = explain_errors(evaluate_checks(frame, checks), checks)
        violations = explained.collect()
        assert len(violations) == 1
        only = violations[0]
        assert only["field"] == "value"
        assert only["check"] == "required"
        assert only["message"] == "missing"

    def test_array_violation(self, spark: SparkSession) -> None:
        """An array check with two messages yields one row per message."""
        frame = spark.createDataFrame([_row()])
        checks = [_array_check("arr", "elem", F.array(F.lit("e1"), F.lit("e2")))]
        explained = explain_errors(evaluate_checks(frame, checks), checks)
        messages = sorted(violation["message"] for violation in explained.collect())
        assert messages == ["e1", "e2"]

    def test_no_violations(self, spark: SparkSession) -> None:
        """A passing check yields no rows."""
        frame = spark.createDataFrame([_row()])
        checks = [_scalar_check("value", "ok", F.lit(None).cast("string"))]
        explained = explain_errors(evaluate_checks(frame, checks), checks)
        assert explained.count() == 0

    def test_preserves_original_columns(self, spark: SparkSession) -> None:
        """Input columns and their values are kept next to the three new columns."""
        frame = spark.createDataFrame([_row()])
        checks = [_scalar_check("value", "required", F.lit("fail"))]
        explained = explain_errors(evaluate_checks(frame, checks), checks)
        violations = explained.collect()
        assert violations[0]["id"] == "id1"
        assert set(explained.columns) == {*frame.columns, "field", "check", "message"}

    def test_output_columns(self, spark: SparkSession) -> None:
        """The output columns are the input columns plus field/check/message."""
        frame = spark.createDataFrame([_row()])
        checks = [_scalar_check("x", "required", F.lit("err"))]
        explained = explain_errors(evaluate_checks(frame, checks), checks)
        wanted = {*frame.columns, "field", "check", "message"}
        assert set(explained.columns) == wanted

    def test_empty_checks_returns_empty_dataframe_with_schema(
        self, spark: SparkSession
    ) -> None:
        """An empty check list gives an empty frame that still has the columns."""
        # Regression: with no checks, explain_errors must return a typed
        # empty DataFrame instead of invoking `stack(0, ...)`, which Spark
        # rejects. Consumers expect the standard `field/check/message`
        # columns even when nothing fired.
        frame = spark.createDataFrame([_row()])
        explained = explain_errors(frame, [])
        assert explained.count() == 0
        assert set(explained.columns) == {*frame.columns, "field", "check", "message"}
class TestSinglePassPipeline:
    """Tests for the evaluate-once pattern used by the CLI."""

    def test_shared_evaluated_gives_same_results(self, spark: SparkSession) -> None:
        """filter_errors and explain_errors agree when fed one evaluated frame."""
        frame = spark.createDataFrame([_row(id="ok"), _row(id="bad")])
        # The check produces a message only for the row whose id is "bad".
        not_bad = _scalar_check(
            "id",
            "not_bad",
            F.when(F.col("id") == "bad", F.lit("is bad")),
        )
        checks = [not_bad]
        evaluated = evaluate_checks(frame, checks)

        failing_rows = filter_errors(evaluated, checks)
        violations = explain_errors(evaluated, checks)

        assert failing_rows.count() == 1
        assert failing_rows.collect()[0]["id"] == "bad"
        assert violations.count() == 1
        assert violations.collect()[0]["field"] == "id"
"radio_group")} + + def test_mixed(self, spark: SparkSession) -> None: + check = Check( + field="radio_group", + name="radio_group", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field=None, + ) + roots, pairs = _normalize_suppress( + [ + "sources", + ("theme", "enum"), + check, + ] + ) + assert roots == {"sources"} + assert pairs == {("theme", "enum"), ("radio_group", "radio_group")} + + +# These exercise the populated REGISTRY built by runtime discovery, so they +# require generated expression modules to be present on disk. When the +# generated tree is absent (e.g. a fresh checkout before `make +# generate-pyspark`), the registry is empty and these assertions can't hold. +_requires_generated = pytest.mark.skipif( + not REGISTRY, reason="requires generated expression modules" +) + + +@_requires_generated +def test_feature_names_includes_aliases() -> None: + result = feature_names() + assert isinstance(result, list) + assert result == sorted(result) + assert "building" in result + assert "segment" in result + assert "overture.schema.buildings:Building" in result + + +@_requires_generated +def test_feature_keys_only_canonical() -> None: + result = feature_keys() + assert isinstance(result, list) + assert result == sorted(result) + assert "overture.schema.buildings:Building" in result + assert "building" not in result + + +class TestValidationResult: + def test_error_rows_delegates_to_filter_errors(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row(id="ok"), _row(id="bad")]) + checks = [ + _scalar_check( + "id", + "not_bad", + F.when(F.col("id") == "bad", F.lit("is bad")), + ), + ] + evaluated = evaluate_checks(df, checks) + result = ValidationResult( + evaluated=evaluated, + checks=checks, + schema_mismatches=[], + suppressed_checks=[], + ) + error_rows = result.error_rows() + assert error_rows.count() == 1 + assert error_rows.collect()[0]["id"] == "bad" + assert not any(c.startswith("_err_") for c in error_rows.columns) + + def 
_VF_TYPE = "_test_validate_feature"
# Five nullable string columns, in this order.
_VF_SCHEMA = StructType(
    [
        StructField(column, StringType(), True)
        for column in ("id", "theme", "type", "value", "sources")
    ]
)


def _vf_checks() -> list[Check]:
    """Build the three scalar checks registered for the test feature type.

    Each expression yields a message only when its condition holds:
    `theme` differs from "test", `value` is null, or `sources` has a
    length below 1.
    """
    theme_enum = Check(
        field="theme",
        name="enum",
        expr=F.when(F.col("theme") != "test", F.lit("bad theme")),
        shape=CheckShape.SCALAR,
        root_field="theme",
    )
    value_required = Check(
        field="value",
        name="required",
        expr=F.when(F.col("value").isNull(), F.lit("missing")),
        shape=CheckShape.SCALAR,
        root_field="value",
    )
    # The check's field name differs from its root_field on purpose here.
    sources_min_length = Check(
        field="sources_min_length",
        name="min_length",
        expr=F.when(F.length("sources") < 1, F.lit("too short")),
        shape=CheckShape.SCALAR,
        root_field="sources",
    )
    return [theme_enum, value_required, sources_min_length]
3 + assert result.error_rows().count() == 0 + + def test_skip_columns_errors_if_present(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="skip_columns.*theme.*present"): + validate_feature(vf_df, _VF_TYPE, skip_columns=["theme"]) + + def test_skip_columns_filters_checks(self, spark: SparkSession) -> None: + schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + check_fields = [c.field for c in result.checks] + assert "theme" not in check_fields + assert "value" in check_fields + + def test_skip_columns_filters_schema_mismatches(self, spark: SparkSession) -> None: + schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + mismatch_fields = [m.path for m in result.schema_mismatches] + assert "theme" not in mismatch_fields + + def test_ignore_extra_columns(self, spark: SparkSession) -> None: + schema_extra = StructType( + _VF_SCHEMA.fields + [StructField("extra_score", StringType(), True)] + ) + df = spark.createDataFrame( + [ + Row( + id="1", + theme="test", + type=_VF_TYPE, + value="ok", + sources="s", + extra_score="9", + ) + ], + schema=schema_extra, + ) + result = validate_feature(df, _VF_TYPE, ignore_extra_columns=["extra_score"]) + mismatch_paths = [m.path for m in result.schema_mismatches] + assert "extra_score" not in mismatch_paths + + def test_suppress_unknown_root_raises(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="unknown root fields.*typo_field"): + validate_feature(vf_df, _VF_TYPE, suppress=["typo_field"]) + + def test_suppress_unknown_pair_raises(self, vf_df: DataFrame) -> 
None: + with pytest.raises(ValueError, match=r"unknown \(field, name\) pairs"): + validate_feature(vf_df, _VF_TYPE, suppress=[("theme", "wrong_name")]) + + def test_suppress_mixed_unknown_lists_both(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="unknown root fields.*unknown"): + validate_feature( + vf_df, + _VF_TYPE, + suppress=["typo_field", ("theme", "wrong_name")], + ) + + def test_suppress_bare_string(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, suppress=["sources"]) + check_fields = [c.field for c in result.checks] + assert not any(f.startswith("sources") for f in check_fields) + assert len(result.suppressed_checks) == 1 + assert result.suppressed_checks[0].field == "sources_min_length" + + def test_suppress_tuple(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, suppress=[("value", "required")]) + check_fields_names = [(c.field, c.name) for c in result.checks] + assert ("value", "required") not in check_fields_names + assert len(result.suppressed_checks) == 1 + + def test_suppress_check_object(self, vf_df: DataFrame) -> None: + initial = validate_feature(vf_df, _VF_TYPE) + target = [c for c in initial.checks if c.name == "required"][0] + result = validate_feature(vf_df, _VF_TYPE, suppress=[target]) + # Column objects can't be compared with ==, so compare by (field, name) + result_pairs = [(c.field, c.name) for c in result.checks] + suppressed_pairs = [(c.field, c.name) for c in result.suppressed_checks] + assert (target.field, target.name) not in result_pairs + assert (target.field, target.name) in suppressed_pairs + + def test_evaluated_has_err_columns(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE) + err_cols = [c for c in result.evaluated.columns if c.startswith("_err_")] + assert len(err_cols) == len(result.checks) + + def test_suppressed_checks_not_in_checks(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, 
suppress=[("theme", "enum")]) + for sc in result.suppressed_checks: + assert sc not in result.checks + + def test_all_checks_suppressed(self, vf_df: DataFrame) -> None: + result = validate_feature( + vf_df, + _VF_TYPE, + suppress=["theme", "value", "sources"], + ) + assert result.checks == [] + assert result.error_rows().count() == 0 diff --git a/packages/overture-schema-system/src/overture/schema/system/case.py b/packages/overture-schema-system/src/overture/schema/system/case.py new file mode 100644 index 000000000..62b3733ae --- /dev/null +++ b/packages/overture-schema-system/src/overture/schema/system/case.py @@ -0,0 +1,26 @@ +"""PascalCase to snake_case conversion.""" + +import re + +__all__ = ["to_snake_case"] + +_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])") +_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])") + + +def to_snake_case(name: str) -> str: + """Convert PascalCase to snake_case. + + Handles acronym runs correctly: "HTMLParser" becomes "html_parser", + not "h_t_m_l_parser". 
+ + >>> to_snake_case("HTMLParser") + 'html_parser' + >>> to_snake_case("BuildingPart") + 'building_part' + >>> to_snake_case("simple") + 'simple' + """ + name = _ACRONYM_BOUNDARY.sub(r"\1_\2", name) + name = _CAMEL_BOUNDARY.sub(r"\1_\2", name) + return name.lower() diff --git a/packages/overture-schema-system/src/overture/schema/system/discovery/__init__.py b/packages/overture-schema-system/src/overture/schema/system/discovery/__init__.py index ed8af77ad..c894f591e 100644 --- a/packages/overture-schema-system/src/overture/schema/system/discovery/__init__.py +++ b/packages/overture-schema-system/src/overture/schema/system/discovery/__init__.py @@ -5,6 +5,12 @@ filter_models, get_registered_model, ) +from .entry_point import ( + entry_point_class_alias, + entry_point_to_path, + resolve_entry_point_key, + split_entry_point, +) from .keys import ModelKey from .types import ModelDict @@ -13,7 +19,11 @@ "ModelKey", "TagSelector", "discover_models", + "entry_point_class_alias", + "entry_point_to_path", "filter_models", "get_registered_model", + "resolve_entry_point_key", + "split_entry_point", "tag", ] diff --git a/packages/overture-schema-system/src/overture/schema/system/discovery/entry_point.py b/packages/overture-schema-system/src/overture/schema/system/discovery/entry_point.py new file mode 100644 index 000000000..270c8addd --- /dev/null +++ b/packages/overture-schema-system/src/overture/schema/system/discovery/entry_point.py @@ -0,0 +1,119 @@ +"""Entry-point string utilities.""" + +from __future__ import annotations + +from collections.abc import Mapping +from pathlib import PurePosixPath + +from ..case import to_snake_case + +__all__ = [ + "entry_point_class_alias", + "entry_point_to_path", + "resolve_entry_point_key", + "split_entry_point", +] + + +def split_entry_point(entry_point_path: str) -> tuple[str, str]: + """Split `"module.path:ClassName"` into dotted module and class name. 
def entry_point_to_path(entry_point_path: str) -> tuple[PurePosixPath, str]:
    """Translate an entry-point string into a directory path and class name.

    Each dotted component of the module becomes a directory, mirroring
    the source package structure. The result is stable regardless of the
    set of installed packages.

    Parameters
    ----------
    entry_point_path
        String in `"module.path:ClassName"` form.

    Returns
    -------
    tuple[PurePosixPath, str]
        Directory derived from the module path, and the class name.

    Raises
    ------
    ValueError
        Propagated from `split_entry_point` when the string has no colon.

    Examples
    --------
    >>> entry_point_to_path("overture.schema.places:Place")
    (PurePosixPath('overture/schema/places'), 'Place')
    """
    module, cls = split_entry_point(entry_point_path)
    return PurePosixPath(*module.split(".")), cls


def entry_point_class_alias(entry_point_path: str) -> str:
    """Snake-case class name from an entry-point string.

    The alias is the user-friendly form used to look up entry-point
    keys in a registry (e.g. `"place"` resolves
    `"overture.schema.places:Place"`). Input without a colon is treated
    as a bare class name and snake-cased directly, so the function is
    safe to apply to every key in an arbitrary registry mapping.

    Parameters
    ----------
    entry_point_path
        String in `"module.path:ClassName"` form, or a bare name.

    Examples
    --------
    >>> entry_point_class_alias("overture.schema.divisions:DivisionArea")
    'division_area'
    """
    # Everything after the last colon; the whole string when there is none.
    cls = entry_point_path.rsplit(":", 1)[-1]
    return to_snake_case(cls)


def resolve_entry_point_key(name: str, registry: Mapping[str, object]) -> str:
    """Resolve a user-supplied name to a canonical entry-point key.

    Tries exact match first, then snake-case class-name alias. Raises
    `ValueError` when the alias is ambiguous (matches more than one
    registered key) or when the name is unknown.

    Parameters
    ----------
    name
        User-supplied identifier: an entry-point key or a snake-case
        class-name alias.
    registry
        Mapping whose keys are entry-point strings.

    Returns
    -------
    str
        The canonical registry key.

    Raises
    ------
    ValueError
        If `name` matches multiple registry entries via alias, or no
        registry entry at all. The message lists the candidates or the
        known keys to aid recovery; an empty registry is reported as
        `(none registered)`.
    """
    if name in registry:
        return name
    candidates = sorted(k for k in registry if entry_point_class_alias(k) == name)
    if len(candidates) == 1:
        return candidates[0]
    if candidates:
        raise ValueError(
            f"Entry-point alias {name!r} is ambiguous. "
            f"Specify one of: {', '.join(candidates)}"
        )
    # An empty registry would otherwise leave a dangling "Known: " with
    # nothing after it, which reads like a truncated message.
    known = ", ".join(sorted(registry)) or "(none registered)"
    raise ValueError(f"Unknown entry-point alias {name!r}. Known: {known}")
`list[list[X]]` parses as a single `ArraySegment` with + `iter_count=2`). + +The canonical string form (`str(path)`) round-trips through `parse`. +Code that needs to emit a path into source or labels calls `str(path)` +at the boundary; everything else operates on segments. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TypeAlias + +__all__ = [ + "ArrayPath", + "ArraySegment", + "FieldPath", + "PathSegment", + "ScalarPath", + "StructSegment", + "coerce", + "parse", + "promote_terminal_array", +] + + +@dataclass(frozen=True, slots=True) +class StructSegment: + """A struct field navigation step.""" + + name: str + + +@dataclass(frozen=True, slots=True) +class ArraySegment: + """An array column entered with one or more levels of iteration. + + `iter_count` records the number of `[]` markers immediately following + the segment name; values > 1 correspond to nested lists like + `list[list[X]]`. + """ + + name: str + iter_count: int = 1 + + +PathSegment: TypeAlias = StructSegment | ArraySegment + + +@dataclass(frozen=True, slots=True) +class ScalarPath: + """Locate a non-iterated value in a row.""" + + segments: tuple[StructSegment, ...] = () + + def append_struct(self, name: str) -> ScalarPath: + return ScalarPath(segments=self.segments + (StructSegment(name=name),)) + + def append_array(self, name: str, iter_count: int = 1) -> ArrayPath: + return ArrayPath( + segments=self.segments + (ArraySegment(name=name, iter_count=iter_count),) + ) + + def __str__(self) -> str: + return ".".join(s.name for s in self.segments) + + +@dataclass(frozen=True, slots=True) +class ArrayPath: + """Locate an iterated value; iteration structure is part of the location. + + Invariant: `segments` contains at least one `ArraySegment`. + """ + + segments: tuple[PathSegment, ...] 
+ + def __post_init__(self) -> None: + if not any(isinstance(s, ArraySegment) for s in self.segments): + raise ValueError("ArrayPath must contain at least one ArraySegment") + + def append_struct(self, name: str) -> ArrayPath: + return ArrayPath(segments=self.segments + (StructSegment(name=name),)) + + def append_array(self, name: str, iter_count: int = 1) -> ArrayPath: + return ArrayPath( + segments=self.segments + (ArraySegment(name=name, iter_count=iter_count),) + ) + + @property + def column_prefix(self) -> ScalarPath: + """Struct segments before the first ArraySegment. + + Returns an empty `ScalarPath(())` when the array is the first + segment. + """ + prefix: list[StructSegment] = [] + for seg in self.segments: + if isinstance(seg, ArraySegment): + break + prefix.append(seg) + return ScalarPath(segments=tuple(prefix)) + + @property + def column_path(self) -> str: + """Dotted name of the outermost array column. + + The struct prefix plus the first ArraySegment's name (unbracketed). + This is what `F.col(...)` or `array_check("...", ...)` consumes. + """ + first_prefix, first_array, _first_iter = self.array_chunks[0] + return ".".join((*first_prefix, first_array)) + + @property + def leaf(self) -> tuple[str, ...]: + """Names of struct segments after the last ArraySegment.""" + last_array = next( + i + for i in range(len(self.segments) - 1, -1, -1) + if isinstance(self.segments[i], ArraySegment) + ) + return tuple(s.name for s in self.segments[last_array + 1 :]) + + @property + def array_chunks( + self, + ) -> tuple[tuple[tuple[str, ...], str, int], ...]: + """One chunk per ArraySegment. + + Each entry is `(prefix_structs, array_name, iter_count)` where + `prefix_structs` is the sequence of struct segment names between + the previous ArraySegment (or the start of the path) and this + ArraySegment. 
+ """ + chunks: list[tuple[tuple[str, ...], str, int]] = [] + prefix: list[str] = [] + for seg in self.segments: + if isinstance(seg, ArraySegment): + chunks.append((tuple(prefix), seg.name, seg.iter_count)) + prefix = [] + else: + prefix.append(seg.name) + return tuple(chunks) + + def element_relative_gate(self, gate: FieldPath) -> tuple[str, ...] | None: + """Path inside this array's element scope that names *gate*. + + Three return states: + + - ``tuple[str, ...]`` (non-empty) -- "reachable with descent": + `gate` enters the same outer array as this path and names a + struct descendant inside its element. The returned segments + name that descendant relative to the element. + - ``()`` -- "reachable, no descent": `gate` is the outer array + itself; the element variable IS the gated value. + - ``None`` -- "not reachable": `gate` does not cross into this + path's element scope (different outer array, scalar gate, + mismatched struct prefix, etc.). Callers must apply the gate + at column level instead. + + Raises `NotImplementedError` when `gate` enters the same outer + array but contains a nested `ArraySegment` past the boundary; + the element scope is a struct, so a gate path inside it must be + struct-only. + + Example: `parse("items[].x").element_relative_gate(parse( + "items[].nested")) == ("nested",)`. 
+ """ + column_prefix = self.column_prefix.segments + n_prefix = len(column_prefix) + if not isinstance(gate, ArrayPath): + return None + gate_segs = gate.segments + if len(gate_segs) <= n_prefix: + return None + for i in range(n_prefix): + if not isinstance(gate_segs[i], StructSegment): + return None + if gate_segs[i].name != column_prefix[i].name: + return None + target_first_array_name = self.segments[n_prefix].name + gate_boundary = gate_segs[n_prefix] + if not isinstance(gate_boundary, ArraySegment): + return None + if gate_boundary.name != target_first_array_name: + return None + inner_segments = gate_segs[n_prefix + 1 :] + for seg in inner_segments: + if not isinstance(seg, StructSegment): + raise NotImplementedError( + f"gate path contains a nested array segment past the " + f"element boundary (gate={gate!r}, self={self!r})" + ) + return tuple(s.name for s in inner_segments) + + @property + def iter_struct_paths(self) -> tuple[tuple[str, ...], ...]: + """Per non-outermost iteration: the struct path that reaches its array. + + For each ArraySegment past the first, emit `(prefix_structs + + array_name)` -- the navigation FROM the previous iteration's + element TO this array. For each `iter_count > 1` on an + ArraySegment, emit `iter_count - 1` additional `()` entries + representing extra iterations inside the same (already-named) + array. + + Returns an empty tuple when the path iterates only once. 
+ """ + paths: list[tuple[str, ...]] = [] + for chunk_idx, (prefix_structs, arr_name, iter_count) in enumerate( + self.array_chunks + ): + if chunk_idx > 0: + paths.append(prefix_structs + (arr_name,)) + for _ in range(iter_count - 1): + paths.append(()) + return tuple(paths) + + def __str__(self) -> str: + return ".".join(_segment_str(s) for s in self.segments) + + +FieldPath: TypeAlias = ScalarPath | ArrayPath + + +def _segment_str(seg: PathSegment) -> str: + if isinstance(seg, ArraySegment): + return seg.name + "[]" * seg.iter_count + return seg.name + + +def parse(encoded: str) -> FieldPath: + """Parse a canonical encoded path like `"items[].nested.value"`. + + Trailing `[]` markers on a dotted part produce an `ArraySegment` + with matching `iter_count`. The empty string returns the empty + `ScalarPath`. Raises `ValueError` when any dotted part has an empty + name (e.g. `".a"`, `"a..b"`, `"[]"`). + """ + if not encoded: + return ScalarPath() + segments: list[PathSegment] = [] + struct_segments: list[StructSegment] = [] + has_array = False + for part in encoded.split("."): + depth = 0 + while part.endswith("[]"): + part = part[:-2] + depth += 1 + if not part: + raise ValueError(f"FieldPath part has empty name in {encoded!r}") + if depth > 0: + has_array = True + segments.append(ArraySegment(name=part, iter_count=depth)) + else: + struct = StructSegment(name=part) + segments.append(struct) + struct_segments.append(struct) + if has_array: + return ArrayPath(segments=tuple(segments)) + return ScalarPath(segments=tuple(struct_segments)) + + +def coerce(value: FieldPath | str) -> FieldPath: + """Return *value* as a `FieldPath`, parsing it from string if needed.""" + if isinstance(value, str): + return parse(value) + return value + + +def promote_terminal_array(path: FieldPath) -> ArrayPath: + """Promote *path*'s terminal segment to an iterated `ArraySegment`. 
+ + A `StructSegment` terminal is *replaced* with `ArraySegment(name, + iter_count=1)`; an `ArraySegment` terminal has its `iter_count` + incremented. This is how a walker records entering a `list[...]` + layer on the field it is already pointing at -- unlike `append_array`, + which adds a new segment for a fresh nested array. Repeated calls + build the multi-iteration terminal of a `list[list[X]]` field. + + Raises `ValueError` on an empty path: there is no terminal segment + to promote. + """ + if not path.segments: + raise ValueError("cannot promote the terminal of an empty path") + *prefix, last = path.segments + if isinstance(last, ArraySegment): + promoted = ArraySegment(name=last.name, iter_count=last.iter_count + 1) + else: + promoted = ArraySegment(name=last.name, iter_count=1) + return ArrayPath(segments=(*prefix, promoted)) diff --git a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py index 14a1ebae1..0ba1be6ce 100644 --- a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py +++ b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py @@ -1,3 +1,4 @@ +import re from typing import Annotated import pytest @@ -210,7 +211,6 @@ class TestModel(BaseModel): def test_stripped_constraint_json_schema_pattern(self) -> None: """StrippedConstraint's JSON schema pattern accepts empty string and rejects leading/trailing whitespace.""" - import re class TestModel(BaseModel): text: Annotated[str, StrippedConstraint()] diff --git a/packages/overture-schema-codegen/tests/test_naming.py b/packages/overture-schema-system/tests/test_case.py similarity index 67% rename from packages/overture-schema-codegen/tests/test_naming.py rename to packages/overture-schema-system/tests/test_case.py index 77e4d5773..21cddcb5a 100644 --- a/packages/overture-schema-codegen/tests/test_naming.py +++ 
class TestEntryPointToPath:
    """`entry_point_to_path` splits `module:Class` into a path and a class name."""

    def test_typical_overture_entry_point(self) -> None:
        path, cls = entry_point_to_path("overture.schema.places:Place")
        assert path == PurePosixPath("overture/schema/places")
        assert cls == "Place"

    def test_single_segment_module(self) -> None:
        path, cls = entry_point_to_path("myschema:Foo")
        assert path == PurePosixPath("myschema")
        assert cls == "Foo"

    def test_deeply_nested_module(self) -> None:
        path, cls = entry_point_to_path("a.b.c.d.e:Thing")
        assert path == PurePosixPath("a/b/c/d/e")
        assert cls == "Thing"

    def test_missing_colon_raises(self) -> None:
        # A dotted name without ":" is rejected; the error message is
        # expected to mention the required "module:Class" form.
        with pytest.raises(ValueError, match="module:Class"):
            entry_point_to_path("overture.schema.places.Place")

    def test_class_name_with_dot_kept(self) -> None:
        # Class name after the colon is taken verbatim — Python class
        # names can't contain dots, but we don't validate.
        path, cls = entry_point_to_path("a.b:Outer.Inner")
        assert path == PurePosixPath("a/b")
        assert cls == "Outer.Inner"


class TestEntryPointClassAlias:
    """`entry_point_class_alias` yields the snake_case form of the class name."""

    def test_returns_snake_case_class_name(self) -> None:
        assert entry_point_class_alias("overture.schema.places:Place") == "place"

    def test_handles_pascal_case_class(self) -> None:
        assert (
            entry_point_class_alias("overture.schema.buildings:BuildingPart")
            == "building_part"
        )

    def test_handles_acronyms(self) -> None:
        # A run of capitals collapses into one word: HTMLParser -> html_parser.
        assert (
            entry_point_class_alias("overture.schema.places:HTMLParser")
            == "html_parser"
        )

    def test_bare_name_is_snake_cased(self) -> None:
        # Tolerant of registry keys that aren't entry-point-formatted —
        # the snake-case form of the whole string is returned.
        assert entry_point_class_alias("BareName") == "bare_name"


class TestResolveEntryPointKey:
    """`resolve_entry_point_key` maps a full key or snake_case alias to a registry key."""

    def test_exact_match(self) -> None:
        registry = {"overture.schema.places:Place": object()}
        assert (
            resolve_entry_point_key("overture.schema.places:Place", registry)
            == "overture.schema.places:Place"
        )

    def test_snake_case_alias_match(self) -> None:
        registry = {"overture.schema.places:Place": object()}
        assert (
            resolve_entry_point_key("place", registry) == "overture.schema.places:Place"
        )

    def test_ambiguous_lists_candidates(self) -> None:
        # Two registry keys share the alias "place", so resolution must fail.
        registry = {
            "overture.schema.places:Place": object(),
            "annex.schema.places:Place": object(),
        }
        with pytest.raises(ValueError, match="ambiguous"):
            resolve_entry_point_key("place", registry)

    def test_unknown_lists_known(self) -> None:
        registry = {"overture.schema.places:Place": object()}
        with pytest.raises(ValueError, match="Unknown"):
            resolve_entry_point_key("zzz", registry)

    def test_acronym_class_name_resolves(self) -> None:
        # NOTE(review): despite the name, this asserts a failure. The alias
        # "html_parser" matches both keys, and the error text is expected to
        # contain the candidate "ns.a:HTMLParser" -- i.e. acronym class names
        # take part in alias matching.
        registry = {
            "ns.a:HTMLParser": object(),
            "ns.b:HTMLParser": object(),
        }
        with pytest.raises(ValueError, match=r"ns\.a:HTMLParser"):
            resolve_entry_point_key("html_parser", registry)
class TestParseAndRoundTrip:
    """`parse` builds the expected segments and `str` reproduces the input."""

    def test_empty_path_parses_to_empty_scalar(self) -> None:
        assert parse("") == ScalarPath(segments=())

    def test_single_segment(self) -> None:
        path = parse("name")
        assert path == ScalarPath(segments=(StructSegment(name="name"),))

    def test_dotted_path(self) -> None:
        path = parse("bbox.xmin")
        assert path == ScalarPath(
            segments=(StructSegment(name="bbox"), StructSegment(name="xmin"))
        )

    def test_array_segment(self) -> None:
        path = parse("items[]")
        assert path == ArrayPath(segments=(ArraySegment(name="items", iter_count=1),))

    def test_array_with_nested_field(self) -> None:
        path = parse("items[].value")
        assert path == ArrayPath(
            segments=(
                ArraySegment(name="items", iter_count=1),
                StructSegment(name="value"),
            )
        )

    def test_nested_list_depth(self) -> None:
        # Two "[]" markers collapse into one segment with iter_count=2.
        path = parse("hierarchies[][]")
        assert path == ArrayPath(
            segments=(ArraySegment(name="hierarchies", iter_count=2),)
        )

    def test_nested_list_with_leaf(self) -> None:
        path = parse("hierarchies[][].value")
        assert path == ArrayPath(
            segments=(
                ArraySegment(name="hierarchies", iter_count=2),
                StructSegment(name="value"),
            )
        )

    def test_complex_path(self) -> None:
        path = parse("speed_limits[].when.vehicle[].dimension")
        assert path == ArrayPath(
            segments=(
                ArraySegment(name="speed_limits", iter_count=1),
                StructSegment(name="when"),
                ArraySegment(name="vehicle", iter_count=1),
                StructSegment(name="dimension"),
            )
        )

    @pytest.mark.parametrize(
        "encoded",
        [
            "",
            "name",
            "bbox.xmin",
            "items[]",
            "items[].value",
            "hierarchies[][]",
            "hierarchies[][].value",
            "speed_limits[].when.vehicle[].dimension",
            "tags_min_length",
        ],
    )
    def test_str_round_trip(self, encoded: str) -> None:
        assert str(parse(encoded)) == encoded


class TestScalarVsArrayPartition:
    """`parse` returns `ArrayPath` exactly when the input has a `[]` marker."""

    def test_no_array_returns_scalar_path(self) -> None:
        assert isinstance(parse("a.b.c"), ScalarPath)

    def test_with_array_returns_array_path(self) -> None:
        assert isinstance(parse("a.b[].c"), ArrayPath)

    def test_empty_is_scalar(self) -> None:
        assert isinstance(parse(""), ScalarPath)


class TestStr:
    """`str()` on hand-built paths renders the canonical dotted/bracketed form."""

    def test_empty_renders_as_empty(self) -> None:
        assert str(ScalarPath()) == ""

    def test_scalar_path_renders_dotted(self) -> None:
        path = ScalarPath(
            segments=(StructSegment(name="bbox"), StructSegment(name="xmin"))
        )
        assert str(path) == "bbox.xmin"

    def test_array_path_renders_with_brackets(self) -> None:
        path = ArrayPath(
            segments=(
                ArraySegment(name="speed_limits", iter_count=1),
                StructSegment(name="when"),
            )
        )
        assert str(path) == "speed_limits[].when"

    def test_array_path_renders_multi_depth(self) -> None:
        path = ArrayPath(segments=(ArraySegment(name="hierarchies", iter_count=2),))
        assert str(path) == "hierarchies[][]"


class TestAppendStruct:
    """`append_struct` keeps the path kind (scalar stays scalar, array stays array)."""

    def test_scalar_append_struct_returns_scalar(self) -> None:
        path = ScalarPath().append_struct("name")
        assert path == parse("name")
        assert isinstance(path, ScalarPath)

    def test_scalar_chain_struct(self) -> None:
        path = ScalarPath().append_struct("bbox").append_struct("xmin")
        assert path == parse("bbox.xmin")

    def test_array_append_struct_returns_array(self) -> None:
        path = parse("items[]")
        # isinstance assert narrows the FieldPath union for the type checker.
        assert isinstance(path, ArrayPath)
        result = path.append_struct("value")
        assert result == parse("items[].value")
        assert isinstance(result, ArrayPath)


class TestAppendArray:
    """`append_array` always yields an `ArrayPath` with a new array segment."""

    def test_scalar_append_array_returns_array_path(self) -> None:
        path = ScalarPath().append_array("items")
        assert path == parse("items[]")
        assert isinstance(path, ArrayPath)

    def test_scalar_append_array_after_struct(self) -> None:
        path = ScalarPath().append_struct("outer").append_array("items")
        assert path == parse("outer.items[]")

    def test_scalar_append_array_multi_depth(self) -> None:
        path = ScalarPath().append_array("hierarchies", iter_count=2)
        assert path == parse("hierarchies[][]")

    def test_array_append_array(self) -> None:
        path = parse("outer[]")
        assert isinstance(path, ArrayPath)
        result = path.append_array("inner")
        assert result == parse("outer[].inner[]")


class TestPromoteTerminalArray:
    """`promote_terminal_array` turns the last segment into (deeper) iteration."""

    def test_scalar_struct_terminal_becomes_array(self) -> None:
        assert promote_terminal_array(parse("tags")) == parse("tags[]")

    def test_struct_prefix_is_preserved(self) -> None:
        assert promote_terminal_array(parse("outer.tags")) == parse("outer.tags[]")

    def test_struct_terminal_inside_array_path(self) -> None:
        assert promote_terminal_array(parse("items[].tags")) == parse("items[].tags[]")

    def test_array_terminal_increments_iter_count(self) -> None:
        assert promote_terminal_array(parse("tags[]")) == parse("tags[][]")

    def test_consecutive_promotions_stack(self) -> None:
        assert promote_terminal_array(promote_terminal_array(parse("grid"))) == parse(
            "grid[][]"
        )

    def test_array_terminal_inside_array_path(self) -> None:
        assert promote_terminal_array(parse("items[].grid[]")) == parse(
            "items[].grid[][]"
        )

    def test_empty_path_raises(self) -> None:
        with pytest.raises(ValueError, match="empty path"):
            promote_terminal_array(ScalarPath())


class TestColumnPrefix:
    """`column_prefix` is the struct path before the first array segment."""

    def test_array_at_start_has_empty_prefix(self) -> None:
        path = parse("items[].value")
        assert isinstance(path, ArrayPath)
        assert path.column_prefix == ScalarPath(())

    def test_struct_prefix_before_array(self) -> None:
        path = parse("parent.items[].value")
        assert isinstance(path, ArrayPath)
        assert path.column_prefix == parse("parent")

    def test_dotted_struct_prefix(self) -> None:
        path = parse("a.b.c[].d")
        assert isinstance(path, ArrayPath)
        assert path.column_prefix == parse("a.b")


class TestLeaf:
    """`leaf` lists the struct names after the last array segment."""

    def test_no_leaf_after_array(self) -> None:
        path = parse("items[]")
        assert isinstance(path, ArrayPath)
        assert path.leaf == ()

    def test_single_struct_leaf(self) -> None:
        path = parse("items[].value")
        assert isinstance(path, ArrayPath)
        assert path.leaf == ("value",)

    def test_nested_struct_leaf(self) -> None:
        path = parse("items[].nested.value")
        assert isinstance(path, ArrayPath)
        assert path.leaf == ("nested", "value")

    def test_uses_last_array(self) -> None:
        # "when" sits between the two arrays, so it is not part of the leaf.
        path = parse("speed_limits[].when.vehicle[].dimension")
        assert isinstance(path, ArrayPath)
        assert path.leaf == ("dimension",)


class TestArrayChunks:
    """`array_chunks` yields `(prefix_structs, array_name, iter_count)` per array."""

    def test_single_top_level_array(self) -> None:
        path = parse("items[]")
        assert isinstance(path, ArrayPath)
        assert path.array_chunks == (((), "items", 1),)

    def test_single_array_with_struct_prefix(self) -> None:
        # The trailing "value" follows the last array and appears in no chunk.
        path = parse("parent.items[].value")
        assert isinstance(path, ArrayPath)
        assert path.array_chunks == ((("parent",), "items", 1),)

    def test_nested_arrays(self) -> None:
        path = parse("speed_limits[].when.vehicle[].dimension")
        assert isinstance(path, ArrayPath)
        assert path.array_chunks == (
            ((), "speed_limits", 1),
            (("when",), "vehicle", 1),
        )

    def test_multi_depth_array(self) -> None:
        path = parse("hierarchies[][].value")
        assert isinstance(path, ArrayPath)
        assert path.array_chunks == (((), "hierarchies", 2),)


class TestIterStructPaths:
    """`iter_struct_paths` has one entry per iteration after the outermost."""

    def test_single_iteration_is_empty(self) -> None:
        path = parse("items[].value")
        assert isinstance(path, ArrayPath)
        assert path.iter_struct_paths == ()

    def test_nested_arrays_emit_navigation_path(self) -> None:
        path = parse("speed_limits[].when.vehicle[].dimension")
        assert isinstance(path, ArrayPath)
        assert path.iter_struct_paths == (("when", "vehicle"),)

    def test_multi_depth_array_expands_extra_iterations(self) -> None:
        # iter_count=2 on the outer array adds one "()" entry.
        path = parse("hierarchies[][].value")
        assert isinstance(path, ArrayPath)
        assert path.iter_struct_paths == ((),)

    def test_multi_depth_inner_array_combines_navigation_and_expansion(self) -> None:
        path = parse("rules[].tags[][].value")
        assert isinstance(path, ArrayPath)
        assert path.iter_struct_paths == (("tags",), ())


class TestElementRelativeGate:
    """`element_relative_gate` returns a relative path, `()`, or `None`; nested arrays raise."""

    def test_gate_inside_same_outer_array(self) -> None:
        target = parse("items[].value")
        gate = parse("items[].nested")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) == ("nested",)

    def test_gate_at_outer_array_root_returns_empty(self) -> None:
        target = parse("items[].value")
        gate = parse("items[]")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) == ()

    def test_gate_with_dotted_struct_inside_element(self) -> None:
        target = parse("items[].value")
        gate = parse("items[].a.b")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) == ("a", "b")

    def test_scalar_gate_returns_none(self) -> None:
        target = parse("items[].value")
        gate = parse("other")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) is None

    def test_different_outer_array_returns_none(self) -> None:
        target = parse("items[].value")
        gate = parse("other[].x")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) is None

    def test_struct_prefix_must_match(self) -> None:
        target = parse("parent.items[].value")
        gate = parse("items[].x")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) is None

    def test_matching_struct_prefix(self) -> None:
        target = parse("parent.items[].value")
        gate = parse("parent.items[].x")
        assert isinstance(target, ArrayPath)
        assert target.element_relative_gate(gate) == ("x",)

    def test_inner_array_segment_raises(self) -> None:
        target = parse("items[].value")
        gate = parse("items[].nested[]")
        assert isinstance(target, ArrayPath)
        with pytest.raises(NotImplementedError, match="nested array segment"):
            target.element_relative_gate(gate)


class TestArrayPathInvariant:
    """`ArrayPath` refuses construction without any `ArraySegment`."""

    def test_rejects_segments_without_array(self) -> None:
        with pytest.raises(ValueError, match="at least one ArraySegment"):
            ArrayPath(segments=(StructSegment(name="a"),))


class TestEqualityAndHashing:
    """Paths compare by segments, hash consistently, and never equal strings."""

    def test_paths_with_same_segments_are_equal(self) -> None:
        assert parse("items[].value") == parse("items[].value")

    def test_different_paths_unequal(self) -> None:
        assert parse("items[].value") != parse("items[].other")

    def test_scalar_array_unequal(self) -> None:
        assert parse("items") != parse("items[]")

    def test_hashable(self) -> None:
        s = {parse("a.b"), parse("a.b"), parse("c")}
        assert len(s) == 2

    def test_string_is_not_equal_to_path(self) -> None:
        assert parse("items[].value") != "items[].value"


class TestCoerce:
    """`coerce` passes paths through by identity and parses strings."""

    def test_passes_through_scalar(self) -> None:
        path = parse("a.b")
        assert coerce(path) is path

    def test_passes_through_array(self) -> None:
        path = parse("items[].value")
        assert coerce(path) is path

    def test_parses_string(self) -> None:
        assert coerce("items[].value") == parse("items[].value")


class TestParseRejectsEmptyParts:
    """`parse` raises `ValueError` naming the input when a dotted part has no name."""

    @pytest.mark.parametrize("encoded", [".a", "a..b", "[]", "a.[]", ".[]"])
    def test_raises_value_error_on_empty_part(self, encoded: str) -> None:
        with pytest.raises(ValueError, match="empty name"):
            parse(encoded)

    @pytest.mark.parametrize("encoded", [".a", "a..b", "[]"])
    def test_error_includes_input_string(self, encoded: str) -> None:
        # re.escape: the repr contains regex metacharacters such as "." and "[".
        with pytest.raises(ValueError, match=re.escape(repr(encoded))):
            parse(encoded)
[[examples.Segment]] class = "unknown" geometry = "LINESTRING (30.9844394 -12.7185733, 30.9818611 -12.7207838, 30.9815908 -12.7210751)" diff --git a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py index 0d2685df9..260fd4574 100644 --- a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py +++ b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py @@ -37,7 +37,7 @@ def _connector_type() -> type[OvertureFeature]: - from .connector import Connector + from .connector import Connector # noqa: PLC0415 return Connector diff --git a/pyproject.toml b/pyproject.toml index c21f4bc17..154546081 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ pythonpath = [ "packages/overture-schema-common/tests", "packages/overture-schema-divisions-theme/tests", "packages/overture-schema-places-theme/tests", + "packages/overture-schema-pyspark/tests", "packages/overture-schema-system/tests", "packages/overture-schema-transportation-theme/tests", "packages/overture-schema/tests", diff --git a/uv.lock b/uv.lock index e22235af7..e1cfed99f 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,8 @@ version = 1 revision = 3 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", "python_full_version < '3.11'", ] @@ -22,6 +23,7 @@ members = [ "overture-schema-common", "overture-schema-divisions-theme", "overture-schema-places-theme", + "overture-schema-pyspark", "overture-schema-system", "overture-schema-transportation-theme", "overture-schema-workspace", @@ -38,14 +40,14 @@ wheels = [ [[package]] name = "click" -version = "8.3.1" +version = "8.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = 
"sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, + { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, ] [[package]] @@ -59,101 +61,115 @@ wheels = [ [[package]] name = "coverage" -version = "7.13.2" +version = "7.13.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ad/49/349848445b0e53660e258acbcc9b0d014895b6739237920886672240f84b/coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3", size = 826523, upload-time = "2026-01-25T13:00:04.889Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a4/2d/63e37369c8e81a643afe54f76073b020f7b97ddbe698c5c944b51b0a2bc5/coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b", size = 218842, upload-time = "2026-01-25T12:57:15.3Z" }, - { url = "https://files.pythonhosted.org/packages/57/06/86ce882a8d58cbcb3030e298788988e618da35420d16a8c66dac34f138d0/coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2", size = 219360, upload-time = "2026-01-25T12:57:17.572Z" }, - { url = "https://files.pythonhosted.org/packages/cd/84/70b0eb1ee19ca4ef559c559054c59e5b2ae4ec9af61398670189e5d276e9/coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896", size = 246123, upload-time = "2026-01-25T12:57:19.087Z" }, - { url = "https://files.pythonhosted.org/packages/35/fb/05b9830c2e8275ebc031e0019387cda99113e62bb500ab328bb72578183b/coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c", size = 247930, upload-time = "2026-01-25T12:57:20.929Z" }, - { url = "https://files.pythonhosted.org/packages/81/aa/3f37858ca2eed4f09b10ca3c6ddc9041be0a475626cd7fd2712f4a2d526f/coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc", size = 249804, upload-time = "2026-01-25T12:57:22.904Z" }, - { url = "https://files.pythonhosted.org/packages/b6/b3/c904f40c56e60a2d9678a5ee8df3d906d297d15fb8bec5756c3b0a67e2df/coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5", size = 246815, upload-time = 
"2026-01-25T12:57:24.314Z" }, - { url = "https://files.pythonhosted.org/packages/41/91/ddc1c5394ca7fd086342486440bfdd6b9e9bda512bf774599c7c7a0081e0/coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31", size = 247843, upload-time = "2026-01-25T12:57:26.544Z" }, - { url = "https://files.pythonhosted.org/packages/87/d2/cdff8f4cd33697883c224ea8e003e9c77c0f1a837dc41d95a94dd26aad67/coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad", size = 245850, upload-time = "2026-01-25T12:57:28.507Z" }, - { url = "https://files.pythonhosted.org/packages/f5/42/e837febb7866bf2553ab53dd62ed52f9bb36d60c7e017c55376ad21fbb05/coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f", size = 246116, upload-time = "2026-01-25T12:57:30.16Z" }, - { url = "https://files.pythonhosted.org/packages/09/b1/4a3f935d7df154df02ff4f71af8d61298d713a7ba305d050ae475bfbdde2/coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8", size = 246720, upload-time = "2026-01-25T12:57:32.165Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fe/538a6fd44c515f1c5197a3f078094cbaf2ce9f945df5b44e29d95c864bff/coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c", size = 221465, upload-time = "2026-01-25T12:57:33.511Z" }, - { url = "https://files.pythonhosted.org/packages/5e/09/4b63a024295f326ec1a40ec8def27799300ce8775b1cbf0d33b1790605c4/coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99", size = 222397, upload-time = "2026-01-25T12:57:34.927Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/01/abca50583a8975bb6e1c59eff67ed8e48bb127c07dad5c28d9e96ccc09ec/coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e", size = 218971, upload-time = "2026-01-25T12:57:36.953Z" }, - { url = "https://files.pythonhosted.org/packages/eb/0e/b6489f344d99cd1e5b4d5e1be52dfd3f8a3dc5112aa6c33948da8cabad4e/coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e", size = 219473, upload-time = "2026-01-25T12:57:38.934Z" }, - { url = "https://files.pythonhosted.org/packages/17/11/db2f414915a8e4ec53f60b17956c27f21fb68fcf20f8a455ce7c2ccec638/coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508", size = 249896, upload-time = "2026-01-25T12:57:40.365Z" }, - { url = "https://files.pythonhosted.org/packages/80/06/0823fe93913663c017e508e8810c998c8ebd3ec2a5a85d2c3754297bdede/coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b", size = 251810, upload-time = "2026-01-25T12:57:42.045Z" }, - { url = "https://files.pythonhosted.org/packages/61/dc/b151c3cc41b28cdf7f0166c5fa1271cbc305a8ec0124cce4b04f74791a18/coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b", size = 253920, upload-time = "2026-01-25T12:57:44.026Z" }, - { url = "https://files.pythonhosted.org/packages/2d/35/e83de0556e54a4729a2b94ea816f74ce08732e81945024adee46851c2264/coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f", size = 250025, upload-time = 
"2026-01-25T12:57:45.624Z" }, - { url = "https://files.pythonhosted.org/packages/39/67/af2eb9c3926ce3ea0d58a0d2516fcbdacf7a9fc9559fe63076beaf3f2596/coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3", size = 251612, upload-time = "2026-01-25T12:57:47.713Z" }, - { url = "https://files.pythonhosted.org/packages/26/62/5be2e25f3d6c711d23b71296f8b44c978d4c8b4e5b26871abfc164297502/coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b", size = 249670, upload-time = "2026-01-25T12:57:49.378Z" }, - { url = "https://files.pythonhosted.org/packages/b3/51/400d1b09a8344199f9b6a6fc1868005d766b7ea95e7882e494fa862ca69c/coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1", size = 249395, upload-time = "2026-01-25T12:57:50.86Z" }, - { url = "https://files.pythonhosted.org/packages/e0/36/f02234bc6e5230e2f0a63fd125d0a2093c73ef20fdf681c7af62a140e4e7/coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059", size = 250298, upload-time = "2026-01-25T12:57:52.287Z" }, - { url = "https://files.pythonhosted.org/packages/b0/06/713110d3dd3151b93611c9cbfc65c15b4156b44f927fced49ac0b20b32a4/coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031", size = 221485, upload-time = "2026-01-25T12:57:53.876Z" }, - { url = "https://files.pythonhosted.org/packages/16/0c/3ae6255fa1ebcb7dec19c9a59e85ef5f34566d1265c70af5b2fc981da834/coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e", size = 222421, upload-time = "2026-01-25T12:57:55.433Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/37/fabc3179af4d61d89ea47bd04333fec735cd5e8b59baad44fed9fc4170d7/coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28", size = 221088, upload-time = "2026-01-25T12:57:57.41Z" }, - { url = "https://files.pythonhosted.org/packages/46/39/e92a35f7800222d3f7b2cbb7bbc3b65672ae8d501cb31801b2d2bd7acdf1/coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d", size = 219142, upload-time = "2026-01-25T12:58:00.448Z" }, - { url = "https://files.pythonhosted.org/packages/45/7a/8bf9e9309c4c996e65c52a7c5a112707ecdd9fbaf49e10b5a705a402bbb4/coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3", size = 219503, upload-time = "2026-01-25T12:58:02.451Z" }, - { url = "https://files.pythonhosted.org/packages/87/93/17661e06b7b37580923f3f12406ac91d78aeed293fb6da0b69cc7957582f/coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99", size = 251006, upload-time = "2026-01-25T12:58:04.059Z" }, - { url = "https://files.pythonhosted.org/packages/12/f0/f9e59fb8c310171497f379e25db060abef9fa605e09d63157eebec102676/coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f", size = 253750, upload-time = "2026-01-25T12:58:05.574Z" }, - { url = "https://files.pythonhosted.org/packages/e5/b1/1935e31add2232663cf7edd8269548b122a7d100047ff93475dbaaae673e/coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f", size = 254862, upload-time = "2026-01-25T12:58:07.647Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/59/b5e97071ec13df5f45da2b3391b6cdbec78ba20757bc92580a5b3d5fa53c/coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa", size = 251420, upload-time = "2026-01-25T12:58:09.309Z" }, - { url = "https://files.pythonhosted.org/packages/3f/75/9495932f87469d013dc515fb0ce1aac5fa97766f38f6b1a1deb1ee7b7f3a/coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce", size = 252786, upload-time = "2026-01-25T12:58:10.909Z" }, - { url = "https://files.pythonhosted.org/packages/6a/59/af550721f0eb62f46f7b8cb7e6f1860592189267b1c411a4e3a057caacee/coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94", size = 250928, upload-time = "2026-01-25T12:58:12.449Z" }, - { url = "https://files.pythonhosted.org/packages/9b/b1/21b4445709aae500be4ab43bbcfb4e53dc0811c3396dcb11bf9f23fd0226/coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5", size = 250496, upload-time = "2026-01-25T12:58:14.047Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b1/0f5d89dfe0392990e4f3980adbde3eb34885bc1effb2dc369e0bf385e389/coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b", size = 252373, upload-time = "2026-01-25T12:58:15.976Z" }, - { url = "https://files.pythonhosted.org/packages/01/c9/0cf1a6a57a9968cc049a6b896693faa523c638a5314b1fc374eb2b2ac904/coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41", size = 221696, upload-time = "2026-01-25T12:58:17.517Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/05/d7540bf983f09d32803911afed135524570f8c47bb394bf6206c1dc3a786/coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e", size = 222504, upload-time = "2026-01-25T12:58:19.115Z" }, - { url = "https://files.pythonhosted.org/packages/15/8b/1a9f037a736ced0a12aacf6330cdaad5008081142a7070bc58b0f7930cbc/coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894", size = 221120, upload-time = "2026-01-25T12:58:21.334Z" }, - { url = "https://files.pythonhosted.org/packages/a7/f0/3d3eac7568ab6096ff23791a526b0048a1ff3f49d0e236b2af6fb6558e88/coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6", size = 219168, upload-time = "2026-01-25T12:58:23.376Z" }, - { url = "https://files.pythonhosted.org/packages/a3/a6/f8b5cfeddbab95fdef4dcd682d82e5dcff7a112ced57a959f89537ee9995/coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc", size = 219537, upload-time = "2026-01-25T12:58:24.932Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e6/8d8e6e0c516c838229d1e41cadcec91745f4b1031d4db17ce0043a0423b4/coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f", size = 250528, upload-time = "2026-01-25T12:58:26.567Z" }, - { url = "https://files.pythonhosted.org/packages/8e/78/befa6640f74092b86961f957f26504c8fba3d7da57cc2ab7407391870495/coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1", size = 253132, upload-time = "2026-01-25T12:58:28.251Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/10/1630db1edd8ce675124a2ee0f7becc603d2bb7b345c2387b4b95c6907094/coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9", size = 254374, upload-time = "2026-01-25T12:58:30.294Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1d/0d9381647b1e8e6d310ac4140be9c428a0277330991e0c35bdd751e338a4/coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c", size = 250762, upload-time = "2026-01-25T12:58:32.036Z" }, - { url = "https://files.pythonhosted.org/packages/43/e4/5636dfc9a7c871ee8776af83ee33b4c26bc508ad6cee1e89b6419a366582/coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5", size = 252502, upload-time = "2026-01-25T12:58:33.961Z" }, - { url = "https://files.pythonhosted.org/packages/02/2a/7ff2884d79d420cbb2d12fed6fff727b6d0ef27253140d3cdbbd03187ee0/coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4", size = 250463, upload-time = "2026-01-25T12:58:35.529Z" }, - { url = "https://files.pythonhosted.org/packages/91/c0/ba51087db645b6c7261570400fc62c89a16278763f36ba618dc8657a187b/coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c", size = 250288, upload-time = "2026-01-25T12:58:37.226Z" }, - { url = "https://files.pythonhosted.org/packages/03/07/44e6f428551c4d9faf63ebcefe49b30e5c89d1be96f6a3abd86a52da9d15/coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31", size = 252063, upload-time = "2026-01-25T12:58:38.821Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/67/35b730ad7e1859dd57e834d1bc06080d22d2f87457d53f692fce3f24a5a9/coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8", size = 221716, upload-time = "2026-01-25T12:58:40.484Z" }, - { url = "https://files.pythonhosted.org/packages/0d/82/e5fcf5a97c72f45fc14829237a6550bf49d0ab882ac90e04b12a69db76b4/coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb", size = 222522, upload-time = "2026-01-25T12:58:43.247Z" }, - { url = "https://files.pythonhosted.org/packages/b1/f1/25d7b2f946d239dd2d6644ca2cc060d24f97551e2af13b6c24c722ae5f97/coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557", size = 221145, upload-time = "2026-01-25T12:58:45Z" }, - { url = "https://files.pythonhosted.org/packages/9e/f7/080376c029c8f76fadfe43911d0daffa0cbdc9f9418a0eead70c56fb7f4b/coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e", size = 219861, upload-time = "2026-01-25T12:58:46.586Z" }, - { url = "https://files.pythonhosted.org/packages/42/11/0b5e315af5ab35f4c4a70e64d3314e4eec25eefc6dec13be3a7d5ffe8ac5/coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7", size = 220207, upload-time = "2026-01-25T12:58:48.277Z" }, - { url = "https://files.pythonhosted.org/packages/b2/0c/0874d0318fb1062117acbef06a09cf8b63f3060c22265adaad24b36306b7/coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3", size = 261504, upload-time = "2026-01-25T12:58:49.904Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/5e/1cd72c22ecb30751e43a72f40ba50fcef1b7e93e3ea823bd9feda8e51f9a/coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3", size = 263582, upload-time = "2026-01-25T12:58:51.582Z" }, - { url = "https://files.pythonhosted.org/packages/9b/da/8acf356707c7a42df4d0657020308e23e5a07397e81492640c186268497c/coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421", size = 266008, upload-time = "2026-01-25T12:58:53.234Z" }, - { url = "https://files.pythonhosted.org/packages/41/41/ea1730af99960309423c6ea8d6a4f1fa5564b2d97bd1d29dda4b42611f04/coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5", size = 260762, upload-time = "2026-01-25T12:58:55.372Z" }, - { url = "https://files.pythonhosted.org/packages/22/fa/02884d2080ba71db64fdc127b311db60e01fe6ba797d9c8363725e39f4d5/coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23", size = 263571, upload-time = "2026-01-25T12:58:57.52Z" }, - { url = "https://files.pythonhosted.org/packages/d2/6b/4083aaaeba9b3112f55ac57c2ce7001dc4d8fa3fcc228a39f09cc84ede27/coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c", size = 261200, upload-time = "2026-01-25T12:58:59.255Z" }, - { url = "https://files.pythonhosted.org/packages/e9/d2/aea92fa36d61955e8c416ede9cf9bf142aa196f3aea214bb67f85235a050/coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f", size = 260095, upload-time = "2026-01-25T12:59:01.066Z" }, - { url 
= "https://files.pythonhosted.org/packages/0d/ae/04ffe96a80f107ea21b22b2367175c621da920063260a1c22f9452fd7866/coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573", size = 262284, upload-time = "2026-01-25T12:59:02.802Z" }, - { url = "https://files.pythonhosted.org/packages/1c/7a/6f354dcd7dfc41297791d6fb4e0d618acb55810bde2c1fd14b3939e05c2b/coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343", size = 222389, upload-time = "2026-01-25T12:59:04.563Z" }, - { url = "https://files.pythonhosted.org/packages/8d/d5/080ad292a4a3d3daf411574be0a1f56d6dee2c4fdf6b005342be9fac807f/coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47", size = 223450, upload-time = "2026-01-25T12:59:06.677Z" }, - { url = "https://files.pythonhosted.org/packages/88/96/df576fbacc522e9fb8d1c4b7a7fc62eb734be56e2cba1d88d2eabe08ea3f/coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7", size = 221707, upload-time = "2026-01-25T12:59:08.363Z" }, - { url = "https://files.pythonhosted.org/packages/55/53/1da9e51a0775634b04fcc11eb25c002fc58ee4f92ce2e8512f94ac5fc5bf/coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef", size = 219213, upload-time = "2026-01-25T12:59:11.909Z" }, - { url = "https://files.pythonhosted.org/packages/46/35/b3caac3ebbd10230fea5a33012b27d19e999a17c9285c4228b4b2e35b7da/coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f", size = 219549, upload-time = "2026-01-25T12:59:13.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/9c/e1cf7def1bdc72c1907e60703983a588f9558434a2ff94615747bd73c192/coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5", size = 250586, upload-time = "2026-01-25T12:59:15.808Z" }, - { url = "https://files.pythonhosted.org/packages/ba/49/f54ec02ed12be66c8d8897270505759e057b0c68564a65c429ccdd1f139e/coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4", size = 253093, upload-time = "2026-01-25T12:59:17.491Z" }, - { url = "https://files.pythonhosted.org/packages/fb/5e/aaf86be3e181d907e23c0f61fccaeb38de8e6f6b47aed92bf57d8fc9c034/coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27", size = 254446, upload-time = "2026-01-25T12:59:19.752Z" }, - { url = "https://files.pythonhosted.org/packages/28/c8/a5fa01460e2d75b0c853b392080d6829d3ca8b5ab31e158fa0501bc7c708/coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548", size = 250615, upload-time = "2026-01-25T12:59:21.928Z" }, - { url = "https://files.pythonhosted.org/packages/86/0b/6d56315a55f7062bb66410732c24879ccb2ec527ab6630246de5fe45a1df/coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660", size = 252452, upload-time = "2026-01-25T12:59:23.592Z" }, - { url = "https://files.pythonhosted.org/packages/30/19/9bc550363ebc6b0ea121977ee44d05ecd1e8bf79018b8444f1028701c563/coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92", size = 250418, upload-time = 
"2026-01-25T12:59:25.392Z" }, - { url = "https://files.pythonhosted.org/packages/1f/53/580530a31ca2f0cc6f07a8f2ab5460785b02bb11bdf815d4c4d37a4c5169/coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82", size = 250231, upload-time = "2026-01-25T12:59:27.888Z" }, - { url = "https://files.pythonhosted.org/packages/e2/42/dd9093f919dc3088cb472893651884bd675e3df3d38a43f9053656dca9a2/coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892", size = 251888, upload-time = "2026-01-25T12:59:29.636Z" }, - { url = "https://files.pythonhosted.org/packages/fa/a6/0af4053e6e819774626e133c3d6f70fae4d44884bfc4b126cb647baee8d3/coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe", size = 221968, upload-time = "2026-01-25T12:59:31.424Z" }, - { url = "https://files.pythonhosted.org/packages/c4/cc/5aff1e1f80d55862442855517bb8ad8ad3a68639441ff6287dde6a58558b/coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859", size = 222783, upload-time = "2026-01-25T12:59:33.118Z" }, - { url = "https://files.pythonhosted.org/packages/de/20/09abafb24f84b3292cc658728803416c15b79f9ee5e68d25238a895b07d9/coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6", size = 221348, upload-time = "2026-01-25T12:59:34.939Z" }, - { url = "https://files.pythonhosted.org/packages/b6/60/a3820c7232db63be060e4019017cd3426751c2699dab3c62819cdbcea387/coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b", size = 219950, upload-time = "2026-01-25T12:59:36.624Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/37/e4ef5975fdeb86b1e56db9a82f41b032e3d93a840ebaf4064f39e770d5c5/coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417", size = 220209, upload-time = "2026-01-25T12:59:38.339Z" }, - { url = "https://files.pythonhosted.org/packages/54/df/d40e091d00c51adca1e251d3b60a8b464112efa3004949e96a74d7c19a64/coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee", size = 261576, upload-time = "2026-01-25T12:59:40.446Z" }, - { url = "https://files.pythonhosted.org/packages/c5/44/5259c4bed54e3392e5c176121af9f71919d96dde853386e7730e705f3520/coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1", size = 263704, upload-time = "2026-01-25T12:59:42.346Z" }, - { url = "https://files.pythonhosted.org/packages/16/bd/ae9f005827abcbe2c70157459ae86053971c9fa14617b63903abbdce26d9/coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d", size = 266109, upload-time = "2026-01-25T12:59:44.073Z" }, - { url = "https://files.pythonhosted.org/packages/a2/c0/8e279c1c0f5b1eaa3ad9b0fb7a5637fc0379ea7d85a781c0fe0bb3cfc2ab/coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6", size = 260686, upload-time = "2026-01-25T12:59:45.804Z" }, - { url = "https://files.pythonhosted.org/packages/b2/47/3a8112627e9d863e7cddd72894171c929e94491a597811725befdcd76bce/coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a", size = 263568, upload-time = 
"2026-01-25T12:59:47.929Z" }, - { url = "https://files.pythonhosted.org/packages/92/bc/7ea367d84afa3120afc3ce6de294fd2dcd33b51e2e7fbe4bbfd200f2cb8c/coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04", size = 261174, upload-time = "2026-01-25T12:59:49.717Z" }, - { url = "https://files.pythonhosted.org/packages/33/b7/f1092dcecb6637e31cc2db099581ee5c61a17647849bae6b8261a2b78430/coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f", size = 260017, upload-time = "2026-01-25T12:59:51.463Z" }, - { url = "https://files.pythonhosted.org/packages/2b/cd/f3d07d4b95fbe1a2ef0958c15da614f7e4f557720132de34d2dc3aa7e911/coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f", size = 262337, upload-time = "2026-01-25T12:59:53.407Z" }, - { url = "https://files.pythonhosted.org/packages/e0/db/b0d5b2873a07cb1e06a55d998697c0a5a540dcefbf353774c99eb3874513/coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3", size = 222749, upload-time = "2026-01-25T12:59:56.316Z" }, - { url = "https://files.pythonhosted.org/packages/e5/2f/838a5394c082ac57d85f57f6aba53093b30d9089781df72412126505716f/coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba", size = 223857, upload-time = "2026-01-25T12:59:58.201Z" }, - { url = "https://files.pythonhosted.org/packages/44/d4/b608243e76ead3a4298824b50922b89ef793e50069ce30316a65c1b4d7ef/coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c", size = 221881, upload-time = "2026-01-25T13:00:00.449Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" }, + { url = "https://files.pythonhosted.org/packages/69/33/e8c48488c29a73fd089f9d71f9653c1be7478f2ad6b5bc870db11a55d23d/coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5", size = 219255, upload-time = "2026-03-17T10:29:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/da/bd/b0ebe9f677d7f4b74a3e115eec7ddd4bcf892074963a00d91e8b164a6386/coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf", size = 219772, upload-time = "2026-03-17T10:29:52.867Z" }, + { url = "https://files.pythonhosted.org/packages/48/cc/5cb9502f4e01972f54eedd48218bb203fe81e294be606a2bc93970208013/coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8", size = 246532, upload-time = "2026-03-17T10:29:54.688Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d8/3217636d86c7e7b12e126e4f30ef1581047da73140614523af7495ed5f2d/coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4", size = 248333, upload-time = "2026-03-17T10:29:56.221Z" }, + { url = "https://files.pythonhosted.org/packages/2b/30/2002ac6729ba2d4357438e2ed3c447ad8562866c8c63fc16f6dfc33afe56/coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d", size = 250211, upload-time = "2026-03-17T10:29:57.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/85/552496626d6b9359eb0e2f86f920037c9cbfba09b24d914c6e1528155f7d/coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930", size = 252125, upload-time = "2026-03-17T10:29:59.388Z" }, + { url = "https://files.pythonhosted.org/packages/44/21/40256eabdcbccdb6acf6b381b3016a154399a75fe39d406f790ae84d1f3c/coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d", size = 247219, upload-time = "2026-03-17T10:30:01.199Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/96e2a6c3f21a0ea77d7830b254a1542d0328acc8d7bdf6a284ba7e529f77/coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40", size = 248248, upload-time = "2026-03-17T10:30:03.317Z" }, + { url = "https://files.pythonhosted.org/packages/da/ba/8477f549e554827da390ec659f3c38e4b6d95470f4daafc2d8ff94eaa9c2/coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878", size = 246254, upload-time = "2026-03-17T10:30:04.832Z" }, + { url = "https://files.pythonhosted.org/packages/55/59/bc22aef0e6aa179d5b1b001e8b3654785e9adf27ef24c93dc4228ebd5d68/coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400", size = 250067, upload-time = "2026-03-17T10:30:06.535Z" }, + { url = "https://files.pythonhosted.org/packages/de/1b/c6a023a160806a5137dca53468fd97530d6acad24a22003b1578a9c2e429/coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0", size = 246521, upload-time = "2026-03-17T10:30:08.486Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/3f/3532c85a55aa2f899fa17c186f831cfa1aa434d88ff792a709636f64130e/coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0", size = 247126, upload-time = "2026-03-17T10:30:09.966Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2e/b9d56af4a24ef45dfbcda88e06870cb7d57b2b0bfa3a888d79b4c8debd76/coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58", size = 221860, upload-time = "2026-03-17T10:30:11.393Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cc/d938417e7a4d7f0433ad4edee8bb2acdc60dc7ac5af19e2a07a048ecbee3/coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e", size = 222788, upload-time = "2026-03-17T10:30:12.886Z" }, + { url = "https://files.pythonhosted.org/packages/4b/37/d24c8f8220ff07b839b2c043ea4903a33b0f455abe673ae3c03bbdb7f212/coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d", size = 219381, upload-time = "2026-03-17T10:30:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/cd129b0ca4afe886a6ce9d183c44d8301acbd4ef248622e7c49a23145605/coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587", size = 219880, upload-time = "2026-03-17T10:30:16.231Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/e0e5b237bffdb5d6c530ce87cc1d413a5b7d7dfd60fb067ad6d254c35c76/coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642", size = 250303, upload-time = "2026-03-17T10:30:17.748Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/be/b1afb692be85b947f3401375851484496134c5554e67e822c35f28bf2fbc/coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b", size = 252218, upload-time = "2026-03-17T10:30:19.804Z" }, + { url = "https://files.pythonhosted.org/packages/da/69/2f47bb6fa1b8d1e3e5d0c4be8ccb4313c63d742476a619418f85740d597b/coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686", size = 254326, upload-time = "2026-03-17T10:30:21.321Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d0/79db81da58965bd29dabc8f4ad2a2af70611a57cba9d1ec006f072f30a54/coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743", size = 256267, upload-time = "2026-03-17T10:30:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e5/32/d0d7cc8168f91ddab44c0ce4806b969df5f5fdfdbb568eaca2dbc2a04936/coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75", size = 250430, upload-time = "2026-03-17T10:30:25.311Z" }, + { url = "https://files.pythonhosted.org/packages/4d/06/a055311d891ddbe231cd69fdd20ea4be6e3603ffebddf8704b8ca8e10a3c/coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209", size = 252017, upload-time = "2026-03-17T10:30:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f6/d0fd2d21e29a657b5f77a2fe7082e1568158340dceb941954f776dce1b7b/coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a", size = 250080, upload-time 
= "2026-03-17T10:30:29.481Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ab/0d7fb2efc2e9a5eb7ddcc6e722f834a69b454b7e6e5888c3a8567ecffb31/coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e", size = 253843, upload-time = "2026-03-17T10:30:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6f/7467b917bbf5408610178f62a49c0ed4377bb16c1657f689cc61470da8ce/coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd", size = 249802, upload-time = "2026-03-17T10:30:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/75/2c/1172fb689df92135f5bfbbd69fc83017a76d24ea2e2f3a1154007e2fb9f8/coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8", size = 250707, upload-time = "2026-03-17T10:30:35.2Z" }, + { url = "https://files.pythonhosted.org/packages/67/21/9ac389377380a07884e3b48ba7a620fcd9dbfaf1d40565facdc6b36ec9ef/coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf", size = 221880, upload-time = "2026-03-17T10:30:36.775Z" }, + { url = "https://files.pythonhosted.org/packages/af/7f/4cd8a92531253f9d7c1bbecd9fa1b472907fb54446ca768c59b531248dc5/coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9", size = 222816, upload-time = "2026-03-17T10:30:38.891Z" }, + { url = "https://files.pythonhosted.org/packages/12/a6/1d3f6155fb0010ca68eba7fe48ca6c9da7385058b77a95848710ecf189b1/coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028", size = 221483, upload-time = "2026-03-17T10:30:40.463Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = "2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = "https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, 
upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" }, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time 
= "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = "https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, 
upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = "2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time 
= "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = "2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = "https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, 
upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + { url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = "2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, ] [package.optional-dependencies] @@ -163,14 +179,14 @@ toml = [ [[package]] name = "deepdiff" -version = "8.6.1" +version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "orderly-set" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/76/36c9aab3d5c19a94091f7c6c6e784efca50d87b124bf026c36e94719f33c/deepdiff-8.6.1.tar.gz", hash = "sha256:ec56d7a769ca80891b5200ec7bd41eec300ced91ebcc7797b41eb2b3f3ff643a", size = 634054, upload-time = "2025-09-03T19:40:41.461Z" } +sdist = { url = "https://files.pythonhosted.org/packages/24/20/63dd34163ed07393968128dc8c7ab948c96e47c4ce76976ea533de64909d/deepdiff-9.0.0.tar.gz", hash = "sha256:4872005306237b5b50829803feff58a1dfd20b2b357a55de22e7ded65b2008a7", size = 151952, upload-time = "2026-03-30T05:52:23.769Z" } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/e6/efe534ef0952b531b630780e19cabd416e2032697019d5295defc6ef9bd9/deepdiff-8.6.1-py3-none-any.whl", hash = "sha256:ee8708a7f7d37fb273a541fa24ad010ed484192cd0c4ffc0fa0ed5e2d4b9e78b", size = 91378, upload-time = "2025-09-03T19:40:39.679Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c4/da7089cd7aa4ab554f56e18a7fb08dcfed8fd2ae91fa528f5b1be207a148/deepdiff-9.0.0-py3-none-any.whl", hash = "sha256:b1ae0dd86290d86a03de5fbee728fde43095c1472ae4974bdab23ab4656305bd", size = 170540, upload-time = "2026-03-30T05:52:22.008Z" }, ] [[package]] @@ -209,11 +225,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/cc/762dfb036166873f0059f3b7de4565e1b5bc3d6f28a414c13da27e442f99/idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242", size = 194210, upload-time = "2026-04-22T16:42:42.314Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/5d/13/ad7d7ca3808a898b4612b6fe93cde56b53f3034dcde235acb1f0e1df24c6/idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3", size = 68629, upload-time = "2026-04-22T16:42:40.909Z" }, ] [[package]] @@ -239,87 +255,96 @@ wheels = [ [[package]] name = "jsonpath-ng" -version = 
"1.7.0" +version = "1.8.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ply" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6d/86/08646239a313f895186ff0a4573452038eed8c86f54380b3ebac34d32fb2/jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c", size = 37838, upload-time = "2024-10-11T15:41:42.404Z" } +sdist = { url = "https://files.pythonhosted.org/packages/32/58/250751940d75c8019659e15482d548a4aa3b6ce122c515102a4bfdac50e3/jsonpath_ng-1.8.0.tar.gz", hash = "sha256:54252968134b5e549ea5b872f1df1168bd7defe1a52fed5a358c194e1943ddc3", size = 74513, upload-time = "2026-02-24T14:42:06.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/5a/73ecb3d82f8615f32ccdadeb9356726d6cae3a4bbc840b437ceb95708063/jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6", size = 30105, upload-time = "2024-11-20T17:58:30.418Z" }, + { url = "https://files.pythonhosted.org/packages/03/99/33c7d78a3fb70d545fd5411ac67a651c81602cc09c9cf0df383733f068c5/jsonpath_ng-1.8.0-py3-none-any.whl", hash = "sha256:b8dde192f8af58d646fc031fac9c99fe4d00326afc4148f1f043c601a8cfe138", size = 67844, upload-time = "2026-02-28T00:53:19.637Z" }, ] [[package]] name = "librt" -version = "0.7.8" +version = "0.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/24/5f3646ff414285e0f7708fa4e946b9bf538345a41d1c375c439467721a5e/librt-0.7.8.tar.gz", hash = "sha256:1a4ede613941d9c3470b0368be851df6bb78ab218635512d0370b27a277a0862", size = 148323, upload-time = "2026-01-14T12:56:16.876Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/6b/3d5c13fb3e3c4f43206c8f9dfed13778c2ed4f000bacaa0b7ce3c402a265/librt-0.9.0.tar.gz", hash = "sha256:a0951822531e7aee6e0dfb556b30d5ee36bbe234faf60c20a16c01be3530869d", size = 184368, upload-time = "2026-04-09T16:06:26.173Z" } wheels = 
[ - { url = "https://files.pythonhosted.org/packages/44/13/57b06758a13550c5f09563893b004f98e9537ee6ec67b7df85c3571c8832/librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d", size = 56521, upload-time = "2026-01-14T12:54:40.066Z" }, - { url = "https://files.pythonhosted.org/packages/c2/24/bbea34d1452a10612fb45ac8356f95351ba40c2517e429602160a49d1fd0/librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b", size = 58456, upload-time = "2026-01-14T12:54:41.471Z" }, - { url = "https://files.pythonhosted.org/packages/04/72/a168808f92253ec3a810beb1eceebc465701197dbc7e865a1c9ceb3c22c7/librt-0.7.8-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6938cc2de153bc927ed8d71c7d2f2ae01b4e96359126c602721340eb7ce1a92d", size = 164392, upload-time = "2026-01-14T12:54:42.843Z" }, - { url = "https://files.pythonhosted.org/packages/14/5c/4c0d406f1b02735c2e7af8ff1ff03a6577b1369b91aa934a9fa2cc42c7ce/librt-0.7.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66daa6ac5de4288a5bbfbe55b4caa7bf0cd26b3269c7a476ffe8ce45f837f87d", size = 172959, upload-time = "2026-01-14T12:54:44.602Z" }, - { url = "https://files.pythonhosted.org/packages/82/5f/3e85351c523f73ad8d938989e9a58c7f59fb9c17f761b9981b43f0025ce7/librt-0.7.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4864045f49dc9c974dadb942ac56a74cd0479a2aafa51ce272c490a82322ea3c", size = 186717, upload-time = "2026-01-14T12:54:45.986Z" }, - { url = "https://files.pythonhosted.org/packages/08/f8/18bfe092e402d00fe00d33aa1e01dda1bd583ca100b393b4373847eade6d/librt-0.7.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a36515b1328dc5b3ffce79fe204985ca8572525452eacabee2166f44bb387b2c", size = 184585, upload-time = "2026-01-14T12:54:47.139Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/fc/f43972ff56fd790a9fa55028a52ccea1875100edbb856b705bd393b601e3/librt-0.7.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b7e7f140c5169798f90b80d6e607ed2ba5059784968a004107c88ad61fb3641d", size = 180497, upload-time = "2026-01-14T12:54:48.946Z" }, - { url = "https://files.pythonhosted.org/packages/e1/3a/25e36030315a410d3ad0b7d0f19f5f188e88d1613d7d3fd8150523ea1093/librt-0.7.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff71447cb778a4f772ddc4ce360e6ba9c95527ed84a52096bd1bbf9fee2ec7c0", size = 200052, upload-time = "2026-01-14T12:54:50.382Z" }, - { url = "https://files.pythonhosted.org/packages/fc/b8/f3a5a1931ae2a6ad92bf6893b9ef44325b88641d58723529e2c2935e8abe/librt-0.7.8-cp310-cp310-win32.whl", hash = "sha256:047164e5f68b7a8ebdf9fae91a3c2161d3192418aadd61ddd3a86a56cbe3dc85", size = 43477, upload-time = "2026-01-14T12:54:51.815Z" }, - { url = "https://files.pythonhosted.org/packages/fe/91/c4202779366bc19f871b4ad25db10fcfa1e313c7893feb942f32668e8597/librt-0.7.8-cp310-cp310-win_amd64.whl", hash = "sha256:d6f254d096d84156a46a84861183c183d30734e52383602443292644d895047c", size = 49806, upload-time = "2026-01-14T12:54:53.149Z" }, - { url = "https://files.pythonhosted.org/packages/1b/a3/87ea9c1049f2c781177496ebee29430e4631f439b8553a4969c88747d5d8/librt-0.7.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3e9c11aa260c31493d4b3197d1e28dd07768594a4f92bec4506849d736248f", size = 56507, upload-time = "2026-01-14T12:54:54.156Z" }, - { url = "https://files.pythonhosted.org/packages/5e/4a/23bcef149f37f771ad30203d561fcfd45b02bc54947b91f7a9ac34815747/librt-0.7.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddb52499d0b3ed4aa88746aaf6f36a08314677d5c346234c3987ddc506404eac", size = 58455, upload-time = "2026-01-14T12:54:55.978Z" }, - { url = 
"https://files.pythonhosted.org/packages/22/6e/46eb9b85c1b9761e0f42b6e6311e1cc544843ac897457062b9d5d0b21df4/librt-0.7.8-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e9c0afebbe6ce177ae8edba0c7c4d626f2a0fc12c33bb993d163817c41a7a05c", size = 164956, upload-time = "2026-01-14T12:54:57.311Z" }, - { url = "https://files.pythonhosted.org/packages/7a/3f/aa7c7f6829fb83989feb7ba9aa11c662b34b4bd4bd5b262f2876ba3db58d/librt-0.7.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:631599598e2c76ded400c0a8722dec09217c89ff64dc54b060f598ed68e7d2a8", size = 174364, upload-time = "2026-01-14T12:54:59.089Z" }, - { url = "https://files.pythonhosted.org/packages/3f/2d/d57d154b40b11f2cb851c4df0d4c4456bacd9b1ccc4ecb593ddec56c1a8b/librt-0.7.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c1ba843ae20db09b9d5c80475376168feb2640ce91cd9906414f23cc267a1ff", size = 188034, upload-time = "2026-01-14T12:55:00.141Z" }, - { url = "https://files.pythonhosted.org/packages/59/f9/36c4dad00925c16cd69d744b87f7001792691857d3b79187e7a673e812fb/librt-0.7.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b5b007bb22ea4b255d3ee39dfd06d12534de2fcc3438567d9f48cdaf67ae1ae3", size = 186295, upload-time = "2026-01-14T12:55:01.303Z" }, - { url = "https://files.pythonhosted.org/packages/23/9b/8a9889d3df5efb67695a67785028ccd58e661c3018237b73ad081691d0cb/librt-0.7.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dbd79caaf77a3f590cbe32dc2447f718772d6eea59656a7dcb9311161b10fa75", size = 181470, upload-time = "2026-01-14T12:55:02.492Z" }, - { url = "https://files.pythonhosted.org/packages/43/64/54d6ef11afca01fef8af78c230726a9394759f2addfbf7afc5e3cc032a45/librt-0.7.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:87808a8d1e0bd62a01cafc41f0fd6818b5a5d0ca0d8a55326a81643cdda8f873", size = 201713, upload-time = "2026-01-14T12:55:03.919Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/29/73e7ed2991330b28919387656f54109139b49e19cd72902f466bd44415fd/librt-0.7.8-cp311-cp311-win32.whl", hash = "sha256:31724b93baa91512bd0a376e7cf0b59d8b631ee17923b1218a65456fa9bda2e7", size = 43803, upload-time = "2026-01-14T12:55:04.996Z" }, - { url = "https://files.pythonhosted.org/packages/3f/de/66766ff48ed02b4d78deea30392ae200bcbd99ae61ba2418b49fd50a4831/librt-0.7.8-cp311-cp311-win_amd64.whl", hash = "sha256:978e8b5f13e52cf23a9e80f3286d7546baa70bc4ef35b51d97a709d0b28e537c", size = 50080, upload-time = "2026-01-14T12:55:06.489Z" }, - { url = "https://files.pythonhosted.org/packages/6f/e3/33450438ff3a8c581d4ed7f798a70b07c3206d298cf0b87d3806e72e3ed8/librt-0.7.8-cp311-cp311-win_arm64.whl", hash = "sha256:20e3946863d872f7cabf7f77c6c9d370b8b3d74333d3a32471c50d3a86c0a232", size = 43383, upload-time = "2026-01-14T12:55:07.49Z" }, - { url = "https://files.pythonhosted.org/packages/56/04/79d8fcb43cae376c7adbab7b2b9f65e48432c9eced62ac96703bcc16e09b/librt-0.7.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b6943885b2d49c48d0cff23b16be830ba46b0152d98f62de49e735c6e655a63", size = 57472, upload-time = "2026-01-14T12:55:08.528Z" }, - { url = "https://files.pythonhosted.org/packages/b4/ba/60b96e93043d3d659da91752689023a73981336446ae82078cddf706249e/librt-0.7.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46ef1f4b9b6cc364b11eea0ecc0897314447a66029ee1e55859acb3dd8757c93", size = 58986, upload-time = "2026-01-14T12:55:09.466Z" }, - { url = "https://files.pythonhosted.org/packages/7c/26/5215e4cdcc26e7be7eee21955a7e13cbf1f6d7d7311461a6014544596fac/librt-0.7.8-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:907ad09cfab21e3c86e8f1f87858f7049d1097f77196959c033612f532b4e592", size = 168422, upload-time = "2026-01-14T12:55:10.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/84/e8d1bc86fa0159bfc24f3d798d92cafd3897e84c7fea7fe61b3220915d76/librt-0.7.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2991b6c3775383752b3ca0204842743256f3ad3deeb1d0adc227d56b78a9a850", size = 177478, upload-time = "2026-01-14T12:55:11.577Z" }, - { url = "https://files.pythonhosted.org/packages/57/11/d0268c4b94717a18aa91df1100e767b010f87b7ae444dafaa5a2d80f33a6/librt-0.7.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03679b9856932b8c8f674e87aa3c55ea11c9274301f76ae8dc4d281bda55cf62", size = 192439, upload-time = "2026-01-14T12:55:12.7Z" }, - { url = "https://files.pythonhosted.org/packages/8d/56/1e8e833b95fe684f80f8894ae4d8b7d36acc9203e60478fcae599120a975/librt-0.7.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3968762fec1b2ad34ce57458b6de25dbb4142713e9ca6279a0d352fa4e9f452b", size = 191483, upload-time = "2026-01-14T12:55:13.838Z" }, - { url = "https://files.pythonhosted.org/packages/17/48/f11cf28a2cb6c31f282009e2208312aa84a5ee2732859f7856ee306176d5/librt-0.7.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb7a7807523a31f03061288cc4ffc065d684c39db7644c676b47d89553c0d714", size = 185376, upload-time = "2026-01-14T12:55:15.017Z" }, - { url = "https://files.pythonhosted.org/packages/b8/6a/d7c116c6da561b9155b184354a60a3d5cdbf08fc7f3678d09c95679d13d9/librt-0.7.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad64a14b1e56e702e19b24aae108f18ad1bf7777f3af5fcd39f87d0c5a814449", size = 206234, upload-time = "2026-01-14T12:55:16.571Z" }, - { url = "https://files.pythonhosted.org/packages/61/de/1975200bb0285fc921c5981d9978ce6ce11ae6d797df815add94a5a848a3/librt-0.7.8-cp312-cp312-win32.whl", hash = "sha256:0241a6ed65e6666236ea78203a73d800dbed896cf12ae25d026d75dc1fcd1dac", size = 44057, upload-time = "2026-01-14T12:55:18.077Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/cd/724f2d0b3461426730d4877754b65d39f06a41ac9d0a92d5c6840f72b9ae/librt-0.7.8-cp312-cp312-win_amd64.whl", hash = "sha256:6db5faf064b5bab9675c32a873436b31e01d66ca6984c6f7f92621656033a708", size = 50293, upload-time = "2026-01-14T12:55:19.179Z" }, - { url = "https://files.pythonhosted.org/packages/bd/cf/7e899acd9ee5727ad8160fdcc9994954e79fab371c66535c60e13b968ffc/librt-0.7.8-cp312-cp312-win_arm64.whl", hash = "sha256:57175aa93f804d2c08d2edb7213e09276bd49097611aefc37e3fa38d1fb99ad0", size = 43574, upload-time = "2026-01-14T12:55:20.185Z" }, - { url = "https://files.pythonhosted.org/packages/a1/fe/b1f9de2829cf7fc7649c1dcd202cfd873837c5cc2fc9e526b0e7f716c3d2/librt-0.7.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4c3995abbbb60b3c129490fa985dfe6cac11d88fc3c36eeb4fb1449efbbb04fc", size = 57500, upload-time = "2026-01-14T12:55:21.219Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d4/4a60fbe2e53b825f5d9a77325071d61cd8af8506255067bf0c8527530745/librt-0.7.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:44e0c2cbc9bebd074cf2cdbe472ca185e824be4e74b1c63a8e934cea674bebf2", size = 59019, upload-time = "2026-01-14T12:55:22.256Z" }, - { url = "https://files.pythonhosted.org/packages/6a/37/61ff80341ba5159afa524445f2d984c30e2821f31f7c73cf166dcafa5564/librt-0.7.8-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4d2f1e492cae964b3463a03dc77a7fe8742f7855d7258c7643f0ee32b6651dd3", size = 169015, upload-time = "2026-01-14T12:55:23.24Z" }, - { url = "https://files.pythonhosted.org/packages/1c/86/13d4f2d6a93f181ebf2fc953868826653ede494559da8268023fe567fca3/librt-0.7.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:451e7ffcef8f785831fdb791bd69211f47e95dc4c6ddff68e589058806f044c6", size = 178161, upload-time = "2026-01-14T12:55:24.826Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/26/e24ef01305954fc4d771f1f09f3dd682f9eb610e1bec188ffb719374d26e/librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3469e1af9f1380e093ae06bedcbdd11e407ac0b303a56bbe9afb1d6824d4982d", size = 193015, upload-time = "2026-01-14T12:55:26.04Z" }, - { url = "https://files.pythonhosted.org/packages/88/a0/92b6bd060e720d7a31ed474d046a69bd55334ec05e9c446d228c4b806ae3/librt-0.7.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f11b300027ce19a34f6d24ebb0a25fd0e24a9d53353225a5c1e6cadbf2916b2e", size = 192038, upload-time = "2026-01-14T12:55:27.208Z" }, - { url = "https://files.pythonhosted.org/packages/06/bb/6f4c650253704279c3a214dad188101d1b5ea23be0606628bc6739456624/librt-0.7.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4adc73614f0d3c97874f02f2c7fd2a27854e7e24ad532ea6b965459c5b757eca", size = 186006, upload-time = "2026-01-14T12:55:28.594Z" }, - { url = "https://files.pythonhosted.org/packages/dc/00/1c409618248d43240cadf45f3efb866837fa77e9a12a71481912135eb481/librt-0.7.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60c299e555f87e4c01b2eca085dfccda1dde87f5a604bb45c2906b8305819a93", size = 206888, upload-time = "2026-01-14T12:55:30.214Z" }, - { url = "https://files.pythonhosted.org/packages/d9/83/b2cfe8e76ff5c1c77f8a53da3d5de62d04b5ebf7cf913e37f8bca43b5d07/librt-0.7.8-cp313-cp313-win32.whl", hash = "sha256:b09c52ed43a461994716082ee7d87618096851319bf695d57ec123f2ab708951", size = 44126, upload-time = "2026-01-14T12:55:31.44Z" }, - { url = "https://files.pythonhosted.org/packages/a9/0b/c59d45de56a51bd2d3a401fc63449c0ac163e4ef7f523ea8b0c0dee86ec5/librt-0.7.8-cp313-cp313-win_amd64.whl", hash = "sha256:f8f4a901a3fa28969d6e4519deceab56c55a09d691ea7b12ca830e2fa3461e34", size = 50262, upload-time = "2026-01-14T12:55:33.01Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/b9/973455cec0a1ec592395250c474164c4a58ebf3e0651ee920fef1a2623f1/librt-0.7.8-cp313-cp313-win_arm64.whl", hash = "sha256:43d4e71b50763fcdcf64725ac680d8cfa1706c928b844794a7aa0fa9ac8e5f09", size = 43600, upload-time = "2026-01-14T12:55:34.054Z" }, - { url = "https://files.pythonhosted.org/packages/1a/73/fa8814c6ce2d49c3827829cadaa1589b0bf4391660bd4510899393a23ebc/librt-0.7.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:be927c3c94c74b05128089a955fba86501c3b544d1d300282cc1b4bd370cb418", size = 57049, upload-time = "2026-01-14T12:55:35.056Z" }, - { url = "https://files.pythonhosted.org/packages/53/fe/f6c70956da23ea235fd2e3cc16f4f0b4ebdfd72252b02d1164dd58b4e6c3/librt-0.7.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7b0803e9008c62a7ef79058233db7ff6f37a9933b8f2573c05b07ddafa226611", size = 58689, upload-time = "2026-01-14T12:55:36.078Z" }, - { url = "https://files.pythonhosted.org/packages/1f/4d/7a2481444ac5fba63050d9abe823e6bc16896f575bfc9c1e5068d516cdce/librt-0.7.8-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:79feb4d00b2a4e0e05c9c56df707934f41fcb5fe53fd9efb7549068d0495b758", size = 166808, upload-time = "2026-01-14T12:55:37.595Z" }, - { url = "https://files.pythonhosted.org/packages/ac/3c/10901d9e18639f8953f57c8986796cfbf4c1c514844a41c9197cf87cb707/librt-0.7.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9122094e3f24aa759c38f46bd8863433820654927370250f460ae75488b66ea", size = 175614, upload-time = "2026-01-14T12:55:38.756Z" }, - { url = "https://files.pythonhosted.org/packages/db/01/5cbdde0951a5090a80e5ba44e6357d375048123c572a23eecfb9326993a7/librt-0.7.8-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e03bea66af33c95ce3addf87a9bf1fcad8d33e757bc479957ddbc0e4f7207ac", size = 189955, upload-time = "2026-01-14T12:55:39.939Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/b4/e80528d2f4b7eaf1d437fcbd6fc6ba4cbeb3e2a0cb9ed5a79f47c7318706/librt-0.7.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f1ade7f31675db00b514b98f9ab9a7698c7282dad4be7492589109471852d398", size = 189370, upload-time = "2026-01-14T12:55:41.057Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ab/938368f8ce31a9787ecd4becb1e795954782e4312095daf8fd22420227c8/librt-0.7.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a14229ac62adcf1b90a15992f1ab9c69ae8b99ffb23cb64a90878a6e8a2f5b81", size = 183224, upload-time = "2026-01-14T12:55:42.328Z" }, - { url = "https://files.pythonhosted.org/packages/3c/10/559c310e7a6e4014ac44867d359ef8238465fb499e7eb31b6bfe3e3f86f5/librt-0.7.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5bcaaf624fd24e6a0cb14beac37677f90793a96864c67c064a91458611446e83", size = 203541, upload-time = "2026-01-14T12:55:43.501Z" }, - { url = "https://files.pythonhosted.org/packages/f8/db/a0db7acdb6290c215f343835c6efda5b491bb05c3ddc675af558f50fdba3/librt-0.7.8-cp314-cp314-win32.whl", hash = "sha256:7aa7d5457b6c542ecaed79cec4ad98534373c9757383973e638ccced0f11f46d", size = 40657, upload-time = "2026-01-14T12:55:44.668Z" }, - { url = "https://files.pythonhosted.org/packages/72/e0/4f9bdc2a98a798511e81edcd6b54fe82767a715e05d1921115ac70717f6f/librt-0.7.8-cp314-cp314-win_amd64.whl", hash = "sha256:3d1322800771bee4a91f3b4bd4e49abc7d35e65166821086e5afd1e6c0d9be44", size = 46835, upload-time = "2026-01-14T12:55:45.655Z" }, - { url = "https://files.pythonhosted.org/packages/f9/3d/59c6402e3dec2719655a41ad027a7371f8e2334aa794ed11533ad5f34969/librt-0.7.8-cp314-cp314-win_arm64.whl", hash = "sha256:5363427bc6a8c3b1719f8f3845ea53553d301382928a86e8fab7984426949bce", size = 39885, upload-time = "2026-01-14T12:55:47.138Z" }, - { url = "https://files.pythonhosted.org/packages/4e/9c/2481d80950b83085fb14ba3c595db56330d21bbc7d88a19f20165f3538db/librt-0.7.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:ca916919793a77e4a98d4a1701e345d337ce53be4a16620f063191f7322ac80f", size = 59161, upload-time = "2026-01-14T12:55:48.45Z" }, - { url = "https://files.pythonhosted.org/packages/96/79/108df2cfc4e672336765d54e3ff887294c1cc36ea4335c73588875775527/librt-0.7.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:54feb7b4f2f6706bb82325e836a01be805770443e2400f706e824e91f6441dde", size = 61008, upload-time = "2026-01-14T12:55:49.527Z" }, - { url = "https://files.pythonhosted.org/packages/46/f2/30179898f9994a5637459d6e169b6abdc982012c0a4b2d4c26f50c06f911/librt-0.7.8-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:39a4c76fee41007070f872b648cc2f711f9abf9a13d0c7162478043377b52c8e", size = 187199, upload-time = "2026-01-14T12:55:50.587Z" }, - { url = "https://files.pythonhosted.org/packages/b4/da/f7563db55cebdc884f518ba3791ad033becc25ff68eb70902b1747dc0d70/librt-0.7.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac9c8a458245c7de80bc1b9765b177055efff5803f08e548dd4bb9ab9a8d789b", size = 198317, upload-time = "2026-01-14T12:55:51.991Z" }, - { url = "https://files.pythonhosted.org/packages/b3/6c/4289acf076ad371471fa86718c30ae353e690d3de6167f7db36f429272f1/librt-0.7.8-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b67aa7eff150f075fda09d11f6bfb26edffd300f6ab1666759547581e8f666", size = 210334, upload-time = "2026-01-14T12:55:53.682Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7f/377521ac25b78ac0a5ff44127a0360ee6d5ddd3ce7327949876a30533daa/librt-0.7.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:535929b6eff670c593c34ff435d5440c3096f20fa72d63444608a5aef64dd581", size = 211031, upload-time = "2026-01-14T12:55:54.827Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b1/e1e96c3e20b23d00cf90f4aad48f0deb4cdfec2f0ed8380d0d85acf98bbf/librt-0.7.8-cp314-cp314t-musllinux_1_2_i686.whl", hash = 
"sha256:63937bd0f4d1cb56653dc7ae900d6c52c41f0015e25aaf9902481ee79943b33a", size = 204581, upload-time = "2026-01-14T12:55:56.811Z" }, - { url = "https://files.pythonhosted.org/packages/43/71/0f5d010e92ed9747e14bef35e91b6580533510f1e36a8a09eb79ee70b2f0/librt-0.7.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf243da9e42d914036fd362ac3fa77d80a41cadcd11ad789b1b5eec4daaf67ca", size = 224731, upload-time = "2026-01-14T12:55:58.175Z" }, - { url = "https://files.pythonhosted.org/packages/22/f0/07fb6ab5c39a4ca9af3e37554f9d42f25c464829254d72e4ebbd81da351c/librt-0.7.8-cp314-cp314t-win32.whl", hash = "sha256:171ca3a0a06c643bd0a2f62a8944e1902c94aa8e5da4db1ea9a8daf872685365", size = 41173, upload-time = "2026-01-14T12:55:59.315Z" }, - { url = "https://files.pythonhosted.org/packages/24/d4/7e4be20993dc6a782639625bd2f97f3c66125c7aa80c82426956811cfccf/librt-0.7.8-cp314-cp314t-win_amd64.whl", hash = "sha256:445b7304145e24c60288a2f172b5ce2ca35c0f81605f5299f3fa567e189d2e32", size = 47668, upload-time = "2026-01-14T12:56:00.261Z" }, - { url = "https://files.pythonhosted.org/packages/fc/85/69f92b2a7b3c0f88ffe107c86b952b397004b5b8ea5a81da3d9c04c04422/librt-0.7.8-cp314-cp314t-win_arm64.whl", hash = "sha256:8766ece9de08527deabcd7cb1b4f1a967a385d26e33e536d6d8913db6ef74f06", size = 40550, upload-time = "2026-01-14T12:56:01.542Z" }, + { url = "https://files.pythonhosted.org/packages/f3/4a/c64265d71b84030174ff3ac2cd16d8b664072afab8c41fccd8e2ee5a6f8d/librt-0.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f8e12706dcb8ff6b3ed57514a19e45c49ad00bcd423e87b2b2e4b5f64578443", size = 67529, upload-time = "2026-04-09T16:04:27.373Z" }, + { url = "https://files.pythonhosted.org/packages/23/b1/30ca0b3a8bdac209a00145c66cf42e5e7da2cc056ffc6ebc5c7b430ddd34/librt-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4e3dda8345307fd7306db0ed0cb109a63a2c85ba780eb9dc2d09b2049a931f9c", size = 70248, upload-time = "2026-04-09T16:04:28.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/fc/c6018dc181478d6ac5aa24a5846b8185101eb90894346db239eb3ea53209/librt-0.9.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:de7dac64e3eb832ffc7b840eb8f52f76420cde1b845be51b2a0f6b870890645e", size = 202184, upload-time = "2026-04-09T16:04:29.893Z" }, + { url = "https://files.pythonhosted.org/packages/bf/58/d69629f002203370ef41ea69ff71c49a2c618aec39b226ff49986ecd8623/librt-0.9.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a904cbdb678f7cb348c90d543d3c52f581663d687992fee47fd566dcbf5285", size = 212926, upload-time = "2026-04-09T16:04:31.126Z" }, + { url = "https://files.pythonhosted.org/packages/cc/55/01d859f57824e42bd02465c77bec31fa5ef9d8c2bcee702ccf8ef1b9f508/librt-0.9.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:224b9727eb8bc188bc3bcf29d969dba0cd61b01d9bac80c41575520cc4baabb2", size = 225664, upload-time = "2026-04-09T16:04:32.352Z" }, + { url = "https://files.pythonhosted.org/packages/9b/02/32f63ad0ef085a94a70315291efe1151a48b9947af12261882f8445b2a30/librt-0.9.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e94cbc6ad9a6aeea46d775cbb11f361022f778a9cc8cc90af653d3a594b057ce", size = 219534, upload-time = "2026-04-09T16:04:33.667Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5a/9d77111a183c885acf3b3b6e4c00f5b5b07b5817028226499a55f1fedc59/librt-0.9.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7bc30ad339f4e1a01d4917d645e522a0bc0030644d8973f6346397c93ba1503f", size = 227322, upload-time = "2026-04-09T16:04:34.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e7/05d700c93063753e12ab230b972002a3f8f3b9c95d8a980c2f646c8b6963/librt-0.9.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:56d65b583cf43b8cf4c8fbe1e1da20fa3076cc32a1149a141507af1062718236", size = 223407, upload-time = "2026-04-09T16:04:36.22Z" }, + 
{ url = "https://files.pythonhosted.org/packages/c0/26/26c3124823c67c987456977c683da9a27cc874befc194ddcead5f9988425/librt-0.9.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0a1be03168b2691ba61927e299b352a6315189199ca18a57b733f86cb3cc8d38", size = 221302, upload-time = "2026-04-09T16:04:37.62Z" }, + { url = "https://files.pythonhosted.org/packages/50/2b/c7cc2be5cf4ff7b017d948a789256288cb33a517687ff1995e72a7eea79f/librt-0.9.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:63c12efcd160e1d14da11af0c46c0217473e1e0d2ae1acbccc83f561ea4c2a7b", size = 243893, upload-time = "2026-04-09T16:04:38.909Z" }, + { url = "https://files.pythonhosted.org/packages/62/d3/da553d37417a337d12660450535d5fd51373caffbedf6962173c87867246/librt-0.9.0-cp310-cp310-win32.whl", hash = "sha256:e9002e98dcb1c0a66723592520decd86238ddcef168b37ff6cfb559200b4b774", size = 55375, upload-time = "2026-04-09T16:04:40.148Z" }, + { url = "https://files.pythonhosted.org/packages/9b/5a/46fa357bab8311b6442a83471591f2f9e5b15ecc1d2121a43725e0c529b8/librt-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:9fcb461fbf70654a52a7cc670e606f04449e2374c199b1825f754e16dacfedd8", size = 62581, upload-time = "2026-04-09T16:04:41.452Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1e/2ec7afcebcf3efea593d13aee18bbcfdd3a243043d848ebf385055e9f636/librt-0.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90904fac73c478f4b83f4ed96c99c8208b75e6f9a8a1910548f69a00f1eaa671", size = 67155, upload-time = "2026-04-09T16:04:42.933Z" }, + { url = "https://files.pythonhosted.org/packages/18/77/72b85afd4435268338ad4ec6231b3da8c77363f212a0227c1ff3b45e4d35/librt-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:789fff71757facc0738e8d89e3b84e4f0251c1c975e85e81b152cdaca927cc2d", size = 69916, upload-time = "2026-04-09T16:04:44.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/fb/948ea0204fbe2e78add6d46b48330e58d39897e425560674aee302dca81c/librt-0.9.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1bf465d1e5b0a27713862441f6467b5ab76385f4ecf8f1f3a44f8aa3c695b4b6", size = 199635, upload-time = "2026-04-09T16:04:45.5Z" }, + { url = "https://files.pythonhosted.org/packages/ac/cd/894a29e251b296a27957856804cfd21e93c194aa131de8bb8032021be07e/librt-0.9.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f819e0c6413e259a17a7c0d49f97f405abadd3c2a316a3b46c6440b7dbbedbb1", size = 211051, upload-time = "2026-04-09T16:04:47.016Z" }, + { url = "https://files.pythonhosted.org/packages/18/8f/dcaed0bc084a35f3721ff2d081158db569d2c57ea07d35623ddaca5cfc8e/librt-0.9.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e0785c2fb4a81e1aece366aa3e2e039f4a4d7d21aaaded5227d7f3c703427882", size = 224031, upload-time = "2026-04-09T16:04:48.207Z" }, + { url = "https://files.pythonhosted.org/packages/03/44/88f6c1ed1132cd418601cc041fbd92fed28b3a09f39de81978e0822d13ff/librt-0.9.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:80b25c7b570a86c03b5da69e665809deb39265476e8e21d96a9328f9762f9990", size = 218069, upload-time = "2026-04-09T16:04:50.025Z" }, + { url = "https://files.pythonhosted.org/packages/a3/90/7d02e981c2db12188d82b4410ff3e35bfdb844b26aecd02233626f46af2b/librt-0.9.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d4d16b608a1c43d7e33142099a75cd93af482dadce0bf82421e91cad077157f4", size = 224857, upload-time = "2026-04-09T16:04:51.684Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c3/c77e706b7215ca32e928d47535cf13dbc3d25f096f84ddf8fbc06693e229/librt-0.9.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:194fc1a32e1e21fe809d38b5faea66cc65eaa00217c8901fbdb99866938adbdb", size = 219865, upload-time = "2026-04-09T16:04:52.949Z" }, + 
{ url = "https://files.pythonhosted.org/packages/52/d1/32b0c1a0eb8461c70c11656c46a29f760b7c7edf3c36d6f102470c17170f/librt-0.9.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:8c6bc1384d9738781cfd41d09ad7f6e8af13cfea2c75ece6bd6d2566cdea2076", size = 218451, upload-time = "2026-04-09T16:04:54.174Z" }, + { url = "https://files.pythonhosted.org/packages/74/d1/adfd0f9c44761b1d49b1bec66173389834c33ee2bd3c7fd2e2367f1942d4/librt-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:15cb151e52a044f06e54ac7f7b47adbfc89b5c8e2b63e1175a9d587c43e8942a", size = 241300, upload-time = "2026-04-09T16:04:55.452Z" }, + { url = "https://files.pythonhosted.org/packages/09/b0/9074b64407712f0003c27f5b1d7655d1438979155f049720e8a1abd9b1a1/librt-0.9.0-cp311-cp311-win32.whl", hash = "sha256:f100bfe2acf8a3689af9d0cc660d89f17286c9c795f9f18f7b62dd1a6b247ae6", size = 55668, upload-time = "2026-04-09T16:04:56.689Z" }, + { url = "https://files.pythonhosted.org/packages/24/19/40b77b77ce80b9389fb03971431b09b6b913911c38d412059e0b3e2a9ef2/librt-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:0b73e4266307e51c95e09c0750b7ec383c561d2e97d58e473f6f6a209952fbb8", size = 62976, upload-time = "2026-04-09T16:04:57.733Z" }, + { url = "https://files.pythonhosted.org/packages/70/9d/9fa7a64041e29035cb8c575af5f0e3840be1b97b4c4d9061e0713f171849/librt-0.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc5518873822d2faa8ebdd2c1a4d7c8ef47b01a058495ab7924cb65bdbf5fc9a", size = 53502, upload-time = "2026-04-09T16:04:58.806Z" }, + { url = "https://files.pythonhosted.org/packages/bf/90/89ddba8e1c20b0922783cd93ed8e64f34dc05ab59c38a9c7e313632e20ff/librt-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b3e3bc363f71bda1639a4ee593cb78f7fbfeacc73411ec0d4c92f00730010a4", size = 68332, upload-time = "2026-04-09T16:05:00.09Z" }, + { url = "https://files.pythonhosted.org/packages/a8/40/7aa4da1fb08bdeeb540cb07bfc8207cb32c5c41642f2594dbd0098a0662d/librt-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:0a09c2f5869649101738653a9b7ab70cf045a1105ac66cbb8f4055e61df78f2d", size = 70581, upload-time = "2026-04-09T16:05:01.213Z" }, + { url = "https://files.pythonhosted.org/packages/48/ac/73a2187e1031041e93b7e3a25aae37aa6f13b838c550f7e0f06f66766212/librt-0.9.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5ca8e133d799c948db2ab1afc081c333a825b5540475164726dcbf73537e5c2f", size = 203984, upload-time = "2026-04-09T16:05:02.542Z" }, + { url = "https://files.pythonhosted.org/packages/5e/3d/23460d571e9cbddb405b017681df04c142fb1b04cbfce77c54b08e28b108/librt-0.9.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:603138ee838ee1583f1b960b62d5d0007845c5c423feb68e44648b1359014e27", size = 215762, upload-time = "2026-04-09T16:05:04.127Z" }, + { url = "https://files.pythonhosted.org/packages/de/1e/42dc7f8ab63e65b20640d058e63e97fd3e482c1edbda3570d813b4d0b927/librt-0.9.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4003f70c56a5addd6aa0897f200dd59afd3bf7bcd5b3cce46dd21f925743bc2", size = 230288, upload-time = "2026-04-09T16:05:05.883Z" }, + { url = "https://files.pythonhosted.org/packages/dc/08/ca812b6d8259ad9ece703397f8ad5c03af5b5fedfce64279693d3ce4087c/librt-0.9.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:78042f6facfd98ecb25e9829c7e37cce23363d9d7c83bc5f72702c5059eb082b", size = 224103, upload-time = "2026-04-09T16:05:07.148Z" }, + { url = "https://files.pythonhosted.org/packages/b6/3f/620490fb2fa66ffd44e7f900254bc110ebec8dac6c1b7514d64662570e6f/librt-0.9.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a361c9434a64d70a7dbb771d1de302c0cc9f13c0bffe1cf7e642152814b35265", size = 232122, upload-time = "2026-04-09T16:05:08.386Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/83/12864700a1b6a8be458cf5d05db209b0d8e94ae281e7ec261dbe616597b4/librt-0.9.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:dd2c7e082b0b92e1baa4da28163a808672485617bc855cc22a2fd06978fa9084", size = 225045, upload-time = "2026-04-09T16:05:09.707Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1b/845d339c29dc7dbc87a2e992a1ba8d28d25d0e0372f9a0a2ecebde298186/librt-0.9.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7e6274fd33fc5b2a14d41c9119629d3ff395849d8bcbc80cf637d9e8d2034da8", size = 227372, upload-time = "2026-04-09T16:05:10.942Z" }, + { url = "https://files.pythonhosted.org/packages/8d/fe/277985610269d926a64c606f761d58d3db67b956dbbf40024921e95e7fcb/librt-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5093043afb226ecfa1400120d1ebd4442b4f99977783e4f4f7248879009b227f", size = 248224, upload-time = "2026-04-09T16:05:12.254Z" }, + { url = "https://files.pythonhosted.org/packages/92/1b/ee486d244b8de6b8b5dbaefabe6bfdd4a72e08f6353edf7d16d27114da8d/librt-0.9.0-cp312-cp312-win32.whl", hash = "sha256:9edcc35d1cae9fd5320171b1a838c7da8a5c968af31e82ecc3dff30b4be0957f", size = 55986, upload-time = "2026-04-09T16:05:13.529Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/ba1737012308c17dc6d5516143b5dce9a2c7ba3474afd54e11f44a4d1ef3/librt-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc2917258e131ae5f958a4d872e07555b51cb7466a43433218061c74ef33745", size = 63260, upload-time = "2026-04-09T16:05:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/36/e4/01752c113da15127f18f7bf11142f5640038f062407a611c059d0036c6aa/librt-0.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:90e6d5420fc8a300518d4d2288154ff45005e920425c22cbbfe8330f3f754bd9", size = 53694, upload-time = "2026-04-09T16:05:16.095Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d7/1b3e26fffde1452d82f5666164858a81c26ebe808e7ae8c9c88628981540/librt-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f29b68cd9714531672db62cc54f6e8ff981900f824d13fa0e00749189e13778e", size = 68367, upload-time = "2026-04-09T16:05:17.243Z" }, + { url = "https://files.pythonhosted.org/packages/a5/5b/c61b043ad2e091fbe1f2d35d14795e545d0b56b03edaa390fa1dcee3d160/librt-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d5c8a5929ac325729f6119802070b561f4db793dffc45e9ac750992a4ed4d22", size = 70595, upload-time = "2026-04-09T16:05:18.471Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/2448471196d8a73370aa2f23445455dc42712c21404081fcd7a03b9e0749/librt-0.9.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:756775d25ec8345b837ab52effee3ad2f3b2dfd6bbee3e3f029c517bd5d8f05a", size = 204354, upload-time = "2026-04-09T16:05:19.593Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5e/39fc4b153c78cfd2c8a2dcb32700f2d41d2312aa1050513183be4540930d/librt-0.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b8f5d00b49818f4e2b1667db994488b045835e0ac16fe2f924f3871bd2b8ac5", size = 216238, upload-time = "2026-04-09T16:05:20.868Z" }, + { url = "https://files.pythonhosted.org/packages/d7/42/bc2d02d0fa7badfa63aa8d6dcd8793a9f7ef5a94396801684a51ed8d8287/librt-0.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c81aef782380f0f13ead670aae01825eb653b44b046aa0e5ebbb79f76ed4aa11", size = 230589, upload-time = "2026-04-09T16:05:22.305Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7b/e2d95cc513866373692aa5edf98080d5602dd07cabfb9e5d2f70df2f25f7/librt-0.9.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66b58fed90a545328e80d575467244de3741e088c1af928f0b489ebec3ef3858", size = 224610, upload-time = "2026-04-09T16:05:23.647Z" }, + { url = "https://files.pythonhosted.org/packages/31/d5/6cec4607e998eaba57564d06a1295c21b0a0c8de76e4e74d699e627bd98c/librt-0.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", 
hash = "sha256:e78fb7419e07d98c2af4b8567b72b3eaf8cb05caad642e9963465569c8b2d87e", size = 232558, upload-time = "2026-04-09T16:05:25.025Z" }, + { url = "https://files.pythonhosted.org/packages/95/8c/27f1d8d3aaf079d3eb26439bf0b32f1482340c3552e324f7db9dca858671/librt-0.9.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c3786f0f4490a5cd87f1ed6cefae833ad6b1060d52044ce0434a2e85893afd0", size = 225521, upload-time = "2026-04-09T16:05:26.311Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d8/1e0d43b1c329b416017619469b3c3801a25a6a4ef4a1c68332aeaa6f72ca/librt-0.9.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8494cfc61e03542f2d381e71804990b3931175a29b9278fdb4a5459948778dc2", size = 227789, upload-time = "2026-04-09T16:05:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/2c/b4/d3d842e88610fcd4c8eec7067b0c23ef2d7d3bff31496eded6a83b0f99be/librt-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:07cf11f769831186eeac424376e6189f20ace4f7263e2134bdb9757340d84d4d", size = 248616, upload-time = "2026-04-09T16:05:29.181Z" }, + { url = "https://files.pythonhosted.org/packages/ec/28/527df8ad0d1eb6c8bdfa82fc190f1f7c4cca5a1b6d7b36aeabf95b52d74d/librt-0.9.0-cp313-cp313-win32.whl", hash = "sha256:850d6d03177e52700af605fd60db7f37dcb89782049a149674d1a9649c2138fd", size = 56039, upload-time = "2026-04-09T16:05:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a7/413652ad0d92273ee5e30c000fc494b361171177c83e57c060ecd3c21538/librt-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:a5af136bfba820d592f86c67affcef9b3ff4d4360ac3255e341e964489b48519", size = 63264, upload-time = "2026-04-09T16:05:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/a4/0a/92c244309b774e290ddb15e93363846ae7aa753d9586b8aad511c5e6145b/librt-0.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:4c4d0440a3a8e31d962340c3e1cc3fc9ee7febd34c8d8f770d06adb947779ea5", size = 53728, upload-time = "2026-04-09T16:05:33.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/c1/184e539543f06ea2912f4b92a5ffaede4f9b392689e3f00acbf8134bee92/librt-0.9.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:3f05d145df35dca5056a8bc3838e940efebd893a54b3e19b2dda39ceaa299bcb", size = 67830, upload-time = "2026-04-09T16:05:34.517Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ad/23399bdcb7afca819acacdef31b37ee59de261bd66b503a7995c03c4b0dc/librt-0.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1c587494461ebd42229d0f1739f3aa34237dd9980623ecf1be8d3bcba79f4499", size = 70280, upload-time = "2026-04-09T16:05:35.649Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0b/4542dc5a2b8772dbf92cafb9194701230157e73c14b017b6961a23598b03/librt-0.9.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0a2040f801406b93657a70b72fa12311063a319fee72ce98e1524da7200171f", size = 201925, upload-time = "2026-04-09T16:05:36.739Z" }, + { url = "https://files.pythonhosted.org/packages/31/d4/8ee7358b08fd0cfce051ef96695380f09b3c2c11b77c9bfbc367c921cce5/librt-0.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f38bc489037eca88d6ebefc9c4d41a4e07c8e8b4de5188a9e6d290273ad7ebb1", size = 212381, upload-time = "2026-04-09T16:05:38.043Z" }, + { url = "https://files.pythonhosted.org/packages/f2/94/a2025fe442abedf8b038038dab3dba942009ad42b38ea064a1a9e6094241/librt-0.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3fd278f5e6bf7c75ccd6d12344eb686cc020712683363b66f46ac79d37c799f", size = 227065, upload-time = "2026-04-09T16:05:39.394Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e9/b9fcf6afa909f957cfbbf918802f9dada1bd5d3c1da43d722fd6a310dc3f/librt-0.9.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fcbdf2a9ca24e87bbebb47f1fe34e531ef06f104f98c9ccfc953a3f3344c567a", size = 221333, upload-time = "2026-04-09T16:05:40.999Z" }, + { 
url = "https://files.pythonhosted.org/packages/ac/7c/ba54cd6aa6a3c8cd12757a6870e0c79a64b1e6327f5248dcff98423f4d43/librt-0.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e306d956cfa027fe041585f02a1602c32bfa6bb8ebea4899d373383295a6c62f", size = 229051, upload-time = "2026-04-09T16:05:42.605Z" }, + { url = "https://files.pythonhosted.org/packages/4b/4b/8cfdbad314c8677a0148bf0b70591d6d18587f9884d930276098a235461b/librt-0.9.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:465814ab157986acb9dfa5ccd7df944be5eefc0d08d31ec6e8d88bc71251d845", size = 222492, upload-time = "2026-04-09T16:05:43.842Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d1/2eda69563a1a88706808decdce035e4b32755dbfbb0d05e1a65db9547ed1/librt-0.9.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:703f4ae36d6240bfe24f542bac784c7e4194ec49c3ba5a994d02891649e2d85b", size = 223849, upload-time = "2026-04-09T16:05:45.054Z" }, + { url = "https://files.pythonhosted.org/packages/04/44/b2ed37df6be5b3d42cfe36318e0598e80843d5c6308dd63d0bf4e0ce5028/librt-0.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3be322a15ee5e70b93b7a59cfd074614f22cc8c9ff18bd27f474e79137ea8d3b", size = 245001, upload-time = "2026-04-09T16:05:46.34Z" }, + { url = "https://files.pythonhosted.org/packages/47/e7/617e412426df89169dd2a9ed0cc8752d5763336252c65dbf945199915119/librt-0.9.0-cp314-cp314-win32.whl", hash = "sha256:b8da9f8035bb417770b1e1610526d87ad4fc58a2804dc4d79c53f6d2cf5a6eb9", size = 51799, upload-time = "2026-04-09T16:05:47.738Z" }, + { url = "https://files.pythonhosted.org/packages/24/ed/c22ca4db0ca3cbc285e4d9206108746beda561a9792289c3c31281d7e9df/librt-0.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:b8bd70d5d816566a580d193326912f4a76ec2d28a97dc4cd4cc831c0af8e330e", size = 59165, upload-time = "2026-04-09T16:05:49.198Z" }, + { url = "https://files.pythonhosted.org/packages/24/56/875398fafa4cbc8f15b89366fc3287304ddd3314d861f182a4b87595ace0/librt-0.9.0-cp314-cp314-win_arm64.whl", hash = 
"sha256:fc5758e2b7a56532dc33e3c544d78cbaa9ecf0a0f2a2da2df882c1d6b99a317f", size = 49292, upload-time = "2026-04-09T16:05:50.362Z" }, + { url = "https://files.pythonhosted.org/packages/4c/61/bc448ecbf9b2d69c5cff88fe41496b19ab2a1cbda0065e47d4d0d51c0867/librt-0.9.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f24b90b0e0c8cc9491fb1693ae91fe17cb7963153a1946395acdbdd5818429a4", size = 70175, upload-time = "2026-04-09T16:05:51.564Z" }, + { url = "https://files.pythonhosted.org/packages/60/f2/c47bb71069a73e2f04e70acbd196c1e5cc411578ac99039a224b98920fd4/librt-0.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fe56e80badb66fdcde06bef81bbaa5bfcf6fbd7aefb86222d9e369c38c6b228", size = 72951, upload-time = "2026-04-09T16:05:52.699Z" }, + { url = "https://files.pythonhosted.org/packages/29/19/0549df59060631732df758e8886d92088da5fdbedb35b80e4643664e8412/librt-0.9.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:527b5b820b47a09e09829051452bb0d1dd2122261254e2a6f674d12f1d793d54", size = 225864, upload-time = "2026-04-09T16:05:53.895Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f8/3b144396d302ac08e50f89e64452c38db84bc7b23f6c60479c5d3abd303c/librt-0.9.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d429bdd4ac0ab17c8e4a8af0ed2a7440b16eba474909ab357131018fe8c7e71", size = 241155, upload-time = "2026-04-09T16:05:55.191Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ce/ee67ec14581de4043e61d05786d2aed6c9b5338816b7859bcf07455c6a9f/librt-0.9.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7202bdcac47d3a708271c4304a474a8605a4a9a4a709e954bf2d3241140aa938", size = 252235, upload-time = "2026-04-09T16:05:56.549Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/fa/0ead15daa2b293a54101550b08d4bafe387b7d4a9fc6d2b985602bae69b6/librt-0.9.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0d620e74897f8c2613b3c4e2e9c1e422eb46d2ddd07df540784d44117836af3", size = 244963, upload-time = "2026-04-09T16:05:57.858Z" }, + { url = "https://files.pythonhosted.org/packages/29/68/9fbf9a9aa704ba87689e40017e720aced8d9a4d2b46b82451d8142f91ec9/librt-0.9.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d69fc39e627908f4c03297d5a88d9284b73f4d90b424461e32e8c2485e21c283", size = 257364, upload-time = "2026-04-09T16:05:59.686Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8d/9d60869f1b6716c762e45f66ed945b1e5dd649f7377684c3b176ae424648/librt-0.9.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:c2640e23d2b7c98796f123ffd95cf2022c7777aa8a4a3b98b36c570d37e85eee", size = 247661, upload-time = "2026-04-09T16:06:00.938Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/a5c365093962310bfdb4f6af256f191085078ffb529b3f0cbebb5b33ebe2/librt-0.9.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:451daa98463b7695b0a30aa56bf637831ea559e7b8101ac2ef6382e8eb15e29c", size = 248238, upload-time = "2026-04-09T16:06:02.537Z" }, + { url = "https://files.pythonhosted.org/packages/a0/3c/2d34365177f412c9e19c0a29f969d70f5343f27634b76b765a54d8b27705/librt-0.9.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:928bd06eca2c2bbf4349e5b817f837509b0604342e65a502de1d50a7570afd15", size = 269457, upload-time = "2026-04-09T16:06:03.833Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/de45b239ea3bdf626f982a00c14bfcf2e12d261c510ba7db62c5969a27cd/librt-0.9.0-cp314-cp314t-win32.whl", hash = "sha256:a9c63e04d003bc0fb6a03b348018b9a3002f98268200e22cc80f146beac5dc40", size = 52453, upload-time = "2026-04-09T16:06:05.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/f9/bfb32ae428aa75c0c533915622176f0a17d6da7b72b5a3c6363685914f70/librt-0.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f162af66a2ed3f7d1d161a82ca584efd15acd9c1cff190a373458c32f7d42118", size = 60044, upload-time = "2026-04-09T16:06:06.398Z" }, + { url = "https://files.pythonhosted.org/packages/aa/47/7d70414bcdbb3bc1f458a8d10558f00bbfdb24e5a11740fc8197e12c3255/librt-0.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:a4b25c6c25cac5d0d9d6d6da855195b254e0021e513e0249f0e3b444dc6e0e61", size = 50009, upload-time = "2026-04-09T16:06:07.995Z" }, ] [[package]] @@ -336,11 +361,11 @@ wheels = [ [[package]] name = "markdown2" -version = "2.5.4" +version = "2.5.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652, upload-time = "2025-07-27T16:16:24.307Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/ae/07d4a5fcaa5509221287d289323d75ac8eda5a5a4ac9de2accf7bbcc2b88/markdown2-2.5.5.tar.gz", hash = "sha256:001547e68f6e7fcf0f1cb83f7e82f48aa7d48b2c6a321f0cd20a853a8a2d1664", size = 157249, upload-time = "2026-03-02T20:46:53.411Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954, upload-time = "2025-07-27T16:16:23.026Z" }, + { url = "https://files.pythonhosted.org/packages/43/af/4b3891eb0a49d6cfd5cbf3e9bf514c943afc2b0f13e2c57cc57cd88ecc21/markdown2-2.5.5-py3-none-any.whl", hash = "sha256:be798587e09d1f52d2e4d96a649c4b82a778c75f9929aad52a2c95747fa26941", size = 56250, upload-time = "2026-03-02T20:46:52.032Z" }, ] [[package]] @@ -439,7 +464,7 @@ wheels = [ [[package]] name = 
"mypy" -version = "1.19.1" +version = "1.20.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, @@ -448,39 +473,51 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/af/e3d4b3e9ec91a0ff9aabfdb38692952acf49bbb899c2e4c29acb3a6da3ae/mypy-1.20.2.tar.gz", hash = "sha256:e8222c26daaafd9e8626dec58ae36029f82585890589576f769a650dd20fd665", size = 3817349, upload-time = "2026-04-21T17:12:28.473Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/63/e499890d8e39b1ff2df4c0c6ce5d371b6844ee22b8250687a99fd2f657a8/mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec", size = 13101333, upload-time = "2025-12-15T05:03:03.28Z" }, - { url = "https://files.pythonhosted.org/packages/72/4b/095626fc136fba96effc4fd4a82b41d688ab92124f8c4f7564bffe5cf1b0/mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b", size = 12164102, upload-time = "2025-12-15T05:02:33.611Z" }, - { url = "https://files.pythonhosted.org/packages/0c/5b/952928dd081bf88a83a5ccd49aaecfcd18fd0d2710c7ff07b8fb6f7032b9/mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6", size = 12765799, upload-time = "2025-12-15T05:03:28.44Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/0d/93c2e4a287f74ef11a66fb6d49c7a9f05e47b0a4399040e6719b57f500d2/mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74", size = 13522149, upload-time = "2025-12-15T05:02:36.011Z" }, - { url = "https://files.pythonhosted.org/packages/7b/0e/33a294b56aaad2b338d203e3a1d8b453637ac36cb278b45005e0901cf148/mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1", size = 13810105, upload-time = "2025-12-15T05:02:40.327Z" }, - { url = "https://files.pythonhosted.org/packages/0e/fd/3e82603a0cb66b67c5e7abababce6bf1a929ddf67bf445e652684af5c5a0/mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac", size = 10057200, upload-time = "2025-12-15T05:02:51.012Z" }, - { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" }, - { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" }, - { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" }, - { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" }, - { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, - { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, - { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, - { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, - { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, - { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, - { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, - { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, - { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, - { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, - { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ce2502df2cecf2ef997b6c6527c4a223b92feb9e7b790cdc8dcd683f3a8a/mypy-1.20.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cf5a4db6dca263010e2c7bff081c89383c72d187ba2cf4c44759aac970e2f0c4", size = 14457059, upload-time = "2026-04-21T17:06:14.935Z" }, + { url = "https://files.pythonhosted.org/packages/c9/34/417ee60b822cc80c0f3dc9f495ad7fd8dbb8d8b2cf4baf22d4046d25d01d/mypy-1.20.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b0e817b518bff7facd7f85ea05b643ad8bdcce684cf29784987b0a7c8e1f997", size = 13346816, upload-time = "2026-04-21T17:10:41.433Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/85/e20951978702df58379d0bcc2e8f7ccdca4e78cd7dc66dd3ddbf9b29d517/mypy-1.20.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97d7b9a485b40f8ca425460e89bf1da2814625b2da627c0dcc6aa46c92631d14", size = 13772593, upload-time = "2026-04-21T17:08:11.24Z" }, + { url = "https://files.pythonhosted.org/packages/63/a5/5441a13259ec516c56fd5de0fd96a69a9590ae6c5e5d3e5174aa84b97973/mypy-1.20.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e1c12f6d2db3d78b909b5f77513c11eb7f2dd2782b96a3ab6dffc7d44575c99", size = 14656635, upload-time = "2026-04-21T17:09:54.042Z" }, + { url = "https://files.pythonhosted.org/packages/3b/51/b89c69157c5e1f19fd125a65d991166a26906e7902f026f00feebbcfa2b9/mypy-1.20.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:89dce27e142d25ffbc154c1819383b69f2e9234dc4ed4766f42e0e8cb264ab5c", size = 14943278, upload-time = "2026-04-21T17:09:15.599Z" }, + { url = "https://files.pythonhosted.org/packages/e9/44/6b0eeecfe96d7cce1d71c66b8e03cb304aa70ec11f1955dc1d6b46aca3c3/mypy-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:f376e37f9bf2a946872fc5fd1199c99310748e3c26c7a26683f13f8bdb756cbd", size = 10851915, upload-time = "2026-04-21T17:06:03.5Z" }, + { url = "https://files.pythonhosted.org/packages/3c/36/6593dc88545d75fb96416184be5392da5e2a8e8c2802a8597913e16ae25c/mypy-1.20.2-cp310-cp310-win_arm64.whl", hash = "sha256:6e2b469efd811707bc530fd1effef0f5d6eebcb7fe376affae69025da4b979a2", size = 9786676, upload-time = "2026-04-21T17:07:02.035Z" }, + { url = "https://files.pythonhosted.org/packages/1f/4d/9ebeae211caccbdaddde7ed5e31dfcf57faac66be9b11deb1dc6526c8078/mypy-1.20.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4077797a273e56e8843d001e9dfe4ba10e33323d6ade647ff260e5cd97d9758c", size = 14371307, upload-time = "2026-04-21T17:08:56.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/d7/93473d34b61f04fac1aecc01368485c89c5c4af7a4b9a0cab5d77d04b63f/mypy-1.20.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cdecf62abcc4292500d7858aeae87a1f8f1150f4c4dd08fb0b336ee79b2a6df3", size = 13258917, upload-time = "2026-04-21T17:05:50.978Z" }, + { url = "https://files.pythonhosted.org/packages/e2/30/3dd903e8bafb7b5f7bf87fcd58f8382086dea2aa19f0a7b357f21f63071b/mypy-1.20.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c566c3a88b6ece59b3d70f65bedef17304f48eb52ff040a6a18214e1917b3254", size = 13700516, upload-time = "2026-04-21T17:11:33.161Z" }, + { url = "https://files.pythonhosted.org/packages/07/05/c61a140aba4c729ac7bc99ae26fc627c78a6e08f5b9dd319244ea71a3d7e/mypy-1.20.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0deb80d062b2479f2c87ae568f89845afc71d11bc41b04179e58165fd9f31e98", size = 14562889, upload-time = "2026-04-21T17:05:27.674Z" }, + { url = "https://files.pythonhosted.org/packages/fd/87/da78243742ffa8a36d98c3010f0d829f93d5da4e6786f1a1a6f2ad616502/mypy-1.20.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bba9ad231e92a3e424b3e56b65aa17704993425bba97e302c832f9466bb85bac", size = 14803844, upload-time = "2026-04-21T17:10:06.2Z" }, + { url = "https://files.pythonhosted.org/packages/37/52/10a1ddf91b40f843943a3c6db51e2df59c9e237f29d355e95eaab427461f/mypy-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:baf593f2765fa3a6b1ef95807dbaa3d25b594f6a52adcc506a6b9cb115e1be67", size = 10846300, upload-time = "2026-04-21T17:12:23.886Z" }, + { url = "https://files.pythonhosted.org/packages/20/02/f9a4415b664c53bd34d6709be59da303abcae986dc4ac847b402edb6fa1e/mypy-1.20.2-cp311-cp311-win_arm64.whl", hash = "sha256:20175a1c0f49863946ec20b7f63255768058ac4f07d2b9ded6a6b46cfb5a9100", size = 9779498, upload-time = "2026-04-21T17:09:23.695Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/4e/7560e4528db9e9b147e4c0f22660466bf30a0a1fe3d63d1b9d3b0fd354ee/mypy-1.20.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4dbfcf869f6b0517f70cf0030ba6ea1d6645e132337a7d5204a18d8d5636c02b", size = 14539393, upload-time = "2026-04-21T17:07:12.52Z" }, + { url = "https://files.pythonhosted.org/packages/32/d9/34a5efed8124f5a9234f55ac6a4ced4201e2c5b81e1109c49ad23190ec8c/mypy-1.20.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b6481b228d072315b053210b01ac320e1be243dc17f9e5887ef167f23f5fae4", size = 13361642, upload-time = "2026-04-21T17:06:53.742Z" }, + { url = "https://files.pythonhosted.org/packages/d1/14/eb377acf78c03c92d566a1510cda8137348215b5335085ef662ab82ecd3a/mypy-1.20.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34397cdced6b90b836e38182076049fdb41424322e0b0728c946b0939ebdf9f6", size = 13740347, upload-time = "2026-04-21T17:12:04.73Z" }, + { url = "https://files.pythonhosted.org/packages/b9/94/7e4634a32b641aa1c112422eed1bbece61ee16205f674190e8b536f884de/mypy-1.20.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5da6976f20cae27059ea8d0c86e7cef3de720e04c4bb9ee18e3690fdb792066", size = 14734042, upload-time = "2026-04-21T17:07:43.16Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f3/f7e62395cb7f434541b4491a01149a4439e28ace4c0c632bbf5431e92d1f/mypy-1.20.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:56908d7e08318d39f85b1f0c6cfd47b0cac1a130da677630dac0de3e0623e102", size = 14964958, upload-time = "2026-04-21T17:11:00.665Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0d/47e3c3a0ec2a876e35aeac365df3cac7776c36bbd4ed18cc521e1b9d255b/mypy-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:d52ad8d78522da1d308789df651ee5379088e77c76cb1994858d40a426b343b9", size = 10911340, upload-time = "2026-04-21T17:10:49.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/b2/6c852d72e0ea8b01f49da817fb52539993cde327e7d010e0103dc12d0dac/mypy-1.20.2-cp312-cp312-win_arm64.whl", hash = "sha256:785b08db19c9f214dc37d65f7c165d19a30fcecb48abfa30f31b01b5acaabb58", size = 9833947, upload-time = "2026-04-21T17:09:05.267Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c4/b93812d3a192c9bcf5df405bd2f30277cd0e48106a14d1023c7f6ed6e39b/mypy-1.20.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:edfbfca868cdd6bd8d974a60f8a3682f5565d3f5c99b327640cedd24c4264026", size = 14524670, upload-time = "2026-04-21T17:10:30.737Z" }, + { url = "https://files.pythonhosted.org/packages/f3/47/42c122501bff18eaf1e8f457f5c017933452d8acdc52918a9f59f6812955/mypy-1.20.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e2877a02380adfcdbc69071a0f74d6e9dbbf593c0dc9d174e1f223ffd5281943", size = 13336218, upload-time = "2026-04-21T17:08:44.069Z" }, + { url = "https://files.pythonhosted.org/packages/92/8f/75bbc92f41725fbd585fb17b440b1119b576105df1013622983e18640a93/mypy-1.20.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7488448de6007cd5177c6cea0517ac33b4c0f5ee9b5e9f2be51ce75511a85517", size = 13724906, upload-time = "2026-04-21T17:08:01.02Z" }, + { url = "https://files.pythonhosted.org/packages/a1/32/4c49da27a606167391ff0c39aa955707a00edc500572e562f7c36c08a71f/mypy-1.20.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb9c2fa06887e21d6a3a868762acb82aec34e2c6fd0174064f27c93ede68ad15", size = 14726046, upload-time = "2026-04-21T17:11:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fc/4e354a1bd70216359deb0c9c54847ee6b32ef78dfb09f5131ff99b494078/mypy-1.20.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d56a78b646f2e3daa865bc70cd5ec5a46c50045801ca8ff17a0c43abc97e3ee", size = 14955587, upload-time = "2026-04-21T17:12:16.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/b2/c0f2056e9eb8f08c62cafd9715e4584b89132bdc832fcf85d27d07b5f3e5/mypy-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:2a4102b03bb7481d9a91a6da8d174740c9c8c4401024684b9ca3b7cc5e49852f", size = 10922681, upload-time = "2026-04-21T17:06:35.842Z" }, + { url = "https://files.pythonhosted.org/packages/e5/14/065e333721f05de8ef683d0aa804c23026bcc287446b61cac657b902ccac/mypy-1.20.2-cp313-cp313-win_arm64.whl", hash = "sha256:a95a9248b0c6fd933a442c03c3b113c3b61320086b88e2c444676d3fd1ca3330", size = 9830560, upload-time = "2026-04-21T17:07:51.023Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d1/b4ec96b0ecc620a4443570c6e95c867903428cfcde4206518eafdd5880c3/mypy-1.20.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:419413398fe250aae057fd2fe50166b61077083c9b82754c341cf4fd73038f30", size = 14524561, upload-time = "2026-04-21T17:06:27.325Z" }, + { url = "https://files.pythonhosted.org/packages/3a/63/d2c2ff4fa66bc49477d32dfa26e8a167ba803ea6a69c5efb416036909d30/mypy-1.20.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e73c07f23009962885c197ccb9b41356a30cc0e5a1d0c2ea8fd8fb1362d7f924", size = 13363883, upload-time = "2026-04-21T17:11:11.239Z" }, + { url = "https://files.pythonhosted.org/packages/2a/56/983916806bf4eddeaaa2c9230903c3669c6718552a921154e1c5182c701f/mypy-1.20.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c64e5973df366b747646fc98da921f9d6eba9716d57d1db94a83c026a08e0fb", size = 13742945, upload-time = "2026-04-21T17:08:34.181Z" }, + { url = "https://files.pythonhosted.org/packages/19/65/0cd9285ab010ee8214c83d67c6b49417c40d86ce46f1aa109457b5a9b8d7/mypy-1.20.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a65aa591af023864fd08a97da9974e919452cfe19cb146c8a5dc692626445dc", size = 14706163, upload-time = "2026-04-21T17:05:15.51Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/97/48ff3b297cafcc94d185243a9190836fb1b01c1b0918fff64e941e973cc9/mypy-1.20.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4fef51b01e638974a6e69885687e9bd40c8d1e09a6cd291cca0619625cf1f558", size = 14938677, upload-time = "2026-04-21T17:05:39.562Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a1/1b4233d255bdd0b38a1f284feeb1c143ca508c19184964e22f8d837ec851/mypy-1.20.2-cp314-cp314-win_amd64.whl", hash = "sha256:913485a03f1bcf5d279409a9d2b9ed565c151f61c09f29991e5faa14033da4c8", size = 11089322, upload-time = "2026-04-21T17:06:44.29Z" }, + { url = "https://files.pythonhosted.org/packages/78/c2/ce7ee2ba36aeb954ba50f18fa25d9c1188578654b97d02a66a15b6f09531/mypy-1.20.2-cp314-cp314-win_arm64.whl", hash = "sha256:c3bae4f855d965b5453784300c12ffc63a548304ac7f99e55d4dc7c898673aa3", size = 10017775, upload-time = "2026-04-21T17:07:20.732Z" }, + { url = "https://files.pythonhosted.org/packages/4e/a1/9d93a7d0b5859af0ead82b4888b46df6c8797e1bc5e1e262a08518c6d48e/mypy-1.20.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2de3dcea53babc1c3237a19002bc3d228ce1833278f093b8d619e06e7cc79609", size = 15549002, upload-time = "2026-04-21T17:08:23.107Z" }, + { url = "https://files.pythonhosted.org/packages/00/d2/09a6a10ee1bf0008f6c144d9676f2ca6a12512151b4e0ad0ff6c4fac5337/mypy-1.20.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:52b176444e2e5054dfcbcb8c75b0b719865c96247b37407184bbfca5c353f2c2", size = 14401942, upload-time = "2026-04-21T17:07:31.837Z" }, + { url = "https://files.pythonhosted.org/packages/57/da/9594b75c3c019e805250bed3583bdf4443ff9e6ef08f97e39ae308cb06f2/mypy-1.20.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:688c3312e5dadb573a2c69c82af3a298d43ecf9e6d264e0f95df960b5f6ac19c", size = 15041649, upload-time = "2026-04-21T17:09:34.653Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/77/f75a65c278e6e8eba2071f7f5a90481891053ecc39878cc444634d892abe/mypy-1.20.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29752dbbf8cc53f89f6ac096d363314333045c257c9c75cbd189ca2de0455744", size = 15864588, upload-time = "2026-04-21T17:11:44.936Z" }, + { url = "https://files.pythonhosted.org/packages/d7/46/1a4e1c66e96c1a3246ddf5403d122ac9b0a8d2b7e65730b9d6533ba7a6d3/mypy-1.20.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:803203d2b6ea644982c644895c2f78b28d0e208bba7b27d9b921e0ec5eb207c6", size = 16093956, upload-time = "2026-04-21T17:10:17.683Z" }, + { url = "https://files.pythonhosted.org/packages/5a/2c/78a8851264dec38cd736ca5b8bc9380674df0dd0be7792f538916157716c/mypy-1.20.2-cp314-cp314t-win_amd64.whl", hash = "sha256:9bcb8aa397ff0093c824182fd76a935a9ba7ad097fcbef80ae89bf6c1731d8ec", size = 12568661, upload-time = "2026-04-21T17:11:54.473Z" }, + { url = "https://files.pythonhosted.org/packages/83/01/cd7318aa03493322ce275a0e14f4f52b8896335e4e79d4fb8153a7ad2b77/mypy-1.20.2-cp314-cp314t-win_arm64.whl", hash = "sha256:e061b58443f1736f8a37c48978d7ab581636d6ab03e3d4f99e3fa90463bb9382", size = 10389240, upload-time = "2026-04-21T17:09:42.719Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/f23c163e25b11074188251b0b5a0342625fc1cdb6af604757174fa9acc9b/mypy-1.20.2-py3-none-any.whl", hash = "sha256:a94c5a76ab46c5e6257c7972b6c8cff0574201ca7dc05647e33e795d78680563", size = 2637314, upload-time = "2026-04-21T17:05:54.5Z" }, ] [[package]] @@ -559,84 +596,85 @@ wheels = [ [[package]] name = "numpy" -version = "2.4.1" +version = "2.4.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", ] -sdist = { url = 
"https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/34/2b1bc18424f3ad9af577f6ce23600319968a70575bd7db31ce66731bbef9/numpy-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5", size = 16944563, upload-time = "2026-01-10T06:42:14.615Z" }, - { url = "https://files.pythonhosted.org/packages/2c/57/26e5f97d075aef3794045a6ca9eada6a4ed70eb9a40e7a4a93f9ac80d704/numpy-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:899d2c18024984814ac7e83f8f49d8e8180e2fbe1b2e252f2e7f1d06bea92425", size = 12645658, upload-time = "2026-01-10T06:42:17.298Z" }, - { url = "https://files.pythonhosted.org/packages/8e/ba/80fc0b1e3cb2fd5c6143f00f42eb67762aa043eaa05ca924ecc3222a7849/numpy-2.4.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:09aa8a87e45b55a1c2c205d42e2808849ece5c484b2aab11fecabec3841cafba", size = 5474132, upload-time = "2026-01-10T06:42:19.637Z" }, - { url = "https://files.pythonhosted.org/packages/40/ae/0a5b9a397f0e865ec171187c78d9b57e5588afc439a04ba9cab1ebb2c945/numpy-2.4.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:edee228f76ee2dab4579fad6f51f6a305de09d444280109e0f75df247ff21501", size = 6804159, upload-time = "2026-01-10T06:42:21.44Z" }, - { url = "https://files.pythonhosted.org/packages/86/9c/841c15e691c7085caa6fd162f063eff494099c8327aeccd509d1ab1e36ab/numpy-2.4.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:a92f227dbcdc9e4c3e193add1a189a9909947d4f8504c576f4a732fd0b54240a", size = 14708058, upload-time = "2026-01-10T06:42:23.546Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9d/7862db06743f489e6a502a3b93136d73aea27d97b2cf91504f70a27501d6/numpy-2.4.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:538bf4ec353709c765ff75ae616c34d3c3dca1a68312727e8f2676ea644f8509", size = 16651501, upload-time = "2026-01-10T06:42:25.909Z" }, - { url = "https://files.pythonhosted.org/packages/a6/9c/6fc34ebcbd4015c6e5f0c0ce38264010ce8a546cb6beacb457b84a75dfc8/numpy-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ac08c63cb7779b85e9d5318e6c3518b424bc1f364ac4cb2c6136f12e5ff2dccc", size = 16492627, upload-time = "2026-01-10T06:42:28.938Z" }, - { url = "https://files.pythonhosted.org/packages/aa/63/2494a8597502dacda439f61b3c0db4da59928150e62be0e99395c3ad23c5/numpy-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f9c360ecef085e5841c539a9a12b883dff005fbd7ce46722f5e9cef52634d82", size = 18585052, upload-time = "2026-01-10T06:42:31.312Z" }, - { url = "https://files.pythonhosted.org/packages/6a/93/098e1162ae7522fc9b618d6272b77404c4656c72432ecee3abc029aa3de0/numpy-2.4.1-cp311-cp311-win32.whl", hash = "sha256:0f118ce6b972080ba0758c6087c3617b5ba243d806268623dc34216d69099ba0", size = 6236575, upload-time = "2026-01-10T06:42:33.872Z" }, - { url = "https://files.pythonhosted.org/packages/8c/de/f5e79650d23d9e12f38a7bc6b03ea0835b9575494f8ec94c11c6e773b1b1/numpy-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:18e14c4d09d55eef39a6ab5b08406e84bc6869c1e34eef45564804f90b7e0574", size = 12604479, upload-time = "2026-01-10T06:42:35.778Z" }, - { url = "https://files.pythonhosted.org/packages/dd/65/e1097a7047cff12ce3369bd003811516b20ba1078dbdec135e1cd7c16c56/numpy-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:6461de5113088b399d655d45c3897fa188766415d0f568f175ab071c8873bd73", size = 10578325, upload-time = "2026-01-10T06:42:38.518Z" 
}, - { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888, upload-time = "2026-01-10T06:42:40.913Z" }, - { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956, upload-time = "2026-01-10T06:42:43.091Z" }, - { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567, upload-time = "2026-01-10T06:42:45.107Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459, upload-time = "2026-01-10T06:42:48.152Z" }, - { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859, upload-time = "2026-01-10T06:42:49.947Z" }, - { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419, upload-time = "2026-01-10T06:42:52.409Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131, upload-time = "2026-01-10T06:42:54.694Z" }, - { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342, upload-time = "2026-01-10T06:42:56.991Z" }, - { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015, upload-time = "2026-01-10T06:42:59.631Z" }, - { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730, upload-time = "2026-01-10T06:43:01.627Z" }, - { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166, upload-time = "2026-01-10T06:43:03.673Z" }, - { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" }, - { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" }, - { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" }, - { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" }, - { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" }, - { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" }, - { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" }, - { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 
18284380, upload-time = "2026-01-10T06:43:23.957Z" }, - { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089, upload-time = "2026-01-10T06:43:27.535Z" }, - { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230, upload-time = "2026-01-10T06:43:29.298Z" }, - { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125, upload-time = "2026-01-10T06:43:31.782Z" }, - { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" }, - { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" }, - { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" }, - { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" }, - { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" }, - { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138, upload-time = "2026-01-10T06:43:48.854Z" }, - { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478, upload-time = "2026-01-10T06:43:50.476Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981, upload-time = "2026-01-10T06:43:52.575Z" }, - { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" }, - { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" }, - { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" }, - { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" }, - { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" }, - { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" }, - { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" }, - { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722, upload-time = "2026-01-10T06:44:13.332Z" }, - { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590, upload-time = "2026-01-10T06:44:15.006Z" }, - { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180, upload-time = "2026-01-10T06:44:17.386Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" }, - { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" }, - { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" }, - { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" }, - { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" }, - { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482, upload-time = "2026-01-10T06:44:36.798Z" }, - { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117, upload-time = "2026-01-10T06:44:38.828Z" }, - { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, - { url = "https://files.pythonhosted.org/packages/1e/48/d86f97919e79314a1cdee4c832178763e6e98e623e123d0bada19e92c15a/numpy-2.4.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8ad35f20be147a204e28b6a0575fbf3540c5e5f802634d4258d55b1ff5facce1", size = 16822202, upload-time = "2026-01-10T06:44:43.738Z" }, - { url = "https://files.pythonhosted.org/packages/51/e9/1e62a7f77e0f37dcfb0ad6a9744e65df00242b6ea37dfafb55debcbf5b55/numpy-2.4.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8097529164c0f3e32bb89412a0905d9100bf434d9692d9fc275e18dcf53c9344", size = 12569985, upload-time = "2026-01-10T06:44:45.945Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/7e/914d54f0c801342306fdcdce3e994a56476f1b818c46c47fc21ae968088c/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ea66d2b41ca4a1630aae5507ee0a71647d3124d1741980138aa8f28f44dac36e", size = 5398484, upload-time = "2026-01-10T06:44:48.012Z" }, - { url = "https://files.pythonhosted.org/packages/1c/d8/9570b68584e293a33474e7b5a77ca404f1dcc655e40050a600dee81d27fb/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d3f8f0df9f4b8be57b3bf74a1d087fec68f927a2fab68231fdb442bf2c12e426", size = 6713216, upload-time = "2026-01-10T06:44:49.725Z" }, - { url = "https://files.pythonhosted.org/packages/33/9b/9dd6e2db8d49eb24f86acaaa5258e5f4c8ed38209a4ee9de2d1a0ca25045/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2023ef86243690c2791fd6353e5b4848eedaa88ca8a2d129f462049f6d484696", size = 14538937, upload-time = "2026-01-10T06:44:51.498Z" }, - { url = "https://files.pythonhosted.org/packages/53/87/d5bd995b0f798a37105b876350d346eea5838bd8f77ea3d7a48392f3812b/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8361ea4220d763e54cff2fbe7d8c93526b744f7cd9ddab47afeff7e14e8503be", size = 16479830, upload-time = "2026-01-10T06:44:53.931Z" }, - { url = "https://files.pythonhosted.org/packages/5b/c7/b801bf98514b6ae6475e941ac05c58e6411dd863ea92916bfd6d510b08c1/numpy-2.4.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4f1b68ff47680c2925f8063402a693ede215f0257f02596b1318ecdfb1d79e33", size = 12492579, upload-time = "2026-01-10T06:44:57.094Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c6/4218570d8c8ecc9704b5157a3348e486e84ef4be0ed3e38218ab473c83d2/numpy-2.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f983334aea213c99992053ede6168500e5f086ce74fbc4acc3f2b00f5762e9db", size = 16976799, upload-time = "2026-03-29T13:18:15.438Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/92/b4d922c4a5f5dab9ed44e6153908a5c665b71acf183a83b93b690996e39b/numpy-2.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72944b19f2324114e9dc86a159787333b77874143efcf89a5167ef83cfee8af0", size = 14971552, upload-time = "2026-03-29T13:18:18.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/dc/df98c095978fa6ee7b9a9387d1d58cbb3d232d0e69ad169a4ce784bde4fd/numpy-2.4.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:86b6f55f5a352b48d7fbfd2dbc3d5b780b2d79f4d3c121f33eb6efb22e9a2015", size = 5476566, upload-time = "2026-03-29T13:18:21.532Z" }, + { url = "https://files.pythonhosted.org/packages/28/34/b3fdcec6e725409223dd27356bdf5a3c2cc2282e428218ecc9cb7acc9763/numpy-2.4.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:ba1f4fc670ed79f876f70082eff4f9583c15fb9a4b89d6188412de4d18ae2f40", size = 6806482, upload-time = "2026-03-29T13:18:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/63417c13aa35d57bee1337c67446761dc25ea6543130cf868eace6e8157b/numpy-2.4.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a87ec22c87be071b6bdbd27920b129b94f2fc964358ce38f3822635a3e2e03d", size = 15973376, upload-time = "2026-03-29T13:18:26.677Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/9fcb7e0e69cef59cf10c746b84f7d58b08bc66a6b7d459783c5a4f6101a6/numpy-2.4.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df3775294accfdd75f32c74ae39fcba920c9a378a2fc18a12b6820aa8c1fb502", size = 16925137, upload-time = "2026-03-29T13:18:30.14Z" }, + { url = "https://files.pythonhosted.org/packages/7e/43/80020edacb3f84b9efdd1591120a4296462c23fd8db0dde1666f6ef66f13/numpy-2.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d4e437e295f18ec29bc79daf55e8a47a9113df44d66f702f02a293d93a2d6dd", size = 17329414, upload-time = "2026-03-29T13:18:33.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/06/af0658593b18a5f73532d377188b964f239eb0894e664a6c12f484472f97/numpy-2.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6aa3236c78803afbcb255045fbef97a9e25a1f6c9888357d205ddc42f4d6eba5", size = 18658397, upload-time = "2026-03-29T13:18:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ce/13a09ed65f5d0ce5c7dd0669250374c6e379910f97af2c08c57b0608eee4/numpy-2.4.4-cp311-cp311-win32.whl", hash = "sha256:30caa73029a225b2d40d9fae193e008e24b2026b7ee1a867b7ee8d96ca1a448e", size = 6239499, upload-time = "2026-03-29T13:18:40.372Z" }, + { url = "https://files.pythonhosted.org/packages/bd/63/05d193dbb4b5eec1eca73822d80da98b511f8328ad4ae3ca4caf0f4db91d/numpy-2.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6bbe4eb67390b0a0265a2c25458f6b90a409d5d069f1041e6aff1e27e3d9a79e", size = 12614257, upload-time = "2026-03-29T13:18:42.95Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/8168052f080c26fa984c413305012be54741c9d0d74abd7fbeeccae3889f/numpy-2.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:fcfe2045fd2e8f3cb0ce9d4ba6dba6333b8fa05bb8a4939c908cd43322d14c7e", size = 10486775, upload-time = "2026-03-29T13:18:45.835Z" }, + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, 
upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 
18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/6b/33/8fae8f964a4f63ed528264ddf25d2b683d0b663e3cba26961eb838a7c1bd/numpy-2.4.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:58c8b5929fcb8287cbd6f0a3fae19c6e03a5c48402ae792962ac465224a629a4", size = 16854491, upload-time = "2026-03-29T13:21:38.03Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d0/1aabee441380b981cf8cdda3ae7a46aa827d1b5a8cce84d14598bc94d6d9/numpy-2.4.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:eea7ac5d2dce4189771cedb559c738a71512768210dc4e4753b107a2048b3d0e", size = 14895830, upload-time = "2026-03-29T13:21:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b8/aafb0d1065416894fccf4df6b49ef22b8db045187949545bced89c034b8e/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:51fc224f7ca4d92656d5a5eb315f12eb5fe2c97a66249aa7b5f562528a3be38c", size = 5400927, upload-time = "2026-03-29T13:21:44.747Z" }, + { url = "https://files.pythonhosted.org/packages/d6/77/063baa20b08b431038c7f9ff5435540c7b7265c78cf56012a483019ca72d/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:28a650663f7314afc3e6ec620f44f333c386aad9f6fc472030865dc0ebb26ee3", size = 6715557, upload-time = "2026-03-29T13:21:47.406Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a8/379542d45a14f149444c5c4c4e7714707239ce9cc1de8c2803958889da14/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19710a9ca9992d7174e9c52f643d4272dcd1558c5f7af7f6f8190f633bd651a7", size = 15804253, upload-time = "2026-03-29T13:21:50.753Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/c8/f0a45426d6d21e7ea3310a15cf90c43a14d9232c31a837702dba437f3373/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b2aec6af35c113b05695ebb5749a787acd63cafc83086a05771d1e1cd1e555f", size = 16753552, upload-time = "2026-03-29T13:21:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/f4c001f4714c3ad9ce037e18cf2b9c64871a84951eaa0baf683a9ca9301c/numpy-2.4.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2cf083b324a467e1ab358c105f6cad5ea950f50524668a80c486ff1db24e119", size = 12509075, upload-time = "2026-03-29T13:21:57.644Z" }, ] [[package]] @@ -806,6 +844,16 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, ] +[package.dev-dependencies] +test = [ + { name = "overture-schema-addresses-theme" }, + { name = "overture-schema-base-theme" }, + { name = "overture-schema-buildings-theme" }, + { name = "overture-schema-divisions-theme" }, + { name = "overture-schema-places-theme" }, + { name = "overture-schema-transportation-theme" }, +] + [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.1" }, @@ -816,6 +864,16 @@ requires-dist = [ { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, ] +[package.metadata.requires-dev] +test = [ + { name = "overture-schema-addresses-theme", editable = "packages/overture-schema-addresses-theme" }, + { name = "overture-schema-base-theme", editable = "packages/overture-schema-base-theme" }, + { name = "overture-schema-buildings-theme", editable = "packages/overture-schema-buildings-theme" }, + { name = "overture-schema-divisions-theme", editable = "packages/overture-schema-divisions-theme" }, + { name = "overture-schema-places-theme", editable = "packages/overture-schema-places-theme" }, + { name = "overture-schema-transportation-theme", editable = "packages/overture-schema-transportation-theme" }, +] + [[package]] name = "overture-schema-common" source = { 
editable = "packages/overture-schema-common" } @@ -878,6 +936,22 @@ requires-dist = [ { name = "pydantic", extras = ["email"], specifier = ">=2.12.0" }, ] +[[package]] +name = "overture-schema-pyspark" +source = { editable = "packages/overture-schema-pyspark" } +dependencies = [ + { name = "click" }, + { name = "overture-schema-system" }, + { name = "pyspark" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.0" }, + { name = "overture-schema-system", editable = "packages/overture-schema-system" }, + { name = "pyspark", specifier = ">=3.4" }, +] + [[package]] name = "overture-schema-system" source = { editable = "packages/overture-schema-system" } @@ -953,20 +1027,20 @@ dev = [ [[package]] name = "packaging" -version = "26.0" +version = "26.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = 
"2026-04-24T20:15:22.081Z" }, ] [[package]] name = "pathspec" -version = "1.0.4" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/82/42f767fc1c1143d6fd36efb827202a2d997a375e160a71eb2888a925aac1/pathspec-1.1.1.tar.gz", hash = "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a", size = 135180, upload-time = "2026-04-27T01:46:08.907Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, ] [[package]] @@ -994,17 +1068,17 @@ wheels = [ ] [[package]] -name = "ply" -version = "3.11" +name = "py4j" +version = "0.10.9.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload-time = "2018-02-15T19:01:31.097Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/31/0b210511177070c8d5d3059556194352e5753602fa64b85b7ab81ec1a009/py4j-0.10.9.9.tar.gz", hash = 
"sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879", size = 761089, upload-time = "2025-01-15T03:53:18.624Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, + { url = "https://files.pythonhosted.org/packages/bd/db/ea0203e495be491c85af87b66e37acfd3bf756fd985f87e46fc5e3bf022c/py4j-0.10.9.9-py2.py3-none-any.whl", hash = "sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533", size = 203008, upload-time = "2025-01-15T03:53:15.648Z" }, ] [[package]] name = "pydantic" -version = "2.12.5" +version = "2.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -1012,9 +1086,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/e4/40d09941a2cebcb20609b86a559817d5b9291c49dd6f8c87e5feffbe703a/pydantic-2.13.3.tar.gz", hash = "sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d", size = 844068, upload-time = "2026-04-20T14:46:43.632Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/0a/fd7d723f8f8153418fb40cf9c940e82004fce7e987026b08a68a36dd3fe7/pydantic-2.13.3-py3-none-any.whl", hash = "sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927", size = 471981, upload-time = "2026-04-20T14:46:41.402Z" }, ] [package.optional-dependencies] @@ -1024,120 +1098,118 @@ email = [ [[package]] name = "pydantic-core" -version = "2.41.5" +version = "2.46.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/ef/f7abb56c49382a246fd2ce9c799691e3c3e7175ec74b14d99e798bcddb1a/pydantic_core-2.46.3.tar.gz", hash = "sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c", size = 471412, upload-time = "2026-04-20T14:40:56.672Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, - { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, - { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, - { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, - { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, - { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, - { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, - { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, - { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, - { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, - { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, - { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, - { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, - { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, - { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, - { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = 
"2025-11-04T13:39:42.523Z" }, - { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, - { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, - { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, - { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, - { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, - { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, - { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, - { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, - { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, - { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, - { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, - { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, - { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, - { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, - { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, - { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, - { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, - { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, - { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, - { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, - { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, - { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, - { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, - { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = 
"2025-11-04T13:40:44.752Z" }, - { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, - { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, - { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, - { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, - { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, - { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, - { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, - { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, - { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, - { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = 
"2025-11-04T13:41:09.827Z" }, - { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, - { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, - { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, - { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, - { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, - { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, - { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, - { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, - { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, - { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, - { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, - { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, - { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, - { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, - { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, - { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, - { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, - { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, - { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, - { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, - { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, - { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, - { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, - { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, - { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, - { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, - { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, - { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, - { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, - { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, - { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/22/98/b50eb9a411e87483b5c65dba4fa430a06bac4234d3403a40e5a9905ebcd0/pydantic_core-2.46.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:1da3786b8018e60349680720158cc19161cc3b4bdd815beb0a321cd5ce1ad5b1", size = 2108971, upload-time = "2026-04-20T14:43:51.945Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f364b9d161718ff2217160a4b5d41ce38de60aed91c3689ebffa1c939d23/pydantic_core-2.46.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc0988cb29d21bf4a9d5cf2ef970b5c0e38d8d8e107a493278c05dc6c1dda69f", size = 1949588, upload-time = "2026-04-20T14:44:10.386Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8b/30bd03ee83b2f5e29f5ba8e647ab3c456bf56f2ec72fdbcc0215484a0854/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:27f9067c3bfadd04c55484b89c0d267981b2f3512850f6f66e1e74204a4e4ce3", size = 1975986, upload-time = "2026-04-20T14:43:57.106Z" }, + { url = "https://files.pythonhosted.org/packages/3c/54/13ccf954d84ec275d5d023d5786e4aa48840bc9f161f2838dc98e1153518/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a642ac886ecf6402d9882d10c405dcf4b902abeb2972cd5fb4a48c83cd59279a", size = 2055830, upload-time = "2026-04-20T14:44:15.499Z" }, + { url = "https://files.pythonhosted.org/packages/be/0e/65f38125e660fdbd72aa858e7dfae893645cfa0e7b13d333e174a367cd23/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79f561438481f28681584b89e2effb22855e2179880314bcddbf5968e935e807", size = 2222340, upload-time = "2026-04-20T14:41:51.353Z" }, + { url = "https://files.pythonhosted.org/packages/d1/88/f3ab7739efe0e7e80777dbb84c59eb98518e3f57ea433206194c2e425272/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57a973eae4665352a47cf1a99b4ee864620f2fe663a217d7a8da68a1f3a5bfda", size = 2280727, upload-time = "2026-04-20T14:41:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6d/c228219080817bec4982f9531cadb18da6aaa770fdeb114f49c237ac2c9f/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83d002b97072a53ea150d63e0a3adfae5670cef5aa8a6e490240e482d3b22e57", size = 2092158, upload-time = "2026-04-20T14:44:07.305Z" }, + { url = "https://files.pythonhosted.org/packages/0f/b1/525a16711e7c6d61635fac3b0bd54600b5c5d9f60c6fc5aaab26b64a2297/pydantic_core-2.46.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:b40ddd51e7c44b28cfaef746c9d3c506d658885e0a46f9eeef2ee815cbf8e045", size = 2116626, upload-time = "2026-04-20T14:42:34.118Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/7c/17d30673351439a6951bf54f564cf2443ab00ae264ec9df00e2efd710eb5/pydantic_core-2.46.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac5ec7fb9b87f04ee839af2d53bcadea57ded7d229719f56c0ed895bff987943", size = 2160691, upload-time = "2026-04-20T14:41:14.023Z" }, + { url = "https://files.pythonhosted.org/packages/86/66/af8adbcbc0886ead7f1a116606a534d75a307e71e6e08226000d51b880d2/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a3b11c812f61b3129c4905781a2601dfdfdea5fe1e6c1cfb696b55d14e9c054f", size = 2182543, upload-time = "2026-04-20T14:40:48.886Z" }, + { url = "https://files.pythonhosted.org/packages/b0/37/6de71e0f54c54a4190010f57deb749e1ddf75c568ada3b1320b70067f121/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:1108da631e602e5b3c38d6d04fe5bb3bfa54349e6918e3ca6cf570b2e2b2f9d4", size = 2324513, upload-time = "2026-04-20T14:42:36.121Z" }, + { url = "https://files.pythonhosted.org/packages/51/b1/9fc74ce94f603d5ef59ff258ca9c2c8fb902fb548d340a96f77f4d1c3b7f/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de885175515bcfa98ae618c1df7a072f13d179f81376c8007112af20567fd08a", size = 2361853, upload-time = "2026-04-20T14:43:24.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/d0/4c652fc592db35f100279ee751d5a145aca1b9a7984b9684ba7c1b5b0535/pydantic_core-2.46.3-cp310-cp310-win32.whl", hash = "sha256:d11058e3201527d41bc6b545c79187c9e4bf85e15a236a6007f0e991518882b7", size = 1980465, upload-time = "2026-04-20T14:44:46.239Z" }, + { url = "https://files.pythonhosted.org/packages/27/b8/a920453c38afbe1f355e1ea0b0d94a0a3e0b0879d32d793108755fa171d5/pydantic_core-2.46.3-cp310-cp310-win_amd64.whl", hash = "sha256:3612edf65c8ea67ac13616c4d23af12faef1ae435a8a93e5934c2a0cbbdd1fd6", size = 2073884, upload-time = "2026-04-20T14:43:01.201Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/a2/1ba90a83e85a3f94c796b184f3efde9c72f2830dcda493eea8d59ba78e6d/pydantic_core-2.46.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ab124d49d0459b2373ecf54118a45c28a1e6d4192a533fbc915e70f556feb8e5", size = 2106740, upload-time = "2026-04-20T14:41:20.932Z" }, + { url = "https://files.pythonhosted.org/packages/b6/f6/99ae893c89a0b9d3daec9f95487aa676709aa83f67643b3f0abaf4ab628a/pydantic_core-2.46.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cca67d52a5c7a16aed2b3999e719c4bcf644074eac304a5d3d62dd70ae7d4b2c", size = 1948293, upload-time = "2026-04-20T14:43:42.115Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b8/2e8e636dc9e3f16c2e16bf0849e24be82c5ee82c603c65fc0326666328fc/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c024e08c0ba23e6fd68c771a521e9d6a792f2ebb0fa734296b36394dc30390e", size = 1973222, upload-time = "2026-04-20T14:41:57.841Z" }, + { url = "https://files.pythonhosted.org/packages/34/36/0e730beec4d83c5306f417afbd82ff237d9a21e83c5edf675f31ed84c1fe/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6645ce7eec4928e29a1e3b3d5c946621d105d3e79f0c9cddf07c2a9770949287", size = 2053852, upload-time = "2026-04-20T14:40:43.077Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f0/3071131f47e39136a17814576e0fada9168569f7f8c0e6ac4d1ede6a4958/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a712c7118e6c5ea96562f7b488435172abb94a3c53c22c9efc1412264a45cbbe", size = 2221134, upload-time = "2026-04-20T14:43:03.349Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a9/a2dc023eec5aa4b02a467874bad32e2446957d2adcab14e107eab502e978/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a868ef3ff206343579021c40faf3b1edc64b1cc508ff243a28b0a514ccb050", size = 2279785, upload-time = "2026-04-20T14:41:19.285Z" }, + { 
url = "https://files.pythonhosted.org/packages/0a/44/93f489d16fb63fbd41c670441536541f6e8cfa1e5a69f40bc9c5d30d8c90/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc7e8c32db809aa0f6ea1d6869ebc8518a65d5150fdfad8bcae6a49ae32a22e2", size = 2089404, upload-time = "2026-04-20T14:43:10.108Z" }, + { url = "https://files.pythonhosted.org/packages/2a/78/8692e3aa72b2d004f7a5d937f1dfdc8552ba26caf0bec75f342c40f00dec/pydantic_core-2.46.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:3481bd1341dc85779ee506bc8e1196a277ace359d89d28588a9468c3ecbe63fa", size = 2114898, upload-time = "2026-04-20T14:44:51.475Z" }, + { url = "https://files.pythonhosted.org/packages/6a/62/e83133f2e7832532060175cebf1f13748f4c7e7e7165cdd1f611f174494b/pydantic_core-2.46.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8690eba565c6d68ffd3a8655525cbdd5246510b44a637ee2c6c03a7ebfe64d3c", size = 2157856, upload-time = "2026-04-20T14:43:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ec/6a500e3ad7718ee50583fae79c8651f5d37e3abce1fa9ae177ae65842c53/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4de88889d7e88d50d40ee5b39d5dac0bcaef9ba91f7e536ac064e6b2834ecccf", size = 2180168, upload-time = "2026-04-20T14:42:00.302Z" }, + { url = "https://files.pythonhosted.org/packages/d8/53/8267811054b1aa7fc1dc7ded93812372ef79a839f5e23558136a6afbfde1/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:e480080975c1ef7f780b8f99ed72337e7cc5efea2e518a20a692e8e7b278eb8b", size = 2322885, upload-time = "2026-04-20T14:41:05.253Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c1/1c0acdb3aa0856ddc4ecc55214578f896f2de16f400cf51627eb3c26c1c4/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de3a5c376f8cd94da9a1b8fd3dd1c16c7a7b216ed31dc8ce9fd7a22bf13b836e", size = 2360328, upload-time = "2026-04-20T14:41:43.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/d0/ef39cd0f4a926814f360e71c1adeab48ad214d9727e4deb48eedfb5bce1a/pydantic_core-2.46.3-cp311-cp311-win32.whl", hash = "sha256:fc331a5314ffddd5385b9ee9d0d2fee0b13c27e0e02dad71b1ae5d6561f51eeb", size = 1979464, upload-time = "2026-04-20T14:43:12.215Z" }, + { url = "https://files.pythonhosted.org/packages/18/9c/f41951b0d858e343f1cf09398b2a7b3014013799744f2c4a8ad6a3eec4f2/pydantic_core-2.46.3-cp311-cp311-win_amd64.whl", hash = "sha256:b5b9c6cf08a8a5e502698f5e153056d12c34b8fb30317e0c5fd06f45162a6346", size = 2070837, upload-time = "2026-04-20T14:41:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1e/264a17cd582f6ed50950d4d03dd5fefd84e570e238afe1cb3e25cf238769/pydantic_core-2.46.3-cp311-cp311-win_arm64.whl", hash = "sha256:5dfd51cf457482f04ec49491811a2b8fd5b843b64b11eecd2d7a1ee596ea78a6", size = 2053647, upload-time = "2026-04-20T14:42:27.535Z" }, + { url = "https://files.pythonhosted.org/packages/4b/cb/5b47425556ecc1f3fe18ed2a0083188aa46e1dd812b06e406475b3a5d536/pydantic_core-2.46.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b11b59b3eee90a80a36701ddb4576d9ae31f93f05cb9e277ceaa09e6bf074a67", size = 2101946, upload-time = "2026-04-20T14:40:52.581Z" }, + { url = "https://files.pythonhosted.org/packages/a1/4f/2fb62c2267cae99b815bbf4a7b9283812c88ca3153ef29f7707200f1d4e5/pydantic_core-2.46.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af8653713055ea18a3abc1537fe2ebc42f5b0bbb768d1eb79fd74eb47c0ac089", size = 1951612, upload-time = "2026-04-20T14:42:42.996Z" }, + { url = "https://files.pythonhosted.org/packages/50/6e/b7348fd30d6556d132cddd5bd79f37f96f2601fe0608afac4f5fb01ec0b3/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75a519dab6d63c514f3a81053e5266c549679e4aa88f6ec57f2b7b854aceb1b0", size = 1977027, upload-time = "2026-04-20T14:42:02.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/11/31d60ee2b45540d3fb0b29302a393dbc01cd771c473f5b5147bcd353e593/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6cd87cb1575b1ad05ba98894c5b5c96411ef678fa2f6ed2576607095b8d9789", size = 2063008, upload-time = "2026-04-20T14:44:17.952Z" }, + { url = "https://files.pythonhosted.org/packages/8a/db/3a9d1957181b59258f44a2300ab0f0be9d1e12d662a4f57bb31250455c52/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f80a55484b8d843c8ada81ebf70a682f3f00a3d40e378c06cf17ecb44d280d7d", size = 2233082, upload-time = "2026-04-20T14:40:57.934Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e1/3277c38792aeb5cfb18c2f0c5785a221d9ff4e149abbe1184d53d5f72273/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3861f1731b90c50a3266316b9044f5c9b405eecb8e299b0a7120596334e4fe9c", size = 2304615, upload-time = "2026-04-20T14:42:12.584Z" }, + { url = "https://files.pythonhosted.org/packages/5e/d5/e3d9717c9eba10855325650afd2a9cba8e607321697f18953af9d562da2f/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb528e295ed31570ac3dcc9bfdd6e0150bc11ce6168ac87a8082055cf1a67395", size = 2094380, upload-time = "2026-04-20T14:43:05.522Z" }, + { url = "https://files.pythonhosted.org/packages/a1/20/abac35dedcbfd66c6f0b03e4e3564511771d6c9b7ede10a362d03e110d9b/pydantic_core-2.46.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:367508faa4973b992b271ba1494acaab36eb7e8739d1e47be5035fb1ea225396", size = 2135429, upload-time = "2026-04-20T14:41:55.549Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a5/41bfd1df69afad71b5cf0535055bccc73022715ad362edbc124bc1e021d7/pydantic_core-2.46.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ad3c826fe523e4becf4fe39baa44286cff85ef137c729a2c5e269afbfd0905d", size = 2174582, upload-time = 
"2026-04-20T14:41:45.96Z" }, + { url = "https://files.pythonhosted.org/packages/79/65/38d86ea056b29b2b10734eb23329b7a7672ca604df4f2b6e9c02d4ee22fe/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ec638c5d194ef8af27db69f16c954a09797c0dc25015ad6123eb2c73a4d271ca", size = 2187533, upload-time = "2026-04-20T14:40:55.367Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a1129141678a2026badc539ad1dee0a71d06f54c2f06a4bd68c030ac781b/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:28ed528c45446062ee66edb1d33df5d88828ae167de76e773a3c7f64bd14e976", size = 2332985, upload-time = "2026-04-20T14:44:13.05Z" }, + { url = "https://files.pythonhosted.org/packages/d7/60/cb26f4077719f709e54819f4e8e1d43f4091f94e285eb6bd21e1190a7b7c/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aed19d0c783886d5bd86d80ae5030006b45e28464218747dcf83dabfdd092c7b", size = 2373670, upload-time = "2026-04-20T14:41:53.421Z" }, + { url = "https://files.pythonhosted.org/packages/6b/7e/c3f21882bdf1d8d086876f81b5e296206c69c6082551d776895de7801fa0/pydantic_core-2.46.3-cp312-cp312-win32.whl", hash = "sha256:06d5d8820cbbdb4147578c1fe7ffcd5b83f34508cb9f9ab76e807be7db6ff0a4", size = 1966722, upload-time = "2026-04-20T14:44:30.588Z" }, + { url = "https://files.pythonhosted.org/packages/57/be/6b5e757b859013ebfbd7adba02f23b428f37c86dcbf78b5bb0b4ffd36e99/pydantic_core-2.46.3-cp312-cp312-win_amd64.whl", hash = "sha256:c3212fda0ee959c1dd04c60b601ec31097aaa893573a3a1abd0a47bcac2968c1", size = 2072970, upload-time = "2026-04-20T14:42:54.248Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f8/a989b21cc75e9a32d24192ef700eea606521221a89faa40c919ce884f2b1/pydantic_core-2.46.3-cp312-cp312-win_arm64.whl", hash = "sha256:f1f8338dd7a7f31761f1f1a3c47503a9a3b34eea3c8b01fa6ee96408affb5e72", size = 2035963, upload-time = "2026-04-20T14:44:20.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/3c/9b5e8eb9821936d065439c3b0fb1490ffa64163bfe7e1595985a47896073/pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37", size = 2102109, upload-time = "2026-04-20T14:41:24.219Z" }, + { url = "https://files.pythonhosted.org/packages/91/97/1c41d1f5a19f241d8069f1e249853bcce378cdb76eec8ab636d7bc426280/pydantic_core-2.46.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f", size = 1951820, upload-time = "2026-04-20T14:42:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/30/b4/d03a7ae14571bc2b6b3c7b122441154720619afe9a336fa3a95434df5e2f/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8", size = 1977785, upload-time = "2026-04-20T14:42:31.648Z" }, + { url = "https://files.pythonhosted.org/packages/ae/0c/4086f808834b59e3c8f1aa26df8f4b6d998cdcf354a143d18ef41529d1fe/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad", size = 2062761, upload-time = "2026-04-20T14:40:37.093Z" }, + { url = "https://files.pythonhosted.org/packages/fa/71/a649be5a5064c2df0db06e0a512c2281134ed2fcc981f52a657936a7527c/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c", size = 2232989, upload-time = "2026-04-20T14:42:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/7756e75763e810b3a710f4724441d1ecc5883b94aacb07ca71c5fb5cfb69/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f", size = 2303975, upload-time = "2026-04-20T14:41:32.287Z" }, + { 
url = "https://files.pythonhosted.org/packages/6c/35/68a762e0c1e31f35fa0dac733cbd9f5b118042853698de9509c8e5bf128b/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35", size = 2095325, upload-time = "2026-04-20T14:42:47.685Z" }, + { url = "https://files.pythonhosted.org/packages/77/bf/1bf8c9a8e91836c926eae5e3e51dce009bf495a60ca56060689d3df3f340/pydantic_core-2.46.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687", size = 2133368, upload-time = "2026-04-20T14:41:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/e5/50/87d818d6bab915984995157ceb2380f5aac4e563dddbed6b56f0ed057aba/pydantic_core-2.46.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3", size = 2173908, upload-time = "2026-04-20T14:42:52.044Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/a311fb306d0bd6185db41fa14ae888fb81d0baf648a761ae760d30819d33/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022", size = 2186422, upload-time = "2026-04-20T14:43:29.55Z" }, + { url = "https://files.pythonhosted.org/packages/8f/79/28fd0d81508525ab2054fef7c77a638c8b5b0afcbbaeee493cf7c3fef7e1/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23", size = 2332709, upload-time = "2026-04-20T14:42:16.134Z" }, + { url = "https://files.pythonhosted.org/packages/b3/21/795bf5fe5c0f379308b8ef19c50dedab2e7711dbc8d0c2acf08f1c7daa05/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7", size = 2372428, upload-time = "2026-04-20T14:41:10.974Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/b3/ed14c659cbe7605e3ef063077680a64680aec81eb1a04763a05190d49b7f/pydantic_core-2.46.3-cp313-cp313-win32.whl", hash = "sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13", size = 1965601, upload-time = "2026-04-20T14:41:42.128Z" }, + { url = "https://files.pythonhosted.org/packages/ef/bb/adb70d9a762ddd002d723fbf1bd492244d37da41e3af7b74ad212609027e/pydantic_core-2.46.3-cp313-cp313-win_amd64.whl", hash = "sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0", size = 2071517, upload-time = "2026-04-20T14:43:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/52/eb/66faefabebfe68bd7788339c9c9127231e680b11906368c67ce112fdb47f/pydantic_core-2.46.3-cp313-cp313-win_arm64.whl", hash = "sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec", size = 2035802, upload-time = "2026-04-20T14:43:38.507Z" }, + { url = "https://files.pythonhosted.org/packages/7f/db/a7bcb4940183fda36022cd18ba8dd12f2dff40740ec7b58ce7457befa416/pydantic_core-2.46.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b", size = 2097614, upload-time = "2026-04-20T14:44:38.374Z" }, + { url = "https://files.pythonhosted.org/packages/24/35/e4066358a22e3e99519db370494c7528f5a2aa1367370e80e27e20283543/pydantic_core-2.46.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018", size = 1951896, upload-time = "2026-04-20T14:40:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/87/92/37cf4049d1636996e4b888c05a501f40a43ff218983a551d57f9d5e14f0d/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34", size = 1979314, upload-time = "2026-04-20T14:41:49.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/36/9ff4d676dfbdfb2d591cf43f3d90ded01e15b1404fd101180ed2d62a2fd3/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7", size = 2056133, upload-time = "2026-04-20T14:42:23.574Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f0/405b442a4d7ba855b06eec8b2bf9c617d43b8432d099dfdc7bf999293495/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2", size = 2228726, upload-time = "2026-04-20T14:44:22.816Z" }, + { url = "https://files.pythonhosted.org/packages/e7/f8/65cd92dd5a0bd89ba277a98ecbfaf6fc36bbd3300973c7a4b826d6ab1391/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba", size = 2301214, upload-time = "2026-04-20T14:44:48.792Z" }, + { url = "https://files.pythonhosted.org/packages/fd/86/ef96a4c6e79e7a2d0410826a68fbc0eccc0fd44aa733be199d5fcac3bb87/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f", size = 2099927, upload-time = "2026-04-20T14:41:40.196Z" }, + { url = "https://files.pythonhosted.org/packages/6d/53/269caf30e0096e0a8a8f929d1982a27b3879872cca2d917d17c2f9fdf4fe/pydantic_core-2.46.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22", size = 2128789, upload-time = "2026-04-20T14:41:15.868Z" }, + { url = "https://files.pythonhosted.org/packages/00/b0/1a6d9b6a587e118482910c244a1c5acf4d192604174132efd12bf0ac486f/pydantic_core-2.46.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f", size = 2173815, upload-time = 
"2026-04-20T14:44:25.152Z" }, + { url = "https://files.pythonhosted.org/packages/87/56/e7e00d4041a7e62b5a40815590114db3b535bf3ca0bf4dca9f16cef25246/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127", size = 2181608, upload-time = "2026-04-20T14:41:28.933Z" }, + { url = "https://files.pythonhosted.org/packages/e8/22/4bd23c3d41f7c185d60808a1de83c76cf5aeabf792f6c636a55c3b1ec7f9/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c", size = 2326968, upload-time = "2026-04-20T14:42:03.962Z" }, + { url = "https://files.pythonhosted.org/packages/24/ac/66cd45129e3915e5ade3b292cb3bc7fd537f58f8f8dbdaba6170f7cabb74/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1", size = 2369842, upload-time = "2026-04-20T14:41:35.52Z" }, + { url = "https://files.pythonhosted.org/packages/a2/51/dd4248abb84113615473aa20d5545b7c4cd73c8644003b5259686f93996c/pydantic_core-2.46.3-cp314-cp314-win32.whl", hash = "sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505", size = 1959661, upload-time = "2026-04-20T14:41:00.042Z" }, + { url = "https://files.pythonhosted.org/packages/20/eb/59980e5f1ae54a3b86372bd9f0fa373ea2d402e8cdcd3459334430f91e91/pydantic_core-2.46.3-cp314-cp314-win_amd64.whl", hash = "sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e", size = 2071686, upload-time = "2026-04-20T14:43:16.471Z" }, + { url = "https://files.pythonhosted.org/packages/8c/db/1cf77e5247047dfee34bc01fa9bca134854f528c8eb053e144298893d370/pydantic_core-2.46.3-cp314-cp314-win_arm64.whl", hash = "sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df", size = 2026907, upload-time = "2026-04-20T14:43:31.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/c0/b3df9f6a543276eadba0a48487b082ca1f201745329d97dbfa287034a230/pydantic_core-2.46.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf", size = 2095047, upload-time = "2026-04-20T14:42:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/57/886a938073b97556c168fd99e1a7305bb363cd30a6d2c76086bf0587b32a/pydantic_core-2.46.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee", size = 1934329, upload-time = "2026-04-20T14:43:49.655Z" }, + { url = "https://files.pythonhosted.org/packages/0b/7c/b42eaa5c34b13b07ecb51da21761297a9b8eb43044c864a035999998f328/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a", size = 1974847, upload-time = "2026-04-20T14:42:10.737Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9b/92b42db6543e7de4f99ae977101a2967b63122d4b6cf7773812da2d7d5b5/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c", size = 2041742, upload-time = "2026-04-20T14:40:44.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/19/46fbe1efabb5aa2834b43b9454e70f9a83ad9c338c1291e48bdc4fecf167/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1", size = 2236235, upload-time = "2026-04-20T14:41:27.307Z" }, + { url = "https://files.pythonhosted.org/packages/77/da/b3f95bc009ad60ec53120f5d16c6faa8cabdbe8a20d83849a1f2b8728148/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64", size = 2282633, upload-time = "2026-04-20T14:44:33.271Z" 
}, + { url = "https://files.pythonhosted.org/packages/cc/6e/401336117722e28f32fb8220df676769d28ebdf08f2f4469646d404c43a3/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb", size = 2109679, upload-time = "2026-04-20T14:44:41.065Z" }, + { url = "https://files.pythonhosted.org/packages/fc/53/b289f9bc8756a32fe718c46f55afaeaf8d489ee18d1a1e7be1db73f42cc4/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6", size = 2108342, upload-time = "2026-04-20T14:42:50.144Z" }, + { url = "https://files.pythonhosted.org/packages/10/5b/8292fc7c1f9111f1b2b7c1b0dcf1179edcd014fc3ea4517499f50b829d71/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c", size = 2157208, upload-time = "2026-04-20T14:42:08.133Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9e/f80044e9ec07580f057a89fc131f78dda7a58751ddf52bbe05eaf31db50f/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47", size = 2167237, upload-time = "2026-04-20T14:42:25.412Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/6781a1b037f3b96be9227edbd1101f6d3946746056231bf4ac48cdff1a8d/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab", size = 2312540, upload-time = "2026-04-20T14:40:40.313Z" }, + { url = "https://files.pythonhosted.org/packages/3e/db/19c0839feeb728e7df03255581f198dfdf1c2aeb1e174a8420b63c5252e5/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba", size = 2369556, upload-time = "2026-04-20T14:41:09.427Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/15/3228774cb7cd45f5f721ddf1b2242747f4eb834d0c491f0c02d606f09fed/pydantic_core-2.46.3-cp314-cp314t-win32.whl", hash = "sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56", size = 1949756, upload-time = "2026-04-20T14:41:25.717Z" }, + { url = "https://files.pythonhosted.org/packages/b8/2a/c79cf53fd91e5a87e30d481809f52f9a60dd221e39de66455cf04deaad37/pydantic_core-2.46.3-cp314-cp314t-win_amd64.whl", hash = "sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8", size = 2051305, upload-time = "2026-04-20T14:43:18.627Z" }, + { url = "https://files.pythonhosted.org/packages/0b/db/d8182a7f1d9343a032265aae186eb063fe26ca4c40f256b21e8da4498e89/pydantic_core-2.46.3-cp314-cp314t-win_arm64.whl", hash = "sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374", size = 2026310, upload-time = "2026-04-20T14:41:01.778Z" }, + { url = "https://files.pythonhosted.org/packages/66/7f/03dbad45cd3aa9083fbc93c210ae8b005af67e4136a14186950a747c6874/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:9715525891ed524a0a1eb6d053c74d4d4ad5017677fb00af0b7c2644a31bae46", size = 2105683, upload-time = "2026-04-20T14:42:19.779Z" }, + { url = "https://files.pythonhosted.org/packages/26/22/4dc186ac8ea6b257e9855031f51b62a9637beac4d68ac06bee02f046f836/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:9d2f400712a99a013aff420ef1eb9be077f8189a36c1e3ef87660b4e1088a874", size = 1940052, upload-time = "2026-04-20T14:43:59.274Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/d376391a5aff1f2e8188960d7873543608130a870961c2b6b5236627c116/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd2aab0e2e9dc2daf36bd2686c982535d5e7b1d930a1344a7bb6e82baab42a76", size = 1988172, upload-time = "2026-04-20T14:41:17.469Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/6b/523b9f85c23788755d6ab949329de692a2e3a584bc6beb67fef5e035aa9d/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e9d76736da5f362fabfeea6a69b13b7f2be405c6d6966f06b2f6bfff7e64531", size = 2128596, upload-time = "2026-04-20T14:40:41.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/42/f426db557e8ab2791bc7562052299944a118655496fbff99914e564c0a94/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:b12dd51f1187c2eb489af8e20f880362db98e954b54ab792fa5d92e8bcc6b803", size = 2091877, upload-time = "2026-04-20T14:43:27.091Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4f/86a832a9d14df58e663bfdf4627dc00d3317c2bd583c4fb23390b0f04b8e/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:f00a0961b125f1a47af7bcc17f00782e12f4cd056f83416006b30111d941dfa3", size = 1932428, upload-time = "2026-04-20T14:40:45.781Z" }, + { url = "https://files.pythonhosted.org/packages/11/1a/fe857968954d93fb78e0d4b6df5c988c74c4aaa67181c60be7cfe327c0ca/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57697d7c056aca4bbb680200f96563e841a6386ac1129370a0102592f4dddff5", size = 1997550, upload-time = "2026-04-20T14:44:02.425Z" }, + { url = "https://files.pythonhosted.org/packages/17/eb/9d89ad2d9b0ba8cd65393d434471621b98912abb10fbe1df08e480ba57b5/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd35aa21299def8db7ef4fe5c4ff862941a9a158ca7b63d61e66fe67d30416b4", size = 2137657, upload-time = "2026-04-20T14:42:45.149Z" }, + { url = "https://files.pythonhosted.org/packages/1f/da/99d40830684f81dec901cac521b5b91c095394cc1084b9433393cde1c2df/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:13afdd885f3d71280cf286b13b310ee0f7ccfefd1dbbb661514a474b726e2f25", size = 2107973, upload-time = "2026-04-20T14:42:06.175Z" }, + { url = "https://files.pythonhosted.org/packages/99/a5/87024121818d75bbb2a98ddbaf638e40e7a18b5e0f5492c9ca4b1b316107/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f91c0aff3e3ee0928edd1232c57f643a7a003e6edf1860bc3afcdc749cb513f3", size = 1947191, upload-time = "2026-04-20T14:43:14.319Z" }, + { url = "https://files.pythonhosted.org/packages/60/62/0c1acfe10945b83a6a59d19fbaa92f48825381509e5701b855c08f13db76/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6529d1d128321a58d30afcc97b49e98836542f68dd41b33c2e972bb9e5290536", size = 2123791, upload-time = "2026-04-20T14:43:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/75/3e/3b2393b4c8f44285561dc30b00cf307a56a2eff7c483a824db3b8221ca51/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:975c267cff4f7e7272eacbe50f6cc03ca9a3da4c4fbd66fffd89c94c1e311aa1", size = 2153197, upload-time = "2026-04-20T14:44:27.932Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/5af02fb35505051eee727c061f2881c555ab4f8ddb2d42da715a42c9731b/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2b8e4f2bbdf71415c544b4b1138b8060db7b6611bc927e8064c769f64bed651c", size = 2181073, upload-time = "2026-04-20T14:43:20.729Z" }, + { url = "https://files.pythonhosted.org/packages/10/92/7e0e1bd9ca3c68305db037560ca2876f89b2647deb2f8b6319005de37505/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e61ea8e9fff9606d09178f577ff8ccdd7206ff73d6552bcec18e1033c4254b85", size = 2315886, upload-time = "2026-04-20T14:44:04.826Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d8/101655f27eaf3e44558ead736b2795d12500598beed4683f279396fa186e/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:b504bda01bafc69b6d3c7a0c7f039dcf60f47fab70e06fe23f57b5c75bdc82b8", size = 2360528, upload-time = "2026-04-20T14:40:47.431Z" }, + { url = "https://files.pythonhosted.org/packages/07/0f/1c34a74c8d07136f0d729ffe5e1fdab04fbdaa7684f61a92f92511a84a15/pydantic_core-2.46.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff", size = 2184144, upload-time = "2026-04-20T14:42:57Z" }, ] [[package]] @@ -1154,16 +1226,25 @@ wheels = [ [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] +[[package]] +name = "pyspark" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py4j" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/19/bf/58ee13add151469c25825b7125bbf62c3bdcec05eec4d458fcb5c5516066/pyspark-4.1.1.tar.gz", hash = "sha256:77f78984aa84fbe865c717dd37b49913b4e5c97d76ef6824f932f1aefa6621ec", size = 455359625, upload-time = "2026-01-09T09:38:38.28Z" } + [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1174,23 +1255,23 @@ dependencies = [ { name = "pygments" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] name = "pytest-cov" -version = "7.0.0" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage", extra = ["toml"] }, { name = "pluggy" }, { name = "pytest" }, ] -sdist = { 
url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] [[package]] @@ -1272,41 +1353,40 @@ wheels = [ [[package]] name = "rich" -version = "14.3.1" +version = "15.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/84/4831f881aa6ff3c976f6d6809b58cdfa350593ffc0dc3c58f5f6586780fb/rich-14.3.1.tar.gz", hash = "sha256:b8c5f568a3a749f9290ec6bddedf835cec33696bfc1e48bcfecb276c7386e4b8", size = 230125, upload-time = "2026-01-24T21:40:44.847Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/87/2a/a1810c8627b9ec8c57ec5ec325d306701ae7be50235e8fd81266e002a3cc/rich-14.3.1-py3-none-any.whl", hash = "sha256:da750b1aebbff0b372557426fb3f35ba56de8ef954b3190315eb64076d6fb54e", size = 309952, upload-time = "2026-01-24T21:40:42.969Z" }, + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] [[package]] name = "ruff" -version = "0.14.14" +version = "0.15.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +sdist = { url = "https://files.pythonhosted.org/packages/99/43/3291f1cc9106f4c63bdce7a8d0df5047fe8422a75b091c16b5e9355e0b11/ruff-0.15.12.tar.gz", hash = "sha256:ecea26adb26b4232c0c2ca19ccbc0083a68344180bba2a600605538ce51a40a6", size = 4643852, upload-time = "2026-04-24T18:17:14.305Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, - { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, - { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, - { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, - { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, - { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, - { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, - { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, - { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, - { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, - { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, - { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, - { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, - { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6e/e78ffb61d4686f3d96ba3df2c801161843746dcbcbb17a1e927d4829312b/ruff-0.15.12-py3-none-linux_armv6l.whl", hash = "sha256:f86f176e188e94d6bdbc09f09bfd9dc729059ad93d0e7390b5a73efe19f8861c", size = 10640713, upload-time = "2026-04-24T18:17:22.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/08/a317bc231fb9e7b93e4ef3089501e51922ff88d6936ce5cf870c4fe55419/ruff-0.15.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e3bcd123364c3770b8e1b7baaf343cc99a35f197c5c6e8af79015c666c423a6c", size = 11069267, upload-time = "2026-04-24T18:17:30.105Z" }, + { url = "https://files.pythonhosted.org/packages/aa/a4/f828e9718d3dce1f5f11c39c4f65afd32783c8b2aebb2e3d259e492c47bd/ruff-0.15.12-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:fe87510d000220aa1ed530d4448a7c696a0cae1213e5ec30e5874287b66557b5", size = 10397182, upload-time = "2026-04-24T18:17:07.177Z" }, + { url = "https://files.pythonhosted.org/packages/71/e0/3310fc6d1b5e1fdea22bf3b1b807c7e187b581021b0d7d4514cccdb5fb71/ruff-0.15.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84a1630093121375a3e2a95b4a6dc7b59e2b4ee76216e32d81aae550a832d002", size = 10758012, upload-time = "2026-04-24T18:16:55.759Z" }, + { url = "https://files.pythonhosted.org/packages/11/c1/a606911aee04c324ddaa883ae418f3569792fd3c4a10c50e0dd0a2311e1e/ruff-0.15.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb129f40f114f089ebe0ca56c0d251cf2061b17651d464bb6478dc01e69f11f5", size = 10447479, upload-time = "2026-04-24T18:16:51.677Z" }, + { url = "https://files.pythonhosted.org/packages/9d/68/4201e8444f0894f21ab4aeeaee68aa4f10b51613514a20d80bd628d57e88/ruff-0.15.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0c862b172d695db7598426b8af465e7e9ac00a3ea2a3630ee67eb82e366aaa6", size = 11234040, upload-time = "2026-04-24T18:17:16.529Z" }, + { url = "https://files.pythonhosted.org/packages/34/ff/8a6d6cf4ccc23fd67060874e832c18919d1557a0611ebef03fdb01fff11e/ruff-0.15.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2849ea9f3484c3aca43a82f484210370319e7170df4dfe4843395ddf6c57bc33", size = 12087377, upload-time = "2026-04-24T18:17:04.944Z" }, + { url = "https://files.pythonhosted.org/packages/85/f6/c669cf73f5152f623d34e69866a46d5e6185816b19fcd5b6dd8a2d299922/ruff-0.15.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e77c7e51c07fe396826d5969a5b846d9cd4c402535835fb6e21ce8b28fef847", size = 11367784, upload-time = "2026-04-24T18:17:25.409Z" }, + { url = "https://files.pythonhosted.org/packages/e8/39/c61d193b8a1daaa8977f7dea9e8d8ba866e02ea7b65d32f6861693aa4c12/ruff-0.15.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:83b2f4f2f3b1026b5fb449b467d9264bf22067b600f7b6f41fc5958909f449d0", size = 11344088, upload-time = "2026-04-24T18:17:12.258Z" }, + { url = "https://files.pythonhosted.org/packages/c2/8d/49afab3645e31e12c590acb6d3b5b69d7aab5b81926dbaf7461f9441f37a/ruff-0.15.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9ba3b8f1afd7e2e43d8943e55f249e13f9682fde09711644a6e7290eb4f3e339", size = 11271770, upload-time = "2026-04-24T18:17:02.457Z" }, + { url = "https://files.pythonhosted.org/packages/46/06/33f41fe94403e2b755481cdfb9b7ef3e4e0ed031c4581124658d935d52b4/ruff-0.15.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e852ba9fdc890655e1d78f2df1499efbe0e54126bd405362154a75e2bde159c5", size = 10719355, upload-time = "2026-04-24T18:17:27.648Z" }, + { url = "https://files.pythonhosted.org/packages/0d/59/18aa4e014debbf559670e4048e39260a85c7fcee84acfd761ac01e7b8d35/ruff-0.15.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dd8aed930da53780d22fc70bdf84452c843cf64f8cb4eb38984319c24c5cd5fd", size = 10462758, upload-time = "2026-04-24T18:17:32.347Z" }, + { url = "https://files.pythonhosted.org/packages/25/e7/cc9f16fd0f3b5fddcbd7ec3d6ae30c8f3fde1047f32a4093a98d633c6570/ruff-0.15.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:01da3988d225628b709493d7dc67c3b9b12c0210016b08690ef9bd27970b262b", size = 10953498, upload-time = "2026-04-24T18:17:20.674Z" }, + { url = "https://files.pythonhosted.org/packages/72/7a/a9ba7f98c7a575978698f4230c5e8cc54bbc761af34f560818f933dafa0c/ruff-0.15.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:9cae0f92bd5700d1213188b31cd3bdd2b315361296d10b96b8e2337d3d11f53e", size = 11447765, upload-time = "2026-04-24T18:17:09.755Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f9/0ae446942c846b8266059ad8a30702a35afae55f5cdc54c5adf8d7afdc27/ruff-0.15.12-py3-none-win32.whl", hash = "sha256:d0185894e038d7043ba8fd6aee7499ece6462dc0ea9f1e260c7451807c714c20", size = 10657277, upload-time = "2026-04-24T18:17:18.591Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/f1/9614e03e1cdcbf9437570b5400ced8a720b5db22b28d8e0f1bda429f660d/ruff-0.15.12-py3-none-win_amd64.whl", hash = "sha256:c87a162d61ab3adca47c03f7f717c68672edec7d1b5499e652331780fe74950d", size = 11837758, upload-time = "2026-04-24T18:17:00.113Z" }, + { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, ] [[package]] @@ -1315,7 +1395,7 @@ version = "2.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } wheels = [ @@ -1388,78 +1468,78 @@ wheels = [ [[package]] name = "tomli" -version = "2.4.0" +version = "2.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } +sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = 
"sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, - { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" }, - { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, - { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, - { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, - { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, - { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, - { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, - { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, - { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, - { 
url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, - { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, - { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, - { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, - { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, - { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, - { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, - { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, - { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, - { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, - { url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, - { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, - { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, - { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, - { url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, - { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, - { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, - { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, - { url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, - { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, - { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, - { url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, - { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, + { url = "https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f7/675db52c7e46064a9aa928885a9b20f4124ecb9bc2e1ce74c9106648d202/tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a", size = 149454, upload-time = "2026-03-25T20:21:12.036Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/71/81c50943cf953efa35bce7646caab3cf457a7d8c030b27cfb40d7235f9ee/tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076", size = 237561, upload-time = "2026-03-25T20:21:13.098Z" }, + { url = "https://files.pythonhosted.org/packages/48/c1/f41d9cb618acccca7df82aaf682f9b49013c9397212cb9f53219e3abac37/tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9", size = 243824, upload-time = "2026-03-25T20:21:14.569Z" }, + { url = "https://files.pythonhosted.org/packages/22/e4/5a816ecdd1f8ca51fb756ef684b90f2780afc52fc67f987e3c61d800a46d/tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c", size = 242227, upload-time = "2026-03-25T20:21:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/6b/49/2b2a0ef529aa6eec245d25f0c703e020a73955ad7edf73e7f54ddc608aa5/tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc", size = 247859, upload-time = "2026-03-25T20:21:17.001Z" }, + { url = "https://files.pythonhosted.org/packages/83/bd/6c1a630eaca337e1e78c5903104f831bda934c426f9231429396ce3c3467/tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049", size = 97204, upload-time = "2026-03-25T20:21:18.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/59/71461df1a885647e10b6bb7802d0b8e66480c61f3f43079e0dcd315b3954/tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e", size = 108084, upload-time = "2026-03-25T20:21:18.978Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/83/dceca96142499c069475b790e7913b1044c1a4337e700751f48ed723f883/tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece", size = 95285, upload-time = "2026-03-25T20:21:20.309Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ba/42f134a3fe2b370f555f44b1d72feebb94debcab01676bf918d0cb70e9aa/tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a", size = 155924, upload-time = "2026-03-25T20:21:21.626Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c7/62d7a17c26487ade21c5422b646110f2162f1fcc95980ef7f63e73c68f14/tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085", size = 150018, upload-time = "2026-03-25T20:21:23.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/79d13d7c15f13bdef410bdd49a6485b1c37d28968314eabee452c22a7fda/tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9", size = 244948, upload-time = "2026-03-25T20:21:24.04Z" }, + { url = "https://files.pythonhosted.org/packages/10/90/d62ce007a1c80d0b2c93e02cab211224756240884751b94ca72df8a875ca/tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5", size = 253341, upload-time = "2026-03-25T20:21:25.177Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/caf6496d60152ad4ed09282c1885cca4eea150bfd007da84aea07bcc0a3e/tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585", size = 248159, upload-time = "2026-03-25T20:21:26.364Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/e7/c6f69c3120de34bbd882c6fba7975f3d7a746e9218e56ab46a1bc4b42552/tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1", size = 253290, upload-time = "2026-03-25T20:21:27.46Z" }, + { url = "https://files.pythonhosted.org/packages/d6/2f/4a3c322f22c5c66c4b836ec58211641a4067364f5dcdd7b974b4c5da300c/tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917", size = 98141, upload-time = "2026-03-25T20:21:28.492Z" }, + { url = "https://files.pythonhosted.org/packages/24/22/4daacd05391b92c55759d55eaee21e1dfaea86ce5c571f10083360adf534/tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9", size = 108847, upload-time = "2026-03-25T20:21:29.386Z" }, + { url = "https://files.pythonhosted.org/packages/68/fd/70e768887666ddd9e9f5d85129e84910f2db2796f9096aa02b721a53098d/tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257", size = 95088, upload-time = "2026-03-25T20:21:30.677Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866, upload-time = "2026-03-25T20:21:31.65Z" }, + { url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887, upload-time = "2026-03-25T20:21:33.028Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704, upload-time = "2026-03-25T20:21:34.51Z" }, + { url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628, upload-time = "2026-03-25T20:21:36.012Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180, upload-time = "2026-03-25T20:21:37.136Z" }, + { url = "https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674, upload-time = "2026-03-25T20:21:38.298Z" }, + { url = "https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976, upload-time = "2026-03-25T20:21:39.316Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755, upload-time = "2026-03-25T20:21:40.248Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265, upload-time = "2026-03-25T20:21:41.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726, upload-time = "2026-03-25T20:21:42.23Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859, upload-time = "2026-03-25T20:21:43.386Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713, upload-time = "2026-03-25T20:21:44.474Z" }, + { url = "https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084, upload-time = "2026-03-25T20:21:45.62Z" }, + { url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973, upload-time = "2026-03-25T20:21:46.937Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223, upload-time = "2026-03-25T20:21:48.467Z" }, + { url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973, upload-time = "2026-03-25T20:21:49.526Z" }, + { url = "https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082, upload-time = "2026-03-25T20:21:50.506Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490, upload-time = "2026-03-25T20:21:51.474Z" }, + { url = "https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263, upload-time = "2026-03-25T20:21:52.543Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736, upload-time = "2026-03-25T20:21:53.674Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717, upload-time = "2026-03-25T20:21:55.129Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461, upload-time = "2026-03-25T20:21:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855, upload-time = "2026-03-25T20:21:57.653Z" }, + { url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144, upload-time = "2026-03-25T20:21:59.089Z" }, + { url = "https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683, upload-time = "2026-03-25T20:22:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196, upload-time = "2026-03-25T20:22:01.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393, upload-time = "2026-03-25T20:22:02.137Z" }, + { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250915" +version = "6.0.12.20260408" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/73/b759b1e413c31034cc01ecdfb96b38115d0ab4db55a752a3929f0cd449fd/types_pyyaml-6.0.12.20260408.tar.gz", hash = "sha256:92a73f2b8d7f39ef392a38131f76b970f8c66e4c42b3125ae872b7c93b556307", size = 17735, upload-time = "2026-04-08T04:30:50.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f0/c391068b86abb708882c6d75a08cd7d25b2c7227dab527b3a3685a3c635b/types_pyyaml-6.0.12.20260408-py3-none-any.whl", hash = "sha256:fbc42037d12159d9c801ebfcc79ebd28335a7c13b08a4cfbc6916df78fee9384", size = 20339, upload-time = "2026-04-08T04:30:50.113Z" }, ] [[package]] name = "types-shapely" -version = 
"2.1.0.20250917" +version = "2.1.0.20260408" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/19/7f28b10994433d43b9caa66f3b9bd6a0a9192b7ce8b5a7fc41534e54b821/types_shapely-2.1.0.20250917.tar.gz", hash = "sha256:5c56670742105aebe40c16414390d35fcaa55d6f774d328c1a18273ab0e2134a", size = 26363, upload-time = "2025-09-17T02:47:44.604Z" } +sdist = { url = "https://files.pythonhosted.org/packages/10/8d/bf9e3eb51249601e22d797481999a06fb34998c4db5c76804394f8a3fa28/types_shapely-2.1.0.20260408.tar.gz", hash = "sha256:8552549d9429baa52ec4331e43b5db3b334fc3a7f30da48663010b7454b1451c", size = 26529, upload-time = "2026-04-08T04:34:42.111Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/a9/554ac40810e530263b6163b30a2b623bc16aae3fb64416f5d2b3657d0729/types_shapely-2.1.0.20250917-py3-none-any.whl", hash = "sha256:9334a79339504d39b040426be4938d422cec419168414dc74972aa746a8bf3a1", size = 37813, upload-time = "2025-09-17T02:47:43.788Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3d/cbec691f56e71636192a07bf6809f598bed06d869b03b4e2b1ad2f7df032/types_shapely-2.1.0.20260408-py3-none-any.whl", hash = "sha256:8a31e2b074342a363f0c9d0c7d6e1e6c0dcce302a92ef94d64d0ca2a2b94a1d1", size = 37818, upload-time = "2026-04-08T04:34:41.243Z" }, ] [[package]] From ef2ef01a6cae53630e63586bafbf2a5564ef2053 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 13 May 2026 09:37:46 -0700 Subject: [PATCH 05/11] chore(pyspark): generate PySpark expressions Generate PySpark expressions (and tests) for 
models defined in the workspace Signed-off-by: Seth Fitzsimmons --- .../pyspark/expressions/generated/__init__.py | 0 .../generated/overture/__init__.py | 0 .../generated/overture/schema/__init__.py | 0 .../overture/schema/addresses/__init__.py | 0 .../overture/schema/addresses/address.py | 564 ++ .../overture/schema/annex/__init__.py | 0 .../overture/schema/annex/sources.py | 486 ++ .../overture/schema/base/__init__.py | 0 .../overture/schema/base/bathymetry.py | 478 ++ .../overture/schema/base/infrastructure.py | 997 ++++ .../generated/overture/schema/base/land.py | 848 +++ .../overture/schema/base/land_cover.py | 492 ++ .../overture/schema/base/land_use.py | 948 ++++ .../generated/overture/schema/base/water.py | 791 +++ .../overture/schema/buildings/__init__.py | 0 .../overture/schema/buildings/building.py | 1025 ++++ .../schema/buildings/building_part.py | 930 +++ .../overture/schema/divisions/__init__.py | 0 .../overture/schema/divisions/division.py | 1550 +++++ .../schema/divisions/division_area.py | 962 ++++ .../schema/divisions/division_boundary.py | 782 +++ .../overture/schema/places/__init__.py | 0 .../generated/overture/schema/places/place.py | 1505 +++++ .../schema/transportation/__init__.py | 0 .../schema/transportation/connector.py | 372 ++ .../overture/schema/transportation/segment.py | 5053 +++++++++++++++++ .../tests/generated/__init__.py | 0 .../tests/generated/overture/__init__.py | 0 .../generated/overture/schema/__init__.py | 0 .../overture/schema/addresses/__init__.py | 0 .../overture/schema/addresses/test_address.py | 462 ++ .../overture/schema/annex/__init__.py | 0 .../overture/schema/annex/test_sources.py | 843 +++ .../overture/schema/base/__init__.py | 0 .../overture/schema/base/test_bathymetry.py | 401 ++ .../schema/base/test_infrastructure.py | 650 +++ .../overture/schema/base/test_land.py | 634 +++ .../overture/schema/base/test_land_cover.py | 401 ++ .../overture/schema/base/test_land_use.py | 650 +++ 
.../overture/schema/base/test_water.py | 620 ++ .../overture/schema/buildings/__init__.py | 0 .../schema/buildings/test_building.py | 708 +++ .../schema/buildings/test_building_part.py | 714 +++ .../overture/schema/divisions/__init__.py | 0 .../schema/divisions/test_division.py | 1049 ++++ .../schema/divisions/test_division_area.py | 759 +++ .../divisions/test_division_boundary.py | 574 ++ .../overture/schema/places/__init__.py | 0 .../overture/schema/places/test_place.py | 1207 ++++ .../schema/transportation/__init__.py | 0 .../schema/transportation/test_connector.py | 342 ++ .../transportation/test_segment_rail.py | 1676 ++++++ .../transportation/test_segment_road.py | 3085 ++++++++++ .../transportation/test_segment_water.py | 1596 ++++++ 54 files changed, 34154 insertions(+) create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py create mode 100644 
packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py create mode 100644 
packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py create mode 100644 packages/overture-schema-pyspark/tests/generated/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py create mode 100644 
packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py new file mode 100644 index 000000000..19d17b7ba --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py @@ -0,0 +1,564 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Address validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["addresses"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["address"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + 
expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _address_levels_min_length_check() -> Check: + return Check( + field="address_levels_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("address_levels"), 1), + shape=CheckShape.SCALAR, + root_field="address_levels", + ) + + +def _address_levels_max_length_check() -> Check: + return Check( + field="address_levels_max_length", + name="array_max_length", + expr=check_array_max_length(F.col("address_levels"), 5), + shape=CheckShape.SCALAR, + root_field="address_levels", + ) + + +def _address_levels_value_string_min_length_check() -> Check: + return Check( + field="address_levels[].value", + name="string_min_length", + expr=array_check( + "address_levels", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="address_levels", + ) + + +def _address_levels_value_stripped_check() -> Check: + return Check( + field="address_levels[].value", + name="stripped", + expr=array_check("address_levels", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="address_levels", + ) + + +def _country_required_check() -> Check: + return Check( + field="country", + name="required", + expr=check_required(F.col("country")), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _country_country_code_alpha2_check() -> Check: + return 
Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _number_string_min_length_check() -> Check: + return Check( + field="number", + name="string_min_length", + expr=check_string_min_length(F.col("number"), 1), + shape=CheckShape.SCALAR, + root_field="number", + ) + + +def _number_stripped_check() -> Check: + return Check( + field="number", + name="stripped", + expr=check_stripped(F.col("number")), + shape=CheckShape.SCALAR, + root_field="number", + ) + + +def _postal_city_string_min_length_check() -> Check: + return Check( + field="postal_city", + name="string_min_length", + expr=check_string_min_length(F.col("postal_city"), 1), + shape=CheckShape.SCALAR, + root_field="postal_city", + ) + + +def _postal_city_stripped_check() -> Check: + return Check( + field="postal_city", + name="stripped", + expr=check_stripped(F.col("postal_city")), + shape=CheckShape.SCALAR, + root_field="postal_city", + ) + + +def _postcode_string_min_length_check() -> Check: + return Check( + field="postcode", + name="string_min_length", + expr=check_string_min_length(F.col("postcode"), 1), + shape=CheckShape.SCALAR, + root_field="postcode", + ) + + +def _postcode_stripped_check() -> Check: + return Check( + field="postcode", + name="stripped", + expr=check_stripped(F.col("postcode")), + shape=CheckShape.SCALAR, + root_field="postcode", + ) + + +def _street_string_min_length_check() -> Check: + return Check( + field="street", + name="string_min_length", + expr=check_string_min_length(F.col("street"), 1), + shape=CheckShape.SCALAR, + root_field="street", + ) + + +def _street_stripped_check() -> Check: + return Check( + field="street", + name="stripped", + expr=check_stripped(F.col("street")), + shape=CheckShape.SCALAR, + root_field="street", + ) + + +def _unit_string_min_length_check() -> Check: + return Check( + 
field="unit", + name="string_min_length", + expr=check_string_min_length(F.col("unit"), 1), + shape=CheckShape.SCALAR, + root_field="unit", + ) + + +def _unit_stripped_check() -> Check: + return Check( + field="unit", + name="stripped", + expr=check_stripped(F.col("unit")), + shape=CheckShape.SCALAR, + root_field="unit", + ) + + +def address_checks() -> list[Check]: + """All validation checks for address.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _address_levels_min_length_check(), + _address_levels_max_length_check(), + _address_levels_value_string_min_length_check(), + _address_levels_value_stripped_check(), + _country_required_check(), + _country_country_code_alpha2_check(), + _number_string_min_length_check(), + _number_stripped_check(), + _postal_city_string_min_length_check(), + _postal_city_stripped_check(), + _postcode_string_min_length_check(), + _postcode_stripped_check(), + _street_string_min_length_check(), + _street_stripped_check(), + _unit_string_min_length_check(), + _unit_stripped_check(), + ] + + +ADDRESS_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + 
StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "address_levels", + ArrayType(StructType([StructField("value", StringType(), True)]), True), + True, + ), + StructField("country", StringType(), True), + StructField("number", StringType(), True), + StructField("postal_city", StringType(), True), + StructField("postcode", StringType(), True), + StructField("street", StringType(), True), + StructField("unit", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.addresses:Address" + +PARTITIONS: dict[str, str] = {"theme": "addresses"} + +FEATURE_VALIDATION = FeatureValidation( + schema=ADDRESS_SCHEMA, + checks=address_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py new file mode 100644 index 000000000..026130578 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py @@ -0,0 +1,486 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Sources validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + DoubleType, + LongType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_enum, + check_pattern, + check_required, + check_url_format, + check_url_length, +) + + +def _datasets_check() -> Check: + return Check( + field="datasets", + name="required", + expr=check_required(F.col("datasets")), + shape=CheckShape.SCALAR, + root_field="datasets", + ) + + +def _datasets_source_name_check() -> Check: + return Check( + field="datasets[].source_name", + name="required", + expr=array_check("datasets", lambda el: check_required(el["source_name"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_source_dataset_name_check() -> Check: + return Check( + field="datasets[].source_dataset_name", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["source_dataset_name"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_required_check() -> Check: + return Check( + field="datasets[].data_url", + name="required", + expr=array_check("datasets", lambda el: check_required(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_url_format_check() -> Check: + return Check( + field="datasets[].data_url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_url_length_check() -> Check: + return Check( + field="datasets[].data_url", 
+ name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_required_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="url_format", + expr=array_check( + "datasets", lambda el: check_url_format(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="url_length", + expr=array_check( + "datasets", lambda el: check_url_length(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_required_check() -> Check: + return Check( + field="datasets[].license_url", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_url_format_check() -> Check: + return Check( + field="datasets[].license_url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_url_length_check() -> Check: + return Check( + field="datasets[].license_url", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_required_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="required", + expr=array_check( + "datasets", lambda el: 
check_required(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="url_format", + expr=array_check( + "datasets", lambda el: check_url_format(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="url_length", + expr=array_check( + "datasets", lambda el: check_url_length(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_type_check() -> Check: + return Check( + field="datasets[].license_type", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_type"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_text_check() -> Check: + return Check( + field="datasets[].license_text", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_text"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_attribution_check() -> Check: + return Check( + field="datasets[].license_attribution", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["license_attribution"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_check() -> Check: + return Check( + field="datasets[].coverage_bbox", + name="required", + expr=array_check("datasets", lambda el: check_required(el["coverage_bbox"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_min_length_check() -> Check: + return Check( + field="datasets[].coverage_bbox_min_length", + name="array_min_length", + expr=array_check( + "datasets", lambda el: check_array_min_length(el["coverage_bbox"], 4) + ), + 
shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_max_length_check() -> Check: + return Check( + field="datasets[].coverage_bbox_max_length", + name="array_max_length", + expr=array_check( + "datasets", lambda el: check_array_max_length(el["coverage_bbox"], 4) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_url_format_check() -> Check: + return Check( + field="datasets[].url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_url_length_check() -> Check: + return Check( + field="datasets[].url", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].url_archived", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["url_archived"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].url_archived", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["url_archived"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_download_url_url_format_check() -> Check: + return Check( + field="datasets[].data_download_url[]", + name="url_format", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["data_download_url"], lambda inner: check_url_format(inner) + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_download_url_url_length_check() -> Check: + return Check( + field="datasets[].data_download_url[]", + name="url_length", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["data_download_url"], lambda inner: check_url_length(inner) + 
), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_countries_check() -> Check: + return Check( + field="datasets[].countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_build_source_check() -> Check: + return Check( + field="datasets[].build_source", + name="enum", + expr=array_check( + "datasets", + lambda el: check_enum( + el["build_source"], ["OpenAddresses", "tf-data-platform"] + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_update_type_check() -> Check: + return Check( + field="datasets[].update_type", + name="enum", + expr=array_check( + "datasets", + lambda el: check_enum(el["update_type"], ["continuous", "manual"]), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _license_priority_check() -> Check: + return Check( + field="license_priority", + name="required", + expr=check_required(F.col("license_priority")), + shape=CheckShape.SCALAR, + root_field="license_priority", + ) + + +def sources_checks() -> list[Check]: + """All validation checks for sources.""" + return [ + _datasets_check(), + _datasets_source_name_check(), + _datasets_source_dataset_name_check(), + _datasets_data_url_required_check(), + _datasets_data_url_url_format_check(), + _datasets_data_url_url_length_check(), + _datasets_data_url_archived_required_check(), + _datasets_data_url_archived_url_format_check(), + _datasets_data_url_archived_url_length_check(), + _datasets_license_url_required_check(), + _datasets_license_url_url_format_check(), + _datasets_license_url_url_length_check(), + _datasets_license_url_archived_required_check(), + _datasets_license_url_archived_url_format_check(), + _datasets_license_url_archived_url_length_check(), + 
_datasets_license_type_check(), + _datasets_license_text_check(), + _datasets_license_attribution_check(), + _datasets_coverage_bbox_check(), + _datasets_coverage_bbox_min_length_check(), + _datasets_coverage_bbox_max_length_check(), + _datasets_url_url_format_check(), + _datasets_url_url_length_check(), + _datasets_url_archived_url_format_check(), + _datasets_url_archived_url_length_check(), + _datasets_data_download_url_url_format_check(), + _datasets_data_download_url_url_length_check(), + _datasets_countries_check(), + _datasets_build_source_check(), + _datasets_update_type_check(), + _license_priority_check(), + ] + + +SOURCES_SCHEMA = StructType( + [ + StructField( + "datasets", + ArrayType( + StructType( + [ + StructField("source_name", StringType(), True), + StructField("source_dataset_name", StringType(), True), + StructField("data_url", StringType(), True), + StructField("data_url_archived", StringType(), True), + StructField("license_url", StringType(), True), + StructField("license_url_archived", StringType(), True), + StructField("license_type", StringType(), True), + StructField("license_text", StringType(), True), + StructField("license_attribution", StringType(), True), + StructField( + "coverage_bbox", ArrayType(DoubleType(), True), True + ), + StructField("inception_date", StringType(), True), + StructField("url", StringType(), True), + StructField("url_archived", StringType(), True), + StructField( + "data_download_url", ArrayType(StringType(), True), True + ), + StructField("countries", ArrayType(StringType(), True), True), + StructField("coverage_description", StringType(), True), + StructField("data_layer_name", StringType(), True), + StructField("oa_path", ArrayType(StringType(), True), True), + StructField( + "address_levels", ArrayType(StringType(), True), True + ), + StructField("file_format", StringType(), True), + StructField("update_frequency", StringType(), True), + StructField("build_source", StringType(), True), + 
StructField("update_type", StringType(), True), + StructField( + "update_schedule", ArrayType(StringType(), True), True + ), + StructField("known_issues", StringType(), True), + StructField("notes", StringType(), True), + StructField("requires_attribution", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("license_priority", MapType(StringType(), LongType(), True), True), + ] +) + +ENTRY_POINT = "overture.schema.annex:Sources" + +PARTITIONS: dict[str, str] = {} + +FEATURE_VALIDATION = FeatureValidation( + schema=SOURCES_SCHEMA, + checks=sources_checks, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py new file mode 100644 index 000000000..b57a1f074 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py @@ -0,0 +1,478 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Bathymetry validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["bathymetry"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + 
name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + 
"sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _depth_required_check() -> Check: + return Check( + field="depth", + name="required", + expr=check_required(F.col("depth")), + shape=CheckShape.SCALAR, + root_field="depth", + ) + + +def _depth_bounds_check() -> Check: + return Check( + field="depth", + name="bounds", + expr=check_bounds(F.col("depth"), ge=0), + shape=CheckShape.SCALAR, + root_field="depth", + ) + + +def _cartography_prominence_bounds_check() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), ge=1), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_prominence_bounds_check_1() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), le=100), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), le=23), + shape=CheckShape.SCALAR, + 
root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def bathymetry_checks() -> list[Check]: + """All validation checks for bathymetry.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _depth_required_check(), + _depth_bounds_check(), + _cartography_prominence_bounds_check(), + _cartography_prominence_bounds_check_1(), + _cartography_min_zoom_bounds_check(), + _cartography_min_zoom_bounds_check_1(), + _cartography_max_zoom_bounds_check(), + _cartography_max_zoom_bounds_check_1(), + ] + + +BATHYMETRY_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + 
StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("depth", IntegerType(), True), + StructField( + "cartography", + StructType( + [ + StructField("prominence", IntegerType(), True), + StructField("min_zoom", IntegerType(), True), + StructField("max_zoom", IntegerType(), True), + StructField("sort_key", IntegerType(), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Bathymetry" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=BATHYMETRY_SCHEMA, + checks=bathymetry_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py new file mode 100644 index 000000000..d388b7da1 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py @@ -0,0 +1,997 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Infrastructure validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["infrastructure"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "aerialway_station", + "airport", + "airport_gate", + "airstrip", + "apron", + "aqueduct", + "artwork", + "atm", + "barrier", + "bell_tower", + "bench", + "bicycle_parking", + "bicycle_rental", + "block", + "boardwalk", + "bollard", + "border_control", + "breakwater", + "bridge", + "bridge_support", + "bump_gate", + "bus_route", + "bus_station", + "bus_stop", + "bus_trap", + "cable", + "cable_barrier", + "cable_car", + "cable_distribution", + "camp_site", + "cantilever", + "catenary_mast", + "cattle_grid", + "chain", + "chair_lift", + "charging_station", + "city_wall", + "communication_line", + "communication_pole", + "communication_tower", + "connection", + "cooling", + "covered", + "crossing", + "cutline", + "cycle_barrier", + "dam", + "defensive", + "ditch", + "diving", + "drag_lift", + "drain", + "drinking_water", + "entrance", + "fence", + "ferry_terminal", + "fire_hydrant", + "fountain", + 
"full-height_turnstile", + "gasometer", + "gate", + "generator", + "give_way", + "gondola", + "goods", + "guard_rail", + "hampshire_gate", + "handrail", + "hedge", + "height_restrictor", + "heliostat", + "helipad", + "heliport", + "hose", + "information", + "insulator", + "international_airport", + "j-bar", + "jersey_barrier", + "kerb", + "kissing_gate", + "launchpad", + "lift_gate", + "lighting", + "lightning_protection", + "magic_carpet", + "manhole", + "milestone", + "military_airport", + "minaret", + "minor_line", + "mixed_lift", + "mobile_phone_tower", + "monitoring", + "motorcycle_parking", + "motorway_junction", + "movable", + "municipal_airport", + "observation", + "parking", + "parking_entrance", + "parking_space", + "pedestrian_crossing", + "picnic_table", + "pier", + "pipeline", + "plant", + "planter", + "platform", + "platter", + "portal", + "post_box", + "power_line", + "power_pole", + "power_tower", + "private_airport", + "pylon", + "quay", + "radar", + "railway_halt", + "railway_station", + "recycling", + "regional_airport", + "reservoir_covered", + "retaining_wall", + "roller_coaster", + "rope_tow", + "runway", + "sally_port", + "seaplane_airport", + "sewer", + "silo", + "siren", + "stile", + "stop", + "stop_position", + "stopway", + "storage_tank", + "street_cabinet", + "street_lamp", + "substation", + "subway_station", + "swing_gate", + "switch", + "t-bar", + "taxilane", + "taxiway", + "terminal", + "toilets", + "toll_booth", + "traffic_signals", + "transformer", + "trestle", + "utility_pole", + "vending_machine", + "viaduct", + "viewpoint", + "wall", + "waste_basket", + "waste_disposal", + "watchtower", + "water_tower", + "weir", + "zip_line", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return 
Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "aerialway", + "airport", + "barrier", + "bridge", + "communication", + "emergency", + "manhole", + "pedestrian", + "pier", + "power", + "quay", + "recreation", + "tower", + "transit", + "transportation", + "utility", + "waste_management", + "water", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + "compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: 
check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def infrastructure_checks() -> list[Check]: + """All validation checks for infrastructure.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + 
_sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_required_check(), + _class_enum_check(), + _subtype_required_check(), + _subtype_enum_check(), + _height_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +INFRASTRUCTURE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + 
StructField("height", DoubleType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Infrastructure" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=INFRASTRUCTURE_SCHEMA, + checks=infrastructure_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py new file mode 100644 index 000000000..53b53b926 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py @@ -0,0 +1,848 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", 
+ expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + 
return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + 
name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "archipelago", + "bare_rock", + "beach", + "cave_entrance", + "cliff", + "desert", + "dune", + "fell", + "forest", + "glacier", + "grass", + "grassland", + "heath", + "hill", + "island", + "islet", + "land", + "meadow", + "meteor_crater", + "mountain_range", + "peak", + "peninsula", + "plateau", + "reef", + "ridge", + "rock", + "saddle", + "sand", + "scree", + "scrub", + "shingle", + "shrub", + "shrubbery", + "stone", + "tree", + "tree_row", + "tundra", + "valley", + "volcanic_caldera_rim", + "volcano", + "wetland", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "crater", + "desert", + "forest", + "glacier", + "grass", + "land", + "physical", + "reef", + "rock", + "sand", + "shrub", + "tree", + "wetland", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _elevation_check() -> Check: + return Check( + field="elevation", + name="bounds", + expr=check_bounds(F.col("elevation"), le=9000), + shape=CheckShape.SCALAR, + root_field="elevation", + ) + + +def 
_surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + "compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + 
root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + 
check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: 
check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def land_checks() -> list[Check]: + """All validation checks for land.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_check(), + _subtype_check(), + _elevation_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + 
_names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +LAND_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("elevation", IntegerType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + 
StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Land" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_SCHEMA, + checks=land_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py new file mode 100644 index 000000000..7e65987e2 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py @@ -0,0 +1,492 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land Cover validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land_cover"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + 
name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + 
"sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "barren", + "crop", + "forest", + "grass", + "mangrove", + "moss", + "shrub", + "snow", + "urban", + "wetland", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _cartography_prominence_bounds_check() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), ge=1), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_prominence_bounds_check_1() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), le=100), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check_1() -> Check: + return Check( + 
field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def land_cover_checks() -> list[Check]: + """All validation checks for land_cover.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _cartography_prominence_bounds_check(), + _cartography_prominence_bounds_check_1(), + _cartography_min_zoom_bounds_check(), + _cartography_min_zoom_bounds_check_1(), + _cartography_max_zoom_bounds_check(), + _cartography_max_zoom_bounds_check_1(), + ] + + +LAND_COVER_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), 
+ StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField( + "cartography", + StructType( + [ + StructField("prominence", IntegerType(), True), + StructField("min_zoom", IntegerType(), True), + StructField("max_zoom", IntegerType(), True), + StructField("sort_key", IntegerType(), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:LandCover" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_COVER_SCHEMA, + checks=land_cover_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py new file mode 100644 index 000000000..bf573f9bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py @@ -0,0 +1,948 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land Use validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land_use"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "aboriginal_land", + "airfield", + "allotments", + "animal_keeping", + "aquaculture", + "barracks", + "base", + "beach_resort", + "brownfield", + "bunker", + "camp_site", + "cemetery", + "clinic", + "college", + "commercial", + "connection", + "construction", + "danger_area", + "doctors", + "dog_park", + "downhill", + "driving_range", + "driving_school", + "education", + "environmental", + "fairway", + "farmland", + "farmyard", + "fatbike", + "flowerbed", + "forest", + "garages", + "garden", + "golf_course", + "grass", + "grave_yard", + "green", + "greenfield", + "greenhouse_horticulture", + "highway", + "hike", + "hospital", + "ice_skate", + "industrial", + "institutional", + "kindergarten", + "landfill", + "lateral_water_hazard", + "logging", + "marina", + "meadow", + "military", + "military_hospital", + "military_school", + "music_school", + "national_park", + "natural_monument", + "nature_reserve", + 
"naval_base", + "nordic", + "nuclear_explosion_site", + "obstacle_course", + "orchard", + "park", + "peat_cutting", + "pedestrian", + "pitch", + "plant_nursery", + "playground", + "plaza", + "protected", + "protected_landscape_seascape", + "quarry", + "railway", + "range", + "recreation_ground", + "religious", + "residential", + "resort", + "retail", + "rough", + "salt_pond", + "school", + "schoolyard", + "ski_jump", + "skitour", + "sled", + "sleigh", + "snow_park", + "species_management_area", + "stadium", + "state_park", + "static_caravan", + "strict_nature_reserve", + "tee", + "theme_park", + "track", + "traffic_island", + "training_area", + "trench", + "university", + "village_green", + "vineyard", + "water_hazard", + "water_park", + "wilderness_area", + "winter_sports", + "works", + "zoo", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "agriculture", + "aquaculture", + "campground", + "cemetery", + "construction", + "developed", + "education", + "entertainment", + "golf", + "grass", + "horticulture", + "landfill", + "managed", + "medical", + "military", + "park", + "pedestrian", + "protected", + "recreation", + "religious", + "residential", + "resource_extraction", + "transportation", + "winter_sports", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _elevation_check() -> Check: + return Check( + field="elevation", + name="bounds", + expr=check_bounds(F.col("elevation"), le=9000), + shape=CheckShape.SCALAR, + root_field="elevation", + ) + + +def _surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + 
"compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + 
expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() 
-> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def land_use_checks() -> list[Check]: + """All validation checks for land_use.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_required_check(), + _class_enum_check(), + _subtype_required_check(), + _subtype_enum_check(), + _elevation_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + 
_names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +LAND_USE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("elevation", IntegerType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", 
ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:LandUse" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_USE_SCHEMA, + checks=land_use_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py new file mode 100644 index 000000000..d0484e725 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py @@ -0,0 +1,791 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Water validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["water"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "basin", + "bay", + "blowhole", + "canal", + "cape", + "ditch", + "dock", + "drain", + "fairway", + "fish_pass", + "fishpond", + "geyser", + "hot_spring", + "lagoon", + "lake", + "moat", + "ocean", + "oxbow", + "pond", + "reflecting_pool", + "reservoir", + "river", + "salt_pond", + "sea", + "sewage", + "shoal", + "spring", + "strait", + "stream", + "swimming_pool", + "tidal_channel", + "wastewater", + "water", + "water_storage", + "waterfall", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "canal", + "human_made", + "lake", + "ocean", + "physical", + "pond", + "reservoir", + "river", + "spring", + "stream", + "wastewater", + "water", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def 
_names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + 
"^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def water_checks() -> list[Check]: + """All validation checks for water.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + 
_bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_check(), + _subtype_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +WATER_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + 
StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("is_intermittent", BooleanType(), True), + StructField("is_salt", BooleanType(), True), + StructField("level", IntegerType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Water" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=WATER_SCHEMA, + checks=water_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py new file mode 100644 index 000000000..a73f69b89 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py @@ -0,0 +1,1025 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Building validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["buildings"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["building"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "agricultural", + "civic", + "commercial", + "education", + "entertainment", + "industrial", + "medical", + "military", + "outbuilding", + "religious", + "residential", + "service", + "transportation", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "agricultural", + "allotment_house", + "apartments", + "barn", + "beach_hut", + "boathouse", + "bridge_structure", + "bungalow", + "bunker", + "cabin", + "carport", + "cathedral", + "chapel", + "church", + "civic", + "college", + "commercial", + "cowshed", + "detached", + "digester", + "dormitory", + "dwelling_house", + "factory", + "farm", + "farm_auxiliary", + "fire_station", + "garage", + "garages", + "ger", + "glasshouse", + "government", + "grandstand", + "greenhouse", + "guardhouse", + "hangar", + "hospital", + "hotel", + "house", + "houseboat", + "hut", + "industrial", + "kindergarten", + "kiosk", + "library", + "manufacture", + "military", + "monastery", + "mosque", + "office", + "outbuilding", + "parking", + "pavilion", + "post_office", + "presbytery", + 
"public", + "religious", + "residential", + "retail", + "roof", + "school", + "semi", + "semidetached_house", + "service", + "shed", + "shrine", + "silo", + "slurry_tank", + "sports_centre", + "sports_hall", + "stable", + "stadium", + "static_caravan", + "stilt_house", + "storage_tank", + "sty", + "supermarket", + "synagogue", + "temple", + "terrace", + "toilets", + "train_station", + "transformer_tower", + "transportation", + "trullo", + "university", + "warehouse", + "wayside_shrine", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + 
shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + 
el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + 
"names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _num_floors_check() -> Check: + return Check( + field="num_floors", + name="bounds", + expr=check_bounds(F.col("num_floors"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors", + ) + + +def _num_floors_underground_check() -> Check: + return Check( + field="num_floors_underground", + name="bounds", + expr=check_bounds(F.col("num_floors_underground"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors_underground", + ) + + +def _min_floor_check() -> Check: + return Check( + field="min_floor", + name="bounds", + expr=check_bounds(F.col("min_floor"), gt=0), + shape=CheckShape.SCALAR, + root_field="min_floor", + ) + + +def _facade_color_check() -> Check: + return Check( + field="facade_color", + name="hex_color", + expr=check_pattern( + F.col("facade_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="facade_color", + ) + + +def _facade_material_check() -> Check: + return Check( + field="facade_material", + name="enum", + expr=check_enum( + F.col("facade_material"), + [ + "brick", + "cement_block", + "clay", + "concrete", + "glass", + "metal", + "plaster", + "plastic", + "stone", + "timber_framing", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="facade_material", + ) + + +def _roof_material_check() -> Check: + return Check( + field="roof_material", + name="enum", + 
expr=check_enum( + F.col("roof_material"), + [ + "concrete", + "copper", + "eternit", + "glass", + "grass", + "gravel", + "metal", + "plastic", + "roof_tiles", + "slate", + "solar_panels", + "tar_paper", + "thatch", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_material", + ) + + +def _roof_shape_check() -> Check: + return Check( + field="roof_shape", + name="enum", + expr=check_enum( + F.col("roof_shape"), + [ + "dome", + "flat", + "gabled", + "gambrel", + "half_hipped", + "hipped", + "mansard", + "onion", + "pyramidal", + "round", + "saltbox", + "sawtooth", + "skillion", + "spherical", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_shape", + ) + + +def _roof_direction_bounds_check() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_direction_bounds_check_1() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), lt=360.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_orientation_check() -> Check: + return Check( + field="roof_orientation", + name="enum", + expr=check_enum(F.col("roof_orientation"), ["across", "along"]), + shape=CheckShape.SCALAR, + root_field="roof_orientation", + ) + + +def _roof_color_check() -> Check: + return Check( + field="roof_color", + name="hex_color", + expr=check_pattern( + F.col("roof_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="roof_color", + ) + + +def building_checks() -> list[Check]: + """All validation checks for building.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + 
_geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_check(), + _class_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _height_check(), + _num_floors_check(), + _num_floors_underground_check(), + _min_floor_check(), + _facade_color_check(), + _facade_material_check(), + _roof_material_check(), + _roof_shape_check(), + _roof_direction_bounds_check(), + _roof_direction_bounds_check_1(), + _roof_orientation_check(), + _roof_color_check(), + ] + + +BUILDING_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", 
StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("has_parts", BooleanType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("height", DoubleType(), True), + StructField("is_underground", BooleanType(), True), + StructField("num_floors", IntegerType(), True), + StructField("num_floors_underground", IntegerType(), True), + StructField("min_height", DoubleType(), True), + StructField("min_floor", IntegerType(), True), + StructField("facade_color", StringType(), True), + StructField("facade_material", StringType(), True), + StructField("roof_material", StringType(), True), + StructField("roof_shape", StringType(), True), + 
StructField("roof_direction", DoubleType(), True), + StructField("roof_orientation", StringType(), True), + StructField("roof_color", StringType(), True), + StructField("roof_height", DoubleType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.buildings:Building" + +PARTITIONS: dict[str, str] = {"theme": "buildings"} + +FEATURE_VALIDATION = FeatureValidation( + schema=BUILDING_SCHEMA, + checks=building_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py new file mode 100644 index 000000000..8a3a96eec --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py @@ -0,0 +1,930 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Building Part validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["buildings"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["building_part"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _building_id_required_check() -> Check: + return Check( + field="building_id", + name="required", + expr=check_required(F.col("building_id")), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _building_id_string_min_length_check() -> Check: + return Check( + field="building_id", + name="string_min_length", + expr=check_string_min_length(F.col("building_id"), 1), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _building_id_no_whitespace_check() -> Check: + return Check( + field="building_id", + name="no_whitespace", + expr=check_pattern( + F.col("building_id"), + "^\\S+\\z", + label="String without whitespace characters", + ), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + 
field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", 
label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _num_floors_check() -> Check: + return Check( + field="num_floors", + name="bounds", + expr=check_bounds(F.col("num_floors"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors", + ) + + +def _num_floors_underground_check() -> Check: + return Check( + field="num_floors_underground", + name="bounds", + expr=check_bounds(F.col("num_floors_underground"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors_underground", + ) + + +def _min_floor_check() -> Check: + return Check( + field="min_floor", + name="bounds", + 
expr=check_bounds(F.col("min_floor"), gt=0), + shape=CheckShape.SCALAR, + root_field="min_floor", + ) + + +def _facade_color_check() -> Check: + return Check( + field="facade_color", + name="hex_color", + expr=check_pattern( + F.col("facade_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="facade_color", + ) + + +def _facade_material_check() -> Check: + return Check( + field="facade_material", + name="enum", + expr=check_enum( + F.col("facade_material"), + [ + "brick", + "cement_block", + "clay", + "concrete", + "glass", + "metal", + "plaster", + "plastic", + "stone", + "timber_framing", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="facade_material", + ) + + +def _roof_material_check() -> Check: + return Check( + field="roof_material", + name="enum", + expr=check_enum( + F.col("roof_material"), + [ + "concrete", + "copper", + "eternit", + "glass", + "grass", + "gravel", + "metal", + "plastic", + "roof_tiles", + "slate", + "solar_panels", + "tar_paper", + "thatch", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_material", + ) + + +def _roof_shape_check() -> Check: + return Check( + field="roof_shape", + name="enum", + expr=check_enum( + F.col("roof_shape"), + [ + "dome", + "flat", + "gabled", + "gambrel", + "half_hipped", + "hipped", + "mansard", + "onion", + "pyramidal", + "round", + "saltbox", + "sawtooth", + "skillion", + "spherical", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_shape", + ) + + +def _roof_direction_bounds_check() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_direction_bounds_check_1() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), lt=360.0), + shape=CheckShape.SCALAR, + 
root_field="roof_direction", + ) + + +def _roof_orientation_check() -> Check: + return Check( + field="roof_orientation", + name="enum", + expr=check_enum(F.col("roof_orientation"), ["across", "along"]), + shape=CheckShape.SCALAR, + root_field="roof_orientation", + ) + + +def _roof_color_check() -> Check: + return Check( + field="roof_color", + name="hex_color", + expr=check_pattern( + F.col("roof_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="roof_color", + ) + + +def building_part_checks() -> list[Check]: + """All validation checks for building_part.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _building_id_required_check(), + _building_id_string_min_length_check(), + _building_id_no_whitespace_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + 
def _build_building_part_schema() -> StructType:
    """Assemble the Spark ``StructType`` used as ``BUILDING_PART_SCHEMA``.

    Every field, array element and map value is declared nullable.
    """

    def _field(name, data_type):
        # All fields in this schema pass nullable=True.
        return StructField(name, data_type, True)

    def _double_array():
        return ArrayType(DoubleType(), True)

    source_entry = StructType(
        [
            _field("property", StringType()),
            _field("dataset", StringType()),
            _field("license", StringType()),
            _field("record_id", StringType()),
            _field("update_time", StringType()),
            _field("confidence", DoubleType()),
            _field("between", _double_array()),
        ]
    )

    perspectives = StructType(
        [
            _field("mode", StringType()),
            _field("countries", ArrayType(StringType(), True)),
        ]
    )

    name_rule = StructType(
        [
            _field("value", StringType()),
            _field("variant", StringType()),
            _field("language", StringType()),
            _field("perspectives", perspectives),
            _field("between", _double_array()),
            _field("side", StringType()),
        ]
    )

    names = StructType(
        [
            _field("primary", StringType()),
            _field("common", MapType(StringType(), StringType(), True)),
            _field("rules", ArrayType(name_rule, True)),
        ]
    )

    return StructType(
        [
            _field("id", StringType()),
            _field("bbox", BBOX_STRUCT),
            _field("geometry", BinaryType()),
            _field("theme", StringType()),
            _field("type", StringType()),
            _field("version", IntegerType()),
            _field("sources", ArrayType(source_entry, True)),
            _field("building_id", StringType()),
            _field("names", names),
            _field("level", IntegerType()),
            _field("height", DoubleType()),
            _field("is_underground", BooleanType()),
            _field("num_floors", IntegerType()),
            _field("num_floors_underground", IntegerType()),
            _field("min_height", DoubleType()),
            _field("min_floor", IntegerType()),
            _field("facade_color", StringType()),
            _field("facade_material", StringType()),
            _field("roof_material", StringType()),
            _field("roof_shape", StringType()),
            _field("roof_direction", DoubleType()),
            _field("roof_orientation", StringType()),
            _field("roof_color", StringType()),
            _field("roof_height", DoubleType()),
        ]
    )


BUILDING_PART_SCHEMA = _build_building_part_schema()

# Geometry types handed to FeatureValidation below.
GEOMETRY_TYPES: tuple[GeometryType, ...] = (
    GeometryType.MULTI_POLYGON,
    GeometryType.POLYGON,
)

ENTRY_POINT = "overture.schema.buildings:BuildingPart"

PARTITIONS: dict[str, str] = {"theme": "buildings"}

# Bundles the schema, the check builder and the geometry types for this feature.
FEATURE_VALIDATION = FeatureValidation(
    schema=BUILDING_PART_SCHEMA,
    checks=building_part_checks,
    geometry_types=GEOMETRY_TYPES,
)
def _cartography_prominence_bounds_check() -> Check:
    """Build the ``bounds`` check (``ge=1``) for ``cartography.prominence``."""
    prominence = F.col("cartography.prominence")
    return Check(
        field="cartography.prominence",
        name="bounds",
        expr=check_bounds(prominence, ge=1),
        shape=CheckShape.SCALAR,
        root_field="cartography",
    )


def _cartography_prominence_bounds_check_1() -> Check:
    """Build the ``bounds`` check (``le=100``) for ``cartography.prominence``."""
    prominence = F.col("cartography.prominence")
    return Check(
        field="cartography.prominence",
        name="bounds",
        expr=check_bounds(prominence, le=100),
        shape=CheckShape.SCALAR,
        root_field="cartography",
    )


def _cartography_min_zoom_bounds_check() -> Check:
    """Build the ``bounds`` check (``ge=0``) for ``cartography.min_zoom``."""
    min_zoom = F.col("cartography.min_zoom")
    return Check(
        field="cartography.min_zoom",
        name="bounds",
        expr=check_bounds(min_zoom, ge=0),
        shape=CheckShape.SCALAR,
        root_field="cartography",
    )
def _cartography_max_zoom_bounds_check() -> Check:
    """Build the ``bounds`` check (``ge=0``) for ``cartography.max_zoom``."""
    max_zoom = F.col("cartography.max_zoom")
    return Check(
        field="cartography.max_zoom",
        name="bounds",
        expr=check_bounds(max_zoom, ge=0),
        shape=CheckShape.SCALAR,
        root_field="cartography",
    )


def _cartography_max_zoom_bounds_check_1() -> Check:
    """Build the ``bounds`` check (``le=23``) for ``cartography.max_zoom``."""
    max_zoom = F.col("cartography.max_zoom")
    return Check(
        field="cartography.max_zoom",
        name="bounds",
        expr=check_bounds(max_zoom, le=23),
        shape=CheckShape.SCALAR,
        root_field="cartography",
    )


def _names_check() -> Check:
    """Build the ``required`` check for ``names``."""
    names = F.col("names")
    return Check(
        field="names",
        name="required",
        expr=check_required(names),
        shape=CheckShape.SCALAR,
        root_field="names",
    )


def _names_primary_required_check() -> Check:
    """Build the ``required`` check for ``names.primary``."""
    primary = F.col("names.primary")
    return Check(
        field="names.primary",
        name="required",
        expr=check_required(primary),
        shape=CheckShape.SCALAR,
        root_field="names",
    )


def _names_primary_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``names.primary``."""
    primary = F.col("names.primary")
    return Check(
        field="names.primary",
        name="string_min_length",
        expr=check_string_min_length(primary, 1),
        shape=CheckShape.SCALAR,
        root_field="names",
    )


def _names_primary_stripped_check() -> Check:
    """Build the ``stripped`` check for ``names.primary``."""
    primary = F.col("names.primary")
    return Check(
        field="names.primary",
        name="stripped",
        expr=check_stripped(primary),
        shape=CheckShape.SCALAR,
        root_field="names",
    )


def _names_rules_value_required_check() -> Check:
    """Build the ``required`` check for ``names.rules[].value``."""

    def _value_required(rule):
        return check_required(rule["value"])

    return Check(
        field="names.rules[].value",
        name="required",
        expr=array_check("names.rules", _value_required),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_value_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``names.rules[].value``."""

    def _value_min_length(rule):
        return check_string_min_length(rule["value"], 1)

    return Check(
        field="names.rules[].value",
        name="string_min_length",
        expr=array_check("names.rules", _value_min_length),
        shape=CheckShape.ARRAY,
        root_field="names",
    )
def _names_rules_variant_required_check() -> Check:
    """Build the ``required`` check for ``names.rules[].variant``."""

    def _variant_required(rule):
        return check_required(rule["variant"])

    return Check(
        field="names.rules[].variant",
        name="required",
        expr=array_check("names.rules", _variant_required),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_variant_enum_check() -> Check:
    """Build the ``enum`` check for ``names.rules[].variant``."""
    allowed = ["common", "official", "alternate", "short"]

    def _variant_allowed(rule):
        return check_enum(rule["variant"], allowed)

    return Check(
        field="names.rules[].variant",
        name="enum",
        expr=array_check("names.rules", _variant_allowed),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_language_check() -> Check:
    """Build the ``language_tag`` pattern check for ``names.rules[].language``."""
    pattern = "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z"

    def _language_matches(rule):
        return check_pattern(
            rule["language"],
            pattern,
            label="IETF BCP-47 language tag",
        )

    return Check(
        field="names.rules[].language",
        name="language_tag",
        expr=array_check("names.rules", _language_matches),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_perspectives_mode_required_check() -> Check:
    """Build the ``required`` check for ``names.rules[].perspectives.mode``.

    The per-element expression is wrapped in ``F.when`` on
    ``perspectives`` being non-null.
    """

    def _mode_required(rule):
        perspectives = rule["perspectives"]
        return F.when(
            perspectives.isNotNull(),
            check_required(perspectives["mode"]),
        )

    return Check(
        field="names.rules[].perspectives.mode",
        name="required",
        expr=array_check("names.rules", _mode_required),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_perspectives_mode_enum_check() -> Check:
    """Build the ``enum`` check for ``names.rules[].perspectives.mode``."""
    allowed = ["accepted_by", "disputed_by"]

    def _mode_allowed(rule):
        return check_enum(rule["perspectives"]["mode"], allowed)

    return Check(
        field="names.rules[].perspectives.mode",
        name="enum",
        expr=array_check("names.rules", _mode_allowed),
        shape=CheckShape.ARRAY,
        root_field="names",
    )
def _names_rules_perspectives_countries_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for rule perspective countries."""

    def _countries_min_length(rule):
        return check_array_min_length(rule["perspectives"]["countries"], 1)

    return Check(
        field="names.rules[].perspectives.countries_min_length",
        name="array_min_length",
        expr=array_check("names.rules", _countries_min_length),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_perspectives_countries_unique_check() -> Check:
    """Build the ``struct_unique`` check for rule perspective countries."""

    def _countries_unique(rule):
        return check_struct_unique(rule["perspectives"]["countries"])

    return Check(
        field="names.rules[].perspectives.countries_unique",
        name="struct_unique",
        expr=array_check("names.rules", _countries_unique),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_perspectives_countries_check_1() -> Check:
    """Build the ``country_code_alpha2`` pattern check for each rule perspective country."""

    def _country_matches(country):
        return check_pattern(
            country, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code"
        )

    def _countries_match(rule):
        return array_check(rule["perspectives"]["countries"], _country_matches)

    return Check(
        field="names.rules[].perspectives.countries[]",
        name="country_code_alpha2",
        expr=nested_array_check("names.rules", _countries_match),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_between_linear_range_length_check() -> Check:
    """Build the ``linear_range_length`` check for ``names.rules[].between``."""

    def _between_length(rule):
        return check_linear_range_length(rule["between"])

    return Check(
        field="names.rules[].between",
        name="linear_range_length",
        expr=array_check("names.rules", _between_length),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_between_linear_range_bounds_check() -> Check:
    """Build the ``linear_range_bounds`` check for ``names.rules[].between``."""

    def _between_bounds(rule):
        return check_linear_range_bounds(rule["between"])

    return Check(
        field="names.rules[].between",
        name="linear_range_bounds",
        expr=array_check("names.rules", _between_bounds),
        shape=CheckShape.ARRAY,
        root_field="names",
    )
def _names_rules_between_linear_range_order_check() -> Check:
    """Build the ``linear_range_order`` check for ``names.rules[].between``."""

    def _between_order(rule):
        return check_linear_range_order(rule["between"])

    return Check(
        field="names.rules[].between",
        name="linear_range_order",
        expr=array_check("names.rules", _between_order),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _names_rules_side_check() -> Check:
    """Build the ``enum`` check for ``names.rules[].side``."""
    allowed = ["left", "right"]

    def _side_allowed(rule):
        return check_enum(rule["side"], allowed)

    return Check(
        field="names.rules[].side",
        name="enum",
        expr=array_check("names.rules", _side_allowed),
        shape=CheckShape.ARRAY,
        root_field="names",
    )


def _id_required_check() -> Check:
    """Build the ``required`` check for ``id``."""
    id_column = F.col("id")
    return Check(
        field="id",
        name="required",
        expr=check_required(id_column),
        shape=CheckShape.SCALAR,
        root_field="id",
    )


def _id_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``id``."""
    id_column = F.col("id")
    return Check(
        field="id",
        name="string_min_length",
        expr=check_string_min_length(id_column, 1),
        shape=CheckShape.SCALAR,
        root_field="id",
    )


def _id_no_whitespace_check() -> Check:
    """Build the ``no_whitespace`` pattern check for ``id``."""
    id_column = F.col("id")
    expr = check_pattern(
        id_column, "^\\S+\\z", label="String without whitespace characters"
    )
    return Check(
        field="id",
        name="no_whitespace",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field="id",
    )


def _bbox_bbox_completeness_check() -> Check:
    """Build the ``bbox_completeness`` check for ``bbox``."""
    bbox = F.col("bbox")
    return Check(
        field="bbox",
        name="bbox_completeness",
        expr=check_bbox_completeness(bbox),
        shape=CheckShape.SCALAR,
        root_field="bbox",
    )


def _bbox_bbox_lat_ordering_check() -> Check:
    """Build the ``bbox_lat_ordering`` check for ``bbox``."""
    bbox = F.col("bbox")
    return Check(
        field="bbox",
        name="bbox_lat_ordering",
        expr=check_bbox_lat_ordering(bbox),
        shape=CheckShape.SCALAR,
        root_field="bbox",
    )


def _bbox_bbox_lat_range_check() -> Check:
    """Build the ``bbox_lat_range`` check for ``bbox``."""
    bbox = F.col("bbox")
    return Check(
        field="bbox",
        name="bbox_lat_range",
        expr=check_bbox_lat_range(bbox),
        shape=CheckShape.SCALAR,
        root_field="bbox",
    )


def _geometry_required_check() -> Check:
    """Build the ``required`` check for ``geometry``."""
    geometry = F.col("geometry")
    return Check(
        field="geometry",
        name="required",
        expr=check_required(geometry),
        shape=CheckShape.SCALAR,
        root_field="geometry",
    )
def _geometry_geometry_type_check() -> Check:
    """Build the ``geometry_type`` check for ``geometry`` against ``GeometryType.POINT``."""
    geometry = F.col("geometry")
    return Check(
        field="geometry",
        name="geometry_type",
        expr=check_geometry_type(geometry, GeometryType.POINT),
        shape=CheckShape.SCALAR,
        root_field="geometry",
    )


def _theme_required_check() -> Check:
    """Build the ``required`` check for ``theme``."""
    theme = F.col("theme")
    return Check(
        field="theme",
        name="required",
        expr=check_required(theme),
        shape=CheckShape.SCALAR,
        root_field="theme",
    )


def _theme_enum_check() -> Check:
    """Build the ``enum`` check for ``theme``."""
    theme = F.col("theme")
    return Check(
        field="theme",
        name="enum",
        expr=check_enum(theme, ["divisions"]),
        shape=CheckShape.SCALAR,
        root_field="theme",
    )


def _type_required_check() -> Check:
    """Build the ``required`` check for ``type``."""
    type_column = F.col("type")
    return Check(
        field="type",
        name="required",
        expr=check_required(type_column),
        shape=CheckShape.SCALAR,
        root_field="type",
    )


def _type_enum_check() -> Check:
    """Build the ``enum`` check for ``type``."""
    type_column = F.col("type")
    return Check(
        field="type",
        name="enum",
        expr=check_enum(type_column, ["division"]),
        shape=CheckShape.SCALAR,
        root_field="type",
    )


def _version_required_check() -> Check:
    """Build the ``required`` check for ``version``."""
    version = F.col("version")
    return Check(
        field="version",
        name="required",
        expr=check_required(version),
        shape=CheckShape.SCALAR,
        root_field="version",
    )


def _version_bounds_check() -> Check:
    """Build the ``bounds`` check (``ge=0``) for ``version``."""
    version = F.col("version")
    return Check(
        field="version",
        name="bounds",
        expr=check_bounds(version, ge=0),
        shape=CheckShape.SCALAR,
        root_field="version",
    )


def _sources_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for ``sources``."""
    sources = F.col("sources")
    return Check(
        field="sources_min_length",
        name="array_min_length",
        expr=check_array_min_length(sources, 1),
        shape=CheckShape.SCALAR,
        root_field="sources",
    )


def _sources_unique_check() -> Check:
    """Build the ``struct_unique`` check for ``sources``."""
    sources = F.col("sources")
    return Check(
        field="sources_unique",
        name="struct_unique",
        expr=check_struct_unique(sources),
        shape=CheckShape.SCALAR,
        root_field="sources",
    )
def _sources_property_json_pointer_check() -> Check:
    """Build the ``json_pointer`` check for ``sources[].property``."""

    def _property_is_pointer(source):
        return check_json_pointer(source["property"])

    return Check(
        field="sources[].property",
        name="json_pointer",
        expr=array_check("sources", _property_is_pointer),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_dataset_check() -> Check:
    """Build the ``required`` check for ``sources[].dataset``."""

    def _dataset_required(source):
        return check_required(source["dataset"])

    return Check(
        field="sources[].dataset",
        name="required",
        expr=array_check("sources", _dataset_required),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_license_check() -> Check:
    """Build the ``stripped`` check for ``sources[].license``."""

    def _license_stripped(source):
        return check_stripped(source["license"])

    return Check(
        field="sources[].license",
        name="stripped",
        expr=array_check("sources", _license_stripped),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_confidence_bounds_check() -> Check:
    """Build the ``bounds`` check (``ge=0.0``) for ``sources[].confidence``."""

    def _confidence_lower(source):
        return check_bounds(source["confidence"], ge=0.0)

    return Check(
        field="sources[].confidence",
        name="bounds",
        expr=array_check("sources", _confidence_lower),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_confidence_bounds_check_1() -> Check:
    """Build the ``bounds`` check (``le=1.0``) for ``sources[].confidence``."""

    def _confidence_upper(source):
        return check_bounds(source["confidence"], le=1.0)

    return Check(
        field="sources[].confidence",
        name="bounds",
        expr=array_check("sources", _confidence_upper),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_between_linear_range_length_check() -> Check:
    """Build the ``linear_range_length`` check for ``sources[].between``."""

    def _between_length(source):
        return check_linear_range_length(source["between"])

    return Check(
        field="sources[].between",
        name="linear_range_length",
        expr=array_check("sources", _between_length),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )


def _sources_between_linear_range_bounds_check() -> Check:
    """Build the ``linear_range_bounds`` check for ``sources[].between``."""

    def _between_bounds(source):
        return check_linear_range_bounds(source["between"])

    return Check(
        field="sources[].between",
        name="linear_range_bounds",
        expr=array_check("sources", _between_bounds),
        shape=CheckShape.ARRAY,
        root_field="sources",
    )
def _subtype_required_check() -> Check:
    """Build the ``required`` check for ``subtype``."""
    subtype = F.col("subtype")
    return Check(
        field="subtype",
        name="required",
        expr=check_required(subtype),
        shape=CheckShape.SCALAR,
        root_field="subtype",
    )


def _subtype_enum_check() -> Check:
    """Build the ``enum`` check for ``subtype``."""
    allowed = [
        "country",
        "dependency",
        "macroregion",
        "region",
        "macrocounty",
        "county",
        "localadmin",
        "locality",
        "borough",
        "macrohood",
        "neighborhood",
        "microhood",
    ]
    return Check(
        field="subtype",
        name="enum",
        expr=check_enum(F.col("subtype"), allowed),
        shape=CheckShape.SCALAR,
        root_field="subtype",
    )


def _country_required_check() -> Check:
    """Build the ``required`` check for ``country``."""
    country = F.col("country")
    return Check(
        field="country",
        name="required",
        expr=check_required(country),
        shape=CheckShape.SCALAR,
        root_field="country",
    )


def _country_country_code_alpha2_check() -> Check:
    """Build the ``country_code_alpha2`` pattern check for ``country``."""
    country = F.col("country")
    expr = check_pattern(
        country, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code"
    )
    return Check(
        field="country",
        name="country_code_alpha2",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field="country",
    )


def _hierarchies_check() -> Check:
    """Build the ``required`` check for ``hierarchies``."""
    hierarchies = F.col("hierarchies")
    return Check(
        field="hierarchies",
        name="required",
        expr=check_required(hierarchies),
        shape=CheckShape.SCALAR,
        root_field="hierarchies",
    )


def _hierarchies_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for ``hierarchies``."""
    hierarchies = F.col("hierarchies")
    return Check(
        field="hierarchies_min_length",
        name="array_min_length",
        expr=check_array_min_length(hierarchies, 1),
        shape=CheckShape.SCALAR,
        root_field="hierarchies",
    )


def _hierarchies_unique_check() -> Check:
    """Build the ``struct_unique`` check for ``hierarchies``."""
    hierarchies = F.col("hierarchies")
    return Check(
        field="hierarchies_unique",
        name="struct_unique",
        expr=check_struct_unique(hierarchies),
        shape=CheckShape.SCALAR,
        root_field="hierarchies",
    )
def _hierarchies_unique_check_1() -> Check:
    """Build the ``struct_unique`` check for each element of ``hierarchies``."""

    def _hierarchy_unique(hierarchy):
        # Kept as a one-argument wrapper, like the original lambda.
        return check_struct_unique(hierarchy)

    return Check(
        field="hierarchies[]_unique",
        name="struct_unique",
        expr=array_check("hierarchies", _hierarchy_unique),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_division_id_required_check() -> Check:
    """Build the ``required`` check for ``hierarchies[][].division_id``."""

    def _entry_required(entry):
        return check_required(entry["division_id"])

    def _hierarchy_required(hierarchy):
        return array_check(hierarchy, _entry_required)

    return Check(
        field="hierarchies[][].division_id",
        name="required",
        expr=nested_array_check("hierarchies", _hierarchy_required),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_division_id_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``hierarchies[][].division_id``."""

    def _entry_min_length(entry):
        return check_string_min_length(entry["division_id"], 1)

    def _hierarchy_min_length(hierarchy):
        return array_check(hierarchy, _entry_min_length)

    return Check(
        field="hierarchies[][].division_id",
        name="string_min_length",
        expr=nested_array_check("hierarchies", _hierarchy_min_length),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_division_id_no_whitespace_check() -> Check:
    """Build the ``no_whitespace`` pattern check for ``hierarchies[][].division_id``."""

    def _entry_no_whitespace(entry):
        return check_pattern(
            entry["division_id"],
            "^\\S+\\z",
            label="String without whitespace characters",
        )

    def _hierarchy_no_whitespace(hierarchy):
        return array_check(hierarchy, _entry_no_whitespace)

    return Check(
        field="hierarchies[][].division_id",
        name="no_whitespace",
        expr=nested_array_check("hierarchies", _hierarchy_no_whitespace),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_subtype_required_check() -> Check:
    """Build the ``required`` check for ``hierarchies[][].subtype``."""

    def _entry_required(entry):
        return check_required(entry["subtype"])

    def _hierarchy_required(hierarchy):
        return array_check(hierarchy, _entry_required)

    return Check(
        field="hierarchies[][].subtype",
        name="required",
        expr=nested_array_check("hierarchies", _hierarchy_required),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )
def _hierarchies_name_required_check() -> Check:
    """Build the ``required`` check for ``hierarchies[][].name``."""

    def _entry_required(entry):
        return check_required(entry["name"])

    def _hierarchy_required(hierarchy):
        return array_check(hierarchy, _entry_required)

    return Check(
        field="hierarchies[][].name",
        name="required",
        expr=nested_array_check("hierarchies", _hierarchy_required),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_name_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``hierarchies[][].name``."""

    def _entry_min_length(entry):
        return check_string_min_length(entry["name"], 1)

    def _hierarchy_min_length(hierarchy):
        return array_check(hierarchy, _entry_min_length)

    return Check(
        field="hierarchies[][].name",
        name="string_min_length",
        expr=nested_array_check("hierarchies", _hierarchy_min_length),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _hierarchies_name_stripped_check() -> Check:
    """Build the ``stripped`` check for ``hierarchies[][].name``."""

    def _entry_stripped(entry):
        return check_stripped(entry["name"])

    def _hierarchy_stripped(hierarchy):
        return array_check(hierarchy, _entry_stripped)

    return Check(
        field="hierarchies[][].name",
        name="stripped",
        expr=nested_array_check("hierarchies", _hierarchy_stripped),
        shape=CheckShape.ARRAY,
        root_field="hierarchies",
    )


def _parent_division_id_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``parent_division_id``."""
    parent_id = F.col("parent_division_id")
    return Check(
        field="parent_division_id",
        name="string_min_length",
        expr=check_string_min_length(parent_id, 1),
        shape=CheckShape.SCALAR,
        root_field="parent_division_id",
    )


def _parent_division_id_no_whitespace_check() -> Check:
    """Build the ``no_whitespace`` pattern check for ``parent_division_id``."""
    parent_id = F.col("parent_division_id")
    expr = check_pattern(
        parent_id,
        "^\\S+\\z",
        label="String without whitespace characters",
    )
    return Check(
        field="parent_division_id",
        name="no_whitespace",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field="parent_division_id",
    )
def _admin_level_bounds_check_1() -> Check:
    """Build the ``bounds`` check (``le=16``) for ``admin_level``."""
    admin_level = F.col("admin_level")
    return Check(
        field="admin_level",
        name="bounds",
        expr=check_bounds(admin_level, le=16),
        shape=CheckShape.SCALAR,
        root_field="admin_level",
    )


def _class_check() -> Check:
    """Build the ``enum`` check for ``class``."""
    allowed = ["megacity", "city", "town", "village", "hamlet"]
    return Check(
        field="class",
        name="enum",
        expr=check_enum(F.col("class"), allowed),
        shape=CheckShape.SCALAR,
        root_field="class",
    )


def _region_check() -> Check:
    """Build the ``region_code`` pattern check for ``region``."""
    region = F.col("region")
    expr = check_pattern(
        region,
        "^[A-Z]{2}-[A-Z0-9]{1,3}\\z",
        label="ISO 3166-2 subdivision code",
    )
    return Check(
        field="region",
        name="region_code",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field="region",
    )


def _perspectives_mode_required_check() -> Check:
    """Build the ``required`` check for ``perspectives.mode``.

    The expression is wrapped in ``F.when`` on ``perspectives`` being non-null.
    """
    perspectives_present = F.col("perspectives").isNotNull()
    mode_required = check_required(F.col("perspectives.mode"))
    return Check(
        field="perspectives.mode",
        name="required",
        expr=F.when(perspectives_present, mode_required),
        shape=CheckShape.SCALAR,
        root_field="perspectives",
    )


def _perspectives_mode_enum_check() -> Check:
    """Build the ``enum`` check for ``perspectives.mode``."""
    mode = F.col("perspectives.mode")
    return Check(
        field="perspectives.mode",
        name="enum",
        expr=check_enum(mode, ["accepted_by", "disputed_by"]),
        shape=CheckShape.SCALAR,
        root_field="perspectives",
    )


def _perspectives_countries_check() -> Check:
    """Build the ``required`` check for ``perspectives.countries``.

    The expression is wrapped in ``F.when`` on ``perspectives`` being non-null.
    """
    perspectives_present = F.col("perspectives").isNotNull()
    countries_required = check_required(F.col("perspectives.countries"))
    return Check(
        field="perspectives.countries",
        name="required",
        expr=F.when(perspectives_present, countries_required),
        shape=CheckShape.SCALAR,
        root_field="perspectives",
    )


def _perspectives_countries_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for ``perspectives.countries``."""
    countries = F.col("perspectives.countries")
    return Check(
        field="perspectives.countries_min_length",
        name="array_min_length",
        expr=check_array_min_length(countries, 1),
        shape=CheckShape.SCALAR,
        root_field="perspectives",
    )
def _perspectives_countries_check_1() -> Check:
    """Build the ``country_code_alpha2`` pattern check for ``perspectives.countries[]``."""

    def _country_matches(country):
        return check_pattern(
            country, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code"
        )

    return Check(
        field="perspectives.countries[]",
        name="country_code_alpha2",
        expr=array_check("perspectives.countries", _country_matches),
        shape=CheckShape.ARRAY,
        root_field="perspectives",
    )


def _norms_driving_side_check() -> Check:
    """Build the ``enum`` check for ``norms.driving_side``."""
    driving_side = F.col("norms.driving_side")
    return Check(
        field="norms.driving_side",
        name="enum",
        expr=check_enum(driving_side, ["left", "right"]),
        shape=CheckShape.SCALAR,
        root_field="norms",
    )


def _population_check() -> Check:
    """Build the ``bounds`` check (``ge=0``) for ``population``."""
    population = F.col("population")
    return Check(
        field="population",
        name="bounds",
        expr=check_bounds(population, ge=0),
        shape=CheckShape.SCALAR,
        root_field="population",
    )


def _capital_division_ids_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for ``capital_division_ids``."""
    capital_ids = F.col("capital_division_ids")
    return Check(
        field="capital_division_ids_min_length",
        name="array_min_length",
        expr=check_array_min_length(capital_ids, 1),
        shape=CheckShape.SCALAR,
        root_field="capital_division_ids",
    )


def _capital_division_ids_unique_check() -> Check:
    """Build the ``struct_unique`` check for ``capital_division_ids``."""
    capital_ids = F.col("capital_division_ids")
    return Check(
        field="capital_division_ids_unique",
        name="struct_unique",
        expr=check_struct_unique(capital_ids),
        shape=CheckShape.SCALAR,
        root_field="capital_division_ids",
    )


def _capital_division_ids_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``capital_division_ids[]``."""

    def _id_min_length(division_id):
        return check_string_min_length(division_id, 1)

    return Check(
        field="capital_division_ids[]",
        name="string_min_length",
        expr=array_check("capital_division_ids", _id_min_length),
        shape=CheckShape.ARRAY,
        root_field="capital_division_ids",
    )
def _capital_of_divisions_min_length_check() -> Check:
    """Build the ``array_min_length`` check (minimum 1) for ``capital_of_divisions``."""
    capital_of = F.col("capital_of_divisions")
    return Check(
        field="capital_of_divisions_min_length",
        name="array_min_length",
        expr=check_array_min_length(capital_of, 1),
        shape=CheckShape.SCALAR,
        root_field="capital_of_divisions",
    )


def _capital_of_divisions_unique_check() -> Check:
    """Build the ``struct_unique`` check for ``capital_of_divisions``."""
    capital_of = F.col("capital_of_divisions")
    return Check(
        field="capital_of_divisions_unique",
        name="struct_unique",
        expr=check_struct_unique(capital_of),
        shape=CheckShape.SCALAR,
        root_field="capital_of_divisions",
    )


def _capital_of_divisions_division_id_required_check() -> Check:
    """Build the ``required`` check for ``capital_of_divisions[].division_id``."""

    def _division_id_required(entry):
        return check_required(entry["division_id"])

    return Check(
        field="capital_of_divisions[].division_id",
        name="required",
        expr=array_check("capital_of_divisions", _division_id_required),
        shape=CheckShape.ARRAY,
        root_field="capital_of_divisions",
    )


def _capital_of_divisions_division_id_string_min_length_check() -> Check:
    """Build the ``string_min_length`` check (minimum 1) for ``capital_of_divisions[].division_id``."""

    def _division_id_min_length(entry):
        return check_string_min_length(entry["division_id"], 1)

    return Check(
        field="capital_of_divisions[].division_id",
        name="string_min_length",
        expr=array_check("capital_of_divisions", _division_id_min_length),
        shape=CheckShape.ARRAY,
        root_field="capital_of_divisions",
    )


def _capital_of_divisions_division_id_no_whitespace_check() -> Check:
    """Build the ``no_whitespace`` pattern check for ``capital_of_divisions[].division_id``."""

    def _division_id_no_whitespace(entry):
        return check_pattern(
            entry["division_id"],
            "^\\S+\\z",
            label="String without whitespace characters",
        )

    return Check(
        field="capital_of_divisions[].division_id",
        name="no_whitespace",
        expr=array_check("capital_of_divisions", _division_id_no_whitespace),
        shape=CheckShape.ARRAY,
        root_field="capital_of_divisions",
    )


def _capital_of_divisions_subtype_required_check() -> Check:
    """Build the ``required`` check for ``capital_of_divisions[].subtype``."""

    def _subtype_required(entry):
        return check_required(entry["subtype"])

    return Check(
        field="capital_of_divisions[].subtype",
        name="required",
        expr=array_check("capital_of_divisions", _subtype_required),
        shape=CheckShape.ARRAY,
        root_field="capital_of_divisions",
    )
def _capital_of_divisions_subtype_enum_check() -> Check:
    """Build the ``enum`` check for ``capital_of_divisions[].subtype``."""
    allowed = [
        "country",
        "dependency",
        "macroregion",
        "region",
        "macrocounty",
        "county",
        "localadmin",
        "locality",
        "borough",
        "macrohood",
        "neighborhood",
        "microhood",
    ]

    def _subtype_allowed(entry):
        return check_enum(entry["subtype"], allowed)

    return Check(
        field="capital_of_divisions[].subtype",
        name="enum",
        expr=array_check("capital_of_divisions", _subtype_allowed),
        shape=CheckShape.ARRAY,
        root_field="capital_of_divisions",
    )


def _wikidata_check() -> Check:
    """Build the ``wikidata_id`` pattern check for ``wikidata``."""
    wikidata = F.col("wikidata")
    expr = check_pattern(
        wikidata,
        "^Q\\d+\\z",
        label="Wikidata identifier (Q followed by digits)",
    )
    return Check(
        field="wikidata",
        name="wikidata_id",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field="wikidata",
    )


def _check_require_if_0_check() -> Check:
    """Build the ``require_if`` check on ``admin_level`` for ``subtype == "county"``."""
    condition = F.col("subtype") == "county"
    expr = check_require_if(F.col("admin_level"), condition, "subtype = 'county'")
    return Check(
        field="admin_level_required_0",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def _check_require_if_1_check() -> Check:
    """Build the ``require_if`` check on ``admin_level`` for ``subtype == "macrocounty"``."""
    condition = F.col("subtype") == "macrocounty"
    expr = check_require_if(
        F.col("admin_level"), condition, "subtype = 'macrocounty'"
    )
    return Check(
        field="admin_level_required_1",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def _check_require_if_2_check() -> Check:
    """Build the ``require_if`` check on ``admin_level`` for ``subtype == "region"``."""
    condition = F.col("subtype") == "region"
    expr = check_require_if(F.col("admin_level"), condition, "subtype = 'region'")
    return Check(
        field="admin_level_required_2",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def _check_require_if_3_check() -> Check:
    """Build the ``require_if`` check on ``admin_level`` for ``subtype == "macroregion"``."""
    condition = F.col("subtype") == "macroregion"
    expr = check_require_if(
        F.col("admin_level"), condition, "subtype = 'macroregion'"
    )
    return Check(
        field="admin_level_required_3",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )
def _check_require_if_5_check() -> Check:
    """Build the ``require_if`` check on ``admin_level`` for ``subtype == "country"``."""
    condition = F.col("subtype") == "country"
    expr = check_require_if(F.col("admin_level"), condition, "subtype = 'country'")
    return Check(
        field="admin_level_required_5",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def _check_require_if_6_check() -> Check:
    """Build the ``require_if`` check on ``parent_division_id`` for ``subtype != "country"``."""
    condition = F.col("subtype") != "country"
    expr = check_require_if(
        F.col("parent_division_id"), condition, "subtype != 'country'"
    )
    return Check(
        field="parent_division_id_required",
        name="require_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def _check_forbid_if_7_check() -> Check:
    """Build the ``forbid_if`` check on ``parent_division_id`` for ``subtype == "country"``."""
    condition = F.col("subtype") == "country"
    expr = check_forbid_if(
        F.col("parent_division_id"), condition, "subtype = 'country'"
    )
    return Check(
        field="parent_division_id_forbidden",
        name="forbid_if",
        expr=expr,
        shape=CheckShape.SCALAR,
        root_field=None,
    )


def division_checks() -> list[Check]:
    """Return every division validation check, in declaration order."""
    # Builders are invoked in exactly this order to produce the result list.
    builders = (
        _cartography_prominence_bounds_check,
        _cartography_prominence_bounds_check_1,
        _cartography_min_zoom_bounds_check,
        _cartography_min_zoom_bounds_check_1,
        _cartography_max_zoom_bounds_check,
        _cartography_max_zoom_bounds_check_1,
        _names_check,
        _names_primary_required_check,
        _names_primary_string_min_length_check,
        _names_primary_stripped_check,
        _names_rules_value_required_check,
        _names_rules_value_string_min_length_check,
        _names_rules_value_stripped_check,
        _names_rules_variant_required_check,
        _names_rules_variant_enum_check,
        _names_rules_language_check,
        _names_rules_perspectives_mode_required_check,
        _names_rules_perspectives_mode_enum_check,
        _names_rules_perspectives_countries_check,
        _names_rules_perspectives_countries_min_length_check,
        _names_rules_perspectives_countries_unique_check,
        _names_rules_perspectives_countries_check_1,
        _names_rules_between_linear_range_length_check,
        _names_rules_between_linear_range_bounds_check,
        _names_rules_between_linear_range_order_check,
        _names_rules_side_check,
        _id_required_check,
        _id_string_min_length_check,
        _id_no_whitespace_check,
        _bbox_bbox_completeness_check,
        _bbox_bbox_lat_ordering_check,
        _bbox_bbox_lat_range_check,
        _geometry_required_check,
        _geometry_geometry_type_check,
        _theme_required_check,
        _theme_enum_check,
        _type_required_check,
        _type_enum_check,
        _version_required_check,
        _version_bounds_check,
        _sources_min_length_check,
        _sources_unique_check,
        _sources_property_required_check,
        _sources_property_json_pointer_check,
        _sources_dataset_check,
        _sources_license_check,
        _sources_confidence_bounds_check,
        _sources_confidence_bounds_check_1,
        _sources_between_linear_range_length_check,
        _sources_between_linear_range_bounds_check,
        _sources_between_linear_range_order_check,
        _subtype_required_check,
        _subtype_enum_check,
        _country_required_check,
        _country_country_code_alpha2_check,
        _hierarchies_check,
        _hierarchies_min_length_check,
        _hierarchies_unique_check,
        _hierarchies_min_length_check_1,
        _hierarchies_unique_check_1,
        _hierarchies_division_id_required_check,
        _hierarchies_division_id_string_min_length_check,
        _hierarchies_division_id_no_whitespace_check,
        _hierarchies_subtype_required_check,
        _hierarchies_subtype_enum_check,
        _hierarchies_name_required_check,
        _hierarchies_name_string_min_length_check,
        _hierarchies_name_stripped_check,
        _parent_division_id_string_min_length_check,
        _parent_division_id_no_whitespace_check,
        _admin_level_bounds_check,
        _admin_level_bounds_check_1,
        _class_check,
        _region_check,
        _perspectives_mode_required_check,
        _perspectives_mode_enum_check,
        _perspectives_countries_check,
        _perspectives_countries_min_length_check,
        _perspectives_countries_unique_check,
        _perspectives_countries_check_1,
        _norms_driving_side_check,
        _population_check,
        _capital_division_ids_min_length_check,
        _capital_division_ids_unique_check,
        _capital_division_ids_string_min_length_check,
        _capital_division_ids_no_whitespace_check,
        _capital_of_divisions_min_length_check,
        _capital_of_divisions_unique_check,
        _capital_of_divisions_division_id_required_check,
        _capital_of_divisions_division_id_string_min_length_check,
        _capital_of_divisions_division_id_no_whitespace_check,
        _capital_of_divisions_subtype_required_check,
        _capital_of_divisions_subtype_enum_check,
        _wikidata_check,
        _check_require_if_0_check,
        _check_require_if_1_check,
        _check_require_if_2_check,
        _check_require_if_3_check,
        _check_require_if_4_check,
        _check_require_if_5_check,
        _check_require_if_6_check,
        _check_forbid_if_7_check,
    )
    return [build() for build in builders]
+ ), + True, + ), + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("country", StringType(), True), + StructField( + "hierarchies", + ArrayType( + ArrayType( + StructType( + [ + StructField("division_id", StringType(), True), + StructField("subtype", StringType(), True), + StructField("name", StringType(), True), + ] + ), + True, + ), + True, + ), + True, + ), + StructField("parent_division_id", StringType(), True), + StructField("admin_level", IntegerType(), True), + StructField("class", StringType(), True), + StructField("local_type", MapType(StringType(), StringType(), True), True), + StructField("region", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField("countries", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField( + "norms", StructType([StructField("driving_side", StringType(), True)]), True + ), + StructField("population", IntegerType(), True), + StructField("capital_division_ids", ArrayType(StringType(), True), True), + StructField( + "capital_of_divisions", + ArrayType( + StructType( + [ + StructField("division_id", StringType(), True), + StructField("subtype", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("wikidata", 
StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.divisions:Division" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_SCHEMA, + checks=division_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py new file mode 100644 index 000000000..dffe20713 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py @@ -0,0 +1,962 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. + +"""Division Area validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_radio_group, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import 
GeometryType + + +def _names_check() -> Check: + return Check( + field="names", + name="required", + expr=check_required(F.col("names")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=check_required(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + 
name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: 
check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + 
) + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + 
expr=check_enum(F.col("theme"), ["divisions"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["division_area"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + 
shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "country", + "dependency", + 
"macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum(F.col("class"), ["land", "maritime"]), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _division_id_required_check() -> Check: + return Check( + field="division_id", + name="required", + expr=check_required(F.col("division_id")), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _division_id_string_min_length_check() -> Check: + return Check( + field="division_id", + name="string_min_length", + expr=check_string_min_length(F.col("division_id"), 1), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _division_id_no_whitespace_check() -> Check: + return Check( + field="division_id", + name="no_whitespace", + expr=check_pattern( + F.col("division_id"), + "^\\S+\\z", + label="String without whitespace characters", + ), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _country_required_check() -> Check: + return Check( + field="country", + name="required", + expr=check_required(F.col("country")), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _country_country_code_alpha2_check() -> Check: + return Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _region_check() -> Check: + return Check( + field="region", + name="region_code", + expr=check_pattern( + F.col("region"), + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 
subdivision code", + ), + shape=CheckShape.SCALAR, + root_field="region", + ) + + +def _admin_level_bounds_check() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), ge=0), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _admin_level_bounds_check_1() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), le=16), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _check_radio_group_0_check() -> Check: + return Check( + field="radio_group", + name="radio_group", + expr=check_radio_group( + [F.col("is_land"), F.col("is_territorial")], ["is_land", "is_territorial"] + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_1_check() -> Check: + return Check( + field="admin_level_required_0", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_2_check() -> Check: + return Check( + field="admin_level_required_1", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macrocounty", + "subtype = 'macrocounty'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_3_check() -> Check: + return Check( + field="admin_level_required_2", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_4_check() -> Check: + return Check( + field="admin_level_required_3", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macroregion", + "subtype = 'macroregion'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_5_check() -> Check: + return Check( + field="admin_level_required_4", + 
name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "dependency", + "subtype = 'dependency'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_6_check() -> Check: + return Check( + field="admin_level_required_5", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def division_area_checks() -> list[Check]: + """All validation checks for division_area.""" + return [ + _names_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + 
_sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _class_required_check(), + _class_enum_check(), + _division_id_required_check(), + _division_id_string_min_length_check(), + _division_id_no_whitespace_check(), + _country_required_check(), + _country_country_code_alpha2_check(), + _region_check(), + _admin_level_bounds_check(), + _admin_level_bounds_check_1(), + _check_radio_group_0_check(), + _check_require_if_1_check(), + _check_require_if_2_check(), + _check_require_if_3_check(), + _check_require_if_4_check(), + _check_require_if_5_check(), + _check_require_if_6_check(), + ] + + +DIVISION_AREA_SCHEMA = StructType( + [ + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), 
True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("is_land", BooleanType(), True), + StructField("is_territorial", BooleanType(), True), + StructField("division_id", StringType(), True), + StructField("country", StringType(), True), + StructField("region", StringType(), True), + StructField("admin_level", IntegerType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.divisions:DivisionArea" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_AREA_SCHEMA, + checks=division_area_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py new file mode 100644 index 000000000..68c7b1f62 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py @@ -0,0 +1,782 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Division Boundary validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_radio_group, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def 
_bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.LINE_STRING, GeometryType.MULTI_LINE_STRING + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["divisions"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["division_boundary"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + 
root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def 
_sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "country", + "dependency", + "macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum(F.col("class"), ["land", "maritime"]), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _division_ids_check() -> Check: + return Check( + field="division_ids", + name="required", + expr=check_required(F.col("division_ids")), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + 
+def _division_ids_min_length_check() -> Check: + return Check( + field="division_ids_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("division_ids"), 2), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_max_length_check() -> Check: + return Check( + field="division_ids_max_length", + name="array_max_length", + expr=check_array_max_length(F.col("division_ids"), 2), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_unique_check() -> Check: + return Check( + field="division_ids_unique", + name="struct_unique", + expr=check_struct_unique(F.col("division_ids")), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_string_min_length_check() -> Check: + return Check( + field="division_ids[]", + name="string_min_length", + expr=array_check("division_ids", lambda el: check_string_min_length(el, 1)), + shape=CheckShape.ARRAY, + root_field="division_ids", + ) + + +def _division_ids_no_whitespace_check() -> Check: + return Check( + field="division_ids[]", + name="no_whitespace", + expr=array_check( + "division_ids", + lambda el: check_pattern( + el, "^\\S+\\z", label="String without whitespace characters" + ), + ), + shape=CheckShape.ARRAY, + root_field="division_ids", + ) + + +def _country_check() -> Check: + return Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _region_check() -> Check: + return Check( + field="region", + name="region_code", + expr=check_pattern( + F.col("region"), + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 subdivision code", + ), + shape=CheckShape.SCALAR, + root_field="region", + ) + + +def _admin_level_bounds_check() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), ge=0), + 
shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _admin_level_bounds_check_1() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), le=16), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _perspectives_mode_required_check() -> Check: + return Check( + field="perspectives.mode", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.mode")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_mode_enum_check() -> Check: + return Check( + field="perspectives.mode", + name="enum", + expr=check_enum(F.col("perspectives.mode"), ["accepted_by", "disputed_by"]), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check() -> Check: + return Check( + field="perspectives.countries", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.countries")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_min_length_check() -> Check: + return Check( + field="perspectives.countries_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("perspectives.countries"), 1), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_unique_check() -> Check: + return Check( + field="perspectives.countries_unique", + name="struct_unique", + expr=check_struct_unique(F.col("perspectives.countries")), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check_1() -> Check: + return Check( + field="perspectives.countries[]", + name="country_code_alpha2", + expr=array_check( + "perspectives.countries", + lambda el: check_pattern( + el, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + shape=CheckShape.ARRAY, + root_field="perspectives", + ) + + +def 
_check_radio_group_0_check() -> Check: + return Check( + field="radio_group", + name="radio_group", + expr=check_radio_group( + [F.col("is_land"), F.col("is_territorial")], ["is_land", "is_territorial"] + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_1_check() -> Check: + return Check( + field="admin_level_required_0", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_2_check() -> Check: + return Check( + field="admin_level_required_1", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macrocounty", + "subtype = 'macrocounty'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_3_check() -> Check: + return Check( + field="admin_level_required_2", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_4_check() -> Check: + return Check( + field="admin_level_required_3", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macroregion", + "subtype = 'macroregion'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_5_check() -> Check: + return Check( + field="admin_level_required_4", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "dependency", + "subtype = 'dependency'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_6_check() -> Check: + return Check( + field="admin_level_required_5", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_7_check() -> Check: + return 
Check( + field="country_required", + name="require_if", + expr=check_require_if( + F.col("country"), F.col("subtype") != "country", "subtype != 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_8_check() -> Check: + return Check( + field="country_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("country"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def division_boundary_checks() -> list[Check]: + """All validation checks for division_boundary.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _class_required_check(), + _class_enum_check(), + _division_ids_check(), + _division_ids_min_length_check(), + _division_ids_max_length_check(), + _division_ids_unique_check(), + _division_ids_string_min_length_check(), + _division_ids_no_whitespace_check(), + _country_check(), + _region_check(), + _admin_level_bounds_check(), + _admin_level_bounds_check_1(), + _perspectives_mode_required_check(), + _perspectives_mode_enum_check(), + _perspectives_countries_check(), + _perspectives_countries_min_length_check(), + 
_perspectives_countries_unique_check(), + _perspectives_countries_check_1(), + _check_radio_group_0_check(), + _check_require_if_1_check(), + _check_require_if_2_check(), + _check_require_if_3_check(), + _check_require_if_4_check(), + _check_require_if_5_check(), + _check_require_if_6_check(), + _check_require_if_7_check(), + _check_forbid_if_8_check(), + ] + + +DIVISION_BOUNDARY_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("is_land", BooleanType(), True), + StructField("is_territorial", BooleanType(), True), + StructField("division_ids", ArrayType(StringType(), True), True), + StructField("country", StringType(), True), + StructField("region", StringType(), True), + StructField("admin_level", IntegerType(), True), + StructField("is_disputed", BooleanType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField("countries", ArrayType(StringType(), True), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= ( + GeometryType.LINE_STRING, + GeometryType.MULTI_LINE_STRING, +) + +ENTRY_POINT = "overture.schema.divisions:DivisionBoundary" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_BOUNDARY_SCHEMA, + checks=division_boundary_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py new file mode 100644 index 000000000..c9d448f6a --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py @@ -0,0 +1,1505 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Place validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_email, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, + check_url_format, + check_url_length, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + 
return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["places"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["place"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _operating_status_check() -> Check: + return Check( + field="operating_status", + name="enum", + expr=check_enum( + F.col("operating_status"), + ["open", "permanently_closed", "temporarily_closed"], + ), + shape=CheckShape.SCALAR, + root_field="operating_status", + ) + + +def _categories_primary_required_check() -> Check: + return Check( + field="categories.primary", + name="required", + expr=F.when( + F.col("categories").isNotNull(), check_required(F.col("categories.primary")) + ), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_primary_snake_case_check() -> Check: + return Check( + field="categories.primary", + name="snake_case", + expr=check_pattern( + F.col("categories.primary"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_alternate_unique_check() -> Check: + return Check( + field="categories.alternate_unique", + name="struct_unique", + expr=check_struct_unique(F.col("categories.alternate")), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_alternate_check() -> Check: + return Check( + field="categories.alternate[]", + name="snake_case", + expr=array_check( + "categories.alternate", + lambda 
el: check_pattern( + el, "^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="categories", + ) + + +def _basic_category_check() -> Check: + return Check( + field="basic_category", + name="snake_case", + expr=check_pattern( + F.col("basic_category"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="basic_category", + ) + + +def _taxonomy_primary_required_check() -> Check: + return Check( + field="taxonomy.primary", + name="required", + expr=F.when( + F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.primary")) + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_primary_snake_case_check() -> Check: + return Check( + field="taxonomy.primary", + name="snake_case", + expr=check_pattern( + F.col("taxonomy.primary"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_check() -> Check: + return Check( + field="taxonomy.hierarchy", + name="required", + expr=F.when( + F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.hierarchy")) + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_min_length_check() -> Check: + return Check( + field="taxonomy.hierarchy_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("taxonomy.hierarchy"), 1), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_unique_check() -> Check: + return Check( + field="taxonomy.hierarchy_unique", + name="struct_unique", + expr=check_struct_unique(F.col("taxonomy.hierarchy")), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_check_1() -> Check: + return Check( + field="taxonomy.hierarchy[]", + name="snake_case", + expr=array_check( + "taxonomy.hierarchy", + lambda el: check_pattern( + el, 
"^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_min_length_check() -> Check: + return Check( + field="taxonomy.alternates_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("taxonomy.alternates"), 1), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_unique_check() -> Check: + return Check( + field="taxonomy.alternates_unique", + name="struct_unique", + expr=check_struct_unique(F.col("taxonomy.alternates")), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_check() -> Check: + return Check( + field="taxonomy.alternates[]", + name="snake_case", + expr=array_check( + "taxonomy.alternates", + lambda el: check_pattern( + el, "^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="taxonomy", + ) + + +def _confidence_bounds_check() -> Check: + return Check( + field="confidence", + name="bounds", + expr=check_bounds(F.col("confidence"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="confidence", + ) + + +def _confidence_bounds_check_1() -> Check: + return Check( + field="confidence", + name="bounds", + expr=check_bounds(F.col("confidence"), le=1.0), + shape=CheckShape.SCALAR, + root_field="confidence", + ) + + +def _websites_min_length_check() -> Check: + return Check( + field="websites_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("websites"), 1), + shape=CheckShape.SCALAR, + root_field="websites", + ) + + +def _websites_unique_check() -> Check: + return Check( + field="websites_unique", + name="struct_unique", + expr=check_struct_unique(F.col("websites")), + shape=CheckShape.SCALAR, + root_field="websites", + ) + + +def _websites_url_format_check() -> Check: + return Check( + field="websites[]", + name="url_format", + expr=array_check("websites", lambda el: 
check_url_format(el)), + shape=CheckShape.ARRAY, + root_field="websites", + ) + + +def _websites_url_length_check() -> Check: + return Check( + field="websites[]", + name="url_length", + expr=array_check("websites", lambda el: check_url_length(el)), + shape=CheckShape.ARRAY, + root_field="websites", + ) + + +def _socials_min_length_check() -> Check: + return Check( + field="socials_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("socials"), 1), + shape=CheckShape.SCALAR, + root_field="socials", + ) + + +def _socials_unique_check() -> Check: + return Check( + field="socials_unique", + name="struct_unique", + expr=check_struct_unique(F.col("socials")), + shape=CheckShape.SCALAR, + root_field="socials", + ) + + +def _socials_url_format_check() -> Check: + return Check( + field="socials[]", + name="url_format", + expr=array_check("socials", lambda el: check_url_format(el)), + shape=CheckShape.ARRAY, + root_field="socials", + ) + + +def _socials_url_length_check() -> Check: + return Check( + field="socials[]", + name="url_length", + expr=array_check("socials", lambda el: check_url_length(el)), + shape=CheckShape.ARRAY, + root_field="socials", + ) + + +def _emails_min_length_check() -> Check: + return Check( + field="emails_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("emails"), 1), + shape=CheckShape.SCALAR, + root_field="emails", + ) + + +def _emails_unique_check() -> Check: + return Check( + field="emails_unique", + name="struct_unique", + expr=check_struct_unique(F.col("emails")), + shape=CheckShape.SCALAR, + root_field="emails", + ) + + +def _emails_check() -> Check: + return Check( + field="emails[]", + name="email", + expr=array_check("emails", lambda el: check_email(el)), + shape=CheckShape.ARRAY, + root_field="emails", + ) + + +def _phones_min_length_check() -> Check: + return Check( + field="phones_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("phones"), 1), + 
shape=CheckShape.SCALAR, + root_field="phones", + ) + + +def _phones_unique_check() -> Check: + return Check( + field="phones_unique", + name="struct_unique", + expr=check_struct_unique(F.col("phones")), + shape=CheckShape.SCALAR, + root_field="phones", + ) + + +def _phones_check() -> Check: + return Check( + field="phones[]", + name="phone_number", + expr=array_check( + "phones", + lambda el: check_pattern( + el, + "^\\+\\d{1,3}[\\s\\-\\(\\)0-9]+\\z", + label="International phone number (+ followed by country code and number)", + ), + ), + shape=CheckShape.ARRAY, + root_field="phones", + ) + + +def _brand_names_primary_required_check() -> Check: + return Check( + field="brand.names.primary", + name="required", + expr=F.when( + F.col("brand.names").isNotNull(), + check_required(F.col("brand.names.primary")), + ), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_primary_string_min_length_check() -> Check: + return Check( + field="brand.names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("brand.names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_primary_stripped_check() -> Check: + return Check( + field="brand.names.primary", + name="stripped", + expr=check_stripped(F.col("brand.names.primary")), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_rules_value_required_check() -> Check: + return Check( + field="brand.names.rules[].value", + name="required", + expr=array_check("brand.names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_value_string_min_length_check() -> Check: + return Check( + field="brand.names.rules[].value", + name="string_min_length", + expr=array_check( + "brand.names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_value_stripped_check() -> Check: + 
return Check( + field="brand.names.rules[].value", + name="stripped", + expr=array_check("brand.names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_variant_required_check() -> Check: + return Check( + field="brand.names.rules[].variant", + name="required", + expr=array_check("brand.names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_variant_enum_check() -> Check: + return Check( + field="brand.names.rules[].variant", + name="enum", + expr=array_check( + "brand.names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_language_check() -> Check: + return Check( + field="brand.names.rules[].language", + name="language_tag", + expr=array_check( + "brand.names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.mode", + name="required", + expr=array_check( + "brand.names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "brand.names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, 
+ root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries", + name="required", + expr=array_check( + "brand.names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "brand.names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "brand.names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "brand.names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_length_check() -> Check: + return Check( + field="brand.names.rules[].between", + name="linear_range_length", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="brand.names.rules[].between", + 
name="linear_range_bounds", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_order_check() -> Check: + return Check( + field="brand.names.rules[].between", + name="linear_range_order", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_side_check() -> Check: + return Check( + field="brand.names.rules[].side", + name="enum", + expr=array_check( + "brand.names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_wikidata_check() -> Check: + return Check( + field="brand.wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("brand.wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _addresses_min_length_check() -> Check: + return Check( + field="addresses_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("addresses"), 1), + shape=CheckShape.SCALAR, + root_field="addresses", + ) + + +def _addresses_region_check() -> Check: + return Check( + field="addresses[].region", + name="region_code", + expr=array_check( + "addresses", + lambda el: check_pattern( + el["region"], + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 subdivision code", + ), + ), + shape=CheckShape.ARRAY, + root_field="addresses", + ) + + +def _addresses_country_check() -> Check: + return Check( + field="addresses[].country", + name="country_code_alpha2", + expr=array_check( + "addresses", + lambda el: check_pattern( + el["country"], "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + shape=CheckShape.ARRAY, + root_field="addresses", + ) + + +def _names_primary_required_check() -> Check: + return Check( + 
field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + 
field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def place_checks() -> list[Check]: + """All validation checks for place.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + 
_bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _operating_status_check(), + _categories_primary_required_check(), + _categories_primary_snake_case_check(), + _categories_alternate_unique_check(), + _categories_alternate_check(), + _basic_category_check(), + _taxonomy_primary_required_check(), + _taxonomy_primary_snake_case_check(), + _taxonomy_hierarchy_check(), + _taxonomy_hierarchy_min_length_check(), + _taxonomy_hierarchy_unique_check(), + _taxonomy_hierarchy_check_1(), + _taxonomy_alternates_min_length_check(), + _taxonomy_alternates_unique_check(), + _taxonomy_alternates_check(), + _confidence_bounds_check(), + _confidence_bounds_check_1(), + _websites_min_length_check(), + _websites_unique_check(), + _websites_url_format_check(), + _websites_url_length_check(), + _socials_min_length_check(), + _socials_unique_check(), + _socials_url_format_check(), + _socials_url_length_check(), + _emails_min_length_check(), + _emails_unique_check(), + _emails_check(), + _phones_min_length_check(), + _phones_unique_check(), + _phones_check(), + _brand_names_primary_required_check(), + _brand_names_primary_string_min_length_check(), + _brand_names_primary_stripped_check(), + _brand_names_rules_value_required_check(), + _brand_names_rules_value_string_min_length_check(), + 
_brand_names_rules_value_stripped_check(), + _brand_names_rules_variant_required_check(), + _brand_names_rules_variant_enum_check(), + _brand_names_rules_language_check(), + _brand_names_rules_perspectives_mode_required_check(), + _brand_names_rules_perspectives_mode_enum_check(), + _brand_names_rules_perspectives_countries_check(), + _brand_names_rules_perspectives_countries_min_length_check(), + _brand_names_rules_perspectives_countries_unique_check(), + _brand_names_rules_perspectives_countries_check_1(), + _brand_names_rules_between_linear_range_length_check(), + _brand_names_rules_between_linear_range_bounds_check(), + _brand_names_rules_between_linear_range_order_check(), + _brand_names_rules_side_check(), + _brand_wikidata_check(), + _addresses_min_length_check(), + _addresses_region_check(), + _addresses_country_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + ] + + +PLACE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + 
StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("operating_status", StringType(), True), + StructField( + "categories", + StructType( + [ + StructField("primary", StringType(), True), + StructField("alternate", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField("basic_category", StringType(), True), + StructField( + "taxonomy", + StructType( + [ + StructField("primary", StringType(), True), + StructField("hierarchy", ArrayType(StringType(), True), True), + StructField("alternates", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField("confidence", DoubleType(), True), + StructField("websites", ArrayType(StringType(), True), True), + StructField("socials", ArrayType(StringType(), True), True), + StructField("emails", ArrayType(StringType(), True), True), + StructField("phones", ArrayType(StringType(), True), True), + StructField( + "brand", + StructType( + [ + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", + MapType(StringType(), StringType(), True), + True, + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField( + "value", StringType(), True + ), + StructField( + "variant", StringType(), True + ), + StructField( + "language", StringType(), True + ), + StructField( + "perspectives", + StructType( + [ + StructField( + "mode", + StringType(), + True, + ), + StructField( + "countries", + ArrayType( + StringType(), True + ), + True, + ), + ] + ), + True, + ), + StructField( + "between", + ArrayType(DoubleType(), True), + True, + ), + 
StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("wikidata", StringType(), True), + ] + ), + True, + ), + StructField( + "addresses", + ArrayType( + StructType( + [ + StructField("freeform", StringType(), True), + StructField("locality", StringType(), True), + StructField("postcode", StringType(), True), + StructField("region", StringType(), True), + StructField("country", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.places:Place" + +PARTITIONS: dict[str, str] = {"theme": "places"} + +FEATURE_VALIDATION = FeatureValidation( + schema=PLACE_SCHEMA, + checks=place_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py new file mode 100644 index 000000000..5813ca61b --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py @@ -0,0 +1,372 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Connector validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["connector"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + 
expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def connector_checks() -> list[Check]: + """All validation checks for connector.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + ] + + +CONNECTOR_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + 
StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.transportation:Connector" + +PARTITIONS: dict[str, str] = {"theme": "transportation"} + +FEATURE_VALIDATION = FeatureValidation( + schema=CONNECTOR_SCHEMA, + checks=connector_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py new file mode 100644 index 000000000..cc9fd32bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -0,0 +1,5053 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Segment validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_require_any_of, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def 
_bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.LINE_STRING), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["segment"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum(F.col("subtype"), ["road", "rail", "water"]), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _access_restrictions_min_length_check() -> Check: + return Check( + field="access_restrictions_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("access_restrictions"), 1), + shape=CheckShape.SCALAR, + root_field="access_restrictions", + ) + + +def _access_restrictions_unique_check() -> Check: + return Check( + field="access_restrictions_unique", + name="struct_unique", + expr=check_struct_unique(F.col("access_restrictions")), + shape=CheckShape.SCALAR, + root_field="access_restrictions", + ) + + +def _access_restrictions_access_type_required_check() -> Check: + return Check( + field="access_restrictions[].access_type", + name="required", + expr=array_check( + "access_restrictions", lambda el: check_required(el["access_type"]) + ), + shape=CheckShape.ARRAY, + 
root_field="access_restrictions", + ) + + +def _access_restrictions_access_type_enum_check() -> Check: + return Check( + field="access_restrictions[].access_type", + name="enum", + expr=array_check( + "access_restrictions", + lambda el: check_enum( + el["access_type"], ["allowed", "denied", "designated"] + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_length_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_length", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_bounds_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_bounds", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_order_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_order", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_heading_check() -> Check: + return Check( + field="access_restrictions[].when.heading", + name="enum", + expr=array_check( + "access_restrictions", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.mode_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["mode"], 1), + ), + 
shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_unique_check() -> Check: + return Check( + field="access_restrictions[].when.mode_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["mode"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_check() -> Check: + return Check( + field="access_restrictions[].when.mode[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.using_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_unique_check() -> Check: + return Check( + field="access_restrictions[].when.using_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["using"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_check() -> Check: + return Check( + field="access_restrictions[].when.using[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + 
+ +def _access_restrictions_when_recognized_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.recognized_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_recognized_unique_check() -> Check: + return Check( + field="access_restrictions[].when.recognized_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", + lambda el: check_struct_unique(el["when"]["recognized"]), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_recognized_check() -> Check: + return Check( + field="access_restrictions[].when.recognized[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unique_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["vehicle"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_dimension_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].dimension", 
+ name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], lambda inner: check_required(inner["dimension"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].dimension", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_comparison_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].comparison", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], lambda inner: check_required(inner["comparison"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_comparison_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].comparison", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + 
shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "weight", "width"]), + check_required(inner["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_bounds_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="bounds", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "weight", "width"]), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def 
_access_restrictions_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), check_required(inner["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum(inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _connectors_min_length_check() -> Check: + return Check( + field="connectors_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("connectors"), 2), + shape=CheckShape.SCALAR, + root_field="connectors", + ) + + +def _connectors_unique_check() -> Check: + return Check( + field="connectors_unique", + name="struct_unique", + expr=check_struct_unique(F.col("connectors")), + shape=CheckShape.SCALAR, + root_field="connectors", + ) + + +def _connectors_connector_id_required_check() -> Check: + return Check( + field="connectors[].connector_id", + name="required", + expr=array_check("connectors", lambda el: check_required(el["connector_id"])), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_connector_id_string_min_length_check() -> Check: + return Check( + field="connectors[].connector_id", + name="string_min_length", + expr=array_check( + "connectors", lambda el: check_string_min_length(el["connector_id"], 1) + ), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def 
_connectors_connector_id_no_whitespace_check() -> Check: + return Check( + field="connectors[].connector_id", + name="no_whitespace", + expr=array_check( + "connectors", + lambda el: check_pattern( + el["connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_at_bounds_check() -> Check: + return Check( + field="connectors[].at", + name="bounds", + expr=array_check("connectors", lambda el: check_bounds(el["at"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_at_bounds_check_1() -> Check: + return Check( + field="connectors[].at", + name="bounds", + expr=array_check("connectors", lambda el: check_bounds(el["at"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _level_rules_value_check() -> Check: + return Check( + field="level_rules[].value", + name="required", + expr=array_check("level_rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_length_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_length", + expr=array_check( + "level_rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_bounds", + expr=array_check( + "level_rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_order_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_order", + expr=array_check( + "level_rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def 
_routes_name_string_min_length_check() -> Check: + return Check( + field="routes[].name", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["name"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_name_stripped_check() -> Check: + return Check( + field="routes[].name", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["name"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_network_string_min_length_check() -> Check: + return Check( + field="routes[].network", + name="string_min_length", + expr=array_check( + "routes", lambda el: check_string_min_length(el["network"], 1) + ), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_network_stripped_check() -> Check: + return Check( + field="routes[].network", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["network"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_ref_string_min_length_check() -> Check: + return Check( + field="routes[].ref", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["ref"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_ref_stripped_check() -> Check: + return Check( + field="routes[].ref", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["ref"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_symbol_string_min_length_check() -> Check: + return Check( + field="routes[].symbol", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["symbol"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_symbol_stripped_check() -> Check: + return Check( + field="routes[].symbol", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["symbol"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def 
_routes_wikidata_check() -> Check: + return Check( + field="routes[].wikidata", + name="wikidata_id", + expr=array_check( + "routes", + lambda el: check_pattern( + el["wikidata"], + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + ), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_length_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_length", + expr=array_check("routes", lambda el: check_linear_range_length(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_bounds_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_bounds", + expr=array_check("routes", lambda el: check_linear_range_bounds(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_order_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_order", + expr=array_check("routes", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _subclass_rules_value_required_check() -> Check: + return Check( + field="subclass_rules[].value", + name="required", + expr=array_check("subclass_rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_value_enum_check() -> Check: + return Check( + field="subclass_rules[].value", + name="enum", + expr=array_check( + "subclass_rules", + lambda el: check_enum( + el["value"], + [ + "link", + "sidewalk", + "crosswalk", + "parking_aisle", + "driveway", + "alley", + "cycle_crossing", + ], + ), + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_length_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_length", + expr=array_check( + "subclass_rules", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_bounds", + expr=array_check( + "subclass_rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_order_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_order", + expr=array_check( + "subclass_rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, 
+ root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=F.when(F.col("subtype").isin(["road"]), check_required(F.col("class"))), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + check_enum( + F.col("class"), + [ + "motorway", + "primary", + "secondary", + "tertiary", + "residential", + "living_street", + "trunk", + "unclassified", + "service", + "pedestrian", + "footway", + "steps", + "path", + "track", + "cycleway", + "bridleway", + "unknown", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _destinations_from_connector_id_required_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_required(el["from_connector_id"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_from_connector_id_string_min_length_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["from_connector_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="destinations", + ) + + +def _destinations_from_connector_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["from_connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_required_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_required(el["to_connector_id"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_string_min_length_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["to_connector_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["to_connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_segment_id_required_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_required(el["to_segment_id"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def 
_destinations_to_segment_id_string_min_length_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["to_segment_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_segment_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["to_segment_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_final_heading_required_check() -> Check: + return Check( + field="destinations[].final_heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_required(el["final_heading"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_final_heading_enum_check() -> Check: + return Check( + field="destinations[].final_heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_enum(el["final_heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_min_length_check() -> Check: + return Check( + field="destinations[].labels_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_array_min_length(el["labels"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_unique_check() -> Check: + return Check( + field="destinations[].labels_unique", + name="struct_unique", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_struct_unique(el["labels"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_required_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_required(inner["value"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_string_min_length_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], + lambda inner: check_string_min_length(inner["value"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_stripped_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="stripped", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_stripped(inner["value"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_type_required_check() -> Check: + return Check( + field="destinations[].labels[].type", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_required(inner["type"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_type_enum_check() -> Check: + return Check( + field="destinations[].labels[].type", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: 
array_check( + el["labels"], + lambda inner: check_enum( + inner["type"], + [ + "street", + "country", + "route_ref", + "toward_route_ref", + "unknown", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_symbols_unique_check() -> Check: + return Check( + field="destinations[].symbols_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_struct_unique(el["symbols"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_symbols_check() -> Check: + return Check( + field="destinations[].symbols[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["symbols"], + lambda inner: check_enum( + inner, + [ + "motorway", + "airport", + "hospital", + "center", + "industrial", + "parking", + "bus", + "train_station", + "rest_area", + "ferry", + "motorroad", + "fuel", + "viewpoint", + "fuel_diesel", + "food", + "lodging", + "info", + "camp_site", + "interchange", + "restrooms", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_when_heading_required_check() -> Check: + return Check( + field="destinations[].when.heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: F.when( + el["when"].isNotNull(), check_required(el["when"]["heading"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_when_heading_enum_check() -> Check: + return Check( + field="destinations[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _prohibited_transitions_sequence_check() -> 
Check: + return Check( + field="prohibited_transitions[].sequence", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_required(el["sequence"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["sequence"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_unique_check() -> Check: + return Check( + field="prohibited_transitions[].sequence_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_struct_unique(el["sequence"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_required_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], lambda inner: check_required(inner["connector_id"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_string_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_string_min_length(inner["connector_id"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_no_whitespace_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_pattern( + inner["connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_required_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], lambda inner: check_required(inner["segment_id"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_string_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_string_min_length(inner["segment_id"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_no_whitespace_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_pattern( + inner["segment_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + ), + 
shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_final_heading_required_check() -> Check: + return Check( + field="prohibited_transitions[].final_heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_required(el["final_heading"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_final_heading_enum_check() -> Check: + return Check( + field="prohibited_transitions[].final_heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_enum(el["final_heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_length_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_length(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_bounds_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_bounds(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_order_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_order(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_heading_check() -> Check: + return Check( + field="prohibited_transitions[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["mode"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.using_min_length", + name="array_min_length", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.using_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["using"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_check() -> Check: + return Check( + field="prohibited_transitions[].when.using[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.recognized_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.recognized_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["recognized"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_check() 
-> Check: + return Check( + field="prohibited_transitions[].when.recognized[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["vehicle"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_dimension_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].dimension", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["dimension"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].dimension", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + 
"prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_comparison_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].comparison", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["comparison"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_comparison_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].comparison", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="required", + expr=F.when( + 
F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_bounds_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def 
_prohibited_transitions_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum( + inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _road_flags_min_length_check() -> Check: + return Check( + field="road_flags_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("road_flags"), 1), + ), + shape=CheckShape.SCALAR, + root_field="road_flags", + ) + + +def _road_flags_unique_check() -> Check: + return Check( + field="road_flags_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_flags")) + ), + shape=CheckShape.SCALAR, + root_field="road_flags", + ) + + +def _road_flags_values_check() -> Check: + return Check( + field="road_flags[].values", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_flags", lambda el: check_required(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_min_length_check() -> Check: + return Check( + 
field="road_flags[].values_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_array_min_length(el["values"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_unique_check() -> Check: + return Check( + field="road_flags[].values_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_flags", lambda el: check_struct_unique(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_check_1() -> Check: + return Check( + field="road_flags[].values[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "road_flags", + lambda el: array_check( + el["values"], + lambda inner: check_enum( + inner, + [ + "is_bridge", + "is_link", + "is_tunnel", + "is_under_construction", + "is_abandoned", + "is_covered", + "is_indoor", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_length_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_bounds_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_order_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: 
check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_surface_min_length_check() -> Check: + return Check( + field="road_surface_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("road_surface"), 1), + ), + shape=CheckShape.SCALAR, + root_field="road_surface", + ) + + +def _road_surface_unique_check() -> Check: + return Check( + field="road_surface_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_surface")) + ), + shape=CheckShape.SCALAR, + root_field="road_surface", + ) + + +def _road_surface_value_required_check() -> Check: + return Check( + field="road_surface[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_surface", lambda el: check_required(el["value"])), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_value_enum_check() -> Check: + return Check( + field="road_surface[].value", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", + lambda el: check_enum( + el["value"], + [ + "unknown", + "paved", + "unpaved", + "gravel", + "dirt", + "paving_stones", + "metal", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_length_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_bounds_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: 
check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_order_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _speed_limits_min_length_check() -> Check: + return Check( + field="speed_limits_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("speed_limits"), 1), + ), + shape=CheckShape.SCALAR, + root_field="speed_limits", + ) + + +def _speed_limits_unique_check() -> Check: + return Check( + field="speed_limits_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("speed_limits")) + ), + shape=CheckShape.SCALAR, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_required_check() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["max_speed"].isNotNull(), + check_required(el["max_speed"]["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_bounds_check() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_bounds(el["max_speed"]["value"], ge=1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_bounds_check_1() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + 
"speed_limits", + lambda el: check_bounds(el["max_speed"]["value"], le=350), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_unit_required_check() -> Check: + return Check( + field="speed_limits[].max_speed.unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["max_speed"].isNotNull(), check_required(el["max_speed"]["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_unit_enum_check() -> Check: + return Check( + field="speed_limits[].max_speed.unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_enum(el["max_speed"]["unit"], ["mph", "km/h"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_required_check() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["min_speed"].isNotNull(), + check_required(el["min_speed"]["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_bounds_check() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_bounds(el["min_speed"]["value"], ge=1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_bounds_check_1() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_bounds(el["min_speed"]["value"], le=350), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def 
_speed_limits_min_speed_unit_required_check() -> Check: + return Check( + field="speed_limits[].min_speed.unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["min_speed"].isNotNull(), check_required(el["min_speed"]["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_unit_enum_check() -> Check: + return Check( + field="speed_limits[].min_speed.unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_enum(el["min_speed"]["unit"], ["mph", "km/h"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_length_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_bounds_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_order_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_heading_check() -> Check: + return Check( + field="speed_limits[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + 
lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_min_length_check() -> Check: + return Check( + field="speed_limits[].when.mode_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_array_min_length(el["when"]["mode"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_unique_check() -> Check: + return Check( + field="speed_limits[].when.mode_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["mode"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_check() -> Check: + return Check( + field="speed_limits[].when.mode[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_min_length_check() -> Check: + return Check( + field="speed_limits[].when.using_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_unique_check() -> Check: + return Check( + field="speed_limits[].when.using_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: 
check_struct_unique(el["when"]["using"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_check() -> Check: + return Check( + field="speed_limits[].when.using[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_min_length_check() -> Check: + return Check( + field="speed_limits[].when.recognized_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_unique_check() -> Check: + return Check( + field="speed_limits[].when.recognized_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["recognized"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_check() -> Check: + return Check( + field="speed_limits[].when.recognized[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_min_length_check() -> Check: + return Check( + field="speed_limits[].when.vehicle_min_length", + name="array_min_length", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unique_check() -> Check: + return Check( + field="speed_limits[].when.vehicle_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["vehicle"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_dimension_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].dimension", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["dimension"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].dimension", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_comparison_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].comparison", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["comparison"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_comparison_enum_check() -> Check: + return Check( + 
field="speed_limits[].when.vehicle[].comparison", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_bounds_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_required_check() -> Check: + return Check( + 
field="speed_limits[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_enum_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum( + inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _subclass_check() -> Check: + return Check( + field="subclass", + name="enum", + expr=F.when( + 
F.col("subtype").isin(["road"]), + check_enum( + F.col("subclass"), + [ + "link", + "sidewalk", + "crosswalk", + "parking_aisle", + "driveway", + "alley", + "cycle_crossing", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="subclass", + ) + + +def _width_rules_min_length_check() -> Check: + return Check( + field="width_rules_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("width_rules"), 1), + ), + shape=CheckShape.SCALAR, + root_field="width_rules", + ) + + +def _width_rules_unique_check() -> Check: + return Check( + field="width_rules_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("width_rules")) + ), + shape=CheckShape.SCALAR, + root_field="width_rules", + ) + + +def _width_rules_value_required_check() -> Check: + return Check( + field="width_rules[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("width_rules", lambda el: check_required(el["value"])), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_value_bounds_check() -> Check: + return Check( + field="width_rules[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("width_rules", lambda el: check_bounds(el["value"], gt=0.0)), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_length_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", 
lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_order_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _class_required_check_1() -> Check: + return Check( + field="class", + name="required", + expr=F.when(F.col("subtype").isin(["rail"]), check_required(F.col("class"))), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check_1() -> Check: + return Check( + field="class", + name="enum", + expr=F.when( + F.col("subtype").isin(["rail"]), + check_enum( + F.col("class"), + [ + "funicular", + "light_rail", + "monorail", + "narrow_gauge", + "standard_gauge", + "subway", + "tram", + "unknown", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _rail_flags_min_length_check() -> Check: + return Check( + field="rail_flags_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + check_array_min_length(F.col("rail_flags"), 1), + ), + shape=CheckShape.SCALAR, + root_field="rail_flags", + ) + + +def _rail_flags_unique_check() -> Check: + return Check( + field="rail_flags_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["rail"]), check_struct_unique(F.col("rail_flags")) + ), + shape=CheckShape.SCALAR, + root_field="rail_flags", + ) + + +def _rail_flags_values_check() -> Check: + return Check( + field="rail_flags[].values", + name="required", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check("rail_flags", lambda el: check_required(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_min_length_check() -> Check: + return Check( + 
field="rail_flags[].values_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_array_min_length(el["values"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_unique_check() -> Check: + return Check( + field="rail_flags[].values_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check("rail_flags", lambda el: check_struct_unique(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_check_1() -> Check: + return Check( + field="rail_flags[].values[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["rail"]), + nested_array_check( + "rail_flags", + lambda el: array_check( + el["values"], + lambda inner: check_enum( + inner, + [ + "is_bridge", + "is_tunnel", + "is_under_construction", + "is_abandoned", + "is_covered", + "is_passenger", + "is_freight", + "is_disused", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_length_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_bounds_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_order_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + 
"rail_flags", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _access_restrictions_when_vehicle_check_forbid_if_0_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_1_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_2_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_3_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, 
+ root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_4_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_check_require_any_of_5_check() -> Check: + return Check( + field="access_restrictions[].when", + name="require_any_of", + expr=array_check( + "access_restrictions", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _destinations_check_require_any_of_6_check() -> Check: + return Check( + field="destinations[]", + name="require_any_of", + expr=array_check( + "destinations", + lambda el: check_require_any_of( + [el["labels"], el["symbols"]], ["labels", "symbols"] + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _prohibited_transitions_when_vehicle_check_forbid_if_7_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_8_check() -> Check: + return Check( + 
field="prohibited_transitions[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_9_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_10_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_11_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_check_require_any_of_12_check() -> Check: + return Check( + field="prohibited_transitions[].when", + 
name="require_any_of", + expr=array_check( + "prohibited_transitions", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _speed_limits_when_vehicle_check_forbid_if_13_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_14_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_15_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_16_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + 
el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_17_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_check_require_any_of_18_check() -> Check: + return Check( + field="speed_limits[].when", + name="require_any_of", + expr=array_check( + "speed_limits", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_check_require_any_of_19_check() -> Check: + return Check( + field="speed_limits[]", + name="require_any_of", + expr=array_check( + "speed_limits", + lambda el: check_require_any_of( + [el["max_speed"]["value"], el["min_speed"]["value"]], + ["max_speed.value", "min_speed.value"], + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _check_forbid_if_20_check() -> Check: + return Check( + field="class_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("class"), F.col("subtype") == "water", "subtype = 'water'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_21_check() -> Check: + return Check( + field="class_required_0", + name="require_if", + expr=check_require_if( + F.col("class"), F.col("subtype") == "rail", "subtype = 'rail'" + ), + 
shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_22_check() -> Check: + return Check( + field="class_required_1", + name="require_if", + expr=check_require_if( + F.col("class"), F.col("subtype") == "road", "subtype = 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_23_check() -> Check: + return Check( + field="destinations_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("destinations"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_24_check() -> Check: + return Check( + field="prohibited_transitions_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("prohibited_transitions"), + F.col("subtype") != "road", + "subtype != 'road'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_25_check() -> Check: + return Check( + field="road_flags_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("road_flags"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_26_check() -> Check: + return Check( + field="road_surface_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("road_surface"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_27_check() -> Check: + return Check( + field="speed_limits_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("speed_limits"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_28_check() -> Check: + return Check( + field="subclass_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("subclass"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_29_check() -> Check: + return Check( + 
field="width_rules_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("width_rules"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_30_check() -> Check: + return Check( + field="rail_flags_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("rail_flags"), F.col("subtype") != "rail", "subtype != 'rail'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def segment_checks() -> list[Check]: + """All validation checks for segment.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _access_restrictions_min_length_check(), + _access_restrictions_unique_check(), + _access_restrictions_access_type_required_check(), + _access_restrictions_access_type_enum_check(), + _access_restrictions_between_linear_range_length_check(), + _access_restrictions_between_linear_range_bounds_check(), + _access_restrictions_between_linear_range_order_check(), + _access_restrictions_when_heading_check(), + _access_restrictions_when_mode_min_length_check(), + _access_restrictions_when_mode_unique_check(), + _access_restrictions_when_mode_check(), + 
_access_restrictions_when_using_min_length_check(), + _access_restrictions_when_using_unique_check(), + _access_restrictions_when_using_check(), + _access_restrictions_when_recognized_min_length_check(), + _access_restrictions_when_recognized_unique_check(), + _access_restrictions_when_recognized_check(), + _access_restrictions_when_vehicle_min_length_check(), + _access_restrictions_when_vehicle_unique_check(), + _access_restrictions_when_vehicle_dimension_required_check(), + _access_restrictions_when_vehicle_dimension_enum_check(), + _access_restrictions_when_vehicle_comparison_required_check(), + _access_restrictions_when_vehicle_comparison_enum_check(), + _access_restrictions_when_vehicle_value_check(), + _access_restrictions_when_vehicle_value_required_check(), + _access_restrictions_when_vehicle_value_bounds_check(), + _access_restrictions_when_vehicle_unit_required_check(), + _access_restrictions_when_vehicle_unit_enum_check(), + _access_restrictions_when_vehicle_unit_required_check_1(), + _access_restrictions_when_vehicle_unit_enum_check_1(), + _connectors_min_length_check(), + _connectors_unique_check(), + _connectors_connector_id_required_check(), + _connectors_connector_id_string_min_length_check(), + _connectors_connector_id_no_whitespace_check(), + _connectors_at_bounds_check(), + _connectors_at_bounds_check_1(), + _level_rules_value_check(), + _level_rules_between_linear_range_length_check(), + _level_rules_between_linear_range_bounds_check(), + _level_rules_between_linear_range_order_check(), + _routes_name_string_min_length_check(), + _routes_name_stripped_check(), + _routes_network_string_min_length_check(), + _routes_network_stripped_check(), + _routes_ref_string_min_length_check(), + _routes_ref_stripped_check(), + _routes_symbol_string_min_length_check(), + _routes_symbol_stripped_check(), + _routes_wikidata_check(), + _routes_between_linear_range_length_check(), + _routes_between_linear_range_bounds_check(), + 
_routes_between_linear_range_order_check(), + _subclass_rules_value_required_check(), + _subclass_rules_value_enum_check(), + _subclass_rules_between_linear_range_length_check(), + _subclass_rules_between_linear_range_bounds_check(), + _subclass_rules_between_linear_range_order_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _class_required_check(), + _class_enum_check(), + _destinations_from_connector_id_required_check(), + _destinations_from_connector_id_string_min_length_check(), + _destinations_from_connector_id_no_whitespace_check(), + _destinations_to_connector_id_required_check(), + _destinations_to_connector_id_string_min_length_check(), + _destinations_to_connector_id_no_whitespace_check(), + _destinations_to_segment_id_required_check(), + _destinations_to_segment_id_string_min_length_check(), + _destinations_to_segment_id_no_whitespace_check(), + _destinations_final_heading_required_check(), + _destinations_final_heading_enum_check(), + _destinations_labels_min_length_check(), + _destinations_labels_unique_check(), + _destinations_labels_value_required_check(), + _destinations_labels_value_string_min_length_check(), + _destinations_labels_value_stripped_check(), + 
_destinations_labels_type_required_check(), + _destinations_labels_type_enum_check(), + _destinations_symbols_unique_check(), + _destinations_symbols_check(), + _destinations_when_heading_required_check(), + _destinations_when_heading_enum_check(), + _prohibited_transitions_sequence_check(), + _prohibited_transitions_sequence_min_length_check(), + _prohibited_transitions_sequence_unique_check(), + _prohibited_transitions_sequence_connector_id_required_check(), + _prohibited_transitions_sequence_connector_id_string_min_length_check(), + _prohibited_transitions_sequence_connector_id_no_whitespace_check(), + _prohibited_transitions_sequence_segment_id_required_check(), + _prohibited_transitions_sequence_segment_id_string_min_length_check(), + _prohibited_transitions_sequence_segment_id_no_whitespace_check(), + _prohibited_transitions_final_heading_required_check(), + _prohibited_transitions_final_heading_enum_check(), + _prohibited_transitions_between_linear_range_length_check(), + _prohibited_transitions_between_linear_range_bounds_check(), + _prohibited_transitions_between_linear_range_order_check(), + _prohibited_transitions_when_heading_check(), + _prohibited_transitions_when_mode_min_length_check(), + _prohibited_transitions_when_mode_unique_check(), + _prohibited_transitions_when_mode_check(), + _prohibited_transitions_when_using_min_length_check(), + _prohibited_transitions_when_using_unique_check(), + _prohibited_transitions_when_using_check(), + _prohibited_transitions_when_recognized_min_length_check(), + _prohibited_transitions_when_recognized_unique_check(), + _prohibited_transitions_when_recognized_check(), + _prohibited_transitions_when_vehicle_min_length_check(), + _prohibited_transitions_when_vehicle_unique_check(), + _prohibited_transitions_when_vehicle_dimension_required_check(), + _prohibited_transitions_when_vehicle_dimension_enum_check(), + _prohibited_transitions_when_vehicle_comparison_required_check(), + 
_prohibited_transitions_when_vehicle_comparison_enum_check(), + _prohibited_transitions_when_vehicle_value_check(), + _prohibited_transitions_when_vehicle_value_required_check(), + _prohibited_transitions_when_vehicle_value_bounds_check(), + _prohibited_transitions_when_vehicle_unit_required_check(), + _prohibited_transitions_when_vehicle_unit_enum_check(), + _prohibited_transitions_when_vehicle_unit_required_check_1(), + _prohibited_transitions_when_vehicle_unit_enum_check_1(), + _road_flags_min_length_check(), + _road_flags_unique_check(), + _road_flags_values_check(), + _road_flags_values_min_length_check(), + _road_flags_values_unique_check(), + _road_flags_values_check_1(), + _road_flags_between_linear_range_length_check(), + _road_flags_between_linear_range_bounds_check(), + _road_flags_between_linear_range_order_check(), + _road_surface_min_length_check(), + _road_surface_unique_check(), + _road_surface_value_required_check(), + _road_surface_value_enum_check(), + _road_surface_between_linear_range_length_check(), + _road_surface_between_linear_range_bounds_check(), + _road_surface_between_linear_range_order_check(), + _speed_limits_min_length_check(), + _speed_limits_unique_check(), + _speed_limits_max_speed_value_required_check(), + _speed_limits_max_speed_value_bounds_check(), + _speed_limits_max_speed_value_bounds_check_1(), + _speed_limits_max_speed_unit_required_check(), + _speed_limits_max_speed_unit_enum_check(), + _speed_limits_min_speed_value_required_check(), + _speed_limits_min_speed_value_bounds_check(), + _speed_limits_min_speed_value_bounds_check_1(), + _speed_limits_min_speed_unit_required_check(), + _speed_limits_min_speed_unit_enum_check(), + _speed_limits_between_linear_range_length_check(), + _speed_limits_between_linear_range_bounds_check(), + _speed_limits_between_linear_range_order_check(), + _speed_limits_when_heading_check(), + _speed_limits_when_mode_min_length_check(), + _speed_limits_when_mode_unique_check(), + 
_speed_limits_when_mode_check(), + _speed_limits_when_using_min_length_check(), + _speed_limits_when_using_unique_check(), + _speed_limits_when_using_check(), + _speed_limits_when_recognized_min_length_check(), + _speed_limits_when_recognized_unique_check(), + _speed_limits_when_recognized_check(), + _speed_limits_when_vehicle_min_length_check(), + _speed_limits_when_vehicle_unique_check(), + _speed_limits_when_vehicle_dimension_required_check(), + _speed_limits_when_vehicle_dimension_enum_check(), + _speed_limits_when_vehicle_comparison_required_check(), + _speed_limits_when_vehicle_comparison_enum_check(), + _speed_limits_when_vehicle_value_check(), + _speed_limits_when_vehicle_value_required_check(), + _speed_limits_when_vehicle_value_bounds_check(), + _speed_limits_when_vehicle_unit_required_check(), + _speed_limits_when_vehicle_unit_enum_check(), + _speed_limits_when_vehicle_unit_required_check_1(), + _speed_limits_when_vehicle_unit_enum_check_1(), + _subclass_check(), + _width_rules_min_length_check(), + _width_rules_unique_check(), + _width_rules_value_required_check(), + _width_rules_value_bounds_check(), + _width_rules_between_linear_range_length_check(), + _width_rules_between_linear_range_bounds_check(), + _width_rules_between_linear_range_order_check(), + _class_required_check_1(), + _class_enum_check_1(), + _rail_flags_min_length_check(), + _rail_flags_unique_check(), + _rail_flags_values_check(), + _rail_flags_values_min_length_check(), + _rail_flags_values_unique_check(), + _rail_flags_values_check_1(), + _rail_flags_between_linear_range_length_check(), + _rail_flags_between_linear_range_bounds_check(), + _rail_flags_between_linear_range_order_check(), + _access_restrictions_when_vehicle_check_forbid_if_0_check(), + _access_restrictions_when_vehicle_check_require_if_1_check(), + _access_restrictions_when_vehicle_check_require_if_2_check(), + _access_restrictions_when_vehicle_check_require_if_3_check(), + 
_access_restrictions_when_vehicle_check_require_if_4_check(), + _access_restrictions_when_check_require_any_of_5_check(), + _destinations_check_require_any_of_6_check(), + _prohibited_transitions_when_vehicle_check_forbid_if_7_check(), + _prohibited_transitions_when_vehicle_check_require_if_8_check(), + _prohibited_transitions_when_vehicle_check_require_if_9_check(), + _prohibited_transitions_when_vehicle_check_require_if_10_check(), + _prohibited_transitions_when_vehicle_check_require_if_11_check(), + _prohibited_transitions_when_check_require_any_of_12_check(), + _speed_limits_when_vehicle_check_forbid_if_13_check(), + _speed_limits_when_vehicle_check_require_if_14_check(), + _speed_limits_when_vehicle_check_require_if_15_check(), + _speed_limits_when_vehicle_check_require_if_16_check(), + _speed_limits_when_vehicle_check_require_if_17_check(), + _speed_limits_when_check_require_any_of_18_check(), + _speed_limits_check_require_any_of_19_check(), + _check_forbid_if_20_check(), + _check_require_if_21_check(), + _check_require_if_22_check(), + _check_forbid_if_23_check(), + _check_forbid_if_24_check(), + _check_forbid_if_25_check(), + _check_forbid_if_26_check(), + _check_forbid_if_27_check(), + _check_forbid_if_28_check(), + _check_forbid_if_29_check(), + _check_forbid_if_30_check(), + ] + + +SEGMENT_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", 
ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField( + "access_restrictions", + ArrayType( + StructType( + [ + StructField("access_type", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True + ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + ), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "connectors", + ArrayType( + StructType( + [ + StructField("connector_id", StringType(), True), + StructField("at", DoubleType(), True), + ] + ), + True, + ), + True, + ), + StructField( + "level_rules", + ArrayType( + StructType( + [ + StructField("value", IntegerType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "routes", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + StructField("network", StringType(), True), + StructField("ref", StringType(), True), + StructField("symbol", StringType(), True), + StructField("wikidata", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "subclass_rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + 
StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("class", StringType(), True), + StructField( + "destinations", + ArrayType( + StructType( + [ + StructField("from_connector_id", StringType(), True), + StructField("to_connector_id", StringType(), True), + StructField("to_segment_id", StringType(), True), + StructField("final_heading", StringType(), True), + StructField( + "labels", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("type", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("symbols", ArrayType(StringType(), True), True), + StructField( + "when", + StructType([StructField("heading", StringType(), True)]), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "prohibited_transitions", + ArrayType( + StructType( + [ + StructField( + "sequence", + ArrayType( + StructType( + [ + StructField("connector_id", StringType(), True), + StructField("segment_id", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("final_heading", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True 
+ ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + ), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "road_flags", + ArrayType( + StructType( + [ + StructField("values", ArrayType(StringType(), True), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "road_surface", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "speed_limits", + ArrayType( + StructType( + [ + StructField( + "max_speed", + StructType( + [ + StructField("value", IntegerType(), True), + StructField("unit", StringType(), True), + ] + ), + True, + ), + StructField( + "min_speed", + StructType( + [ + StructField("value", IntegerType(), True), + StructField("unit", StringType(), True), + ] + ), + True, + ), + StructField("is_max_speed_variable", BooleanType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True + ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + 
), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField("subclass", StringType(), True), + StructField( + "width_rules", + ArrayType( + StructType( + [ + StructField("value", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "rail_flags", + ArrayType( + StructType( + [ + StructField("values", ArrayType(StringType(), True), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.LINE_STRING,) + +ENTRY_POINT = "overture.schema.transportation:Segment" + +PARTITIONS: dict[str, str] = {"theme": "transportation"} + +FEATURE_VALIDATION = FeatureValidation( + schema=SEGMENT_SCHEMA, + checks=segment_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/tests/generated/__init__.py b/packages/overture-schema-pyspark/tests/generated/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py new file mode 
100644 index 000000000..b8da5893d --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -0,0 +1,462 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for address.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.addresses.address import ( + ADDRESS_SCHEMA, + address_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "9b51bb94-b26f-5f88-ad00-affc1e8f1935", + "geometry": "POINT (0 0)", + "theme": "addresses", + "type": "address", + "version": 0, + "country": "US", +} + + +BASE_ROW_POPULATED: dict = { + "id": "9b51bb94-b26f-5f88-ad00-affc1e8f1935", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "addresses", + "type": "address", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "address_levels": [{"value": "a"}], + "country": "US", + "number": "a", + "postal_city": "a", + "postcode": "a", + "street": "a", + "unit": "a", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="address::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="address::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="address::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + 
expected_check="no_whitespace", + ), + Scenario( + id="address::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="address::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="address::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="address::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="address::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="address::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="address::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="address::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="address::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="address::version:required", + scaffold={}, + mutate=set_at_path("version", None), + 
expected_field="version", + expected_check="required", + ), + Scenario( + id="address::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="address::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="address::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="address::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="address::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="address::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="address::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="address::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + 
mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="address::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="address::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="address::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="address::address_levels_min_length:array_min_length", + scaffold={"address_levels": [{}]}, + mutate=set_at_path("address_levels", []), + expected_field="address_levels_min_length", + expected_check="array_min_length", + ), + Scenario( + id="address::address_levels_max_length:array_max_length", + scaffold={"address_levels": [{}]}, + mutate=set_at_path("address_levels", [{}, {}, {}, {}, {}, {}]), + expected_field="address_levels_max_length", + expected_check="array_max_length", + ), + Scenario( + id="address::address_levels[].value:string_min_length", + scaffold={"address_levels": [{"value": "a"}]}, + mutate=set_at_path("address_levels[].value", ""), + expected_field="address_levels[].value", + expected_check="string_min_length", + ), + Scenario( + id="address::address_levels[].value:stripped", + scaffold={"address_levels": [{"value": "a"}]}, + mutate=set_at_path("address_levels[].value", " has spaces "), + 
expected_field="address_levels[].value", + expected_check="stripped", + ), + Scenario( + id="address::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="address::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="address::number:string_min_length", + scaffold={"number": "a"}, + mutate=set_at_path("number", ""), + expected_field="number", + expected_check="string_min_length", + ), + Scenario( + id="address::number:stripped", + scaffold={"number": "a"}, + mutate=set_at_path("number", " has spaces "), + expected_field="number", + expected_check="stripped", + ), + Scenario( + id="address::postal_city:string_min_length", + scaffold={"postal_city": "a"}, + mutate=set_at_path("postal_city", ""), + expected_field="postal_city", + expected_check="string_min_length", + ), + Scenario( + id="address::postal_city:stripped", + scaffold={"postal_city": "a"}, + mutate=set_at_path("postal_city", " has spaces "), + expected_field="postal_city", + expected_check="stripped", + ), + Scenario( + id="address::postcode:string_min_length", + scaffold={"postcode": "a"}, + mutate=set_at_path("postcode", ""), + expected_field="postcode", + expected_check="string_min_length", + ), + Scenario( + id="address::postcode:stripped", + scaffold={"postcode": "a"}, + mutate=set_at_path("postcode", " has spaces "), + expected_field="postcode", + expected_check="stripped", + ), + Scenario( + id="address::street:string_min_length", + scaffold={"street": "a"}, + mutate=set_at_path("street", ""), + expected_field="street", + expected_check="string_min_length", + ), + Scenario( + id="address::street:stripped", + scaffold={"street": "a"}, + mutate=set_at_path("street", " has spaces "), + expected_field="street", + expected_check="stripped", + ), + Scenario( + 
id="address::unit:string_min_length", + scaffold={"unit": "a"}, + mutate=set_at_path("unit", ""), + expected_field="unit", + expected_check="string_min_length", + ), + Scenario( + id="address::unit:stripped", + scaffold={"unit": "a"}, + mutate=set_at_path("unit", " has spaces "), + expected_field="unit", + expected_check="stripped", + ), + Scenario( + id="address::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return address_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + ADDRESS_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="address", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + ADDRESS_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="address", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("address::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. 
+ + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("address::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py new file mode 100644 index 000000000..168630de6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py @@ -0,0 +1,843 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for sources.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.annex.sources import ( + SOURCES_SCHEMA, + sources_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ], + "license_priority": {}, +} + + +BASE_ROW_POPULATED: dict = { + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "inception_date": "2024-01-01", + "url": "https://example.com/", + "url_archived": "https://example.com/", + "data_download_url": ["https://example.com/"], + "countries": ["US"], + "coverage_description": "", + "data_layer_name": "", + "oa_path": [""], + "address_levels": [""], + "file_format": "", + "update_frequency": "", + "build_source": "OpenAddresses", + "update_type": "continuous", + "update_schedule": [""], + "known_issues": "", + "notes": "", + "requires_attribution": "", + } + ], + "license_priority": {}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="sources::datasets:required", + scaffold={}, + mutate=set_at_path("datasets", None), + expected_field="datasets", + expected_check="required", + 
), + Scenario( + id="sources::datasets[].source_name:required", + scaffold={ + "datasets": [ + { + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "source_name": "", + } + ] + }, + mutate=set_at_path("datasets[].source_name", None), + expected_field="datasets[].source_name", + expected_check="required", + ), + Scenario( + id="sources::datasets[].source_dataset_name:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "source_dataset_name": "", + } + ] + }, + mutate=set_at_path("datasets[].source_dataset_name", None), + expected_field="datasets[].source_dataset_name", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url", None), + expected_field="datasets[].data_url", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + 
"license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url", "not-a-url"), + expected_field="datasets[].data_url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].data_url", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].data_url_archived:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url_archived", None), + expected_field="datasets[].data_url_archived", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 
0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url_archived", "not-a-url"), + expected_field="datasets[].data_url_archived", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].data_url_archived", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_url:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url", None), + expected_field="datasets[].license_url", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url", "not-a-url"), + 
expected_field="datasets[].license_url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].license_url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].license_url", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].license_url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_url_archived:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url_archived", None), + expected_field="datasets[].license_url_archived", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url_archived", "not-a-url"), + expected_field="datasets[].license_url_archived", + expected_check="url_format", + ), + Scenario( + 
id="sources::datasets[].license_url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].license_url_archived", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].license_url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_type:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_type": "", + } + ] + }, + mutate=set_at_path("datasets[].license_type", None), + expected_field="datasets[].license_type", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_text:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_text": "", + } + ] + }, + mutate=set_at_path("datasets[].license_text", None), + expected_field="datasets[].license_text", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_attribution:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", 
+ "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_attribution": "", + } + ] + }, + mutate=set_at_path("datasets[].license_attribution", None), + expected_field="datasets[].license_attribution", + expected_check="required", + ), + Scenario( + id="sources::datasets[].coverage_bbox:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", None), + expected_field="datasets[].coverage_bbox", + expected_check="required", + ), + Scenario( + id="sources::datasets[].coverage_bbox_min_length:array_min_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", []), + expected_field="datasets[].coverage_bbox_min_length", + expected_check="array_min_length", + ), + Scenario( + id="sources::datasets[].coverage_bbox_max_length:array_max_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + 
"license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", [{}, {}, {}, {}, {}]), + expected_field="datasets[].coverage_bbox_max_length", + expected_check="array_max_length", + ), + Scenario( + id="sources::datasets[].url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].url", "not-a-url"), + expected_field="datasets[].url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].url", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].url_archived", "not-a-url"), + expected_field="datasets[].url_archived", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].url_archived", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].data_download_url[]:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": ["https://example.com/"], + } + ] + }, + mutate=set_at_path("datasets[].data_download_url[]", "not-a-url"), + expected_field="datasets[].data_download_url[]", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_download_url[]:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": ["https://example.com/"], + } + ] + }, + mutate=set_at_path( + "datasets[].data_download_url[]", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_download_url[]", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].countries[]:country_code_alpha2", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "countries": ["US"], + } + ] + }, + mutate=set_at_path("datasets[].countries[]", "99"), + expected_field="datasets[].countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="sources::datasets[].build_source:enum", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "build_source": "OpenAddresses", + } + ] + }, + mutate=set_at_path("datasets[].build_source", "__INVALID__"), + expected_field="datasets[].build_source", + expected_check="enum", + ), + Scenario( + id="sources::datasets[].update_type:enum", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "update_type": "continuous", + } + ] + }, + mutate=set_at_path("datasets[].update_type", "__INVALID__"), + expected_field="datasets[].update_type", 
+ expected_check="enum", + ), + Scenario( + id="sources::license_priority:required", + scaffold={}, + mutate=set_at_path("license_priority", None), + expected_field="license_priority", + expected_check="required", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return sources_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SOURCES_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="sources", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SOURCES_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="sources", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("sources::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("sources::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py new file mode 100644 index 000000000..eddc5ff2a --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -0,0 +1,401 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for bathymetry.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.bathymetry import ( + BATHYMETRY_SCHEMA, + bathymetry_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "e1c02779-55d2-5d7e-8673-b7de1642ae68", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "bathymetry", + "version": 0, + "depth": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "e1c02779-55d2-5d7e-8673-b7de1642ae68", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "bathymetry", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "depth": 0, + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="bathymetry::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="bathymetry::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="bathymetry::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="bathymetry::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 
0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="bathymetry::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="bathymetry::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="bathymetry::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="bathymetry::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="bathymetry::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="bathymetry::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="bathymetry::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="bathymetry::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="bathymetry::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="bathymetry::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + 
expected_check="bounds", + ), + Scenario( + id="bathymetry::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="bathymetry::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="bathymetry::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="bathymetry::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="bathymetry::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="bathymetry::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + 
id="bathymetry::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="bathymetry::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="bathymetry::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="bathymetry::depth:required", + scaffold={}, + mutate=set_at_path("depth", None), + expected_field="depth", + expected_check="required", + ), + Scenario( + id="bathymetry::depth:bounds", + scaffold={}, + mutate=set_at_path("depth", -1), + expected_field="depth", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + 
id="bathymetry::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return bathymetry_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BATHYMETRY_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="bathymetry", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BATHYMETRY_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="bathymetry", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("bathymetry::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("bathymetry::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py new file mode 100644 index 000000000..ff98049f6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -0,0 +1,650 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for infrastructure.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.infrastructure import ( + INFRASTRUCTURE_SCHEMA, + infrastructure_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "e6cc8648-6bf9-5147-994f-621f86c9f103", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "infrastructure", + "version": 0, + "class": "aerialway_station", + "subtype": "aerialway", +} + + +BASE_ROW_POPULATED: dict = { + "id": "e6cc8648-6bf9-5147-994f-621f86c9f103", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "infrastructure", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "aerialway_station", + "subtype": "aerialway", + "height": 1.0, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="infrastructure::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="infrastructure::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + 
Scenario( + id="infrastructure::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="infrastructure::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="infrastructure::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="infrastructure::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="infrastructure::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="infrastructure::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="infrastructure::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="infrastructure::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="infrastructure::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="infrastructure::type:enum", + scaffold={}, + 
mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="infrastructure::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="infrastructure::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="infrastructure::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="infrastructure::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="infrastructure::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="infrastructure::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="infrastructure::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 
-1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="infrastructure::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="infrastructure::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="infrastructure::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="infrastructure::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + 
Scenario( + id="infrastructure::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="infrastructure::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + expected_field="surface", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="infrastructure::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="infrastructure::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="infrastructure::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="infrastructure::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="infrastructure::names.rules[].variant:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + 
expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + 
expected_check="linear_range_order", + ), + Scenario( + id="infrastructure::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="infrastructure::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="infrastructure::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return infrastructure_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + INFRASTRUCTURE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="infrastructure", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + INFRASTRUCTURE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="infrastructure", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse 
base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("infrastructure::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("infrastructure::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py new file mode 100644 index 000000000..6b07a4fdc --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -0,0 +1,634 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for land.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land import ( + LAND_SCHEMA, + land_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "52a8b331-e001-5c79-8dab-dba632af0028", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "52a8b331-e001-5c79-8dab-dba632af0028", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "archipelago", + "subtype": "crater", + "elevation": 9000, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land::id:required", + scaffold={}, + mutate=set_at_path("id", 
None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land::type:required", + scaffold={}, + mutate=set_at_path("type", None), + 
expected_field="type", + expected_check="required", + ), + Scenario( + id="land::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="land::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="land::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + 
mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land::class:enum", + scaffold={"class": "archipelago"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="land::subtype:enum", + scaffold={"subtype": "crater"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land::elevation:bounds", + scaffold={"elevation": 9000}, + mutate=set_at_path("elevation", 9001), + expected_field="elevation", + expected_check="bounds", + ), + Scenario( + id="land::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + 
expected_field="surface", + expected_check="enum", + ), + Scenario( + id="land::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="land::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="land::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="land::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="land::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="land::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="land::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="land::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + 
mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="land::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="land::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="land::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="land::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="land::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), 
+ expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="land::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="land::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + 
expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="land::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="land::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("land::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py new file mode 100644 index 000000000..c2783e05c --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ -0,0 +1,401 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for land_cover.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land_cover import ( + LAND_COVER_SCHEMA, + land_cover_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "b03200e9-9f2f-52ac-bae8-e562b3fd26cc", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "land_cover", + "version": 0, + "subtype": "barren", +} + + +BASE_ROW_POPULATED: dict = { + "id": "b03200e9-9f2f-52ac-bae8-e562b3fd26cc", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "land_cover", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "barren", + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land_cover::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land_cover::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land_cover::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land_cover::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + 
"bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land_cover::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land_cover::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land_cover::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land_cover::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land_cover::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land_cover::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land_cover::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="land_cover::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="land_cover::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land_cover::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + 
expected_check="bounds", + ), + Scenario( + id="land_cover::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_cover::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land_cover::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land_cover::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land_cover::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land_cover::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_cover::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + 
id="land_cover::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_cover::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_cover::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_cover::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="land_cover::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land_cover::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + 
id="land_cover::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_cover_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_COVER_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land_cover", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_COVER_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land_cover", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("land_cover::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land_cover::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py new file mode 100644 index 000000000..f19165178 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -0,0 +1,650 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for land_use.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land_use import ( + LAND_USE_SCHEMA, + land_use_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "fe1e5b5f-3ae6-5c23-ba83-444a90ccd659", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land_use", + "version": 0, + "class": "aboriginal_land", + "subtype": "agriculture", +} + + +BASE_ROW_POPULATED: dict = { + "id": "fe1e5b5f-3ae6-5c23-ba83-444a90ccd659", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land_use", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "aboriginal_land", + "subtype": "agriculture", + "elevation": 9000, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land_use::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land_use::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land_use::id:no_whitespace", + 
scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land_use::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land_use::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land_use::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land_use::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land_use::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land_use::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land_use::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land_use::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="land_use::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + 
id="land_use::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land_use::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="land_use::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_use::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land_use::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land_use::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land_use::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land_use::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_use::sources[].confidence:bounds_1", + scaffold={ + 
"sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_use::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_use::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_use::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_use::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="land_use::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="land_use::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="land_use::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land_use::elevation:bounds", + scaffold={"elevation": 9000}, + mutate=set_at_path("elevation", 9001), + expected_field="elevation", + expected_check="bounds", + ), + Scenario( + 
id="land_use::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + expected_field="surface", + expected_check="enum", + ), + Scenario( + id="land_use::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="land_use::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="land_use::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="land_use::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="land_use::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="land_use::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + 
id="land_use::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="land_use::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="land_use::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + 
"variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_use::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + 
expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="land_use::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="land_use::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_use_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_USE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land_use", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_USE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land_use", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. 
a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("land_use::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land_use::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py new file mode 100644 index 000000000..1c460c47f --- /dev/null +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -0,0 +1,620 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for water.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.water import ( + WATER_SCHEMA, + water_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "a7a5e73a-79c0-55d7-ab4d-5f9fc65fe915", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "water", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "a7a5e73a-79c0-55d7-ab4d-5f9fc65fe915", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "water", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "basin", + "subtype": "canal", + "is_intermittent": False, + "is_salt": False, + "level": 0, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="water::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="water::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + 
Scenario( + id="water::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="water::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="water::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="water::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="water::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="water::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="water::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="water::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="water::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="water::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( 
+ id="water::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="water::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="water::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="water::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="water::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="water::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="water::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="water::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="water::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": 
"/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="water::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="water::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="water::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="water::class:enum", + scaffold={"class": "basin"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="water::subtype:enum", + scaffold={"subtype": "canal"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="water::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="water::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="water::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + 
mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="water::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="water::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="water::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="water::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="water::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="water::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="water::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", 
+ "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="water::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="water::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="water::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="water::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + 
id="water::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="water::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="water::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="water::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="water::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="water::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="water::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + 
} + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return water_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + WATER_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="water", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + WATER_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="water", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("water::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("water::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py new file mode 100644 index 000000000..ebfd4a131 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -0,0 +1,708 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for building.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.buildings.building import ( + BUILDING_SCHEMA, + building_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "f59ea25f-5910-56e0-b595-25dd9d65ef4b", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "f59ea25f-5910-56e0-b595-25dd9d65ef4b", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "agricultural", + "class": "agricultural", + "has_parts": False, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "height": 1.0, + "is_underground": False, + "num_floors": 1, + "num_floors_underground": 1, + "min_height": 0.0, + "min_floor": 1, + "facade_color": "#aabbcc", + "facade_material": "brick", + "roof_material": "concrete", + "roof_shape": "dome", + "roof_direction": 0.0, + "roof_orientation": "across", + "roof_color": "#aabbcc", + "roof_height": 0.0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="building::id:required", + scaffold={}, + 
mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="building::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="building::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="building::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="building::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="building::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="building::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="building::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="building::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="building::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="building::type:required", + 
scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="building::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="building::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="building::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="building::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="building::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="building::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="building::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="building::sources[].confidence:bounds", + scaffold={ + "sources": [ + 
{"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building::subtype:enum", + scaffold={"subtype": "agricultural"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="building::class:enum", + scaffold={"class": "agricultural"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="building::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + 
Scenario( + id="building::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="building::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="building::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="building::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="building::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="building::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="building::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="building::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": 
"a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="building::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="building::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="building::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="building::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", 
+ "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="building::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="building::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="building::num_floors:bounds", + scaffold={"num_floors": 1}, + mutate=set_at_path("num_floors", 0), + expected_field="num_floors", + expected_check="bounds", + ), + Scenario( + 
id="building::num_floors_underground:bounds", + scaffold={"num_floors_underground": 1}, + mutate=set_at_path("num_floors_underground", 0), + expected_field="num_floors_underground", + expected_check="bounds", + ), + Scenario( + id="building::min_floor:bounds", + scaffold={"min_floor": 1}, + mutate=set_at_path("min_floor", 0), + expected_field="min_floor", + expected_check="bounds", + ), + Scenario( + id="building::facade_color:hex_color", + scaffold={"facade_color": "#aabbcc"}, + mutate=set_at_path("facade_color", "not-hex"), + expected_field="facade_color", + expected_check="hex_color", + ), + Scenario( + id="building::facade_material:enum", + scaffold={"facade_material": "brick"}, + mutate=set_at_path("facade_material", "__INVALID__"), + expected_field="facade_material", + expected_check="enum", + ), + Scenario( + id="building::roof_material:enum", + scaffold={"roof_material": "concrete"}, + mutate=set_at_path("roof_material", "__INVALID__"), + expected_field="roof_material", + expected_check="enum", + ), + Scenario( + id="building::roof_shape:enum", + scaffold={"roof_shape": "dome"}, + mutate=set_at_path("roof_shape", "__INVALID__"), + expected_field="roof_shape", + expected_check="enum", + ), + Scenario( + id="building::roof_direction:bounds", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", -1.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building::roof_direction:bounds_1", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", 360.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building::roof_orientation:enum", + scaffold={"roof_orientation": "across"}, + mutate=set_at_path("roof_orientation", "__INVALID__"), + expected_field="roof_orientation", + expected_check="enum", + ), + Scenario( + id="building::roof_color:hex_color", + scaffold={"roof_color": "#aabbcc"}, + mutate=set_at_path("roof_color", "not-hex"), + 
expected_field="roof_color", + expected_check="hex_color", + ), + Scenario( + id="building::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="building::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return building_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="building", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="building", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("building::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("building::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py new file mode 100644 index 000000000..73ab44863 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -0,0 +1,714 @@ +# Auto-generated — do not 
edit. + +"""Generated conformance tests for building_part.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.buildings.building_part import ( + BUILDING_PART_SCHEMA, + building_part_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "c039cf20-2e1c-5116-a393-4d834e447d46", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building_part", + "version": 0, + "building_id": "a", +} + + +BASE_ROW_POPULATED: dict = { + "id": "c039cf20-2e1c-5116-a393-4d834e447d46", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building_part", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "building_id": "a", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "height": 1.0, + "is_underground": False, + "num_floors": 1, + "num_floors_underground": 1, + "min_height": 0.0, + "min_floor": 1, + "facade_color": "#aabbcc", + "facade_material": "brick", + "roof_material": "concrete", + "roof_shape": "dome", + "roof_direction": 0.0, + "roof_orientation": "across", + "roof_color": "#aabbcc", + "roof_height": 0.0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="building_part::id:required", + scaffold={}, + 
mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="building_part::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="building_part::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="building_part::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="building_part::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="building_part::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="building_part::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="building_part::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="building_part::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="building_part::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + 
Scenario( + id="building_part::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="building_part::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="building_part::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="building_part::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="building_part::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building_part::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="building_part::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="building_part::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="building_part::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + 
Scenario( + id="building_part::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building_part::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building_part::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building_part::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building_part::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building_part::building_id:required", + scaffold={}, + mutate=set_at_path("building_id", None), + expected_field="building_id", + expected_check="required", + ), + Scenario( + id="building_part::building_id:string_min_length", + scaffold={}, + mutate=set_at_path("building_id", ""), + expected_field="building_id", + expected_check="string_min_length", + ), + Scenario( + id="building_part::building_id:no_whitespace", + scaffold={}, + 
mutate=set_at_path("building_id", "has whitespace"), + expected_field="building_id", + expected_check="no_whitespace", + ), + Scenario( + id="building_part::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="building_part::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="building_part::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="building_part::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="building_part::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="building_part::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + 
id="building_part::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="building_part::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="building_part::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + 
{ + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building_part::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + 
mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="building_part::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="building_part::num_floors:bounds", + scaffold={"num_floors": 1}, + mutate=set_at_path("num_floors", 0), + expected_field="num_floors", + expected_check="bounds", + ), + Scenario( + id="building_part::num_floors_underground:bounds", + scaffold={"num_floors_underground": 1}, + mutate=set_at_path("num_floors_underground", 0), + expected_field="num_floors_underground", + expected_check="bounds", + ), + Scenario( + id="building_part::min_floor:bounds", + scaffold={"min_floor": 1}, + mutate=set_at_path("min_floor", 0), + expected_field="min_floor", + expected_check="bounds", + ), + Scenario( + id="building_part::facade_color:hex_color", + scaffold={"facade_color": "#aabbcc"}, + mutate=set_at_path("facade_color", "not-hex"), + expected_field="facade_color", + expected_check="hex_color", + ), + Scenario( + id="building_part::facade_material:enum", + scaffold={"facade_material": "brick"}, + mutate=set_at_path("facade_material", "__INVALID__"), + expected_field="facade_material", + expected_check="enum", + ), + Scenario( + id="building_part::roof_material:enum", + scaffold={"roof_material": "concrete"}, + mutate=set_at_path("roof_material", "__INVALID__"), + expected_field="roof_material", + expected_check="enum", + ), + Scenario( + id="building_part::roof_shape:enum", + scaffold={"roof_shape": "dome"}, + mutate=set_at_path("roof_shape", "__INVALID__"), + expected_field="roof_shape", + expected_check="enum", + ), + Scenario( + id="building_part::roof_direction:bounds", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", -1.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + 
id="building_part::roof_direction:bounds_1", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", 360.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building_part::roof_orientation:enum", + scaffold={"roof_orientation": "across"}, + mutate=set_at_path("roof_orientation", "__INVALID__"), + expected_field="roof_orientation", + expected_check="enum", + ), + Scenario( + id="building_part::roof_color:hex_color", + scaffold={"roof_color": "#aabbcc"}, + mutate=set_at_path("roof_color", "not-hex"), + expected_field="roof_color", + expected_check="hex_color", + ), + Scenario( + id="building_part::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return building_part_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_PART_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="building_part", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_PART_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="building_part", + ) + + +def 
test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("building_part::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("building_part::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations 
diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py new file mode 100644 index 000000000..399495474 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -0,0 +1,1049 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for division.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division import ( + DIVISION_SCHEMA, + division_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "names": {"primary": "a"}, + "id": "97a2a97d-1eb8-5161-9ae5-bfb82594ed67", + "geometry": "POINT (0 0)", + "theme": "divisions", + "type": "division", + "version": 0, + "subtype": "country", + "country": "US", + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]], + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "97a2a97d-1eb8-5161-9ae5-bfb82594ed67", + "bbox": {"xmin": 0.0, "xmax": 
1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "divisions", + "type": "division", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "country", + "country": "US", + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]], + "admin_level": 0, + "class": "megacity", + "local_type": {}, + "region": "US-CA", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "norms": {"driving_side": "left"}, + "population": 0, + "capital_division_ids": ["a"], + "capital_of_divisions": [{"division_id": "a", "subtype": "country"}], + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="division::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="division::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="division::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="division::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + 
id="division::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="division::names:required", + scaffold={}, + mutate=set_at_path("names", None), + expected_field="names", + expected_check="required", + ), + Scenario( + id="division::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="division::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="division::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="division::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="division::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="division::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="division::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": 
"a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="division::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="division::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="division::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="division::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + 
Scenario( + id="division::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division::names.rules[].side:enum", + scaffold={ + "names": 
{ + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="division::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="division::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division::theme:required", + scaffold={}, + 
mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="division::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division::sources[].license:stripped", + scaffold={ + "sources": [ + 
{"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + 
expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="division::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division::hierarchies:required", + scaffold={}, + mutate=set_at_path("hierarchies", None), + expected_field="hierarchies", + expected_check="required", + ), + Scenario( + id="division::hierarchies_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("hierarchies", []), + expected_field="hierarchies_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::hierarchies[]_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("hierarchies[]", []), + expected_field="hierarchies[]_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::hierarchies[][].division_id:required", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", None), + expected_field="hierarchies[][].division_id", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].division_id:string_min_length", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", ""), + expected_field="hierarchies[][].division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::hierarchies[][].division_id:no_whitespace", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", "has whitespace"), + expected_field="hierarchies[][].division_id", + expected_check="no_whitespace", + ), + Scenario( + 
id="division::hierarchies[][].subtype:required", + scaffold={ + "hierarchies": [[{"division_id": "a", "name": "a", "subtype": "country"}]] + }, + mutate=set_at_path("hierarchies[][].subtype", None), + expected_field="hierarchies[][].subtype", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].subtype:enum", + scaffold={ + "hierarchies": [[{"division_id": "a", "name": "a", "subtype": "country"}]] + }, + mutate=set_at_path("hierarchies[][].subtype", "__INVALID__"), + expected_field="hierarchies[][].subtype", + expected_check="enum", + ), + Scenario( + id="division::hierarchies[][].name:required", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", None), + expected_field="hierarchies[][].name", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].name:string_min_length", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", ""), + expected_field="hierarchies[][].name", + expected_check="string_min_length", + ), + Scenario( + id="division::hierarchies[][].name:stripped", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", " has spaces "), + expected_field="hierarchies[][].name", + expected_check="stripped", + ), + Scenario( + id="division::parent_division_id:string_min_length", + scaffold={"parent_division_id": "a"}, + mutate=set_at_path("parent_division_id", ""), + expected_field="parent_division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::parent_division_id:no_whitespace", + scaffold={"parent_division_id": "a"}, + mutate=set_at_path("parent_division_id", "has whitespace"), + expected_field="parent_division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division::admin_level:bounds", + scaffold={"admin_level": 
0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division::class:enum", + scaffold={"class": "megacity"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division::perspectives.mode:required", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", None), + expected_field="perspectives.mode", + expected_check="required", + ), + Scenario( + id="division::perspectives.mode:enum", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", "__INVALID__"), + expected_field="perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division::perspectives.countries:required", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", None), + expected_field="perspectives.countries", + expected_check="required", + ), + Scenario( + id="division::perspectives.countries_min_length:array_min_length", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", []), + expected_field="perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::perspectives.countries[]:country_code_alpha2", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries[]", "99"), + expected_field="perspectives.countries[]", + 
expected_check="country_code_alpha2", + ), + Scenario( + id="division::norms.driving_side:enum", + scaffold={"norms": {"driving_side": "left"}}, + mutate=set_at_path("norms.driving_side", "__INVALID__"), + expected_field="norms.driving_side", + expected_check="enum", + ), + Scenario( + id="division::population:bounds", + scaffold={"population": 0}, + mutate=set_at_path("population", -1), + expected_field="population", + expected_check="bounds", + ), + Scenario( + id="division::capital_division_ids_min_length:array_min_length", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids", []), + expected_field="capital_division_ids_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::capital_division_ids[]:string_min_length", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids[]", ""), + expected_field="capital_division_ids[]", + expected_check="string_min_length", + ), + Scenario( + id="division::capital_division_ids[]:no_whitespace", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids[]", "has whitespace"), + expected_field="capital_division_ids[]", + expected_check="no_whitespace", + ), + Scenario( + id="division::capital_of_divisions_min_length:array_min_length", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions", []), + expected_field="capital_of_divisions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::capital_of_divisions[].division_id:required", + scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", None), + expected_field="capital_of_divisions[].division_id", + expected_check="required", + ), + Scenario( + id="division::capital_of_divisions[].division_id:string_min_length", + scaffold={"capital_of_divisions": [{"subtype": "country", 
"division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", ""), + expected_field="capital_of_divisions[].division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::capital_of_divisions[].division_id:no_whitespace", + scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", "has whitespace"), + expected_field="capital_of_divisions[].division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division::capital_of_divisions[].subtype:required", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions[].subtype", None), + expected_field="capital_of_divisions[].subtype", + expected_check="required", + ), + Scenario( + id="division::capital_of_divisions[].subtype:enum", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions[].subtype", "__INVALID__"), + expected_field="capital_of_divisions[].subtype", + expected_check="enum", + ), + Scenario( + id="division::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="division::model:require_if:0", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), 
+ Scenario( + id="division::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["parent_division_id"], "subtype", "country", negate=True + ), + expected_field="parent_division_id_required", + expected_check="require_if", + ), + Scenario( + id="division::model:forbid_if:7", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["parent_division_id"], "subtype", "country" + ), + expected_field="parent_division_id_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="division::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::hierarchies_unique:struct_unique", 
+ scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies"), + expected_field="hierarchies_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::hierarchies[]_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), + expected_field="hierarchies[]_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::capital_division_ids_unique:struct_unique", + scaffold={"capital_division_ids": ["a"]}, + mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), + expected_field="capital_division_ids_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::capital_of_divisions_unique:struct_unique", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), + expected_field="capital_of_divisions_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen 
produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("division::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py new file mode 100644 index 000000000..9f4d8e2f8 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -0,0 +1,759 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for division_area.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division_area import ( + DIVISION_AREA_SCHEMA, + division_area_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_radio_group, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "names": {"primary": "a"}, + "id": "4619f66f-2d01-5776-ba67-01e9f3ccd9d7", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "divisions", + "type": "division_area", + "version": 0, + "subtype": "country", + "class": "land", + "division_id": "a", + "country": "US", + "is_land": True, + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "4619f66f-2d01-5776-ba67-01e9f3ccd9d7", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "divisions", + "type": "division_area", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + 
"subtype": "country", + "class": "land", + "is_land": True, + "is_territorial": False, + "division_id": "a", + "country": "US", + "region": "US-CA", + "admin_level": 0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division_area::names:required", + scaffold={}, + mutate=set_at_path("names", None), + expected_field="names", + expected_check="required", + ), + Scenario( + id="division_area::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="division_area::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="division_area::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="division_area::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="division_area::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="division_area::names.rules[].variant:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="division_area::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="division_area::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + 
expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + 
expected_check="linear_range_order", + ), + Scenario( + id="division_area::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="division_area::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division_area::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="division_area::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division_area::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division_area::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division_area::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division_area::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division_area::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", 
"POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division_area::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division_area::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division_area::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="division_area::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division_area::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division_area::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division_area::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_area::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division_area::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division_area::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + 
mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division_area::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division_area::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_area::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_area::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_area::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_area::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="division_area::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division_area::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division_area::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="division_area::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division_area::division_id:required", + scaffold={}, + mutate=set_at_path("division_id", None), + expected_field="division_id", + expected_check="required", + ), + Scenario( + id="division_area::division_id:string_min_length", + scaffold={}, + mutate=set_at_path("division_id", ""), + expected_field="division_id", + expected_check="string_min_length", + ), + Scenario( + id="division_area::division_id:no_whitespace", + scaffold={}, + mutate=set_at_path("division_id", "has whitespace"), + expected_field="division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division_area::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="division_area::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_area::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division_area::admin_level:bounds", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + 
id="division_area::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division_area::model:radio_group:0", + scaffold={}, + mutate=lambda row: mutate_radio_group(row, ["is_land", "is_territorial"]), + expected_field="radio_group", + expected_check="radio_group", + ), + Scenario( + id="division_area::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + 
"variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_area::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_area_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_AREA_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division_area", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_AREA_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division_area", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division_area::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. 
+ + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("division_area::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py new file mode 100644 index 000000000..27e05e731 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -0,0 +1,574 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for division_boundary.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division_boundary import ( + DIVISION_BOUNDARY_SCHEMA, + division_boundary_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_radio_group, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "3c9e8190-33ce-5962-9668-d467336901b4", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "divisions", + "type": "division_boundary", + "version": 0, + "subtype": "country", + "class": "land", + "division_ids": ["a", "a1"], + "is_land": True, + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "3c9e8190-33ce-5962-9668-d467336901b4", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "divisions", + "type": "division_boundary", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "country", + "class": "land", + "is_land": True, + "is_territorial": False, + "division_ids": ["a", "a1"], + "region": "US-CA", + "admin_level": 0, + "is_disputed": False, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division_boundary::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division_boundary::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + 
expected_check="string_min_length", + ), + Scenario( + id="division_boundary::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division_boundary::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division_boundary::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division_boundary::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division_boundary::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division_boundary::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division_boundary::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division_boundary::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division_boundary::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + 
id="division_boundary::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division_boundary::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division_boundary::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division_boundary::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division_boundary::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division_boundary::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division_boundary::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", 
"confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division_boundary::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division_boundary::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division_boundary::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="division_boundary::class:enum", + scaffold={}, + 
mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division_boundary::division_ids:required", + scaffold={}, + mutate=set_at_path("division_ids", None), + expected_field="division_ids", + expected_check="required", + ), + Scenario( + id="division_boundary::division_ids_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("division_ids", []), + expected_field="division_ids_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::division_ids_max_length:array_max_length", + scaffold={}, + mutate=set_at_path("division_ids", [{}, {}, {}]), + expected_field="division_ids_max_length", + expected_check="array_max_length", + ), + Scenario( + id="division_boundary::division_ids[]:string_min_length", + scaffold={}, + mutate=set_at_path("division_ids[]", ""), + expected_field="division_ids[]", + expected_check="string_min_length", + ), + Scenario( + id="division_boundary::division_ids[]:no_whitespace", + scaffold={}, + mutate=set_at_path("division_ids[]", "has whitespace"), + expected_field="division_ids[]", + expected_check="no_whitespace", + ), + Scenario( + id="division_boundary::country:country_code_alpha2", + scaffold={"country": "US"}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_boundary::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division_boundary::admin_level:bounds", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division_boundary::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + 
id="division_boundary::perspectives.mode:required", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", None), + expected_field="perspectives.mode", + expected_check="required", + ), + Scenario( + id="division_boundary::perspectives.mode:enum", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", "__INVALID__"), + expected_field="perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division_boundary::perspectives.countries:required", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", None), + expected_field="perspectives.countries", + expected_check="required", + ), + Scenario( + id="division_boundary::perspectives.countries_min_length:array_min_length", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", []), + expected_field="perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::perspectives.countries[]:country_code_alpha2", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries[]", "99"), + expected_field="perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_boundary::model:radio_group:0", + scaffold={}, + mutate=lambda row: mutate_radio_group(row, ["is_land", "is_territorial"]), + expected_field="radio_group", + expected_check="radio_group", + ), + Scenario( + id="division_boundary::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], 
"subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:7", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["country"], "subtype", "country", negate=True + ), + expected_field="country_required", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:forbid_if:8", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["country"], "subtype", "country"), + expected_field="country_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="division_boundary::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_boundary::division_ids_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "division_ids"), + 
expected_field="division_ids_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_boundary::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_boundary_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_BOUNDARY_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division_boundary", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_BOUNDARY_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division_boundary", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division_boundary::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("division_boundary::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py new file mode 100644 index 000000000..ad8fc0002 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -0,0 +1,1207 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for place.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.places.place import ( + PLACE_SCHEMA, + place_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "771dc733-3cd9-5ec4-a0b9-946ff01afb4e", + "geometry": "POINT (0 0)", + "theme": "places", + "type": "place", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "771dc733-3cd9-5ec4-a0b9-946ff01afb4e", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "places", + "type": "place", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "operating_status": "open", + "categories": {"primary": "snake_case", "alternate": ["snake_case"]}, + "basic_category": "snake_case", + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + }, + "confidence": 0.0, + "websites": ["https://example.com/"], + "socials": ["https://example.com/"], + "emails": ["user@example.com"], + "phones": ["+1 555-555-5555"], + "brand": { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "wikidata": "Q42", + }, + "addresses": [ + { + "freeform": "", + "locality": "", + "postcode": "", + "region": "US-CA", + "country": "US", + } + ], + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + 
"value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="place::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="place::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="place::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="place::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="place::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="place::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="place::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="place::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="place::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + 
expected_field="theme", + expected_check="required", + ), + Scenario( + id="place::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="place::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="place::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="place::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="place::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="place::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="place::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="place::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="place::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + 
] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="place::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="place::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="place::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="place::operating_status:enum", + scaffold={"operating_status": "open"}, + mutate=set_at_path("operating_status", "__INVALID__"), + expected_field="operating_status", + expected_check="enum", + ), + Scenario( + id="place::categories.primary:required", + scaffold={"categories": {"primary": "snake_case"}}, + mutate=set_at_path("categories.primary", 
None), + expected_field="categories.primary", + expected_check="required", + ), + Scenario( + id="place::categories.primary:snake_case", + scaffold={"categories": {"primary": "snake_case"}}, + mutate=set_at_path("categories.primary", "HAS SPACES"), + expected_field="categories.primary", + expected_check="snake_case", + ), + Scenario( + id="place::categories.alternate[]:snake_case", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=set_at_path("categories.alternate[]", "HAS SPACES"), + expected_field="categories.alternate[]", + expected_check="snake_case", + ), + Scenario( + id="place::basic_category:snake_case", + scaffold={"basic_category": "snake_case"}, + mutate=set_at_path("basic_category", "HAS SPACES"), + expected_field="basic_category", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.primary:required", + scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + mutate=set_at_path("taxonomy.primary", None), + expected_field="taxonomy.primary", + expected_check="required", + ), + Scenario( + id="place::taxonomy.primary:snake_case", + scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + mutate=set_at_path("taxonomy.primary", "HAS SPACES"), + expected_field="taxonomy.primary", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.hierarchy:required", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy", None), + expected_field="taxonomy.hierarchy", + expected_check="required", + ), + Scenario( + id="place::taxonomy.hierarchy_min_length:array_min_length", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy", []), + expected_field="taxonomy.hierarchy_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::taxonomy.hierarchy[]:snake_case", + scaffold={"taxonomy": {"primary": 
"snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy[]", "HAS SPACES"), + expected_field="taxonomy.hierarchy[]", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.alternates_min_length:array_min_length", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=set_at_path("taxonomy.alternates", []), + expected_field="taxonomy.alternates_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::taxonomy.alternates[]:snake_case", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=set_at_path("taxonomy.alternates[]", "HAS SPACES"), + expected_field="taxonomy.alternates[]", + expected_check="snake_case", + ), + Scenario( + id="place::confidence:bounds", + scaffold={"confidence": 0.0}, + mutate=set_at_path("confidence", -1.0), + expected_field="confidence", + expected_check="bounds", + ), + Scenario( + id="place::confidence:bounds_1", + scaffold={"confidence": 0.0}, + mutate=set_at_path("confidence", 2.0), + expected_field="confidence", + expected_check="bounds", + ), + Scenario( + id="place::websites_min_length:array_min_length", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path("websites", []), + expected_field="websites_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::websites[]:url_format", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path("websites[]", "not-a-url"), + expected_field="websites[]", + expected_check="url_format", + ), + Scenario( + id="place::websites[]:url_length", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path( + "websites[]", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="websites[]", + expected_check="url_length", + ), + Scenario( + id="place::socials_min_length:array_min_length", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path("socials", []), + expected_field="socials_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::socials[]:url_format", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path("socials[]", "not-a-url"), + expected_field="socials[]", + expected_check="url_format", + ), + Scenario( + id="place::socials[]:url_length", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path( + "socials[]", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="socials[]", + expected_check="url_length", + ), + Scenario( + id="place::emails_min_length:array_min_length", + scaffold={"emails": ["user@example.com"]}, + mutate=set_at_path("emails", []), + expected_field="emails_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::emails[]:email", + scaffold={"emails": ["user@example.com"]}, + mutate=set_at_path("emails[]", "not-an-email"), + expected_field="emails[]", + expected_check="email", + ), + Scenario( + id="place::phones_min_length:array_min_length", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=set_at_path("phones", []), + expected_field="phones_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::phones[]:phone_number", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=set_at_path("phones[]", "1234567890"), + expected_field="phones[]", + expected_check="phone_number", + ), + Scenario( + id="place::brand.names.primary:required", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", None), + expected_field="brand.names.primary", + expected_check="required", + ), + Scenario( + 
id="place::brand.names.primary:string_min_length", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", ""), + expected_field="brand.names.primary", + expected_check="string_min_length", + ), + Scenario( + id="place::brand.names.primary:stripped", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", " has spaces "), + expected_field="brand.names.primary", + expected_check="stripped", + ), + Scenario( + id="place::brand.names.rules[].value:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", None), + expected_field="brand.names.rules[].value", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].value:string_min_length", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", ""), + expected_field="brand.names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="place::brand.names.rules[].value:stripped", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", " has spaces "), + expected_field="brand.names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="place::brand.names.rules[].variant:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].variant", None), + expected_field="brand.names.rules[].variant", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].variant:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common"}], + } + } + }, + 
mutate=set_at_path("brand.names.rules[].variant", "__INVALID__"), + expected_field="brand.names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="place::brand.names.rules[].language:language_tag", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].language", "123"), + expected_field="brand.names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="place::brand.names.rules[].perspectives.mode:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "countries": ["US"], + "mode": "accepted_by", + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.mode", None), + expected_field="brand.names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].perspectives.mode:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "countries": ["US"], + "mode": "accepted_by", + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.mode", "__INVALID__"), + expected_field="brand.names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries", None), + expected_field="brand.names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "brand": { + "names": { + "primary": "a", 
+ "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries", []), + expected_field="brand.names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries[]", "99"), + expected_field="brand.names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_length", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [0.5]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_bounds", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [1.5, 2.0]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_order", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [0.8, 0.2]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="place::brand.names.rules[].side:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].side", "__INVALID__"), + expected_field="brand.names.rules[].side", + expected_check="enum", + ), + Scenario( + id="place::brand.wikidata:wikidata_id", + scaffold={"brand": {"wikidata": "Q42"}}, + mutate=set_at_path("brand.wikidata", "P999"), + expected_field="brand.wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="place::addresses_min_length:array_min_length", + scaffold={"addresses": [{}]}, + mutate=set_at_path("addresses", []), + expected_field="addresses_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::addresses[].region:region_code", + scaffold={"addresses": [{"region": "US-CA"}]}, + mutate=set_at_path("addresses[].region", "99-999"), + expected_field="addresses[].region", + expected_check="region_code", + ), + Scenario( + id="place::addresses[].country:country_code_alpha2", + scaffold={"addresses": [{"country": "US"}]}, + mutate=set_at_path("addresses[].country", "99"), + expected_field="addresses[].country", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="place::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="place::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="place::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": 
"common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="place::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="place::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="place::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="place::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="place::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="place::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + 
expected_check="required", + ), + Scenario( + id="place::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="place::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="place::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + 
expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="place::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="place::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::categories.alternate_unique:struct_unique", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "categories.alternate"), + expected_field="categories.alternate_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::taxonomy.hierarchy_unique:struct_unique", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), + expected_field="taxonomy.hierarchy_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="place::taxonomy.alternates_unique:struct_unique", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), + expected_field="taxonomy.alternates_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::websites_unique:struct_unique", + scaffold={"websites": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "websites"), + expected_field="websites_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::socials_unique:struct_unique", + scaffold={"socials": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "socials"), + expected_field="socials_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::emails_unique:struct_unique", + scaffold={"emails": ["user@example.com"]}, + mutate=lambda row: mutate_unique_items(row, "emails"), + expected_field="emails_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::phones_unique:struct_unique", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=lambda row: mutate_unique_items(row, "phones"), + expected_field="phones_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=lambda row: mutate_unique_items( + row, "brand.names.rules[].perspectives.countries" + ), + expected_field="brand.names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": 
"accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return place_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + PLACE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="place", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + PLACE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="place", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("place::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("place::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py new file mode 100644 index 000000000..6552a950a --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -0,0 +1,342 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for connector.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.connector import ( + CONNECTOR_SCHEMA, + connector_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "257724e0-9751-53b0-9891-95a9ffa523da", + "geometry": "POINT (0 0)", + "theme": "transportation", + "type": "connector", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "257724e0-9751-53b0-9891-95a9ffa523da", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "transportation", + "type": "connector", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="connector::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="connector::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="connector::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="connector::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + 
id="connector::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="connector::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="connector::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="connector::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="connector::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="connector::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="connector::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="connector::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="connector::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="connector::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="connector::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", 
"dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="connector::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="connector::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="connector::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="connector::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="connector::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="connector::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="connector::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + 
mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="connector::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="connector::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="connector::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return connector_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + CONNECTOR_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="connector", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + CONNECTOR_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="connector", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. 
a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("connector::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("connector::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py new file mode 100644 index 000000000..22cfd600b --- /dev/null +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -0,0 +1,1676 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "rail", + "class": "funicular", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "rail", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", 
+ "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + "level_rules": [{"value": 0, "between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], + "class": "funicular", + "rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + 
expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + 
Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": 
"allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + 
id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + 
"access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + 
id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + 
expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + 
mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + 
id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + 
expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + 
scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + 
mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="segment::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="segment::rail_flags_min_length:array_min_length", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags", []), + expected_field="rail_flags_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::rail_flags[].values:required", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values", None), + expected_field="rail_flags[].values", + expected_check="required", + ), + Scenario( + id="segment::rail_flags[].values_min_length:array_min_length", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values", []), + expected_field="rail_flags[].values_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::rail_flags[].values[]:enum", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values[]", "__INVALID__"), + expected_field="rail_flags[].values[]", + expected_check="enum", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_length", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [0.5]), + expected_field="rail_flags[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_bounds", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [1.5, 2.0]), + expected_field="rail_flags[].between", + 
expected_check="linear_range_bounds", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_order", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [0.8, 0.2]), + expected_field="rail_flags[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + 
expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + 
expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + 
id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], 
"subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::rail_flags_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags"), + expected_field="rail_flags_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::rail_flags[].values_unique:struct_unique", + scaffold={"rail_flags": [{"values": 
["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), + expected_field="rail_flags[].values_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py new file mode 100644 index 000000000..0a8d0a946 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -0,0 +1,3085 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "road", + "class": "motorway", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "road", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + 
"level_rules": [{"value": 0, "between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], + "class": "motorway", + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + "symbols": ["motorway"], + "when": {"heading": "forward"}, + } + ], + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}], + "road_surface": [{"value": "unknown", "between": [0.0, 1.0]}], + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + "is_max_speed_variable": False, + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "subclass": "link", + "width_rules": [{"value": 1.0, "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + 
scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + 
scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", 
"dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), 
+ Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + 
Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { 
+ "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + 
}, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", 
None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + 
expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + 
Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + 
mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + 
expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + 
mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="segment::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].from_connector_id:required", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", None), + expected_field="destinations[].from_connector_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].from_connector_id:string_min_length", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + 
"final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", ""), + expected_field="destinations[].from_connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].from_connector_id:no_whitespace", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", "has whitespace"), + expected_field="destinations[].from_connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].to_connector_id:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", None), + expected_field="destinations[].to_connector_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].to_connector_id:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", ""), + expected_field="destinations[].to_connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].to_connector_id:no_whitespace", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", "has whitespace"), + expected_field="destinations[].to_connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].to_segment_id:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": 
"a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", None), + expected_field="destinations[].to_segment_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].to_segment_id:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", ""), + expected_field="destinations[].to_segment_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].to_segment_id:no_whitespace", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", "has whitespace"), + expected_field="destinations[].to_segment_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].final_heading:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("destinations[].final_heading", None), + expected_field="destinations[].final_heading", + expected_check="required", + ), + Scenario( + id="segment::destinations[].final_heading:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("destinations[].final_heading", "__INVALID__"), + expected_field="destinations[].final_heading", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].labels_min_length:array_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + 
mutate=set_at_path("destinations[].labels", []), + expected_field="destinations[].labels_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::destinations[].labels[].value:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", None), + expected_field="destinations[].labels[].value", + expected_check="required", + ), + Scenario( + id="segment::destinations[].labels[].value:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", ""), + expected_field="destinations[].labels[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].labels[].value:stripped", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", " has spaces "), + expected_field="destinations[].labels[].value", + expected_check="stripped", + ), + Scenario( + id="segment::destinations[].labels[].type:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].type", None), + expected_field="destinations[].labels[].type", + expected_check="required", + ), + Scenario( + id="segment::destinations[].labels[].type:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + 
"to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].type", "__INVALID__"), + expected_field="destinations[].labels[].type", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].symbols[]:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=set_at_path("destinations[].symbols[]", "__INVALID__"), + expected_field="destinations[].symbols[]", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].when.heading:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("destinations[].when.heading", None), + expected_field="destinations[].when.heading", + expected_check="required", + ), + Scenario( + id="segment::destinations[].when.heading:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("destinations[].when.heading", "__INVALID__"), + expected_field="destinations[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].sequence:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence", None), + expected_field="prohibited_transitions[].sequence", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].sequence_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": 
[{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence", []), + expected_field="prohibited_transitions[].sequence_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].connector_id", None), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:string_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].connector_id", ""), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:no_whitespace", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].sequence[].connector_id", "has whitespace" + ), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].segment_id:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].segment_id", None), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="required", + ), + Scenario( + 
id="segment::prohibited_transitions[].sequence[].segment_id:string_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].segment_id", ""), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].segment_id:no_whitespace", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].sequence[].segment_id", "has whitespace" + ), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::prohibited_transitions[].final_heading:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("prohibited_transitions[].final_heading", None), + expected_field="prohibited_transitions[].final_heading", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].final_heading:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("prohibited_transitions[].final_heading", "__INVALID__"), + expected_field="prohibited_transitions[].final_heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [0.5]), + 
expected_field="prohibited_transitions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_bounds", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [1.5, 2.0]), + expected_field="prohibited_transitions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_order", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [0.8, 0.2]), + expected_field="prohibited_transitions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::prohibited_transitions[].when.heading:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.heading", "__INVALID__"), + expected_field="prohibited_transitions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.mode", []), + expected_field="prohibited_transitions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": 
"forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.mode[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.using_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.using", []), + expected_field="prohibited_transitions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.using[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.using[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.recognized", []), + expected_field="prohibited_transitions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.recognized[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.recognized[]", + 
expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle", []), + expected_field="prohibited_transitions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].dimension:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].dimension", None), + expected_field="prohibited_transitions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].dimension:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].comparison:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].comparison", None), + expected_field="prohibited_transitions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + 
id="segment::prohibited_transitions[].when.vehicle[].comparison:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:required_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:bounds", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", -1.0), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", 
"segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:required_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:enum_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::road_flags_min_length:array_min_length", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags", []), + expected_field="road_flags_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_flags[].values:required", + 
scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values", None), + expected_field="road_flags[].values", + expected_check="required", + ), + Scenario( + id="segment::road_flags[].values_min_length:array_min_length", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values", []), + expected_field="road_flags[].values_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_flags[].values[]:enum", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values[]", "__INVALID__"), + expected_field="road_flags[].values[]", + expected_check="enum", + ), + Scenario( + id="segment::road_flags[].between:linear_range_length", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [0.5]), + expected_field="road_flags[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::road_flags[].between:linear_range_bounds", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [1.5, 2.0]), + expected_field="road_flags[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::road_flags[].between:linear_range_order", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [0.8, 0.2]), + expected_field="road_flags[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::road_surface_min_length:array_min_length", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface", []), + expected_field="road_surface_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_surface[].value:required", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface[].value", None), + 
expected_field="road_surface[].value", + expected_check="required", + ), + Scenario( + id="segment::road_surface[].value:enum", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface[].value", "__INVALID__"), + expected_field="road_surface[].value", + expected_check="enum", + ), + Scenario( + id="segment::road_surface[].between:linear_range_length", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [0.5]), + expected_field="road_surface[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::road_surface[].between:linear_range_bounds", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [1.5, 2.0]), + expected_field="road_surface[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::road_surface[].between:linear_range_order", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [0.8, 0.2]), + expected_field="road_surface[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::speed_limits_min_length:array_min_length", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits", []), + expected_field="speed_limits_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:required", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", None), + expected_field="speed_limits[].max_speed.value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:bounds", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", 0), + 
expected_field="speed_limits[].max_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:bounds_1", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", 351), + expected_field="speed_limits[].max_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].max_speed.unit:required", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].max_speed.unit", None), + expected_field="speed_limits[].max_speed.unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].max_speed.unit:enum", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].max_speed.unit", "__INVALID__"), + expected_field="speed_limits[].max_speed.unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:required", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", None), + expected_field="speed_limits[].min_speed.value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:bounds", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", 0), + expected_field="speed_limits[].min_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:bounds_1", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", 351), + expected_field="speed_limits[].min_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].min_speed.unit:required", + scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + 
mutate=set_at_path("speed_limits[].min_speed.unit", None), + expected_field="speed_limits[].min_speed.unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].min_speed.unit:enum", + scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].min_speed.unit", "__INVALID__"), + expected_field="speed_limits[].min_speed.unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_length", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [0.5]), + expected_field="speed_limits[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_bounds", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [1.5, 2.0]), + expected_field="speed_limits[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_order", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [0.8, 0.2]), + expected_field="speed_limits[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::speed_limits[].when.heading:enum", + scaffold={"speed_limits": [{"when": {"heading": "forward"}}]}, + mutate=set_at_path("speed_limits[].when.heading", "__INVALID__"), + expected_field="speed_limits[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.mode_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=set_at_path("speed_limits[].when.mode", []), + expected_field="speed_limits[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.mode[]:enum", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=set_at_path("speed_limits[].when.mode[]", 
"__INVALID__"), + expected_field="speed_limits[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.using_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=set_at_path("speed_limits[].when.using", []), + expected_field="speed_limits[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.using[]:enum", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=set_at_path("speed_limits[].when.using[]", "__INVALID__"), + expected_field="speed_limits[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.recognized_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=set_at_path("speed_limits[].when.recognized", []), + expected_field="speed_limits[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.recognized[]:enum", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=set_at_path("speed_limits[].when.recognized[]", "__INVALID__"), + expected_field="speed_limits[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle_min_length:array_min_length", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=set_at_path("speed_limits[].when.vehicle", []), + expected_field="speed_limits[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].dimension:required", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].dimension", None), + expected_field="speed_limits[].when.vehicle[].dimension", + 
expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].dimension:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].dimension", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].comparison:required", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].comparison", None), + expected_field="speed_limits[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].comparison:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].comparison", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:required", + scaffold={ + "speed_limits": [{"when": {"vehicle": [{"dimension": "axle_count"}]}}] + }, + mutate=set_at_path("speed_limits[].when.vehicle[].value", None), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:required_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].value", None), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:bounds", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].value", -1.0), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:required", + scaffold={"speed_limits": 
[{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:required_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:enum_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::subclass:enum", + scaffold={"subclass": "link"}, + mutate=set_at_path("subclass", "__INVALID__"), + expected_field="subclass", + expected_check="enum", + ), + Scenario( + id="segment::width_rules_min_length:array_min_length", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules", []), + expected_field="width_rules_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::width_rules[].value:required", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules[].value", None), + expected_field="width_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::width_rules[].value:bounds", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules[].value", 0.0), + expected_field="width_rules[].value", + 
expected_check="bounds", + ), + Scenario( + id="segment::width_rules[].between:linear_range_length", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [0.5]), + expected_field="width_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::width_rules[].between:linear_range_bounds", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [1.5, 2.0]), + expected_field="width_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::width_rules[].between:linear_range_order", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [0.8, 0.2]), + expected_field="width_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + 
expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": 
[{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + 
expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + 
["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], "subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": 
["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": 
["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::destinations[].labels_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), + expected_field="destinations[].labels_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::destinations[].symbols_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), + expected_field="destinations[].symbols_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].sequence_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].sequence" + ), + expected_field="prohibited_transitions[].sequence_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.mode" + ), + expected_field="prohibited_transitions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="segment::prohibited_transitions[].when.using_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.using" + ), + expected_field="prohibited_transitions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.recognized" + ), + expected_field="prohibited_transitions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.vehicle" + ), + expected_field="prohibited_transitions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_flags_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags"), + expected_field="road_flags_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_flags[].values_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags[].values"), + 
expected_field="road_flags[].values_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_surface_unique:struct_unique", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=lambda row: mutate_unique_items(row, "road_surface"), + expected_field="road_surface_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits_unique:struct_unique", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits"), + expected_field="speed_limits_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.mode_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), + expected_field="speed_limits[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.using_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.using"), + expected_field="speed_limits[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.recognized_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), + expected_field="speed_limits[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.vehicle_unique:struct_unique", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), + expected_field="speed_limits[].when.vehicle_unique", + 
expected_check="struct_unique", + ), + Scenario( + id="segment::width_rules_unique:struct_unique", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=lambda row: mutate_unique_items(row, "width_rules"), + expected_field="width_rules_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py new file mode 100644 index 000000000..bf3e6b1d6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -0,0 +1,1596 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "water", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "water", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + "level_rules": [{"value": 0, 
"between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + 
scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": 
[ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + 
expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + 
scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + 
expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + 
expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + 
expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + 
id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + 
expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + 
expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + 
expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + 
expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", 
"using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + 
expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": 
{"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: 
mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], "subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( 
+ row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + 
"access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. 
+ + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations From 1da7d9e0256bc99e46b11b421c833652e188190b Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 13 May 2026 09:39:14 -0700 
Subject: [PATCH 06/11] chore(ci): Pin Java 17 for lowest-direct cell PySpark 3.4 (the declared floor) doesn't run on Java 21, the default JDK on ubuntu-latest runners -- it hits NoSuchMethodException on java.nio.DirectByteBuffer.<init>(long, int), removed in JDK 21. Pin the lowest-direct cell to Java 17 so the resolved pyspark==3.4.0 can actually start. The default cell (which resolves to a current pyspark 4.x) keeps the runner's default Java 21. Signed-off-by: Seth Fitzsimmons --- .github/workflows/check-python-code.yaml | 15 +- .../overture/schema/addresses/test_address.py | 14 +- .../overture/schema/base/test_bathymetry.py | 14 +- .../schema/base/test_infrastructure.py | 54 +- .../overture/schema/base/test_land.py | 54 +- .../overture/schema/base/test_land_cover.py | 14 +- .../overture/schema/base/test_land_use.py | 54 +- .../overture/schema/base/test_water.py | 54 +- .../schema/buildings/test_building.py | 54 +- .../schema/buildings/test_building_part.py | 54 +- .../schema/divisions/test_division.py | 124 ++-- .../schema/divisions/test_division_area.py | 54 +- .../divisions/test_division_boundary.py | 42 +- .../overture/schema/places/test_place.py | 214 +++--- .../schema/transportation/test_connector.py | 14 +- .../transportation/test_segment_rail.py | 434 ++++--------- .../transportation/test_segment_road.py | 612 +++++++++--------- .../transportation/test_segment_water.py | 406 ++++-------- 18 files changed, 964 insertions(+), 1317 deletions(-) diff --git a/.github/workflows/check-python-code.yaml b/.github/workflows/check-python-code.yaml index b3edac612..bf3c54aee 100644 --- a/.github/workflows/check-python-code.yaml +++ b/.github/workflows/check-python-code.yaml @@ -31,7 +31,8 @@ jobs: # Default resolution exercises the committed lock against every # supported Python minor version. The lowest-direct cell pins each # direct dependency to its declared floor (see UV_RESOLUTION below) - # and runs only on the Python floor.
+ # and runs only on the Python floor, since the resolved-low pyspark + # 3.4 wheels exist for 3.10/3.11 only. python: ["3.10", "3.11", "3.12", "3.13", "3.14"] resolution: [default] include: @@ -56,9 +57,19 @@ jobs: with: python-version: ${{ matrix.python }} + # PySpark 3.4 (the declared minimum) does not support Java 21, which is + # the default JDK on ubuntu-latest runners. Pin to Java 17 for the + # lowest-direct cell so the resolved pyspark==3.4.0 can actually start. + - name: Set up JDK 17 + if: matrix.resolution == 'lowest-direct' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + distribution: temurin + java-version: '17' + # UV_RESOLUTION=lowest-direct makes `uv sync` re-resolve every direct # dependency to the lowest version permitted by pyproject.toml. This - # exercises the declared floor (e.g. pydantic==2.12.0) instead of + # exercises the declared floor (e.g. pyspark==3.4.0) instead of # whatever the committed lock happens to point at. Failures here mean # a direct dep's minimum needs to be bumped. 
Set via GITHUB_ENV only # in the relevant cell so default cells run with no UV_RESOLUTION at diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py index b8da5893d..3c5b66709 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -169,6 +169,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="address::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="address::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -368,13 +375,6 @@ expected_field="unit", expected_check="stripped", ), - Scenario( - id="address::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py index eddc5ff2a..9fdc1679d 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -164,6 +164,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="bathymetry::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + 
mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="bathymetry::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -307,13 +314,6 @@ expected_field="cartography.max_zoom", expected_check="bounds", ), - Scenario( - id="bathymetry::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py index ff98049f6..33839d9ee 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -184,6 +184,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="infrastructure::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="infrastructure::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -463,6 +470,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, 
"names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="infrastructure::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -536,33 +563,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="infrastructure::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py index 6b07a4fdc..2784a5806 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -182,6 +182,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="land::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -447,6 +454,26 @@ 
expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="land::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="land::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -520,33 +547,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="land::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="land::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py index c2783e05c..962268634 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ -164,6 +164,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + 
id="land_cover::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land_cover::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -307,13 +314,6 @@ expected_field="cartography.max_zoom", expected_check="bounds", ), - Scenario( - id="land_cover::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py index f19165178..19bd140a9 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -184,6 +184,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="land_use::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land_use::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -463,6 +470,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="land_use::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": 
["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="land_use::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -536,33 +563,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="land_use::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="land_use::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py index 1c460c47f..9a1c9d57e 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -182,6 +182,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="water::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="water::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": 
"/valid/pointer"}]}, @@ -433,6 +440,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="water::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="water::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -506,33 +533,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="water::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="water::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py index ebfd4a131..9102f235c 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -193,6 +193,13 @@ expected_field="sources_min_length", 
expected_check="array_min_length", ), + Scenario( + id="building::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="building::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -444,6 +451,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="building::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="building::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -594,33 +621,6 @@ expected_field="roof_color", expected_check="hex_color", ), - Scenario( - id="building::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="building::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py index 73ab44863..45589511f 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -192,6 +192,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="building_part::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="building_part::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -450,6 +457,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="building_part::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="building_part::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -600,33 +627,6 @@ expected_field="roof_color", expected_check="hex_color", ), - Scenario( - id="building_part::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - 
id="building_part::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py index 399495474..0c16e74d7 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -286,6 +286,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -463,6 +483,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="division::sources[].property:required", 
scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -592,6 +619,13 @@ expected_field="hierarchies_min_length", expected_check="array_min_length", ), + Scenario( + id="division::hierarchies_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies"), + expected_field="hierarchies_unique", + expected_check="struct_unique", + ), Scenario( id="division::hierarchies[]_min_length:array_min_length", scaffold={}, @@ -599,6 +633,13 @@ expected_field="hierarchies[]_min_length", expected_check="array_min_length", ), + Scenario( + id="division::hierarchies[]_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), + expected_field="hierarchies[]_unique", + expected_check="struct_unique", + ), Scenario( id="division::hierarchies[][].division_id:required", scaffold={ @@ -741,6 +782,13 @@ expected_field="perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division::perspectives.countries[]:country_code_alpha2", scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, @@ -769,6 +817,13 @@ expected_field="capital_division_ids_min_length", expected_check="array_min_length", ), + Scenario( + id="division::capital_division_ids_unique:struct_unique", + scaffold={"capital_division_ids": ["a"]}, + mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), + expected_field="capital_division_ids_unique", + expected_check="struct_unique", + ), Scenario( id="division::capital_division_ids[]:string_min_length", scaffold={"capital_division_ids": ["a"]}, @@ -790,6 +845,13 @@ expected_field="capital_of_divisions_min_length", 
expected_check="array_min_length", ), + Scenario( + id="division::capital_of_divisions_unique:struct_unique", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), + expected_field="capital_of_divisions_unique", + expected_check="struct_unique", + ), Scenario( id="division::capital_of_divisions[].division_id:required", scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, @@ -900,68 +962,6 @@ expected_field="parent_division_id_forbidden", expected_check="forbid_if", ), - Scenario( - id="division::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::hierarchies_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "hierarchies"), - expected_field="hierarchies_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::hierarchies[]_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), - expected_field="hierarchies[]_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::perspectives.countries_unique:struct_unique", - scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, - mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), - 
expected_field="perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::capital_division_ids_unique:struct_unique", - scaffold={"capital_division_ids": ["a"]}, - mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), - expected_field="capital_division_ids_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::capital_of_divisions_unique:struct_unique", - scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, - mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), - expected_field="capital_of_divisions_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py index 9f4d8e2f8..d9170b23b 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -240,6 +240,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division_area::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division_area::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -417,6 +437,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division_area::sources_unique:struct_unique", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="division_area::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -645,33 +672,6 @@ expected_field="admin_level_required_5", expected_check="require_if", ), - Scenario( - id="division_area::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_area::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py index 27e05e731..41f9d3d59 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -180,6 +180,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division_boundary::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + 
), Scenario( id="division_boundary::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -316,6 +323,13 @@ expected_field="division_ids_max_length", expected_check="array_max_length", ), + Scenario( + id="division_boundary::division_ids_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "division_ids"), + expected_field="division_ids_unique", + expected_check="struct_unique", + ), Scenario( id="division_boundary::division_ids[]:string_min_length", scaffold={}, @@ -386,6 +400,13 @@ expected_field="perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division_boundary::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division_boundary::perspectives.countries[]:country_code_alpha2", scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, @@ -466,27 +487,6 @@ expected_field="country_forbidden", expected_check="forbid_if", ), - Scenario( - id="division_boundary::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_boundary::division_ids_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "division_ids"), - expected_field="division_ids_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_boundary::perspectives.countries_unique:struct_unique", - scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, - mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), - 
expected_field="perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py index ad8fc0002..b8a128bb0 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -214,6 +214,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="place::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="place::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -322,6 +329,13 @@ expected_field="categories.primary", expected_check="snake_case", ), + Scenario( + id="place::categories.alternate_unique:struct_unique", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "categories.alternate"), + expected_field="categories.alternate_unique", + expected_check="struct_unique", + ), Scenario( id="place::categories.alternate[]:snake_case", scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, @@ -364,6 +378,13 @@ expected_field="taxonomy.hierarchy_min_length", expected_check="array_min_length", ), + Scenario( + id="place::taxonomy.hierarchy_unique:struct_unique", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), + expected_field="taxonomy.hierarchy_unique", + expected_check="struct_unique", + ), Scenario( id="place::taxonomy.hierarchy[]:snake_case", scaffold={"taxonomy": 
{"primary": "snake_case", "hierarchy": ["snake_case"]}}, @@ -384,6 +405,19 @@ expected_field="taxonomy.alternates_min_length", expected_check="array_min_length", ), + Scenario( + id="place::taxonomy.alternates_unique:struct_unique", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), + expected_field="taxonomy.alternates_unique", + expected_check="struct_unique", + ), Scenario( id="place::taxonomy.alternates[]:snake_case", scaffold={ @@ -418,6 +452,13 @@ expected_field="websites_min_length", expected_check="array_min_length", ), + Scenario( + id="place::websites_unique:struct_unique", + scaffold={"websites": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "websites"), + expected_field="websites_unique", + expected_check="struct_unique", + ), Scenario( id="place::websites[]:url_format", scaffold={"websites": ["https://example.com/"]}, @@ -442,6 +483,13 @@ expected_field="socials_min_length", expected_check="array_min_length", ), + Scenario( + id="place::socials_unique:struct_unique", + scaffold={"socials": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "socials"), + expected_field="socials_unique", + expected_check="struct_unique", + ), Scenario( id="place::socials[]:url_format", scaffold={"socials": ["https://example.com/"]}, @@ -466,6 +514,13 @@ expected_field="emails_min_length", expected_check="array_min_length", ), + Scenario( + id="place::emails_unique:struct_unique", + scaffold={"emails": ["user@example.com"]}, + mutate=lambda row: mutate_unique_items(row, "emails"), + expected_field="emails_unique", + expected_check="struct_unique", + ), Scenario( id="place::emails[]:email", scaffold={"emails": ["user@example.com"]}, @@ -480,6 +535,13 @@ expected_field="phones_min_length", expected_check="array_min_length", ), + Scenario( + id="place::phones_unique:struct_unique", 
+ scaffold={"phones": ["+1 555-555-5555"]}, + mutate=lambda row: mutate_unique_items(row, "phones"), + expected_field="phones_unique", + expected_check="struct_unique", + ), Scenario( id="place::phones[]:phone_number", scaffold={"phones": ["+1 555-555-5555"]}, @@ -684,6 +746,31 @@ expected_field="brand.names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=lambda row: mutate_unique_items( + row, "brand.names.rules[].perspectives.countries" + ), + expected_field="brand.names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="place::brand.names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -947,6 +1034,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="place::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="place::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1013,113 +1120,6 @@ expected_field="names.rules[].side", expected_check="enum", ), - Scenario( - id="place::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - 
expected_check="struct_unique", - ), - Scenario( - id="place::categories.alternate_unique:struct_unique", - scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, - mutate=lambda row: mutate_unique_items(row, "categories.alternate"), - expected_field="categories.alternate_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::taxonomy.hierarchy_unique:struct_unique", - scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, - mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), - expected_field="taxonomy.hierarchy_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::taxonomy.alternates_unique:struct_unique", - scaffold={ - "taxonomy": { - "primary": "snake_case", - "hierarchy": ["snake_case"], - "alternates": ["snake_case"], - } - }, - mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), - expected_field="taxonomy.alternates_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::websites_unique:struct_unique", - scaffold={"websites": ["https://example.com/"]}, - mutate=lambda row: mutate_unique_items(row, "websites"), - expected_field="websites_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::socials_unique:struct_unique", - scaffold={"socials": ["https://example.com/"]}, - mutate=lambda row: mutate_unique_items(row, "socials"), - expected_field="socials_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::emails_unique:struct_unique", - scaffold={"emails": ["user@example.com"]}, - mutate=lambda row: mutate_unique_items(row, "emails"), - expected_field="emails_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::phones_unique:struct_unique", - scaffold={"phones": ["+1 555-555-5555"]}, - mutate=lambda row: mutate_unique_items(row, "phones"), - expected_field="phones_unique", - expected_check="struct_unique", - ), - Scenario( - 
id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "brand": { - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": { - "mode": "accepted_by", - "countries": ["US"], - }, - } - ], - } - } - }, - mutate=lambda row: mutate_unique_items( - row, "brand.names.rules[].perspectives.countries" - ), - expected_field="brand.names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py index 6552a950a..7fb7739ad 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -161,6 +161,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="connector::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="connector::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -248,13 +255,6 @@ expected_field="sources[].between", expected_check="linear_range_order", ), - 
Scenario( - id="connector::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py index 22cfd600b..fda44888d 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -219,6 +219,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -327,6 +334,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -390,6 +404,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + 
{"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -412,6 +437,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", scaffold={ @@ -434,6 +470,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -468,6 +517,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, 
"access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -621,6 +695,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -953,6 +1034,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1040,6 +1141,13 @@ expected_field="rail_flags_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::rail_flags_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags"), + expected_field="rail_flags_unique", + expected_check="struct_unique", + ), Scenario( id="segment::rail_flags[].values:required", scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, @@ -1054,6 +1162,13 @@ expected_field="rail_flags[].values_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::rail_flags[].values_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), + expected_field="rail_flags[].values_unique", + expected_check="struct_unique", + ), Scenario( id="segment::rail_flags[].values[]:enum", scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, @@ -1165,210 +1280,28 @@ expected_check="require_any_of", ), Scenario( - id="segment::model:require_any_of:6", - scaffold={"destinations": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["labels", "symbols"], array_path="destinations" - ), - expected_field="destinations[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:7", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:8", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:9", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:10", - scaffold={"prohibited_transitions": [{"when": {"vehicle": 
[{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:11", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:12", - scaffold={"prohibited_transitions": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="prohibited_transitions", - struct_path="when", - ), - expected_field="prohibited_transitions[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:13", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:14", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:15", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - 
array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:16", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:17", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:18", - scaffold={"speed_limits": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="speed_limits", - struct_path="when", - ), - expected_field="speed_limits[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:require_any_of:19", - scaffold={"speed_limits": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" - ), - expected_field="speed_limits[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:20", + id="segment::model:forbid_if:6", scaffold={}, mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), expected_field="class_forbidden", expected_check="forbid_if", ), Scenario( - id="segment::model:require_if:21", + id="segment::model:require_if:7", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), expected_field="class_required_0", 
expected_check="require_if", ), Scenario( - id="segment::model:require_if:22", + id="segment::model:require_if:8", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), expected_field="class_required_1", expected_check="require_if", ), Scenario( - id="segment::model:forbid_if:23", + id="segment::model:forbid_if:9", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1382,7 +1315,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:24", + id="segment::model:forbid_if:10", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1396,7 +1329,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:25", + id="segment::model:forbid_if:11", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1410,7 +1343,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:26", + id="segment::model:forbid_if:12", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1424,7 +1357,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:27", + id="segment::model:forbid_if:13", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1438,7 +1371,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:28", + id="segment::model:forbid_if:14", scaffold={}, mutate=lambda row: mutate_forbid_if( row, ["subclass"], "subtype", "road", negate=True @@ -1447,7 +1380,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:29", + id="segment::model:forbid_if:15", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1461,7 +1394,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:30", + id="segment::model:forbid_if:16", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1474,121 +1407,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": 
""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - "access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - 
expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::rail_flags_unique:struct_unique", - scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "rail_flags"), - expected_field="rail_flags_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::rail_flags[].values_unique:struct_unique", - scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), - expected_field="rail_flags[].values_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py index 0a8d0a946..137862634 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -278,6 +278,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -386,6 +393,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -449,6 +463,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -471,6 +496,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", 
scaffold={ @@ -493,6 +529,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -527,6 +576,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -680,6 +754,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -1012,6 +1093,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1285,6 +1386,23 @@ expected_field="destinations[].labels_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::destinations[].labels_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), + expected_field="destinations[].labels_unique", + expected_check="struct_unique", + ), Scenario( id="segment::destinations[].labels[].value:required", scaffold={ @@ -1370,6 +1488,23 @@ expected_field="destinations[].labels[].type", expected_check="enum", ), + Scenario( + id="segment::destinations[].symbols_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), + expected_field="destinations[].symbols_unique", + expected_check="struct_unique", + ), Scenario( id="segment::destinations[].symbols[]:enum", scaffold={ @@ -1449,6 +1584,22 @@ expected_field="prohibited_transitions[].sequence_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].sequence_unique:struct_unique", + scaffold={ + 
"prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].sequence" + ), + expected_field="prohibited_transitions[].sequence_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].sequence[].connector_id:required", scaffold={ @@ -1640,6 +1791,23 @@ expected_field="prohibited_transitions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.mode_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.mode" + ), + expected_field="prohibited_transitions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.mode[]:enum", scaffold={ @@ -1670,6 +1838,23 @@ expected_field="prohibited_transitions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.using_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.using" + ), + expected_field="prohibited_transitions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.using[]:enum", scaffold={ @@ -1700,6 +1885,23 @@ expected_field="prohibited_transitions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", + scaffold={ + 
"prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.recognized" + ), + expected_field="prohibited_transitions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.recognized[]:enum", scaffold={ @@ -1739,6 +1941,32 @@ expected_field="prohibited_transitions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.vehicle" + ), + expected_field="prohibited_transitions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.vehicle[].dimension:required", scaffold={ @@ -1919,6 +2147,13 @@ expected_field="road_flags_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_flags_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags"), + expected_field="road_flags_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_flags[].values:required", scaffold={"road_flags": [{"values": ["is_bridge"]}]}, @@ -1933,6 +2168,13 @@ expected_field="road_flags[].values_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_flags[].values_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: 
mutate_unique_items(row, "road_flags[].values"), + expected_field="road_flags[].values_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_flags[].values[]:enum", scaffold={"road_flags": [{"values": ["is_bridge"]}]}, @@ -1968,6 +2210,13 @@ expected_field="road_surface_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_surface_unique:struct_unique", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=lambda row: mutate_unique_items(row, "road_surface"), + expected_field="road_surface_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_surface[].value:required", scaffold={"road_surface": [{"value": "unknown"}]}, @@ -2010,6 +2259,13 @@ expected_field="speed_limits_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits_unique:struct_unique", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits"), + expected_field="speed_limits_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].max_speed.value:required", scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, @@ -2115,6 +2371,13 @@ expected_field="speed_limits[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.mode_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), + expected_field="speed_limits[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.mode[]:enum", scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, @@ -2129,6 +2392,13 @@ expected_field="speed_limits[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.using_unique:struct_unique", + scaffold={"speed_limits": [{"when": 
{"using": ["as_customer"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.using"), + expected_field="speed_limits[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.using[]:enum", scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, @@ -2143,6 +2413,13 @@ expected_field="speed_limits[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.recognized_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), + expected_field="speed_limits[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.recognized[]:enum", scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, @@ -2172,6 +2449,28 @@ expected_field="speed_limits[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.vehicle_unique:struct_unique", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), + expected_field="speed_limits[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.vehicle[].dimension:required", scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, @@ -2265,6 +2564,13 @@ expected_field="width_rules_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::width_rules_unique:struct_unique", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=lambda row: mutate_unique_items(row, "width_rules"), + expected_field="width_rules_unique", + expected_check="struct_unique", + ), Scenario( 
id="segment::width_rules[].value:required", scaffold={"width_rules": [{"value": 1.0}]}, @@ -2692,312 +2998,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - 
"access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::destinations[].labels_unique:struct_unique", - scaffold={ - "destinations": [ - { - "from_connector_id": "a", - "to_connector_id": "a", - "to_segment_id": "a", - "final_heading": "forward", - "labels": [{"value": "a", "type": "street"}], - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), - expected_field="destinations[].labels_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::destinations[].symbols_unique:struct_unique", - scaffold={ - "destinations": [ - { - "from_connector_id": "a", - "to_connector_id": "a", - "to_segment_id": "a", - "final_heading": "forward", - "symbols": ["motorway"], - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), - expected_field="destinations[].symbols_unique", - expected_check="struct_unique", - ), - 
Scenario( - id="segment::prohibited_transitions[].sequence_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "final_heading": "forward", - "sequence": [{"connector_id": "a", "segment_id": "a"}], - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].sequence" - ), - expected_field="prohibited_transitions[].sequence_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.mode_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"mode": ["vehicle"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.mode" - ), - expected_field="prohibited_transitions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.using_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"using": ["as_customer"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.using" - ), - expected_field="prohibited_transitions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"recognized": ["as_permitted"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.recognized" - ), - expected_field="prohibited_transitions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": 
"a", "segment_id": "a"}], - "final_heading": "forward", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.vehicle" - ), - expected_field="prohibited_transitions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_flags_unique:struct_unique", - scaffold={"road_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "road_flags"), - expected_field="road_flags_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_flags[].values_unique:struct_unique", - scaffold={"road_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "road_flags[].values"), - expected_field="road_flags[].values_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_surface_unique:struct_unique", - scaffold={"road_surface": [{"value": "unknown"}]}, - mutate=lambda row: mutate_unique_items(row, "road_surface"), - expected_field="road_surface_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits_unique:struct_unique", - scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits"), - expected_field="speed_limits_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.mode_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), - expected_field="speed_limits[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.using_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, - mutate=lambda row: mutate_unique_items(row, 
"speed_limits[].when.using"), - expected_field="speed_limits[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.recognized_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), - expected_field="speed_limits[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.vehicle_unique:struct_unique", - scaffold={ - "speed_limits": [ - { - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - } - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), - expected_field="speed_limits[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::width_rules_unique:struct_unique", - scaffold={"width_rules": [{"value": 1.0}]}, - mutate=lambda row: mutate_unique_items(row, "width_rules"), - expected_field="width_rules_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py index bf3e6b1d6..bfbe81702 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -216,6 +216,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( 
id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -324,6 +331,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -387,6 +401,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -409,6 +434,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", scaffold={ @@ -431,6 +467,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -465,6 +514,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -618,6 +692,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -950,6 +1031,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": 
{"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1099,210 +1200,28 @@ expected_check="require_any_of", ), Scenario( - id="segment::model:require_any_of:6", - scaffold={"destinations": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["labels", "symbols"], array_path="destinations" - ), - expected_field="destinations[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:7", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:8", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:9", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:10", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - 
mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:11", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:12", - scaffold={"prohibited_transitions": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="prohibited_transitions", - struct_path="when", - ), - expected_field="prohibited_transitions[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:13", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:14", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:15", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - 
array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:16", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:17", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:18", - scaffold={"speed_limits": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="speed_limits", - struct_path="when", - ), - expected_field="speed_limits[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:require_any_of:19", - scaffold={"speed_limits": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" - ), - expected_field="speed_limits[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:20", + id="segment::model:forbid_if:6", scaffold={}, mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), expected_field="class_forbidden", expected_check="forbid_if", ), Scenario( - id="segment::model:require_if:21", + id="segment::model:require_if:7", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), expected_field="class_required_0", 
expected_check="require_if", ), Scenario( - id="segment::model:require_if:22", + id="segment::model:require_if:8", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), expected_field="class_required_1", expected_check="require_if", ), Scenario( - id="segment::model:forbid_if:23", + id="segment::model:forbid_if:9", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1316,7 +1235,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:24", + id="segment::model:forbid_if:10", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1330,7 +1249,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:25", + id="segment::model:forbid_if:11", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1344,7 +1263,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:26", + id="segment::model:forbid_if:12", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1358,7 +1277,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:27", + id="segment::model:forbid_if:13", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1372,7 +1291,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:28", + id="segment::model:forbid_if:14", scaffold={}, mutate=lambda row: mutate_forbid_if( row, ["subclass"], "subtype", "road", negate=True @@ -1381,7 +1300,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:29", + id="segment::model:forbid_if:15", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1395,7 +1314,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:30", + id="segment::model:forbid_if:16", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1408,107 +1327,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": 
""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - "access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - 
expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] From 1e3143c8b23169f0f8b52715fa2d890dbab65707 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 20 May 2026 14:02:22 -0700 Subject: [PATCH 07/11] fix(pyspark): don't crash on absent input columns validate_feature built check expressions referencing every column the schema declares, then evaluated them with an eager df.select. When the input DataFrame lacked a declared column, Spark's plan analysis raised an AnalysisException before the caller could inspect the schema mismatch, so a file missing a required column produced a Java stack trace instead of the schema-mismatch report the CLI is built to emit. Columns that compare_schemas reports as absent from the data now have their checks dropped, the same as --skip-columns columns; referencing them is what crashes Spark. The mismatch is still recorded in schema_mismatches, so the CLI reports it and exits cleanly (or, with --skip-schema-check, validates the columns that are present). The CLI also prints the --skip-columns invocation for the absent columns, so the escape hatch is discoverable from the error itself. 
Signed-off-by: Seth Fitzsimmons --- .../src/overture/schema/pyspark/cli.py | 13 ++++++++++ .../src/overture/schema/pyspark/validate.py | 21 ++++++++++++++- .../overture-schema-pyspark/tests/test_cli.py | 18 +++++++++++++ .../tests/test_validate.py | 26 +++++++++++++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py index 1a8ada445..2be4f9aeb 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py @@ -220,6 +220,19 @@ def validate_cli( click.echo(f"Schema mismatches for {resolved}:", err=True) for m in result.schema_mismatches: click.echo(f" {m.path}: expected {m.expected}, got {m.actual}", err=True) + absent_columns = list( + dict.fromkeys( + m.path.split(".", 1)[0] + for m in result.schema_mismatches + if m.actual == "missing" + ) + ) + if absent_columns: + flags = " ".join(f"--skip-columns {c}" for c in absent_columns) + click.echo( + f" Re-run with `{flags}` to skip missing columns.", + err=True, + ) if not skip_schema_check: sys.exit(1) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py index 9b03ed34b..0274d6c18 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py @@ -311,11 +311,30 @@ def validate_feature( + "; ".join(parts) ) + # Schema columns the data lacks. A check referencing an absent column + # raises an AnalysisException during Spark plan analysis, so such checks + # are dropped before evaluation via the `excluded` filter below -- the + # same filter skip_columns feeds. 
Only that check-filtering is shared: + # a skip_columns mismatch was suppressed by the loop above, so the + # caller sees no mismatch and validation continues; an absent-column + # mismatch stays in `mismatches` and is reported, so the caller (the + # CLI) aborts unless --skip-schema-check. `--skip-columns` opts into + # that suppression -- it is not a restatement of the default. + # `Check.root_field` is column-granular, so filtering is all-or-nothing: + # if the data has the `bbox` struct but is missing only `bbox.xmin`, + # every check whose root_field is `bbox` is dropped, including checks on + # sub-fields that are present. Finer granularity would require + # sub-column awareness in Check, which it deliberately lacks. + absent_columns = { + m.path.split(".", 1)[0] for m in mismatches if m.actual == "missing" + } + # Check filtering + excluded = skip | absent_columns kept: list[Check] = [] suppressed: list[Check] = [] for chk in all_checks: - if chk.root_field is not None and chk.root_field in skip: + if chk.root_field is not None and chk.root_field in excluded: continue # structurally absent, not tracked in suppressed if chk.root_field is not None and chk.root_field in suppress_roots: suppressed.append(chk) diff --git a/packages/overture-schema-pyspark/tests/test_cli.py b/packages/overture-schema-pyspark/tests/test_cli.py index 037d6aaeb..2caaf558d 100644 --- a/packages/overture-schema-pyspark/tests/test_cli.py +++ b/packages/overture-schema-pyspark/tests/test_cli.py @@ -247,6 +247,24 @@ def test_validate_skip_columns(spark: SparkSession, tmp_path: Path) -> None: assert "0 / 1 rows with errors" in result.output +def test_validate_missing_column_suggests_skip_columns( + spark: SparkSession, tmp_path: Path +) -> None: + """A column absent from the data hints the --skip-columns flag.""" + input_path = str(tmp_path / "input.parquet") + + # Data missing the 'value' column the schema expects + spark.createDataFrame([Row(id="r1", theme="test", 
type="test_cli")]).write.parquet( + input_path + ) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + assert "--skip-columns value" in result.output + + def test_validate_ignore_extra_columns(spark: SparkSession, tmp_path: Path) -> None: """--ignore-extra-columns suppresses 'expected missing' schema mismatches.""" input_path = str(tmp_path / "input.parquet") diff --git a/packages/overture-schema-pyspark/tests/test_validate.py b/packages/overture-schema-pyspark/tests/test_validate.py index c3fe3ea08..f15f4e806 100644 --- a/packages/overture-schema-pyspark/tests/test_validate.py +++ b/packages/overture-schema-pyspark/tests/test_validate.py @@ -514,3 +514,29 @@ def test_all_checks_suppressed(self, vf_df: DataFrame) -> None: ) assert result.checks == [] assert result.error_rows().count() == 0 + + def test_missing_column_does_not_raise(self, spark: SparkSession) -> None: + # A DataFrame missing a required column causes AnalysisException when + # evaluate_checks references that column. validate_feature must detect + # structurally absent columns via schema_mismatches and silently drop + # the corresponding checks before calling evaluate_checks -- mirroring + # the skip_columns path. 
+ schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE) + # Must not raise -- returns normally + assert isinstance(result, ValidationResult) + # Missing column is reported as a schema mismatch + mismatch_paths = [m.path for m in result.schema_mismatches] + assert "theme" in mismatch_paths + # No check may reference the absent root field + missing_root_fields = {c.root_field for c in result.checks} + assert "theme" not in missing_root_fields + # Absent-column checks are silently dropped, not tracked in suppressed + suppressed_root_fields = {c.root_field for c in result.suppressed_checks} + assert "theme" not in suppressed_root_fields From 4b55cc0a4464112d8e75ddb61a5a311cd97abd7e Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 20 May 2026 14:02:46 -0700 Subject: [PATCH 08/11] fix(codegen): null-guard optional sub-model checks Model-level constraints (require_any_of and the like) generated for a sub-model reached through an optional field fired even when that field was null. Pydantic skips a model validator when the optional sub-model is absent, so the generated PySpark expression produced a false positive the schema itself never raises. ModelCheck now carries a gate: the optional-ancestor path that must be non-null for the constraint to apply. check_builder sets it when the constrained model is reached via an optional struct field inside an array; the renderer wraps the constraint in F.when(<gate>.isNotNull(), ...). Regenerated Segment expressions: the speed_limits[].when, access_restrictions[].when, and prohibited_transitions[].when require_any_of checks are now skipped when their when sub-model is null. 
Signed-off-by: Seth Fitzsimmons --- .../schema/codegen/pyspark/check_builder.py | 9 +- .../schema/codegen/pyspark/check_ir.py | 7 ++ .../schema/codegen/pyspark/renderer.py | 16 ++++ .../tests/test_pyspark_check_builder.py | 83 +++++++++++++++++++ .../tests/test_pyspark_renderer.py | 70 ++++++++++++++++ .../overture/schema/transportation/segment.py | 69 ++++++++------- 6 files changed, 223 insertions(+), 31 deletions(-) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py index 9e736a67c..885074ca6 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -394,11 +394,17 @@ def _recurse_into_model( model_checks.extend(sub_model_checks) if model_spec.constraints: + constraint_gate = ( + prefix + if is_optional and not field_is_list and isinstance(prefix, ArrayPath) + else None + ) sub_model_constraint_checks = _dispatch_model_constraints( model_spec.constraints, model_spec.fields, target=_model_constraint_target(prefix), arm=arm, + gate=constraint_gate, ) if sub_model_constraint_checks: _guard_struct_nested_anchor(prefix, model_spec.name) @@ -485,10 +491,11 @@ def _dispatch_model_constraints( *, target: FieldPath = ScalarPath(), arm: str | None = None, + gate: FieldPath | None = None, ) -> list[ModelCheck]: """Dispatch model constraints to ModelChecks.""" return [ - ModelCheck(descriptor=desc, target=target, arm=arm) + ModelCheck(descriptor=desc, target=target, arm=arm, gate=gate) for mc in constraints for desc in dispatch_model_constraint(mc, fields) ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py index e9029c632..d7e769c31 100644 --- 
a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py @@ -76,8 +76,15 @@ class ModelCheck: value. Constraints discovered through a variant-specific field's sub-model or sub-union inherit the contributing outer arm, so they land only in that arm's test module. + + `gate` is the optional-ancestor path that must be non-null for the + constraint to apply. Set when the constrained model is reached via + an optional field (`field: Model | None`). The renderer wraps the + constraint expression in `F.when(<gate>.isNotNull(), ...)` so + the check is skipped when the optional model is absent (NULL). """ descriptor: ModelConstraintDescriptor target: FieldPath = ScalarPath() arm: str | None = None + gate: FieldPath | None = None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py index 9728a499a..dcd687610 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -455,10 +455,26 @@ def _cols_and_names() -> tuple[str, str]: raise TypeError(f"Unhandled model constraint descriptor: {desc!r}") if isinstance(target, ArrayPath): + if check.gate is not None: + assert not target.iter_struct_paths, ( + f"gated ModelCheck with a nested-array target ({target!r}) is unsupported; " + f"the element-gate wrap assumes a single array level" + ) + element_relative = target.element_relative_gate(check.gate) + assert element_relative is not None, ( + f"ModelCheck gate={check.gate!r} is not reachable as an element-level " + f"accessor on target={target!r}; gates on ModelChecks must be ArrayPaths " + f"entering the same outer array as the target" + ) + inner_expr = _wrap_element_gate(inner_expr, var, element_relative) expr = 
_wrap_in_array_iteration( target.column_path, target.iter_struct_paths, inner_expr ) else: + assert check.gate is None, ( + f"ModelCheck gate={check.gate!r} paired with non-ArrayPath target={target!r}; " + f"a gate only makes sense when the constrained model is inside an array" + ) expr = inner_expr return _check_function_context( diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py index 0c89a0367..983b4d348 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -1336,6 +1336,10 @@ class _ArrayOfConstrainedModel(BaseModel): items: list[_ArrayElementWithConstraint] +class _OptionalArrayOfConstrainedModel(BaseModel): + items: list[_ArrayElementWithConstraint] | None = None + + @require_any_of("a", "b") class _NestedConstrainedStruct(BaseModel): a: str | None = None @@ -1875,3 +1879,82 @@ def test_primitive_bounds_excluded(self, nodes: list[Check]) -> None: d = dict(b.kwargs) assert d.get("ge") != -(2**31) assert d.get("le") != 2**31 - 1 + + +@require_any_of("x", "y") +class _OptionalSubModelConstrained(BaseModel): + """Sub-model with require_any_of on its own fields.""" + + x: str | None = None + y: str | None = None + + +class _ElementWithOptionalConstrained(BaseModel): + nested: _OptionalSubModelConstrained | None = None + + +class _ArrayOfElementWithOptionalConstrained(BaseModel): + items: list[_ElementWithOptionalConstrained] + + +class TestOptionalSubModelModelCheckGate: + """ModelCheck for a constraint on an optional sub-model carries gate set to its path. + + When the constrained model is reached via an optional field (`field: Model | None`), + the PySpark validator must skip the constraint when the field is NULL. The + `ModelCheck.gate` carries the path to the optional field so the renderer can emit + `F.when(<gate>.isNotNull(), ...)`. 
+ """ + + def test_optional_nested_model_gate_set(self) -> None: + """items[].nested is optional -- gate == path to nested.""" + _, model_nodes = _checks_for(_ArrayOfElementWithOptionalConstrained) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[].nested") + ] + assert len(nodes) == 1 + assert nodes[0].gate == _path("items[].nested") + + def test_non_optional_sub_model_has_no_gate(self) -> None: + """Direct array element model (not optional) -- gate is None.""" + _, model_nodes = _checks_for(_ArrayOfConstrainedModel) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[]") + ] + assert len(nodes) == 1 + assert nodes[0].gate is None + + def test_optional_list_field_element_model_has_no_gate(self) -> None: + """Optional list field (list[Model] | None) -- element constraint gate is None. + + The field being optional means the list itself may be absent; but the + constrained model is reached via array iteration, not a nullable struct + field, so no element-level gate belongs. 
+ """ + _, model_nodes = _checks_for(_OptionalArrayOfConstrainedModel) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[]") + ] + assert len(nodes) == 1 + assert nodes[0].gate is None + + def test_segment_speed_limits_when_has_gate(self) -> None: + """Segment.speed_limits[].when is optional -- gate == path to when.""" + from codegen_test_support import discover_feature + + spec = discover_feature("Segment") + _, model_nodes = build_checks(spec) + when_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[].when") + ] + assert len(when_nodes) >= 1 + for node in when_nodes: + assert node.gate == _path("speed_limits[].when") diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py index 42775be41..91a63c380 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -33,6 +33,7 @@ ) from overture.schema.codegen.pyspark.schema_builder import build_schema from overture.schema.system.field_path import ( + ScalarPath, parse, ) from overture.schema.system.model_constraint import ( @@ -1095,3 +1096,72 @@ def test_nested_array_gate_applied_at_outermost_lambda(self) -> None: # Gate must be on el (the rule struct), not inner (the country string). 
assert 'el["perspectives"].isNotNull()' in source assert "inner[" not in source + + +@require_any_of("a", "b") +class _OptionalSubModel(BaseModel): + a: str | None = None + b: str | None = None + + +class _ElementWithOptional(BaseModel): + nested: _OptionalSubModel | None = None + + +class _ArrayWithOptionalSubModel(BaseModel): + items: list[_ElementWithOptional] + + +class TestGatedModelConstraintRendering: + """ModelCheck with gate wraps the constraint in F.when(<gate>.isNotNull(), ...).""" + + def test_gated_model_check_wraps_in_f_when(self) -> None: + """A gated ModelCheck on items[].nested emits F.when(el['nested'].isNotNull(), ...).""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[].nested"), + gate=_path("items[].nested"), + ) + source = _render_model_node(check) + assert 'el["nested"].isNotNull()' in source + assert "check_require_any_of" in source + assert "F.when(" in source + + def test_gated_model_check_is_parseable(self) -> None: + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[].nested"), + gate=_path("items[].nested"), + ) + source = _render_model_node(check) + ast.parse(source) + + def test_ungated_model_check_no_f_when(self) -> None: + """A ModelCheck without gate does NOT emit isNotNull wrapping.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y")), + target=_path("items[]"), + gate=None, + ) + source = _render_model_node(check) + assert "isNotNull" not in source + assert "check_require_any_of" in source + + def test_full_render_optional_sub_model_has_when_guard(self) -> None: + """End-to-end: rendering _ArrayWithOptionalSubModel emits the isNotNull guard.""" + source = _render(_ArrayWithOptionalSubModel, "arr_optional_sub") + assert 'el["nested"].isNotNull()' in source + + def test_full_render_optional_sub_model_parseable(self) -> None: + source = _render(_ArrayWithOptionalSubModel, "arr_optional_sub") + ast.parse(source) + + def 
test_gated_model_check_assertion_on_non_array_target(self) -> None: + """A gate paired with a non-ArrayPath target raises AssertionError.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=ScalarPath(), + gate=_path("items[].nested"), + ) + with pytest.raises(AssertionError, match="gate.*non-ArrayPath"): + _render_model_node(check) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py index cc9fd32bc..539999f21 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -3919,16 +3919,19 @@ def _access_restrictions_when_check_require_any_of_5_check() -> Check: name="require_any_of", expr=array_check( "access_restrictions", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY, @@ -4055,16 +4058,19 @@ def _prohibited_transitions_when_check_require_any_of_12_check() -> Check: name="require_any_of", expr=array_check( "prohibited_transitions", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - 
el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY, @@ -4176,16 +4182,19 @@ def _speed_limits_when_check_require_any_of_18_check() -> Check: name="require_any_of", expr=array_check( "speed_limits", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY, From 1c218154063d07731a36aa5c97dbd5eb407135fc Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Tue, 23 Jun 2026 20:42:42 -0700 Subject: [PATCH 09/11] feat(pyspark): map key/value validation and codegen hardening This lands map key/value validation in the PySpark and Markdown generators, renames the codegen's core "feature" vocabulary to "model", and closes correctness gaps across validation generation, constraint identity, base-row synthesis, and the runtime checks. Map key/value validation (PySpark). Dict-typed fields such as names.common (dict[LanguageTag, StrippedString], present on nearly every feature with names) got no key or value validation, so invalid language-tag keys and unstripped values passed overture-validate while failing Pydantic. 
A MapPath IR locates a map's keys or values, the check builder descends into MapOf.key/.value, and the renderer emits map_keys_check/map_values_check over F.map_keys/F.map_values, reusing the null-guarded array transform. A map value that is a sub-model is descended into the way a list element is, so its field and model-level constraints generate checks. Shapes with no representable MapPath -- a container nested in a map value, an array-shaped map projection, a map reached through an array -- raise at generation time rather than silently drop a constraint or emit a mis-typed check. Map key/value rendering (Markdown). Map references render every key and value type and their directly-applied constraints. Map sides link to their type pages, container wrappers (a list or map inside a map) survive instead of being peeled, model-valued map values link, and every variant is named rather than collapsing to map. A bare side folds into the surrounding map<...> code span so two adjacent backtick spans cannot corrupt the CommonMark. feature to model vocabulary. The codegen generates validation for any Pydantic BaseModel, not only geospatial features, so the root abstraction is now ModelSpec (a RecordSpec, or a tagged UnionSpec of records) and the runtime surface is validate_model, model_keys, model_names, ModelValidation, and the emitted MODEL_VALIDATION constant, in place of validate_feature, feature_keys, feature_names, FeatureValidation, and FEATURE_VALIDATION. The registry walks the new constant name and the module template is renamed to match. The overture-validate CLI argument and the GeoJSON-feature validator keep feature vocabulary, where the geospatial meaning is correct. Graceful degradation over absent and skipped columns. 
Each Check carries one read_columns set, the top-level columns it actually reads, replacing the split root_field/referenced_fields mechanism that left an unresolvable F.col() for a row-root constraint over a struct field and for an in-array constraint branching on a row-root discriminator. Checks whose columns are skipped or structurally absent -- top-level, nested (sources[].confidence resolves to sources), or model-level -- are dropped before Spark instead of crashing with a raw AnalysisException. The CLI backstop classifies the exception through the structured PySpark error API rather than its message text, so a real planning bug propagates as a traceback instead of steering the operator to suppress it with --skip-columns. ValidationResult exposes absent_columns, the same derivation that drives check dropping. evaluate_checks and explain_errors reject input columns colliding with their reserved _err_N, field, check, and message names. Field-check label disambiguation. Discriminated unions produced multiple field checks sharing a (field, name) identity -- segment emitted two required checks on access_restrictions[].when.vehicle[].value, and sources[].confidence collided on bounds across many models -- leaving colliding rows indistinguishable in suppression, explain_errors metadata, and the conformance expected_field. Symmetric _N suffixes disambiguate them, computed over the unfiltered check list so per-arm test filtering cannot hide a collision the shared expression module still carries. Label and disambiguated function-name derivation is unified into one flattening shared by the expression and test renderers, which also fixes a per-arm asymmetry where a model-check base label spanning an arm boundary made the per-arm test expect a field the module never emits. Base-row and test-data value synthesis. 
Row synthesis handles Not(FieldEqCondition), used by Division and DivisionBoundary as ~IS_COUNTRY, instead of silently treating it as unsatisfied, and merges every bound constraint on a field before choosing a value so both-exclusive intervals and sibling Gt/Lt constraints are jointly satisfied. forbid_if fill values are typed for non-string scalars (0, 0.0, False) rather than falling back to a string sentinel for a non-string column. One CONSTRAINT_VALUES table pairs the valid and invalid value for each string constraint, replacing two parallel tables that drifted silently. SparkCategory dispatch is exhaustive: an unhandled or binary category raises at generation time instead of emitting a wrong fill value. Raw Field(pattern=) handling fails loud on an uncurated pattern naming the table to update, and requires PydanticMetadata before reading an object's .pattern, restoring the unhandled-constraint TypeError contract. Constraint identity. Deduplication compares constraints by value. System FieldConstraint subclasses inherited object identity, so two structurally identical UniqueItemsConstraint or PatternConstraint instances reported as divergent for any union field shared across members. FieldConstraint now defines value equality and hashing keyed on the concrete type and normalized instance state, reducing a compiled re.Pattern to (pattern, flags) so a case-insensitive pattern is not masked and reducing container attributes to hashable forms, so a set of constraints deduplicates by rule. The union fingerprint compares the constraint objects directly, falling back to a value-stable repr only for pydantic's internal Field() metadata, the lone constraint type that still compares by identity. Constraint attributes must themselves be value types, a contract the base states for future authors. Runtime checks. 
check_geometry_type reads the full four-byte WKB type word and normalizes ISO and EWKB encodings to the OGC base type, so 3D (Z/M/ZM) geometries in GeoParquet no longer false-fail every geometry-type check. check_url_format matches the scheme case-insensitively, matching Pydantic's HttpUrl. check_bounds rejects NaN under a lower-only bound, which Spark's ordering otherwise lets pass. resolve_read completes a partial partition path, appending a type=Y leaf below a theme=X/ path so one feature's checks no longer run against every type sharing a theme directory. A compiled, flagged re.Pattern, the only Pydantic carrier for re.IGNORECASE, is honored in both the pyspark check and the markdown display instead of crashing the model's generation. Variant gating and extraction. A discriminated union reached under a struct prefix, and a gate crossing mismatched array nesting, raise rather than emit a mis-gated check; both are preemptive, since no current schema reaches that state. Forward-ref and self-referential field annotations resolve against their owning model before classification rather than crashing the terminal classifier on an unresolved string. A required list[X | None] field no longer inherits is_optional from element nullability. Build, structure, and docs. The check-python-code CI paths trigger on the Makefile and the workflow themselves, uv-sync no longer routes its errors to /dev/null, and test-all runs the full suite unconditionally so golden-JSON and example-only changes are not deselected by testmon. typing-extensions is declared to match its unconditional import. Shared helpers replace duplicated logic -- register_model, schema_const_name, enum_source, a struct-only-prefix predicate, and a model-spec discovery entry point that gives every discovery site one extraction carrying partition layout -- and several docstrings and CLI help strings are corrected to match current behavior. 
Signed-off-by: Seth Fitzsimmons --- .github/workflows/check-python-code.yaml | 4 + Makefile | 7 +- packages/overture-schema-codegen/README.md | 4 +- .../overture-schema-codegen/docs/design.md | 42 +- .../docs/walkthrough.md | 38 +- .../overture-schema-codegen/pyproject.toml | 1 + .../src/overture/schema/codegen/cli.py | 50 +- .../schema/codegen/extraction/field.py | 4 +- .../codegen/extraction/field_constraints.py | 36 +- .../schema/codegen/extraction/field_walk.py | 53 ++ .../codegen/extraction/length_constraints.py | 10 +- .../codegen/extraction/model_extraction.py | 21 +- .../schema/codegen/extraction/specs.py | 25 +- .../codegen/extraction/type_analyzer.py | 100 ++- .../codegen/extraction/type_registry.py | 40 ++ .../codegen/extraction/union_extraction.py | 27 +- .../schema/codegen/layout/type_collection.py | 12 +- .../codegen/markdown/path_assignment.py | 6 +- .../schema/codegen/markdown/pipeline.py | 26 +- .../schema/codegen/markdown/renderer.py | 36 +- .../codegen/markdown/reverse_references.py | 40 +- .../schema/codegen/markdown/type_format.py | 204 +++--- .../schema/codegen/pyspark/__init__.py | 2 +- .../schema/codegen/pyspark/_render_common.py | 261 +++++++- .../schema/codegen/pyspark/check_builder.py | 257 +++++-- .../codegen/pyspark/constraint_dispatch.py | 157 ++++- .../schema/codegen/pyspark/pipeline.py | 60 +- .../schema/codegen/pyspark/renderer.py | 266 +++++--- .../schema/codegen/pyspark/schema_builder.py | 15 +- .../templates/_check_function.py.jinja2 | 2 +- ...odule.py.jinja2 => model_module.py.jinja2} | 12 +- .../pyspark/templates/test_module.py.jinja2 | 16 +- .../codegen/pyspark/test_data/base_row.py | 209 +++--- .../pyspark/test_data/constraint_values.py | 203 ++++++ .../pyspark/test_data/invalid_value.py | 69 +- .../codegen/pyspark/test_data/scaffold.py | 20 +- .../schema/codegen/pyspark/test_renderer.py | 145 ++-- .../overture/schema/codegen/spec_discovery.py | 44 ++ .../tests/codegen_test_support.py | 73 +- 
.../overture-schema-codegen/tests/conftest.py | 4 +- .../overture-schema-codegen/tests/test_cli.py | 8 +- .../tests/test_constraint_description.py | 32 + .../tests/test_field_walk.py | 71 ++ .../tests/test_golden_markdown.py | 18 +- .../tests/test_integration_real_models.py | 45 +- .../tests/test_markdown_renderer.py | 162 +++-- .../tests/test_markdown_type_format.py | 176 ++++- .../tests/test_model_extraction.py | 122 +++- .../tests/test_model_extractor.py | 4 +- .../tests/test_pyspark_base_row.py | 272 +++++++- .../tests/test_pyspark_check_builder.py | 277 +++++++- .../tests/test_pyspark_constraint_dispatch.py | 157 ++++- .../tests/test_pyspark_constraint_values.py | 231 +++++++ .../tests/test_pyspark_invalid_value.py | 31 +- .../tests/test_pyspark_pipeline.py | 48 +- .../tests/test_pyspark_renderer.py | 513 +++++++++++++- .../tests/test_pyspark_scaffold.py | 36 +- .../tests/test_pyspark_schema_builder.py | 12 +- .../tests/test_pyspark_test_renderer.py | 313 ++++++++- .../tests/test_reverse_references.py | 70 +- .../tests/test_specs.py | 13 +- .../tests/test_type_analyzer.py | 129 +++- .../tests/test_type_collection.py | 35 +- .../tests/test_type_placement.py | 20 +- .../tests/test_type_registry.py | 32 + .../tests/test_union_extraction.py | 143 +++- packages/overture-schema-pyspark/README.md | 20 +- .../src/overture/schema/pyspark/__init__.py | 12 +- .../src/overture/schema/pyspark/_registry.py | 10 +- .../src/overture/schema/pyspark/check.py | 14 +- .../src/overture/schema/pyspark/cli.py | 120 +++- .../pyspark/expressions/column_patterns.py | 38 ++ .../expressions/constraint_expressions.py | 123 +++- .../overture/schema/addresses/address.py | 90 +-- .../overture/schema/annex/sources.py | 94 ++- .../overture/schema/base/bathymetry.py | 86 +-- .../overture/schema/base/infrastructure.py | 141 ++-- .../generated/overture/schema/base/land.py | 137 ++-- .../overture/schema/base/land_cover.py | 86 +-- .../overture/schema/base/land_use.py | 141 ++-- 
.../generated/overture/schema/base/water.py | 133 ++-- .../overture/schema/buildings/building.py | 159 +++-- .../schema/buildings/building_part.py | 161 +++-- .../overture/schema/divisions/division.py | 288 ++++---- .../schema/divisions/division_area.py | 171 +++-- .../schema/divisions/division_boundary.py | 120 ++-- .../generated/overture/schema/places/place.py | 268 +++++--- .../schema/transportation/connector.py | 58 +- .../overture/schema/transportation/segment.py | 633 +++++++++--------- .../overture/schema/pyspark/schema_check.py | 15 + .../src/overture/schema/pyspark/validate.py | 139 +++- .../tests/_support/harness.py | 53 +- .../tests/_support/helpers.py | 10 +- .../tests/_support/mutations.py | 115 +++- .../tests/_support/registry.py | 43 ++ .../tests/expressions/test_column_patterns.py | 132 ++++ .../test_constraint_expressions.py | 111 +++ .../tests/expressions/test_schema_check.py | 30 + .../overture/schema/addresses/test_address.py | 12 +- .../overture/schema/annex/test_sources.py | 23 +- .../overture/schema/base/test_bathymetry.py | 36 +- .../schema/base/test_infrastructure.py | 34 +- .../overture/schema/base/test_land.py | 34 +- .../overture/schema/base/test_land_cover.py | 36 +- .../overture/schema/base/test_land_use.py | 34 +- .../overture/schema/base/test_water.py | 34 +- .../schema/buildings/test_building.py | 42 +- .../schema/buildings/test_building_part.py | 42 +- .../schema/divisions/test_division.py | 78 ++- .../schema/divisions/test_division_area.py | 38 +- .../divisions/test_division_boundary.py | 20 +- .../overture/schema/places/test_place.py | 58 +- .../schema/transportation/test_connector.py | 12 +- .../transportation/test_segment_rail.py | 70 +- .../transportation/test_segment_road.py | 134 ++-- .../transportation/test_segment_water.py | 62 +- .../tests/test_check.py | 2 +- .../overture-schema-pyspark/tests/test_cli.py | 173 ++++- .../tests/test_harness.py | 38 +- .../tests/test_mutations.py | 179 +++++ .../tests/test_validate.py | 415 
++++++++++-- .../field_constraint/field_constraint.py | 49 +- .../src/overture/schema/system/field_path.py | 195 +++++- .../test_constraint_equality.py | 109 +++ .../tests/test_field_path.py | 239 +++++++ uv.lock | 2 + 126 files changed, 8771 insertions(+), 2751 deletions(-) rename packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/{feature_module.py.jinja2 => model_module.py.jinja2} (83%) create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/constraint_values.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/spec_discovery.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_constraint_values.py create mode 100644 packages/overture-schema-pyspark/tests/_support/registry.py create mode 100644 packages/overture-schema-system/tests/field_constraint/test_constraint_equality.py diff --git a/.github/workflows/check-python-code.yaml b/.github/workflows/check-python-code.yaml index bf3c54aee..ab555ca97 100644 --- a/.github/workflows/check-python-code.yaml +++ b/.github/workflows/check-python-code.yaml @@ -6,12 +6,16 @@ on: - 'packages/**' - 'pyproject.toml' - 'uv.lock' + - 'Makefile' + - '.github/workflows/check-python-code.yaml' push: branches: [main, dev] paths: - 'packages/**' - 'pyproject.toml' - 'uv.lock' + - 'Makefile' + - '.github/workflows/check-python-code.yaml' permissions: contents: read diff --git a/Makefile b/Makefile index 4488d0125..201c77358 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ default: test-all install: uv-sync uv-sync: - @uv sync --all-packages --all-extras 2> /dev/null + @uv sync --all-packages --all-extras -q PYSPARK_EXPRESSIONS := packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated PYSPARK_GENERATED_TESTS := packages/overture-schema-pyspark/tests/generated @@ -25,8 +25,11 @@ generate-pyspark: uv-sync clean-pyspark check: uv-sync generate-pyspark @$(MAKE) -j test-only doctest-only 
lint-only mypy-only +# test-all is the unconditional full run -- testmon-independent, unlike the +# incremental test/test-only targets -- so data-only changes (golden JSON, +# [[examples]]) that testmon cannot see still get exercised. test-all: uv-sync - @uv run pytest -W error $(TESTMON) packages/ + @uv run pytest -W error packages/ test: uv-sync @uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short diff --git a/packages/overture-schema-codegen/README.md b/packages/overture-schema-codegen/README.md index d83cb7967..61c029505 100644 --- a/packages/overture-schema-codegen/README.md +++ b/packages/overture-schema-codegen/README.md @@ -41,14 +41,14 @@ Rendering Output formatting, all presentation decisions ^ Output Layout What to generate, where it goes, how outputs link ^ -Extraction TypeInfo, FieldSpec, ModelSpec, UnionSpec +Extraction TypeInfo, FieldSpec, RecordSpec, UnionSpec ^ Discovery discover_models() from overture-schema-common ``` **Discovery** loads registered Pydantic models via entry points. The return dict includes both concrete `BaseModel` subclasses (like `Building`) and discriminated union -type aliases (like `Segment`). Both satisfy the `FeatureSpec` protocol and flow through +type aliases (like `Segment`). Both satisfy the `ModelSpec` protocol and flow through the same pipeline. **Extraction** unwraps type annotations into specs. `analyze_type()` is the central diff --git a/packages/overture-schema-codegen/docs/design.md b/packages/overture-schema-codegen/docs/design.md index 67e78892e..ec4e3ec1a 100644 --- a/packages/overture-schema-codegen/docs/design.md +++ b/packages/overture-schema-codegen/docs/design.md @@ -45,7 +45,7 @@ tests that verify behavior of generated code. - Markdown documentation pages with field tables, cross-page links, constraint descriptions, and examples. 
-- PySpark validation modules: per-feature expression builders, StructType schemas, +- PySpark validation modules: per-model expression builders, StructType schemas, a feature registry, and generated conformance test modules. ## Architecture @@ -57,7 +57,7 @@ Rendering Output formatting, all presentation decisions ^ Output Layout What to generate, where it goes, how outputs link ^ -Extraction FieldShape, FieldSpec, ModelSpec, EnumSpec, ... +Extraction FieldShape, FieldSpec, RecordSpec, EnumSpec, ... ^ Discovery discover_models() from overture-schema-common ``` @@ -85,8 +85,8 @@ graph TD EX["extraction/type_analyzer / extractors"] end - EX -->|"FeatureSpec[]"| OL - EX -->|"FeatureSpec[]"| PS + EX -->|"ModelSpec[]"| OL + EX -->|"ModelSpec[]"| PS subgraph "Output Layout (Markdown)" OL["layout/type_collection"] @@ -172,7 +172,7 @@ constraints land on structurally distinct nodes without any numeric bookkeeping. Extraction is split by entity kind: -- `extraction/model_extraction.py`: Pydantic model -> `ModelSpec` (fields in MRO-aware +- `extraction/model_extraction.py`: Pydantic model -> `RecordSpec` (fields in MRO-aware documentation order, alias-resolved names, model-level constraints) - `extraction/enum_extraction.py`: Enum class -> `EnumSpec` - `extraction/newtype_extraction.py`: NewType -> `NewTypeSpec` @@ -185,7 +185,7 @@ and sub-unions during extraction, building `ModelRef`/`UnionRef` terminals with specs resolved. A shared cache and cycle detection (`starts_cycle=True`) prevent infinite recursion and duplicate extraction. -### Unions and FeatureSpec +### Unions and ModelSpec Discriminated unions (e.g. `Segment = Annotated[Union[RoadSegment, ...], Discriminator(...)]`) are type aliases, not classes. `UnionSpec` captures the union @@ -194,9 +194,9 @@ Fields shared across all variants appear once; fields present in some variants a wrapped in `AnnotatedField` with `variant_sources` indicating which members contribute them. 
The common base class is identified so shared fields can be deduplicated. -`FeatureSpec` is a type alias `ModelSpec | UnionSpec`. Code that operates on "any -top-level feature" -- supplementary type collection, rendering dispatch -- uses -`FeatureSpec` so union and model features flow through the same pipeline. Consumers +`ModelSpec` is a type alias `RecordSpec | UnionSpec`. Code that operates on "any +top-level model" -- supplementary type collection, rendering dispatch -- uses +`ModelSpec` so records and unions flow through the same pipeline. Consumers narrow with `isinstance` when arm-specific attributes are needed. ### Constraints @@ -218,7 +218,7 @@ reference each other. `collect_all_supplementary_types()` walks the field trees of all feature specs to extract the supplementary types that need their own output: enums, semantic NewTypes, sub-models, and Pydantic built-in types (`HttpUrl`, `EmailStr`). Returns `dict[TypeIdentity, -SupplementarySpec]`, where `SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | +SupplementarySpec]`, where `SupplementarySpec = EnumSpec | NewTypeSpec | RecordSpec | PydanticTypeSpec`. `TypeIdentity` pairs a unique Python object with its display name so registry lookups remain stable when two distinct types share a name. @@ -259,7 +259,7 @@ to registered primitives. ### Markdown renderer Jinja2 templates for feature, enum, NewType, primitives, and geometry pages. -`render_feature()` walks each field's `FieldShape` tree and expands `ModelRef` +`render_model()` walks each field's `FieldShape` tree and expands `ModelRef` terminals inline with dot-notation (e.g., `sources[].dataset`), stopping at `ModelRef.starts_cycle`. `format_type()` in `markdown/type_format.py` converts a `FieldShape` into link-aware display strings using `LinkContext`. @@ -287,7 +287,7 @@ fields absent from the concrete variant instance. 
## PySpark Pipeline -The PySpark codegen transforms extracted `FeatureSpec` trees into validation expression +The PySpark codegen transforms extracted `ModelSpec` trees into validation expression modules and generated conformance test modules. `pyspark/pipeline.py` exposes `generate_pyspark_module` (single spec) and `generate_pyspark_modules` (all specs). @@ -354,19 +354,19 @@ wider Spark numeric type wins). ### Renderer -`pyspark/renderer.py` emits per-feature Python modules containing: +`pyspark/renderer.py` emits per-model Python modules containing: - Private `_fieldname_check()` functions returning `Check(field=, name=, expr=, shape=, root_field=)` -- A public `feature_checks() -> list[Check]` function calling all of them -- A per-feature `FEATURENAME_SCHEMA` StructType constant (e.g. `ADDRESS_SCHEMA`, `SEGMENT_SCHEMA`) +- A public `_checks() -> list[Check]` function calling all of them +- A per-model `MODELNAME_SCHEMA` StructType constant (e.g. `ADDRESS_SCHEMA`, `SEGMENT_SCHEMA`) - An `ENTRY_POINT` string, a `PARTITIONS` dict describing the feature's Hive partition - layout (empty when not partitioned), and a `FEATURE_VALIDATION` constant pairing the + layout (empty when not partitioned), and a `MODEL_VALIDATION` constant pairing the schema and checks The registry is not generated. `_registry.py` lives hand-written in the `overture-schema-pyspark` package and walks the `expressions.generated` namespace at -import time, collecting every module that exposes `ENTRY_POINT` and `FEATURE_VALIDATION` -into a `dict[str, FeatureValidation]`. Modules that also expose `PARTITIONS` populate a +import time, collecting every module that exposes `ENTRY_POINT` and `MODEL_VALIDATION` +into a `dict[str, ModelValidation]`. Modules that also expose `PARTITIONS` populate a parallel partition map keyed by entry point. Expression rendering handles scalar expressions, array_check/nested_array_check chains, @@ -376,7 +376,7 @@ Output is formatted with ruff. 
### Test Renderer -`pyspark/test_renderer.py` emits per-feature pytest modules containing: +`pyspark/test_renderer.py` emits per-model pytest modules containing: - `BASE_ROW_SPARSE` / `BASE_ROW_POPULATED` -- valid synthetic rows - `SCENARIOS: list[Scenario]` -- generated test cases, each carrying a @@ -394,7 +394,7 @@ Union specs with multiple discriminator arms produce one test module per arm. `pyspark/test_data/` is a subpackage with three modules: - `base_row.py` -- `generate_base_row` / `generate_populated_row` produce sparse - (required only) and fully populated valid rows from a `FeatureSpec`. Consults field + (required only) and fully populated valid rows from a `ModelSpec`. Consults field constraints to produce constraint-satisfying values (country codes, geometry WKT, bounds-respecting numbers). `generate_arm_rows` / `generate_populated_arm_rows` produce one row per discriminator arm for union specs. @@ -422,7 +422,7 @@ PySpark validation diverges from Pydantic validation in two documented areas: **Adding a new output target**: Add a column to `TypeMapping` in `extraction/type_registry.py` for type-name resolution. Write a pipeline module that -consumes `FeatureSpec` trees and a renderer that produces output. The extraction layer is +consumes `ModelSpec` trees and a renderer that produces output. The extraction layer is target-independent. Register the format in `cli.py`. **Adding a new type kind**: Add a variant to `FieldShape` in `extraction/field.py`. diff --git a/packages/overture-schema-codegen/docs/walkthrough.md b/packages/overture-schema-codegen/docs/walkthrough.md index 2cd1e2b27..a755accc5 100644 --- a/packages/overture-schema-codegen/docs/walkthrough.md +++ b/packages/overture-schema-codegen/docs/walkthrough.md @@ -64,8 +64,8 @@ it. 
The entry point `overture:transportation:segment` maps to The codegen classifies these at the CLI boundary: `is_model_class` identifies concrete `BaseModel` subclasses, `is_union_alias` calls `analyze_type` to identify discriminated -unions. From that point forward both model features and union features are `FeatureSpec` values -(`ModelSpec | UnionSpec`) and flow through the same pipeline. +unions. From that point forward both records and unions are `ModelSpec` values +(`RecordSpec | UnionSpec`) and flow through the same pipeline. ## 2. Leaf utilities @@ -191,7 +191,7 @@ description, required flag. `ModelRef` and `UnionRef` shapes carry their resolve (populated during `extract_model` recursion), so consumers can follow the tree without a separate expansion pass. -**ModelSpec** represents one Pydantic model: class name, cleaned docstring, fields in +**RecordSpec** represents one Pydantic model: class name, cleaned docstring, fields in documentation order, source class reference, the entry point string that located it, and model-level constraints from decorators like `@require_any_of`. @@ -201,21 +201,21 @@ model-level constraints from decorators like `@require_any_of`. with `variant_sources` -- a tuple of `BaseModel` subclasses indicating which union members contribute that field, or `None` for fields from `TransportationSegment` shared across all members. The `fields` cached property unwraps this for code that doesn't need -provenance. Each member also has its already-extracted `ModelSpec` retained in +provenance. Each member also has its already-extracted `RecordSpec` retained in `member_specs: list[MemberSpec]` so downstream consumers (check builder, base-row generator) reuse it instead of re-extracting the subtree. `UnionSpec` uses `eq=False` because it contains mutable lists and a `cached_property` -- dataclass-generated `__eq__` would be unreliable. -**FeatureSpec** is the type alias `ModelSpec | UnionSpec`. 
Type collection, rendering -dispatch, and example loading all operate on `FeatureSpec`. Consumers narrow with +**ModelSpec** is the type alias `RecordSpec | UnionSpec`. Type collection, rendering +dispatch, and example loading all operate on `ModelSpec`. Consumers narrow with `isinstance` when they need `UnionSpec`-specific attributes like `discriminator_field`. **EnumSpec** and **EnumMemberSpec** serve enums. **NewTypeSpec** serves NewTypes. **NumericSpec** serves numeric primitives with an `Interval` for bounds and optional `float_bits`. -**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec | +**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | RecordSpec | PydanticTypeSpec` -- the set of non-feature types that need their own output pages. `PydanticTypeSpec` covers Pydantic built-ins like `HttpUrl` and `EmailStr` (carrying the class plus a pointer back to Pydantic's docs). `NumericSpec` and geometry types are @@ -252,7 +252,7 @@ precedence. ## 6. Model extraction -`extract_model` converts a Pydantic `BaseModel` subclass into a `ModelSpec`. +`extract_model` converts a Pydantic `BaseModel` subclass into a `RecordSpec`. ### Field ordering @@ -291,9 +291,9 @@ shared cache keyed by Python class and an ancestor set for cycle detection. The cache insert happens *before* recursion. Without this ordering, a back-edge encounter would find no cached entry and infinite-loop instead of marking -`starts_cycle=True`. The sequence: create the partial `ModelSpec`, insert it into the +`starts_cycle=True`. The sequence: create the partial `RecordSpec`, insert it into the cache, then populate its fields. Shared references (the same sub-model used in multiple -fields) reuse the cached `ModelSpec` without marking cycles. +fields) reuse the cached `RecordSpec` without marking cycles. 
`UnionRef` fields resolve via the `union_resolver` callback -- they appear as a single row in the output, linking to their members, rather than expanding inline. @@ -432,7 +432,7 @@ their own pages. `walk_shape` from `field_walk.py` handles recursion into `Array `MapOf`, and `NewTypeShape` wrappers. `ModelRef` fields follow their `.model` reference (populated during `extract_model` -recursion) into nested `ModelSpec` trees. +recursion) into nested `RecordSpec` trees. A single field matches multiple conditions independently. A semantic NewType wrapping a `ModelRef` triggers both NewType extraction and model collection. The checks use @@ -543,7 +543,7 @@ spans. ### Field expansion -`render_feature` dispatches on spec type. `ModelSpec` gets `_expand_model_fields`, which +`render_model` dispatches on spec type. `RecordSpec` gets `_expand_model_fields`, which walks the pre-populated `FieldSpec.model` tree and produces dot-notation rows. `sources[0].dataset` appears as a single row in the flat field table, with `[]` appended per nesting level to list-of-model fields (so a doubly-nested list gets @@ -640,12 +640,12 @@ Seven steps (tree expansion now happens inside `extract_model`): 5. **Render each feature** with its `LinkContext`, loaded examples, and used-by entries. 6. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, - `render_feature` (for sub-models), or `render_pydantic_type` based on spec type. + `render_model` (for sub-models), or `render_pydantic_type` based on spec type. 7. **Render aggregate pages** for primitives and geometry. The return value is `list[RenderedPage]` -- frozen dataclasses carrying content, output -path, and a boolean `is_feature` flag. The caller decides what to do with them. +path, and a boolean `is_model` flag. The caller decides what to do with them. ### The CLI @@ -690,18 +690,18 @@ shared parent. 
The extractor calls `extract_model` on the common base and on eac member -- the results are cached on the `UnionSpec` as `member_specs` -- and partitions the non-shared fields into `AnnotatedField` entries with variant provenance. `extract_discriminator` finds `subtype` and builds `{"road": RoadSegment, "rail": -RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` (a `FeatureSpec`). +RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` (a `ModelSpec`). Meanwhile, concrete models like `Building` go through `extract_model`, which calls `analyze_type` on each field annotation. A field typed `FeatureVersion` unwraps through two NewType layers and an `Annotated` layer, producing a `NewTypeShape(name="FeatureVersion", inner=Primitive(base_type="int32", constraints=(...)))` shape with constraint provenance -linking `ge=0` back to the `int32` NewType. Both extraction paths produce `FeatureSpec` +linking `ge=0` back to the `int32` NewType. Both extraction paths produce `ModelSpec` values. **Pipeline entry.** The feature specs enter `generate_markdown_pages`. Sub-model `FieldShape` trees are fully resolved -- `ModelRef` nodes already carry their -`ModelSpec` from recursive `extract_model` calls. No separate expansion pass is needed. +`RecordSpec` from recursive `extract_model` calls. No separate expansion pass is needed. **Layout.** `partition_numeric_and_geometry_types` reads the system module's exports. `collect_all_supplementary_types` walks Segment's field shapes and discovers referenced @@ -719,7 +719,7 @@ that Segment references `Subtype`, `Id`, `Sources`, and other types. These refer populate "Used By" sections: the `Subtype` enum page shows that Segment uses it. **Rendering.** The pipeline builds a `LinkContext` from Segment's output path and the -full registry. `render_feature` dispatches to `_expand_union_fields` because the spec is +full registry. `render_model` dispatches to `_expand_union_fields` because the spec is a `UnionSpec`. 
Shared fields from `TransportationSegment` render as plain rows. Variant-specific fields get italic tags: `` `road_class` *(Road)* ``. The renderer formats each field's `FieldShape` via `format_type`, which resolves links through the @@ -737,7 +737,7 @@ The Jinja2 template assembles the field table, optional constraints section, exa and "Used By" partial into markdown. **Output.** The pipeline returns a `RenderedPage` with Segment's content, its output -path, and `is_feature=True`. The CLI prepends Docusaurus frontmatter and writes the +path, and `is_model=True`. The CLI prepends Docusaurus frontmatter and writes the file. `_category_.json` files get generated for sidebar navigation. **The layering principle.** At every stage, the modules that do the work never reach diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml index 044b592ce..c3ee37847 100644 --- a/packages/overture-schema-codegen/pyproject.toml +++ b/packages/overture-schema-codegen/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "overture-schema-common", "overture-schema-system", "tomli>=2.0; python_version < '3.11'", + "typing-extensions>=4.0", ] description = "Code generator that produces documentation and code from Pydantic models" dynamic = ["version"] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py index fa2610a04..667843692 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -12,22 +12,15 @@ filter_models, ) -from .extraction.model_extraction import extract_model -from .extraction.specs import ( - FeatureSpec, - is_model_class, - is_union_alias, - partitions_from_tags, -) -from .extraction.union_extraction import extract_union +from .extraction.specs import ModelSpec from .layout.module_layout import ( OUTPUT_ROOT, 
compute_schema_root, - entry_point_class, entry_point_module, ) from .markdown.pipeline import generate_markdown_pages from .pyspark.pipeline import generate_pyspark_modules +from .spec_discovery import extract_model_spec log = logging.getLogger(__name__) @@ -87,7 +80,7 @@ def list_models() -> None: type=click.Path(path_type=Path), default=None, help="Write output files directly into this directory (default: stdout). " - "For pyspark, writes expression modules (*.py) and a _registry.py. " + "For pyspark, writes expression modules (*.py). " "For markdown, writes theme subdirectories.", ) @click.option( @@ -115,53 +108,42 @@ def generate( if output_dir: output_dir.mkdir(parents=True, exist_ok=True) - feature_specs: list[FeatureSpec] = [] - for key, entry in models.items(): - partitions = partitions_from_tags(key.tags) - if is_model_class(entry): - feature_specs.append( - extract_model(entry, entry_point=key.entry_point, partitions=partitions) - ) - elif is_union_alias(entry): - feature_specs.append( - extract_union( - entry_point_class(key.entry_point), - entry, - entry_point=key.entry_point, - partitions=partitions, - ) - ) + model_specs: list[ModelSpec] = [ + spec + for key, entry in models.items() + if (spec := extract_model_spec(key, entry)) is not None + ] if output_format == "pyspark": - _generate_pyspark(feature_specs, output_dir, test_output_dir) + _generate_pyspark(model_specs, output_dir, test_output_dir) else: module_paths = [entry_point_module(k.entry_point) for k in all_models] schema_root = compute_schema_root(module_paths) - _generate_markdown(feature_specs, schema_root, output_dir) + _generate_markdown(model_specs, schema_root, output_dir) def _generate_markdown( - feature_specs: list[FeatureSpec], + model_specs: list[ModelSpec], schema_root: str, output_dir: Path | None, ) -> None: """Generate markdown with directory layout and placement-aware links.""" - pages = generate_markdown_pages(feature_specs, schema_root) + pages = 
generate_markdown_pages(model_specs, schema_root) for page in pages: content = ( - f"{_FEATURE_FRONTMATTER}{page.content}" if page.is_feature else page.content + f"{_FEATURE_FRONTMATTER}{page.content}" if page.is_model else page.content ) _write_output(content, output_dir, page.path) if output_dir: - feature_paths = {page.path for page in pages if page.is_feature} + feature_paths = {page.path for page in pages if page.is_model} all_paths = {page.path for page in pages} _write_category_files(output_dir, all_paths, feature_paths) def _generate_pyspark( - feature_specs: list[FeatureSpec], + model_specs: list[ModelSpec], output_dir: Path | None, test_output_dir: Path | None = None, ) -> None: @@ -170,7 +152,7 @@ def _generate_pyspark( Output is syntactically valid Python; we assume a code formatter runs over the written directories afterwards to match existing conventions. """ - modules = generate_pyspark_modules(feature_specs) + modules = generate_pyspark_modules(model_specs) for mod in modules.source: _write_output(mod.content, output_dir, mod.path) if test_output_dir is not None: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py index 1be5d6d7b..3fc37f4fb 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py @@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, TypeAlias if TYPE_CHECKING: - from .specs import ModelSpec, UnionSpec + from .specs import RecordSpec, UnionSpec __all__ = [ "AnyScalar", @@ -101,7 +101,7 @@ class ModelRef: consumers that recurse into models must stop at cycle starts. 
""" - model: ModelSpec + model: RecordSpec starts_cycle: bool = False diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py index 141af58d2..3403d0e37 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -8,6 +8,7 @@ from __future__ import annotations +import re from collections.abc import Callable from annotated_types import Ge, Gt, Interval, Le, Lt @@ -124,14 +125,41 @@ def _constraint_class_description(constraint: object) -> str | None: return line or None +# re.UNICODE is the implicit default on compiled `str` patterns; rendering it +# would stamp a noise `(?u)` group onto every pattern. Every other flag with a +# visible matching effect is surfaced in the documented pattern. Unlike the +# pyspark dispatch (`compiled_pattern_source`) -- which must reject flags +# Spark's rlike cannot honor -- display is faithful for known flags and never +# fails: a flag absent from this table is dropped from the rendered group, not +# raised on. A new flag added to pyspark's supported set with a visible effect +# belongs here too, or docs will hide that pattern's real behavior. +_DISPLAY_FLAG_LETTERS: tuple[tuple[re.RegexFlag, str], ...] = ( + (re.IGNORECASE, "i"), + (re.MULTILINE, "m"), + (re.DOTALL, "s"), + (re.VERBOSE, "x"), + (re.ASCII, "a"), +) + + +def _inline_flag_prefix(flags: int) -> str: + """Render set regex flags as an inline group like `(?im)`, or "" if none.""" + letters = "".join(c for flag, c in _DISPLAY_FLAG_LETTERS if flags & flag) + return f"(?{letters})" if letters else "" + + def _constraint_pattern(constraint: object) -> str | None: - """Extract the regex pattern string from a constraint, if present. 
+ """Return a constraint's compiled regex as displayable source, or None. - Traverses two levels: constraint.pattern is a compiled re.Pattern - object, and re.Pattern.pattern is the raw string. + Prepends an inline-flag group (e.g. `(?i)` for case-insensitivity) so a + flagged pattern reads as the regex that actually matches rather than its + bare, misleading source. Returns None when `constraint.pattern` is not a + compiled `re.Pattern`. """ compiled = getattr(constraint, "pattern", None) - return getattr(compiled, "pattern", None) + if not isinstance(compiled, re.Pattern): + return None + return f"{_inline_flag_prefix(compiled.flags)}{compiled.pattern}" def constraint_display_text( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py index 86d385d60..baea83b7b 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py @@ -11,6 +11,7 @@ from __future__ import annotations from collections.abc import Callable, Iterator +from enum import Enum from typing_extensions import assert_never @@ -30,8 +31,10 @@ __all__ = [ "all_constraints", + "enum_source", "has_array_layer", "list_depth", + "map_key_value_constraints", "newtype_name", "shape_children", "terminal_model_ref", @@ -88,6 +91,34 @@ def terminal_model_ref(shape: FieldShape) -> ModelRef | None: return terminal if isinstance(terminal, ModelRef) else None +def enum_source(shape: FieldShape) -> type[Enum] | None: + """Return the `Enum` class backing a `Primitive`, or `None`. + + Returns the `Enum` subclass stored in `Primitive.source_type` when + `shape` is a `Primitive` and `source_type` is an `Enum` subclass. 
+ Returns `None` for every other shape, including wrappers: a + `NewTypeShape` wrapping an enum-backed `Primitive` returns `None`, + not the inner enum. + + Parameters + ---------- + shape + The shape to inspect. + + Returns + ------- + type[Enum] or None + The `Enum` class when `shape` is a `Primitive` backed by one, + `None` otherwise. + """ + if not isinstance(shape, Primitive): + return None + src = shape.source_type + if isinstance(src, type) and issubclass(src, Enum): + return src + return None + + def shape_children(shape: FieldShape) -> Iterator[FieldShape]: """Yield direct child shapes within *shape* (one level deep). @@ -213,3 +244,25 @@ def all_constraints(shape: FieldShape) -> tuple[ConstraintSource, ...]: return tuple(collected) case _: assert_never(cur) + + +def map_key_value_constraints( + shape: FieldShape, +) -> tuple[tuple[ConstraintSource, ...], tuple[ConstraintSource, ...]]: + """Return a `MapOf` terminal's (key_constraints, value_constraints), or `((), ())`. + + Looks through `NewTypeShape` / `ArrayOf` wrappers to find a `MapOf`, + then gathers each side's constraints with `all_constraints`. This + surfaces per-key and per-value rules that `all_constraints` on the + enclosing field deliberately stops short of (it treats `MapOf` as a + terminal). Returns `((), ())` when *shape* has no `MapOf` terminal. 
+ """ + cur = shape + while True: + match cur: + case NewTypeShape(inner=inner) | ArrayOf(element=inner): + cur = inner + case MapOf(key=key, value=value): + return all_constraints(key), all_constraints(value) + case _: + return (), () diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py index 36e3cfed6..0d8635efa 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py @@ -7,10 +7,12 @@ `ArrayMinLen` / `ArrayMaxLen` for `ArrayOf` layers, `ScalarMinLen` / `ScalarMaxLen` for scalar layers. -These are codegen-internal classes -- Pydantic users continue to write -`Annotated[X, MinLen(n)]` in their schemas; the wrapping happens inside -`type_analyzer.attach_constraints` when the constraint reaches its -target layer. +These are codegen-internal classes -- schema authors continue to write +the normal Pydantic form (`Field(min_length=n)` / `Field(max_length=n)`), +which Pydantic lowers into the `annotated_types.MinLen` / `MaxLen` +metadata described above. The wrapping into these layer-typed variants +happens inside `type_analyzer.attach_constraints` when the constraint +reaches its target layer. 
""" from __future__ import annotations diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py index d3ef371e9..809d9569e 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -1,4 +1,4 @@ -"""Pydantic model extraction into `ModelSpec`.""" +"""Pydantic model extraction into `RecordSpec`.""" from __future__ import annotations @@ -17,7 +17,7 @@ ModelRef, UnionRef, ) -from .specs import FieldSpec, ModelSpec, is_model_class +from .specs import FieldSpec, RecordSpec, is_model_class from .type_analyzer import ( ModelResolver, UnionResolver, @@ -125,8 +125,8 @@ def extract_model( *, entry_point: str | None = None, partitions: Mapping[str, str] | None = None, -) -> ModelSpec: - """Extract a fully-resolved `ModelSpec` from a Pydantic model class. +) -> RecordSpec: + """Extract a fully-resolved `RecordSpec` from a Pydantic model class. Recurses into sub-models and unions, producing `ModelRef` / `UnionRef` terminals with their specs resolved. Cycles in the @@ -149,17 +149,17 @@ def _extract_model_recursive( *, entry_point: str | None, partitions: Mapping[str, str], - cache: dict[type, ModelSpec], + cache: dict[type, RecordSpec], ancestors: frozenset[type], -) -> ModelSpec: +) -> RecordSpec: """Inner recursive helper for `extract_model`. - Inserts the (partial) `ModelSpec` into `cache` before populating + Inserts the (partial) `RecordSpec` into `cache` before populating its fields so cycles can find it. `ancestors` is the set of types currently on the recursion stack -- a sub-field whose source type appears there is a back-edge and gets `starts_cycle=True`. 
""" - spec = ModelSpec( + spec = RecordSpec( name=model_class.__name__, description=clean_docstring(model_class.__doc__), fields=[], @@ -181,6 +181,7 @@ def _extract_model_recursive( continue shape, is_optional, ti_description = analyze_type( annotation, + owner=model_class, model_resolver=model_resolver, union_resolver=union_resolver, ) @@ -200,14 +201,14 @@ def _extract_model_recursive( def _make_resolvers( - cache: dict[type, ModelSpec], + cache: dict[type, RecordSpec], ancestors: frozenset[type], ) -> tuple[ModelResolver, UnionResolver]: """Build the resolvers that recursively extract sub-models / sub-unions. `cache` shares already-extracted sub-specs across a single extraction so sub-models referenced more than once share a - `ModelSpec`. `ancestors` carries the recursion stack for cycle + `RecordSpec`. `ancestors` carries the recursion stack for cycle detection -- a back-edge produces a `ModelRef` pointing at the in-progress ancestor spec with `starts_cycle=True`. """ diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py index 3aac1e648..f4b676f7b 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -20,10 +20,10 @@ "AnnotatedField", "EnumMemberSpec", "EnumSpec", - "FeatureSpec", + "ModelSpec", "FieldSpec", "MemberSpec", - "ModelSpec", + "RecordSpec", "NewTypeSpec", "NumericSpec", "PydanticTypeSpec", @@ -84,7 +84,7 @@ def module(self) -> str: class _SourceTypeIdentityMixin: """Mixin providing `identity` from `source_type` and `name`. - Shared by EnumSpec, ModelSpec, NewTypeSpec, and PydanticTypeSpec -- + Shared by EnumSpec, RecordSpec, NewTypeSpec, and PydanticTypeSpec -- each has a `source_type` (the Python class/callable) and a `name`. 
UnionSpec uses `source_annotation` instead, so it defines its own `identity`. @@ -136,7 +136,7 @@ class FieldSpec: @dataclass -class ModelSpec(_SourceTypeIdentityMixin): +class RecordSpec(_SourceTypeIdentityMixin): """Specification for a Pydantic model.""" name: str @@ -158,7 +158,7 @@ class AnnotatedField: @dataclass class MemberSpec: - """A union member's class paired with its extracted `ModelSpec`. + """A union member's class paired with its extracted `RecordSpec`. `extract_union` already runs `extract_model` on every member to build the merged `annotated_fields`; retaining the result here lets @@ -167,7 +167,7 @@ class MemberSpec: """ member_cls: type[BaseModel] - spec: ModelSpec + spec: RecordSpec @dataclass @@ -241,15 +241,16 @@ def docs_url(self) -> str: ) -FeatureSpec: TypeAlias = ModelSpec | UnionSpec -"""Top-level feature types passed through the extraction pipeline. +ModelSpec: TypeAlias = RecordSpec | UnionSpec +"""A model is one record, or a tagged union of records. -Consumers narrow with `isinstance` when an arm-specific attribute -is needed (e.g. `UnionSpec.discriminator_field`). +The top-level type passed through the extraction pipeline. Consumers +narrow with `isinstance` when an arm-specific attribute is needed +(e.g. `UnionSpec.discriminator_field`). """ -SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec -"""Non-feature types referenced by feature models. +SupplementarySpec = EnumSpec | NewTypeSpec | RecordSpec | PydanticTypeSpec +"""Supplementary types referenced by models. Excludes NumericSpec and geometry types, which are extracted separately via dedicated functions. 
diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py index 349f1a375..55cd12715 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -5,6 +5,13 @@ at a time, and produces a `FieldShape` describing the structure with constraints attached to the layer they target. +Forward references encountered along the way are resolved against the +`owner` model's namespace before classification. Builtin generics store +`list["Node"]`'s element as a bare `str` (not a `ForwardRef`), which +neither Pydantic nor `typing.get_type_hints` resolves; resolving it here +lets a self-referential field reach its model terminal so the cycle +guard in `extract_model` engages. + Each `Annotated` frame attaches its metadata to the shape its inner annotation unwraps to, so that, e.g., the inner and outer `MinLen` in `Annotated[list[Annotated[str, MinLen(2)]], MinLen(3)]` land on @@ -24,12 +31,21 @@ import types from collections.abc import Callable from dataclasses import dataclass, replace -from typing import Annotated, Any, Literal, NoReturn, Union, get_args, get_origin +from typing import ( + Annotated, + Any, + ForwardRef, + Literal, + NoReturn, + Union, + get_args, + get_origin, +) from annotated_types import MaxLen, MinLen from pydantic import BaseModel from pydantic.fields import FieldInfo -from typing_extensions import Sentinel, assert_never +from typing_extensions import Sentinel, assert_never, evaluate_forward_ref from .docstring import clean_docstring from .field import ( @@ -39,8 +55,10 @@ FieldShape, LiteralScalar, MapOf, + ModelRef, NewTypeShape, Primitive, + UnionRef, ) from .field_walk import terminal_of from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen @@ -74,6 
+92,7 @@ class _NewTypeCtx: "ConstraintSource", "ModelResolver", "UnionResolver", + "UnresolvedForwardRefError", "UnsupportedUnionError", "analyze_type", "attach_constraints", @@ -88,6 +107,15 @@ class UnsupportedUnionError(TypeError): """Raised when `analyze_type` encounters a multi-type union it cannot represent.""" +class UnresolvedForwardRefError(TypeError): + """Raised when a forward-reference annotation cannot be resolved to a type. + + Subclasses `TypeError` so callers that already guard `analyze_type` + with `except (TypeError, UnsupportedUnionError)` treat an unresolvable + forward ref as the analyzable-shape failure it is. + """ + + ModelResolver = Callable[[type[BaseModel]], FieldShape] """Resolver invoked when `analyze_type` reaches a `BaseModel` terminal.""" @@ -161,6 +189,7 @@ def _filter_sentinel_arms(args: tuple[object, ...]) -> list[object]: def analyze_type( annotation: object, *, + owner: type | None = None, model_resolver: ModelResolver | None = None, union_resolver: UnionResolver | None = None, ) -> tuple[FieldShape, bool, str | None]: @@ -170,10 +199,15 @@ def analyze_type( ---------- annotation The annotation to analyze. + owner + The model class these annotations belong to. Supplies the + namespace for resolving forward references (`list["Node"]` + stores `"Node"` as a bare string). When None, an unresolvable + forward ref raises `UnresolvedForwardRefError`. model_resolver Optional callback invoked when the terminal is a `BaseModel` subclass. Returns the `FieldShape` to use at that position -- - typically a `ModelRef` with a resolved `ModelSpec`. Defaults to + typically a `ModelRef` with a resolved `RecordSpec`. Defaults to a `Scalar` carrying the class as `source_type` for callers that cannot resolve sub-models (e.g. dict key/value analysis). 
union_resolver @@ -191,6 +225,7 @@ def analyze_type( return _unwrap( annotation, newtype_ctx=None, + owner=owner, model_resolver=model_resolver, union_resolver=union_resolver, ) @@ -200,6 +235,7 @@ def _unwrap( annotation: object, *, newtype_ctx: _NewTypeCtx | None, + owner: type | None, model_resolver: ModelResolver | None, union_resolver: UnionResolver | None, ) -> tuple[FieldShape, bool, str | None]: @@ -211,6 +247,9 @@ def _unwrap( The innermost `NewType` currently in scope, or None. Sets the terminal `Primitive.base_type` and tags constraints with their contributing `NewType`. + owner + The model class supplying the namespace for forward-ref + resolution; invariant across the walk. Returns ------- @@ -226,10 +265,14 @@ def _recurse( return _unwrap( annotation, newtype_ctx=newtype_ctx, + owner=owner, model_resolver=model_resolver, union_resolver=union_resolver, ) + if isinstance(annotation, (str, ForwardRef)): + annotation = _resolve_forward_ref(annotation, owner) + origin = get_origin(annotation) if is_newtype(annotation): @@ -300,8 +343,8 @@ def _recurse( args = get_args(annotation) if not args: raise TypeError("Bare list without type argument is not supported") - element, opt, desc = _recurse(args[0], newtype_ctx) - return ArrayOf(element=element, constraints=()), opt, desc + element, _, _ = _recurse(args[0], newtype_ctx) + return ArrayOf(element=element, constraints=()), False, None if origin is dict: args = get_args(annotation) @@ -314,6 +357,38 @@ def _recurse( return _terminal(annotation, newtype_ctx, model_resolver), False, None +def _resolve_forward_ref(annotation: str | ForwardRef, owner: type | None) -> object: + """Resolve a string / `ForwardRef` annotation to its type object. + + Resolves against *owner*'s module and class namespaces, plus *owner* + bound to its own name. The class namespace lets a forward ref to a + nested model (`Outer.Inner`) resolve; the self-name binding lets a + self-referential model defined in a local scope (e.g. 
a test body) + resolve `"Owner"` even when it is absent from the module globals. + Raises `UnresolvedForwardRefError` for a name not in scope + (`NameError`), a missing attribute on a dotted reference + (`AttributeError`), or a string that is not a valid type expression + (`SyntaxError`) -- a clean, named failure in place of the opaque + `TypeError` the terminal classifier would otherwise raise on a bare + string. + """ + if owner is not None: + localns = {**vars(owner), owner.__name__: owner} + else: + localns = None + try: + ref = ForwardRef(annotation) if isinstance(annotation, str) else annotation + return evaluate_forward_ref(ref, owner=owner, locals=localns) + except (NameError, SyntaxError, AttributeError) as exc: + target = ( + annotation if isinstance(annotation, str) else annotation.__forward_arg__ + ) + context = f" while extracting {owner.__qualname__}" if owner is not None else "" + raise UnresolvedForwardRefError( + f"Cannot resolve forward reference {target!r}{context}" + ) from exc + + def _constraint_source( constraint: object, newtype_ctx: _NewTypeCtx | None ) -> ConstraintSource: @@ -356,8 +431,11 @@ def attach_constraints( to the `.constraints` of the first `ArrayOf`, `MapOf`, `Primitive`, `LiteralScalar`, or `AnyScalar` reached. Does not descend into `ArrayOf.element` or `MapOf.key` / `.value`. `ModelRef` / `UnionRef` - carry no constraints -- constraints destined for a model terminal - are dropped (preserved verbatim from current behavior). + carry no constraints, so a constraint destined for a model/union + terminal (`Annotated[SomeModel, SomeConstraint()]`) raises + `NotImplementedError` rather than vanishing from both docs and + validation -- no current schema field does this, and silently + dropping it would diverge the generated output from the source. 
Length constraints (`annotated_types.MinLen` / `MaxLen`) are wrapped into the typed `length_constraints` variants matching the @@ -380,8 +458,14 @@ def attach_constraints( case Primitive() | LiteralScalar() | AnyScalar(): wrapped = tuple(_wrap_length_for_scalar(cs) for cs in constraints) return replace(shape, constraints=wrapped + shape.constraints) + case ModelRef() | UnionRef(): + names = ", ".join(type(cs.constraint).__name__ for cs in constraints) + raise NotImplementedError( + f"Constraints ({names}) on a model/union terminal are not " + f"supported; attach them to a scalar or array layer instead" + ) case _: - return shape + assert_never(shape) def _wrap_length_for_array(cs: ConstraintSource) -> ConstraintSource: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py index 19a3007e0..edb7a96fc 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -1,18 +1,23 @@ """Type registry mapping Python types to target representations.""" from dataclasses import dataclass +from typing import Literal from .field import FieldShape from .field_walk import newtype_name, terminal_primitive __all__ = [ + "SparkCategory", "TypeMapping", "PRIMITIVE_TYPES", "get_type_mapping", "is_semantic_newtype", + "primitive_spark_category", "resolve_type_name", ] +SparkCategory = Literal["string", "int", "float", "bool", "other"] + @dataclass(frozen=True) class TypeMapping: @@ -75,6 +80,41 @@ def get_type_mapping(type_name: str) -> TypeMapping | None: return PRIMITIVE_TYPES.get(type_name) +# BinaryType() is intentionally absent: geometry maps to BinaryType() in +# PRIMITIVE_TYPES but falls through to "other" here, not a numeric/string/bool scalar. 
+_SPARK_TYPE_CATEGORIES: dict[str, SparkCategory] = { + "StringType()": "string", + "IntegerType()": "int", + "LongType()": "int", + "FloatType()": "float", + "DoubleType()": "float", + "BooleanType()": "bool", +} + + +def primitive_spark_category(base_type: str) -> SparkCategory: + """Return the Spark category for a primitive base type name. + + Parameters + ---------- + base_type + A primitive type name (`"int32"`, `"float64"`, `"bool"`, `"str"`, ...). + + Returns + ------- + SparkCategory + `"string"` for string-valued types, `"int"` for integer types, + `"float"` for floating-point types, `"bool"` for boolean types, + `"other"` for binary, geometry, or unregistered types. Unknown + types fall back to `"other"`, preserving string-default behavior + for any future unregistered type. + """ + mapping = get_type_mapping(base_type) + if mapping is None or mapping.spark is None: + return "other" + return _SPARK_TYPE_CATEGORIES.get(mapping.spark, "other") + + def resolve_type_name(shape: FieldShape) -> str: """Resolve a shape to its markdown base type name string. diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py index cd3870a5e..922b2a887 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py @@ -133,7 +133,21 @@ def _structural_fingerprint(spec: FieldSpec) -> _TypeShape: return (base_type, kind, spec.is_optional, depth) -def _constraints_fingerprint(spec: FieldSpec) -> frozenset[str]: +def _fingerprint_key(constraint: object) -> object: + """Return a value-stable set key for a single constraint. + + Constraints with value equality -- every `FieldConstraint`, the + `annotated_types` dataclasses, `GeometryTypeConstraint` -- key as + themselves. 
Foreign metadata that falls back to identity equality, namely + pydantic's internal `Field(...)` metadata, keys on its value-stable `repr` + so two equal-valued instances still collapse. + """ + if type(constraint).__eq__ is object.__eq__: + return repr(constraint) + return constraint + + +def _constraints_fingerprint(spec: FieldSpec) -> frozenset[object]: """Constraints declared anywhere in *spec*'s shape tree, as a comparable set. `_structural_fingerprint` deliberately ignores constraints so that @@ -141,8 +155,13 @@ def _constraints_fingerprint(spec: FieldSpec) -> frozenset[str]: metadata still collapse to one `AnnotatedField`. This captures what that ignores, so collisions with diverging constraints fail loudly instead of silently keeping the last member's `FieldSpec`. + + Constraint identity lives on the constraints themselves: `FieldConstraint` + subclasses define value equality and hashing, so equal rules collapse in + the set. `_fingerprint_key` covers the lone foreign holdout that still + compares by identity. 
""" - constraints: list[str] = [] + keys: list[object] = [] def collect(shape: FieldShape) -> None: match shape: @@ -154,12 +173,12 @@ def collect(shape: FieldShape) -> None: | MapOf(constraints=cs) ): for source in cs: - constraints.append(repr(source.constraint)) + keys.append(_fingerprint_key(source.constraint)) case ModelRef() | UnionRef() | NewTypeShape(): pass walk_shape(spec.shape, collect) - return frozenset(constraints) + return frozenset(keys) def extract_union( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py index 621249ec1..c4d597c13 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py @@ -24,9 +24,9 @@ from ..extraction.newtype_extraction import extract_newtype from ..extraction.pydantic_extraction import extract_pydantic_type from ..extraction.specs import ( - FeatureSpec, FieldSpec, ModelSpec, + RecordSpec, SupplementarySpec, TypeIdentity, is_pydantic_sourced, @@ -38,7 +38,7 @@ def collect_all_supplementary_types( - feature_specs: Sequence[FeatureSpec], + model_specs: Sequence[ModelSpec], ) -> dict[TypeIdentity, SupplementarySpec]: """Collect supplementary types by walking expanded feature trees. @@ -47,7 +47,7 @@ def collect_all_supplementary_types( with the same class name from different modules are keyed separately. 
""" - feature_objs: set[object] = {spec.identity.obj for spec in feature_specs} + feature_objs: set[object] = {spec.identity.obj for spec in model_specs} all_specs: dict[TypeIdentity, SupplementarySpec] = {} visited_models: set[object] = set() @@ -58,7 +58,7 @@ def _register_newtype(newtype_ref: object, name: str) -> bool: all_specs[nt_id] = extract_newtype(newtype_ref) return True - def _collect_from_model(model_spec: ModelSpec) -> None: + def _collect_from_model(model_spec: RecordSpec) -> None: if ( model_spec.source_type in visited_models or model_spec.source_type in feature_objs @@ -90,7 +90,7 @@ def _collect_from_shape(shape: FieldShape) -> None: def _visit(node: FieldShape) -> None: match node: - case NewTypeShape(name=name, ref=ref): + case NewTypeShape(name=name, ref=ref) if is_semantic_newtype(node): if _register_newtype(ref, name): _collect_inner_newtypes(ref) case UnionRef(union=u): @@ -116,7 +116,7 @@ def _collect_from_fields(fields: list[FieldSpec]) -> None: for field_spec in fields: _collect_from_shape(field_spec.shape) - for spec in feature_specs: + for spec in model_specs: _collect_from_fields(spec.fields) return all_specs diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py index 9f38f63a1..7113ab8ca 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py @@ -10,7 +10,7 @@ from overture.schema.system.case import to_snake_case from ..extraction.specs import ( - FeatureSpec, + ModelSpec, PydanticTypeSpec, SupplementarySpec, TypeIdentity, @@ -30,7 +30,7 @@ def build_placement_registry( - feature_specs: Sequence[FeatureSpec], + model_specs: Sequence[ModelSpec], all_specs: dict[TypeIdentity, SupplementarySpec], numeric_names: list[TypeIdentity], geometry_names: 
list[TypeIdentity], @@ -46,7 +46,7 @@ def build_placement_registry( ) feature_dirs: set[PurePosixPath] = set() - for spec in feature_specs: + for spec in model_specs: spec_dir = output_dir_for_entry_point(spec.entry_point, schema_root) registry[spec.identity] = _md_path(spec_dir, spec.name) feature_dirs.add(spec_dir) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py index 8a6bb8348..262782609 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py @@ -17,10 +17,10 @@ from ..extraction.numeric_extraction import extract_numerics from ..extraction.specs import ( EnumSpec, - FeatureSpec, ModelSpec, NewTypeSpec, PydanticTypeSpec, + RecordSpec, SupplementarySpec, TypeIdentity, UnionSpec, @@ -36,8 +36,8 @@ ) from .renderer import ( render_enum, - render_feature, render_geometry_from_values, + render_model, render_newtype, render_primitives_from_specs, render_pydantic_type, @@ -57,11 +57,11 @@ class RenderedPage: content: str path: PurePosixPath - is_feature: bool = False + is_model: bool = False def _load_model_examples( - spec: FeatureSpec, + spec: ModelSpec, ) -> list[ExampleRecord] | None: """Load examples for a feature spec, returning None when absent.""" if isinstance(spec, UnionSpec): @@ -101,8 +101,8 @@ def _render_supplement( content = render_enum(spec, link_ctx=ctx, used_by=used_by) case NewTypeSpec(): content = render_newtype(spec, ctx, used_by=used_by) - case ModelSpec(): - content = render_feature(spec, ctx, used_by=used_by) + case RecordSpec(): + content = render_model(spec, ctx, used_by=used_by) case PydanticTypeSpec(): content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) case _: @@ -136,7 +136,7 @@ def partition_numeric_and_geometry_types( def generate_markdown_pages( - feature_specs: 
Sequence[FeatureSpec], + model_specs: Sequence[ModelSpec], schema_root: str, ) -> list[RenderedPage]: """Generate all markdown pages from feature specs. @@ -148,22 +148,22 @@ def generate_markdown_pages( numeric_names, geometry_names = partition_numeric_and_geometry_types( _system_primitive ) - all_specs = collect_all_supplementary_types(feature_specs) + all_specs = collect_all_supplementary_types(model_specs) registry = build_placement_registry( - feature_specs, all_specs, numeric_names, geometry_names, schema_root + model_specs, all_specs, numeric_names, geometry_names, schema_root ) - reverse_refs = compute_reverse_references(feature_specs, all_specs) + reverse_refs = compute_reverse_references(model_specs, all_specs) pages: list[RenderedPage] = [] - for spec in feature_specs: + for spec in model_specs: output_path = registry[spec.identity] ctx = LinkContext(output_path, registry) examples = _load_model_examples(spec) used_by = reverse_refs.get(spec.identity) - content = render_feature(spec, link_ctx=ctx, examples=examples, used_by=used_by) - pages.append(RenderedPage(content=content, path=output_path, is_feature=True)) + content = render_model(spec, link_ctx=ctx, examples=examples, used_by=used_by) + pages.append(RenderedPage(content=content, path=output_path, is_model=True)) for tid, supp_spec in all_specs.items(): pages.append(_render_supplement(tid, supp_spec, registry, reverse_refs)) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py index 0a5c9d08f..10c96a803 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py @@ -4,7 +4,7 @@ import functools import json import re -from collections.abc import Callable +from collections.abc import Callable, Iterable from dataclasses import dataclass from pathlib import 
Path from typing import TypedDict, cast @@ -16,17 +16,22 @@ from ..extraction.examples import ExampleRecord from ..extraction.field import ConstraintSource from ..extraction.field_constraints import constraint_display_text -from ..extraction.field_walk import all_constraints, list_depth, terminal_model_ref +from ..extraction.field_walk import ( + all_constraints, + list_depth, + map_key_value_constraints, + terminal_model_ref, +) from ..extraction.model_constraints import analyze_model_constraints from ..extraction.specs import ( AnnotatedField, EnumSpec, - FeatureSpec, FieldSpec, ModelSpec, NewTypeSpec, NumericSpec, PydanticTypeSpec, + RecordSpec, TypeIdentity, UnionSpec, ) @@ -40,7 +45,7 @@ __all__ = [ "render_enum", - "render_feature", + "render_model", "render_geometry_from_values", "render_newtype", "render_primitives_from_specs", @@ -241,11 +246,18 @@ def _annotate_field_constraints( constraints appear on the NewType's own page instead. """ link_fn = _link_fn_from_ctx(ctx) - notes = [ - constraint_display_text(cs, link_fn=link_fn) - for cs in all_constraints(field.shape) - if cs.source_ref is None - ] + + def directly_applied(prefix: str, sources: Iterable[ConstraintSource]) -> list[str]: + return [ + f"{prefix}{constraint_display_text(cs, link_fn=link_fn)}" + for cs in sources + if cs.source_ref is None + ] + + key_constraints, value_constraints = map_key_value_constraints(field.shape) + notes = directly_applied("", all_constraints(field.shape)) + notes += directly_applied("key: ", key_constraints) + notes += directly_applied("value: ", value_constraints) if notes: _annotate_constraint_notes(row, notes) @@ -379,8 +391,8 @@ def _expand_union_fields( return result -def render_feature( - spec: FeatureSpec, +def render_model( + spec: ModelSpec, link_ctx: LinkContext | None = None, examples: list[ExampleRecord] | None = None, used_by: list[UsedByEntry] | None = None, @@ -395,7 +407,7 @@ def render_feature( if isinstance(spec, UnionSpec): fields = 
_expand_union_fields(spec, link_ctx, constraint_notes=field_notes) - elif isinstance(spec, ModelSpec): + elif isinstance(spec, RecordSpec): fields = _expand_model_fields(spec.fields, link_ctx) _annotate_top_level_constraints(fields, field_notes) else: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py index 39f841345..b163280bd 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -13,15 +13,14 @@ ModelRef, NewTypeShape, Primitive, - Scalar, UnionRef, ) -from ..extraction.field_walk import terminal_of, walk_shape +from ..extraction.field_walk import all_constraints, walk_shape from ..extraction.specs import ( - FeatureSpec, FieldSpec, ModelSpec, NewTypeSpec, + RecordSpec, SupplementarySpec, TypeIdentity, UnionSpec, @@ -51,7 +50,7 @@ class UsedByEntry: def compute_reverse_references( - feature_specs: Sequence[FeatureSpec], + model_specs: Sequence[ModelSpec], all_specs: Mapping[TypeIdentity, SupplementarySpec], ) -> dict[TypeIdentity, list[UsedByEntry]]: """Compute reverse references from types to their referrers. @@ -61,8 +60,8 @@ def compute_reverse_references( Parameters ---------- - feature_specs - Feature-level specs (ModelSpec or UnionSpec). + model_specs + Feature-level specs (RecordSpec or UnionSpec). all_specs Supplementary types (enums, newtypes, sub-models). 
""" @@ -114,7 +113,7 @@ def collect_from_fields( for field_spec in fields: collect_from_shape(field_spec.shape, referrer, referrer_kind) - def collect_from_model_spec(spec: ModelSpec, referrer: TypeIdentity) -> None: + def collect_from_model_spec(spec: RecordSpec, referrer: TypeIdentity) -> None: collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) def collect_from_union_spec(spec: UnionSpec) -> None: @@ -129,20 +128,21 @@ def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None # spec.shape already has the outer NewTypeShape stripped. collect_from_shape(spec.shape, referrer, UsedByKind.NEWTYPE) - # Inherited NewTypes from constraint sources (constraint chains). - terminal = terminal_of(spec.shape) - if isinstance(terminal, Scalar): - for cs in terminal.constraints: - if cs.source_ref is not None and cs.source_name is not None: - add_reference( - TypeIdentity(cs.source_ref, cs.source_name), - referrer, - UsedByKind.NEWTYPE, - ) + # Inherited NewTypes from constraint sources at every layer + # (array / map / scalar), not just the terminal scalar -- a + # NewType chaining through an array NewType carries the inner + # NewType's provenance on the array layer. 
+ for cs in all_constraints(spec.shape): + if cs.source_ref is not None and cs.source_name is not None: + add_reference( + TypeIdentity(cs.source_ref, cs.source_name), + referrer, + UsedByKind.NEWTYPE, + ) # Collect from features - for spec in feature_specs: - if isinstance(spec, ModelSpec): + for spec in model_specs: + if isinstance(spec, RecordSpec): collect_from_model_spec(spec, spec.identity) elif isinstance(spec, UnionSpec): collect_from_union_spec(spec) @@ -151,7 +151,7 @@ def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None for tid, supp_spec in all_specs.items(): if isinstance(supp_spec, NewTypeSpec): collect_from_newtype_spec(supp_spec, tid) - elif isinstance(supp_spec, ModelSpec): + elif isinstance(supp_spec, RecordSpec): collect_from_model_spec(supp_spec, tid) # Sort into deterministic lists. diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py index baaff8668..4a445ebbc 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -6,6 +6,7 @@ from enum import Enum from pydantic import BaseModel +from typing_extensions import assert_never from ..extraction.field import ( AnyScalar, @@ -99,23 +100,43 @@ def _format_union_members( return separator.join(resolve_type_link(TypeIdentity.of(m), ctx) for m in members) +def _model_ref_identity(model_ref: ModelRef) -> TypeIdentity | None: + """Return a linkable identity for a `ModelRef`, or None when unsourced. + + A `ModelRef` links by its `source_type` (the original Python class) + paired with the model name. Returns None when `source_type` is absent + -- a synthesized spec with no backing class has no page to link to. 
+ """ + src = model_ref.model.source_type + if src is None: + return None + return TypeIdentity(src, model_ref.model.name) + + def _model_link(model_ref: ModelRef, ctx: LinkContext | None) -> str: """Resolve a `ModelRef` to a markdown link or fallback code span.""" - src = model_ref.model.source_type - if src is not None: - return resolve_type_link(TypeIdentity(src, model_ref.model.name), ctx) + identity = _model_ref_identity(model_ref) + if identity is not None: + return resolve_type_link(identity, ctx) return f"`{model_ref.model.name}`" def _scalar_identity(scalar: Primitive) -> TypeIdentity | None: - """Return a linkable identity for a `Primitive`'s `source_type`, if any.""" + """Return a linkable identity for a `Primitive`'s `source_type`, if any. + + Enum / BaseModel / Pydantic-sourced types link by their own + identity and class name. Class-based registered primitives + (`Geometry`, `BBox`) are plain classes -- not BaseModel, not + Pydantic-sourced -- so they link by object identity to their + aggregate page under the markdown registry name (`geometry`, `bbox`). + """ src = scalar.source_type - if src is None: + if not isinstance(src, type): return None - if isinstance(src, type) and ( - issubclass(src, Enum) or issubclass(src, BaseModel) or is_pydantic_sourced(src) - ): + if issubclass(src, Enum) or issubclass(src, BaseModel) or is_pydantic_sourced(src): return TypeIdentity.of(src) + if get_type_mapping(src.__name__) is not None: + return TypeIdentity(src, _registry_name(scalar)) return None @@ -152,34 +173,109 @@ def _registry_name(scalar: Scalar) -> str: def _format_map(shape: MapOf, ctx: LinkContext | None) -> str: - """Format a `MapOf` as a bare `map` code span (no outer wrappers).""" - key = _markdown_name_for_shape(shape.key) - value = _markdown_name_for_shape(shape.value) - return f"`map<{key}, {value}>`" + """Format a `MapOf` as a `map` code span, linking key/value types. 
+ + Semantic NewTypes and Enum / BaseModel-sourced key/value types link + to their pages; primitives stay bare. Output is identical whether the + map is rendered in a field cell or as a NewType's underlying type -- + both paths route through here. + + A link has to break out of the surrounding code span, so any bare side + is folded into the adjacent `map<...>` span rather than wrapped in its + own backticks. Two backtick spans must never abut: CommonMark reads the + resulting `` as a two-backtick delimiter and swallows the link. + """ + key_str, key_linked = _map_side(shape.key, ctx) + val_str, val_linked = _map_side(shape.value, ctx) + if not key_linked and not val_linked: + return f"`map<{key_str}, {val_str}>`" + if key_linked and val_linked: + return f"`map<`{key_str}`,`{val_str}`>`" + if key_linked: + return f"`map<`{key_str}`,{val_str}>`" + return f"`map<{key_str},`{val_str}`>`" + + +def _map_side(shape: FieldShape, ctx: LinkContext | None) -> tuple[str, bool]: + """Render one map key/value as (text, is_link). + + Returns a page link when the side resolves to one, else its + container-aware bare name (so a `list<...>` / `map<...>` wrapper + survives instead of collapsing to its element). The flag tells + `_format_map` whether the side breaks out of the surrounding code span. + """ + link = _map_side_link(shape, ctx) + if link is not None: + return link, True + return _bare_map_side_name(shape), False + + +def _map_side_link(shape: FieldShape, ctx: LinkContext | None) -> str | None: + """Return a markdown link for a map key/value that has its own page. + + Links a semantic NewType, a model (`ModelRef`), or an Enum / + BaseModel-sourced primitive when `ctx` resolves a page for it. + NewType and primitive sides link through `list<...>` layers; a model + side links only when it is the direct map side (`depth == 0`), so a + `list`-valued map keeps its `list<...>` wrapper from + `_bare_map_side_name` rather than collapsing to a bare model link. 
+ Returns None when the side has no page; the caller renders a bare + name instead. + """ + identity: TypeIdentity | None = None + depth, cur = _peel_arrays(shape) + if isinstance(cur, NewTypeShape) and is_semantic_newtype(shape): + identity = TypeIdentity(cur.ref, cur.name) + elif depth == 0 and isinstance(cur, ModelRef): + identity = _model_ref_identity(cur) + elif isinstance(cur, Primitive) and cur.source_type is not None: + src = cur.source_type + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) + ): + identity = TypeIdentity(src, cur.base_type) + if identity and ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href) + return None -def _markdown_name_for_shape(shape: FieldShape) -> str: - """Return a bare markdown name (no link, no backticks) for a shape. +def _bare_map_side_name(shape: FieldShape) -> str: + """Bare markdown name for a map key/value, recursing through containers. - Used inside `map` rendering. Picks the semantic NewType name - when wrapping a registered primitive, otherwise the registry name - of the terminal scalar. + Every variant resolves to a real name: `list<...>` / `map<...>` + wrappers recurse, scalars use their registry name (so `Any` is `Any`, + not `?`), semantic NewTypes and models use their type name, and a + pass-through NewType resolves through the registry like the scalar it + aliases. There is no `?` fallback -- a side that can't be named is a + bug, not a placeholder. + + A union-valued map is the one shape left unrendered: no schema field + uses one, and its `\\|`-separated members do not compose cleanly into + a bare `map<...>` span. It raises so the gap surfaces loudly when a + field first needs it, rather than shipping a half-rendered value. 
""" - if isinstance(shape, NewTypeShape): - return shape.name - if isinstance(shape, Scalar): - return _registry_name(shape) - if isinstance(shape, ModelRef): - return shape.model.name - if isinstance(shape, ArrayOf): - inner = _markdown_name_for_shape(shape.element) - return f"list<{inner}>" - if isinstance(shape, MapOf): - return ( - f"map<{_markdown_name_for_shape(shape.key)}, " - f"{_markdown_name_for_shape(shape.value)}>" - ) - return "?" + match shape: + case ArrayOf(element=element): + return f"list<{_bare_map_side_name(element)}>" + case MapOf(key=key, value=value): + return f"map<{_bare_map_side_name(key)}, {_bare_map_side_name(value)}>" + case NewTypeShape(name=name) if is_semantic_newtype(shape): + return name + case NewTypeShape(): + return resolve_type_name(shape) + case ModelRef(model=model): + return model.name + case Primitive() | LiteralScalar() | AnyScalar(): + return _registry_name(shape) + case UnionRef(): + raise NotImplementedError( + "union-typed map key/value is not rendered in markdown; " + "add handling here when a schema field first needs one" + ) + case _: + assert_never(shape) def format_type(field: FieldSpec, ctx: LinkContext | None = None) -> str: @@ -258,40 +354,6 @@ def _peel_to_terminal(shape: FieldShape) -> FieldShape: return shape -def _linked_or_backticked( - shape: FieldShape, ctx: LinkContext | None -) -> tuple[str, bool]: - """Return (formatted_string, has_link) for a shape component. - - Used by NewType page rendering to format the underlying type with - a link to its source page when one exists. 
- """ - identity: TypeIdentity | None = None - _, cur = _peel_arrays(shape) - if isinstance(cur, NewTypeShape) and is_semantic_newtype(shape): - identity = TypeIdentity(cur.ref, cur.name) - elif isinstance(cur, Primitive) and cur.source_type is not None: - src = cur.source_type - if isinstance(src, type) and ( - issubclass(src, Enum) or issubclass(src, BaseModel) - ): - identity = TypeIdentity(src, cur.base_type) - if identity and ctx: - href = ctx.resolve_link(identity) - if href: - return _code_link(identity.name, href), True - return _markdown_name_for_underlying(shape), False - - -def _markdown_name_for_underlying(shape: FieldShape) -> str: - """Bare markdown display name for a NewType's underlying type.""" - if is_semantic_newtype(shape): - _, cur = _peel_arrays(shape) - if isinstance(cur, NewTypeShape): - return cur.name - return resolve_type_name(shape) - - def format_underlying_type(shape: FieldShape, ctx: LinkContext | None = None) -> str: """Format a NewType's underlying type for the page header, with links.""" terminal = _peel_to_terminal(shape) @@ -299,15 +361,7 @@ def format_underlying_type(shape: FieldShape, ctx: LinkContext | None = None) -> return _format_union_members(terminal.union.members, ctx, separator=" | ") if isinstance(terminal, MapOf): - key_str, key_linked = _linked_or_backticked(terminal.key, ctx) - val_str, val_linked = _linked_or_backticked(terminal.value, ctx) - if key_linked or val_linked: - if not key_linked: - key_str = f"`{key_str}`" - if not val_linked: - val_str = f"`{val_str}`" - return f"`map<`{key_str}`,`{val_str}`>`" - return f"`map<{key_str}, {val_str}>`" + return _format_map(terminal, ctx) # For underlying-type rendering on a NewType's own page, skip the # is_semantic_newtype path to avoid self-linking: this shape diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py index 13a0e841a..3383f3a19 100644 
--- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py @@ -1 +1 @@ -"""PySpark codegen pipeline: FeatureSpec to expression and test modules.""" +"""PySpark codegen pipeline: ModelSpec to expression and test modules.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py index fc1f68e57..7274e14b5 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py @@ -5,25 +5,34 @@ - `jinja_env` -- the cached Jinja2 environment. - `py_literal` / `tuple_literal` -- render Python values back to source code. - `parse_field_eq` -- unwrap a `FieldEqCondition` / `Not(FieldEqCondition)`. +- schema constant naming -- `schema_const_name` (the cross-module contract + between expression and test modules). - check/label naming -- `check_name`, `field_label`, `column_level_suffix`, - `model_constraint_field_label`, `COLUMN_LEVEL_FUNCTIONS` (membership), - and `_COLUMN_LEVEL_SUFFIXES` (label suffix lookup). -- collision disambiguation -- `disambiguate` (function names) and - `compute_label_suffixes` (violation labels). + `sanitize_field_name`, `COLUMN_LEVEL_FUNCTIONS` (membership), and + `_COLUMN_LEVEL_SUFFIXES` (label suffix lookup). +- emission rows -- `field_check_rows` and `model_check_rows` flatten a + check list into ordered rows carrying each row's final label, check + name, and (for field checks) disambiguated function name. The renderer + and test renderer both consume these rows, so the flatten-and-suffix + logic lives here once rather than in two positionally-coupled passes. + `disambiguate` (asymmetric, function-name keyed) and `_occurrence_indices` + (the shared collision primitive) back them. 
""" from __future__ import annotations import functools +import re from collections import Counter from collections.abc import Hashable, Iterable +from dataclasses import dataclass from enum import Enum from pathlib import Path from typing import NamedTuple, TypeVar from jinja2 import Environment, FileSystemLoader -from overture.schema.system.field_path import ArrayPath +from overture.schema.system.field_path import ArrayPath, MapProjection from overture.schema.system.model_constraint import ( Condition, FieldEqCondition, @@ -35,16 +44,22 @@ __all__ = [ "COLUMN_LEVEL_FUNCTIONS", + "FieldCheckRow", "FieldEq", + "ModelCheckRow", "check_name", "column_level_suffix", - "compute_label_suffixes", "disambiguate", + "field_check_rows", "field_label", "jinja_env", - "model_constraint_field_label", + "map_runtime_helper", + "model_check_rows", "parse_field_eq", "py_literal", + "require_field_eq", + "sanitize_field_name", + "schema_const_name", "tuple_literal", ] @@ -71,6 +86,23 @@ "check_struct_unique": "_unique", } +_MAP_RUNTIME_HELPERS: dict[MapProjection, str] = { + MapProjection.KEY: "map_keys_check", + MapProjection.VALUE: "map_values_check", +} + + +def map_runtime_helper(projection: MapProjection) -> str: + """PySpark column-patterns helper name for a map projection. + + `MapProjection.KEY` -> `map_keys_check`; + `MapProjection.VALUE` -> `map_values_check`. This is a pyspark-layer + concern; the mapping lives here rather than on `MapProjection` itself + (a system-package enum) to avoid a layering violation. + """ + return _MAP_RUNTIME_HELPERS[projection] + + _TEMPLATES_DIR = Path(__file__).parent / "templates" @@ -88,6 +120,15 @@ def jinja_env() -> Environment: return env +def schema_const_name(model_name: str) -> str: + """Name of the generated `MODELNAME_SCHEMA` StructType constant. + + A cross-module contract: the generated test module imports this + constant by name from the generated expression module. 
+ """ + return f"{model_name.upper()}_SCHEMA" + + _CHECK_PREFIX = "check_" @@ -119,6 +160,12 @@ def py_literal(value: object) -> str: return "[" + ", ".join(py_literal(v) for v in value) + "]" if isinstance(value, tuple): return tuple_literal(py_literal(v) for v in value) + if isinstance(value, frozenset): + if not value: + return "frozenset()" + # Sort the rendered items so regenerated source is stable across runs + # (set iteration order is not). + return "frozenset({" + ", ".join(sorted(py_literal(v) for v in value)) + "})" return repr(value) @@ -146,6 +193,20 @@ def parse_field_eq(condition: Condition) -> FieldEq | None: return None +def require_field_eq(condition: Condition) -> FieldEq: + """Unwrap a field-equality condition, raising on any other shape. + + The strict companion to `parse_field_eq`, for callers that only + handle `FieldEqCondition` / `Not(FieldEqCondition)`: a new condition + subtype fails loudly here, in one place, rather than slipping through + several independent `None` checks with drifting error messages. + """ + parsed = parse_field_eq(condition) + if parsed is None: + raise TypeError(f"Unhandled condition type: {type(condition).__name__}") + return parsed + + def check_name(function: str, override: str | None = None) -> str: """Strip the `check_` prefix to produce a human-readable check name.""" if override is not None: @@ -153,6 +214,17 @@ def check_name(function: str, override: str | None = None) -> str: return function.removeprefix(_CHECK_PREFIX) +# Collapses runs of path punctuation (`.`, `[`, `]`, `{`, `}`, `_`) to a +# single `_` for identifier sanitization (e.g. `names.common{key}` -> +# `names_common_key`). 
+_PATH_SEPARATOR_RUN = re.compile(r"[.\[\]{}_]+") + + +def sanitize_field_name(field: str) -> str: + """Convert an encoded field-path string to a valid Python identifier fragment.""" + return _PATH_SEPARATOR_RUN.sub("_", field).strip("_") + + def column_level_suffix(check: Check) -> str: """Return the column-level label suffix for `check`, or empty string. @@ -180,9 +252,9 @@ def _model_check_base_label(check: ModelCheck) -> str: """Compute the violation field label sans collision suffix. - `require_if` / `forbid_if` produce a per-target label - (`field_required` / `path.field_forbidden`) since each descriptor - now carries a single target field (multi-field decorators split - at dispatch time). + (`field_required` / `path.field_forbidden`); each descriptor + carries a single target field (multi-field decorators split at + dispatch time). - Other kinds (`require_any_of`, `radio_group`, `min_fields_set`) name the whole constraint; on `ArrayPath` targets they use the path itself so anchors are distinguishable across nestings. @@ -202,23 +274,13 @@ def _model_check_base_label(check: ModelCheck) -> str: return f"{check.target}.{target}{kind_suffix}" -def model_constraint_field_label(check: ModelCheck, label_suffix: str) -> str: - """Compute the field label for a model constraint check. - - `label_suffix` (from `compute_label_suffixes`) disambiguates labels - that would otherwise collide -- e.g. two `@require_any_of` on the - same model, or two `@require_if(["x"], ...)` with different - conditions. - """ - return f"{_model_check_base_label(check)}{label_suffix}" - - def _occurrence_indices(keys: list[_K]) -> list[tuple[int, int]]: """Pair each key with `(occurrence_index, total_count)`. `occurrence_index` is the 0-based position of the key among its equal siblings; `total_count` is how many times the key appears in - `keys`. Both `disambiguate` and `compute_label_suffixes` need this + `keys`. 
Both collision styles -- `disambiguate` (function names) and + the symmetric label suffixing in the row builders -- need this "where am I within my collision group" view. """ counts: Counter[_K] = Counter(keys) @@ -235,7 +297,9 @@ def disambiguate(names: list[str]) -> list[str]: The first occurrence of a name is left bare; the second becomes `name_1`, the third `name_2`, and so on. Names that appear once are - untouched. + untouched. This is the asymmetric style, keyed on the function-name + string: leaving the first occurrence bare keeps readable identifiers + for the common no-collision case. Assumes no input name already matches a generated `name_N` form; a collision there would reintroduce a duplicate. Field names in @@ -247,19 +311,148 @@ def disambiguate(names: list[str]) -> list[str]: ] -def compute_label_suffixes(model_checks: list[ModelCheck]) -> list[str]: - """Pre-compute field label suffixes, adding counters only for collisions. +def _symmetric_label_suffixes(keys: list[_K]) -> list[str]: + """Per-key violation-label collision suffixes, symmetric across a group. - Unlike `disambiguate`, every colliding entry receives a `_N` suffix - including the first one (`_0`, `_1`, ...). This is symmetric on - purpose: violation labels for a colliding group all share the same - base name, so each needs an explicit collision index to stay - distinct. `disambiguate` operates on Python function names where - leaving the first occurrence bare preserves readable identifiers - for the common no-collision case. + Every member of a colliding group receives a `_N` suffix including + the first (`_0`, `_1`, ...); unique keys stay bare. Symmetric unlike + `disambiguate` because violation labels in a colliding group all + share the same base name, so each needs an explicit index to stay a + distinct `Check.field` identity (which keys `suppress` matching, + `explain_errors` metadata, and the test's `expected_field`). 
+ """ + return [f"_{idx}" if total > 1 else "" for idx, total in _occurrence_indices(keys)] + + +@dataclass(frozen=True, slots=True) +class FieldCheckRow: + """One emitted field-check row, with its final derived strings. + + The renderer emits one row per descriptor of each `Check`. + `field_check_rows` flattens the check list into these rows once, + computing both the symmetric `label` collision suffix and the + asymmetric `func_name` disambiguation, so the renderer and test + renderer agree without each re-deriving them by a positional index. + + Attributes + ---------- + check + The originating field check. + descriptor_idx + Index of this row's descriptor within `check.descriptors`. + label + The violation `field=` label, including any collision suffix. + name + The check name (`check_name(desc.function, desc.check_name)`). + func_name + The disambiguated private `_..._check` function name. + """ + + check: Check + descriptor_idx: int + label: str + name: str + func_name: str + + +def field_check_rows(field_checks: list[Check]) -> list[FieldCheckRow]: + """Flatten field checks into emission rows with final derived strings. + + Computes both collision passes over the *unfiltered* list, so the + expression module (rendered once across every arm) and a per-arm test + module agree: a per-arm subset could otherwise hide a collision the + shared module still carries, emitting an `expected_field` the module + never produces. Callers filter the returned rows to an arm afterward + rather than computing suffixes over a subset. + + Parameters + ---------- + field_checks + The complete field-check list for one generated module, before + any per-arm filtering. + + Returns + ------- + list + One `FieldCheckRow` per emitted `(check, descriptor)`, in + flattened emission order. 
+ """ + flattened: list[tuple[Check, int, str, str]] = [] + raw_func_names: list[str] = [] + for check in field_checks: + label = field_label(check) + multi = len(check.descriptors) > 1 + for desc_idx, desc in enumerate(check.descriptors): + name = check_name(desc.function, desc.check_name) + func_suffix = f"_{name}" if multi else "" + raw_func_names.append(f"_{sanitize_field_name(label)}{func_suffix}_check") + flattened.append((check, desc_idx, label, name)) + func_names = disambiguate(raw_func_names) + label_suffixes = _symmetric_label_suffixes( + [(label, name) for _check, _idx, label, name in flattened] + ) + return [ + FieldCheckRow(check, desc_idx, f"{label}{label_suffix}", name, func_name) + for (check, desc_idx, label, name), label_suffix, func_name in zip( + flattened, label_suffixes, func_names, strict=True + ) + ] + + +@dataclass(frozen=True, slots=True) +class ModelCheckRow: + """One emitted model-check row, with its final derived strings. + + Model function names embed `idx` and are unique by construction, so + a row carries no `func_name` -- the renderer builds it from `idx`. + + Attributes + ---------- + check + The originating model check. + idx + Position of this check in the unfiltered model-check list; the + renderer embeds it in the private function name. + label + The violation `field=` label, including any collision suffix. + name + The check name (`check_name(model_constraint_function(...))`). + """ + + check: ModelCheck + idx: int + label: str + name: str + + +def model_check_rows(model_checks: list[ModelCheck]) -> list[ModelCheckRow]: + """Flatten model checks into emission rows with final derived strings. + + Like `field_check_rows`, label collision suffixes are computed over + the *unfiltered* list so the expression module and per-arm test + modules agree; callers filter the returned rows to an arm afterward. 
+ + Parameters + ---------- + model_checks + The complete model-check list for one generated module, before + any per-arm filtering. + + Returns + ------- + list + One `ModelCheckRow` per model check, in list order. """ base_labels = [_model_check_base_label(check) for check in model_checks] + label_suffixes = _symmetric_label_suffixes(base_labels) return [ - f"_{idx}" if total > 1 else "" - for idx, total in _occurrence_indices(base_labels) + ModelCheckRow( + check, + idx, + f"{base_label}{label_suffix}", + check_name(model_constraint_function(check.descriptor)), + ) + for idx, (check, base_label, label_suffix) in enumerate( + zip(model_checks, base_labels, label_suffixes, strict=True) + ) ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py index 885074ca6..f2c3b6dda 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -20,7 +20,6 @@ from collections import defaultdict from dataclasses import dataclass, replace -from enum import Enum from pydantic import BaseModel from typing_extensions import assert_never @@ -29,8 +28,12 @@ ArrayPath, ArraySegment, FieldPath, + MapPath, + MapProjection, + MapSegment, ScalarPath, promote_terminal_array, + promote_terminal_map, ) from overture.schema.system.model_constraint import ( FieldEqCondition, @@ -51,8 +54,15 @@ Scalar, UnionRef, ) -from ..extraction.field_walk import terminal_primitive -from ..extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ..extraction.field_walk import ( + all_constraints, + enum_source, + has_array_layer, + terminal_of, + terminal_primitive, + terminal_scalar, +) +from ..extraction.specs import FieldSpec, ModelSpec, RecordSpec, UnionSpec from ..extraction.type_registry import PRIMITIVE_TYPES from 
._render_common import COLUMN_LEVEL_FUNCTIONS from .check_ir import ( @@ -97,10 +107,9 @@ def _enum_values(scalar: Scalar) -> list[object] | None: """Return enum/literal values for a terminal `Scalar`, or `None`.""" if isinstance(scalar, LiteralScalar): return list(scalar.values) - if isinstance(scalar, Primitive): - src = scalar.source_type - if isinstance(src, type) and issubclass(src, Enum): - return [m.value for m in src] + src = enum_source(scalar) + if src is not None: + return [m.value for m in src] return None @@ -230,47 +239,144 @@ def _walk_field_shape( case ModelRef(): return _ref_terminal_checks(shape, path, required, required_gate) - case Primitive() | LiteralScalar() | AnyScalar() | MapOf(): - # `MapOf` shares this arm: a map is a terminal the walker - # does not descend into. Length constraints on a MapOf are - # rejected at extraction (`attach_constraints` raises). - # No schema field exercises map-level constraints today. - constraints = shape.constraints - element_descriptors = list(carried_element) - enum_values = _enum_values(shape) if isinstance(shape, Scalar) else None - if enum_values is not None: - element_descriptors.append( - ExpressionDescriptor( - function="check_enum", - args=(tuple(enum_values),), - ) + case Primitive() | LiteralScalar() | AnyScalar(): + return _terminal_scalar_checks( + shape, + path, + base_type=base_type, + required=required, + required_gate=required_gate, + carried_element=carried_element, + ), None + + case MapOf(key=key_shape, value=value_shape): + # A map is itself a terminal column: its own value carries the + # required check and any map-level constraints (currently always + # empty -- map-level length constraints are rejected at + # extraction). The key and value layers are walked separately so + # their per-key/per-value constraints land on `MapPath` targets. + # A `ModelRef`/`UnionRef` projection hands back a `_ShapeTerminal` + # for the caller to descend into, exactly as a `list[Model]` + # element does. 
+ field_checks = _terminal_scalar_checks( + shape, + path, + base_type=base_type, + required=required, + required_gate=required_gate, + carried_element=carried_element, + ) + key_checks, key_terminal = _map_projection_checks( + key_shape, path, MapProjection.KEY + ) + value_checks, value_terminal = _map_projection_checks( + value_shape, path, MapProjection.VALUE + ) + if key_terminal is not None and value_terminal is not None: + raise NotImplementedError( + "map with a model key and a model value is not supported" ) - element_descriptors.extend( - _dispatch_layer_constraints(constraints, base_type) + terminal = value_terminal if value_terminal is not None else key_terminal + return [*field_checks, *key_checks, *value_checks], terminal + + assert_never(shape) + + +def _terminal_scalar_checks( + shape: Scalar | MapOf, + path: FieldPath, + *, + base_type: str | None, + required: bool, + required_gate: FieldPath | None, + carried_element: list[ExpressionDescriptor], +) -> list[Check]: + """Build the Check(s) for a terminal value: enum, constraints, base type. + + Shared by the scalar-terminal arm and a map field's own value -- a + `MapOf` is itself a terminal column, distinct from its key/value layers. 
+ """ + element_descriptors = list(carried_element) + enum_values = _enum_values(shape) if isinstance(shape, Scalar) else None + if enum_values is not None: + element_descriptors.append( + ExpressionDescriptor(function="check_enum", args=(tuple(enum_values),)) + ) + element_descriptors.extend( + _dispatch_layer_constraints(shape.constraints, base_type) + ) + if base_type is not None: + base_descriptors = dispatch_base_type(base_type) + if base_descriptors is not None: + element_descriptors.extend(base_descriptors) + element_descriptors = list(dict.fromkeys(element_descriptors)) + + if required: + return [ + Check( + descriptors=(_required_descriptor(required_gate), *element_descriptors), + target=path, ) - if base_type is not None: - base_descriptors = dispatch_base_type(base_type) - if base_descriptors is not None: - element_descriptors.extend(base_descriptors) - element_descriptors = list(dict.fromkeys(element_descriptors)) + ] + if element_descriptors: + return [Check(descriptors=tuple(element_descriptors), target=path)] + return [] - if required: - return [ - Check( - descriptors=( - _required_descriptor(required_gate), - *element_descriptors, - ), - target=path, - ) - ], None - if element_descriptors: - return [ - Check(descriptors=tuple(element_descriptors), target=path) - ], None - return [], None - assert_never(shape) +def _map_projection_checks( + sub_shape: FieldShape, + map_path: FieldPath, + projection: MapProjection, +) -> tuple[list[Check], _ShapeTerminal | None]: + """Walk a map's key or value shape, emitting checks on a `MapPath` target. + + Supports two shapes reached without array iteration: a scalar terminal + (`dict[K, scalar]` -- per-key/value constraints land on a bare `MapPath`) + and a `ModelRef`/`UnionRef` terminal (`dict[K, Model]` -- the returned + `_ShapeTerminal` lets the caller descend into the model's fields and + constraints on a `MapPath` leaf, mirroring a `list[Model]` element). 
+ + Two shapes fall outside that bound and have no representable `MapPath`: + + - a key/value carrying an array layer (`dict[K, list[V]]`), whose scalar + terminal sits under an `ArrayOf` that `terminal_scalar` would unwrap; + - a map reached through an array (`list[dict[K, V]]`, a `map_path` with + an `ArraySegment`), whose key/value can't anchor a struct-prefixed + `MapPath`. + + Each is handled the same way: an unsupported shape carrying a key/value + constraint (or a model to descend into) raises `NotImplementedError` to + keep the dropped check loud; an unconstrained, non-model shape yields no + checks, since there is nothing to validate. The constraint -- not the + shape alone -- is what stays loud, matching the silent treatment of + unconstrained maps. + """ + reached_through_array = isinstance(map_path, ArrayPath) + is_ref_terminal = isinstance(terminal_of(sub_shape), (ModelRef, UnionRef)) + if reached_through_array or has_array_layer(sub_shape): + if all_constraints(sub_shape) or is_ref_terminal: + raise NotImplementedError( + f"map {projection.value} on an unsupported shape (list layer " + f"or map nested in an array) is not supported ({sub_shape!r})" + ) + return [], None + if not is_ref_terminal and terminal_scalar(sub_shape) is None: + if all_constraints(sub_shape): + raise NotImplementedError( + f"map {projection.value} carrying a constraint on a non-scalar " + f"terminal is not supported ({sub_shape!r})" + ) + return [], None + primitive = terminal_primitive(sub_shape) + sub_checks, terminal = _walk_field_shape( + sub_shape, + promote_terminal_map(map_path, projection), + base_type=primitive.base_type if primitive is not None else None, + required=False, + required_gate=None, + carried_element=[], + ) + return sub_checks, terminal def _ref_terminal_checks( @@ -311,6 +417,9 @@ def _build_field_checks( shared. It propagates to any model constraints discovered through this field's sub-models so per-arm test modules can filter them correctly. 
""" + # `prefix` is a ScalarPath/ArrayPath, or a MapPath when descending into + # a `dict[K, Model]` value model -- all three define `append_struct`, + # which extends the path's struct leaf with this field's name. path = prefix.append_struct(field_spec.name) checks, terminal = _walk_field_shape( field_spec.shape, @@ -354,7 +463,7 @@ def _build_field_checks( def _recurse_into_model( - model_spec: ModelSpec, + model_spec: RecordSpec, prefix: FieldPath = ScalarPath(), is_optional: bool = False, nullable_gate: FieldPath | None = None, @@ -365,8 +474,10 @@ def _recurse_into_model( `prefix` is the terminal path the shape walker reached the `ModelRef` at, defaulting to the empty `ScalarPath()` at the row root. Its terminal - segment is an `ArraySegment` exactly when the field is itself a list, - which resets the nullable gate (array iteration handles element + segment is an `ArraySegment` (the field is a list) or a `MapSegment` + (the field is a `dict[K, Model]` reached through its key/value + projection) exactly when the model is reached through iteration, which + resets the nullable gate (the iteration itself handles per-element nullability). `arm` propagates from the union arm whose variant-specific field led @@ -375,8 +486,8 @@ def _recurse_into_model( test). 
""" last_seg = prefix.segments[-1] if prefix.segments else None - field_is_list = isinstance(last_seg, ArraySegment) - if field_is_list: + field_is_iterated = isinstance(last_seg, (ArraySegment, MapSegment)) + if field_is_iterated: child_gate: FieldPath | None = None else: child_gate = prefix if is_optional else nullable_gate @@ -396,7 +507,7 @@ def _recurse_into_model( if model_spec.constraints: constraint_gate = ( prefix - if is_optional and not field_is_list and isinstance(prefix, ArrayPath) + if is_optional and not field_is_iterated and isinstance(prefix, ArrayPath) else None ) sub_model_constraint_checks = _dispatch_model_constraints( @@ -412,6 +523,17 @@ def _recurse_into_model( return field_checks, model_checks +def _is_struct_only_prefix(prefix: FieldPath) -> bool: + """Non-root struct path with no array traversal. + + True when `prefix` has one or more struct segments but no array + iteration -- meaning discriminator column access and model-constraint + targeting cannot use the prefix without resolving it into a + struct-qualified path, which the current renderer does not support. + """ + return not isinstance(prefix, ArrayPath) and bool(prefix.segments) + + def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: """Raise when emitting a model constraint at a struct-only prefix. @@ -419,9 +541,11 @@ def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: collapses to the row root, which is wrong for any non-skipped constraint. Today only `NoExtraFieldsConstraint` reaches here (and dispatches to None); a real descriptor at this depth is a renderer - gap, not a normal case. + gap, not a normal case. A `MapPath` is exempt -- it is a valid anchor + (`_model_constraint_target` keeps it, and the renderer wraps the check + in `map_values_check`/`map_keys_check`). 
""" - if not isinstance(prefix, ArrayPath) and prefix.segments: + if _is_struct_only_prefix(prefix) and not isinstance(prefix, MapPath): raise NotImplementedError( f"Model constraint on struct-nested {name!r} " f"(reached at {prefix!r}) -- the renderer has no anchor " @@ -429,6 +553,25 @@ def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: ) +def _guard_struct_nested_variant_fields(prefix: FieldPath, name: str) -> None: + """Raise when emitting variant-gated field checks at a struct-only prefix. + + A `ColumnGuard` carries a bare discriminator name that renders as + `F.col("")` -- a top-level column access. When the + union is reached through a plain struct field, the discriminator lives + at `.`, so the rendered gate reads the wrong + column. Raising loudly is safer than emitting a mis-gated check; no + current schema nests a discriminated union under a plain struct. + """ + if _is_struct_only_prefix(prefix): + raise NotImplementedError( + f"Discriminated union {name!r} with variant-gated field checks " + f"at struct-nested prefix {prefix!r} -- `ColumnGuard` would " + "render the discriminator as a top-level column, not a " + "struct-qualified path." + ) + + def _recurse_into_union( union_spec: UnionSpec, prefix: FieldPath = ScalarPath(), @@ -467,11 +610,14 @@ def _recurse_into_union( def _model_constraint_target(prefix: FieldPath) -> FieldPath: """Where a model constraint's check should be anchored. - Two supported cases: + Three supported cases: - `ArrayPath` -- constraints on a sub-model reached through array iteration target the array path (so the renderer wraps the check in `array_check`). + - `MapPath` -- constraints on a `dict[K, Model]` value model target the + map path (so the renderer wraps the check in `map_values_check`), + mirroring the array case. - Empty or struct-only `ScalarPath` -- constraints anchor at the row root. Pure struct nesting (e.g. 
`Names` reached at `ScalarPath('names')`) collapses here because the renderer has no @@ -482,7 +628,7 @@ def _model_constraint_target(prefix: FieldPath) -> FieldPath: observationally inert today; a non-skipped constraint at this depth would surface as a wrong-anchor bug. """ - return prefix if isinstance(prefix, ArrayPath) else ScalarPath() + return prefix if isinstance(prefix, (ArrayPath, MapPath)) else ScalarPath() def _dispatch_model_constraints( @@ -554,6 +700,7 @@ def _field_checks_for_union( ) model_checks.extend(sub_model_checks) if values and discriminator is not None: + _guard_struct_nested_variant_fields(prefix, spec.name) # Outer guards land first so the renderer composes # outer-then-inner (e.g. a `ColumnGuard` from a parent union, # then an `ElementGuard` from the nested union the field @@ -692,7 +839,7 @@ def require_check(field_name: str, condition: FieldEqCondition | Not) -> ModelCh def build_checks( - spec: FeatureSpec, + spec: ModelSpec, ) -> tuple[list[Check], list[ModelCheck]]: """Build all check IR for a feature spec. 
diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py index b02f5b735..ff8cc614c 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py @@ -14,6 +14,7 @@ from annotated_types import Ge, Gt, Interval, Le, Lt from pydantic import Strict +from pydantic._internal._fields import PydanticMetadata from overture.schema.system.case import to_snake_case from overture.schema.system.field_constraint.collection import UniqueItemsConstraint @@ -35,7 +36,8 @@ from overture.schema.system.primitive import GeometryTypeConstraint from overture.schema.system.ref import Reference -from ..extraction.field import FieldShape, ModelRef +from ..extraction.docstring import first_docstring_line +from ..extraction.field import FieldShape, ModelRef, Primitive from ..extraction.field_walk import has_array_layer, terminal_of from ..extraction.length_constraints import ( ArrayMaxLen, @@ -44,6 +46,7 @@ ScalarMinLen, ) from ..extraction.specs import FieldSpec +from ..extraction.type_registry import primitive_spark_category __all__ = [ "ExpressionDescriptor", @@ -114,13 +117,58 @@ class ExpressionDescriptor: } -def _normalize_anchor(pattern: str) -> str: +# re.UNICODE is Python's implicit default on compiled `str` patterns and needs +# no translation -- Java's regex engine is Unicode-aware without a flag. +# re.IGNORECASE maps to the inline `(?i)` flag Spark's rlike honors. A new +# supported flag with a visible matching effect also belongs in +# `field_constraints._DISPLAY_FLAG_LETTERS`, or docs will hide its behavior. +_SUPPORTED_PATTERN_FLAGS = re.IGNORECASE | re.UNICODE + + +def compiled_pattern_source(pattern: re.Pattern[str]) -> str: + """Return the Spark-regex source string for a compiled `re.Pattern`. 
+ + A compiled `re.Pattern` is the only Pydantic carrier for a flagged pattern + (a bare `Field(pattern=str)` cannot express `re.I`). Translates the flags + Spark's `rlike` can honor into inline prefixes -- `re.IGNORECASE` becomes + `(?i)`, the idiom `constraint_expressions.check_url_format` already uses. + The ASCII/Unicode case-folding divergence between Java and Python is the + same accepted divergence documented at `check_pattern`. + + Raises + ------ + NotImplementedError + For any flag without a faithful `rlike` translation (e.g. + `re.MULTILINE`), naming the flag rather than silently dropping it. + """ + unsupported = re.RegexFlag(pattern.flags & ~_SUPPORTED_PATTERN_FLAGS) + if unsupported: + raise NotImplementedError( + f"check_pattern cannot translate regex flag {unsupported!r} to Spark rlike" + ) + source = pattern.pattern + # Only IGNORECASE emits a prefix; UNICODE passes the gate but is a no-op + # (Java is Unicode-aware unflagged). A new supported flag needs its own + # translation clause here, or it will pass the gate and be silently dropped. + if pattern.flags & re.IGNORECASE: + source = f"(?i){source}" + return source + + +def normalize_anchor(pattern: str) -> str: """Replace trailing `$` with `\\z` for Java/Spark regex compatibility. - Leaves an escaped trailing `\\$` (literal dollar match) untouched. + Uses backslash-parity to distinguish a real anchor from an escaped + literal `$`. Counts the run of backslashes immediately before the + final `$`: an even count means `$` is unescaped (convert to `\\z`); + an odd count means it is an escaped literal `$` (leave unchanged). 
""" - if pattern.endswith("$") and not pattern.endswith(r"\$"): - return pattern[:-1] + r"\z" + if not pattern.endswith("$"): + return pattern + prefix = pattern[:-1] # strip the trailing $ + backslashes = len(prefix) - len(prefix.rstrip("\\")) + if backslashes % 2 == 0: + return prefix + r"\z" return pattern @@ -135,11 +183,10 @@ def _pattern_label(constraint: PatternConstraint) -> str: """Extract a human-readable label from a PatternConstraint.""" if constraint.description: return constraint.description - doc = type(constraint).__doc__ - if doc: - return doc.strip().split("\n")[0].rstrip(".") + if (summary := first_docstring_line(type(constraint).__doc__)) is not None: + return summary.rstrip(".") name = type(constraint).__name__.removesuffix("Constraint") - return re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", name).lower() + return to_snake_case(name).replace("_", " ") _ConstraintHandler = Callable[[Any, str | None], ExpressionDescriptor | None] @@ -174,16 +221,51 @@ def _dispatch_pattern( constraint: PatternConstraint, _base_type: str | None, ) -> ExpressionDescriptor: - """Map a PatternConstraint (or subclass) to a check_pattern descriptor.""" + """Map a PatternConstraint (or subclass) to a check_pattern descriptor. + + The Python `re` pattern source is embedded verbatim (anchor and inline + flags aside) into a Java `rlike`. The two engines diverge on Unicode + shorthand classes and `.` line-terminator handling; that is an accepted + divergence, documented at `constraint_expressions.check_pattern`. + """ return ExpressionDescriptor( function="check_pattern", - args=(_normalize_anchor(constraint.pattern.pattern),), + args=(normalize_anchor(compiled_pattern_source(constraint.pattern)),), constraint_type=type(constraint), label=_pattern_label(constraint), check_name=_pattern_check_name(constraint), ) +def _raw_pattern(constraint: object) -> str | None: + """Return the Spark-regex source of raw pydantic `Field(pattern=)`, or None. 
+ + Pydantic represents `Field(pattern=...)` as a `PydanticMetadata` marker + (the private `_PydanticGeneralMetadata`) carrying the pattern as either a + `str` (`Field(pattern="...")`) or a compiled `re.Pattern` + (`Field(pattern=re.compile(...))` -- the only carrier for a flagged, + e.g. case-insensitive, pattern). The schema's own `PatternConstraint` is + handled earlier; raw metadata reaches here from `dict[K, V]` keys/values + that used `Field(pattern=)` rather than a schema constraint class + (e.g. `Sources.license_priority`). + + The `PydanticMetadata` check -- not merely a `.pattern` attribute -- + keeps `dispatch_constraint`'s fallback contract intact: an unrelated future + constraint that happens to expose a `.pattern` still raises `TypeError` + rather than being silently dispatched as a `check_pattern`. A compiled + pattern carrying an untranslatable flag raises `NotImplementedError` via + `compiled_pattern_source`. + """ + if not isinstance(constraint, PydanticMetadata): + return None + pattern = getattr(constraint, "pattern", None) + if isinstance(pattern, str): + return pattern + if isinstance(pattern, re.Pattern): + return compiled_pattern_source(pattern) + return None + + # Ordered: the first matching entry wins, so any subclass relationship # between keys must place the more-specific class first. 
StrippedConstraint # subclasses PatternConstraint, so it must appear before the PatternConstraint @@ -217,11 +299,15 @@ def _dispatch_pattern( ), ( StrippedConstraint, - lambda _c, _bt: ExpressionDescriptor(function="check_stripped"), + lambda c, _bt: ExpressionDescriptor( + function="check_stripped", constraint_type=type(c) + ), ), ( JsonPointerConstraint, - lambda _c, _bt: ExpressionDescriptor(function="check_json_pointer"), + lambda c, _bt: ExpressionDescriptor( + function="check_json_pointer", constraint_type=type(c) + ), ), (PatternConstraint, _dispatch_pattern), # check_struct_unique uses Spark's array_distinct: structural equality on @@ -274,6 +360,17 @@ def dispatch_constraint( for key_types, handler in _CONSTRAINT_DISPATCH: if isinstance(constraint, key_types): return handler(constraint, base_type) + raw_pattern = _raw_pattern(constraint) + if raw_pattern is not None: + # Raw pydantic `Field(pattern=)` metadata. `constraint_type` stays + # None (the pydantic class is a private closure type, not a stable + # key); the curated valid/invalid pair lives in `PATTERN_VALUES`, + # keyed by the normalized pattern in `args`. + return ExpressionDescriptor( + function="check_pattern", + args=(normalize_anchor(raw_pattern),), + label="pattern", + ) raise TypeError(f"Unhandled constraint type: {type(constraint).__name__}") @@ -321,12 +418,14 @@ class RequireIf: class ForbidIf: """Descriptor for `check_forbid_if`: field must be absent when condition holds. - `field_shapes` pairs non-string field names with their `FieldShape` so - the test renderer can emit type-appropriate `fill_values` literals. + `field_shapes` pairs non-string-default field names with their `FieldShape` + so the test renderer can emit type-appropriate `fill_values` literals. Stored as a tuple of `(name, shape)` pairs so the descriptor is hashable; consumers convert with `dict()` when they need mapping access. 
String fields are omitted because the renderer defaults to - `""` for them without needing the shape. + `""` for them, which is correct. Arrays, model references, and + non-string scalars (int/uint/float/bool) require an explicit entry + so the renderer emits a typed literal (`[{}]`, `{}`, `0`, `False`, etc.). """ field_names: tuple[str, ...] @@ -394,11 +493,21 @@ def _unwrap_require_any_of_names( return tuple(result) -def _is_compound_shape(shape: FieldShape) -> bool: - """Whether `shape` needs a non-`{}` fill value in mutation helpers.""" +def _needs_explicit_fill(shape: FieldShape) -> bool: + """Whether `shape` needs an explicit (non-default-string) fill value. + + Arrays and model references need `[{}]` / `{}` fill. Non-string + scalars (int/uint/float/bool families) need a typed fill (0, False, + etc.). Plain string scalars are omitted -- the `""` default is correct. + """ if has_array_layer(shape): return True - return isinstance(terminal_of(shape), ModelRef) + terminal = terminal_of(shape) + if isinstance(terminal, ModelRef): + return True + if not isinstance(terminal, Primitive): + return False + return primitive_spark_category(terminal.base_type) in ("int", "float", "bool") def forbid_if_field_shapes( @@ -407,14 +516,16 @@ def forbid_if_field_shapes( ) -> tuple[tuple[str, FieldShape], ...]: """Build the `field_shapes` pairs for non-string ForbidIf targets. - Keeps only fields whose shape is compound (an array or a model - reference); string fields are omitted because the test renderer - defaults their fill value to `""` without needing the shape. + Keeps fields whose shape is an array, a model reference, or a + non-string scalar (int/uint/float/bool families). String fields are + omitted because the test renderer defaults their fill value to `""` + without needing the shape. 
""" return tuple( (name, shape) for name in field_names - if (shape := shape_by_name.get(name)) is not None and _is_compound_shape(shape) + if (shape := shape_by_name.get(name)) is not None + and _needs_explicit_fill(shape) ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py index a6033c0db..781131120 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py @@ -15,10 +15,10 @@ from overture.schema.system.discovery import entry_point_to_path from overture.schema.system.primitive import GeometryType -from ..extraction.specs import FeatureSpec, UnionSpec +from ..extraction.specs import ModelSpec, UnionSpec from .check_builder import build_checks from .check_ir import Check, ModelCheck -from .renderer import render_feature_module +from .renderer import render_model_module from .schema_builder import build_schema from .test_data.base_row import ( generate_arm_rows, @@ -78,16 +78,16 @@ def _support_prefix(directory: PurePosixPath) -> str: return "." * (len(directory.parts) + _DOTS_FROM_TEST_TO_TESTS_ROOT) -def _require_entry_point(spec: FeatureSpec) -> str: +def _require_entry_point(spec: ModelSpec) -> str: """Return *spec*'s entry point or raise if it's missing.""" if spec.entry_point is None: - msg = f"FeatureSpec {spec.name!r} has no entry_point." + msg = f"ModelSpec {spec.name!r} has no entry_point." raise ValueError(msg) return spec.entry_point -def _directory_and_feature_name(spec: FeatureSpec) -> tuple[PurePosixPath, str]: - """Return the output directory and snake_case feature name for a spec. +def _directory_and_model_name(spec: ModelSpec) -> tuple[PurePosixPath, str]: + """Return the output directory and snake_case model name for a spec. 
Both halves derive from the entry-point's class name so filenames and symbol names stay in sync with what the runtime registry @@ -102,7 +102,7 @@ def _extract_geometry_types( ) -> tuple[GeometryType, ...]: """Collect allowed geometry types from every `check_geometry_type` descriptor. - A feature may carry multiple `check_geometry_type` descriptors -- e.g. + A model may carry multiple `check_geometry_type` descriptors -- e.g. one per union arm with a distinct allowed-types set. The result is the union of all of them, sorted by name for deterministic output. """ @@ -132,40 +132,40 @@ def _init_modules(paths: Iterable[PurePosixPath]) -> list[GeneratedModule]: return [GeneratedModule(content="", path=d / "__init__.py") for d in sorted(dirs)] -def generate_pyspark_module(spec: FeatureSpec) -> GeneratedModule: - """Generate a PySpark validation module from a feature spec. +def generate_pyspark_module(spec: ModelSpec) -> GeneratedModule: + """Generate a PySpark validation module from a model spec. Parameters ---------- spec - The extracted feature spec to generate from. + The extracted model spec to generate from. Returns ------- GeneratedModule Module content and a relative output path mirroring the - feature's entry-point package layout. + model's entry-point package layout. """ return _render_module(spec, build_checks(spec)) def generate_pyspark_modules( - feature_specs: Sequence[FeatureSpec], + model_specs: Sequence[ModelSpec], ) -> PipelineOutput: - """Generate PySpark validation modules for all features. + """Generate PySpark validation modules for all models. Parameters ---------- - feature_specs - Extracted feature specs to generate from. + model_specs + Extracted model specs to generate from. Returns ------- PipelineOutput - Source-tree feature modules and test-tree modules. Each tree + Source-tree model modules and test-tree modules. Each tree includes the `__init__.py` files needed for its package layout. 
""" - items = [(spec, build_checks(spec)) for spec in feature_specs] + items = [(spec, build_checks(spec)) for spec in model_specs] source = [_render_module(spec, checks) for spec, checks in items] test: list[GeneratedModule] = [] for spec, checks in items: @@ -176,16 +176,16 @@ def generate_pyspark_modules( def _render_module( - spec: FeatureSpec, + spec: ModelSpec, checks: tuple[list[Check], list[ModelCheck]], ) -> GeneratedModule: - """Build checks, schema, and render for a feature spec.""" + """Build checks, schema, and render for a model spec.""" field_checks, model_checks = checks schema_fields = build_schema(spec) geometry_types = _extract_geometry_types(field_checks) - directory, feature_name = _directory_and_feature_name(spec) - content = render_feature_module( - feature_name, + directory, model_name = _directory_and_model_name(spec) + content = render_model_module( + model_name, field_checks, model_checks, schema_fields, @@ -195,12 +195,12 @@ def _render_module( ) return GeneratedModule( content=content, - path=directory / f"{feature_name}.py", + path=directory / f"{model_name}.py", ) def _select_arm_rows( - spec: FeatureSpec, + spec: ModelSpec, ) -> dict[str | None, tuple[dict[str, object], dict[str, object]]]: """Map each test module's arm key to its (sparse, populated) base rows. @@ -219,10 +219,10 @@ def _select_arm_rows( def _render_test_modules( - spec: FeatureSpec, + spec: ModelSpec, checks: tuple[list[Check], list[ModelCheck]], ) -> list[GeneratedModule]: - """Render test modules for a feature spec. + """Render test modules for a model spec. For union specs with multiple discriminator arms, produces one test module per arm. Each arm's test includes the field and @@ -230,8 +230,8 @@ def _render_test_modules( `render_test_module`. 
""" field_checks, model_checks = checks - directory, feature_name = _directory_and_feature_name(spec) - expression_import = ".".join([_OUTPUT_PACKAGE, *directory.parts, feature_name]) + directory, model_name = _directory_and_model_name(spec) + expression_import = ".".join([_OUTPUT_PACKAGE, *directory.parts, model_name]) support_prefix = _support_prefix(directory) modules: list[GeneratedModule] = [] @@ -240,7 +240,7 @@ def _render_test_modules( modules.append( GeneratedModule( content=render_test_module( - feature_name, + model_name, field_checks, model_checks, base_row_sparse=base_row_sparse, @@ -250,7 +250,7 @@ def _render_test_modules( expression_import=expression_import, support_prefix=support_prefix, ), - path=directory / f"test_{feature_name}{suffix}.py", + path=directory / f"test_{model_name}{suffix}.py", ) ) return modules diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py index dcd687610..a1acb8055 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -9,20 +9,24 @@ from overture.schema.system.field_path import ( ArrayPath, FieldPath, + MapPath, + MapProjection, ScalarPath, ) -from overture.schema.system.model_constraint import Condition from overture.schema.system.primitive import GeometryType from ._render_common import ( - check_name, - compute_label_suffixes, - disambiguate, - field_label, + FieldCheckRow, + FieldEq, + ModelCheckRow, + field_check_rows, jinja_env, - model_constraint_field_label, - parse_field_eq, + map_runtime_helper, + model_check_rows, py_literal, + require_field_eq, + sanitize_field_name, + schema_const_name, tuple_literal, ) from .check_ir import ( @@ -43,7 +47,7 @@ from .schema_builder import SHARED_TYPE_REFS, SchemaField __all__ = [ - "render_feature_module", + 
"render_model_module", ] # Descriptor function names that resolve to helpers from the @@ -52,7 +56,13 @@ # `_render_common.COLUMN_LEVEL_FUNCTIONS`, which classifies checks that # emit one Check per field rather than per array element. _COLUMN_PATTERN_HELPERS = frozenset( - {"array_check", "nested_array_check", "check_struct_unique"} + { + "array_check", + "nested_array_check", + "map_keys_check", + "map_values_check", + "check_struct_unique", + } ) _SHARED_STRUCT_REFS = frozenset(SHARED_TYPE_REFS.values()) @@ -78,20 +88,53 @@ ) -# Collapses runs of `.`, `[`, `]`, `_` to a single `_` for identifier sanitization. -_PATH_SEPARATOR_RUN = re.compile(r"[.\[\]_]+") +# A generated expression dereferences a top-level row column through one of a +# fixed set of forms, each taking the column name as a string literal: `F.col`, +# the outermost `array_check`/`nested_array_check`, and `map_keys_check`/ +# `map_values_check`. Inner array iterations use element accessors (`el[...]`), +# whose first argument is never a string literal and so never matches here. A +# new column-consuming wrapper must be added to this alternation; `read_columns` +# fails loudly (see `_require_read_columns`) if a check's expr matches none. +_COLUMN_READ = re.compile( + r'(?:F\.col|(?:nested_)?array_check|map_(?:keys|values)_check)\("([^"]+)"' +) + + +def _read_columns(expr: str) -> frozenset[str]: + """Top-level columns a rendered check expression dereferences. + Derived from the expression source itself rather than the check's + structure, so it stays correct as the renderer evolves: whatever + `F.col`/`array_check`/`map_*_check` the expression emits is what the + runtime reads. Dotted struct navigation (`bbox.xmin`, `names.rules`) + collapses to its top-level column, the granularity at which absence is + detected. 
+ """ + return frozenset(m.group(1).split(".", 1)[0] for m in _COLUMN_READ.finditer(expr)) -def _sanitize_field_name(field: str) -> str: - """Convert an encoded field-path string to a valid Python identifier fragment.""" - return _PATH_SEPARATOR_RUN.sub("_", field).strip("_") +def _require_read_columns(expr: str, field: str, name: str) -> frozenset[str]: + """Top-level columns a generated check reads -- guaranteed non-empty. -def _render_condition_desc(condition: Condition) -> str: - """Render a Condition to a human-readable description string for error messages.""" - parsed = parse_field_eq(condition) - if parsed is None: - raise TypeError(f"Unhandled condition type: {type(condition).__name__}") + Every generated check dereferences at least one row column. An empty + result means `_read_columns` did not recognize a form `expr` uses -- + typically a newly added column-consuming wrapper absent from + `_COLUMN_READ`. Left silent, the runtime could not drop the check when + its column is absent (`validate_model` keys on `read_columns`), so an + unresolvable reference would reach Spark. This converts that latent + crash into a generation-time error naming the offending check. + """ + columns = _read_columns(expr) + if not columns: + raise ValueError( + f"check {field!r} ({name!r}) reads no top-level column; " + f"_read_columns recognized no column form in: {expr}" + ) + return columns + + +def _render_condition_desc(parsed: FieldEq) -> str: + """Render a parsed condition to a human-readable error-message description.""" display = repr( parsed.value.value if isinstance(parsed.value, Enum) else parsed.value ) @@ -100,13 +143,22 @@ def _render_condition_desc(condition: Condition) -> str: def _render_condition( - condition: Condition, *, in_array: bool = False, var: str = "el" + parsed: FieldEq, + *, + in_array: bool = False, + struct_path: tuple[str, ...] 
= (), + var: str = "el", ) -> str: - """Render a Condition to a PySpark Column expression string.""" - parsed = parse_field_eq(condition) - if parsed is None: - raise TypeError(f"Unhandled condition type: {type(condition).__name__}") - ref = _render_field_ref(parsed.field_name, in_array=in_array, var=var) + """Render a parsed condition to a PySpark Column expression string. + + `struct_path` is the leaf the constrained model was reached at; the + condition field lives beside the target field on that same model, so + its reference must navigate the same leaf (e.g. `el["inner"]["subtype"]`, + not `el["subtype"]`). + """ + ref = _render_field_ref( + parsed.field_name, in_array=in_array, struct_path=struct_path, var=var + ) op = "!=" if parsed.negated else "==" return f"{ref} {op} {py_literal(parsed.value)}" @@ -274,6 +326,37 @@ def _render_array_check_expr( ) +def _map_iter_var(projection: MapProjection) -> str: + """Lambda variable name for a map projection: `k` for keys, `v` for values.""" + return "k" if projection is MapProjection.KEY else "v" + + +def _wrap_in_map_iteration(target: MapPath, body: str) -> str: + """Wrap `body` in a map_keys_check / map_values_check projection lambda. + + The map helper projects the map (`F.map_keys` / `F.map_values`) and + applies the lambda to each projected key or value, the map analogue of + `_wrap_in_array_iteration`. `body` references the projected element via + the same `_map_iter_var(target.projection)` name this builds the lambda + parameter from. + """ + helper = map_runtime_helper(target.projection) + var = _map_iter_var(target.projection) + return f'{helper}("{target.map_column}", lambda {var}: {body})' + + +def _render_map_check_expr(target: MapPath, desc: ExpressionDescriptor) -> str: + """Render a MapPath target to a map_keys_check / map_values_check expression. 
+ + A non-empty `target.leaf` navigates into a `dict[K, Model]` value struct + (`v["field"]`), mirroring an array element's leaf accessor; an empty leaf + applies the check to the projected scalar itself. + """ + var = _map_iter_var(target.projection) + body = _render_expr_call(desc, _element_accessor(var, target.leaf)) + return _wrap_in_map_iteration(target, body) + + def _render_variant_expr( inner_expr: str, variant_values: tuple[str, ...], @@ -300,40 +383,37 @@ def _render_column_gate(expr: str, gate: FieldPath) -> str: def _model_check_func_name(check: ModelCheck, idx: int) -> str: """Build the private function name for a model-constraint check. - Non-array targets emit `_{fn}_{idx}_check`. Array targets prefix the - column path -- using the full encoded `FieldPath` when the check is - reached via inner iteration or leaf struct navigation, otherwise the - outer column name alone -- so collisions across nested contexts get - distinct identifiers. + Array and map targets prefix the column path -- using the full encoded + `FieldPath` when the check is reached via inner iteration or leaf struct + navigation, otherwise the outer column name alone -- so collisions + across nested contexts get distinct identifiers. Row-root targets emit + `_{fn}_{idx}_check`. """ fn = model_constraint_function(check.descriptor) match check.target: case ArrayPath() as target: has_nested_path = bool(target.iter_struct_paths) or bool(target.leaf) prefix_source = str(target) if has_nested_path else target.column_path - prefix = _sanitize_field_name(prefix_source) + prefix = sanitize_field_name(prefix_source) + return f"_{prefix}_{fn}_{idx}_check" + case MapPath() as target: + prefix_source = str(target) if target.leaf else target.map_column + prefix = sanitize_field_name(prefix_source) return f"_{prefix}_{fn}_{idx}_check" case _: return f"_{fn}_{idx}_check" -def _root_field_for_target(target: FieldPath) -> str | None: - """Top-level schema column for a Check/ModelCheck target. 
- - Returns the first segment's name, or `None` for an empty path. - """ - return target.segments[0].name if target.segments else None - - def _check_shape_token(target: FieldPath) -> str: """Token naming the runtime `CheckShape` member for a target path. Mirrors the member names of `overture.schema.pyspark.check.CheckShape`; the check-function template prefixes `CheckShape.` to the result. An - `ArrayPath` target renders to an `array` expression, every + `ArrayPath` or `MapPath` target renders to an `array` + expression (the map helper iterates the projected keys/values), every other path to a nullable string. """ - return "ARRAY" if isinstance(target, ArrayPath) else "SCALAR" + return "ARRAY" if isinstance(target, (ArrayPath, MapPath)) else "SCALAR" def _render_check_expr(check: Check, descriptor_idx: int) -> str: @@ -369,6 +449,8 @@ def _render_check_expr(check: Check, descriptor_idx: int) -> str: element_guards=element_guards, gate_parts=gate_parts, ) + case MapPath(): + expr = _render_map_check_expr(check.target, desc) case _: raise TypeError( f"Unhandled FieldPath variant: {type(check.target).__name__}" @@ -380,7 +462,12 @@ def _render_check_expr(check: Check, descriptor_idx: int) -> str: def _check_function_context( - *, target: FieldPath, func_name: str, field: str, name: str, expr: str + *, + target: FieldPath, + func_name: str, + field: str, + name: str, + expr: str, ) -> dict[str, object]: """Assemble the template context dict for one check function.""" return { @@ -389,28 +476,28 @@ def _check_function_context( "check_name": name, "expr": expr, "shape": _check_shape_token(target), - "root_field": _root_field_for_target(target), + "read_columns": _require_read_columns(expr, field, name), } -def _render_check_function_context( - check: Check, func_name: str, descriptor_idx: int = 0 -) -> dict[str, object]: - """Build the template context for a per-field check function from a Check.""" - desc = check.descriptors[descriptor_idx] +def 
_render_check_function_context(row: FieldCheckRow) -> dict[str, object]: + """Build the template context for a per-field check function from a row. + + The row carries the final `func_name`, `label`, and `name`; the + collisions that produce them are resolved once in `field_check_rows`. + """ return _check_function_context( - target=check.target, - func_name=func_name, - field=field_label(check), - name=check_name(desc.function, desc.check_name), - expr=_render_check_expr(check, descriptor_idx), + target=row.check.target, + func_name=row.func_name, + field=row.label, + name=row.name, + expr=_render_check_expr(row.check, row.descriptor_idx), ) -def _render_model_constraint_function_context( - check: ModelCheck, idx: int, label_suffix: str -) -> dict[str, object]: +def _render_model_constraint_function_context(row: ModelCheckRow) -> dict[str, object]: """Build the template context for a model-constraint check function.""" + check = row.check desc = check.descriptor target = check.target match target: @@ -418,6 +505,13 @@ def _render_model_constraint_function_context( in_array = True var = "inner" if target.iter_struct_paths else "el" struct_path: tuple[str, ...] = target.leaf + case MapPath(): + # The map's values are iterated like an array element, so field + # references use the element accessor (`v["foo"]`) under the + # projected variable. 
+ in_array = True + var = _map_iter_var(target.projection) + struct_path = target.leaf case _: in_array = False var, struct_path = "el", () @@ -440,10 +534,11 @@ def _cols_and_names() -> tuple[str, str]: inner_expr = f"{fn}({cols_list}, {names_list})" case RequireIf() | ForbidIf(): target_name = desc.field_names[0] + parsed = require_field_eq(desc.condition) condition_expr = _render_condition( - desc.condition, in_array=in_array, var=var + parsed, in_array=in_array, struct_path=struct_path, var=var ) - condition_desc = _render_condition_desc(desc.condition) + condition_desc = _render_condition_desc(parsed) target_ref = _field_ref(target_name) inner_expr = ( f"{fn}({target_ref}, {condition_expr}, {py_literal(condition_desc)})" @@ -470,6 +565,16 @@ def _cols_and_names() -> tuple[str, str]: expr = _wrap_in_array_iteration( target.column_path, target.iter_struct_paths, inner_expr ) + elif isinstance(target, MapPath): + # A `dict[K, Model]` value-model constraint wraps in map_values_check + # (or map_keys_check), iterating the projected values like an array. + # check_builder zeros the gate for iterated containers, so no gate + # reaches here. 
+ assert check.gate is None, ( + f"ModelCheck gate={check.gate!r} paired with MapPath target={target!r}; " + f"map iteration handles value nullability, so a gate is unexpected" + ) + expr = _wrap_in_map_iteration(target, inner_expr) else: assert check.gate is None, ( f"ModelCheck gate={check.gate!r} paired with non-ArrayPath target={target!r}; " @@ -479,9 +584,9 @@ def _cols_and_names() -> tuple[str, str]: return _check_function_context( target=target, - func_name=_model_check_func_name(check, idx), - field=model_constraint_field_label(check, label_suffix), - name=check_name(fn), + func_name=_model_check_func_name(check, row.idx), + field=row.label, + name=row.name, expr=expr, ) @@ -526,6 +631,8 @@ def _pattern_imports_for(target: FieldPath) -> set[str]: if target.iter_struct_paths: names.add("nested_array_check") return names + case MapPath(): + return {map_runtime_helper(target.projection)} case _: return set() @@ -576,22 +683,8 @@ def _field_check_function_entries( field_checks: list[Check], ) -> list[dict[str, object]]: """Build template contexts for field-level checks.""" - descriptor_refs: list[tuple[Check, int]] = [] - raw_names: list[str] = [] - for check in field_checks: - labeled = field_label(check) - multi = len(check.descriptors) > 1 - for desc_idx, desc in enumerate(check.descriptors): - suffix = f"_{check_name(desc.function, desc.check_name)}" if multi else "" - raw_names.append(f"_{_sanitize_field_name(labeled)}{suffix}_check") - descriptor_refs.append((check, desc_idx)) - - func_names = disambiguate(raw_names) return [ - _render_check_function_context(check, func_name, desc_idx) - for (check, desc_idx), func_name in zip( - descriptor_refs, func_names, strict=True - ) + _render_check_function_context(row) for row in field_check_rows(field_checks) ] @@ -599,15 +692,14 @@ def _model_check_function_entries( model_checks: list[ModelCheck], ) -> list[dict[str, object]]: """Build template contexts for model-level checks.""" - label_suffixes = 
compute_label_suffixes(model_checks) return [ - _render_model_constraint_function_context(mc, idx, label_suffixes[idx]) - for idx, mc in enumerate(model_checks) + _render_model_constraint_function_context(row) + for row in model_check_rows(model_checks) ] -def render_feature_module( - feature_name: str, +def render_model_module( + model_name: str, field_checks: list[Check], model_checks: list[ModelCheck], schema_fields: list[SchemaField], @@ -616,7 +708,7 @@ def render_feature_module( entry_point: str = "tests.placeholder:Placeholder", partitions: Mapping[str, str] | None = None, ) -> str: - """Render a complete Python module for a feature's checks and schema.""" + """Render a complete Python module for a model's checks and schema.""" constraint_expr_fns = sorted( _collect_constraint_expr_imports(field_checks, model_checks) ) @@ -634,19 +726,19 @@ def render_feature_module( field_checks ) + _model_check_function_entries(model_checks) - feature_title = feature_name.replace("_", " ").title() + model_title = model_name.replace("_", " ").title() - template = jinja_env().get_template("feature_module.py.jinja2") + template = jinja_env().get_template("model_module.py.jinja2") return template.render( - feature_name=feature_name, - feature_title=feature_title, + model_name=model_name, + model_title=model_title, constraint_expr_fns=constraint_expr_fns, column_pattern_fns=column_pattern_fns, spark_types=spark_types, schema_struct_refs=schema_struct_refs, geometry_type=geometry_type, check_functions=check_functions, - schema_const_name=f"{feature_name.upper()}_SCHEMA", + schema_const_name=schema_const_name(model_name), schema_fields=schema_fields, geometry_types_literal=geometry_types_literal, entry_point=entry_point, diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py index 194119145..00a999ca8 100644 --- 
a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py @@ -1,9 +1,8 @@ -"""Build StructType schema source from FeatureSpec field trees.""" +"""Build StructType schema source from ModelSpec field trees.""" from __future__ import annotations from dataclasses import dataclass -from enum import Enum from ..extraction.field import ( AnyScalar, @@ -17,8 +16,8 @@ Scalar, UnionRef, ) -from ..extraction.field_walk import terminal_scalar -from ..extraction.specs import FeatureSpec, FieldSpec, UnionSpec +from ..extraction.field_walk import enum_source, terminal_scalar +from ..extraction.specs import FieldSpec, ModelSpec, UnionSpec from ..extraction.type_registry import get_type_mapping __all__ = [ @@ -83,11 +82,7 @@ def _spark_for_scalar(scalar: Scalar) -> str: return _STRING_FALLBACK if scalar.base_type in SHARED_TYPE_REFS: return SHARED_TYPE_REFS[scalar.base_type] - if ( - scalar.source_type is not None - and isinstance(scalar.source_type, type) - and issubclass(scalar.source_type, Enum) - ): + if enum_source(scalar) is not None: return _STRING_FALLBACK return _spark_for_base(scalar.base_type, scalar.source_type) @@ -156,7 +151,7 @@ def _shape_to_spark(shape: FieldShape) -> str: raise TypeError(f"Unhandled FieldShape: {shape!r}") -def build_schema(spec: FeatureSpec) -> list[SchemaField]: +def build_schema(spec: ModelSpec) -> list[SchemaField]: """Build schema fields for a feature spec. Walks the field tree and maps types to Spark type expressions. 
diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 index 8c15ed9d9..078f02b97 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 @@ -5,6 +5,6 @@ def {{ c.func_name }}() -> Check: name="{{ c.check_name }}", expr={{ c.expr }}, shape=CheckShape.{{ c.shape }}, - root_field={{ c.root_field | py_literal }}, + read_columns={{ c.read_columns | py_literal }}, ) {% endmacro %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/model_module.py.jinja2 similarity index 83% rename from packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 rename to packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/model_module.py.jinja2 index 1a28d39b2..8c91646c4 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/model_module.py.jinja2 @@ -1,7 +1,7 @@ {% from '_check_function.py.jinja2' import check_function -%} # This file is auto-generated by overture-schema-codegen. Do not edit. 
-"""{{ feature_title }} validation expression builders.""" +"""{{ model_title }} validation expression builders.""" from __future__ import annotations @@ -17,7 +17,7 @@ from pyspark.sql.types import ( from overture.schema.system.primitive import GeometryType {% endif %} -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation {% if schema_struct_refs %} from overture.schema.pyspark.expressions._schema_structs import ( {% for r in schema_struct_refs %} @@ -45,8 +45,8 @@ from overture.schema.pyspark.expressions.constraint_expressions import ( {{ check_function(c) }} {% endfor %} -def {{ feature_name }}_checks() -> list[Check]: - """All validation checks for {{ feature_name }}.""" +def {{ model_name }}_checks() -> list[Check]: + """All validation checks for {{ model_name }}.""" {% if check_functions %} return [ {% for c in check_functions %} @@ -74,9 +74,9 @@ ENTRY_POINT = "{{ entry_point }}" PARTITIONS: dict[str, str] = {{ partitions | py_literal }} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema={{ schema_const_name }}, - checks={{ feature_name }}_checks, + checks={{ model_name }}_checks, {%- if geometry_types_literal %} geometry_types=GEOMETRY_TYPES, {%- endif %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 index c69f146a8..9ce436c6a 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 @@ -1,13 +1,13 @@ # Auto-generated — do not edit. 
-"""Generated conformance tests for {{ feature_name }}.""" +"""Generated conformance tests for {{ model_name }}.""" from __future__ import annotations import pytest from {{ expression_import }} import ( - {{ feature_name | upper }}_SCHEMA, - {{ feature_name }}_checks, + {{ model_name | upper }}_SCHEMA, + {{ model_name }}_checks, ) from pyspark.sql import SparkSession @@ -42,7 +42,7 @@ SCENARIOS: list[Scenario] = [ @pytest.fixture(scope="module") def checks() -> list: - return {{ feature_name }}_checks() + return {{ model_name }}_checks() @pytest.fixture(scope="module") @@ -53,7 +53,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="{{ feature_name }}", + model_name="{{ model_name }}", ) @@ -65,7 +65,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="{{ feature_name }}", + model_name="{{ model_name }}", ) @@ -78,7 +78,7 @@ def test_baseline_sparse(sparse_results: ValidationResults) -> None: for required-only fields), the baseline fails here before any scenario runs. """ - baseline = sparse_results.violations.get("{{ feature_name }}::baseline", set()) + baseline = sparse_results.violations.get("{{ model_name }}::baseline", set()) assert baseline == set(), f"Sparse baseline has violations: {baseline}" @@ -89,7 +89,7 @@ def test_baseline_populated(populated_results: ValidationResults) -> None: filled, exercising codegen paths that only fire when a value is present. 
""" - baseline = populated_results.violations.get("{{ feature_name }}::baseline", set()) + baseline = populated_results.violations.get("{{ model_name }}::baseline", set()) assert baseline == set(), f"Populated baseline has violations: {baseline}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py index 6af5b0855..29ad6db24 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py @@ -1,7 +1,7 @@ """Generate valid base rows for the rendered conformance tests. `generate_base_row` produces a minimal valid row (required fields only) -from a `FeatureSpec`. `generate_populated_row` produces a fully +from a `ModelSpec`. `generate_populated_row` produces a fully populated row including optional fields. `generate_arm_rows` and `generate_populated_arm_rows` do the same for each arm of a discriminated union. 
@@ -15,19 +15,7 @@ from typing import Any from overture.schema.common.scoping.lr import LinearReferenceRangeConstraint -from overture.schema.system.field_constraint.string import ( - CountryCodeAlpha2Constraint, - HexColorConstraint, - JsonPointerConstraint, - LanguageTagConstraint, - PhoneNumberConstraint, - RegionCodeConstraint, - SnakeCaseConstraint, - StrippedConstraint, - WikidataIdConstraint, -) from overture.schema.system.model_constraint import ( - FieldEqCondition, ForbidIfConstraint, MinFieldsSetConstraint, RadioGroupConstraint, @@ -52,11 +40,24 @@ Primitive, UnionRef, ) -from ...extraction.field_walk import has_array_layer, terminal_primitive +from ...extraction.field_walk import ( + enum_source, + has_array_layer, + terminal_primitive, + terminal_scalar, +) from ...extraction.length_constraints import ArrayMinLen -from ...extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ...extraction.specs import FieldSpec, ModelSpec, RecordSpec, UnionSpec +from ...extraction.type_registry import primitive_spark_category +from .._render_common import require_field_eq from ..constraint_dispatch import ExpressionDescriptor, dispatch_constraint from ..schema_builder import spark_type_rank +from .constraint_values import ( + CONSTRAINT_VALUES, + curated_pattern_values, + uncurated_pattern_error, + valid_bound, +) __all__ = [ "generate_arm_rows", @@ -115,7 +116,7 @@ def _is_geometry_terminal(terminal: Primitive) -> bool: return terminal.source_type is Geometry -def generate_base_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: +def generate_base_row(spec: ModelSpec, *, index: int = 0) -> dict[str, Any]: """Produce a minimal valid row from a feature spec (required fields only). The row passes `TypeAdapter(validation_type).validate_python()`. 
@@ -131,7 +132,7 @@ def generate_base_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: return _build_row(spec, index=index, populate_optional=False) -def generate_populated_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: +def generate_populated_row(spec: ModelSpec, *, index: int = 0) -> dict[str, Any]: """Produce a fully populated valid row (all fields, including optional). Sub-models are recursively populated. @@ -147,7 +148,7 @@ def generate_populated_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, An return _build_row(spec, index=index, populate_optional=True) -def generate_arm_rows(spec: FeatureSpec) -> dict[str, dict[str, Any]]: +def generate_arm_rows(spec: ModelSpec) -> dict[str, dict[str, Any]]: """Produce one minimal valid row per discriminator arm of a union. Returns `{arm_value: row}` where each row passes TypeAdapter @@ -162,7 +163,7 @@ def generate_arm_rows(spec: FeatureSpec) -> dict[str, dict[str, Any]]: def generate_populated_arm_rows( - spec: FeatureSpec, + spec: ModelSpec, ) -> dict[str, dict[str, Any]]: """Produce one fully populated valid row per discriminator arm. 
@@ -177,7 +178,7 @@ def generate_populated_arm_rows( return _build_arm_rows(_require_union(spec), populate_optional=True) -def _require_union(spec: FeatureSpec) -> UnionSpec: +def _require_union(spec: ModelSpec) -> UnionSpec: if not isinstance(spec, UnionSpec): raise TypeError( f"Expected a UnionSpec, got {type(spec).__name__}: {spec.name!r}" @@ -186,7 +187,7 @@ def _require_union(spec: FeatureSpec) -> UnionSpec: def _build_row( - spec: FeatureSpec, + spec: ModelSpec, *, index: int = 0, populate_optional: bool, @@ -235,16 +236,28 @@ def _build_arm_rows( def _row_satisfies_condition(row: dict[str, Any], condition: object) -> bool: - """Check whether a FieldEqCondition is satisfied by the row's current values.""" - if not isinstance(condition, FieldEqCondition): - return False - cond_value = condition.value + """Check whether the condition is satisfied by the row's current values. + + Handles `FieldEqCondition` and `Not(FieldEqCondition)`. Raises + `TypeError` for any other condition kind so new condition types fail + loudly rather than silently returning an incorrect result. + + Parameters + ---------- + row + Current row dict being built. + condition + A `Condition` from a `RequireIfConstraint` or `ForbidIfConstraint`. + """ + field_eq = require_field_eq(condition) # type: ignore[arg-type] + cond_value = field_eq.value if isinstance(cond_value, Enum): cond_value = cond_value.value - return row.get(condition.field_name) == cond_value + matches = row.get(field_eq.field_name) == cond_value + return matches != field_eq.negated -def _satisfy_model_constraints(row: dict[str, Any], spec: FeatureSpec) -> None: +def _satisfy_model_constraints(row: dict[str, Any], spec: ModelSpec) -> None: """Adjust *row* so each model constraint is satisfied. 
`require_if`/`radio_group`/`require_any_of`/`min_fields_set` fill in @@ -364,7 +377,7 @@ def value_for_field( ) -def _widest_union_member(union: UnionSpec) -> ModelSpec: +def _widest_union_member(union: UnionSpec) -> RecordSpec: """Pick the union member whose fields have the highest cumulative Spark type rank. When multiple union members share a field name with different numeric @@ -388,7 +401,7 @@ def _widest_union_member(union: UnionSpec) -> ModelSpec: def _row_from_model_spec( - spec: ModelSpec, + spec: RecordSpec, *, index: int = 0, populate_optional: bool = False, @@ -446,17 +459,29 @@ def _value_for_shape( populate_optional=populate_optional, ) - case MapOf(): - return {} + case MapOf(key=key_shape, value=value_shape): + # One constraint-valid entry: an empty map satisfies Pydantic + # but leaves nothing for a conformance scenario to corrupt, so + # the key/value checks would never fire. A `dict[K, Any]` value + # (e.g. Infrastructure.source_tags) carries no constraint -- and + # thus no check -- and `Any` has no value strategy, so the map + # stays empty: there is nothing to validate or corrupt. 
+ if isinstance(terminal_scalar(value_shape), AnyScalar): + return {} + map_key = _value_for_shape( + key_shape, index=index, populate_optional=populate_optional + ) + map_value = _value_for_shape( + value_shape, index=index, populate_optional=populate_optional + ) + return {map_key: map_value} case LiteralScalar(values=values): val = values[0] return val.value if isinstance(val, Enum) else val - case Primitive(source_type=cls) if ( - cls is not None and isinstance(cls, type) and issubclass(cls, Enum) - ): - return list(cls)[0].value # type: ignore[call-overload] + case Primitive() as p if (enum_cls := enum_source(p)) is not None: + return list(enum_cls)[0].value case ModelRef(model=m): return _row_from_model_spec( @@ -476,8 +501,10 @@ def _value_for_shape( ) case AnyScalar(): - # Unreachable today: the only `AnyScalar` is a `MapOf` value - # type, and the `MapOf` case returns `{}` without descending. + # No value strategy exists for `Any`. `MapOf` returns an empty map + # when its value terminal is `Any`, so only an `Any` elsewhere (e.g. a + # map key) reaches here -- no schema declares one today, and this + # raises loudly rather than guess a value if one ever appears. raise TypeError( "AnyScalar reached base-row generation; no value strategy exists" ) @@ -498,18 +525,6 @@ def _value_for_shape( raise TypeError(f"Unhandled FieldShape: {shape!r}") -def _value_from_check_bounds( - desc: ExpressionDescriptor, scalar: Primitive, cs: ConstraintSource -) -> object | None: - # Skip structural bounds from numeric primitive NewTypes (int32, uint8, ...). - # Those bounds match Spark/Parquet types structurally -- the type system - # already enforces the range. Only semantic bounds (from field-level - # constraints or semantic NewTypes like FeatureVersion) produce values.
- if cs.source_name == scalar.base_type: - return None - return _valid_bound_for_base_row(desc) - - def _value_from_check_enum( desc: ExpressionDescriptor, _scalar: Primitive, _cs: ConstraintSource ) -> object: @@ -524,57 +539,85 @@ def _value_from_check_string_min_length( return "a" +def _value_from_check_pattern( + desc: ExpressionDescriptor, _scalar: Primitive, _cs: ConstraintSource +) -> object: + """Return a pattern-matching value for a curated raw pydantic pattern. + + Only raw `Field(pattern=)` constraints reach here -- named + `PatternConstraint` subclasses resolve earlier via `CONSTRAINT_VALUES`. + An uncurated pattern fails loud, symmetrically with `invalid_value`: + matching strings can't be generated generically, and silently falling + back to the primitive default would emit a row that fails the pattern, + surfacing later as a misleading "row should be valid" Pydantic error. + + Raises + ------ + ValueError + When the pattern has no curated entry in `PATTERN_VALUES`. + """ + curated = curated_pattern_values(desc) + if curated is None: + raise uncurated_pattern_error(desc, side="valid") + return curated.valid + + # Builders for descriptor-driven values, keyed by `ExpressionDescriptor.function`. -# Functions absent from this table are intentionally skipped -- notably -# `check_pattern`, since matching strings can't be generated generically. +# `check_bounds` is intentionally absent: it is routed through +# `_value_from_scalar_constraints` to merge multiple bound descriptors (e.g. +# separate Gt + Lt) before calling `valid_bound` once with the combined kwargs, +# so a single-bound path never silently produces a value that violates a second +# bound on the same field. +# `check_pattern` only yields a value for a curated raw pydantic pattern; +# named pattern constraints resolve earlier via `CONSTRAINT_VALUES`. 
_DESCRIPTOR_VALUE_BUILDERS: dict[ str, Callable[[ExpressionDescriptor, Primitive, ConstraintSource], object | None] ] = { "check_enum": _value_from_check_enum, - "check_bounds": _value_from_check_bounds, "check_string_min_length": _value_from_check_string_min_length, + "check_pattern": _value_from_check_pattern, } -_CONSTRAINT_VALID_VALUES: dict[type, object] = { - CountryCodeAlpha2Constraint: "US", - HexColorConstraint: "#aabbcc", - JsonPointerConstraint: "/valid/pointer", - LanguageTagConstraint: "en", - PhoneNumberConstraint: "+1 555-555-5555", - RegionCodeConstraint: "US-CA", - SnakeCaseConstraint: "snake_case", - StrippedConstraint: "clean", - WikidataIdConstraint: "Q42", -} - _CONSTRAINT_VALID_LIST_VALUES: dict[type, list[object]] = { LinearReferenceRangeConstraint: [0.0, 1.0], } def _value_from_scalar_constraints(scalar: Primitive) -> object | None: - """Return a value satisfying the first dispatched constraint. + """Return a value satisfying all dispatched constraints on a scalar. - Maps known constraint types to valid values directly, then dispatches - remaining constraints through `_DESCRIPTOR_VALUE_BUILDERS` keyed on - the `ExpressionDescriptor` function name. Assumes constraints on a - single field don't conflict; no schema today mixes constraints in a - way that would expose a conflict. + Maps known constraint types to valid values directly. For `check_bounds` + descriptors, merges all bound kwargs from every constraint on the field + into one dict and calls `valid_bound` once, so a field carrying separate + `Gt`/`Lt` constraints (two `check_bounds` descriptors) gets a value + satisfying both bounds. Non-bounds constraints use first-match behavior. 
""" + merged_bounds: dict[str, object] = {} for cs in scalar.constraints: constraint_type = type(cs.constraint) - if constraint_type in _CONSTRAINT_VALID_VALUES: - return _CONSTRAINT_VALID_VALUES[constraint_type] + if constraint_type in CONSTRAINT_VALUES: + return CONSTRAINT_VALUES[constraint_type].valid desc = dispatch_constraint(cs.constraint, base_type=scalar.base_type) if desc is None: continue + if desc.function == "check_bounds": + # Skip structural bounds from numeric NewType ranges — those are + # enforced by the Spark/Parquet type system, not by field constraints. + if cs.source_name != scalar.base_type: + merged_bounds.update(desc.kwargs) + continue builder = _DESCRIPTOR_VALUE_BUILDERS.get(desc.function) if builder is None: continue val = builder(desc, scalar, cs) if val is not None: return val + if merged_bounds: + merged_desc = ExpressionDescriptor( + function="check_bounds", kwargs=tuple(merged_bounds.items()) + ) + return valid_bound(merged_desc) return None @@ -604,32 +647,16 @@ def _min_length_from_shape_constraints( return 1 -def _valid_bound_for_base_row(desc: ExpressionDescriptor) -> object: - """Produce a value satisfying a bounds check for base row generation.""" - kwargs = dict(desc.kwargs) - if "ge" in kwargs: - return kwargs["ge"] - if "gt" in kwargs: - return kwargs["gt"] + 1 # type: ignore[operator] - if "le" in kwargs: - return kwargs["le"] - if "lt" in kwargs: - return kwargs["lt"] - 1 # type: ignore[operator] - return 0 - - def _primitive_default(base_type: str) -> object: """Return a type-appropriate default for a primitive base_type.""" explicit = _PRIMITIVE_DEFAULTS.get(base_type) if explicit is not None: return explicit - # Numeric types: match prefixes like int32, uint8, float64, double - lower = base_type.lower() - if lower.startswith(("float", "double")): + category = primitive_spark_category(base_type) + if category == "float": return 0.0 - if lower.startswith(("int", "uint")): + if category == "int": return 0 - # Fallback for 
string-like types return "" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/constraint_values.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/constraint_values.py new file mode 100644 index 000000000..6a15a070e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/constraint_values.py @@ -0,0 +1,203 @@ +"""Paired valid/invalid value generation for string-constraint and numeric-bound checks. + +Each entry in `CONSTRAINT_VALUES` carries both sides of the pair: +`valid` is accepted by the constraint; `invalid` violates it. +Both sides are mandatory — partial entries are not allowed. + +Consumed by `base_row` (uses the `valid` side to produce valid base rows) +and `invalid_value` (uses the `invalid` side to produce scenario mutations). + +`valid_bound` and `invalid_bound` are analogous functions for numeric +bound descriptors, placed here so both sides of every constraint kind +live in one module. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + JsonPointerConstraint, + LanguageTagConstraint, + NoWhitespaceConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + StrippedConstraint, + WikidataIdConstraint, +) + +from ..constraint_dispatch import ExpressionDescriptor, normalize_anchor + +__all__ = [ + "CONSTRAINT_VALUES", + "PATTERN_VALUES", + "ConstraintValues", + "curated_pattern_values", + "invalid_bound", + "uncurated_pattern_error", + "valid_bound", +] + + +@dataclass(frozen=True, slots=True) +class ConstraintValues: + """A paired valid/invalid value for one constraint type.""" + + valid: object + invalid: object + + +CONSTRAINT_VALUES: dict[type, ConstraintValues] = { + CountryCodeAlpha2Constraint: ConstraintValues(valid="US", invalid="99"), + HexColorConstraint: ConstraintValues(valid="#aabbcc", invalid="not-hex"), + JsonPointerConstraint: ConstraintValues(valid="/valid/pointer", invalid="no-slash"), + LanguageTagConstraint: ConstraintValues(valid="en", invalid="123"), + NoWhitespaceConstraint: ConstraintValues( + valid="nowhitespace", invalid="has whitespace" + ), + PhoneNumberConstraint: ConstraintValues( + valid="+1 555-555-5555", invalid="1234567890" + ), + RegionCodeConstraint: ConstraintValues(valid="US-CA", invalid="99-999"), + SnakeCaseConstraint: ConstraintValues(valid="snake_case", invalid="HAS SPACES"), + StrippedConstraint: ConstraintValues(valid="clean", invalid=" has spaces "), + WikidataIdConstraint: ConstraintValues(valid="Q42", invalid="P999"), +} + + +# Curated valid/invalid pairs for fields whose only string constraint is a +# raw pydantic `Field(pattern=...)` (a `_PydanticGeneralMetadata`, not a +# schema constraint class -- so it has no `CONSTRAINT_VALUES` type key). 
+# Keyed by the anchor-normalized pattern that lands in the generated +# `check_pattern` descriptor's `args`, so both `base_row` and +# `invalid_value` look it up via `desc.args[0]`. An uncurated raw pattern +# fails loud on both sides rather than guessing a value. +# +# Generation-principle gap: this table is hand-maintained and keyed by the +# literal regex, so it drifts from the schema -- a renamed or retuned +# `Field(pattern=)` silently loses its entry until the next regeneration +# fails loud. The principled fix is to derive both sides from the regex +# itself (e.g. a matching/non-matching string generator), removing the +# hand-keyed table entirely. Out of scope here; tracked separately. +PATTERN_VALUES: dict[str, ConstraintValues] = { + # Sources.license_priority key (LicenseShortname): `^[A-Za-z0-9._+\-]+$`. + normalize_anchor(r"^[A-Za-z0-9._+\-]+$"): ConstraintValues( + valid="ODbL-1.0", invalid="bad license!" + ), +} + + +def curated_pattern_values(desc: ExpressionDescriptor) -> ConstraintValues | None: + """Curated valid/invalid pair for a raw-pattern `check_pattern` descriptor. + + The pattern key is the descriptor's first arg (the anchor-normalized + regex). Returns None when the pattern is not curated in `PATTERN_VALUES` + -- named constraints resolve via `CONSTRAINT_VALUES` instead, and an + uncurated raw pattern has no values. + """ + pattern = desc.args[0] if desc.args else None + if isinstance(pattern, str): + return PATTERN_VALUES.get(pattern) + return None + + +def uncurated_pattern_error(desc: ExpressionDescriptor, *, side: str) -> ValueError: + """Build the error for a `check_pattern` descriptor with no curated value. + + Raised symmetrically by `base_row` (valid side) and `invalid_value` + (invalid side) when a raw `Field(pattern=)` has no `PATTERN_VALUES` + entry: both name the table to update rather than guessing a value. + + Parameters + ---------- + desc + The uncurated `check_pattern` descriptor. 
+ side + Which value could not be produced -- `"valid"` or `"invalid"`. + """ + return ValueError( + f"No {side} value defined for check_pattern with " + f"constraint_type={desc.constraint_type!r}, pattern={desc.args!r}. " + "Add an entry to CONSTRAINT_VALUES (named constraint) or " + "PATTERN_VALUES (raw pydantic pattern) in constraint_values.py." + ) + + +def valid_bound(desc: ExpressionDescriptor) -> object: + """Produce a value satisfying a bounds check for base row generation. + + Prefers inclusive boundaries: if `ge` is present it is already a valid + value; if `le` is present and `ge` is absent, `le` is valid. When only + exclusive bounds remain, a strictly-interior value is computed: midpoint + for both-exclusive, or a type-aware step away from a single bound. + + Parameters + ---------- + desc + A `check_bounds` descriptor with at least one bound kwarg. + + Returns + ------- + object + A value on the valid side of all bounds. Falls back to `0` when + no recognised bound key is present. + """ + kwargs = dict(desc.kwargs) + if "ge" in kwargs: + return kwargs["ge"] + if "le" in kwargs: + return kwargs["le"] + gt = kwargs.get("gt") + lt = kwargs.get("lt") + if gt is not None and lt is not None: + # Midpoint: integer midpoint for int bounds, float midpoint for float. + if isinstance(gt, float) or isinstance(lt, float): + return (float(gt) + float(lt)) / 2.0 # type: ignore[arg-type,operator] + mid = (gt + lt) // 2 # type: ignore[operator] + if not (gt < mid < lt): # type: ignore[operator] + raise ValueError( + f"No valid integer strictly between gt={gt!r} and lt={lt!r}" + ) + return mid + if gt is not None: + step: object = 1.0 if isinstance(gt, float) else 1 + return gt + step # type: ignore[operator] + if lt is not None: + step = 1.0 if isinstance(lt, float) else 1 + return lt - step # type: ignore[operator] + return 0 + + +def invalid_bound(desc: ExpressionDescriptor) -> object: + """Produce a value violating a bounds check for invalid-value generation. 
+ + The `ge` / `le` branches return one below / above the bound. For + `ge=0` this returns `-1`, which violates the bound but would also + underflow an unsigned base type. No schema today combines `ge=0` with + an unsigned terminal -- if that ever changes, the caller will need to + consult the base type and pick a sentinel (e.g. a string or null) for + the violating value. + + Parameters + ---------- + desc + A `check_bounds` descriptor with at least one bound kwarg. + + Raises + ------ + ValueError + When no recognised bound key is found. + """ + kwargs = dict(desc.kwargs) + if "ge" in kwargs: + return kwargs["ge"] - 1 # type: ignore[operator] + if "gt" in kwargs: + return kwargs["gt"] + if "le" in kwargs: + return kwargs["le"] + 1 # type: ignore[operator] + if "lt" in kwargs: + return kwargs["lt"] + raise ValueError(f"No recognised bound key in kwargs: {kwargs!r}") diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py index 055cb2c51..e21812a84 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py @@ -7,19 +7,15 @@ from __future__ import annotations -from overture.schema.system.field_constraint.string import ( - CountryCodeAlpha2Constraint, - HexColorConstraint, - LanguageTagConstraint, - NoWhitespaceConstraint, - PhoneNumberConstraint, - RegionCodeConstraint, - SnakeCaseConstraint, - WikidataIdConstraint, -) from overture.schema.system.primitive.geom import GeometryType from ..constraint_dispatch import ExpressionDescriptor +from .constraint_values import ( + CONSTRAINT_VALUES, + curated_pattern_values, + invalid_bound, + uncurated_pattern_error, +) __all__ = ["invalid_value"] @@ -31,28 +27,15 @@ ) -# Pattern-constraint -> sample value that violates the 
pattern. -# Used by `check_pattern` whose constraint_type identifies which validator. -_INVALID_PATTERN_VALUES: dict[type, str] = { - NoWhitespaceConstraint: "has whitespace", - CountryCodeAlpha2Constraint: "99", - RegionCodeConstraint: "99-999", - SnakeCaseConstraint: "HAS SPACES", - PhoneNumberConstraint: "1234567890", - WikidataIdConstraint: "P999", - HexColorConstraint: "not-hex", - LanguageTagConstraint: "123", -} - # Direct lookup: check function name -> invalid value (no descriptor inspection). +# Reserved for checks with no associated constraint type (url/email, linear_range, +# bbox, required, enum, and min-length literals). _INVALID_LITERALS: dict[str, object] = { "check_required": None, "check_enum": "__INVALID__", "check_url_format": "not-a-url", "check_url_length": "https://" + "x" * 2076, "check_email": "not-an-email", - "check_stripped": " has spaces ", - "check_json_pointer": "no-slash", "check_array_min_length": [], "check_string_min_length": "", "check_linear_range_length": [0.5], @@ -75,16 +58,24 @@ def invalid_value(desc: ExpressionDescriptor) -> object: Raises ------ ValueError - For unrecognised check function names or when all geometry candidates + For unrecognised check function names, unknown `constraint_type` + on `check_pattern` descriptors, or when all geometry candidates are in the allowed set. """ fn = desc.function + # Constraint-type lookup precedes function-name lookup: any type present in + # CONSTRAINT_VALUES resolves via the table even when its check function also + # appears in _INVALID_LITERALS (e.g. check_stripped, check_json_pointer). 
+ if desc.constraint_type in CONSTRAINT_VALUES: + return CONSTRAINT_VALUES[desc.constraint_type].invalid if fn in _INVALID_LITERALS: return _INVALID_LITERALS[fn] if fn == "check_bounds": - return _invalid_bound(desc) + return invalid_bound(desc) if fn == "check_pattern": - return _INVALID_PATTERN_VALUES.get(desc.constraint_type, "!!!INVALID!!!") # type: ignore[arg-type] + if (curated := curated_pattern_values(desc)) is not None: + return curated.invalid + raise uncurated_pattern_error(desc, side="invalid") if fn == "check_array_max_length": max_len = int(desc.args[0]) # type: ignore[call-overload] return [{}] * (max_len + 1) @@ -96,28 +87,6 @@ def invalid_value(desc: ExpressionDescriptor) -> object: raise ValueError(f"No invalid value defined for check function: {fn!r}") -def _invalid_bound(desc: ExpressionDescriptor) -> object: - """Produce a value violating a bounds check for invalid-value generation. - - The `ge` / `le` branches return one below / above the bound. For - `ge=0` this returns `-1`, which violates the bound but would also - underflow an unsigned base type. No schema today combines `ge=0` with - an unsigned terminal -- if that ever changes, the caller will need to - consult the base type and pick a sentinel (e.g. a string or null) for - the violating value. 
- """ - kwargs = dict(desc.kwargs) - if "ge" in kwargs: - return kwargs["ge"] - 1 # type: ignore[operator] - if "gt" in kwargs: - return kwargs["gt"] - if "le" in kwargs: - return kwargs["le"] + 1 # type: ignore[operator] - if "lt" in kwargs: - return kwargs["lt"] - raise ValueError(f"No recognised bound key in kwargs: {kwargs!r}") - - def _invalid_geometry(desc: ExpressionDescriptor) -> str: allowed = set(desc.args) for geom_type, wkt in _INVALID_GEOMETRY_CANDIDATES: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py index d78cf3c43..040eca462 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py @@ -16,11 +16,11 @@ ArrayPath, ArraySegment, FieldPath, - PathSegment, + FieldSegment, ) from ...extraction.field_walk import has_array_layer, list_depth, terminal_model_ref -from ...extraction.specs import FeatureSpec, FieldSpec +from ...extraction.specs import FieldSpec, ModelSpec from ..check_ir import ( Check, ElementGuard, @@ -52,7 +52,7 @@ def _find_field_spec(fields: list[FieldSpec], name: str) -> FieldSpec | None: return None -def leaf_list_depth(field_path: FieldPath, spec: FeatureSpec) -> int: +def leaf_list_depth(field_path: FieldPath, spec: ModelSpec) -> int: """Return the unaccounted-for list depth of the leaf field. 
Walks the spec's field tree along *field_path* and returns the @@ -96,14 +96,20 @@ def _required_siblings( def _walk_to_target( - segments: tuple[PathSegment, ...], + segments: tuple[FieldSegment, ...], fields: list[FieldSpec], spec_name: str, *, discriminator: _ElementDiscriminator | None, current_depth: int = 0, ) -> dict[str, Any]: - """Recursively build the scaffold dict along the path segments.""" + """Recursively build the scaffold dict along the path segments. + + Accepts any `FieldSegment`: struct steps recurse, an `ArraySegment` + wraps its inner value in lists, and a trailing `MapSegment` resolves + via `value_for_field` (which populates the map with a valid entry), + so a `MapPath` target scaffolds the same way as a struct terminal. + """ if not segments: return {} @@ -185,7 +191,7 @@ def _element_discriminator(check: Check) -> _ElementDiscriminator | None: return None -def generate_scaffold(check: Check, spec: FeatureSpec) -> dict[str, Any]: +def generate_scaffold(check: Check, spec: ModelSpec) -> dict[str, Any]: """Build a sparse dict from null to the target field of a Check.""" segments = check.target.segments if not segments: @@ -206,7 +212,7 @@ def generate_scaffold(check: Check, spec: FeatureSpec) -> dict[str, Any]: ) -def generate_model_scaffold(check: ModelCheck, spec: FeatureSpec) -> dict[str, Any]: +def generate_model_scaffold(check: ModelCheck, spec: ModelSpec) -> dict[str, Any]: """Build a sparse dict for a model-level check's nesting structure. 
Only top-level array columns are supported -- a `ScalarPath` target diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py index bd933fb20..d027699df 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py @@ -6,20 +6,20 @@ from typing_extensions import assert_never -from overture.schema.system.field_path import ArrayPath +from overture.schema.system.field_path import ArrayPath, MapPath, MapProjection -from ..extraction.field import FieldShape -from ..extraction.field_walk import has_array_layer -from ..extraction.specs import FeatureSpec +from ..extraction.field import FieldShape, Primitive +from ..extraction.field_walk import has_array_layer, terminal_of +from ..extraction.specs import ModelSpec +from ..extraction.type_registry import primitive_spark_category from ._render_common import ( - check_name, - compute_label_suffixes, disambiguate, - field_label, + field_check_rows, jinja_env, - model_constraint_field_label, + model_check_rows, parse_field_eq, py_literal, + schema_const_name, ) from .check_ir import ( Check, @@ -70,7 +70,7 @@ def _model_check_belongs_to_arm(check: ModelCheck, arm: str) -> bool: def render_test_module( - feature_name: str, + model_name: str, field_checks: list[Check], model_checks: list[ModelCheck], *, @@ -79,24 +79,28 @@ def render_test_module( base_row_sparse: dict[str, Any] | None = None, base_row_populated: dict[str, Any] | None = None, arm: str | None = None, - spec: FeatureSpec | None = None, + spec: ModelSpec | None = None, ) -> str: - """Render a complete pytest test file for a feature's validation checks. + """Render a complete pytest test file for a model's validation checks. Arm filtering uses two complementary signals. 
A field check's `ColumnGuard`s identify the arms it belongs to. A model check's `arm` attribute is set for member-specific constraints and `None` for union-level constraints (which apply to every arm). - """ - if arm is not None: - field_checks = [c for c in field_checks if _check_belongs_to_arm(c, arm)] - model_checks = [c for c in model_checks if _model_check_belongs_to_arm(c, arm)] + Both label-collision passes run over the *unfiltered* check lists so + they agree with the expression module, which `renderer` emits once + across every arm. Each scenario builder takes `arm` and drops rows + that fall outside it after their suffixes are assigned; computing a + suffix over an arm subset would let it hide a collision the shared + module still carries, producing an `expected_field` the module never + emits. + """ model_scenarios, used_mutation_fns = _render_model_scenarios( - feature_name, model_checks, spec + model_name, model_checks, spec, arm ) field_scenarios, field_helpers = _render_field_check_scenarios( - feature_name, field_checks, spec + model_name, field_checks, spec, arm ) used_mutation_fns |= field_helpers - {"set_at_path"} @@ -109,8 +113,8 @@ def render_test_module( template = jinja_env().get_template("test_module.py.jinja2") return template.render( - feature_name=feature_name, - schema_name=f"{feature_name.upper()}_SCHEMA", + model_name=model_name, + schema_name=schema_const_name(model_name), mutation_imports=sorted(used_mutation_fns), needs_set_at_path="set_at_path" in field_helpers, base_row_sparse=sparse_repr, @@ -152,14 +156,24 @@ class _MutateExpr(NamedTuple): def _field_mutate_expr( - check: Check, desc: ExpressionDescriptor, spec: FeatureSpec | None + check: Check, desc: ExpressionDescriptor, spec: ModelSpec | None ) -> _MutateExpr: """Render the `mutate=` expression for one field-check descriptor. 
- `check_struct_unique` calls the `mutate_unique_items` helper at the - target path; every other descriptor injects a constraint-violating - literal via `set_at_path`. + A `MapPath` target corrupts the map's single valid entry via + `mutate_map_key` / `mutate_map_value`; `check_struct_unique` calls + `mutate_unique_items` at the target path; every other descriptor + injects a constraint-violating literal via `set_at_path`. """ + if isinstance(check.target, MapPath): + helper = ( + "mutate_map_key" + if check.target.projection is MapProjection.KEY + else "mutate_map_value" + ) + col_repr = py_literal(check.target.map_column) + iv_repr = py_literal(invalid_value(desc)) + return _MutateExpr(f"lambda row: {helper}(row, {col_repr}, {iv_repr})", helper) target_repr = py_literal(str(check.target)) if desc.function == "check_struct_unique": return _MutateExpr( @@ -171,32 +185,35 @@ def _field_mutate_expr( def _render_field_check_scenarios( - feature_name: str, + model_name: str, field_checks: list[Check], - spec: FeatureSpec | None, + spec: ModelSpec | None, + arm: str | None, ) -> tuple[list[list[tuple[str, str]]], set[str]]: """Render Scenario entries for field-level checks. Returns the entries and the set of mutation helper names referenced - by them, mirroring `_render_model_scenarios`. + by them, mirroring `_render_model_scenarios`. `field_check_rows` + assigns collision suffixes over the unfiltered list; this drops rows + outside `arm` afterward so per-arm modules carry the labels the shared + expression module emits. Pass `None` to include all arms. 
""" - rows: list[tuple[Check, ExpressionDescriptor, str, str]] = [] - for check in field_checks: - label = field_label(check) - for desc in check.descriptors: - name = check_name(desc.function, desc.check_name) - rows.append((check, desc, label, name)) - + rows = [ + row + for row in field_check_rows(field_checks) + if arm is None or _check_belongs_to_arm(row.check, arm) + ] scenario_ids = disambiguate( - [f"{feature_name}::{label}:{name}" for _check, _desc, label, name in rows] + [f"{model_name}::{row.label}:{row.name}" for row in rows] ) entries: list[list[tuple[str, str]]] = [] used_helpers: set[str] = set() - for (check, desc, label, name), scenario_id in zip(rows, scenario_ids, strict=True): - scaffold = generate_scaffold(check, spec) if spec is not None else {} + for row, scenario_id in zip(rows, scenario_ids, strict=True): + desc = row.check.descriptors[row.descriptor_idx] + scaffold = generate_scaffold(row.check, spec) if spec is not None else {} try: - mutate = _field_mutate_expr(check, desc, spec) + mutate = _field_mutate_expr(row.check, desc, spec) except ValueError as exc: raise ValueError( f"Cannot render mutate expression for {scenario_id}: {exc}" @@ -207,8 +224,8 @@ def _render_field_check_scenarios( scenario_id=scenario_id, scaffold=scaffold, mutate_expr=mutate.expr, - expected_field=label, - expected_check=name, + expected_field=row.label, + expected_check=row.name, ) ) @@ -229,7 +246,7 @@ def _checks_array_element(check: Check) -> bool: def _wrap_for_list_leaf( value: object, check: Check, - spec: FeatureSpec | None, + spec: ModelSpec | None, ) -> object: """Wrap a scalar invalid value to match the field's list nesting depth.""" if spec is None or isinstance(value, list): @@ -243,26 +260,38 @@ def _wrap_for_list_leaf( def _render_model_scenarios( - feature_name: str, + model_name: str, model_checks: list[ModelCheck], - spec: FeatureSpec | None, + spec: ModelSpec | None, + arm: str | None, ) -> tuple[list[list[tuple[str, str]]], set[str]]: 
"""Render Scenario entries for model-level checks. Returns the entries and the set of mutation helper names referenced by them, so the caller can scope the test module's imports. + `model_check_rows` assigns collision suffixes over the unfiltered + list; this drops rows outside `arm` afterward so per-arm modules carry + the labels the shared expression module emits. Pass `None` to include + all arms. + + The scenario id's trailing index counts surviving rows within the arm + (`enumerate` after the filter), not the row's position in the + unfiltered list -- it is a test-internal disambiguator with no + cross-module contract, kept contiguous per arm. """ entries: list[list[tuple[str, str]]] = [] used_mutation_fns: set[str] = set() - label_suffixes = compute_label_suffixes(model_checks) - for idx, mc in enumerate(model_checks): + rows = [ + row + for row in model_check_rows(model_checks) + if arm is None or _model_check_belongs_to_arm(row.check, arm) + ] + for scenario_idx, row in enumerate(rows): + mc = row.check desc = mc.descriptor - fn = model_constraint_function(desc) mutation_fn = model_mutation_function(desc) - name = check_name(fn) - scenario_id = f"{feature_name}::model:{name}:{idx}" - label = model_constraint_field_label(mc, label_suffixes[idx]) + scenario_id = f"{model_name}::model:{row.name}:{scenario_idx}" scaffold = generate_model_scaffold(mc, spec) if spec is not None else {} try: @@ -278,8 +307,8 @@ def _render_model_scenarios( scenario_id=scenario_id, scaffold=scaffold, mutate_expr=mutate_expr, - expected_field=label, - expected_check=name, + expected_field=row.label, + expected_check=row.name, ) ) @@ -346,6 +375,22 @@ def _fill_value_literal(shape: FieldShape) -> str: """Return a Python source literal for a type-appropriate non-null fill value.""" if has_array_layer(shape): return "[{}]" + terminal = terminal_of(shape) + if isinstance(terminal, Primitive): + category = primitive_spark_category(terminal.base_type) + match category: + case "bool": + 
return "False" + case "float": + return "0.0" + case "int": + return "0" + case "string" | "other": + raise ValueError( + f"unhandled Primitive base_type: {terminal.base_type!r}" + ) + case _: + assert_never(category) return "{}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/spec_discovery.py b/packages/overture-schema-codegen/src/overture/schema/codegen/spec_discovery.py new file mode 100644 index 000000000..7bcf5f54c --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/spec_discovery.py @@ -0,0 +1,44 @@ +"""Bridge discovered models to extracted specs. + +`discover_models` yields `(ModelKey, entry)` pairs where each entry is either +a concrete Pydantic model class or a discriminated-union type alias. This +module turns one such pair into its `ModelSpec`, applying the partition layout +and entry point uniformly so every call site shares the same extraction. It +sits at the orchestration tier (alongside `cli`), importing downward into +extraction and layout. +""" + +from __future__ import annotations + +from overture.schema.system.discovery import ModelKey + +from .extraction.model_extraction import extract_model +from .extraction.specs import ( + ModelSpec, + is_model_class, + is_union_alias, + partitions_from_tags, +) +from .extraction.union_extraction import extract_union +from .layout.module_layout import entry_point_class + +__all__ = ["extract_model_spec"] + + +def extract_model_spec(key: ModelKey, entry: object) -> ModelSpec | None: + """Extract the `ModelSpec` for one discovered `(key, entry)` pair. + + Returns None when `entry` is neither a concrete model class nor a union + alias, so callers can skip non-model entries with a single check. 
+ """ + partitions = partitions_from_tags(key.tags) + if is_model_class(entry): + return extract_model(entry, entry_point=key.entry_point, partitions=partitions) + if is_union_alias(entry): + return extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + partitions=partitions, + ) + return None diff --git a/packages/overture-schema-codegen/tests/codegen_test_support.py b/packages/overture-schema-codegen/tests/codegen_test_support.py index 4bdea9f62..1fb149e32 100644 --- a/packages/overture-schema-codegen/tests/codegen_test_support.py +++ b/packages/overture-schema-codegen/tests/codegen_test_support.py @@ -21,18 +21,18 @@ AnnotatedField, EnumMemberSpec, EnumSpec, - FeatureSpec, FieldSpec, MemberSpec, ModelSpec, + RecordSpec, TypeIdentity, UnionSpec, is_model_class, is_union_alias, - partitions_from_tags, ) from overture.schema.codegen.extraction.union_extraction import extract_union from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.spec_discovery import extract_model_spec from overture.schema.system.discovery import ( TagSelector, discover_models, @@ -369,8 +369,8 @@ def find_model_class(name: str, models: dict[object, object]) -> type[BaseModel] return match -def find_field(spec: ModelSpec, name: str) -> FieldSpec: - """Find a field by name in a ModelSpec, raising if missing.""" +def find_field(spec: RecordSpec, name: str) -> FieldSpec: + """Find a field by name in a RecordSpec, raising if missing.""" return next(f for f in spec.fields if f.name == name) @@ -401,7 +401,7 @@ def has_name(mapping: Mapping[TypeIdentity, object], name: str) -> bool: def assert_literal_field( - spec: ModelSpec, field_name: str, expected_value: object + spec: RecordSpec, field_name: str, expected_value: object ) -> None: """Assert a field is a single-value Literal with the expected value.""" field = find_field(spec, field_name) @@ -412,19 +412,40 @@ def assert_literal_field( def 
flat_specs_from_discovery( theme: str | None = None, -) -> list[ModelSpec]: - """Build a flat list of ModelSpecs from discovery, with entry_point set.""" +) -> list[RecordSpec]: + """Build a flat list of RecordSpecs from discovery, with entry_point set.""" models = discover_models() if theme: models = filter_models( models, TagSelector(include_any=(f"overture:theme={theme}",)) ) - result = [] - for key, cls in models.items(): - if not is_model_class(cls): - continue - result.append(extract_model(cls, entry_point=key.entry_point)) - return result + return [ + spec + for key, cls in models.items() + if isinstance(spec := extract_model_spec(key, cls), RecordSpec) + ] + + +class TaggedVariantA(SegmentBase): + """Segment variant with a unique-items tags field.""" + + subtype: Literal["tagged_a"] + tags: Annotated[list[str], UniqueItemsConstraint()] | None = None + + +class TaggedVariantB(SegmentBase): + """Segment variant with a unique-items tags field (distinct instance, same constraint).""" + + subtype: Literal["tagged_b"] + tags: Annotated[list[str], UniqueItemsConstraint()] | None = None + + +TestSegmentEqualConstraints = Annotated[ + TaggedVariantA | TaggedVariantB, + Field( + description="Union whose members share a field with equal-but-distinct constraint instances" + ), +] class LiteralSubtypeModel(BaseModel): @@ -454,31 +475,25 @@ class RequireAnyModel(BaseModel): y: str | None = None -def discover_feature(class_name: str) -> FeatureSpec: - """Discover and extract a feature spec by class name.""" +def discover_feature(class_name: str) -> ModelSpec: + """Discover and extract a model spec by class name.""" models = discover_models() for key, entry in models.items(): - partitions = partitions_from_tags(key.tags) - if is_model_class(entry) and entry.__name__ == class_name: - return extract_model( - entry, entry_point=key.entry_point, partitions=partitions - ) - if is_union_alias(entry) and entry_point_class(key.entry_point) == class_name: - return extract_union( - 
entry_point_class(key.entry_point), - entry, - entry_point=key.entry_point, - partitions=partitions, - ) + if (is_model_class(entry) and entry.__name__ == class_name) or ( + is_union_alias(entry) and entry_point_class(key.entry_point) == class_name + ): + spec = extract_model_spec(key, entry) + if spec is not None: + return spec raise LookupError(f"{class_name} not found in discovered models") -def feature_spec_for_model( +def spec_for_model( cls: type[BaseModel], *, entry_point: str | None = None, partitions: Mapping[str, str] | None = None, -) -> ModelSpec: +) -> RecordSpec: """Extract a model class for tests; sub-specs are populated by extract_model.""" return extract_model(cls, entry_point=entry_point, partitions=partitions) diff --git a/packages/overture-schema-codegen/tests/conftest.py b/packages/overture-schema-codegen/tests/conftest.py index d66cf72a3..fbd1f0f1e 100644 --- a/packages/overture-schema-codegen/tests/conftest.py +++ b/packages/overture-schema-codegen/tests/conftest.py @@ -6,7 +6,7 @@ from codegen_test_support import find_model_class from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.numeric_extraction import extract_numerics -from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.extraction.specs import RecordSpec from overture.schema.codegen.markdown.pipeline import ( partition_numeric_and_geometry_types, ) @@ -52,7 +52,7 @@ def building_class(all_discovered_models: dict) -> type[BaseModel]: @pytest.fixture -def building_spec(building_class: type[BaseModel]) -> ModelSpec: +def building_spec(building_class: type[BaseModel]) -> RecordSpec: """Extract the Building model spec.""" return extract_model(building_class) diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py index d81843027..07120d132 100644 --- a/packages/overture-schema-codegen/tests/test_cli.py +++ 
b/packages/overture-schema-codegen/tests/test_cli.py @@ -7,7 +7,7 @@ import pytest from click.testing import CliRunner from overture.schema.codegen.cli import cli -from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.extraction.specs import RecordSpec class TestCliList: @@ -376,10 +376,10 @@ class TestCliEntryPoint: def test_generate_sets_entry_point_on_specs( self, cli_runner: CliRunner, monkeypatch: pytest.MonkeyPatch ) -> None: - captured: list[ModelSpec] = [] + captured: list[RecordSpec] = [] - def spy(feature_specs: list, schema_root: str, output_dir: object) -> None: - captured.extend(feature_specs) + def spy(model_specs: list, schema_root: str, output_dir: object) -> None: + captured.extend(model_specs) monkeypatch.setattr("overture.schema.codegen.cli._generate_markdown", spy) result = cli_runner.invoke( diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py index 4ae9f2dff..e5b1b0ebc 100644 --- a/packages/overture-schema-codegen/tests/test_constraint_description.py +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -1,5 +1,7 @@ """Tests for constraint description (model-level and field-level).""" +import re + from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.codegen.extraction.field_constraints import ( constraint_display_text, @@ -16,6 +18,10 @@ ) from overture.schema.codegen.extraction.specs import TypeIdentity from overture.schema.codegen.extraction.type_analyzer import ConstraintSource +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + PatternConstraint, +) from overture.schema.system.model_constraint import ( FieldEqCondition, ForbidIfConstraint, @@ -478,3 +484,29 @@ def link_fn(tid: TypeIdentity) -> str: assert len(received) == 1 assert received[0].obj is Target assert result == "References [`Target`](link) 
(composition)" + + +class TestConstraintPatternFlags: + """constraint_display_text surfaces a compiled pattern's regex flags.""" + + def _display(self, constraint: object) -> str: + cs = ConstraintSource(source_ref=None, source_name=None, constraint=constraint) + return constraint_display_text(cs) + + def test_case_insensitive_pattern_shows_inline_flag(self) -> None: + # A case-insensitive pattern displayed without its flag misleads the + # reader into thinking only lowercase matches. + c = PatternConstraint(r"^[a-z]+$", "err: {value}", flags=re.I) + assert "pattern: `(?i)^[a-z]+$`" in self._display(c) + + def test_unflagged_pattern_omits_inline_flag_group(self) -> None: + # re.UNICODE is the implicit str-pattern default and must not leak as + # a (?u) group onto every pattern. + assert "pattern: `^[A-Z]{2}$`" in self._display(CountryCodeAlpha2Constraint()) + assert "(?" not in self._display(CountryCodeAlpha2Constraint()) + + def test_multiple_flags_render_as_one_group(self) -> None: + # Display tolerates flags pyspark cannot honor (re.M); doc generation + # must not crash where check generation would. 
+ c = PatternConstraint(r"^[a-z]+$", "err: {value}", flags=re.I | re.M) + assert "pattern: `(?im)^[a-z]+$`" in self._display(c) diff --git a/packages/overture-schema-codegen/tests/test_field_walk.py b/packages/overture-schema-codegen/tests/test_field_walk.py index d0d493cf9..3cdb08048 100644 --- a/packages/overture-schema-codegen/tests/test_field_walk.py +++ b/packages/overture-schema-codegen/tests/test_field_walk.py @@ -1,9 +1,12 @@ """Tests for the `FieldShape` walker and structural helpers.""" +import enum + import pytest from overture.schema.codegen.extraction.field import ( AnyScalar, ArrayOf, + ConstraintSource, LiteralScalar, MapOf, ModelRef, @@ -12,6 +15,8 @@ UnionRef, ) from overture.schema.codegen.extraction.field_walk import ( + enum_source, + map_key_value_constraints, shape_children, terminal_model_ref, terminal_of, @@ -162,3 +167,69 @@ def test_terminal_scalar(self, shape: object, expected: object) -> None: ) def test_terminal_model_ref(self, shape: object, expected: object) -> None: assert terminal_model_ref(shape) is expected # type: ignore[arg-type] + + +class _Color(enum.Enum): + RED = "red" + BLUE = "blue" + + +class TestEnumSource: + """`enum_source` extracts the `Enum` class from an enum-backed `Primitive`.""" + + def test_enum_backed_primitive_returns_class(self) -> None: + shape = Primitive(base_type="str", source_type=_Color) + assert enum_source(shape) is _Color + + def test_plain_primitive_returns_none(self) -> None: + shape = Primitive(base_type="str") + assert enum_source(shape) is None + + def test_literal_scalar_returns_none(self) -> None: + shape = LiteralScalar(values=("a",)) + assert enum_source(shape) is None + + def test_non_enum_class_source_type_returns_none(self) -> None: + # source_type is a real class that is not an Enum subclass + shape = Primitive(base_type="str", source_type=int) + assert enum_source(shape) is None + + def test_newtype_wrapping_enum_primitive_returns_none(self) -> None: + # wrappers are not unwrapped — 
only a bare Primitive matches + inner = Primitive(base_type="str", source_type=_Color) + nt = NewTypeShape(name="ColorAlias", ref=object(), inner=inner) + assert enum_source(nt) is None + + def test_array_of_enum_primitive_returns_none(self) -> None: + inner = Primitive(base_type="str", source_type=_Color) + shape = ArrayOf(element=inner) + assert enum_source(shape) is None + + +def _constraint() -> ConstraintSource: + """A directly-applied constraint with no NewType source.""" + return ConstraintSource(source_ref=None, source_name=None, constraint=object()) + + +class TestMapKeyValueConstraints: + """`map_key_value_constraints` collects a `MapOf` terminal's sides.""" + + def test_direct_map_returns_key_and_value_constraints(self) -> None: + kc, vc = _constraint(), _constraint() + shape = MapOf( + key=Primitive(base_type="str", constraints=(kc,)), + value=Primitive(base_type="int32", constraints=(vc,)), + ) + assert map_key_value_constraints(shape) == ((kc,), (vc,)) + + def test_looks_through_newtype_and_array_wrappers(self) -> None: + vc = _constraint() + inner_map = MapOf( + key=Primitive(base_type="str"), + value=Primitive(base_type="int32", constraints=(vc,)), + ) + shape = NewTypeShape(name="N", ref=object(), inner=ArrayOf(element=inner_map)) + assert map_key_value_constraints(shape) == ((), (vc,)) + + def test_non_map_shape_returns_empty(self) -> None: + assert map_key_value_constraints(Primitive(base_type="str")) == ((), ()) diff --git a/packages/overture-schema-codegen/tests/test_golden_markdown.py b/packages/overture-schema-codegen/tests/test_golden_markdown.py index e75eddcc5..cffdc2def 100644 --- a/packages/overture-schema-codegen/tests/test_golden_markdown.py +++ b/packages/overture-schema-codegen/tests/test_golden_markdown.py @@ -18,17 +18,17 @@ Venue, Widget, assert_golden, - feature_spec_for_model, + spec_for_model, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum from overture.schema.codegen.extraction.newtype_extraction 
import extract_newtype -from overture.schema.codegen.extraction.specs import FeatureSpec, TypeIdentity +from overture.schema.codegen.extraction.specs import ModelSpec, TypeIdentity from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) from overture.schema.codegen.markdown.renderer import ( render_enum, - render_feature, + render_model, render_newtype, ) from overture.schema.codegen.markdown.reverse_references import ( @@ -64,13 +64,13 @@ @pytest.fixture(scope="module") def reverse_refs() -> dict[TypeIdentity, list[UsedByEntry]]: """Compute reverse references for all test models.""" - feature_specs: list[FeatureSpec] = [] + model_specs: list[ModelSpec] = [] for model_class, _ in FEATURE_CASES: assert isinstance(model_class, type) and issubclass(model_class, BaseModel) - feature_specs.append(feature_spec_for_model(model_class)) + model_specs.append(spec_for_model(model_class)) - all_specs = collect_all_supplementary_types(feature_specs) - return compute_reverse_references(feature_specs, all_specs) + all_specs = collect_all_supplementary_types(model_specs) + return compute_reverse_references(model_specs, all_specs) @pytest.mark.parametrize( @@ -84,9 +84,9 @@ def test_feature_golden( update_golden: bool, reverse_refs: dict[TypeIdentity, list[UsedByEntry]], ) -> None: - spec = feature_spec_for_model(model_class) + spec = spec_for_model(model_class) used_by = reverse_refs.get(spec.identity) - actual = render_feature(spec, used_by=used_by) + actual = render_model(spec, used_by=used_by) assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_integration_real_models.py b/packages/overture-schema-codegen/tests/test_integration_real_models.py index b0f90e266..85ac6f718 100644 --- a/packages/overture-schema-codegen/tests/test_integration_real_models.py +++ b/packages/overture-schema-codegen/tests/test_integration_real_models.py @@ -5,20 +5,18 @@ """ import 
pytest -from codegen_test_support import assert_literal_field, feature_spec_for_model +from codegen_test_support import assert_literal_field, spec_for_model from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( - FeatureSpec, ModelSpec, + RecordSpec, UnionSpec, filter_model_classes, - is_model_class, - is_union_alias, ) from overture.schema.codegen.extraction.union_extraction import extract_union -from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.codegen.markdown.pipeline import generate_markdown_pages -from overture.schema.codegen.markdown.renderer import render_feature +from overture.schema.codegen.markdown.renderer import render_model +from overture.schema.codegen.spec_discovery import extract_model_spec from overture.schema.system.discovery import discover_models from overture.schema.transportation import Segment from overture.schema.transportation.segment.models import RoadSegment @@ -37,22 +35,22 @@ def test_discover_models_returns_multiple_themes(self) -> None: class TestExtractBuildingModel: """Tests for extracting the Building model.""" - def test_extract_building_has_name(self, building_spec: ModelSpec) -> None: + def test_extract_building_has_name(self, building_spec: RecordSpec) -> None: """Building model spec should have correct name.""" assert building_spec.name == "Building" - def test_extract_building_has_theme_type(self, building_spec: ModelSpec) -> None: + def test_extract_building_has_theme_type(self, building_spec: RecordSpec) -> None: """Building should have theme='buildings', type='building' as Literal fields.""" assert_literal_field(building_spec, "theme", "buildings") assert_literal_field(building_spec, "type", "building") - def test_extract_building_has_fields(self, building_spec: ModelSpec) -> None: + def test_extract_building_has_fields(self, building_spec: RecordSpec) -> None: """Building should have multiple 
fields.""" assert len(building_spec.fields) > 0, "Building should have at least one field" field_names = {f.name for f in building_spec.fields} assert "id" in field_names - def test_building_field_shapes_are_present(self, building_spec: ModelSpec) -> None: + def test_building_field_shapes_are_present(self, building_spec: RecordSpec) -> None: """Every Building field has a `FieldShape`.""" for field in building_spec.fields: assert field.shape is not None @@ -105,7 +103,7 @@ class TestMarkdownRenderingRealModels: def test_render_building_content(self, building_class: type[BaseModel]) -> None: """Building renders with title, field table, and expected fields.""" - markdown = render_feature(feature_spec_for_model(building_class)) + markdown = render_model(spec_for_model(building_class)) assert "# Building" in markdown assert "| Name |" in markdown @@ -114,9 +112,9 @@ def test_render_building_content(self, building_class: type[BaseModel]) -> None: assert "geometry" in markdown def test_render_all_models_without_crash(self, all_discovered_models: dict) -> None: - """render_feature should not crash on any discovered model.""" + """render_model should not crash on any discovered model.""" for model_class in filter_model_classes(all_discovered_models): - render_feature(feature_spec_for_model(model_class)) + render_model(spec_for_model(model_class)) class TestDiscriminatedUnions: @@ -218,19 +216,12 @@ class TestPydanticTypePages: def pages(self) -> list: """Generate all pages from real discovered models.""" models = discover_models() - feature_specs: list[FeatureSpec] = [] - for key, entry in models.items(): - if is_model_class(entry): - feature_specs.append(extract_model(entry, entry_point=key.entry_point)) - elif is_union_alias(entry): - feature_specs.append( - extract_union( - entry_point_class(key.entry_point), - entry, - entry_point=key.entry_point, - ) - ) - return generate_markdown_pages(feature_specs, self._SCHEMA_ROOT) + model_specs: list[ModelSpec] = [ + spec + for 
key, entry in models.items() + if (spec := extract_model_spec(key, entry)) is not None + ] + return generate_markdown_pages(model_specs, self._SCHEMA_ROOT) def test_http_url_page_exists(self, pages: list) -> None: """Pipeline produces a page for HttpUrl under pydantic/networks/.""" @@ -250,5 +241,5 @@ def test_http_url_page_content(self, pages: list) -> None: def test_place_links_to_http_url(self, pages: list) -> None: """Place feature page links to the HttpUrl type page.""" - place_page = next(p for p in pages if p.path.stem == "place" and p.is_feature) + place_page = next(p for p in pages if p.path.stem == "place" and p.is_model) assert "HttpUrl" in place_page.content diff --git a/packages/overture-schema-codegen/tests/test_markdown_renderer.py b/packages/overture-schema-codegen/tests/test_markdown_renderer.py index 92f1d0ac1..271347ca2 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_renderer.py +++ b/packages/overture-schema-codegen/tests/test_markdown_renderer.py @@ -21,8 +21,8 @@ Sources, TreeNode, Venue, - feature_spec_for_model, make_union_spec, + spec_for_model, ) from overture.schema.codegen.extraction.examples import ExampleRecord from overture.schema.codegen.extraction.model_extraction import extract_model @@ -43,7 +43,7 @@ _linkify_bare_urls, _sanitize_for_table_cell, render_enum, - render_feature, + render_model, render_newtype, render_primitives_from_specs, render_pydantic_type, @@ -184,12 +184,12 @@ def test_mixed_code_span_and_bare_url(self) -> None: class TestRenderFeatureBasic: - """Tests for render_feature with basic models.""" + """Tests for render_model with basic models.""" def test_renders_title_from_model_name(self) -> None: """Should render model name as H1 title.""" spec = extract_model(SimpleModel) - result = render_feature(spec) + result = render_model(spec) assert "# SimpleModel" in result @@ -202,7 +202,7 @@ class DescribedModel(BaseModel): value: int spec = extract_model(DescribedModel) - result = 
render_feature(spec) + result = render_model(spec) assert "This is the model description." in result @@ -215,7 +215,7 @@ class ModelWithField(BaseModel): name: str spec = extract_model(ModelWithField) - result = render_feature(spec) + result = render_model(spec) assert "## Fields" in result @@ -228,7 +228,7 @@ class ModelWithField(BaseModel): name: str spec = extract_model(ModelWithField) - result = render_feature(spec) + result = render_model(spec) assert "| Name | Type | Description |" in result assert "| -----: | :----: | ------------- |" in result @@ -246,7 +246,7 @@ class ModelWithRequired(BaseModel): name: str = Field(description="The name") spec = extract_model(ModelWithRequired) - result = render_feature(spec) + result = render_model(spec) assert "| `name` |" in result assert "| `string` |" in result @@ -261,7 +261,7 @@ class ModelWithOptional(BaseModel): nickname: str | None = Field(None, description="Optional nickname") spec = extract_model(ModelWithOptional) - result = render_feature(spec) + result = render_model(spec) assert "| `nickname` |" in result assert "(optional)" in result @@ -278,7 +278,7 @@ class ModelWithTypes(BaseModel): active: bool spec = extract_model(ModelWithTypes) - result = render_feature(spec) + result = render_model(spec) # Check that fields are present (exact type format may vary) assert "`count`" in result @@ -294,7 +294,7 @@ class ModelWithMultilineDesc(BaseModel): name: str = Field(description="First line.\n\nSecond paragraph.") spec = extract_model(ModelWithMultilineDesc) - result = render_feature(spec) + result = render_model(spec) assert "First line.

Second paragraph." in result # The table should not be broken by a blank line @@ -318,7 +318,7 @@ class Place(FeatureBase[Literal["places"], Literal["place"]]): name: str spec = extract_model(Place) - result = render_feature(spec) + result = render_model(spec) # Theme and type should appear somewhere in output assert "places" in result @@ -337,7 +337,7 @@ class TestFeature(FeatureBase[Literal["test_theme"], Literal["test_type"]]): name: str spec = extract_model(TestFeature) - result = render_feature(spec) + result = render_model(spec) assert '| `"test_theme"` |' in result assert '| `"test_type"` |' in result @@ -363,7 +363,7 @@ class ModelWithSources(BaseModel): sources: TestSources | None = None - result = render_feature(feature_spec_for_model(ModelWithSources)) + result = render_model(spec_for_model(ModelWithSources)) assert "`TestSources`" in result assert "(list, optional)" in result @@ -377,7 +377,7 @@ class ModelWithColor(BaseModel): color: HexColor | None = None spec = extract_model(ModelWithColor) - result = render_feature(spec) + result = render_model(spec) assert "`HexColor`" in result assert "(optional)" in result @@ -391,7 +391,7 @@ class ModelWithCount(BaseModel): count: int32 spec = extract_model(ModelWithCount) - result = render_feature(spec) + result = render_model(spec) assert "| `int32` |" in result # Should NOT be linked @@ -406,7 +406,7 @@ class ModelWithName(BaseModel): name: str spec = extract_model(ModelWithName) - result = render_feature(spec) + result = render_model(spec) assert "| `string` |" in result @@ -422,7 +422,7 @@ class ModelWithEnum(BaseModel): status: Status spec = extract_model(ModelWithEnum) - result = render_feature(spec) + result = render_model(spec) assert "| `Status` |" in result @@ -437,7 +437,7 @@ class Outer(BaseModel): inner: Inner - result = render_feature(feature_spec_for_model(Outer)) + result = render_model(spec_for_model(Outer)) assert "| `Inner` |" in result @@ -447,7 +447,7 @@ class 
TestRenderFeatureInlineExpansion: def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: """Direct model field expands sub-fields with dot notation.""" - result = render_feature(feature_spec_for_model(FeatureWithAddress)) + result = render_model(spec_for_model(FeatureWithAddress)) assert "| `address.street` |" in result assert "| `address.city` |" in result @@ -455,14 +455,14 @@ def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: def test_list_of_model_fields_expanded_with_bracket_dot_prefix(self) -> None: """List-of-model field expands sub-fields with []. notation.""" - result = render_feature(feature_spec_for_model(FeatureWithSources)) + result = render_model(spec_for_model(FeatureWithSources)) assert "| `sources[]` |" in result assert "| `sources[].dataset` |" in result def test_cycle_detection_prevents_infinite_recursion(self) -> None: """Recursive model emits parent row but does not recurse.""" - result = render_feature(feature_spec_for_model(TreeNode)) + result = render_model(spec_for_model(TreeNode)) # The parent field row appears assert "| `parent` |" in result @@ -472,14 +472,14 @@ def test_cycle_detection_prevents_infinite_recursion(self) -> None: def test_primitive_field_unchanged(self) -> None: """Primitive fields produce a single row without expansion.""" spec = extract_model(SimpleModel) - result = render_feature(spec) + result = render_model(spec) lines = [line for line in result.splitlines() if "| `name` |" in line] assert len(lines) == 1 def test_parent_row_preserved_before_expansion(self) -> None: """The parent field row still appears before expanded sub-fields.""" - result = render_feature(feature_spec_for_model(FeatureWithAddress)) + result = render_model(spec_for_model(FeatureWithAddress)) # Parent row for 'address' itself appears assert "| `address` |" in result @@ -500,7 +500,7 @@ class TestRenderFeatureConstraints: def test_venue_has_constraints_section(self) -> None: """Venue's @require_any_of renders as a 
Constraints section.""" spec = extract_model(Venue) - result = render_feature(spec) + result = render_model(spec) assert "## Constraints" in result assert "At least one of `name`, `description` must be set" in result @@ -509,7 +509,7 @@ def test_constraints_section_between_fields_and_examples(self) -> None: """Constraints section appears after Fields, before Examples.""" spec = extract_model(Venue) examples = [ExampleRecord(rows=[("name", "test")])] - result = render_feature(spec, examples=examples) + result = render_model(spec, examples=examples) lines = result.splitlines() fields_line = next(i for i, line in enumerate(lines) if "## Fields" in line) @@ -529,7 +529,7 @@ class Plain(BaseModel): name: str spec = extract_model(Plain) - result = render_feature(spec) + result = render_model(spec) assert "## Constraints" not in result @@ -543,7 +543,7 @@ class Strict(BaseModel): name: str spec = extract_model(Strict) - result = render_feature(spec) + result = render_model(spec) assert "## Constraints" not in result @@ -554,7 +554,7 @@ class TestRenderFeatureConstraintNotes: def test_venue_name_field_includes_constraint_note(self) -> None: """Venue's name field description cell includes constraint note in italics.""" spec = extract_model(Venue) - result = render_feature(spec) + result = render_model(spec) # Find the row for 'name' field lines = result.splitlines() @@ -566,7 +566,7 @@ def test_venue_name_field_includes_constraint_note(self) -> None: def test_field_with_no_description_gets_constraint_note(self) -> None: """Field with no existing description still gets the constraint note.""" spec = extract_model(Venue) - result = render_feature(spec) + result = render_model(spec) # description field on Venue has no Field(description=...) 
lines = result.splitlines() @@ -579,7 +579,7 @@ class TestRenderFeatureFieldConstraints: def test_venue_geometry_shows_allowed_types(self) -> None: """Venue's geometry field shows GeometryTypeConstraint as a note.""" - result = render_feature(feature_spec_for_model(Venue)) + result = render_model(spec_for_model(Venue)) lines = result.splitlines() geo_line = next(line for line in lines if "| `geometry` |" in line) @@ -595,7 +595,7 @@ def test_venue_reference_links_when_context_available(self) -> None: ) }, ) - result = render_feature(feature_spec_for_model(Venue), link_ctx=ctx) + result = render_model(spec_for_model(Venue), link_ctx=ctx) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -604,7 +604,7 @@ def test_venue_reference_links_when_context_available(self) -> None: def test_venue_reference_unlinked_without_context(self) -> None: """Reference constraint renders as plain code when no LinkContext.""" - result = render_feature(feature_spec_for_model(Venue)) + result = render_model(spec_for_model(Venue)) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -612,6 +612,66 @@ def test_venue_reference_unlinked_without_context(self) -> None: assert "aggregation, part of" in ref_line +class TestRenderFeatureMapConstraints: + """Tests for map key/value constraint notes in field description cells. + + Mirrors `Sources.license_priority`, whose value bound was previously + dropped because `all_constraints` stopped at `MapOf`. + """ + + def test_map_value_bound_shows_value_note(self) -> None: + """A directly-applied map value bound renders as a 'value:' note.""" + + class ModelWithMapValueBound(BaseModel): + """Model with a bounded map value.""" + + priorities: dict[str, Annotated[int, Field(ge=0)]] = Field( + description="Priorities." 
+ ) + + result = render_model(extract_model(ModelWithMapValueBound)) + line = next(li for li in result.splitlines() if "| `priorities` |" in li) + assert "*value: `≥ 0`*" in line + + def test_map_key_bound_shows_key_note(self) -> None: + """A directly-applied map key bound renders as a 'key:' note.""" + + class ModelWithMapKeyBound(BaseModel): + """Model with a bounded map key.""" + + counts: dict[Annotated[int, Field(ge=1)], str] = Field( + description="Counts." + ) + + result = render_model(extract_model(ModelWithMapKeyBound)) + line = next(li for li in result.splitlines() if "| `counts` |" in li) + assert "*key: `≥ 1`*" in line + + +class TestRenderFeatureMapModelValue: + """A model-valued map names its value, never a bare `?`. + + `extract_model` resolves the value to a `ModelRef`, the case that + `resolve_type_name` could not name and silently rendered as `?`. + """ + + def test_model_valued_map_names_the_model(self) -> None: + class Inner(BaseModel): + """An inner model.""" + + x: int = Field(description="x") + + class Outer(BaseModel): + """Outer with a model-valued map.""" + + by_key: dict[str, Inner] = Field(description="Map to inner models.") + + result = render_model(extract_model(Outer)) + line = next(li for li in result.splitlines() if "| `by_key` |" in li) + assert "map" in line + assert "?" 
not in line + + class TestRenderEnumBasic: """Tests for render_enum with simple enums.""" @@ -807,7 +867,7 @@ class ModelWithColor(BaseModel): }, ) - result = render_feature(spec, link_ctx=ctx) + result = render_model(spec, link_ctx=ctx) assert "[`HexColor`](../../types/strings/hex_color.md)" in result @@ -833,7 +893,7 @@ class ModelWithRoof(BaseModel): }, ) - result = render_feature(spec, link_ctx=ctx) + result = render_model(spec, link_ctx=ctx) assert "[`RoofShape`](../roof_shape.md)" in result @@ -859,7 +919,7 @@ class ModelWithClass(BaseModel): }, ) - result = render_feature(spec, link_ctx=ctx) + result = render_model(spec, link_ctx=ctx) assert "[`BuildingClass`](building_class.md)" in result @@ -872,7 +932,7 @@ class ModelWithColor(BaseModel): color: HexColor | None = None spec = extract_model(ModelWithColor) - result = render_feature(spec) + result = render_model(spec) assert "`HexColor`" in result assert "hex_color.md" not in result @@ -1008,15 +1068,15 @@ def test_pipe_character_not_escaped_in_backticks(self) -> None: class TestRenderFeatureWithExamples: - """Tests for render_feature with examples support.""" + """Tests for render_model with examples support.""" def test_accepts_examples_parameter(self) -> None: - """render_feature accepts examples parameter.""" + """render_model accepts examples parameter.""" spec = extract_model(SimpleModel) examples = [ExampleRecord(rows=[("name", "test")])] # Should not raise - result = render_feature(spec, examples=examples) + result = render_model(spec, examples=examples) assert "# SimpleModel" in result def test_renders_single_example_without_heading(self) -> None: @@ -1031,7 +1091,7 @@ class ModelWithCount(BaseModel): spec = extract_model(ModelWithCount) examples = [ExampleRecord(rows=[("name", "test"), ("count", 42)])] - result = render_feature(spec, examples=examples) + result = render_model(spec, examples=examples) assert "## Examples" in result assert "| Column | Value |" in result assert "| `name` | `test` |" 
in result @@ -1047,7 +1107,7 @@ def test_renders_multiple_examples_with_headings(self) -> None: ExampleRecord(rows=[("name", "second")]), ] - result = render_feature(spec, examples=examples) + result = render_model(spec, examples=examples) assert "## Examples" in result assert "### Example 1" in result assert "### Example 2" in result @@ -1077,7 +1137,7 @@ class TestModel(BaseModel): ) ] - result = render_feature(spec, examples=examples) + result = render_model(spec, examples=examples) # String with backticks assert "| `text` | `hello` |" in result # Number with backticks @@ -1090,14 +1150,14 @@ class TestModel(BaseModel): def test_no_examples_omits_section(self) -> None: """When examples is None, Examples section is not rendered.""" spec = extract_model(SimpleModel) - result = render_feature(spec, examples=None) + result = render_model(spec, examples=None) assert "## Examples" not in result def test_empty_examples_list_omits_section(self) -> None: """When examples is empty list, Examples section is not rendered.""" spec = extract_model(SimpleModel) - result = render_feature(spec, examples=[]) + result = render_model(spec, examples=[]) assert "## Examples" not in result @@ -1191,7 +1251,7 @@ def test_shared_fields_have_no_variant_tag(self) -> None: ), ], ) - result = render_feature(spec) + result = render_model(spec) assert "| `id` |" in result assert "*(" not in result # no variant tag @@ -1215,7 +1275,7 @@ class RoadSegment(BaseModel): ), ], ) - result = render_feature(spec) + result = render_model(spec) assert "| `speed_limit` *(Road)* |" in result @@ -1267,7 +1327,7 @@ def test_constraint_class_not_linked(self) -> None: assert "[`CountryCodeAlpha2Constraint`](" not in result.display -def _feature_spec() -> object: +def _model_spec() -> object: return extract_model(SimpleModel) @@ -1280,7 +1340,7 @@ def _newtype_spec() -> object: _USED_BY_CASES = [ - pytest.param(_feature_spec, render_feature, id="feature"), + pytest.param(_model_spec, render_model, 
id="feature"), pytest.param(_enum_spec, render_enum, id="enum"), pytest.param(_newtype_spec, render_newtype, id="newtype"), ] @@ -1318,8 +1378,8 @@ def test_entries_render_without_links_when_no_context( ("spec_factory", "render_fn", "page_path", "expected_link"), [ pytest.param( - _feature_spec, - render_feature, + _model_spec, + render_model, PurePosixPath("types/strings/hex_color.md"), "../../buildings/building/building.md", id="feature", diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py index fc1b946a2..46a918beb 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_type_format.py +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -2,19 +2,24 @@ from enum import Enum from pathlib import PurePosixPath -from typing import Literal, NewType +from typing import Any, Literal, NewType +import pytest from overture.schema.codegen.extraction.field import ( AnyScalar, ArrayOf, LiteralScalar, + NewTypeShape, + Primitive, Scalar, UnionRef, ) +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity from overture.schema.codegen.extraction.type_analyzer import analyze_type from overture.schema.codegen.markdown.link_computation import LinkContext from overture.schema.codegen.markdown.type_format import ( + _bare_map_side_name, _registry_name, format_type, format_underlying_type, @@ -31,6 +36,14 @@ class _ModelB(BaseModel): y: str +class _OuterWithModelMap(BaseModel): + m: dict[str, _ModelB] + + +class _OuterWithModelListMap(BaseModel): + m: dict[str, list[_ModelB]] + + class TestFormatType: """Tests for format_type.""" @@ -82,6 +95,42 @@ def test_registered_primitive_not_linked(self) -> None: assert result == "`int32`" assert "](int32.md)" not in result + def test_geometry_links_to_aggregate_page(self) -> None: + from 
overture.schema.system.primitive import Geometry + + field = _make_field(Geometry) + ctx = LinkContext( + page_path=PurePosixPath("buildings/building/building.md"), + registry={ + TypeIdentity(Geometry, "Geometry"): PurePosixPath( + "system/primitive/geometry.md" + ) + }, + ) + assert ( + format_type(field, ctx) + == "[`geometry`](../../system/primitive/geometry.md)" + ) + + def test_bbox_links_to_aggregate_page(self) -> None: + from overture.schema.system.primitive import BBox + + field = _make_field(BBox) + ctx = LinkContext( + page_path=PurePosixPath("base/feature/feature.md"), + registry={ + TypeIdentity(BBox, "BBox"): PurePosixPath( + "system/primitive/geometry.md" + ) + }, + ) + assert format_type(field, ctx) == "[`bbox`](../../system/primitive/geometry.md)" + + def test_geometry_without_context_renders_plain_code(self) -> None: + from overture.schema.system.primitive import Geometry + + assert format_type(_make_field(Geometry)) == "`geometry`" + def _make_field( annotation: object, @@ -321,6 +370,131 @@ def test_nested_list_of_scalar_newtype_renders_nested_list_syntax(self) -> None: assert "(list)" not in result +def _link_ctx(*entries: tuple[object, str, str]) -> LinkContext: + """Build a LinkContext registering each (newtype, name, page_path).""" + return LinkContext( + page_path=PurePosixPath("base/names/names.md"), + registry={ + TypeIdentity(newtype, name): PurePosixPath(path) + for newtype, name, path in entries + }, + ) + + +class TestFormatMapType: + """Tests for MapOf rendering in field cells (format_type). + + Adjacent backtick runs break links: CommonMark reads `` as a + multi-backtick code-span delimiter, so any side left bare must fold + into the surrounding `map<...>` span. Every linked case asserts + `"``" not in result` to guard that. 
+ """ + + def test_map_without_context_renders_bare_names(self) -> None: + """A map field with no link context renders bare key/value names.""" + assert format_type(_make_field(dict[str, int32])) == "`map`" + + def test_map_value_list_preserves_list_wrapper(self) -> None: + """A list-valued map keeps its `list<...>` wrapper, not just the element.""" + assert ( + format_type(_make_field(dict[str, list[int]])) + == "`map>`" + ) + + def test_map_value_nested_map_preserves_map_wrapper(self) -> None: + """A map-valued map keeps its inner `map<...>`, not a bare `?`.""" + assert ( + format_type(_make_field(dict[str, dict[str, int]])) + == "`map>`" + ) + + def test_map_value_any_renders_as_any_not_question_mark(self) -> None: + """An `Any`-valued map names the value `Any`, never a bare `?`.""" + assert format_type(_make_field(dict[str, Any])) == "`map`" + + def test_union_valued_map_side_raises(self) -> None: + """A union-valued map side fails loudly rather than rendering a guess.""" + with pytest.raises(NotImplementedError): + _bare_map_side_name(_union_ref([_ModelA, _ModelB])) + + def test_non_semantic_newtype_map_side_uses_registry_name(self) -> None: + """A pass-through NewType resolves to its registry name, not its raw name. + + A NewType whose name equals its base type (here `int`) is not + semantic, so it must render as the registry markdown name (`int64`). 
+ """ + shape = NewTypeShape( + name="int", ref=object(), inner=Primitive(base_type="int", source_type=int) + ) + assert _bare_map_side_name(shape) == "int64" + + def test_map_key_newtype_links_in_field_cell(self) -> None: + """A semantic NewType key links to its page in the field cell.""" + LangTag = NewType("LangTag", str) + ctx = _link_ctx((LangTag, "LangTag", "system/language_tag.md")) + result = format_type(_make_field(dict[LangTag, str]), ctx) + assert "[`LangTag`]" in result + assert "language_tag.md" in result + assert "map<" in result + assert "``" not in result + + def test_map_value_newtype_links_in_field_cell(self) -> None: + """A semantic NewType value links to its page in the field cell.""" + Stripped = NewType("Stripped", str) + ctx = _link_ctx((Stripped, "Stripped", "system/stripped_string.md")) + result = format_type(_make_field(dict[str, Stripped]), ctx) + assert "[`Stripped`]" in result + assert "stripped_string.md" in result + assert "``" not in result + + def test_map_value_model_links_in_field_cell(self) -> None: + """A model-valued map links the value model to its page (bd-ru4n). + + The real pipeline resolves a `dict[K, Model]` value to a `ModelRef` + (not a BaseModel-sourced `Primitive`), so the link path must handle + `ModelRef` map sides, not only `Primitive` ones. + """ + spec = extract_model(_OuterWithModelMap) + field = next(f for f in spec.fields if f.name == "m") + ctx = _link_ctx((_ModelB, "_ModelB", "theme/feature/types/model_b.md")) + result = format_type(field, ctx) + assert "[`_ModelB`]" in result + assert "model_b.md" in result + assert "map<" in result + assert "``" not in result + + def test_map_value_model_list_renders_bare_with_wrapper(self) -> None: + """A `list`-valued map keeps its `list<...>` wrapper, no link. + + The real pipeline resolves the value to `ArrayOf(element=ModelRef)`. 
+ Linking would collapse the wrapper to a bare model link, so the + `depth == 0` guard keeps the model side bare and preserves + `list<...>`. Registering `_ModelB` makes the absent link meaningful. + """ + spec = extract_model(_OuterWithModelListMap) + field = next(f for f in spec.fields if f.name == "m") + ctx = _link_ctx((_ModelB, "_ModelB", "theme/feature/types/model_b.md")) + result = format_type(field, ctx) + assert "[`_ModelB`]" not in result + assert "list<" in result + assert "map>" in result + + def test_map_key_and_value_newtypes_both_link(self) -> None: + """When both sides are semantic NewTypes, both link in the field cell.""" + LangTag = NewType("LangTag", str) + Stripped = NewType("Stripped", str) + ctx = _link_ctx( + (LangTag, "LangTag", "system/language_tag.md"), + (Stripped, "Stripped", "system/stripped_string.md"), + ) + result = format_type(_make_field(dict[LangTag, Stripped]), ctx) + assert "[`LangTag`]" in result + assert "language_tag.md" in result + assert "[`Stripped`]" in result + assert "stripped_string.md" in result + assert "``" not in result + + class TestFormatUnderlyingUnionType: """Tests for union FieldShape in format_underlying_type.""" diff --git a/packages/overture-schema-codegen/tests/test_model_extraction.py b/packages/overture-schema-codegen/tests/test_model_extraction.py index d5791ee61..5ee081459 100644 --- a/packages/overture-schema-codegen/tests/test_model_extraction.py +++ b/packages/overture-schema-codegen/tests/test_model_extraction.py @@ -1,6 +1,13 @@ """Tests for `extract_model`.""" -from overture.schema.codegen.extraction.field import ArrayOf, UnionRef +from typing import Annotated, Optional + +from overture.schema.codegen.extraction.field import ( + ArrayOf, + ModelRef, + Primitive, + UnionRef, +) from overture.schema.codegen.extraction.field_walk import terminal_of from overture.schema.codegen.extraction.length_constraints import ArrayMinLen from overture.schema.codegen.extraction.model_extraction import 
extract_model @@ -22,6 +29,119 @@ class Container(BaseModel): assert terminal.union.discriminator_field == "dimension" +def test_required_list_with_optional_element_is_required() -> None: + """A required `list[X | None]` field must not inherit element optionality. + + `list[str | None]` is a list whose elements may be None; the field + itself still requires a list to be present. The list branch must + return `False` for field optionality so `FieldSpec.is_required` stays + `True` and `check_required` is generated for the field. + """ + + class M(BaseModel): + tags: list[str | None] + + spec = extract_model(M) + tags_field = next(f for f in spec.fields if f.name == "tags") + + assert tags_field.is_optional is False + assert tags_field.is_required is True + assert isinstance(tags_field.shape, ArrayOf) + assert isinstance(tags_field.shape.element, Primitive) + assert tags_field.shape.element.base_type == "str" + + +def test_required_list_plain_element_is_required() -> None: + """A required `list[str]` field is unaffected by the optionality fix.""" + + class M(BaseModel): + tags: list[str] + + spec = extract_model(M) + tags_field = next(f for f in spec.fields if f.name == "tags") + + assert tags_field.is_optional is False + assert tags_field.is_required is True + assert isinstance(tags_field.shape, ArrayOf) + + +def test_optional_list_with_optional_element_is_optional() -> None: + """A `list[str | None] | None` field is optional (the outer | None).""" + + class M(BaseModel): + tags: list[str | None] | None = None + + spec = extract_model(M) + tags_field = next(f for f in spec.fields if f.name == "tags") + + assert tags_field.is_optional is True + assert tags_field.is_required is False + + +def test_self_referential_list_forward_ref_resolves_to_cycle() -> None: + """A `list["Self"]` forward ref resolves to a cycle-marked `ModelRef`. + + Builtin generics store `list["Node"]`'s element as a bare `str`, + which neither Pydantic nor `typing.get_type_hints` resolves. 
+ `extract_model` must resolve it against the model's namespace so the + self-reference reaches its model terminal and the cycle guard marks + the back-edge -- rather than crashing the type analyzer's terminal + classifier on an unresolved string. + """ + + class Node(BaseModel): + val: Annotated[int, Field(ge=0)] + children: list["Node"] = Field(default_factory=list) + + spec = extract_model(Node) + children = next(f for f in spec.fields if f.name == "children") + + assert isinstance(children.shape, ArrayOf) + element = children.shape.element + assert isinstance(element, ModelRef) + assert element.starts_cycle is True + assert element.model is spec + + +def test_self_referential_optional_resolves_to_cycle() -> None: + """A top-level `Optional["Self"]` resolves to a cycle-marked `ModelRef`. + + Pydantic resolves the string inside `Optional["Node"]` before + `extract_model` runs -- the annotation's args are already + `(, NoneType)`, so no string reaches `_resolve_forward_ref`. + This exercises the cycle guard through a Pydantic-resolved `Optional`, + not the bare-string forward-ref path covered above. 
+ """ + + class Node(BaseModel): + val: int + parent: Optional["Node"] = None + + spec = extract_model(Node) + parent = next(f for f in spec.fields if f.name == "parent") + + assert isinstance(parent.shape, ModelRef) + assert parent.shape.starts_cycle is True + assert parent.shape.model is spec + + +def test_nested_list_forward_ref_resolves_to_cycle() -> None: + """A `list[list["Self"]]` forward ref resolves through both array layers.""" + + class Node(BaseModel): + val: int + grid: list[list["Node"]] = Field(default_factory=list) + + spec = extract_model(Node) + grid = next(f for f in spec.fields if f.name == "grid") + + assert isinstance(grid.shape, ArrayOf) + assert isinstance(grid.shape.element, ArrayOf) + inner = grid.shape.element.element + assert isinstance(inner, ModelRef) + assert inner.starts_cycle is True + + def test_field_metadata_minlen_wrapped_as_array_min_len() -> None: """MinLen in field_info.metadata is wrapped to ArrayMinLen, not left as raw MinLen. diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py index c033a19cf..534685dac 100644 --- a/packages/overture-schema-codegen/tests/test_model_extractor.py +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -408,7 +408,7 @@ def test_cycle_detected_and_marked(self) -> None: assert terminal.starts_cycle is True def test_shared_reference_within_one_extraction(self) -> None: - """Two fields referencing the same sub-model share the ModelSpec.""" + """Two fields referencing the same sub-model share the RecordSpec.""" class Shared(BaseModel): value: str @@ -426,7 +426,7 @@ class Container(BaseModel): assert isinstance(first_ref, ModelRef) assert isinstance(second_ref, ModelRef) # Within one extract_model call, the cache ensures the same - # ModelSpec is reused for both references; neither is a cycle. + # RecordSpec is reused for both references; neither is a cycle. 
assert first_ref.model is second_ref.model assert first_ref.starts_cycle is False assert second_ref.starts_cycle is False diff --git a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py index fadcd94fe..ab15f7509 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py @@ -1,42 +1,60 @@ -"""Tests for valid-row generation from FeatureSpecs.""" +"""Tests for valid-row generation from ModelSpecs.""" import uuid +from enum import Enum import pytest +from annotated_types import Gt, Lt from codegen_test_support import ( + FeatureWithDict, FeatureWithRequiredUrl, discover_feature, - feature_spec_for_model, + spec_for_model, ) -from overture.schema.codegen.extraction.field import AnyScalar, LiteralScalar, ModelRef +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ConstraintSource, + LiteralScalar, + ModelRef, + Primitive, +) +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( - FeatureSpec, FieldSpec, + ModelSpec, UnionSpec, ) +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor from overture.schema.codegen.pyspark.test_data.base_row import ( _primitive_default, + _value_from_check_pattern, + _value_from_scalar_constraints, generate_arm_rows, generate_base_row, generate_populated_arm_rows, generate_populated_row, value_for_field, ) -from pydantic import HttpUrl, TypeAdapter +from overture.schema.system.model_constraint import ( + FieldEqCondition, + forbid_if, + require_if, +) +from pydantic import BaseModel, Field, HttpUrl, TypeAdapter @pytest.fixture(scope="module") -def connector_spec() -> FeatureSpec: +def connector_spec() -> ModelSpec: return discover_feature("Connector") @pytest.fixture(scope="module") -def segment_spec() -> FeatureSpec: +def segment_spec() -> 
ModelSpec: return discover_feature("Segment") @pytest.fixture(scope="module") -def segment_union(segment_spec: FeatureSpec) -> UnionSpec: +def segment_union(segment_spec: ModelSpec) -> UnionSpec: assert isinstance(segment_spec, UnionSpec) return segment_spec @@ -58,34 +76,34 @@ class TestBaseRowUrlFields: """Base rows with URL-typed fields produce Pydantic-valid values.""" def test_required_url_field_passes_validation(self) -> None: - spec = feature_spec_for_model(FeatureWithRequiredUrl) + spec = spec_for_model(FeatureWithRequiredUrl) row = generate_base_row(spec) TypeAdapter(FeatureWithRequiredUrl).validate_python(row) class TestGenerateBaseRow: - def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + def test_passes_pydantic_validation(self, connector_spec: ModelSpec) -> None: row = generate_base_row(connector_spec) assert connector_spec.source_type is not None TypeAdapter(connector_spec.source_type).validate_python(row) - def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + def test_required_fields_present(self, connector_spec: ModelSpec) -> None: row = generate_base_row(connector_spec) required_names = {f.name for f in connector_spec.fields if f.is_required} assert required_names <= set(row.keys()) - def test_optional_fields_absent(self, connector_spec: FeatureSpec) -> None: + def test_optional_fields_absent(self, connector_spec: ModelSpec) -> None: row = generate_base_row(connector_spec) optional_names = {f.name for f in connector_spec.fields if not f.is_required} assert optional_names.isdisjoint(set(row.keys())) - def test_id_is_deterministic_uuid(self, connector_spec: FeatureSpec) -> None: + def test_id_is_deterministic_uuid(self, connector_spec: ModelSpec) -> None: row = generate_base_row(connector_spec) assert "id" in row parsed = uuid.UUID(row["id"]) assert parsed.version == 5 - def test_geometry_is_valid_wkt(self, connector_spec: FeatureSpec) -> None: + def test_geometry_is_valid_wkt(self, 
connector_spec: ModelSpec) -> None: row = generate_base_row(connector_spec) assert "geometry" in row assert row["geometry"].startswith("POINT") @@ -93,14 +111,14 @@ def test_geometry_is_valid_wkt(self, connector_spec: FeatureSpec) -> None: class TestGenerateArmRows: def test_returns_dict_per_arm( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_arm_rows(segment_spec) assert segment_union.discriminator_mapping is not None assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) def test_each_row_passes_validation( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_arm_rows(segment_spec) adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) @@ -108,7 +126,7 @@ def test_each_row_passes_validation( adapter.validate_python(row) def test_discriminator_field_set( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_arm_rows(segment_spec) assert segment_union.discriminator_field is not None @@ -116,7 +134,7 @@ def test_discriminator_field_set( assert row[segment_union.discriminator_field] == arm_val def test_arm_specific_required_fields_present( - self, segment_spec: FeatureSpec + self, segment_spec: ModelSpec ) -> None: """Road arm requires 'class' field; water arm does not.""" rows = generate_arm_rows(segment_spec) @@ -128,7 +146,7 @@ class TestPopulateOptionalFlag: """populate_optional flag controls recursion depth.""" def test_value_for_field_default_skips_optional_children( - self, connector_spec: FeatureSpec + self, connector_spec: ModelSpec ) -> None: """Default (`populate_optional=False`) yields sparse sub-models.""" field = next(f for f in connector_spec.fields if f.name == "sources") @@ -141,7 +159,7 @@ def 
test_value_for_field_default_skips_optional_children( assert not (optional_names & set(elem.keys())) def test_value_for_field_populate_includes_optional_children( - self, connector_spec: FeatureSpec + self, connector_spec: ModelSpec ) -> None: """`populate_optional=True` yields sub-models that include optional fields.""" field = next(f for f in connector_spec.fields if f.name == "sources") @@ -166,27 +184,27 @@ def _list_of_model(shape: object) -> ModelRef: class TestGeneratePopulatedRow: - def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + def test_passes_pydantic_validation(self, connector_spec: ModelSpec) -> None: row = generate_populated_row(connector_spec) assert connector_spec.source_type is not None TypeAdapter(connector_spec.source_type).validate_python(row) - def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + def test_required_fields_present(self, connector_spec: ModelSpec) -> None: row = generate_populated_row(connector_spec) required_names = {f.name for f in connector_spec.fields if f.is_required} assert required_names <= set(row.keys()) - def test_optional_fields_present(self, connector_spec: FeatureSpec) -> None: + def test_optional_fields_present(self, connector_spec: ModelSpec) -> None: row = generate_populated_row(connector_spec) optional_names = {f.name for f in connector_spec.fields if not f.is_required} assert optional_names <= set(row.keys()) - def test_id_matches_sparse_row(self, connector_spec: FeatureSpec) -> None: + def test_id_matches_sparse_row(self, connector_spec: ModelSpec) -> None: sparse = generate_base_row(connector_spec) populated = generate_populated_row(connector_spec) assert populated["id"] == sparse["id"] - def test_nested_structs_populated(self, connector_spec: FeatureSpec) -> None: + def test_nested_structs_populated(self, connector_spec: ModelSpec) -> None: """Optional struct fields contain populated sub-dicts, not empty.""" row = 
generate_populated_row(connector_spec) assert "sources" in row @@ -202,14 +220,14 @@ def test_nested_structs_populated(self, connector_spec: FeatureSpec) -> None: class TestGeneratePopulatedArmRows: def test_returns_dict_per_arm( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_populated_arm_rows(segment_spec) assert segment_union.discriminator_mapping is not None assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) def test_each_row_passes_validation( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_populated_arm_rows(segment_spec) adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) @@ -217,14 +235,14 @@ def test_each_row_passes_validation( adapter.validate_python(row) def test_discriminator_field_set( - self, segment_spec: FeatureSpec, segment_union: UnionSpec + self, segment_spec: ModelSpec, segment_union: UnionSpec ) -> None: rows = generate_populated_arm_rows(segment_spec) assert segment_union.discriminator_field is not None for arm_val, row in rows.items(): assert row[segment_union.discriminator_field] == arm_val - def test_optional_fields_present(self, segment_spec: FeatureSpec) -> None: + def test_optional_fields_present(self, segment_spec: ModelSpec) -> None: """Populated arm rows include optional fields.""" rows = generate_populated_arm_rows(segment_spec) # Road arm has optional speed_limits @@ -232,13 +250,108 @@ def test_optional_fields_present(self, segment_spec: FeatureSpec) -> None: assert "speed_limits" in road_row +class TestMapFieldPopulation: + """MapOf fields are populated with a constraint-valid entry, not `{}`. + + An empty map satisfies Pydantic but leaves nothing for a conformance + scenario to corrupt, so the generated key/value checks would never + fire. 
The entry's key and value must satisfy their own constraints + (`dict[LanguageTag, StrippedString]` -> key `"en"`, value `"clean"`). + """ + + def test_required_map_field_is_populated(self) -> None: + spec = spec_for_model(FeatureWithDict) + row = generate_base_row(spec) + # metadata: dict[str, int] is required. + assert row["metadata"], "required map field generated as empty dict" + ((k, v),) = row["metadata"].items() + assert isinstance(k, str) + assert isinstance(v, int) + + def test_constrained_map_entry_is_valid(self) -> None: + spec = spec_for_model(FeatureWithDict) + row = generate_populated_row(spec) + assert row["names"], "constrained map field generated as empty dict" + ((key, value),) = row["names"].items() + assert key == "en" # LanguageTagConstraint.valid + assert value == "clean" # StrippedConstraint.valid + + def test_populated_row_passes_pydantic(self) -> None: + spec = spec_for_model(FeatureWithDict) + row = generate_populated_row(spec) + TypeAdapter(FeatureWithDict).validate_python(row) + + def test_any_valued_map_generates_empty(self) -> None: + # `dict[str, Any]` (e.g. Infrastructure.source_tags) has no value + # constraint -- hence no value check -- and `Any` has no value + # strategy, so the map stays empty rather than crashing. + from typing import Any + + from overture.schema.codegen.extraction.model_extraction import extract_model + from pydantic import BaseModel + + class TagsModel(BaseModel): + source_tags: dict[str, Any] | None = None + + spec = extract_model(TagsModel) + row = generate_populated_row(spec) + assert row.get("source_tags") == {} + + +class TestRawPatternFailsLoud: + """An uncurated raw `Field(pattern=)` fails loud during base-row generation. + + Symmetric with `invalid_value`: both sides point at the missing + `PATTERN_VALUES` entry. 
Generating a valid value for a pattern with no + curated entry raises an actionable error that names the gap, rather than + yielding a value that fails the pattern and surfaces downstream as a + misleading "row should be valid" Pydantic error. + """ + + _DUMMY_SCALAR = Primitive(base_type="str") + _DUMMY_CS = ConstraintSource(source_ref=None, source_name=None, constraint=object()) + + def test_curated_pattern_returns_valid(self) -> None: + # Sources.license_priority key pattern, anchor-normalized. + desc = ExpressionDescriptor( + function="check_pattern", args=(r"^[A-Za-z0-9._+\-]+\z",) + ) + assert ( + _value_from_check_pattern(desc, self._DUMMY_SCALAR, self._DUMMY_CS) + == "ODbL-1.0" + ) + + def test_uncurated_pattern_raises(self) -> None: + desc = ExpressionDescriptor(function="check_pattern", args=(r"^xyz\z",)) + with pytest.raises(ValueError, match="check_pattern"): + _value_from_check_pattern(desc, self._DUMMY_SCALAR, self._DUMMY_CS) + + def test_uncurated_pattern_field_raises_during_generation(self) -> None: + # End to end: a scalar field carrying an uncurated raw pattern raises + # the actionable error at base-row generation, not as a downstream + # Pydantic "row should be valid" failure. + (meta,) = Field(pattern=r"^[0-9]{4}$").metadata + field = FieldSpec( + name="code", + shape=Primitive( + base_type="str", + constraints=( + ConstraintSource( + source_ref=None, source_name=None, constraint=meta + ), + ), + ), + ) + with pytest.raises(ValueError, match="check_pattern"): + value_for_field(field, "Foo") + + class TestValueForShapeScalarVariants: """_value_for_shape handles the Scalar variants it can reach.""" def test_any_scalar_raises(self) -> None: - # `AnyScalar` only appears as a `MapOf` value type in feature - # models; `_value_for_shape` returns `{}` for `MapOf` without - # descending, so reaching `AnyScalar` directly is a bug. 
+ # No schema declares a `dict[K, Any]` value, so `AnyScalar` has no + # value strategy; reaching it raises rather than guessing. field = FieldSpec(name="x", shape=AnyScalar()) with pytest.raises(TypeError, match="AnyScalar reached base-row generation"): value_for_field(field, "Foo") @@ -317,3 +430,98 @@ class AllRequiredModel(BaseModel): row = generate_base_row(spec) assert "req_a" in row and "req_b" in row assert "opt_a" not in row + + +class _ModeColor(str, Enum): + RED = "red" + BLUE = "blue" + + +@require_if(["extra"], ~FieldEqCondition("mode", _ModeColor.BLUE)) +class _ModeModelRequireIf(BaseModel): + mode: _ModeColor = _ModeColor.BLUE + extra: str | None = None + + +@forbid_if(["extra"], ~FieldEqCondition("mode", _ModeColor.BLUE)) +class _ModeModelForbidIf(BaseModel): + mode: _ModeColor = _ModeColor.BLUE + extra: str | None = None + + +# Default mode=RED means Not(mode == BLUE) is True from the start, so +# generate_base_row must fill 'extra' without any manual row mutation. +@require_if(["extra"], ~FieldEqCondition("mode", _ModeColor.BLUE)) +class _ModeModelRequireIfTriggered(BaseModel): + mode: _ModeColor = _ModeColor.RED + extra: str | None = None + + +class TestNotConditionBaseRow: + """Base-row generation handles Not(FieldEqCondition) in require_if/forbid_if.""" + + def test_require_if_not_condition_fills_field(self) -> None: + """generate_base_row fills the require_if target when Not-condition holds. + + _ModeModelRequireIfTriggered defaults mode=RED, so Not(mode == BLUE) is + True from the start. generate_base_row must fill 'extra' end-to-end + without any manual row mutation. + """ + spec = extract_model(_ModeModelRequireIfTriggered) + row = generate_base_row(spec) + # 'mode' has a default (RED) and may be omitted from the sparse row; + # what matters is that the Not-condition was evaluated and 'extra' filled. 
+ assert "extra" in row + assert row["extra"] is not None + TypeAdapter(_ModeModelRequireIfTriggered).validate_python(row) + + def test_forbid_if_not_condition_removes_field(self) -> None: + """forbid_if triggered by Not(FieldEqCondition) removes the forbidden field.""" + from overture.schema.codegen.pyspark.test_data.base_row import ( + _satisfy_model_constraints, + ) + + spec = extract_model(_ModeModelForbidIf) + row: dict[str, object] = { + "mode": _ModeColor.RED.value, + "extra": "should be removed", + } + _satisfy_model_constraints(row, spec) + # With mode='red', Not(mode == BLUE) is True -> extra must be absent + assert "extra" not in row + + def test_unknown_condition_type_raises(self) -> None: + """_row_satisfies_condition must raise for unknown condition kinds.""" + from overture.schema.codegen.pyspark.test_data.base_row import ( + _row_satisfies_condition, + ) + + class _Unknown: + pass + + with pytest.raises((TypeError, NotImplementedError)): + _row_satisfies_condition({}, _Unknown()) + + def test_not_field_eq_condition_base_row_passes_pydantic(self) -> None: + """A model with Not(FieldEqCondition) conditions produces a valid base row.""" + spec = extract_model(_ModeModelRequireIf) + row = generate_base_row(spec) + TypeAdapter(_ModeModelRequireIf).validate_python(row) + + +class TestMultiBoundScalarConstraints: + """_value_from_scalar_constraints merges multiple check_bounds before calling valid_bound.""" + + def test_gt_and_lt_float_tight_interval_returns_interior_value(self) -> None: + """Synthesized value satisfies both Gt(0.0) and Lt(1.0) simultaneously.""" + # float bounds: Gt(0.0)+Lt(1.0): gt+1 = 1.0 violates lt=1.0 (boundary) + scalar = Primitive( + base_type="float64", + constraints=( + ConstraintSource(source_ref=None, source_name=None, constraint=Gt(0.0)), + ConstraintSource(source_ref=None, source_name=None, constraint=Lt(1.0)), + ), + ) + result = _value_from_scalar_constraints(scalar) + assert isinstance(result, float) + assert 0.0 < result 
< 1.0 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py index 983b4d348..2a6d3140e 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -7,19 +7,24 @@ import pytest from annotated_types import Ge, Le, MinLen from codegen_test_support import ( + FeatureWithDict, LiteralSubtypeModel, RadioModel, RequireAnyModel, TripleNestedArrayModel, discover_feature, - feature_spec_for_model, + spec_for_model, union_spec_for, ) -from overture.schema.codegen.extraction.field import ConstraintSource, Primitive +from overture.schema.codegen.extraction.field import ( + ConstraintSource, + Primitive, + UnionRef, +) from overture.schema.codegen.extraction.specs import ( - FeatureSpec, FieldSpec, ModelSpec, + RecordSpec, ) from overture.schema.codegen.extraction.union_extraction import extract_union from overture.schema.codegen.pyspark._render_common import column_level_suffix @@ -44,6 +49,8 @@ ArrayPath, ArraySegment, FieldPath, + MapPath, + MapProjection, ScalarPath, parse, ) @@ -80,7 +87,7 @@ def _element_guard(check: Check) -> ElementGuard | None: def _checks_for( model_cls: type[BaseModel], ) -> tuple[list[Check], list[ModelCheck]]: - return build_checks(feature_spec_for_model(model_cls)) + return build_checks(spec_for_model(model_cls)) def _condition_of(check: ModelCheck) -> object: @@ -1125,7 +1132,60 @@ def test_struct_nested_union_constraint_raises(self) -> None: with pytest.raises( NotImplementedError, match="Model constraint on struct-nested" ): - build_checks(feature_spec_for_model(_OuterWithStructNestedUnion)) + build_checks(spec_for_model(_OuterWithStructNestedUnion)) + + +class TestStructNestedUnionWithVariantFields: + """Struct-nested union producing gated field checks is unsupported. 
+ + A `ColumnGuard` carries a bare discriminator name that renders as + `F.col("")` -- a top-level column access that is wrong + when the union is reached through a plain struct field. Raising loudly + is safer than emitting a mis-gated check. + + Distinct from `TestStructNestedUnionWithConstraint`: that class covers + model/exclusivity checks; this class covers variant-gated field checks + (the silent-failure path the previous guard missed). + + The trigger spec is built manually (not via `spec_for_model`) because + Pydantic strips the `Annotated[Union[...], FieldInfo(discriminator=...)]` + wrapper from `model_fields`, causing the inline extraction path to lose + `discriminator_mapping`. Constructing `UnionRef(union=...)` directly + with a fully-extracted union spec (via `union_spec_for`) replicates the + state that a future extraction path that preserves discriminator metadata + would produce. + """ + + @pytest.fixture(scope="class") + def discriminated_union_ref_spec(self) -> RecordSpec: + """A `RecordSpec` whose `nested` field holds a `UnionRef` with a full discriminator.""" + union_spec = union_spec_for("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + field = FieldSpec( + name="nested", + shape=UnionRef(union=union_spec), + description=None, + is_required=True, + is_optional=False, + ) + return RecordSpec(name="Outer", description=None, fields=[field]) + + def test_struct_nested_union_variant_fields_raises( + self, discriminated_union_ref_spec: RecordSpec + ) -> None: + with pytest.raises(NotImplementedError, match="ColumnGuard"): + build_checks(discriminated_union_ref_spec) + + def test_row_root_union_with_variant_fields_succeeds(self) -> None: + """Row-root union (empty `ScalarPath`) must still build checks without raising.""" + field_checks, _ = _union_checks( + "Synthetic", _SyntheticUnionFixtures.SyntheticUnion + ) + assert any(n.guards for n in field_checks) + + def test_array_reached_union_with_variant_fields_succeeds(self) -> None: + 
"""Array-reached union (`ArrayPath` prefix) must still build checks without raising.""" + field_checks, _ = _checks_for(_ListUnionContainer) + assert any(n.guards for n in field_checks) class _NestedInnerBase(BaseModel): @@ -1412,12 +1472,12 @@ def test_target_is_nested_inner_array(self) -> None: class TestSegmentUnionChecks: @pytest.fixture(scope="class") - def segment_spec(self) -> FeatureSpec: + def segment_spec(self) -> ModelSpec: return discover_feature("Segment") @pytest.fixture(scope="class") def segment_checks( - self, segment_spec: FeatureSpec + self, segment_spec: ModelSpec ) -> tuple[list[Check], list[ModelCheck]]: return build_checks(segment_spec) @@ -1630,7 +1690,7 @@ class TestUnionInsideArray: @pytest.fixture(scope="class") def results(self) -> tuple[list[Check], list[ModelCheck]]: - return build_checks(feature_spec_for_model(_Wrapper)) + return build_checks(spec_for_model(_Wrapper)) @pytest.fixture(scope="class") def field_nodes(self, results: tuple[list[Check], list[ModelCheck]]) -> list[Check]: @@ -1862,7 +1922,7 @@ def nodes(self) -> list[Check]: field = FieldSpec( name="version", shape=shape, description=None, is_required=True ) - spec = ModelSpec(name="Test", description=None, fields=[field]) + spec = RecordSpec(name="Test", description=None, fields=[field]) nodes, _ = build_checks(spec) return nodes @@ -1958,3 +2018,202 @@ def test_segment_speed_limits_when_has_gate(self) -> None: assert len(when_nodes) >= 1 for node in when_nodes: assert node.gate == _path("speed_limits[].when") + + +class TestMapKeyValueConstraints: + """check_builder descends into MapOf key/value shapes. + + `FeatureWithDict.names` is `dict[LanguageTag, StrippedString]`: the key + carries `LanguageTagConstraint` (dispatches to check_pattern) and the + value carries `StrippedConstraint` (dispatches to check_stripped). 
Both + constraints are validated when the same NewTypes are reached through a + struct field -- generated transportation/segment.py emits check_pattern + for `names.rules[].language` -- so reaching them through a map must not + silently drop validation. + """ + + def _map_check(self, projection: MapProjection, function: str) -> Check: + field_checks, _ = _checks_for(FeatureWithDict) + matches = [ + c + for c in field_checks + if isinstance(c.target, MapPath) + and c.target.projection is projection + and any(d.function == function for d in c.descriptors) + ] + assert len(matches) >= 1, ( + f"no MapPath {projection} check with {function}; " + f"targets={[str(c.target) for c in field_checks]}" + ) + return matches[0] + + def test_map_key_pattern_check_targets_names_key(self) -> None: + check = self._map_check(MapProjection.KEY, "check_pattern") + assert str(check.target) == "names{key}" + + def test_map_value_stripped_check_targets_names_value(self) -> None: + check = self._map_check(MapProjection.VALUE, "check_stripped") + assert str(check.target) == "names{value}" + + def test_map_field_with_unconstrained_value_emits_no_value_check(self) -> None: + # metadata: dict[str, int] -- neither key nor value carries a + # constraint, so no MapPath checks are produced for it. + field_checks, _ = _checks_for(FeatureWithDict) + metadata_maps = [ + c + for c in field_checks + if isinstance(c.target, MapPath) and c.target.map_column == "metadata" + ] + assert metadata_maps == [] + + +class _MapWithConstrainedListValueModel(BaseModel): + """`dict[K, list[constrained-scalar]]` -- a map value carrying an array layer. + + `terminal_scalar` unwraps the `ArrayOf` to the inner scalar, so the + naive scalar guard lets this through; the value scalar's constraint + has no `MapPath` + `ArraySegment` geometry to land on. 
+ """ + + items: dict[str, list[Annotated[str, MinLen(1)]]] + + +class _MapWithUnconstrainedListValueModel(BaseModel): + """`dict[K, list[scalar]]` with no key/value constraint -- nothing to emit.""" + + items: dict[str, list[int]] + + +class _ListOfConstrainedMapModel(BaseModel): + """`list[dict[K, constrained-scalar]]` -- a map reached through an array.""" + + items: list[dict[str, Annotated[str, MinLen(1)]]] + + +class _ListOfUnconstrainedMapModel(BaseModel): + """`list[dict[K, scalar]]` with no key/value constraint -- nothing to emit.""" + + items: list[dict[str, str]] + + +class TestMapProjectionUnsupportedShapes: + """`_map_projection_checks` is bounded to a scalar terminal reached struct-only. + + Three shapes fall outside that bound -- a map value/key with an array + layer (`dict[K, list[V]]`), and a map reached through an array + (`list[dict[K, V]]`). For each, a key/value constraint raises to keep + the dropped check loud, and an unconstrained one yields no checks (a + `MapPath` cannot locate the value, but there is nothing to validate). 
+ """ + + def test_constrained_list_value_raises(self) -> None: + with pytest.raises(NotImplementedError, match="map value"): + _checks_for(_MapWithConstrainedListValueModel) + + def test_unconstrained_list_value_emits_no_projection_check(self) -> None: + field_checks, _ = _checks_for(_MapWithUnconstrainedListValueModel) + assert not any(isinstance(c.target, MapPath) for c in field_checks) + + def test_constrained_map_in_array_raises(self) -> None: + with pytest.raises(NotImplementedError, match="map value"): + _checks_for(_ListOfConstrainedMapModel) + + def test_unconstrained_map_in_array_emits_no_projection_check(self) -> None: + field_checks, _ = _checks_for(_ListOfUnconstrainedMapModel) + assert not any(isinstance(c.target, MapPath) for c in field_checks) + + +class _InnerLabel(BaseModel): + label: Annotated[str, MinLen(1)] + + +class _MapOfModel(BaseModel): + """A `dict[K, Model]` value model with a constrained scalar field. + + The value model's `label` field is validated on a `MapPath` leaf + (`items{value}.label`), the map analogue of a `list[Model]` element. + """ + + items: dict[str, _InnerLabel] + + +@require_any_of("foo", "bar") +class _AnyOfSub(BaseModel): + foo: int | None = None + bar: str | None = None + + +class _ModelConstraintAsMapValue(BaseModel): + """A `dict[K, Model]` value model carrying a model-level constraint. + + The `require_any_of` constraint is validated on the map value itself + (`subs{value}`). + """ + + subs: dict[str, _AnyOfSub] + + +class TestMapValueModelDescent: + """check_builder descends into a `dict[K, Model]` value model. + + A `ModelRef`/`UnionRef` map value is walked for its field and + model-level constraints on a `MapPath` target, the map analogue of a + `list[Model]` element reached through the `ModelRef` walker arm. 
+ """ + + def test_value_field_constraint_targets_map_value_leaf(self) -> None: + field_checks, _ = _checks_for(_MapOfModel) + matches = [ + c + for c in field_checks + if isinstance(c.target, MapPath) + and str(c.target) == "items{value}.label" + and any(d.function == "check_string_min_length" for d in c.descriptors) + ] + assert len(matches) == 1, [str(c.target) for c in field_checks] + + def test_value_required_field_emits_required_descriptor(self) -> None: + field_checks, _ = _checks_for(_MapOfModel) + leaf_checks = [ + c + for c in field_checks + if isinstance(c.target, MapPath) and str(c.target) == "items{value}.label" + ] + assert leaf_checks + functions = {d.function for c in leaf_checks for d in c.descriptors} + assert "check_required" in functions + + def test_value_model_constraint_targets_map_value(self) -> None: + _, model_checks = _checks_for(_ModelConstraintAsMapValue) + matches = _filter_nodes(model_checks, "check_require_any_of", ("foo", "bar")) + assert len(matches) == 1 + assert isinstance(matches[0].target, MapPath) + assert str(matches[0].target) == "subs{value}" + + +class _MapValueWithList(BaseModel): + tags: list[Annotated[str, MinLen(1)]] + + +class _ListInsideMapValueModel(BaseModel): + """A `dict[K, Model]` value model with a constrained list field. + + A list nested inside a map element has no representable `MapPath`, so + the descent raises rather than emitting an unanchored target. + """ + + items: dict[str, _MapValueWithList] + + +class TestMapValueModelDescentBoundary: + """Descent raises where a `MapPath` cannot represent the shape. + + A map value model is descended into for scalar fields and model + constraints; a container (list or map) nested inside it has no + `MapPath` geometry, so the walker raises rather than emitting an + unvalidated target. 
+ """ + + def test_list_inside_map_value_model_raises(self) -> None: + with pytest.raises(NotImplementedError, match="list nested inside a map"): + _checks_for(_ListInsideMapValueModel) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py index 2cfd6a676..bdee2511f 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py @@ -1,5 +1,7 @@ """Tests for pyspark constraint dispatch.""" +import re + import pytest from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.codegen.extraction.field import Primitive @@ -21,7 +23,9 @@ dispatch_constraint, dispatch_model_constraint, dispatch_newtype, + forbid_if_field_shapes, model_constraint_function, + normalize_anchor, ) from overture.schema.system.field_constraint.collection import UniqueItemsConstraint from overture.schema.system.field_constraint.string import ( @@ -43,7 +47,7 @@ ) from overture.schema.system.primitive import GeometryType, GeometryTypeConstraint from overture.schema.system.ref import Identified, Reference, Relationship -from pydantic import Strict +from pydantic import Field, Strict class _Stub(Identified): @@ -121,11 +125,13 @@ def test_stripped(self) -> None: desc = dispatch_constraint(StrippedConstraint()) assert desc is not None assert desc.function == "check_stripped" + assert desc.constraint_type is StrippedConstraint def test_json_pointer(self) -> None: desc = dispatch_constraint(JsonPointerConstraint()) assert desc is not None assert desc.function == "check_json_pointer" + assert desc.constraint_type is JsonPointerConstraint def test_pattern_constraint_base(self) -> None: c = PatternConstraint(r"^[A-Z]{2}$", "test error") @@ -153,6 +159,67 @@ def test_snake_case_dispatches_as_pattern(self) -> None: assert desc.check_name == "snake_case" +class 
TestRawPydanticPatternDispatch: + """Raw pydantic `Field(pattern=)` metadata (`_PydanticGeneralMetadata`). + + Distinguished from the schema's `PatternConstraint` by being a + `PydanticMetadata` marker. Carries the pattern as a `str` + (`Field(pattern="...")`) or a compiled `re.Pattern` + (`Field(pattern=re.compile(...))` -- the only flagged-pattern carrier). + Reaches dispatch via map keys today (e.g. `Sources.license_priority`). + """ + + def test_pydantic_pattern_metadata_dispatches_as_pattern(self) -> None: + (meta,) = Field(pattern=r"^[a-z]+$").metadata + desc = dispatch_constraint(meta) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[a-z]+\z",) # anchor-normalized + + def test_compiled_pattern_metadata_dispatches_as_pattern(self) -> None: + # A compiled re.Pattern is the only carrier for a flagged pattern, so + # `Field(pattern=re.compile(...))` must dispatch like a bare string. + (meta,) = Field(pattern=re.compile(r"^[a-z]+$")).metadata + desc = dispatch_constraint(meta) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[a-z]+\z",) # anchor-normalized + + def test_compiled_pattern_ignorecase_prepends_inline_flag(self) -> None: + # re.IGNORECASE has no string-pattern carrier; it maps to Spark's + # inline (?i) flag (the same idiom check_url_format uses). + (meta,) = Field(pattern=re.compile(r"^[a-z]+$", re.I)).metadata + desc = dispatch_constraint(meta) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"(?i)^[a-z]+\z",) + + def test_compiled_pattern_unsupported_flag_raises_named(self) -> None: + # An untranslatable flag must raise a clean, flag-naming error rather + # than the opaque "Unhandled constraint type" TypeError. 
+ (meta,) = Field(pattern=re.compile(r"^[a-z]+$", re.M)).metadata + with pytest.raises(NotImplementedError, match="MULTILINE"): + dispatch_constraint(meta) + + def test_plain_object_with_str_pattern_still_raises(self) -> None: + # A non-PydanticMetadata object that merely exposes a string + # `.pattern` must not be mistaken for raw pattern metadata: the + # fallback contract stays "raise on unhandled", so an unrelated + # future constraint can't be silently turned into a check_pattern. + class _Imposter: + pattern = r"^[a-z]+$" + + with pytest.raises(TypeError, match="Unhandled constraint type"): + dispatch_constraint(_Imposter()) + + def test_non_pattern_object_still_raises(self) -> None: + class _Unknown: + pass + + with pytest.raises(TypeError, match="Unhandled constraint type"): + dispatch_constraint(_Unknown()) + + class TestPatternConstraintDispatch: def test_pattern_constraint_label_fallback_to_docstring(self) -> None: """PatternConstraint with no description falls back to docstring, period stripped.""" @@ -185,6 +252,19 @@ def test_anchor_normalization_replaces_only_trailing_dollar(self) -> None: # The trailing $ is replaced; the \$ inside the class is preserved assert pattern == r"^[\$]+\z" + def test_ignorecase_flag_prepends_inline_flag(self) -> None: + """A case-insensitive PatternConstraint maps re.I to Spark's (?i).""" + c = PatternConstraint(r"^[a-z]+$", "error: {value}", flags=re.I) + desc = dispatch_constraint(c) + assert desc is not None + assert desc.args == (r"(?i)^[a-z]+\z",) + + def test_unsupported_flag_raises_named(self) -> None: + """An untranslatable flag raises a clean, flag-naming error.""" + c = PatternConstraint(r"^[a-z]+$", "error: {value}", flags=re.M) + with pytest.raises(NotImplementedError, match="MULTILINE"): + dispatch_constraint(c) + class TestStructuralConstraintDispatch: def test_unique_items(self) -> None: @@ -260,6 +340,19 @@ def test_unknown_newtype_returns_none(self) -> None: assert desc is None +class 
TestPatternLabelAcronymHandling: + def test_acronym_run_in_name_splits_correctly(self) -> None: + """PatternConstraint subclass with an acronym run labels with spaces.""" + + class JSONPathConstraint(PatternConstraint): + def __init__(self) -> None: + super().__init__(r"^\$", "Invalid JSON path: {value}") + + desc = dispatch_constraint(JSONPathConstraint()) + assert desc is not None + assert desc.label == "json path" + + class TestUnknownConstraintFails: def test_unknown_constraint_raises(self) -> None: with pytest.raises(TypeError, match="Unhandled constraint"): @@ -383,3 +476,65 @@ def test_no_extra_fields_skipped(self) -> None: def test_unknown_model_constraint_raises(self) -> None: with pytest.raises(TypeError, match="Unhandled model constraint"): dispatch_model_constraint(object(), []) + + +class TestForbidIfFieldShapes: + """Non-string scalar shapes must appear in field_shapes.""" + + @pytest.mark.parametrize( + ("base_type", "field_name"), + [ + ("int32", "count"), + ("bool", "flag"), + ("float64", "score"), + ], + ) + def test_non_string_scalar_included_in_field_shapes( + self, base_type: str, field_name: str + ) -> None: + shape = Primitive(base_type=base_type) + result = forbid_if_field_shapes((field_name,), {field_name: shape}) + assert len(result) == 1 + assert result[0][0] == field_name + + def test_string_scalar_excluded_from_field_shapes(self) -> None: + """String scalars remain excluded; renderer defaults to '' fill.""" + shape = Primitive(base_type="str") + result = forbid_if_field_shapes(("label",), {"label": shape}) + assert result == () + + def test_dispatch_model_constraint_forbid_if_int_has_field_shapes(self) -> None: + condition = FieldEqCondition(field_name="subtype", value="road") + c = ForbidIfConstraint(field_names=("version",), condition=condition) + fields = [ + FieldSpec(name="version", shape=Primitive(base_type="int32")), + ] + (desc,) = dispatch_model_constraint(c, fields) + assert isinstance(desc, ForbidIf) + assert 
len(desc.field_shapes) == 1 + assert desc.field_shapes[0][0] == "version" + + +class TestNormalizeAnchorParity: + """normalize_anchor uses backslash-parity to distinguish anchor from escaped $.""" + + def test_bare_dollar_converted(self) -> None: + assert normalize_anchor(r"foo$") == r"foo\z" + + def test_escaped_dollar_left_unchanged(self) -> None: + """Single backslash before $ -- literal dollar, must not convert.""" + assert normalize_anchor(r"foo\$") == r"foo\$" + + def test_escaped_backslash_then_anchor_converted(self) -> None: + """Two backslashes before $ -- even parity, $ is a real anchor, must convert.""" + # "foo\\\\$" is the 6-char string: f o o \ \ $ + # Even number of backslashes (2) before $: the $ is an unescaped anchor. + result = normalize_anchor("foo\\\\$") + assert result.endswith(r"\z"), f"Expected \\\\z suffix, got {result!r}" + assert not result.endswith("$") + + def test_triple_backslash_dollar_left_unchanged(self) -> None: + r"""Three backslashes before $ -- odd parity, $ is a literal dollar.""" + # "foo\\\\\\$" -- three backslashes + $, odd count: escaped literal $ + result = normalize_anchor("foo\\\\\\$") + assert result.endswith("$"), f"Expected trailing $, got {result!r}" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_values.py b/packages/overture-schema-codegen/tests/test_pyspark_constraint_values.py new file mode 100644 index 000000000..419e2866e --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_values.py @@ -0,0 +1,231 @@ +"""Tests for the paired constraint value table.""" + +import pytest +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor +from overture.schema.codegen.pyspark.test_data.constraint_values import ( + CONSTRAINT_VALUES, + invalid_bound, + valid_bound, +) +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + JsonPointerConstraint, + LanguageTagConstraint, + 
NoWhitespaceConstraint, + PatternConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + StrippedConstraint, + WikidataIdConstraint, +) + + +class TestConstraintValuesCompleteness: + """CONSTRAINT_VALUES covers the expected set of constraint types.""" + + def test_expected_constraint_types_present(self) -> None: + expected = { + CountryCodeAlpha2Constraint, + HexColorConstraint, + JsonPointerConstraint, + LanguageTagConstraint, + NoWhitespaceConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + StrippedConstraint, + WikidataIdConstraint, + } + assert expected <= set(CONSTRAINT_VALUES.keys()) + + +def _pattern_entries() -> list[type]: + # All CONSTRAINT_VALUES keys that are PatternConstraint subclasses, minus those + # with dedicated behavioural tests. + # StrippedConstraint IS a PatternConstraint subclass but uses \Z (not portable + # as a regex literal), so its contract is verified in TestStrippedConstraintValues. + # JsonPointerConstraint is NOT a PatternConstraint subclass — it has no .pattern + # attribute — and is verified in TestJsonPointerConstraintValues. 
+ _BEHAVIOURAL_EXCLUSIONS = {StrippedConstraint, JsonPointerConstraint} + return sorted( + [ + ct + for ct in CONSTRAINT_VALUES + if issubclass(ct, PatternConstraint) and ct not in _BEHAVIOURAL_EXCLUSIONS + ], + key=lambda ct: ct.__name__, + ) + + +class TestPatternConstraintValues: + """For each PatternConstraint subclass, the valid value matches and invalid does not.""" + + _PATTERN_ENTRIES = _pattern_entries() + + @pytest.mark.parametrize( + "constraint_type", _PATTERN_ENTRIES, ids=lambda ct: ct.__name__ + ) + def test_valid_matches_pattern(self, constraint_type: type) -> None: + constraint = constraint_type() + assert isinstance(constraint, PatternConstraint) + cv = CONSTRAINT_VALUES[constraint_type] + assert isinstance(cv.valid, str) + assert constraint.pattern.match(cv.valid), ( + f"{constraint_type.__name__}: valid value {cv.valid!r} " + f"did not match pattern {constraint.pattern.pattern!r}" + ) + + @pytest.mark.parametrize( + "constraint_type", _PATTERN_ENTRIES, ids=lambda ct: ct.__name__ + ) + def test_invalid_does_not_match_pattern(self, constraint_type: type) -> None: + constraint = constraint_type() + assert isinstance(constraint, PatternConstraint) + cv = CONSTRAINT_VALUES[constraint_type] + assert isinstance(cv.invalid, str) + assert not constraint.pattern.match(cv.invalid), ( + f"{constraint_type.__name__}: invalid value {cv.invalid!r} " + f"matched pattern {constraint.pattern.pattern!r} (should not)" + ) + + +class TestStrippedConstraintValues: + """StrippedConstraint valid/invalid contract verified behaviorally.""" + + def test_valid_is_stripped(self) -> None: + cv = CONSTRAINT_VALUES[StrippedConstraint] + assert isinstance(cv.valid, str) + assert cv.valid == cv.valid.strip() + + def test_invalid_has_leading_or_trailing_whitespace(self) -> None: + cv = CONSTRAINT_VALUES[StrippedConstraint] + assert isinstance(cv.invalid, str) + assert cv.invalid != cv.invalid.strip() + + +class TestJsonPointerConstraintValues: + """JsonPointerConstraint 
valid/invalid contract verified behaviorally.""" + + def test_valid_starts_with_slash_or_is_empty(self) -> None: + cv = CONSTRAINT_VALUES[JsonPointerConstraint] + assert isinstance(cv.valid, str) + assert cv.valid == "" or cv.valid.startswith("/") + + def test_invalid_does_not_start_with_slash(self) -> None: + cv = CONSTRAINT_VALUES[JsonPointerConstraint] + assert isinstance(cv.invalid, str) + assert cv.invalid != "" and not cv.invalid.startswith("/") + + +class TestBoundFunctions: + """valid_bound and invalid_bound produce values on opposite sides of each bound kind.""" + + def test_valid_bound_ge(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 5),)) + assert valid_bound(desc) == 5 + + def test_valid_bound_gt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("gt", 5),)) + assert valid_bound(desc) == 6 + + def test_valid_bound_le(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("le", 5),)) + assert valid_bound(desc) == 5 + + def test_valid_bound_lt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("lt", 5),)) + assert valid_bound(desc) == 4 + + def test_valid_bound_fallback_to_zero(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=()) + assert valid_bound(desc) == 0 + + def test_invalid_bound_ge(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 5),)) + assert invalid_bound(desc) == 4 + + def test_invalid_bound_gt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("gt", 5),)) + assert invalid_bound(desc) == 5 + + def test_invalid_bound_le(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("le", 5),)) + assert invalid_bound(desc) == 6 + + def test_invalid_bound_lt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("lt", 5),)) + assert invalid_bound(desc) == 5 + + def 
test_invalid_bound_unknown_raises(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("unknown", 5),)) + with pytest.raises(ValueError): + invalid_bound(desc) + + def test_valid_bound_gt_lt_float_in_range(self) -> None: + """Interval(gt=0.0, lt=0.5) returns a value strictly between 0.0 and 0.5.""" + desc = ExpressionDescriptor( + function="check_bounds", kwargs=(("gt", 0.0), ("lt", 0.5)) + ) + result = valid_bound(desc) + assert isinstance(result, (int, float)) + assert 0 < result < 0.5 + + def test_valid_bound_gt_lt_int_non_degenerate(self) -> None: + """Adjacent-but-valid int intervals return the interior midpoint.""" + desc_2 = ExpressionDescriptor( + function="check_bounds", kwargs=(("gt", 0), ("lt", 2)) + ) + assert valid_bound(desc_2) == 1 + desc_4 = ExpressionDescriptor( + function="check_bounds", kwargs=(("gt", 0), ("lt", 4)) + ) + assert valid_bound(desc_4) == 2 + + def test_valid_bound_gt_lt_int_degenerate_raises(self) -> None: + """Adjacent exclusive int bounds (gt=0, lt=1) have no valid integer midpoint.""" + desc = ExpressionDescriptor( + function="check_bounds", kwargs=(("gt", 0), ("lt", 1)) + ) + with pytest.raises(ValueError, match="gt=0"): + valid_bound(desc) + + def test_valid_bound_ge_le_in_range(self) -> None: + """Interval(ge=0, le=10) returns a value in [0, 10].""" + desc = ExpressionDescriptor( + function="check_bounds", kwargs=(("ge", 0), ("le", 10)) + ) + result = valid_bound(desc) + assert isinstance(result, (int, float)) + assert 0 <= result <= 10 + + def test_valid_bound_gt_float_returns_float(self) -> None: + """gt=0.5 (float bound) returns a float value > 0.5.""" + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("gt", 0.5),)) + result = valid_bound(desc) + assert isinstance(result, float) + assert result > 0.5 + + def test_valid_bound_lt_float_returns_float(self) -> None: + """lt=0.5 (float bound) returns a float value < 0.5.""" + desc = ExpressionDescriptor(function="check_bounds", 
kwargs=(("lt", 0.5),)) + result = valid_bound(desc) + assert isinstance(result, float) + assert result < 0.5 + + def test_valid_bound_gt_lt_float_tight_interval(self) -> None: + """gt=0.5, lt=1.0: midpoint 0.75 satisfies both bounds.""" + desc = ExpressionDescriptor( + function="check_bounds", kwargs=(("gt", 0.5), ("lt", 1.0)) + ) + result = valid_bound(desc) + assert isinstance(result, float) + assert 0.5 < result < 1.0 + + def test_invalid_bound_confirmed_correct(self) -> None: + """invalid_bound is already correct — one violated bound suffices.""" + ge_desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 3),)) + assert invalid_bound(ge_desc) == 2 + lt_desc = ExpressionDescriptor(function="check_bounds", kwargs=(("lt", 3),)) + assert invalid_bound(lt_desc) == 3 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py index d2a7811d1..9fceb373f 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py @@ -5,8 +5,10 @@ from overture.schema.codegen.pyspark.test_data.invalid_value import invalid_value from overture.schema.system.field_constraint.string import ( CountryCodeAlpha2Constraint, + JsonPointerConstraint, NoWhitespaceConstraint, RegionCodeConstraint, + StrippedConstraint, ) from overture.schema.system.primitive.geom import GeometryType @@ -52,9 +54,10 @@ def test_unknown_bound_raises(self) -> None: class TestInvalidValuePattern: - def test_default_pattern(self) -> None: + def test_unknown_constraint_type_raises(self) -> None: desc = ExpressionDescriptor(function="check_pattern", args=(r"^[A-Z]+$",)) - assert invalid_value(desc) == "!!!INVALID!!!" 
+ with pytest.raises(ValueError, match="No invalid value"): + invalid_value(desc) def test_no_whitespace_pattern(self) -> None: desc = ExpressionDescriptor( @@ -93,11 +96,15 @@ def test_email(self) -> None: assert invalid_value(desc) == "not-an-email" def test_stripped(self) -> None: - desc = ExpressionDescriptor(function="check_stripped") + desc = ExpressionDescriptor( + function="check_stripped", constraint_type=StrippedConstraint + ) assert invalid_value(desc) == " has spaces " def test_json_pointer(self) -> None: - desc = ExpressionDescriptor(function="check_json_pointer") + desc = ExpressionDescriptor( + function="check_json_pointer", constraint_type=JsonPointerConstraint + ) assert invalid_value(desc) == "no-slash" @@ -168,6 +175,22 @@ def test_all_candidates_allowed_raises(self) -> None: invalid_value(desc) +class TestInvalidValueRawPattern: + """Raw pydantic `Field(pattern=)` map keys curated in `PATTERN_VALUES`.""" + + def test_curated_license_pattern_returns_invalid(self) -> None: + # Sources.license_priority key pattern, anchor-normalized. + desc = ExpressionDescriptor( + function="check_pattern", args=(r"^[A-Za-z0-9._+\-]+\z",) + ) + assert invalid_value(desc) == "bad license!" 
+ + def test_uncurated_pattern_still_raises(self) -> None: + desc = ExpressionDescriptor(function="check_pattern", args=(r"^xyz\z",)) + with pytest.raises(ValueError, match="check_pattern"): + invalid_value(desc) + + class TestInvalidValueUnknown: def test_unknown_function_raises(self) -> None: desc = ExpressionDescriptor(function="check_something_unknown") diff --git a/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py index 95201a09b..d84fe7199 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py @@ -6,15 +6,11 @@ import pytest from annotated_types import Ge -from codegen_test_support import find_theme, partitions_from_tags +from codegen_test_support import find_theme from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( - FeatureSpec, - is_model_class, - is_union_alias, + ModelSpec, ) -from overture.schema.codegen.extraction.union_extraction import extract_union -from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.codegen.pyspark.check_ir import Check from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor from overture.schema.codegen.pyspark.pipeline import ( @@ -24,6 +20,7 @@ generate_pyspark_module, generate_pyspark_modules, ) +from overture.schema.codegen.spec_discovery import extract_model_spec from overture.schema.system.field_path import ScalarPath from overture.schema.system.primitive import GeometryType from pydantic import BaseModel @@ -54,7 +51,7 @@ def test_content_is_nonempty(self, simple_module: GeneratedModule) -> None: def test_content_is_valid_python(self, simple_module: GeneratedModule) -> None: ast.parse(simple_module.content) - def test_path_uses_snake_case_feature_name( + def test_path_uses_snake_case_model_name( self, 
simple_module: GeneratedModule ) -> None: assert simple_module.path == PurePosixPath( @@ -78,7 +75,7 @@ def test_content_contains_schema_constant( assert "SIMPLE_MODEL_SCHEMA" in simple_module.content -def _two_specs() -> list[FeatureSpec]: +def _two_specs() -> list[ModelSpec]: return [ extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel"), extract_model(BoundsModel, entry_point="overture.schema.bounds:BoundsModel"), @@ -119,26 +116,13 @@ def test_divisions_theme_produces_division_area( self, all_discovered_models: dict ) -> None: """divisions theme should produce a division_area.py module.""" - division_specs: list[FeatureSpec] = [] + division_specs: list[ModelSpec] = [] for key, entry in all_discovered_models.items(): if find_theme(key.tags) != "divisions": continue - partitions = partitions_from_tags(key.tags) - if is_model_class(entry): - division_specs.append( - extract_model( - entry, entry_point=key.entry_point, partitions=partitions - ) - ) - elif is_union_alias(entry): - division_specs.append( - extract_union( - entry_point_class(key.entry_point), - entry, - entry_point=key.entry_point, - partitions=partitions, - ) - ) + spec = extract_model_spec(key, entry) + if spec is not None: + division_specs.append(spec) results = generate_pyspark_modules(division_specs) names = {r.path.stem for r in results.source} @@ -180,19 +164,13 @@ class TestPerArmTestGeneration: @pytest.fixture def segment_modules(self, all_discovered_models: dict) -> PipelineOutput: - specs: list[FeatureSpec] = [] + specs: list[ModelSpec] = [] for key, entry in all_discovered_models.items(): if key.name != "segment": continue - if is_union_alias(entry): - specs.append( - extract_union( - entry_point_class(key.entry_point), - entry, - entry_point=key.entry_point, - partitions=partitions_from_tags(key.tags), - ) - ) + spec = extract_model_spec(key, entry) + if spec is not None: + specs.append(spec) return generate_pyspark_modules(specs) def 
test_produces_per_arm_test_files(self, segment_modules: PipelineOutput) -> None: diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py index 91a63c380..8fc6c9db1 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -12,9 +12,16 @@ RadioModel, RequireAnyModel, TripleNestedArrayModel, - feature_spec_for_model, + spec_for_model, +) +from overture.schema.codegen.pyspark._render_common import ( + FieldEq, + field_check_rows, + jinja_env, + model_check_rows, + require_field_eq, + schema_const_name, ) -from overture.schema.codegen.pyspark._render_common import jinja_env from overture.schema.codegen.pyspark.check_builder import build_checks from overture.schema.codegen.pyspark.check_ir import ( Check, @@ -25,11 +32,14 @@ from overture.schema.codegen.pyspark.constraint_dispatch import ( ExpressionDescriptor, RequireAnyOf, + RequireIf, ) from overture.schema.codegen.pyspark.renderer import ( + _read_columns, _render_check_function_context, _render_model_constraint_function_context, - render_feature_module, + _require_read_columns, + render_model_module, ) from overture.schema.codegen.pyspark.schema_builder import build_schema from overture.schema.system.field_path import ( @@ -55,6 +65,130 @@ _path = parse +class TestReadColumns: + """`_read_columns` derives a check's top-level reads from its rendered expr. + + Ground truth, not a structural proxy: the top-level column reads generated + code emits are `F.col("...")`, the outermost `array_check`/ + `nested_array_check` string argument, and the `map_keys_check`/ + `map_values_check` string argument. Element-relative accessors (`el[...]`, + `inner[...]`) read nothing at the row level. 
+ """ + + def test_scalar_col(self) -> None: + assert _read_columns('check_bounds(F.col("speed"), ge=0)') == frozenset( + {"speed"} + ) + + def test_struct_leaf_strips_to_top_level(self) -> None: + # require_any_of over a struct field unwraps to a dotted required leaf; + # the read column is the top-level struct, not the dotted path. + expr = 'check_require_any_of([F.col("fast.value"), F.col("slow.value")], ["fast.value", "slow.value"])' + assert _read_columns(expr) == frozenset({"fast", "slow"}) + + def test_top_level_array_check(self) -> None: + assert _read_columns( + 'array_check("sources", lambda el: check_required(el["dataset"]))' + ) == frozenset({"sources"}) + + def test_dotted_array_check_strips_to_top_level(self) -> None: + assert _read_columns( + 'array_check("names.rules", lambda el: check_required(el["value"]))' + ) == frozenset({"names"}) + + def test_nested_array_check_reads_only_outer_column(self) -> None: + # The outer column is a string literal; inner iteration uses an + # element accessor (`el["when"]["vehicle"]`), which is not a row-level read. + expr = ( + 'nested_array_check("access_restrictions", lambda el: ' + 'array_check(el["when"]["vehicle"], lambda inner: ' + 'check_forbid_if(inner["unit"], inner["dimension"] == "axle_count", "...")))' + ) + assert _read_columns(expr) == frozenset({"access_restrictions"}) + + def test_map_keys_check_reads_map_column(self) -> None: + # A map key/value check dereferences the map column by name, exactly + # like array_check; the inner lambda reads a projected element, not a + # row column. The runtime must drop the check when the map is absent. 
+ expr = 'map_keys_check("license_priority", lambda k: check_pattern(k, "^x$", label="pattern"))' + assert _read_columns(expr) == frozenset({"license_priority"}) + + def test_map_values_check_reads_map_column(self) -> None: + expr = 'map_values_check("license_priority", lambda v: check_bounds(v, ge=0))' + assert _read_columns(expr) == frozenset({"license_priority"}) + + def test_multiple_cols_with_condition(self) -> None: + # require_if reads its target column and the column its condition + # branches on; the description string is not a column read. + expr = ( + 'check_require_if(F.col("admin_level"), F.col("subtype") == "county", ' + "\"subtype = 'county'\")" + ) + assert _read_columns(expr) == frozenset({"admin_level", "subtype"}) + + def test_variant_gated_field_reads_discriminator(self) -> None: + # A variant-gated field check dereferences the discriminator column too, + # so an absent discriminator drops the check rather than crashing. + expr = 'F.when(F.col("subtype").isin(["road"]), check_required(F.col("class")))' + assert _read_columns(expr) == frozenset({"subtype", "class"}) + + def test_no_row_level_reads(self) -> None: + assert _read_columns('F.lit(None).cast("string")') == frozenset() + + +class TestRequireReadColumns: + """Every generated check must read at least one top-level column. + + The guard turns an unrecognized render form -- which yields empty + `read_columns` and a check the runtime can never drop on absence -- + into a generation-time error instead of a latent Spark crash. 
+ """ + + def test_returns_columns_when_recognized(self) -> None: + assert _require_read_columns( + 'check_bounds(F.col("speed"), ge=0)', "speed", "bounds" + ) == frozenset({"speed"}) + + def test_raises_when_no_column_recognized(self) -> None: + with pytest.raises(ValueError, match="reads no top-level column"): + _require_read_columns( + 'unknown_wrapper("license_priority", lambda e: e)', + "license_priority", + "pattern", + ) + + +class TestRequireFieldEq: + """`require_field_eq` is the strict, raising companion to `parse_field_eq`.""" + + def test_unwraps_field_eq(self) -> None: + assert require_field_eq(FieldEqCondition("subtype", "county")) == FieldEq( + "subtype", "county", False + ) + + def test_unwraps_negated_field_eq(self) -> None: + condition = Not(FieldEqCondition("subtype", "county")) + assert require_field_eq(condition) == FieldEq("subtype", "county", True) + + def test_raises_on_other_condition(self) -> None: + # A condition `parse_field_eq` cannot unwrap (nested negation) names its + # type in the error so a new Condition subtype fails loudly in one place. + condition = Not(Not(FieldEqCondition("subtype", "county"))) + with pytest.raises(TypeError, match="Unhandled condition type: Not"): + require_field_eq(condition) + + +class TestSchemaConstName: + def test_uppercases_model_name(self) -> None: + assert schema_const_name("address") == "ADDRESS_SCHEMA" + + def test_already_uppercase(self) -> None: + assert schema_const_name("BUILDING") == "BUILDING_SCHEMA" + + def test_mixed_case(self) -> None: + assert schema_const_name("myFeature") == "MYFEATURE_SCHEMA" + + class BoundsModel(BaseModel): score: Annotated[float, Ge(0.0)] @@ -77,11 +211,64 @@ class FloatListModel(BaseModel): scores: list[Annotated[float, Ge(0.0)]] | None = None +class MapValueLeaf(BaseModel): + label: Annotated[str, MinLen(1)] + + +# dict[K, Model] value model with a constrained field -- the field check +# renders inside a map_values_check lambda navigating into the value struct. 
+class MapValueFieldModel(BaseModel): + items: dict[str, MapValueLeaf] + + +@require_any_of("foo", "bar") +class MapValueAnyOf(BaseModel): + foo: int | None = None + bar: str | None = None + + +# dict[K, Model] value model with a model-level constraint -- the model check +# renders inside a map_values_check lambda. +class MapValueConstraintModel(BaseModel): + subs: dict[str, MapValueAnyOf] + + +@require_if(["admin_level"], FieldEqCondition("subtype", "country")) +class LeafRequireIf(BaseModel): + subtype: str + admin_level: int | None = None + + +# The require_if model sits one struct level below the container element, so +# both container types reach it at a non-empty leaf (`...inner`). The target +# AND condition field refs must both navigate that leaf. +class LeafRequireIfOuter(BaseModel): + inner: LeafRequireIf + + +class MapValueRequireIfModel(BaseModel): + subs: dict[str, LeafRequireIfOuter] + + +class ArrayValueRequireIfModel(BaseModel): + rows: list[LeafRequireIfOuter] + + +@forbid_if(["extra"], FieldEqCondition("kind", "basic")) +class MapValueForbidIf(BaseModel): + kind: str + extra: str | None = None + + +class MapValueForbidIfModel(BaseModel): + subs: dict[str, MapValueForbidIf] + + def _render(model_cls: type[BaseModel], name: str = "simple") -> str: - spec = feature_spec_for_model(model_cls) + spec = spec_for_model(model_cls) field_checks, model_checks = build_checks(spec) schema_fields = build_schema(spec) - return render_feature_module(name, field_checks, model_checks, schema_fields) + return render_model_module(name, field_checks, model_checks, schema_fields) def _render_check_function_string(ctx: dict[str, object]) -> str: @@ -90,22 +277,21 @@ def _render_check_function_string(ctx: dict[str, object]) -> str: return str(template.module.check_function(c=ctx)) # type: ignore[attr-defined] -def _render_check_function( - check: Check, func_name: str, descriptor_idx: int = 0 -) -> str: +def _render_check_function(check: Check, descriptor_idx: int = 0) 
-> str: """Render a per-field check function source from a Check.""" - ctx = _render_check_function_context(check, func_name, descriptor_idx) + row = field_check_rows([check])[descriptor_idx] + ctx = _render_check_function_context(row) return _render_check_function_string(ctx) def _render_node(check: Check) -> str: """Render a single Check to its function source.""" - return _render_check_function(check, "_test_check", descriptor_idx=0) + return _render_check_function(check, descriptor_idx=0) def _render_model_node(check: ModelCheck) -> str: """Render a single ModelCheck to its function source.""" - ctx = _render_model_constraint_function_context(check, 0, "") + ctx = _render_model_constraint_function_context(model_check_rows([check])[0]) return _render_check_function_string(ctx) @@ -156,9 +342,9 @@ def test_contains_builder_function(self, literal_subtype_source: str) -> None: def test_builder_returns_list_check(self, literal_subtype_source: str) -> None: assert "list[Check]" in literal_subtype_source - def test_builder_name_uses_feature_name(self) -> None: - source = _render(LiteralSubtypeModel, "my_feature") - assert "def my_feature_checks()" in source + def test_builder_name_uses_model_name(self) -> None: + source = _render(LiteralSubtypeModel, "my_model") + assert "def my_model_checks()" in source class TestSchemaConstant: @@ -180,7 +366,7 @@ def test_shared_struct_ref_emits_struct_field(self) -> None: from overture.schema.codegen.pyspark.schema_builder import SchemaField schema_fields = [SchemaField(name="bbox", type_expr="BBOX_STRUCT")] - source = render_feature_module("simple", [], [], schema_fields) + source = render_model_module("simple", [], [], schema_fields) assert 'StructField("bbox", BBOX_STRUCT, True)' in source @@ -191,10 +377,10 @@ def test_omitted_when_empty(self, literal_subtype_source: str) -> None: assert "GEOMETRY_TYPES" not in literal_subtype_source def test_emitted_when_provided(self) -> None: - spec = 
feature_spec_for_model(LiteralSubtypeModel) + spec = spec_for_model(LiteralSubtypeModel) field_nodes, model_nodes = build_checks(spec) schema_fields = build_schema(spec) - source = render_feature_module( + source = render_model_module( "simple", field_nodes, model_nodes, @@ -208,10 +394,10 @@ def test_emitted_when_provided(self) -> None: def test_geometry_type_imported_when_only_constant_needs_it(self) -> None: # LiteralSubtypeModel has no check_geometry_type constraint, so the # import is only required because GEOMETRY_TYPES references it. - spec = feature_spec_for_model(LiteralSubtypeModel) + spec = spec_for_model(LiteralSubtypeModel) field_nodes, model_nodes = build_checks(spec) schema_fields = build_schema(spec) - source = render_feature_module( + source = render_model_module( "simple", field_nodes, model_nodes, @@ -330,6 +516,26 @@ def test_require_any_of_no_context_arg(self) -> None: source = _render(RequireAnyModel, "require_any") assert "'RequireAnyModel'" not in source + def test_require_any_of_emits_read_columns(self) -> None: + # A model check reads several columns directly; the runtime drops the + # check when any of them is skipped or structurally absent. + source = _render(RequireAnyModel, "require_any") + assert "read_columns=frozenset({'x', 'y'})" in source + + def test_field_check_emits_read_columns(self) -> None: + # Every check declares the columns it reads, field checks included -- + # there is no separate root_field/referenced_fields split. + source = _render(BoundsModel) + assert "read_columns=frozenset({'score'})" in source + assert "root_field" not in source + assert "referenced_fields" not in source + + def test_require_if_read_columns_include_condition(self) -> None: + # require_if reads its target column and the column its condition + # branches on; both must be carried so skipping either drops the check. 
+ source = _render(RequireIfEnumModel, "require_if_enum") + assert "read_columns=frozenset({'admin_level', 'subtype'})" in source + def test_model_constraint_imports_function(self) -> None: source = _render(RadioModel, "radio") assert "check_radio_group" in source @@ -604,7 +810,7 @@ def test_column_and_element_level_get_unique_names(self) -> None: descriptors=(ExpressionDescriptor(function="check_required"),), target=_path("items[]"), ) - source = render_feature_module("dup", [col_check, elem_check], [], []) + source = render_model_module("dup", [col_check, elem_check], [], []) ast.parse(source) func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) assert len(func_defs) == len(set(func_defs)), ( @@ -627,7 +833,7 @@ def test_same_field_different_variants_get_unique_names(self) -> None: target=_path("class"), guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), ) - source = render_feature_module("dup", [road_check, rail_check], [], []) + source = render_model_module("dup", [road_check, rail_check], [], []) ast.parse(source) func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) assert len(func_defs) == len(set(func_defs)), ( @@ -635,6 +841,153 @@ def test_same_field_different_variants_get_unique_names(self) -> None: ) +class TestFieldCheckLabelCollision: + """Field checks sharing a `(field, name)` identity get distinct labels. + + The discriminated vehicle-dimension union in `segment` emits two + field checks with the identical identity + `("...vehicle[].value", "required")` -- one per arm of the inner + union. Without a collision suffix the emitted `Check.field` is + ambiguous (it keys `suppress` matching, `explain_errors` metadata, + and the conformance test's `expected_field`). Mirror the model-check + `_N` convention: every member of a colliding group gets a suffix. 
+ """ + + def test_colliding_required_checks_get_distinct_labels(self) -> None: + first = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + guards=(ElementGuard(discriminator="dimension", values=("axle_count",)),), + ) + second = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + guards=( + ElementGuard(discriminator="dimension", values=("height", "width")), + ), + ) + source = render_model_module("collide", [first, second], [], []) + ast.parse(source) + labels = re.findall(r'field="(value[^"]*)"', source) + assert labels == ["value_0", "value_1"], labels + + def test_noncolliding_field_check_stays_bare(self) -> None: + required = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + bounds = Check( + descriptors=( + ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)), + ), + target=_path("value"), + ) + source = render_model_module("solo", [required, bounds], [], []) + ast.parse(source) + labels = re.findall(r'field="(value[^"]*)"', source) + assert labels == ["value", "value"], labels + + def test_multi_descriptor_collision_only_on_shared_name(self) -> None: + """A multi-descriptor check collides per emitted `(field, name)` row.""" + single = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + multi = Check( + descriptors=( + ExpressionDescriptor(function="check_required"), + ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)), + ), + target=_path("value"), + ) + source = render_model_module("multi", [single, multi], [], []) + ast.parse(source) + # The two `required` rows collide (-> value_0/value_1); the lone + # `bounds` row stays bare. 
+ required_fields = re.findall( + r'field="(value[^"]*)",\n\s+name="required"', source + ) + bounds_fields = re.findall(r'field="(value[^"]*)",\n\s+name="bounds"', source) + assert required_fields == ["value_0", "value_1"], required_fields + assert bounds_fields == ["value"], bounds_fields + + def test_labels_are_positional_not_identity_keyed(self) -> None: + """Row labels align to flattened `(check, desc_idx)` order. + + Collision suffixes depend only on the iteration order both + renderers share -- never on the identity of the `Check` objects. + Two value-equal but distinct checks (the cross-arm collision case) + must still each receive their own collision index. + """ + first = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + second = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + # A distinct copy of `first`, equal by value -- under identity + # keying this would alias `first`; positional keying keeps it + # separate. 
+ first_copy = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + labels = [row.label for row in field_check_rows([first, second, first_copy])] + assert labels == ["value_0", "value_1", "value_2"], labels + + +class TestMapPathRendering: + """MapPath targets render to map_keys_check / map_values_check.""" + + def test_map_key_renders_map_keys_check(self) -> None: + check = Check( + descriptors=( + ExpressionDescriptor( + function="check_pattern", + args=(r"^[a-z]+$",), + label="language tag", + ), + ), + target=_path("names{key}"), + ) + source = render_model_module("dictfeat", [check], [], []) + ast.parse(source) + assert 'map_keys_check("names", lambda k: check_pattern(k,' in source + + def test_map_value_renders_map_values_check(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_stripped"),), + target=_path("names.common{value}"), + ) + source = render_model_module("dictfeat", [check], [], []) + ast.parse(source) + assert 'map_values_check("names.common", lambda v: check_stripped(v))' in source + + def test_map_check_imports_helper_from_column_patterns(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_stripped"),), + target=_path("names{value}"), + ) + source = render_model_module("dictfeat", [check], [], []) + assert re.search( + r"from [.\w]*column_patterns import[\s\S]*?map_values_check", source + ) + + def test_map_check_read_columns_is_top_level_column(self) -> None: + # The map check dereferences its top-level map column (`names`), not the + # dotted struct path or the `{value}` step marker; `read_columns` is the + # granularity at which validate drops the check when the column is absent. + check = Check( + descriptors=(ExpressionDescriptor(function="check_stripped"),), + target=_path("names.common{value}"), + ) + source = render_model_module("dictfeat", [check], [], []) + # Renderer emits repr() (single quotes); ruff later normalizes. 
+ assert "read_columns=frozenset({'names'})" in source + + @require_any_of("x", "y") class _ArrayElementConstrained(BaseModel): x: str | None = None @@ -690,7 +1043,7 @@ def test_variant_uses_check_discriminator_field(self) -> None: target=_path("a_field"), guards=(ColumnGuard(discriminator="kind", values=("a",)),), ) - source = render_feature_module("test_variant", [check], [], []) + source = render_model_module("test_variant", [check], [], []) ast.parse(source) assert 'F.col("kind")' in source assert 'F.col("subtype")' not in source @@ -785,12 +1138,12 @@ def surface_value_check(self) -> Check: ) def test_parseable(self, surface_check: Check) -> None: - source = render_feature_module("test", [surface_check], [], []) + source = render_model_module("test", [surface_check], [], []) ast.parse(source) def test_discriminator_uses_f_col(self, surface_check: Check) -> None: """Top-level discriminator must reference F.col, not el[...].""" - source = render_feature_module("test", [surface_check], [], []) + source = render_model_module("test", [surface_check], [], []) assert 'F.col("subtype")' in source, ( "Top-level discriminator must use F.col, not el[...]" ) @@ -800,7 +1153,7 @@ def test_discriminator_uses_f_col(self, surface_check: Check) -> None: def test_f_when_wraps_array_check(self, surface_check: Check) -> None: """F.when must wrap the array_check call, not the lambda body.""" - source = _render_check_function(surface_check, "_surface_check") + source = _render_check_function(surface_check) # F.when must appear before array_check in the expression. 
f_when_pos = source.find("F.when(") array_check_pos = source.find("array_check(") @@ -812,13 +1165,13 @@ def test_f_when_wraps_array_check(self, surface_check: Check) -> None: def test_no_el_discriminator_in_lambda(self, surface_value_check: Check) -> None: """el['subtype'] must not appear even with leaf path -- subtype is top-level.""" - source = render_feature_module("test", [surface_value_check], [], []) + source = render_model_module("test", [surface_value_check], [], []) assert 'el["subtype"]' not in source, ( 'el["subtype"] found -- top-level discriminator must not appear inside lambda' ) def test_leaf_path_check_parseable(self, surface_value_check: Check) -> None: - source = render_feature_module("test", [surface_value_check], [], []) + source = render_model_module("test", [surface_value_check], [], []) ast.parse(source) @@ -854,7 +1207,7 @@ def test_render_nested_array_check(self) -> None: ), target=_path("items[].things[].value"), ) - source = _render_check_function(check, "_test_check") + source = _render_check_function(check) assert "nested_array_check" in source assert "lambda el" in source assert "lambda inner" in source @@ -870,7 +1223,7 @@ def test_render_variant_expr_in_nested_array_top_level_disc(self) -> None: target=_path("items[].things[].unit"), guards=(ColumnGuard(discriminator="kind", values=("a", "b")),), ) - source = _render_check_function(check, "_test_check") + source = _render_check_function(check) assert "nested_array_check" in source assert 'F.col("kind").isin(' in source @@ -883,7 +1236,7 @@ def test_render_variant_expr_in_nested_array_element_disc(self) -> None: target=_path("items[].things[].unit"), guards=(ElementGuard(discriminator="kind", values=("a", "b")),), ) - source = _render_check_function(check, "_test_check") + source = _render_check_function(check) assert "nested_array_check" in source assert 'F.col("kind")' not in source assert 'inner["kind"]' in source @@ -1165,3 +1518,103 @@ def 
test_gated_model_check_assertion_on_non_array_target(self) -> None: ) with pytest.raises(AssertionError, match="gate.*non-ArrayPath"): _render_model_node(check) + + +class TestMapValueModelRendering: + """Render `dict[K, Model]` value-model checks inside a map lambda. + + The map's values are iterated like an array: a value-model field check + renders `map_values_check("col", lambda v: check(v["field"]))`, and a + value-model constraint renders `map_values_check("col", lambda v: + check_require_any_of([v["a"], v["b"]], ...))`. Both mirror the + `array_check` rendering of a `list[Model]` element. + """ + + def _field_check(self, model_cls: type[BaseModel], function: str) -> Check: + field_checks, _ = build_checks(spec_for_model(model_cls)) + for check in field_checks: + if any(d.function == function for d in check.descriptors): + return check + raise AssertionError(f"no field check with {function}") + + def _model_check(self, model_cls: type[BaseModel]) -> ModelCheck: + _, model_checks = build_checks(spec_for_model(model_cls)) + assert len(model_checks) == 1, model_checks + return model_checks[0] + + def test_value_field_check_renders_map_values_lambda(self) -> None: + check = self._field_check(MapValueFieldModel, "check_string_min_length") + rows = field_check_rows([check]) + sources = [ + _render_check_function_string(_render_check_function_context(row)) + for row in rows + ] + assert any( + 'map_values_check("items", lambda v: check_string_min_length(v["label"], 1))' + in s + for s in sources + ), sources + + def test_value_model_constraint_renders_map_values_lambda(self) -> None: + # Asserts the raw renderer form: field names render via repr (single + # quotes); ruff normalizes to double quotes downstream. 
+ check = self._model_check(MapValueConstraintModel) + source = _render_model_node(check) + assert ( + 'map_values_check("subs", lambda v: ' + "check_require_any_of([v[\"foo\"], v[\"bar\"]], ['foo', 'bar']))" + ) in source, source + + def test_full_module_parseable_with_map_value_field(self) -> None: + source = _render(MapValueFieldModel, "map_field") + ast.parse(source) + assert "map_values_check(" in source + + def test_full_module_parseable_with_map_value_constraint(self) -> None: + source = _render(MapValueConstraintModel, "map_con") + ast.parse(source) + assert "map_values_check(" in source + + def test_model_constraint_func_name_prefixes_map_column(self) -> None: + # Mirrors the ArrayPath naming so distinct map columns yield distinct + # generated function names rather than colliding on `__`. + check = self._model_check(MapValueConstraintModel) + source = _render_model_node(check) + assert "def _subs_check_require_any_of_0_check()" in source, source + + def test_forbid_if_value_constraint_renders_map_values_lambda(self) -> None: + check = self._model_check(MapValueForbidIfModel) + source = _render_model_node(check) + assert ( + 'map_values_check("subs", lambda v: ' + 'check_forbid_if(v["extra"], v["kind"] == \'basic\', "kind = \'basic\'"))' + ) in source, source + + +class TestLeafQualifiedConditionRef: + """A require_if/forbid_if condition reached through a non-empty leaf + keeps the leaf. + + The target field ref and the condition field ref navigate the same + struct leaf; rendering the condition without the leaf references a + wrong column (a top-level field of the iterated element instead of the + nested struct's field). The leaf is non-empty for a constrained model + reached below an iterated container -- both `list[Model]` and + `dict[K, Model]`. 
+ """ + + def _require_if_check(self, model_cls: type[BaseModel]) -> ModelCheck: + _, model_checks = build_checks(spec_for_model(model_cls)) + matches = [c for c in model_checks if isinstance(c.descriptor, RequireIf)] + assert len(matches) == 1, model_checks + return matches[0] + + def test_map_value_require_if_condition_keeps_leaf(self) -> None: + source = _render_model_node(self._require_if_check(MapValueRequireIfModel)) + assert 'v["inner"]["admin_level"]' in source, source + assert 'v["inner"]["subtype"] ==' in source, source + + def test_array_require_if_condition_keeps_leaf(self) -> None: + source = _render_model_node(self._require_if_check(ArrayValueRequireIfModel)) + assert 'el["inner"]["admin_level"]' in source, source + assert 'el["inner"]["subtype"] ==' in source, source diff --git a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py index aba025cda..80cdfd739 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py @@ -6,9 +6,9 @@ from codegen_test_support import ( FeatureWithRequiredUrl, discover_feature, - feature_spec_for_model, + spec_for_model, ) -from overture.schema.codegen.extraction.specs import FeatureSpec +from overture.schema.codegen.extraction.specs import ModelSpec from overture.schema.codegen.pyspark.check_builder import build_checks from overture.schema.codegen.pyspark.check_ir import ElementGuard from overture.schema.codegen.pyspark.test_data.scaffold import ( @@ -22,24 +22,24 @@ @pytest.fixture(scope="module") -def connector_spec() -> FeatureSpec: +def connector_spec() -> ModelSpec: return discover_feature("Connector") @pytest.fixture(scope="module") -def division_area_spec() -> FeatureSpec: +def division_area_spec() -> ModelSpec: return discover_feature("DivisionArea") @pytest.fixture(scope="module") -def segment_spec() -> FeatureSpec: +def segment_spec() -> ModelSpec: 
return discover_feature("Segment") class TestLeafListDepth: def test_leaf_list_depth(self) -> None: """leaf_list_depth returns unaccounted-for list depth.""" - spec = feature_spec_for_model(FeatureWithRequiredUrl) + spec = spec_for_model(FeatureWithRequiredUrl) # Scalar field inside array struct — no extra wrapping assert leaf_list_depth(_path("datasets[].url"), spec) == 0 # List field without trailing array marker — needs wrapping @@ -53,7 +53,7 @@ class TestNestedListUrlField: def test_nested_list_url_field_single_depth(self) -> None: """list[HttpUrl] scaffold should be single-depth, not double-wrapped.""" - spec = feature_spec_for_model(FeatureWithRequiredUrl) + spec = spec_for_model(FeatureWithRequiredUrl) field_nodes, _ = build_checks(spec) url_nodes = [n for n in field_nodes if "download_urls" in str(n.target)] assert url_nodes, "Expected check nodes for download_urls" @@ -73,7 +73,7 @@ class TestGenerateScaffoldConnector: """Scaffold for Connector — simple top-level and one-level-nested fields.""" def test_required_top_level_field_produces_empty_scaffold( - self, connector_spec: FeatureSpec + self, connector_spec: ModelSpec ) -> None: """Required top-level fields exist in base row; scaffold adds nothing.""" field_nodes, _ = build_checks(connector_spec) @@ -82,7 +82,7 @@ def test_required_top_level_field_produces_empty_scaffold( assert scaffold == {} def test_optional_top_level_field_produces_scaffold( - self, connector_spec: FeatureSpec + self, connector_spec: ModelSpec ) -> None: """Optional fields absent from base row get a valid scaffold value.""" field_nodes, _ = build_checks(connector_spec) @@ -97,7 +97,7 @@ def test_optional_top_level_field_produces_scaffold( assert isinstance(scaffold["sources"], list) assert len(scaffold["sources"]) >= 1 - def test_array_nested_field_builds_path(self, connector_spec: FeatureSpec) -> None: + def test_array_nested_field_builds_path(self, connector_spec: ModelSpec) -> None: """sources[].property needs a sources array 
with one element.""" field_nodes, _ = build_checks(connector_spec) node = next(n for n in field_nodes if n.target == _path("sources[].property")) @@ -109,7 +109,7 @@ def test_array_nested_field_builds_path(self, connector_spec: FeatureSpec) -> No # Required sibling 'dataset' populated assert "dataset" in elem - def test_scaffold_is_dict(self, connector_spec: FeatureSpec) -> None: + def test_scaffold_is_dict(self, connector_spec: ModelSpec) -> None: field_nodes, _ = build_checks(connector_spec) for node in field_nodes: scaffold = generate_scaffold(node, connector_spec) @@ -120,7 +120,7 @@ class TestGenerateScaffoldSegment: """Scaffold for Segment — deeply nested arrays and discriminators.""" def test_suffixed_nested_leaf_uses_actual_field_name( - self, segment_spec: FeatureSpec + self, segment_spec: ModelSpec ) -> None: """Column-level checks share the structural path with the real field.""" field_nodes, _ = build_checks(segment_spec) @@ -136,7 +136,7 @@ def test_suffixed_nested_leaf_uses_actual_field_name( assert "mode" in when, f"Expected 'mode', got keys: {list(when.keys())}" assert "mode_min_length" not in when - def test_deeply_nested_array_path(self, segment_spec: FeatureSpec) -> None: + def test_deeply_nested_array_path(self, segment_spec: ModelSpec) -> None: """speed_limits[].when.vehicle[].dimension builds full nesting.""" field_nodes, _ = build_checks(segment_spec) node = next( @@ -153,7 +153,7 @@ def test_deeply_nested_array_path(self, segment_spec: FeatureSpec) -> None: assert isinstance(when["vehicle"], list) assert len(when["vehicle"]) == 1 - def test_element_guard_discriminator_set(self, segment_spec: FeatureSpec) -> None: + def test_element_guard_discriminator_set(self, segment_spec: ModelSpec) -> None: """Checks with an `ElementGuard` set the discriminator value in the scaffold.""" field_checks, _ = build_checks(segment_spec) # Find a speed_limits check with an ElementGuard. 
@@ -174,7 +174,7 @@ def test_element_guard_discriminator_set(self, segment_spec: FeatureSpec) -> Non assert vehicle_elem[element_guard.discriminator] == element_guard.values[0] def test_column_variant_does_not_appear_inside_scaffold( - self, segment_spec: FeatureSpec + self, segment_spec: ModelSpec ) -> None: """`ColumnGuard`s don't set discriminator inside the scaffold dict.""" field_checks, _ = build_checks(segment_spec) @@ -191,7 +191,7 @@ def test_column_variant_does_not_appear_inside_scaffold( # it belongs at the row level, which the base row handles. assert isinstance(scaffold, dict) - def test_multiple_element_guards_raises(self, segment_spec: FeatureSpec) -> None: + def test_multiple_element_guards_raises(self, segment_spec: ModelSpec) -> None: """The check_ir invariant allows at most one `ElementGuard` per Check. Multiple guards would indicate the gate composition rule changed @@ -217,7 +217,7 @@ def test_multiple_element_guards_raises(self, segment_spec: FeatureSpec) -> None class TestGenerateModelScaffold: def test_top_level_model_constraint_produces_empty_scaffold( - self, division_area_spec: FeatureSpec + self, division_area_spec: ModelSpec ) -> None: """Model constraints at the top level need no nesting.""" _, model_nodes = build_checks(division_area_spec) @@ -227,7 +227,7 @@ def test_top_level_model_constraint_produces_empty_scaffold( assert isinstance(scaffold, dict) def test_array_nested_model_constraint_builds_path( - self, segment_spec: FeatureSpec + self, segment_spec: ModelSpec ) -> None: """Model constraints inside arrays build the array path.""" _, model_checks = build_checks(segment_spec) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py index 26dcdff30..c848d88b4 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py @@ -3,7 +3,7 @@ 
from enum import Enum import pytest -from codegen_test_support import feature_spec_for_model +from codegen_test_support import spec_for_model from overture.schema.codegen.extraction.field import Primitive from overture.schema.codegen.extraction.specs import ( AnnotatedField, @@ -23,7 +23,7 @@ class SimpleModel(BaseModel): class TestPrimitiveFields: @pytest.fixture def fields(self) -> list[SchemaField]: - return build_schema(feature_spec_for_model(SimpleModel)) + return build_schema(spec_for_model(SimpleModel)) def test_string_field_maps_to_string_type(self, fields: list[SchemaField]) -> None: name_field = next(f for f in fields if f.name == "name") @@ -46,7 +46,7 @@ class ContainerModel(BaseModel): class TestNestedModel: @pytest.fixture def fields(self) -> list[SchemaField]: - return build_schema(feature_spec_for_model(ContainerModel)) + return build_schema(spec_for_model(ContainerModel)) def test_nested_model_emits_struct_type(self, fields: list[SchemaField]) -> None: item_field = next(f for f in fields if f.name == "item") @@ -66,7 +66,7 @@ class ListModel(BaseModel): class TestListFields: @pytest.fixture def fields(self) -> list[SchemaField]: - return build_schema(feature_spec_for_model(ListModel)) + return build_schema(spec_for_model(ListModel)) def test_list_str_maps_to_array_string(self, fields: list[SchemaField]) -> None: tags_field = next(f for f in fields if f.name == "tags") @@ -86,7 +86,7 @@ class DictModel(BaseModel): class TestDictFields: @pytest.fixture def fields(self) -> list[SchemaField]: - return build_schema(feature_spec_for_model(DictModel)) + return build_schema(spec_for_model(DictModel)) def test_dict_str_str_maps_to_map_type(self, fields: list[SchemaField]) -> None: labels_field = next(f for f in fields if f.name == "labels") @@ -96,7 +96,7 @@ def test_dict_str_str_maps_to_map_type(self, fields: list[SchemaField]) -> None: class TestDivisionAreaSchema: @pytest.fixture(scope="class") def fields(self) -> list[SchemaField]: - return 
build_schema(feature_spec_for_model(DivisionArea)) + return build_schema(spec_for_model(DivisionArea)) def test_id_field_is_string_type(self, fields: list[SchemaField]) -> None: id_field = next(f for f in fields if f.name == "id") diff --git a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py index 64537c9b5..966686058 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py @@ -5,7 +5,8 @@ from enum import Enum import pytest -from overture.schema.codegen.extraction.field import ArrayOf, Primitive +from overture.schema.codegen.extraction.field import ArrayOf, ModelRef, Primitive +from overture.schema.codegen.extraction.specs import RecordSpec from overture.schema.codegen.pyspark.check_ir import ( Check, ColumnGuard, @@ -20,12 +21,18 @@ RequireAnyOf, RequireIf, ) +from overture.schema.codegen.pyspark.renderer import render_model_module +from overture.schema.codegen.pyspark.test_renderer import ( + _fill_value_literal, +) from overture.schema.codegen.pyspark.test_renderer import ( render_test_module as _real_render_test_module, ) from overture.schema.system.field_constraint.string import ( CountryCodeAlpha2Constraint, + LanguageTagConstraint, NoWhitespaceConstraint, + StrippedConstraint, ) from overture.schema.system.field_path import ArrayPath, ScalarPath, parse from overture.schema.system.model_constraint import FieldEqCondition, Not @@ -99,6 +106,7 @@ def _array( prefix = ScalarPath(segments=prefix_structs) path = prefix.append_array(outer_name, iter_count=1) else: + assert isinstance(column_path, ArrayPath) # never a MapPath here path = column_path for sp in inner_struct_paths: for n in sp[:-1]: @@ -109,6 +117,44 @@ def _array( return path +class TestMapPathScenarios: + """MapPath field checks emit mutate_map_key / mutate_map_value scenarios.""" + + def 
test_map_key_emits_mutate_map_key(self) -> None: + check = make_check( + "check_pattern", + _path("names.common{key}"), + args=(r"^[a-z]+$",), + constraint_type=LanguageTagConstraint, + label="language tag", + ) + source = render_test_module("dictfeat", [check], []) + ast.parse(source) + assert "mutate_map_key(row, 'names.common', '123')" in source + assert "expected_field='names.common{key}'" in source + + def test_map_value_emits_mutate_map_value(self) -> None: + check = make_check( + "check_stripped", + _path("names{value}"), + constraint_type=StrippedConstraint, + ) + source = render_test_module("dictfeat", [check], []) + ast.parse(source) + assert "mutate_map_value(row, 'names', ' has spaces ')" in source + assert "expected_field='names{value}'" in source + + def test_map_mutation_helper_is_imported(self) -> None: + check = make_check( + "check_stripped", + _path("names{value}"), + constraint_type=StrippedConstraint, + ) + source = render_test_module("dictfeat", [check], []) + # Appears in both the import block and the scenario call. 
+ assert source.count("mutate_map_value") >= 2 + + class TestRenderTestModuleParseable: def test_renders_valid_python_with_nodes(self) -> None: nodes = [make_check("check_required", _path("country"))] @@ -189,12 +235,13 @@ def test_unknown_constraint_raises(self) -> None: with pytest.raises(ValueError, match="Cannot render mutate expression"): render_test_module("test", nodes, []) - def test_pattern_produces_invalid_string(self) -> None: + def test_pattern_without_constraint_type_raises(self) -> None: + """check_pattern with no constraint_type raises at codegen time.""" nodes = [ make_check("check_pattern", _path("wikidata.value"), args=(r"^Q\d+$",)), ] - source = render_test_module("test", nodes, []) - assert "'pattern'" in source + with pytest.raises(ValueError, match="Cannot render mutate expression"): + render_test_module("test", nodes, []) def test_no_whitespace_pattern_mutation_contains_whitespace(self) -> None: """Mutation for NoWhitespaceConstraint must contain whitespace to violate ^\\S+$.""" @@ -262,7 +309,7 @@ def test_max_length_produces_oversized_list(self) -> None: assert "[{}, {}, {}, {}]" in source or "[{}] * 4" in source assert "expected_field='connectors_max_length'" in source - def test_scenario_id_includes_feature_name(self) -> None: + def test_scenario_id_includes_model_name(self) -> None: nodes = [make_check("check_required", _path("country"))] source = render_test_module("division_area", nodes, []) assert "division_area::country:required" in source @@ -369,14 +416,24 @@ def test_forbid_if_array_field_generates_fill_values(self) -> None: assert "[{}]" in source def test_forbid_if_struct_field_generates_fill_values(self) -> None: - """forbid_if targeting a struct field emits fill_values with {}.""" + """forbid_if targeting a struct field emits fill_values with {}. 
+ + Struct fields reach `_fill_value_literal` as `ModelRef` shapes, not + `Primitive` — `_needs_explicit_fill` only passes model references and + arrays for the `{}` / `[{}]` fill; string `Primitive`s are excluded. + """ model_nodes = [ ModelCheck( descriptor=ForbidIf( field_names=("road_surface",), condition=FieldEqCondition("subtype", "road"), field_shapes=( - ("road_surface", Primitive(base_type="RoadSurface")), + ( + "road_surface", + ModelRef( + model=RecordSpec(name="RoadSurface", description=None) + ), + ), ), ), ), @@ -556,6 +613,43 @@ def test_require_if_with_multi_inner_levels_raises(self) -> None: render_test_module("test", [], model_nodes) +class TestCrossArmModelCheckLabelCollision: + """Per-arm test labels must match the expression module's labels. + + The expression module is rendered once over the unfiltered model-check + list, so a cross-arm base-label collision earns a `_N` suffix there. A + per-arm test module must compute that suffix over the same unfiltered + list and filter rows afterward; computing it over the arm subset would + emit a bare `expected_field` the module never produces. 
+ """ + + def test_per_arm_label_matches_module_label(self) -> None: + road = ModelCheck( + descriptor=RequireIf( + field_names=("class",), + condition=FieldEqCondition("subtype", "road"), + ), + arm="road", + ) + rail = ModelCheck( + descriptor=RequireIf( + field_names=("class",), + condition=FieldEqCondition("subtype", "rail"), + ), + arm="rail", + ) + model_checks = [road, rail] + + module = render_model_module("seg", [], model_checks, []) + module_labels = re.findall(r'field="(class_required[^"]*)"', module) + road_label = module_labels[0] + + test_source = render_test_module("seg", [], model_checks, arm="road") + test_labels = re.findall(r"expected_field='(class_required[^']*)'", test_source) + + assert test_labels == [road_label], (test_labels, road_label) + + class TestTestLayer: @pytest.fixture(scope="class") def empty_source(self) -> str: @@ -793,6 +887,211 @@ def test_arm_filtering_ignores_inner_element_discriminator(self) -> None: assert "speed_limits" in road +class TestFieldLabelCollisionSuffix: + """Colliding field-check `expected_field`s carry the suffix the module emits. + + The expression module is rendered once across every arm, so its + `(field, name)` collisions are defined over the full check list. The + per-arm test modules must derive `expected_field` from that same + full list -- not a post-arm-filter subset -- or they assert a field + the module never emits. 
+ """ + + def _colliding_checks(self) -> list[Check]: + """Two `required` checks on one path, distinguished by inner union arm.""" + return [ + make_check( + "check_required", + _path("value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + ElementGuard(discriminator="dimension", values=("axle_count",)), + ), + ), + make_check( + "check_required", + _path("value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + ElementGuard(discriminator="dimension", values=("height",)), + ), + ), + ] + + def test_colliding_expected_fields_are_suffixed(self) -> None: + source = render_test_module("test", self._colliding_checks(), []) + ast.parse(source) + assert "expected_field='value_0'" in source + assert "expected_field='value_1'" in source + assert "expected_field='value'," not in source + + def test_suffix_survives_arm_filter(self) -> None: + """Both colliding checks share an arm; the per-arm file keeps both suffixes. + + Computing suffixes post-filter would still see the collision here + (both survive), so this alone is necessary but not sufficient -- + `test_suffix_computed_over_unfiltered_list` covers the case where + filtering would otherwise hide it. + """ + source = render_test_module("test", self._colliding_checks(), [], arm="road") + ast.parse(source) + assert "expected_field='value_0'" in source + assert "expected_field='value_1'" in source + + def test_suffix_computed_over_unfiltered_list(self) -> None: + """A surviving check keeps the suffix even when its collision sibling is filtered out. + + The two checks collide in the full list (both emit + `(value, required)`) but belong to different arms. The expression + module -- rendered across both arms -- emits `value_0` / `value_1`. + Each arm test sees only one of them after filtering; computing + the suffix from that one-element subset would wrongly drop it. 
+ """ + checks = [ + make_check( + "check_required", + _path("value"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ), + make_check( + "check_required", + _path("value"), + guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), + ), + ] + road = render_test_module("test", checks, [], arm="road") + rail = render_test_module("test", checks, [], arm="rail") + ast.parse(road) + ast.parse(rail) + assert "expected_field='value_0'" in road + assert "expected_field='value_1'" in rail + # Neither arm asserts the bare, never-emitted label. + assert "expected_field='value'," not in road + assert "expected_field='value'," not in rail + + def test_noncolliding_field_check_stays_bare(self) -> None: + nodes = [ + make_check("check_required", _path("value")), + make_check("check_bounds", _path("value"), kwargs=(("ge", 0),)), + ] + source = render_test_module("test", nodes, []) + ast.parse(source) + assert "expected_field='value'," in source + assert "expected_field='value_0'" not in source + + +class TestForbidIfNonStringFillValues: + """fill_values for non-string scalar ForbidIf fields must be typed literals.""" + + def test_forbid_if_int_field_generates_int_fill_value(self) -> None: + """forbid_if targeting an int field emits fill_values with 0, not {}.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("version",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=(("version", Primitive(base_type="int32")),), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "'version': 0" in source + assert "'version': {}" not in source + + def test_forbid_if_bool_field_generates_bool_fill_value(self) -> None: + """forbid_if targeting a bool field emits fill_values with False, not {}.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("flag",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=(("flag", 
Primitive(base_type="bool")),), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "'flag': False" in source + assert "'flag': {}" not in source + + def test_forbid_if_float_field_generates_float_fill_value(self) -> None: + """forbid_if targeting a float field emits fill_values with 0.0, not {}.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("score",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=(("score", Primitive(base_type="float64")),), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "'score': 0.0" in source + assert "'score': {}" not in source + + def test_string_primitive_in_field_shapes_raises(self) -> None: + """_fill_value_literal raises ValueError if a string-typed Primitive reaches it. + + String primitives must not appear in field_shapes (the contract is that + `_needs_explicit_fill` filters them out). A direct violation raises loudly + instead of silently emitting `{}`. + """ + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("label",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=(("label", Primitive(base_type="str")),), + ), + ), + ] + with pytest.raises(ValueError, match="unhandled Primitive base_type"): + render_test_module("test", [], model_nodes) + + +class TestFillValueLiteralOtherCategory: + """_fill_value_literal raises for Primitive base types in category 'other'. + + 'Geometry' maps to `primitive_spark_category` -> 'other'. A ForbidIf + field_shapes entry containing such a shape must raise at generation time + rather than silently emitting `{}` (a struct literal) for a binary column. + """ + + def test_geometry_primitive_raises_directly(self) -> None: + """_fill_value_literal raises ValueError for a Primitive of category 'other'. 
+ + Calls `_fill_value_literal` directly with a `Geometry` Primitive + (category 'other') to confirm the raise is unconditional rather + than guarded by a registry lookup. + """ + with pytest.raises(ValueError, match="unhandled Primitive base_type"): + _fill_value_literal(Primitive(base_type="Geometry")) + + def test_geometry_primitive_in_field_shapes_raises(self) -> None: + """_fill_value_literal raises ValueError for a Geometry-typed Primitive. + + 'Geometry' is category 'other' in `primitive_spark_category`. Without + the fix, the 'other' branch falls through to the struct `return "{}"`, + silently emitting an invalid fill value for a binary column. + """ + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("geometry",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=(("geometry", Primitive(base_type="Geometry")),), + ), + ), + ] + with pytest.raises(ValueError, match="unhandled Primitive base_type"): + render_test_module("test", [], model_nodes) + + class TestLinearRangeMutations: @pytest.mark.parametrize( ("function", "expected_value"), diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py index 7897a8256..ad1cf9fa7 100644 --- a/packages/overture-schema-codegen/tests/test_reverse_references.py +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -11,16 +11,16 @@ RoadSegment, TreeNode, Venue, - feature_spec_for_model, has_name, lookup_by_name, make_union_spec, + spec_for_model, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import ( - ModelSpec, PydanticTypeSpec, + RecordSpec, TypeIdentity, ) from overture.schema.codegen.layout.type_collection import ( @@ -50,7 +50,7 @@ def test_model_referencing_type_produces_used_by_entry( target_name: str, ) -> None: 
"""Model referencing a type produces a 'used by' entry on that type.""" - expanded = feature_spec_for_model(model_class, entry_point=model_name) + expanded = spec_for_model(model_class, entry_point=model_name) all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, target_name) @@ -84,6 +84,48 @@ def test_newtype_inheriting_from_newtype_produces_used_by_entry() -> None: assert entries[0].kind == UsedByKind.NEWTYPE +def test_newtype_inheriting_through_array_layer_produces_used_by_entry() -> None: + """A NewType chaining through an array NewType inherits the inner + NewType's provenance from the array layer, not just the terminal scalar.""" + from overture.schema.codegen.extraction.field import ( + ArrayOf, + ConstraintSource, + Primitive, + ) + from overture.schema.codegen.extraction.specs import NewTypeSpec + + Inner = NewType("Inner", str) + Outer = NewType("Outer", list) + + outer_spec = NewTypeSpec( + name="Outer", + description=None, + shape=ArrayOf( + element=Primitive(base_type="str"), + constraints=( + ConstraintSource( + source_ref=Inner, source_name="Inner", constraint=object() + ), + ), + ), + ) + inner_spec = NewTypeSpec( + name="Inner", description=None, shape=Primitive(base_type="str") + ) + + all_specs = { + TypeIdentity(Outer, "Outer"): outer_spec, + TypeIdentity(Inner, "Inner"): inner_spec, + } + + result = compute_reverse_references([], all_specs) + + entries = lookup_by_name(result, "Inner") + assert len(entries) == 1 + assert entries[0].identity.name == "Outer" + assert entries[0].kind == UsedByKind.NEWTYPE + + def test_union_members_have_used_by_entries() -> None: """Union members have 'used by' entries pointing to the union feature.""" # Create a union spec with RoadSegment as a member @@ -95,8 +137,8 @@ def test_union_members_have_used_by_entries() -> None: ) # Extract the member - road_spec = feature_spec_for_model(RoadSegment) - assert isinstance(road_spec, ModelSpec) + road_spec = spec_for_model(RoadSegment) + 
assert isinstance(road_spec, RecordSpec) all_specs = {TypeIdentity(RoadSegment, "RoadSegment"): road_spec} result = compute_reverse_references([union_spec], all_specs) @@ -109,8 +151,8 @@ def test_union_members_have_used_by_entries() -> None: def test_self_references_filtered_out() -> None: """Self-references are filtered out (handles recursive types).""" - tree_spec = feature_spec_for_model(TreeNode, entry_point="TreeNode") - assert isinstance(tree_spec, ModelSpec) + tree_spec = spec_for_model(TreeNode, entry_point="TreeNode") + assert isinstance(tree_spec, RecordSpec) # Manually add TreeNode to all_specs to test self-reference filtering all_specs = {TypeIdentity(TreeNode, "TreeNode"): tree_spec} @@ -124,8 +166,8 @@ def test_self_references_filtered_out() -> None: def test_deduplication_same_type_multiple_fields() -> None: """Deduplication works when same type is referenced via multiple fields.""" - instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") - venue_spec = feature_spec_for_model(Venue, entry_point="Venue") + instrument_spec = spec_for_model(Instrument, entry_point="Instrument") + venue_spec = spec_for_model(Venue, entry_point="Venue") all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) assert has_name(all_specs, "Id") @@ -143,7 +185,7 @@ def test_deduplication_same_type_multiple_fields() -> None: def test_pydantic_type_has_used_by_from_feature() -> None: """Pydantic type in all_specs gets used-by entries from features referencing it.""" - expanded = feature_spec_for_model(FeatureWithUrl, entry_point="FeatureWithUrl") + expanded = spec_for_model(FeatureWithUrl, entry_point="FeatureWithUrl") all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, "HttpUrl") @@ -173,8 +215,8 @@ class FeatureBeta(BaseModel): FeatureBeta.__name__ = "Feature" FeatureBeta.__module__ = "beta.models" - spec_a = feature_spec_for_model(FeatureAlpha, entry_point="Feature") - spec_b = 
feature_spec_for_model(FeatureBeta, entry_point="Feature") + spec_a = spec_for_model(FeatureAlpha, entry_point="Feature") + spec_b = spec_for_model(FeatureBeta, entry_point="Feature") enum_id = TypeIdentity(SharedEnum, "SharedEnum") all_specs = {enum_id: extract_enum(SharedEnum)} @@ -196,8 +238,8 @@ def test_sorting_models_before_newtypes() -> None: # Create a synthetic NewType that wraps Id CustomId = NewType("CustomId", Id) - instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") - venue_spec = feature_spec_for_model(Venue, entry_point="Venue") + instrument_spec = spec_for_model(Instrument, entry_point="Instrument") + venue_spec = spec_for_model(Venue, entry_point="Venue") all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) # Add the CustomId NewType which references Id diff --git a/packages/overture-schema-codegen/tests/test_specs.py b/packages/overture-schema-codegen/tests/test_specs.py index 550af18b7..51da3a20a 100644 --- a/packages/overture-schema-codegen/tests/test_specs.py +++ b/packages/overture-schema-codegen/tests/test_specs.py @@ -13,10 +13,10 @@ from overture.schema.codegen.extraction.specs import ( AnnotatedField, EnumSpec, - FeatureSpec, FieldSpec, ModelSpec, NewTypeSpec, + RecordSpec, TypeIdentity, is_union_alias, ) @@ -24,12 +24,13 @@ from pydantic import BaseModel, Field -class TestFeatureSpec: - def test_model_spec_is_feature_spec(self) -> None: +class TestModelSpec: + def test_record_spec_is_model_spec(self) -> None: class Simple(BaseModel): name: str - spec: FeatureSpec = extract_model(Simple) + spec: ModelSpec = extract_model(Simple) + assert isinstance(spec, RecordSpec) assert spec.name == "Simple" assert isinstance(spec.fields, list) assert spec.source_type is Simple @@ -133,8 +134,8 @@ def test_not_equal_to_non_identity(self) -> None: class TestSpecIdentity: - def test_model_spec_identity(self) -> None: - spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) + def 
test_record_spec_identity(self) -> None: + spec = RecordSpec(name="Foo", description=None, source_type=SimpleModel) assert spec.identity.obj is SimpleModel assert spec.identity.name == "Foo" diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py index f8ccf88f0..003b94aba 100644 --- a/packages/overture-schema-codegen/tests/test_type_analyzer.py +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -23,6 +23,7 @@ ScalarMinLen, ) from overture.schema.codegen.extraction.type_analyzer import ( + UnresolvedForwardRefError, UnsupportedUnionError, analyze_type, single_literal_value, @@ -115,12 +116,26 @@ def test_optional_list(self) -> None: assert optional is True def test_list_optional_element(self) -> None: + # list[str | None] is a required list whose *elements* may be None. + # Field-level optionality (list itself accepting None) is False; + # element nullability is an element-shape concern, not a field concern. shape, optional, _ = analyze_type(list[str | None]) assert isinstance(shape, ArrayOf) - # `is_optional` reflects the field accepting None; element-level - # `| None` propagates the same way. + assert optional is False + + def test_optional_list_with_optional_element(self) -> None: + # list[str | None] | None: both the field and its elements accept None. + # Field optionality is True (the outer | None), independent of the element. + shape, optional, _ = analyze_type(list[str | None] | None) + assert isinstance(shape, ArrayOf) assert optional is True + def test_list_optional_element_desc_is_none(self) -> None: + # Description comes from Field(description=...) at the field layer, + # not from the element type. List branch returns None, matching dict. 
+ _, _, desc = analyze_type(list[str | None]) + assert desc is None + class TestAnnotated: def test_ge_collected_on_terminal(self) -> None: @@ -155,6 +170,50 @@ def test_layered_constraints_anchor_separately(self) -> None: assert inner[0].constraint == ScalarMinLen(min_length=2) +class TestAttachConstraintsOnModelTerminal: + """Constraints destined for a model/union terminal are rejected loudly.""" + + def _model_ref(self) -> FieldShape: + from overture.schema.codegen.extraction.field import ModelRef + from overture.schema.codegen.extraction.specs import RecordSpec + + return ModelRef(model=RecordSpec(name="Person", description=None)) + + def _union_ref(self) -> FieldShape: + from overture.schema.codegen.extraction.field import UnionRef + from overture.schema.codegen.extraction.specs import UnionSpec + + return UnionRef( + union=UnionSpec( + name="U", + description=None, + annotated_fields=[], + members=[], + discriminator_field=None, + discriminator_mapping=None, + source_annotation=object(), + common_base=BaseModel, + ) + ) + + @pytest.mark.parametrize("ref_name", ["_model_ref", "_union_ref"]) + def test_constraint_on_terminal_raises(self, ref_name: str) -> None: + from overture.schema.codegen.extraction.field import ConstraintSource + from overture.schema.codegen.extraction.type_analyzer import attach_constraints + + shape = getattr(self, ref_name)() + cs = (ConstraintSource(source_ref=None, source_name=None, constraint=Ge(0)),) + with pytest.raises(NotImplementedError): + attach_constraints(shape, cs) + + @pytest.mark.parametrize("ref_name", ["_model_ref", "_union_ref"]) + def test_no_constraints_is_noop(self, ref_name: str) -> None: + from overture.schema.codegen.extraction.type_analyzer import attach_constraints + + shape = getattr(self, ref_name)() + assert attach_constraints(shape, ()) is shape + + class TestLiteral: def test_single_value(self) -> None: shape = _shape(Literal["active"]) @@ -317,8 +376,34 @@ def test_maxlen_on_map_raises(self) -> None: class 
TestErrors: def test_unsupported_annotation(self) -> None: with pytest.raises(TypeError, match="Unsupported annotation type"): + analyze_type(42) + + def test_unresolvable_forward_ref_raises_named_error(self) -> None: + with pytest.raises( + UnresolvedForwardRefError, match="forward reference 'Missing'" + ): + analyze_type("Missing") + + def test_malformed_forward_ref_raises_named_error(self) -> None: + with pytest.raises(UnresolvedForwardRefError, match="not a type"): analyze_type("not a type") + def test_dotted_forward_ref_missing_attr_raises_named_error(self) -> None: + """A dotted ref to a missing attribute fails as `UnresolvedForwardRefError`. + + `evaluate_forward_ref` raises `AttributeError` (not `NameError`) + when the head of a dotted reference resolves but the attribute is + absent; the wrapping must catch it so the failure stays named. + """ + + class Outer(BaseModel): + x: int + + with pytest.raises( + UnresolvedForwardRefError, match="forward reference 'Outer.Missing'" + ): + analyze_type("Outer.Missing", owner=Outer) + def test_multi_type_union_without_resolver(self) -> None: with pytest.raises(UnsupportedUnionError): analyze_type(str | int) @@ -328,6 +413,46 @@ def test_bare_list(self) -> None: analyze_type(list) +class TestForwardRefs: + def test_bare_string_element_resolved_against_owner(self) -> None: + """`list["Self"]` resolves the bare-string element via the owner namespace. + + With no `model_resolver` the resolved model terminal falls back + to a `Primitive` carrying the class as `source_type`; the point + is that the bare `str` was resolved rather than reaching the + terminal classifier unresolved. 
+ """ + + class Node(BaseModel): + children: list["Node"] + + annotation = Node.model_fields["children"].annotation + shape, _, _ = analyze_type(annotation, owner=Node) + + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, Primitive) + assert shape.element.source_type is Node + + def test_nested_class_forward_ref_resolved_against_owner(self) -> None: + """A bare forward ref to a nested class resolves via the owner's namespace. + + `_resolve_forward_ref` merges `vars(owner)` into the resolution + namespace, so `"Inner"` reaches `Outer.Inner` rather than raising + `NameError`. The merge is explicit because passing any `locals` to + `evaluate_forward_ref` bypasses the library's own `vars(owner)` + fallback. + """ + + class Outer(BaseModel): + class Inner(BaseModel): + x: int + + shape, _, _ = analyze_type("Inner", owner=Outer) + + assert isinstance(shape, Primitive) + assert shape.source_type is Outer.Inner + + class UnionModelA(BaseModel): x: int diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py index 2df73cf2f..22e412200 100644 --- a/packages/overture-schema-codegen/tests/test_type_collection.py +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -6,15 +6,15 @@ FeatureWithUrl, Instrument, TestSegmentWithSubModel, - feature_spec_for_model, has_name, lookup_by_name, + spec_for_model, ) from overture.schema.codegen.extraction.specs import ( EnumSpec, - ModelSpec, NewTypeSpec, PydanticTypeSpec, + RecordSpec, SupplementarySpec, TypeIdentity, ) @@ -34,7 +34,7 @@ def _make_feature_with_sub_model(sub_model: type) -> type[BaseModel]: def _expanded_supplementary(model_class: type) -> dict[TypeIdentity, SupplementarySpec]: - return collect_all_supplementary_types([feature_spec_for_model(model_class)]) + return collect_all_supplementary_types([spec_for_model(model_class)]) class TestCollectAllSupplementarySpecs: @@ -56,7 +56,7 @@ def 
test_returns_model_specs_from_expanded_tree(self) -> None: result = _expanded_supplementary(FeatureWithAddress) assert has_name(result, "Address") - assert isinstance(lookup_by_name(result, "Address"), ModelSpec) + assert isinstance(lookup_by_name(result, "Address"), RecordSpec) def test_collects_transitive_types(self) -> None: """Types referenced by sub-models are also collected.""" @@ -72,8 +72,8 @@ def test_same_name_different_types_both_collected(self) -> None: ModelA = type("Address", (BaseModel,), {"__annotations__": {"x": str}}) ModelB = type("Address", (BaseModel,), {"__annotations__": {"y": int}}) - outer_a = feature_spec_for_model(_make_feature_with_sub_model(ModelA)) - outer_b = feature_spec_for_model(_make_feature_with_sub_model(ModelB)) + outer_a = spec_for_model(_make_feature_with_sub_model(ModelA)) + outer_b = spec_for_model(_make_feature_with_sub_model(ModelB)) result = collect_all_supplementary_types([outer_a, outer_b]) @@ -95,7 +95,7 @@ class FeatureWithUnionSubModel(BaseModel): result = _expanded_supplementary(FeatureWithUnionSubModel) assert has_name(result, "ContactInfo") - assert isinstance(lookup_by_name(result, "ContactInfo"), ModelSpec) + assert isinstance(lookup_by_name(result, "ContactInfo"), RecordSpec) class TestCollectPydanticTypes: @@ -118,3 +118,24 @@ def test_does_not_collect_builtin_primitives(self) -> None: result = _expanded_supplementary(FeatureWithUrl) assert not has_name(result, "str") assert not has_name(result, "int") + + +class TestSemanticNewtypeGuard: + """Only semantic NewTypes become standalone supplementary specs.""" + + def test_registered_primitive_newtype_not_collected(self) -> None: + """A non-semantic NewType (uint8) belongs on the aggregate + primitives page, not as a standalone NewTypeSpec whose path + collides with it.""" + from overture.schema.system.primitive import uint8 + from overture.schema.system.string import HexColor + + feature = type( + "FeatureWithPrimitives", + (BaseModel,), + {"__annotations__": 
{"count": uint8, "color": HexColor}}, + ) + result = _expanded_supplementary(feature) + + assert not has_name(result, "uint8") + assert has_name(result, "HexColor") diff --git a/packages/overture-schema-codegen/tests/test_type_placement.py b/packages/overture-schema-codegen/tests/test_type_placement.py index 8550a7319..5ae07dbbd 100644 --- a/packages/overture-schema-codegen/tests/test_type_placement.py +++ b/packages/overture-schema-codegen/tests/test_type_placement.py @@ -13,9 +13,9 @@ ) from overture.schema.codegen.extraction.specs import ( AnnotatedField, - FeatureSpec, FieldSpec, ModelSpec, + RecordSpec, SupplementarySpec, TypeIdentity, ) @@ -41,12 +41,12 @@ def _build_registry( - feature_specs: list[ModelSpec], + model_specs: list[RecordSpec], ) -> tuple[dict[TypeIdentity, PurePosixPath], dict[TypeIdentity, SupplementarySpec]]: """Build placement registry with standard aggregate names.""" - all_specs = collect_all_supplementary_types(feature_specs) + all_specs = collect_all_supplementary_types(model_specs) registry = build_placement_registry( - feature_specs, all_specs, _NUMERIC_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT + model_specs, all_specs, _NUMERIC_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT ) return registry, all_specs @@ -145,7 +145,7 @@ class TestPlacementWithUnionSpec: """Tests for placement registry with UnionSpec.""" def test_union_spec_gets_placement(self) -> None: - """UnionSpec is placed alongside ModelSpec in the registry.""" + """UnionSpec is placed alongside RecordSpec in the registry.""" class Base(BaseModel): name: str @@ -170,10 +170,10 @@ class A(Base): entry_point="test.package:TestUnion", ) - feature_specs: list[FeatureSpec] = [union_spec] - all_specs = collect_all_supplementary_types(feature_specs) + model_specs: list[ModelSpec] = [union_spec] + all_specs = collect_all_supplementary_types(model_specs) registry = build_placement_registry( - feature_specs, all_specs, [], [], "test.package" + model_specs, all_specs, [], [], "test.package" ) assert 
any(tid.name == "TestUnion" for tid in registry) @@ -202,7 +202,7 @@ class TestPydanticTypePlacement: def test_pydantic_type_placed_under_module_dir(self) -> None: registry = build_placement_registry( - feature_specs=[], + model_specs=[], all_specs={HTTP_URL_SPEC.identity: HTTP_URL_SPEC}, numeric_names=[], geometry_names=[], @@ -218,7 +218,7 @@ def test_multiple_pydantic_types_same_module(self) -> None: EMAIL_STR_SPEC.identity: EMAIL_STR_SPEC, } registry = build_placement_registry( - feature_specs=[], + model_specs=[], all_specs=specs, numeric_names=[], geometry_names=[], diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py index b2a4b45dc..3ca2e01c7 100644 --- a/packages/overture-schema-codegen/tests/test_type_registry.py +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -9,6 +9,7 @@ PRIMITIVE_TYPES, TypeMapping, get_type_mapping, + primitive_spark_category, resolve_type_name, ) @@ -102,3 +103,34 @@ def test_plain_scalar(self) -> None: def test_array_of_scalar_resolves_terminal(self) -> None: shape = ArrayOf(element=Primitive(base_type="str", source_type=str)) assert resolve_type_name(shape) == "string" + + +class TestPrimitiveSparkCategory: + def test_int_types_are_int(self) -> None: + for bt in ( + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "int", + ): + assert primitive_spark_category(bt) == "int", bt + + def test_float_types_are_float(self) -> None: + for bt in ("float32", "float64", "float"): + assert primitive_spark_category(bt) == "float", bt + + def test_bool_is_bool(self) -> None: + assert primitive_spark_category("bool") == "bool" + + def test_string_type_is_string(self) -> None: + assert primitive_spark_category("str") == "string" + + def test_unknown_type_falls_back_to_other(self) -> None: + assert primitive_spark_category("UnknownNewType") == "other" + + def test_geometry_is_other(self) -> None: + assert 
primitive_spark_category("Geometry") == "other" diff --git a/packages/overture-schema-codegen/tests/test_union_extraction.py b/packages/overture-schema-codegen/tests/test_union_extraction.py index 42b5e0c43..73f599332 100644 --- a/packages/overture-schema-codegen/tests/test_union_extraction.py +++ b/packages/overture-schema-codegen/tests/test_union_extraction.py @@ -1,6 +1,10 @@ """Tests for union extraction.""" +import re +from typing import Any + import pytest +from annotated_types import MinLen from codegen_test_support import ( RailSegment, RoadSegment, @@ -8,11 +12,31 @@ TestEnumDiscriminatorUnion, TestSegment, TestSegmentDivergingConstraints, + TestSegmentEqualConstraints, WaterSegment, ) +from overture.schema.codegen.extraction.field import ( + ArrayOf, + ConstraintSource, + Primitive, +) from overture.schema.codegen.extraction.specs import FieldSpec, UnionSpec -from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.extraction.union_extraction import ( + _constraints_fingerprint, + extract_union, +) from overture.schema.common.scoping.vehicle import VehicleSelector +from overture.schema.system.field_constraint import ( + FieldConstraint, + UniqueItemsConstraint, +) +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + JsonPointerConstraint, + PatternConstraint, +) +from pydantic import Field, GetCoreSchemaHandler +from pydantic_core import core_schema class TestExtractUnion: @@ -132,3 +156,120 @@ def test_name_derived_from_common_base(self) -> None: """When name matches a member class, derive from common base minus 'Base' suffix.""" spec = extract_union("VehicleAxleCountSelector", VehicleSelector) assert spec.name == "VehicleSelector" + + +def _make_array_field(constraint: object) -> FieldSpec: + """Build a FieldSpec with one array-level constraint for fingerprint tests.""" + cs = ConstraintSource(source_ref=None, source_name=None, constraint=constraint) + return 
FieldSpec( + name="items", shape=ArrayOf(element=Primitive("str"), constraints=(cs,)) + ) + + +def _make_scalar_field(constraint: object) -> FieldSpec: + """Build a FieldSpec with one scalar-level constraint for fingerprint tests.""" + cs = ConstraintSource(source_ref=None, source_name=None, constraint=constraint) + return FieldSpec(name="tag", shape=Primitive("str", constraints=(cs,))) + + +class _ListAttrConstraint(FieldConstraint): + """Test-only constraint with a list-valued attribute (fingerprint hashability guard).""" + + def __init__(self, items: list[str]) -> None: + self.items = list(items) + + def __get_pydantic_core_schema__( + self, source: type[Any], handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + return handler(source) + + +class TestConstraintsFingerprint: + """_constraints_fingerprint produces value-stable keys across distinct instances.""" + + def test_marker_constraint_equal_instances_same_fingerprint(self) -> None: + """Two distinct UniqueItemsConstraint() instances fingerprint equal.""" + fs1 = _make_array_field(UniqueItemsConstraint()) + fs2 = _make_array_field(UniqueItemsConstraint()) + assert _constraints_fingerprint(fs1) == _constraints_fingerprint(fs2) + + def test_parametric_constraint_equal_instances_same_fingerprint(self) -> None: + """Two distinct CountryCodeAlpha2Constraint() instances fingerprint equal.""" + fs1 = _make_scalar_field(CountryCodeAlpha2Constraint()) + fs2 = _make_scalar_field(CountryCodeAlpha2Constraint()) + assert _constraints_fingerprint(fs1) == _constraints_fingerprint(fs2) + + def test_different_attribute_values_unequal_fingerprint(self) -> None: + """Constraints differing in attribute value produce different fingerprints.""" + fs1 = _make_array_field(MinLen(1)) + fs2 = _make_array_field(MinLen(5)) + assert _constraints_fingerprint(fs1) != _constraints_fingerprint(fs2) + + def test_equal_constraints_do_not_raise(self) -> None: + """Union extraction does not raise when members share a field with equal 
constraints.""" + extract_union("TestSegmentEqualConstraints", TestSegmentEqualConstraints) + + def test_different_zero_attr_constraint_classes_unequal(self) -> None: + """Two different marker constraint classes with no attributes fingerprint unequal. + + Both `UniqueItemsConstraint()` and `JsonPointerConstraint()` have no + instance attributes, so their keys differ only by qualified class name. + """ + fs1 = _make_array_field(UniqueItemsConstraint()) + fs2 = _make_array_field(JsonPointerConstraint()) + assert _constraints_fingerprint(fs1) != _constraints_fingerprint(fs2) + + def test_pattern_constraint_flags_distinguish_fingerprint(self) -> None: + """PatternConstraints with the same source but different flags diverge. + + Value equality on `PatternConstraint` normalizes the compiled + `re.Pattern` to `(pattern, flags)`, so a flag difference produces + distinct fingerprints rather than collapsing. + """ + fs1 = _make_scalar_field(PatternConstraint(r"^[a-z]+$", "err")) + fs2 = _make_scalar_field(PatternConstraint(r"^[a-z]+$", "err", re.IGNORECASE)) + assert _constraints_fingerprint(fs1) != _constraints_fingerprint(fs2) + + def test_distinct_value_eq_constraints_diverge(self) -> None: + """Same type, different attributes diverge -- the case dedup guards. + + Per-variant divergence on a structurally identical field is the + condition `_constraints_fingerprint` exists to catch. Two + `PatternConstraint`s differing only in source pattern must not collapse. + """ + fs1 = _make_scalar_field(PatternConstraint(r"^[a-z]+$", "err")) + fs2 = _make_scalar_field(PatternConstraint(r"^[0-9]+$", "err")) + assert _constraints_fingerprint(fs1) != _constraints_fingerprint(fs2) + + def test_container_valued_constraint_routes_through_fingerprint(self) -> None: + """A container-valued constraint builds a frozenset without raising. + + Guards the original failure mode: an unhashable constraint key crashed + `frozenset` construction in `_constraints_fingerprint`. 
Value `__hash__` + on the constraint normalizes the list attribute, so equal instances + both hash and collapse. + """ + fs1 = _make_array_field(_ListAttrConstraint(["a", "b"])) + fs2 = _make_array_field(_ListAttrConstraint(["a", "b"])) + assert _constraints_fingerprint(fs1) == _constraints_fingerprint(fs2) + + def test_foreign_identity_eq_metadata_equal_instances_collapse(self) -> None: + """Raw pydantic `Field(...)` metadata compares by identity but collapses. + + Pydantic's internal metadata is the lone constraint type that falls + back to identity equality, so two equal-valued instances would + fingerprint as divergent. `_fingerprint_key` keys it on its + value-stable `repr` so equal metadata still collapses. + """ + raw1 = Field(pattern=r"^[a-z]+$").metadata[0] + raw2 = Field(pattern=r"^[a-z]+$").metadata[0] + assert raw1 != raw2 + fs1 = _make_scalar_field(raw1) + fs2 = _make_scalar_field(raw2) + assert _constraints_fingerprint(fs1) == _constraints_fingerprint(fs2) + + def test_foreign_identity_eq_metadata_distinct_values_diverge(self) -> None: + """Different raw `Field(...)` patterns produce divergent fingerprints.""" + fs1 = _make_scalar_field(Field(pattern=r"^[a-z]+$").metadata[0]) + fs2 = _make_scalar_field(Field(pattern=r"^[0-9]+$").metadata[0]) + assert _constraints_fingerprint(fs1) != _constraints_fingerprint(fs2) diff --git a/packages/overture-schema-pyspark/README.md b/packages/overture-schema-pyspark/README.md index ef13ce9e9..fdadde3fc 100644 --- a/packages/overture-schema-pyspark/README.md +++ b/packages/overture-schema-pyspark/README.md @@ -15,12 +15,12 @@ schema changes rather than editing the generated output. 
```python from pyspark.sql import SparkSession -from overture.schema.pyspark import validate_feature, explain_errors +from overture.schema.pyspark import validate_model, explain_errors spark = SparkSession.builder.getOrCreate() df = spark.read.parquet("samples/segment.parquet") -result = validate_feature(df, "segment") +result = validate_model(df, "segment") result.evaluated.cache() total_rows = result.evaluated.count() @@ -32,17 +32,17 @@ if error_count > 0: violations.select("id", "field", "check", "message").show(truncate=False) ``` -`validate_feature()` looks up the feature type in the registry, compares +`validate_model()` looks up the feature type in the registry, compares schemas, and evaluates all checks in a single pass. It returns a `ValidationResult` with the evaluated DataFrame, the checks that ran, any schema mismatches, and suppressed checks. | Function | Returns | Description | | --- | --- | --- | -| `validate_feature(df, type)` | `ValidationResult` | Registry lookup, schema comparison, check evaluation. | +| `validate_model(df, model_type)` | `ValidationResult` | Registry lookup, schema comparison, check evaluation. | | `result.error_rows()` | `DataFrame` | Rows with at least one violation. Original columns only. | | `explain_errors(evaluated, checks)` | `DataFrame` | One row per violation. Adds `field`, `check`, `message` columns. | -| `feature_types()` | `list[str]` | Available feature type names, sorted. | +| `model_names()` | `list[str]` | Available model type names, sorted. | Lower-level helpers (`evaluate_checks`, `filter_errors`) are available for consumers needing finer control. All public symbols are re-exported @@ -142,7 +142,7 @@ Any `--conf` values override the S3A defaults. 
## Architecture ```text -validate_feature() Entry point -- registry lookup, schema check, evaluation +validate_model() Entry point -- registry lookup, schema check, evaluation | list[Check] Interface -- frozen (field, name, expr, shape) tuples | @@ -163,7 +163,7 @@ that tells `evaluate_checks()` how to normalize the result. Expression builders (like `connector_checks()`) are generated by `overture-schema-codegen` from Pydantic schema models and registered in `REGISTRY` by feature type name, paired with an expected `StructType` -schema via `FeatureValidation`. +schema via `ModelValidation`. ## Generated expression builders @@ -227,12 +227,12 @@ def _sources_dataset_check() -> Check: ) ``` -The registry maps feature type names to `FeatureValidation` pairs: +The registry maps feature type names to `ModelValidation` pairs: ```python -from overture.schema.pyspark.check import FeatureValidation +from overture.schema.pyspark.check import ModelValidation from overture.schema.pyspark._registry import REGISTRY # REGISTRY is auto-generated: -# REGISTRY["connector"] = FeatureValidation(schema=CONNECTOR_SCHEMA, checks=connector_checks) +# REGISTRY["connector"] = ModelValidation(schema=CONNECTOR_SCHEMA, checks=connector_checks) ``` diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py index cb262b3d7..8841ae9f4 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py @@ -6,10 +6,10 @@ ValidationResult, evaluate_checks, explain_errors, - feature_keys, - feature_names, filter_errors, - validate_feature, + model_keys, + model_names, + validate_model, ) __all__ = [ @@ -20,8 +20,8 @@ "compare_schemas", "evaluate_checks", "explain_errors", - "feature_keys", - "feature_names", + "model_keys", + "model_names", "filter_errors", - "validate_feature", + "validate_model", ] diff 
--git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py index 85158fb79..dd40f92af 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py @@ -2,7 +2,7 @@ Built at import time by walking the generated `expressions.generated` namespace and collecting every module that exposes the -codegen-emitted `ENTRY_POINT` and `FEATURE_VALIDATION` constants. +codegen-emitted `ENTRY_POINT` and `MODEL_VALIDATION` constants. The generated tree on disk is the runtime source of truth: the registry contains exactly what was generated, regardless of which @@ -17,14 +17,14 @@ import logging import pkgutil -from .check import FeatureValidation +from .check import ModelValidation logger = logging.getLogger(__name__) _GENERATED_ROOT = "overture.schema.pyspark.expressions.generated" -def _walk() -> tuple[dict[str, FeatureValidation], dict[str, dict[str, str]]]: +def _walk() -> tuple[dict[str, ModelValidation], dict[str, dict[str, str]]]: """Walk the generated tree and collect registry + partition map. Returns a `(registry, partition_map)` pair: @@ -39,7 +39,7 @@ def _walk() -> tuple[dict[str, FeatureValidation], dict[str, dict[str, str]]]: get a complete partition path without the codegen having to duplicate the type value. 
""" - registry: dict[str, FeatureValidation] = {} + registry: dict[str, ModelValidation] = {} partition_map: dict[str, dict[str, str]] = {} try: @@ -52,7 +52,7 @@ def _walk() -> tuple[dict[str, FeatureValidation], dict[str, dict[str, str]]]: continue module = importlib.import_module(info.name) entry_point = getattr(module, "ENTRY_POINT", None) - validation = getattr(module, "FEATURE_VALIDATION", None) + validation = getattr(module, "MODEL_VALIDATION", None) if entry_point is None or validation is None: continue registry[entry_point] = validation diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py index de6e5f955..9500cf4e7 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py @@ -27,21 +27,23 @@ class Check: and report grouping), not how to access the data. The expression in `expr` already encodes the access pattern. - `root_field` is the top-level schema column the check belongs to, - or None for synthetic model-level checks (radio_group, require_any_of) - that don't correspond to a single column. Used by `validate_feature` - to suppress or skip checks by column name. + `read_columns` names every top-level schema column the expression + dereferences -- one for a plain field check, several for a model-level + check that spans columns, plus any discriminator a variant gate reads. + `validate_model` drops a check when any column it reads is skipped or + structurally absent, so an unresolvable `F.col()` never reaches Spark; + it also treats these as the columns a check can be suppressed by name. 
""" field: str name: str expr: Column shape: CheckShape - root_field: str | None + read_columns: frozenset[str] @dataclass(frozen=True) -class FeatureValidation: +class ModelValidation: """Pairs an expected schema with check builders for a feature type.""" schema: StructType diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py index 2be4f9aeb..62d339128 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py @@ -3,10 +3,11 @@ from __future__ import annotations import sys -from collections.abc import Mapping +from collections.abc import Collection, Mapping from dataclasses import dataclass import click +from pyspark.errors import AnalysisException from pyspark.sql import DataFrame, SparkSession from overture.schema.system.discovery import resolve_entry_point_key @@ -15,8 +16,8 @@ from ._registry import PARTITION_MAP, REGISTRY from .validate import ( explain_errors, - feature_names, - validate_feature, + model_names, + validate_model, ) @@ -32,37 +33,75 @@ class ReadSpec: base_path: str | None = None +def absent_column(exc: AnalysisException, columns: Collection[str]) -> str | None: + """The top-level column an unresolved-column error names, if absent from data. + + Returns the column name only when `exc` is an `UNRESOLVED_COLUMN` error + whose target is genuinely missing from `columns` -- the case a re-run with + `--skip-columns` resolves. Every other `AnalysisException` (a struct field + accessed on a scalar, a type mismatch, an unresolved column that is in fact + present) returns None, marking it a generator or expression bug to surface + rather than steer toward `--skip-columns`. + + Parameters + ---------- + exc + The exception raised while Spark planned the check expressions. + columns + The data's top-level column names (`df.columns`). 
+ """ + condition = exc.getCondition() + if condition is None or not condition.startswith("UNRESOLVED_COLUMN"): + return None + object_name = (exc.getMessageParameters() or {}).get("objectName") + if not object_name: + return None + # objectName is backtick-quoted, e.g. `phantom` or `bbox`.`xmin`; the + # top-level segment is the column df.columns would carry. + top_level = object_name.split(".", 1)[0].strip("`") + return top_level if top_level not in columns else None + + def resolve_read(path: str, partitions: Mapping[str, str] | None) -> ReadSpec: """Determine read strategy from path structure. - Three cases: + The partition map is an ordered Hive hierarchy + (`{"theme": "buildings", "type": "building"}`). A path supplies a + prefix of it; the leaves below the deepest level already present are + appended so the read always lands on a single feature type. Cases: - 1. **Hive partition path** (contains `/{key}=` for some key in - `partitions`) -- derive `basePath` so Spark discovers partition - columns. - 2. **Individual file** (`*.parquet`) or no partitions -- read + 1. **Individual file** (`*.parquet`) or no partitions -- read directly; data already contains the partition columns inline. - 3. **Release root** -- append the partition path - (`key1=v1/key2=v2/...`) and set `basePath` to the original path. + 2. **Release root** (no partition directories) -- append the full + partition path and set `basePath` to the original path. + 3. **Partial partition path** (`theme=X/`) -- append the missing + leaves (`type=Y`) so a single feature's checks aren't run against + every type sharing the theme directory. + 4. **Leaf partition path** (`theme=X/type=Y/`) -- nothing to append; + read it directly with `basePath` derived. 
""" stripped = path.rstrip("/") - # Path already contains Hive partition directories - for key in partitions or (): - idx = stripped.find(f"/{key}=") - if idx >= 0: - return ReadSpec(data_path=path, base_path=stripped[:idx]) - # Individual file or no partition mapping — data has partition columns inline if stripped.endswith(".parquet") or not partitions: return ReadSpec(data_path=path) - # Release root — construct leaf path from partition map - partition_path = "/".join(f"{k}={v}" for k, v in partitions.items()) - return ReadSpec( - data_path=f"{stripped}/{partition_path}", - base_path=stripped, - ) + keys = list(partitions) + # Partition levels already present in the path, in hierarchy order. + present = [i for i, key in enumerate(keys) if f"/{key}=" in stripped] + depth = present[-1] + 1 if present else 0 # count of levels already filled + leaves = "/".join(f"{key}={partitions[key]}" for key in keys[depth:]) + + if not present: + # Release root — append the full partition path; it is the base. + return ReadSpec(data_path=f"{stripped}/{leaves}", base_path=stripped) + + # Path already contains partition directories: the base is the release + # root (before the first one); append any leaves below the deepest + # present level (none for a leaf path, which then reads as-is). + base_idx = stripped.find(f"/{keys[present[0]]}=") + data_path = f"{stripped}/{leaves}" if leaves else path + return ReadSpec(data_path=data_path, base_path=stripped[:base_idx]) def read_feature(spark: SparkSession, spec: ReadSpec) -> DataFrame: @@ -182,7 +221,7 @@ def validate_cli( resolved = resolve_entry_point_key(feature_type, REGISTRY) except ValueError: click.echo( - f"Unknown type '{feature_type}'. Known: {', '.join(feature_names())}", + f"Unknown type '{feature_type}'. 
Known: {', '.join(model_names())}", err=True, ) sys.exit(1) @@ -205,7 +244,7 @@ def validate_cli( suppress.append(s) try: - result = validate_feature( + result = validate_model( df, resolved, skip_columns=skip_columns, @@ -215,20 +254,35 @@ def validate_cli( except ValueError as e: click.echo(str(e), err=True) sys.exit(1) + except AnalysisException as e: + # Backstop, narrowed to the one cause `--skip-columns` can address: a + # check that names a column missing from the data. validate_model + # already drops checks for skipped and schema-absent columns, so this + # fires only on a column outside the expected schema -- offer the + # operator the skip lever and name the column. Every other + # AnalysisException (a type mismatch, a struct field read off a scalar) + # is a generator bug `--skip-columns` cannot fix; let it propagate as a + # traceback rather than mask it behind the skip hint. + column = absent_column(e, df.columns) + if column is None: + raise + click.echo( + f"A check references column '{column}', absent from the data at {path}.", + err=True, + ) + click.echo( + f"Re-run with `--skip-columns {column}` to skip its checks, " + "or `--skip-schema-check`.", + err=True, + ) + sys.exit(1) if result.schema_mismatches: click.echo(f"Schema mismatches for {resolved}:", err=True) for m in result.schema_mismatches: click.echo(f" {m.path}: expected {m.expected}, got {m.actual}", err=True) - absent_columns = list( - dict.fromkeys( - m.path.split(".", 1)[0] - for m in result.schema_mismatches - if m.actual == "missing" - ) - ) - if absent_columns: - flags = " ".join(f"--skip-columns {c}" for c in absent_columns) + if result.absent_columns: + flags = " ".join(f"--skip-columns {c}" for c in result.absent_columns) click.echo( f" Re-run with `{flags}` to skip missing columns.", err=True, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py index c6d274790..efe1e6198 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py @@ -68,6 +68,44 @@ def nested_array_check( return _null_guarded_transform(_resolve_column(column), check_fn, flatten=True) +def _map_projection_check( + column: str | Column, + projector: Callable[[Column], Column], + check_fn: Callable[[Column], Column], +) -> Column: + """Project a map column to an array, then null-guard and transform it. + + *projector* is `F.map_keys` or `F.map_values`. The projection already + yields a Column, so this calls `_null_guarded_transform` directly -- + routing through `array_check` would re-resolve an already-resolved + Column. A null map column projects to null, which the guard yields + through as null. + """ + return _null_guarded_transform(projector(_resolve_column(column)), check_fn) + + +def map_keys_check( + column: str | Column, check_fn: Callable[[Column], Column] +) -> Column: + """Validate a map's keys: project to `map_keys`, then array-check. + + *check_fn* receives each map key and returns a string Column (error + message) or null. A null map column yields null. + """ + return _map_projection_check(column, F.map_keys, check_fn) + + +def map_values_check( + column: str | Column, check_fn: Callable[[Column], Column] +) -> Column: + """Validate a map's values: project to `map_values`, then array-check. + + *check_fn* receives each map value and returns a string Column (error + message) or null. A null map column yields null. + """ + return _map_projection_check(column, F.map_values, check_fn) + + def check_struct_unique(column: str | Column) -> Column: """Check that an array has no duplicate items by whole-element comparison. 
diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py index 9982b1486..fc616fcbe 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py @@ -25,16 +25,26 @@ from .column_patterns import error_msg -_WKB_TYPE_HEX: dict[GeometryType, str] = { - GeometryType.POINT: "01", - GeometryType.LINE_STRING: "02", - GeometryType.POLYGON: "03", - GeometryType.MULTI_POINT: "04", - GeometryType.MULTI_LINE_STRING: "05", - GeometryType.MULTI_POLYGON: "06", - GeometryType.GEOMETRY_COLLECTION: "07", +_WKB_TYPE_CODE: dict[GeometryType, int] = { + GeometryType.POINT: 1, + GeometryType.LINE_STRING: 2, + GeometryType.POLYGON: 3, + GeometryType.MULTI_POINT: 4, + GeometryType.MULTI_LINE_STRING: 5, + GeometryType.MULTI_POLYGON: 6, + GeometryType.GEOMETRY_COLLECTION: 7, } +# A 3D/measured geometry encodes its extra dimensions in the WKB type word two +# different ways. EWKB (shapely's `.wkb` default, PostGIS, GDAL old-OGC) sets +# high flag bits -- Z=0x80000000, M=0x40000000, SRID=0x20000000 -- leaving the +# base type in the low bits. ISO WKB (mandated by GeoParquet) instead offsets +# the type by 1000/2000/3000 for Z/M/ZM (PointZ=1001). Masking off the EWKB +# flag nibble and then taking the value mod 1000 recovers the OGC base type +# (1-7) from either encoding. 
+_EWKB_FLAG_MASK = 0x0FFFFFFF +_ISO_DIMENSION_MODULUS = 1000 + _BOUND_OPS: dict[str, tuple[str, Callable[[Column, float | int], Column]]] = { "ge": (">=", lambda c, v: c < v), @@ -69,8 +79,17 @@ def check_bounds( ) if not checks: return F.lit(None).cast("string") - # null col -> all F.when checks return null (no false positive) - return F.coalesce(*checks) + # NaN satisfies no Pydantic bound (every comparison against it is False), + # but Spark sorts NaN above all values, so lower bounds (NaN < v / NaN <= v) + # never fire on it. Reject NaN explicitly whenever any bound applies. The + # cast keeps integer columns -- which can never be NaN -- from erroring. + nan_check = F.when( + F.isnan(col.cast("double")), + error_msg("must be a number, got ", col.cast("string")), + ) + # null col -> isnan is False (nan_check null) and every bound comparison is + # null, so coalesce yields null (no false positive) + return F.coalesce(nan_check, *checks) def check_enum( @@ -101,6 +120,17 @@ def check_pattern(col: Column, pattern: str, *, label: str) -> Column: label Human-readable description used in error messages: `"invalid {label}: got '...'"` + + Notes + ----- + `rlike` runs Java's regex engine against patterns authored for Python's + `re` (the engine Pydantic validates with). The dialects coincide on the + ASCII character ranges the schema patterns use, but diverge on the + shorthand classes: Java's `\\d \\s \\w \\S` are ASCII-only while Python's + are Unicode, so e.g. `^\\S+$` accepts a non-breaking space here that + Pydantic rejects, and `.` excludes a different set of line terminators. + These divergences are accepted -- the affected inputs (Unicode digits, + exotic whitespace) do not occur in practice for the constrained fields. 
""" msg = error_msg(f"invalid {label}: got '", col.cast("string"), F.lit("'")) return F.when(col.isNotNull() & ~col.rlike(pattern), msg) @@ -109,20 +139,29 @@ def check_pattern(col: Column, pattern: str, *, label: str) -> Column: def check_url_format(col: Column) -> Column: """HTTP/HTTPS URL format check via pattern match. Returns error string or null. - Pydantic's `HttpUrl` additionally normalizes values (adds trailing - slash, lowercases host and scheme) before validation and comparison. - This check validates the raw string without normalization — format - acceptance is broader, and downstream uniqueness checks compare - un-normalized values. + Pydantic's `HttpUrl` normalizes values (adds trailing slash, lowercases + host and scheme) before validation and comparison. This check validates + the raw string without normalization, with one concession to scheme + normalization: `(?i)` accepts an upper- or mixed-case scheme (`HTTP://`) + the way Pydantic's lowercasing does. Host case is left un-normalized, so + downstream uniqueness checks still compare un-normalized values. """ - return check_pattern(col, r"^https?://[^\s]+\z", label="HTTP/HTTPS URL") + return check_pattern(col, r"(?i)^https?://[^\s]+\z", label="HTTP/HTTPS URL") + + +# Maximum HTTP(S) URL length, mirroring Pydantic's HttpUrl cap (the de facto +# 2083-character limit from legacy browsers). +_MAX_URL_LENGTH = 2083 def check_url_length(col: Column) -> Column: - """URL length check: must not exceed 2083 characters. Returns error string or null.""" + """URL length check: must not exceed the maximum URL length. 
Returns error string or null.""" return F.when( - col.isNotNull() & (F.length(col) > 2083), - error_msg("URL exceeds 2083 characters: length ", F.length(col).cast("string")), + col.isNotNull() & (F.length(col) > _MAX_URL_LENGTH), + error_msg( + f"URL exceeds {_MAX_URL_LENGTH} characters: length ", + F.length(col).cast("string"), + ), ) @@ -198,6 +237,14 @@ def check_string_max_length(col: Column, max_len: int) -> Column: the `.*` in the middle position still matches anything. Policing interior content is a separate concern. +Divergence from Pydantic (accepted): Pydantic's stripped pattern +`^(\S(.*\S)?)?\Z` runs without DOTALL, so an interior newline makes it +fail (its `.` cannot cross the newline to reach the closing anchor). The +`(?s)` here lets `.*` cross newlines, so a string with an interior newline +but clean boundaries passes Spark while Pydantic rejects it. Stripped +fields are short identifiers/names where interior newlines do not occur, +so the looser behavior is accepted rather than matched. + Flags: `(?s)` (DOTALL) lets `.*` cross newlines. `(?U)` (UNICODE_CHARACTER_CLASS) gives `\s` full Unicode coverage. `\z` (absolute end-of-input) avoids `$` matching before a trailing newline. @@ -408,32 +455,38 @@ def check_geometry_type( col: Column, *allowed: GeometryType, ) -> Column: - """Geometry type check via WKB header byte parsing. + """Geometry type check via WKB header parsing. - Reads the endianness indicator and type uint32 from the WKB binary - without deserializing coordinates. O(1) per row regardless of + Reads the endianness indicator and the 4-byte type word from the WKB + binary without deserializing coordinates. O(1) per row regardless of geometry complexity. - Extracts only the low byte of the type uint32, which is safe for - OGC types 1-7 and immune to Z/M/ZM flag bits (those modify high - bytes only). 
+ Normalizes both 3D/measured encodings (EWKB high flag bits and the ISO + WKB dimension offset) down to the OGC base type, so a valid PointZ + validates as a Point regardless of how its dimensions were encoded. """ hex_geom = F.hex(col) byte_order = F.substring(hex_geom, 1, 2) - # LE: type LSB at hex positions 3-4 - # BE: type LSB at hex positions 9-10 - type_hex = F.when( - byte_order == "01", - F.substring(hex_geom, 3, 2), - ).otherwise( + # The 4-byte type word follows the 1-byte order flag (hex positions 3-10). + # Big-endian stores it most-significant byte first (read as-is); little- + # endian stores it least-significant byte first, so reverse the byte pairs. + be_type_hex = F.substring(hex_geom, 3, 8) + le_type_hex = F.concat( F.substring(hex_geom, 9, 2), + F.substring(hex_geom, 7, 2), + F.substring(hex_geom, 5, 2), + F.substring(hex_geom, 3, 2), ) - allowed_hex = [_WKB_TYPE_HEX[t] for t in allowed] + type_word = F.conv( + F.when(byte_order == "01", le_type_hex).otherwise(be_type_hex), 16, 10 + ).cast("long") + base_type = type_word.bitwiseAND(_EWKB_FLAG_MASK) % _ISO_DIMENSION_MODULUS + allowed_codes = [_WKB_TYPE_CODE[t] for t in allowed] names = " | ".join(t.geo_json_type for t in allowed) - if len(allowed_hex) == 1: - violation = type_hex != allowed_hex[0] + if len(allowed_codes) == 1: + violation = base_type != allowed_codes[0] else: - violation = ~type_hex.isin(allowed_hex) + violation = ~base_type.isin(allowed_codes) return F.when( col.isNotNull() & violation, error_msg(f"expected {names} geometry"), diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py index 19d17b7ba..cb9366dc5 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py +++ 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py @@ -15,7 +15,7 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) @@ -50,7 +50,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -60,7 +60,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -72,7 +72,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -82,7 +82,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -92,7 +92,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -102,7 +102,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -112,7 +112,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -122,7 +122,7 @@ def _geometry_geometry_type_check() -> Check: name="geometry_type", expr=check_geometry_type(F.col("geometry"), 
GeometryType.POINT), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -132,7 +132,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -142,7 +142,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["addresses"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -152,7 +152,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -162,7 +162,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["address"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -172,7 +172,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -182,7 +182,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -192,7 +192,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -202,7 +202,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -212,7 +212,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), 
) @@ -222,7 +222,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -232,7 +232,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -242,27 +242,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -274,7 +274,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -286,7 +286,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -296,7 +296,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: 
check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -306,7 +306,7 @@ def _address_levels_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("address_levels"), 1), shape=CheckShape.SCALAR, - root_field="address_levels", + read_columns=frozenset({"address_levels"}), ) @@ -316,7 +316,7 @@ def _address_levels_max_length_check() -> Check: name="array_max_length", expr=check_array_max_length(F.col("address_levels"), 5), shape=CheckShape.SCALAR, - root_field="address_levels", + read_columns=frozenset({"address_levels"}), ) @@ -328,7 +328,7 @@ def _address_levels_value_string_min_length_check() -> Check: "address_levels", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="address_levels", + read_columns=frozenset({"address_levels"}), ) @@ -338,7 +338,7 @@ def _address_levels_value_stripped_check() -> Check: name="stripped", expr=array_check("address_levels", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="address_levels", + read_columns=frozenset({"address_levels"}), ) @@ -348,7 +348,7 @@ def _country_required_check() -> Check: name="required", expr=check_required(F.col("country")), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -360,7 +360,7 @@ def _country_country_code_alpha2_check() -> Check: F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" ), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -370,7 +370,7 @@ def _number_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("number"), 1), shape=CheckShape.SCALAR, - root_field="number", + read_columns=frozenset({"number"}), ) @@ -380,7 +380,7 @@ def _number_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("number")), shape=CheckShape.SCALAR, - 
root_field="number", + read_columns=frozenset({"number"}), ) @@ -390,7 +390,7 @@ def _postal_city_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("postal_city"), 1), shape=CheckShape.SCALAR, - root_field="postal_city", + read_columns=frozenset({"postal_city"}), ) @@ -400,7 +400,7 @@ def _postal_city_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("postal_city")), shape=CheckShape.SCALAR, - root_field="postal_city", + read_columns=frozenset({"postal_city"}), ) @@ -410,7 +410,7 @@ def _postcode_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("postcode"), 1), shape=CheckShape.SCALAR, - root_field="postcode", + read_columns=frozenset({"postcode"}), ) @@ -420,7 +420,7 @@ def _postcode_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("postcode")), shape=CheckShape.SCALAR, - root_field="postcode", + read_columns=frozenset({"postcode"}), ) @@ -430,7 +430,7 @@ def _street_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("street"), 1), shape=CheckShape.SCALAR, - root_field="street", + read_columns=frozenset({"street"}), ) @@ -440,7 +440,7 @@ def _street_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("street")), shape=CheckShape.SCALAR, - root_field="street", + read_columns=frozenset({"street"}), ) @@ -450,7 +450,7 @@ def _unit_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("unit"), 1), shape=CheckShape.SCALAR, - root_field="unit", + read_columns=frozenset({"unit"}), ) @@ -460,7 +460,7 @@ def _unit_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("unit")), shape=CheckShape.SCALAR, - root_field="unit", + read_columns=frozenset({"unit"}), ) @@ -557,7 +557,7 @@ def address_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "addresses"} -FEATURE_VALIDATION = FeatureValidation( 
+MODEL_VALIDATION = ModelValidation( schema=ADDRESS_SCHEMA, checks=address_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py index 026130578..64e09d7d2 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py @@ -15,14 +15,17 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions.column_patterns import ( array_check, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( check_array_max_length, check_array_min_length, + check_bounds, check_enum, check_pattern, check_required, @@ -37,7 +40,7 @@ def _datasets_check() -> Check: name="required", expr=check_required(F.col("datasets")), shape=CheckShape.SCALAR, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -47,7 +50,7 @@ def _datasets_source_name_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["source_name"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -59,7 +62,7 @@ def _datasets_source_dataset_name_check() -> Check: "datasets", lambda el: check_required(el["source_dataset_name"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -69,7 +72,7 @@ def _datasets_data_url_required_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["data_url"])), 
shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -79,7 +82,7 @@ def _datasets_data_url_url_format_check() -> Check: name="url_format", expr=array_check("datasets", lambda el: check_url_format(el["data_url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -89,7 +92,7 @@ def _datasets_data_url_url_length_check() -> Check: name="url_length", expr=array_check("datasets", lambda el: check_url_length(el["data_url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -101,7 +104,7 @@ def _datasets_data_url_archived_required_check() -> Check: "datasets", lambda el: check_required(el["data_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -113,7 +116,7 @@ def _datasets_data_url_archived_url_format_check() -> Check: "datasets", lambda el: check_url_format(el["data_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -125,7 +128,7 @@ def _datasets_data_url_archived_url_length_check() -> Check: "datasets", lambda el: check_url_length(el["data_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -135,7 +138,7 @@ def _datasets_license_url_required_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["license_url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -145,7 +148,7 @@ def _datasets_license_url_url_format_check() -> Check: name="url_format", expr=array_check("datasets", lambda el: check_url_format(el["license_url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -155,7 +158,7 @@ def _datasets_license_url_url_length_check() -> Check: name="url_length", expr=array_check("datasets", lambda el: 
check_url_length(el["license_url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -167,7 +170,7 @@ def _datasets_license_url_archived_required_check() -> Check: "datasets", lambda el: check_required(el["license_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -179,7 +182,7 @@ def _datasets_license_url_archived_url_format_check() -> Check: "datasets", lambda el: check_url_format(el["license_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -191,7 +194,7 @@ def _datasets_license_url_archived_url_length_check() -> Check: "datasets", lambda el: check_url_length(el["license_url_archived"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -201,7 +204,7 @@ def _datasets_license_type_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["license_type"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -211,7 +214,7 @@ def _datasets_license_text_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["license_text"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -223,7 +226,7 @@ def _datasets_license_attribution_check() -> Check: "datasets", lambda el: check_required(el["license_attribution"]) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -233,7 +236,7 @@ def _datasets_coverage_bbox_check() -> Check: name="required", expr=array_check("datasets", lambda el: check_required(el["coverage_bbox"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -245,7 +248,7 @@ def _datasets_coverage_bbox_min_length_check() -> Check: "datasets", lambda el: check_array_min_length(el["coverage_bbox"], 4) ), 
shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -257,7 +260,7 @@ def _datasets_coverage_bbox_max_length_check() -> Check: "datasets", lambda el: check_array_max_length(el["coverage_bbox"], 4) ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -267,7 +270,7 @@ def _datasets_url_url_format_check() -> Check: name="url_format", expr=array_check("datasets", lambda el: check_url_format(el["url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -277,7 +280,7 @@ def _datasets_url_url_length_check() -> Check: name="url_length", expr=array_check("datasets", lambda el: check_url_length(el["url"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -287,7 +290,7 @@ def _datasets_url_archived_url_format_check() -> Check: name="url_format", expr=array_check("datasets", lambda el: check_url_format(el["url_archived"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -297,7 +300,7 @@ def _datasets_url_archived_url_length_check() -> Check: name="url_length", expr=array_check("datasets", lambda el: check_url_length(el["url_archived"])), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -312,7 +315,7 @@ def _datasets_data_download_url_url_format_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -327,7 +330,7 @@ def _datasets_data_download_url_url_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -345,7 +348,7 @@ def _datasets_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -360,7 +363,7 @@ def _datasets_build_source_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="datasets", + 
read_columns=frozenset({"datasets"}), ) @@ -373,7 +376,7 @@ def _datasets_update_type_check() -> Check: lambda el: check_enum(el["update_type"], ["continuous", "manual"]), ), shape=CheckShape.ARRAY, - root_field="datasets", + read_columns=frozenset({"datasets"}), ) @@ -383,7 +386,30 @@ def _license_priority_check() -> Check: name="required", expr=check_required(F.col("license_priority")), shape=CheckShape.SCALAR, - root_field="license_priority", + read_columns=frozenset({"license_priority"}), + ) + + +def _license_priority_key_check() -> Check: + return Check( + field="license_priority{key}", + name="pattern", + expr=map_keys_check( + "license_priority", + lambda k: check_pattern(k, "^[A-Za-z0-9._+\\-]+\\z", label="pattern"), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"license_priority"}), + ) + + +def _license_priority_value_check() -> Check: + return Check( + field="license_priority{value}", + name="bounds", + expr=map_values_check("license_priority", lambda v: check_bounds(v, ge=0)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"license_priority"}), ) @@ -421,6 +447,8 @@ def sources_checks() -> list[Check]: _datasets_build_source_check(), _datasets_update_type_check(), _license_priority_check(), + _license_priority_key_check(), + _license_priority_value_check(), ] @@ -480,7 +508,7 @@ def sources_checks() -> list[Check]: PARTITIONS: dict[str, str] = {} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=SOURCES_SCHEMA, checks=sources_checks, ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py index b57a1f074..19b279a5a 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py +++ 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py @@ -15,7 +15,7 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) @@ -49,7 +49,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -59,7 +59,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -71,7 +71,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -81,7 +81,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -91,7 +91,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -101,7 +101,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -111,7 +111,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -123,7 +123,7 @@ def _geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON ), 
shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -133,7 +133,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -143,7 +143,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -153,7 +153,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -163,7 +163,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["bathymetry"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -173,7 +173,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -183,7 +183,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -193,7 +193,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -203,7 +203,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -213,7 +213,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -223,7 +223,7 @@ 
def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -233,7 +233,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -243,27 +243,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -275,7 +275,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -287,7 +287,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -297,7 +297,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), 
shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -307,7 +307,7 @@ def _depth_required_check() -> Check: name="required", expr=check_required(F.col("depth")), shape=CheckShape.SCALAR, - root_field="depth", + read_columns=frozenset({"depth"}), ) @@ -317,67 +317,67 @@ def _depth_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("depth"), ge=0), shape=CheckShape.SCALAR, - root_field="depth", + read_columns=frozenset({"depth"}), ) def _cartography_prominence_bounds_check() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_0", name="bounds", expr=check_bounds(F.col("cartography.prominence"), ge=1), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_prominence_bounds_check_1() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_1", name="bounds", expr=check_bounds(F.col("cartography.prominence"), le=100), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check_1() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_1", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.max_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + 
read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check_1() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_1", name="bounds", expr=check_bounds(F.col("cartography.max_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) @@ -471,7 +471,7 @@ def bathymetry_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=BATHYMETRY_SCHEMA, checks=bathymetry_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py index d388b7da1..8372383df 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py @@ -16,13 +16,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -51,7 +53,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -61,7 +63,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), 
shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -73,7 +75,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -83,7 +85,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -93,7 +95,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -103,7 +105,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -113,7 +115,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -129,7 +131,7 @@ def _geometry_geometry_type_check() -> Check: GeometryType.POLYGON, ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -139,7 +141,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -149,7 +151,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -159,7 +161,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -169,7 +171,7 @@ def _type_enum_check() -> Check: name="enum", 
expr=check_enum(F.col("type"), ["infrastructure"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -179,7 +181,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -189,7 +191,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -199,7 +201,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -209,7 +211,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -219,7 +221,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -229,7 +231,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -239,7 +241,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -249,27 +251,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def 
_sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -281,7 +283,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -293,7 +295,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -303,7 +305,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -313,7 +315,7 @@ def _class_required_check() -> Check: name="required", expr=check_required(F.col("class")), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -491,7 +493,7 @@ def _class_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -501,7 +503,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -533,7 +535,7 @@ def _subtype_enum_check() -> Check: ], ), 
shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -543,7 +545,7 @@ def _height_check() -> Check: name="bounds", expr=check_bounds(F.col("height"), gt=0.0), shape=CheckShape.SCALAR, - root_field="height", + read_columns=frozenset({"height"}), ) @@ -581,7 +583,7 @@ def _surface_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="surface", + read_columns=frozenset({"surface"}), ) @@ -591,7 +593,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -601,7 +603,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -611,7 +613,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -621,7 +650,7 @@ def _names_rules_value_required_check() -> Check: name="required", 
expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -633,7 +662,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -643,7 +672,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -653,7 +682,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -668,7 +697,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -685,7 +714,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -701,7 +730,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -716,7 +745,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -732,7 +761,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -745,7 +774,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -758,7 +787,7 @@ def 
_names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -776,7 +805,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -788,7 +817,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -800,7 +829,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -812,7 +841,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -824,7 +853,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -838,7 +867,7 @@ def _wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="wikidata", + read_columns=frozenset({"wikidata"}), ) @@ -879,6 +908,8 @@ def infrastructure_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -990,7 +1021,7 @@ def infrastructure_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = 
FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=INFRASTRUCTURE_SCHEMA, checks=infrastructure_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py index 53b53b926..aa229e53a 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py @@ -16,13 +16,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -51,7 +53,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -61,7 +63,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -73,7 +75,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -83,7 +85,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -93,7 
+95,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -103,7 +105,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -113,7 +115,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -129,7 +131,7 @@ def _geometry_geometry_type_check() -> Check: GeometryType.POLYGON, ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -139,7 +141,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -149,7 +151,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -159,7 +161,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -169,7 +171,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["land"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -179,7 +181,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -189,7 +191,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + 
read_columns=frozenset({"version"}), ) @@ -199,7 +201,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -209,7 +211,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -219,7 +221,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -229,7 +231,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -239,7 +241,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -249,27 +251,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), 
shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -281,7 +283,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -293,7 +295,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -303,7 +305,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -359,7 +361,7 @@ def _class_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -386,7 +388,7 @@ def _subtype_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -396,7 +398,7 @@ def _elevation_check() -> Check: name="bounds", expr=check_bounds(F.col("elevation"), le=9000), shape=CheckShape.SCALAR, - root_field="elevation", + read_columns=frozenset({"elevation"}), ) @@ -434,7 +436,7 @@ def _surface_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="surface", + read_columns=frozenset({"surface"}), ) @@ -444,7 +446,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -454,7 +456,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -464,7 +466,34 @@ def 
_names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -474,7 +503,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -486,7 +515,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -496,7 +525,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -506,7 +535,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -521,7 +550,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -538,7 +567,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -554,7 +583,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -569,7 +598,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -585,7 +614,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -598,7 +627,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -611,7 +640,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -629,7 +658,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -641,7 +670,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -653,7 +682,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -665,7 +694,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: 
check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -677,7 +706,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -691,7 +720,7 @@ def _wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="wikidata", + read_columns=frozenset({"wikidata"}), ) @@ -730,6 +759,8 @@ def land_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -841,7 +872,7 @@ def land_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=LAND_SCHEMA, checks=land_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py index 7e65987e2..1e6b86777 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py @@ -15,7 +15,7 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) @@ -49,7 +49,7 @@ def _id_required_check() -> Check: 
name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -59,7 +59,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -71,7 +71,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -81,7 +81,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -91,7 +91,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -101,7 +101,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -111,7 +111,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -123,7 +123,7 @@ def _geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -133,7 +133,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -143,7 +143,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", 
+ read_columns=frozenset({"theme"}), ) @@ -153,7 +153,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -163,7 +163,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["land_cover"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -173,7 +173,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -183,7 +183,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -193,7 +193,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -203,7 +203,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -213,7 +213,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -223,7 +223,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -233,7 +233,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + 
read_columns=frozenset({"sources"}), ) @@ -243,27 +243,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -275,7 +275,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -287,7 +287,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -297,7 +297,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -307,7 +307,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -331,67 +331,67 @@ def _subtype_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + 
read_columns=frozenset({"subtype"}), ) def _cartography_prominence_bounds_check() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_0", name="bounds", expr=check_bounds(F.col("cartography.prominence"), ge=1), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_prominence_bounds_check_1() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_1", name="bounds", expr=check_bounds(F.col("cartography.prominence"), le=100), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check_1() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_1", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.max_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check_1() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_1", name="bounds", expr=check_bounds(F.col("cartography.max_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) @@ -485,7 +485,7 @@ def land_cover_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = 
FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=LAND_COVER_SCHEMA, checks=land_cover_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py index bf573f9bc..0fd2e9e78 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py @@ -16,13 +16,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -51,7 +53,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -61,7 +63,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -73,7 +75,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -83,7 +85,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) 
@@ -93,7 +95,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -103,7 +105,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -113,7 +115,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -129,7 +131,7 @@ def _geometry_geometry_type_check() -> Check: GeometryType.POLYGON, ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -139,7 +141,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -149,7 +151,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -159,7 +161,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -169,7 +171,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["land_use"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -179,7 +181,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -189,7 +191,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + 
read_columns=frozenset({"version"}), ) @@ -199,7 +201,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -209,7 +211,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -219,7 +221,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -229,7 +231,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -239,7 +241,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -249,27 +251,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), 
shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -281,7 +283,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -293,7 +295,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -303,7 +305,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -313,7 +315,7 @@ def _class_required_check() -> Check: name="required", expr=check_required(F.col("class")), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -436,7 +438,7 @@ def _class_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -446,7 +448,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -484,7 +486,7 @@ def _subtype_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -494,7 +496,7 @@ def _elevation_check() -> Check: name="bounds", expr=check_bounds(F.col("elevation"), le=9000), shape=CheckShape.SCALAR, - root_field="elevation", + read_columns=frozenset({"elevation"}), ) @@ -532,7 +534,7 @@ def _surface_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="surface", + read_columns=frozenset({"surface"}), ) @@ -542,7 +544,7 @@ def _names_primary_required_check() -> Check: name="required", 
expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -552,7 +554,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -562,7 +564,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -572,7 +601,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -584,7 +613,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -594,7 +623,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: 
check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -604,7 +633,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -619,7 +648,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -636,7 +665,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -652,7 +681,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -667,7 +696,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -683,7 +712,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -696,7 +725,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -709,7 +738,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -727,7 +756,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -739,7 +768,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), 
shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -751,7 +780,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -763,7 +792,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -775,7 +804,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -789,7 +818,7 @@ def _wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="wikidata", + read_columns=frozenset({"wikidata"}), ) @@ -830,6 +859,8 @@ def land_use_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -941,7 +972,7 @@ def land_use_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=LAND_USE_SCHEMA, checks=land_use_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py index d0484e725..59c8d0f6a 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py +++ 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py @@ -17,13 +17,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -52,7 +54,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -62,7 +64,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -74,7 +76,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -84,7 +86,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -94,7 +96,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -104,7 +106,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -114,7 +116,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), 
shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -130,7 +132,7 @@ def _geometry_geometry_type_check() -> Check: GeometryType.POLYGON, ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -140,7 +142,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -150,7 +152,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["base"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -160,7 +162,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -170,7 +172,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["water"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -180,7 +182,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -190,7 +192,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -200,7 +202,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -210,7 +212,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -220,7 +222,7 @@ def _sources_property_required_check() -> Check: name="required", 
expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -230,7 +232,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -240,7 +242,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -250,27 +252,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -282,7 +284,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -294,7 +296,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -304,7 
+306,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -353,7 +355,7 @@ def _class_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -379,7 +381,7 @@ def _subtype_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -389,7 +391,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -399,7 +401,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -409,7 +411,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + 
read_columns=frozenset({"names"}), ) @@ -419,7 +448,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -431,7 +460,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -441,7 +470,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -451,7 +480,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -466,7 +495,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -483,7 +512,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -499,7 +528,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -514,7 +543,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -530,7 +559,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -543,7 +572,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), 
shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -556,7 +585,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -574,7 +603,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -586,7 +615,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -598,7 +627,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -610,7 +639,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -622,7 +651,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -636,7 +665,7 @@ def _wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="wikidata", + read_columns=frozenset({"wikidata"}), ) @@ -673,6 +702,8 @@ def water_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -784,7 +815,7 @@ def 
water_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "base"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=WATER_SCHEMA, checks=water_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py index a73f69b89..672c4d6aa 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py @@ -17,13 +17,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -52,7 +54,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -62,7 +64,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -74,7 +76,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -84,7 +86,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", 
expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -94,7 +96,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -104,7 +106,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -114,7 +116,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -126,7 +128,7 @@ def _geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -136,7 +138,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -146,7 +148,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["buildings"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -156,7 +158,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -166,7 +168,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["building"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -176,7 +178,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ 
-186,7 +188,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -196,7 +198,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -206,7 +208,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -216,7 +218,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -226,7 +228,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -236,7 +238,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -246,27 +248,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return 
Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -278,7 +280,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -290,7 +292,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -300,7 +302,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -327,7 +329,7 @@ def _subtype_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -428,7 +430,7 @@ def _class_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -438,7 +440,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -448,7 +450,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -458,7 +460,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) 
+ + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -468,7 +497,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -480,7 +509,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -490,7 +519,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -500,7 +529,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -515,7 +544,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -532,7 +561,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", 
+ read_columns=frozenset({"names"}), ) @@ -548,7 +577,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -563,7 +592,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -579,7 +608,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -592,7 +621,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -605,7 +634,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -623,7 +652,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -635,7 +664,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -647,7 +676,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -659,7 +688,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -671,7 +700,7 @@ def _names_rules_side_check() -> Check: "names.rules", 
lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -681,7 +710,7 @@ def _height_check() -> Check: name="bounds", expr=check_bounds(F.col("height"), gt=0.0), shape=CheckShape.SCALAR, - root_field="height", + read_columns=frozenset({"height"}), ) @@ -691,7 +720,7 @@ def _num_floors_check() -> Check: name="bounds", expr=check_bounds(F.col("num_floors"), gt=0), shape=CheckShape.SCALAR, - root_field="num_floors", + read_columns=frozenset({"num_floors"}), ) @@ -701,7 +730,7 @@ def _num_floors_underground_check() -> Check: name="bounds", expr=check_bounds(F.col("num_floors_underground"), gt=0), shape=CheckShape.SCALAR, - root_field="num_floors_underground", + read_columns=frozenset({"num_floors_underground"}), ) @@ -711,7 +740,7 @@ def _min_floor_check() -> Check: name="bounds", expr=check_bounds(F.col("min_floor"), gt=0), shape=CheckShape.SCALAR, - root_field="min_floor", + read_columns=frozenset({"min_floor"}), ) @@ -725,7 +754,7 @@ def _facade_color_check() -> Check: label="Hexadecimal color code in format #RGB or #RRGGBB", ), shape=CheckShape.SCALAR, - root_field="facade_color", + read_columns=frozenset({"facade_color"}), ) @@ -750,7 +779,7 @@ def _facade_material_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="facade_material", + read_columns=frozenset({"facade_material"}), ) @@ -778,7 +807,7 @@ def _roof_material_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="roof_material", + read_columns=frozenset({"roof_material"}), ) @@ -806,27 +835,27 @@ def _roof_shape_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="roof_shape", + read_columns=frozenset({"roof_shape"}), ) def _roof_direction_bounds_check() -> Check: return Check( - field="roof_direction", + field="roof_direction_0", name="bounds", expr=check_bounds(F.col("roof_direction"), ge=0.0), shape=CheckShape.SCALAR, - root_field="roof_direction", + 
read_columns=frozenset({"roof_direction"}), ) def _roof_direction_bounds_check_1() -> Check: return Check( - field="roof_direction", + field="roof_direction_1", name="bounds", expr=check_bounds(F.col("roof_direction"), lt=360.0), shape=CheckShape.SCALAR, - root_field="roof_direction", + read_columns=frozenset({"roof_direction"}), ) @@ -836,7 +865,7 @@ def _roof_orientation_check() -> Check: name="enum", expr=check_enum(F.col("roof_orientation"), ["across", "along"]), shape=CheckShape.SCALAR, - root_field="roof_orientation", + read_columns=frozenset({"roof_orientation"}), ) @@ -850,7 +879,7 @@ def _roof_color_check() -> Check: label="Hexadecimal color code in format #RGB or #RRGGBB", ), shape=CheckShape.SCALAR, - root_field="roof_color", + read_columns=frozenset({"roof_color"}), ) @@ -887,6 +916,8 @@ def building_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -1018,7 +1049,7 @@ def building_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "buildings"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=BUILDING_SCHEMA, checks=building_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py index 8a3a96eec..f7691c1d9 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py @@ -17,13 +17,15 @@ 
StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -52,7 +54,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -62,7 +64,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -74,7 +76,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -84,7 +86,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -94,7 +96,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -104,7 +106,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -114,7 +116,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -126,7 +128,7 @@ def 
_geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -136,7 +138,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -146,7 +148,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["buildings"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -156,7 +158,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -166,7 +168,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["building_part"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -176,7 +178,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -186,7 +188,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -196,7 +198,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -206,7 +208,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -216,7 +218,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: 
check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -226,7 +228,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -236,7 +238,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -246,27 +248,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -278,7 +280,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -290,7 +292,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -300,7 +302,7 @@ def 
_sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -310,7 +312,7 @@ def _building_id_required_check() -> Check: name="required", expr=check_required(F.col("building_id")), shape=CheckShape.SCALAR, - root_field="building_id", + read_columns=frozenset({"building_id"}), ) @@ -320,7 +322,7 @@ def _building_id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("building_id"), 1), shape=CheckShape.SCALAR, - root_field="building_id", + read_columns=frozenset({"building_id"}), ) @@ -334,7 +336,7 @@ def _building_id_no_whitespace_check() -> Check: label="String without whitespace characters", ), shape=CheckShape.SCALAR, - root_field="building_id", + read_columns=frozenset({"building_id"}), ) @@ -344,7 +346,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -354,7 +356,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -364,7 +366,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + 
"^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -374,7 +403,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -386,7 +415,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -396,7 +425,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -406,7 +435,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -421,7 +450,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -438,7 +467,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -454,7 +483,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + 
read_columns=frozenset({"names"}), ) @@ -469,7 +498,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -485,7 +514,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -498,7 +527,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -511,7 +540,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -529,7 +558,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -541,7 +570,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -553,7 +582,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -565,7 +594,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -577,7 +606,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -587,7 +616,7 @@ def _height_check() 
-> Check: name="bounds", expr=check_bounds(F.col("height"), gt=0.0), shape=CheckShape.SCALAR, - root_field="height", + read_columns=frozenset({"height"}), ) @@ -597,7 +626,7 @@ def _num_floors_check() -> Check: name="bounds", expr=check_bounds(F.col("num_floors"), gt=0), shape=CheckShape.SCALAR, - root_field="num_floors", + read_columns=frozenset({"num_floors"}), ) @@ -607,7 +636,7 @@ def _num_floors_underground_check() -> Check: name="bounds", expr=check_bounds(F.col("num_floors_underground"), gt=0), shape=CheckShape.SCALAR, - root_field="num_floors_underground", + read_columns=frozenset({"num_floors_underground"}), ) @@ -617,7 +646,7 @@ def _min_floor_check() -> Check: name="bounds", expr=check_bounds(F.col("min_floor"), gt=0), shape=CheckShape.SCALAR, - root_field="min_floor", + read_columns=frozenset({"min_floor"}), ) @@ -631,7 +660,7 @@ def _facade_color_check() -> Check: label="Hexadecimal color code in format #RGB or #RRGGBB", ), shape=CheckShape.SCALAR, - root_field="facade_color", + read_columns=frozenset({"facade_color"}), ) @@ -656,7 +685,7 @@ def _facade_material_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="facade_material", + read_columns=frozenset({"facade_material"}), ) @@ -684,7 +713,7 @@ def _roof_material_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="roof_material", + read_columns=frozenset({"roof_material"}), ) @@ -712,27 +741,27 @@ def _roof_shape_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="roof_shape", + read_columns=frozenset({"roof_shape"}), ) def _roof_direction_bounds_check() -> Check: return Check( - field="roof_direction", + field="roof_direction_0", name="bounds", expr=check_bounds(F.col("roof_direction"), ge=0.0), shape=CheckShape.SCALAR, - root_field="roof_direction", + read_columns=frozenset({"roof_direction"}), ) def _roof_direction_bounds_check_1() -> Check: return Check( - field="roof_direction", + field="roof_direction_1", name="bounds", 
expr=check_bounds(F.col("roof_direction"), lt=360.0), shape=CheckShape.SCALAR, - root_field="roof_direction", + read_columns=frozenset({"roof_direction"}), ) @@ -742,7 +771,7 @@ def _roof_orientation_check() -> Check: name="enum", expr=check_enum(F.col("roof_orientation"), ["across", "along"]), shape=CheckShape.SCALAR, - root_field="roof_orientation", + read_columns=frozenset({"roof_orientation"}), ) @@ -756,7 +785,7 @@ def _roof_color_check() -> Check: label="Hexadecimal color code in format #RGB or #RRGGBB", ), shape=CheckShape.SCALAR, - root_field="roof_color", + read_columns=frozenset({"roof_color"}), ) @@ -794,6 +823,8 @@ def building_part_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -923,7 +954,7 @@ def building_part_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "buildings"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=BUILDING_PART_SCHEMA, checks=building_part_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py index cd42da918..75fb2def0 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py @@ -16,13 +16,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from 
overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -49,61 +51,61 @@ def _cartography_prominence_bounds_check() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_0", name="bounds", expr=check_bounds(F.col("cartography.prominence"), ge=1), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_prominence_bounds_check_1() -> Check: return Check( - field="cartography.prominence", + field="cartography.prominence_1", name="bounds", expr=check_bounds(F.col("cartography.prominence"), le=100), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_min_zoom_bounds_check_1() -> Check: return Check( - field="cartography.min_zoom", + field="cartography.min_zoom_1", name="bounds", expr=check_bounds(F.col("cartography.min_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_0", name="bounds", expr=check_bounds(F.col("cartography.max_zoom"), ge=0), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) def _cartography_max_zoom_bounds_check_1() -> Check: return Check( - field="cartography.max_zoom", + field="cartography.max_zoom_1", name="bounds", 
expr=check_bounds(F.col("cartography.max_zoom"), le=23), shape=CheckShape.SCALAR, - root_field="cartography", + read_columns=frozenset({"cartography"}), ) @@ -113,7 +115,7 @@ def _names_check() -> Check: name="required", expr=check_required(F.col("names")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -123,7 +125,7 @@ def _names_primary_required_check() -> Check: name="required", expr=check_required(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -133,7 +135,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -143,7 +145,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -153,7 +182,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + 
read_columns=frozenset({"names"}), ) @@ -165,7 +194,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -175,7 +204,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -185,7 +214,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -200,7 +229,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -217,7 +246,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -233,7 +262,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -248,7 +277,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -264,7 +293,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -277,7 +306,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -290,7 +319,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), 
shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -308,7 +337,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -320,7 +349,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -332,7 +361,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -344,7 +373,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -356,7 +385,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -366,7 +395,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -376,7 +405,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -388,7 +417,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -398,7 +427,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + 
read_columns=frozenset({"bbox"}), ) @@ -408,7 +437,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -418,7 +447,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -428,7 +457,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -438,7 +467,7 @@ def _geometry_geometry_type_check() -> Check: name="geometry_type", expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -448,7 +477,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -458,7 +487,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["divisions"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -468,7 +497,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -478,7 +507,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["division"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -488,7 +517,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -498,7 +527,7 @@ def _version_bounds_check() -> Check: name="bounds", 
expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -508,7 +537,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -518,7 +547,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -528,7 +557,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -538,7 +567,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -548,7 +577,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -558,27 +587,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", 
name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -590,7 +619,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -602,7 +631,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -612,7 +641,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -622,7 +651,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -648,7 +677,7 @@ def _subtype_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -658,7 +687,7 @@ def _country_required_check() -> Check: name="required", expr=check_required(F.col("country")), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -670,7 +699,7 @@ def _country_country_code_alpha2_check() -> Check: F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" ), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -680,7 +709,7 @@ def _hierarchies_check() -> Check: name="required", expr=check_required(F.col("hierarchies")), shape=CheckShape.SCALAR, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -690,7 +719,7 @@ def _hierarchies_min_length_check() 
-> Check: name="array_min_length", expr=check_array_min_length(F.col("hierarchies"), 1), shape=CheckShape.SCALAR, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -700,7 +729,7 @@ def _hierarchies_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("hierarchies")), shape=CheckShape.SCALAR, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -710,7 +739,7 @@ def _hierarchies_min_length_check_1() -> Check: name="array_min_length", expr=array_check("hierarchies", lambda el: check_array_min_length(el, 1)), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -720,7 +749,7 @@ def _hierarchies_unique_check_1() -> Check: name="struct_unique", expr=array_check("hierarchies", lambda el: check_struct_unique(el)), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -735,7 +764,7 @@ def _hierarchies_division_id_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -750,7 +779,7 @@ def _hierarchies_division_id_string_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -770,7 +799,7 @@ def _hierarchies_division_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -783,7 +812,7 @@ def _hierarchies_subtype_required_check() -> Check: lambda el: array_check(el, lambda inner: check_required(inner["subtype"])), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -815,7 +844,7 @@ def _hierarchies_subtype_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -828,7 +857,7 @@ def _hierarchies_name_required_check() -> Check: lambda el: array_check(el, 
lambda inner: check_required(inner["name"])), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -843,7 +872,7 @@ def _hierarchies_name_string_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -856,7 +885,7 @@ def _hierarchies_name_stripped_check() -> Check: lambda el: array_check(el, lambda inner: check_stripped(inner["name"])), ), shape=CheckShape.ARRAY, - root_field="hierarchies", + read_columns=frozenset({"hierarchies"}), ) @@ -866,7 +895,7 @@ def _parent_division_id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("parent_division_id"), 1), shape=CheckShape.SCALAR, - root_field="parent_division_id", + read_columns=frozenset({"parent_division_id"}), ) @@ -880,27 +909,27 @@ def _parent_division_id_no_whitespace_check() -> Check: label="String without whitespace characters", ), shape=CheckShape.SCALAR, - root_field="parent_division_id", + read_columns=frozenset({"parent_division_id"}), ) def _admin_level_bounds_check() -> Check: return Check( - field="admin_level", + field="admin_level_0", name="bounds", expr=check_bounds(F.col("admin_level"), ge=0), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) def _admin_level_bounds_check_1() -> Check: return Check( - field="admin_level", + field="admin_level_1", name="bounds", expr=check_bounds(F.col("admin_level"), le=16), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) @@ -912,7 +941,34 @@ def _class_check() -> Check: F.col("class"), ["megacity", "city", "town", "village", "hamlet"] ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), + ) + + +def _local_type_key_check() -> Check: + return Check( + field="local_type{key}", + name="language_tag", + expr=map_keys_check( + "local_type", + lambda k: 
check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"local_type"}), + ) + + +def _local_type_value_check() -> Check: + return Check( + field="local_type{value}", + name="stripped", + expr=map_values_check("local_type", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"local_type"}), ) @@ -926,7 +982,7 @@ def _region_check() -> Check: label="ISO 3166-2 subdivision code", ), shape=CheckShape.SCALAR, - root_field="region", + read_columns=frozenset({"region"}), ) @@ -939,7 +995,7 @@ def _perspectives_mode_required_check() -> Check: check_required(F.col("perspectives.mode")), ), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -949,7 +1005,7 @@ def _perspectives_mode_enum_check() -> Check: name="enum", expr=check_enum(F.col("perspectives.mode"), ["accepted_by", "disputed_by"]), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -962,7 +1018,7 @@ def _perspectives_countries_check() -> Check: check_required(F.col("perspectives.countries")), ), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -972,7 +1028,7 @@ def _perspectives_countries_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("perspectives.countries"), 1), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -982,7 +1038,7 @@ def _perspectives_countries_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("perspectives.countries")), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -997,7 
+1053,7 @@ def _perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -1007,7 +1063,7 @@ def _norms_driving_side_check() -> Check: name="enum", expr=check_enum(F.col("norms.driving_side"), ["left", "right"]), shape=CheckShape.SCALAR, - root_field="norms", + read_columns=frozenset({"norms"}), ) @@ -1017,7 +1073,7 @@ def _population_check() -> Check: name="bounds", expr=check_bounds(F.col("population"), ge=0), shape=CheckShape.SCALAR, - root_field="population", + read_columns=frozenset({"population"}), ) @@ -1027,7 +1083,7 @@ def _capital_division_ids_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("capital_division_ids"), 1), shape=CheckShape.SCALAR, - root_field="capital_division_ids", + read_columns=frozenset({"capital_division_ids"}), ) @@ -1037,7 +1093,7 @@ def _capital_division_ids_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("capital_division_ids")), shape=CheckShape.SCALAR, - root_field="capital_division_ids", + read_columns=frozenset({"capital_division_ids"}), ) @@ -1049,7 +1105,7 @@ def _capital_division_ids_string_min_length_check() -> Check: "capital_division_ids", lambda el: check_string_min_length(el, 1) ), shape=CheckShape.ARRAY, - root_field="capital_division_ids", + read_columns=frozenset({"capital_division_ids"}), ) @@ -1064,7 +1120,7 @@ def _capital_division_ids_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="capital_division_ids", + read_columns=frozenset({"capital_division_ids"}), ) @@ -1074,7 +1130,7 @@ def _capital_of_divisions_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("capital_of_divisions"), 1), shape=CheckShape.SCALAR, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1084,7 +1140,7 @@ def _capital_of_divisions_unique_check() -> Check: name="struct_unique", 
expr=check_struct_unique(F.col("capital_of_divisions")), shape=CheckShape.SCALAR, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1096,7 +1152,7 @@ def _capital_of_divisions_division_id_required_check() -> Check: "capital_of_divisions", lambda el: check_required(el["division_id"]) ), shape=CheckShape.ARRAY, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1109,7 +1165,7 @@ def _capital_of_divisions_division_id_string_min_length_check() -> Check: lambda el: check_string_min_length(el["division_id"], 1), ), shape=CheckShape.ARRAY, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1126,7 +1182,7 @@ def _capital_of_divisions_division_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1138,7 +1194,7 @@ def _capital_of_divisions_subtype_required_check() -> Check: "capital_of_divisions", lambda el: check_required(el["subtype"]) ), shape=CheckShape.ARRAY, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1167,7 +1223,7 @@ def _capital_of_divisions_subtype_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="capital_of_divisions", + read_columns=frozenset({"capital_of_divisions"}), ) @@ -1181,7 +1237,7 @@ def _wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="wikidata", + read_columns=frozenset({"wikidata"}), ) @@ -1193,7 +1249,7 @@ def _check_require_if_0_check() -> Check: F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1207,7 +1263,7 @@ def _check_require_if_1_check() -> Check: "subtype = 'macrocounty'", ), shape=CheckShape.SCALAR, - root_field=None, + 
read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1219,7 +1275,7 @@ def _check_require_if_2_check() -> Check: F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1233,7 +1289,7 @@ def _check_require_if_3_check() -> Check: "subtype = 'macroregion'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1247,7 +1303,7 @@ def _check_require_if_4_check() -> Check: "subtype = 'dependency'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1259,7 +1315,7 @@ def _check_require_if_5_check() -> Check: F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -1273,7 +1329,7 @@ def _check_require_if_6_check() -> Check: "subtype != 'country'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"parent_division_id", "subtype"}), ) @@ -1287,7 +1343,7 @@ def _check_forbid_if_7_check() -> Check: "subtype = 'country'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"parent_division_id", "subtype"}), ) @@ -1304,6 +1360,8 @@ def division_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -1367,6 +1425,8 @@ def division_checks() -> list[Check]: _admin_level_bounds_check(), _admin_level_bounds_check_1(), _class_check(), + _local_type_key_check(), + _local_type_value_check(), _region_check(), _perspectives_mode_required_check(), _perspectives_mode_enum_check(), @@ -1543,7 +1603,7 @@ def division_checks() -> 
list[Check]: PARTITIONS: dict[str, str] = {"theme": "divisions"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=DIVISION_SCHEMA, checks=division_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py index dffe20713..eca66a320 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py @@ -17,13 +17,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -54,7 +56,7 @@ def _names_check() -> Check: name="required", expr=check_required(F.col("names")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -64,7 +66,7 @@ def _names_primary_required_check() -> Check: name="required", expr=check_required(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -74,7 +76,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -84,7 +86,34 @@ def _names_primary_stripped_check() -> Check: 
name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -94,7 +123,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -106,7 +135,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -116,7 +145,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -126,7 +155,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -141,7 +170,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + 
read_columns=frozenset({"names"}), ) @@ -158,7 +187,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -174,7 +203,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -189,7 +218,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -205,7 +234,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -218,7 +247,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -231,7 +260,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -249,7 +278,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -261,7 +290,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -273,7 +302,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -285,7 +314,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), 
shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -297,7 +326,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -307,7 +336,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -317,7 +346,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -329,7 +358,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -339,7 +368,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -349,7 +378,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -359,7 +388,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -369,7 +398,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -381,7 +410,7 @@ def _geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON ), shape=CheckShape.SCALAR, - root_field="geometry", + 
read_columns=frozenset({"geometry"}), ) @@ -391,7 +420,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -401,7 +430,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["divisions"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -411,7 +440,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -421,7 +450,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["division_area"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -431,7 +460,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -441,7 +470,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -451,7 +480,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -461,7 +490,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -471,7 +500,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -481,7 +510,7 @@ def _sources_property_json_pointer_check() -> 
Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -491,7 +520,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -501,27 +530,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -533,7 +562,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -545,7 +574,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -555,7 +584,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + 
read_columns=frozenset({"sources"}), ) @@ -565,7 +594,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -591,7 +620,7 @@ def _subtype_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -601,7 +630,7 @@ def _class_required_check() -> Check: name="required", expr=check_required(F.col("class")), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -611,7 +640,7 @@ def _class_enum_check() -> Check: name="enum", expr=check_enum(F.col("class"), ["land", "maritime"]), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -621,7 +650,7 @@ def _division_id_required_check() -> Check: name="required", expr=check_required(F.col("division_id")), shape=CheckShape.SCALAR, - root_field="division_id", + read_columns=frozenset({"division_id"}), ) @@ -631,7 +660,7 @@ def _division_id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("division_id"), 1), shape=CheckShape.SCALAR, - root_field="division_id", + read_columns=frozenset({"division_id"}), ) @@ -645,7 +674,7 @@ def _division_id_no_whitespace_check() -> Check: label="String without whitespace characters", ), shape=CheckShape.SCALAR, - root_field="division_id", + read_columns=frozenset({"division_id"}), ) @@ -655,7 +684,7 @@ def _country_required_check() -> Check: name="required", expr=check_required(F.col("country")), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -667,7 +696,7 @@ def _country_country_code_alpha2_check() -> Check: F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" ), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -681,27 +710,27 @@ def _region_check() -> Check: label="ISO 
3166-2 subdivision code", ), shape=CheckShape.SCALAR, - root_field="region", + read_columns=frozenset({"region"}), ) def _admin_level_bounds_check() -> Check: return Check( - field="admin_level", + field="admin_level_0", name="bounds", expr=check_bounds(F.col("admin_level"), ge=0), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) def _admin_level_bounds_check_1() -> Check: return Check( - field="admin_level", + field="admin_level_1", name="bounds", expr=check_bounds(F.col("admin_level"), le=16), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) @@ -713,7 +742,7 @@ def _check_radio_group_0_check() -> Check: [F.col("is_land"), F.col("is_territorial")], ["is_land", "is_territorial"] ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"is_land", "is_territorial"}), ) @@ -725,7 +754,7 @@ def _check_require_if_1_check() -> Check: F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -739,7 +768,7 @@ def _check_require_if_2_check() -> Check: "subtype = 'macrocounty'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -751,7 +780,7 @@ def _check_require_if_3_check() -> Check: F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -765,7 +794,7 @@ def _check_require_if_4_check() -> Check: "subtype = 'macroregion'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -779,7 +808,7 @@ def _check_require_if_5_check() -> Check: "subtype = 'dependency'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -791,7 +820,7 @@ def _check_require_if_6_check() -> 
Check: F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -802,6 +831,8 @@ def division_area_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -955,7 +986,7 @@ def division_area_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "divisions"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=DIVISION_AREA_SCHEMA, checks=division_area_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py index 68c7b1f62..f3e2bd07c 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py @@ -16,7 +16,7 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) @@ -54,7 +54,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -64,7 +64,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), 
shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -76,7 +76,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -86,7 +86,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -96,7 +96,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -106,7 +106,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -116,7 +116,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -128,7 +128,7 @@ def _geometry_geometry_type_check() -> Check: F.col("geometry"), GeometryType.LINE_STRING, GeometryType.MULTI_LINE_STRING ), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -138,7 +138,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -148,7 +148,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["divisions"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -158,7 +158,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -168,7 +168,7 
@@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["division_boundary"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -178,7 +178,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -188,7 +188,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -198,7 +198,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -208,7 +208,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -218,7 +218,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -228,7 +228,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -238,7 +238,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -248,27 +248,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + 
read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -280,7 +280,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -292,7 +292,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -302,7 +302,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -312,7 +312,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -338,7 +338,7 @@ def _subtype_enum_check() -> Check: ], ), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -348,7 +348,7 @@ def _class_required_check() -> Check: name="required", expr=check_required(F.col("class")), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -358,7 +358,7 @@ def 
_class_enum_check() -> Check: name="enum", expr=check_enum(F.col("class"), ["land", "maritime"]), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class"}), ) @@ -368,7 +368,7 @@ def _division_ids_check() -> Check: name="required", expr=check_required(F.col("division_ids")), shape=CheckShape.SCALAR, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -378,7 +378,7 @@ def _division_ids_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("division_ids"), 2), shape=CheckShape.SCALAR, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -388,7 +388,7 @@ def _division_ids_max_length_check() -> Check: name="array_max_length", expr=check_array_max_length(F.col("division_ids"), 2), shape=CheckShape.SCALAR, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -398,7 +398,7 @@ def _division_ids_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("division_ids")), shape=CheckShape.SCALAR, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -408,7 +408,7 @@ def _division_ids_string_min_length_check() -> Check: name="string_min_length", expr=array_check("division_ids", lambda el: check_string_min_length(el, 1)), shape=CheckShape.ARRAY, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -423,7 +423,7 @@ def _division_ids_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="division_ids", + read_columns=frozenset({"division_ids"}), ) @@ -435,7 +435,7 @@ def _country_check() -> Check: F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" ), shape=CheckShape.SCALAR, - root_field="country", + read_columns=frozenset({"country"}), ) @@ -449,27 +449,27 @@ def _region_check() -> Check: label="ISO 3166-2 subdivision code", ), shape=CheckShape.SCALAR, - root_field="region", + read_columns=frozenset({"region"}), ) def 
_admin_level_bounds_check() -> Check: return Check( - field="admin_level", + field="admin_level_0", name="bounds", expr=check_bounds(F.col("admin_level"), ge=0), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) def _admin_level_bounds_check_1() -> Check: return Check( - field="admin_level", + field="admin_level_1", name="bounds", expr=check_bounds(F.col("admin_level"), le=16), shape=CheckShape.SCALAR, - root_field="admin_level", + read_columns=frozenset({"admin_level"}), ) @@ -482,7 +482,7 @@ def _perspectives_mode_required_check() -> Check: check_required(F.col("perspectives.mode")), ), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -492,7 +492,7 @@ def _perspectives_mode_enum_check() -> Check: name="enum", expr=check_enum(F.col("perspectives.mode"), ["accepted_by", "disputed_by"]), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -505,7 +505,7 @@ def _perspectives_countries_check() -> Check: check_required(F.col("perspectives.countries")), ), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -515,7 +515,7 @@ def _perspectives_countries_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("perspectives.countries"), 1), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -525,7 +525,7 @@ def _perspectives_countries_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("perspectives.countries")), shape=CheckShape.SCALAR, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -540,7 +540,7 @@ def _perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="perspectives", + read_columns=frozenset({"perspectives"}), ) @@ -552,7 +552,7 @@ def _check_radio_group_0_check() -> Check: [F.col("is_land"), 
F.col("is_territorial")], ["is_land", "is_territorial"] ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"is_land", "is_territorial"}), ) @@ -564,7 +564,7 @@ def _check_require_if_1_check() -> Check: F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -578,7 +578,7 @@ def _check_require_if_2_check() -> Check: "subtype = 'macrocounty'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -590,7 +590,7 @@ def _check_require_if_3_check() -> Check: F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -604,7 +604,7 @@ def _check_require_if_4_check() -> Check: "subtype = 'macroregion'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -618,7 +618,7 @@ def _check_require_if_5_check() -> Check: "subtype = 'dependency'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -630,7 +630,7 @@ def _check_require_if_6_check() -> Check: F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"admin_level", "subtype"}), ) @@ -642,7 +642,7 @@ def _check_require_if_7_check() -> Check: F.col("country"), F.col("subtype") != "country", "subtype != 'country'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"country", "subtype"}), ) @@ -654,7 +654,7 @@ def _check_forbid_if_8_check() -> Check: F.col("country"), F.col("subtype") == "country", "subtype = 'country'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"country", "subtype"}), ) @@ -775,7 +775,7 @@ def division_boundary_checks() -> list[Check]: PARTITIONS: 
dict[str, str] = {"theme": "divisions"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=DIVISION_BOUNDARY_SCHEMA, checks=division_boundary_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py index c9d448f6a..846cef63d 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py @@ -16,13 +16,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -54,7 +56,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -64,7 +66,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -76,7 +78,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -86,7 +88,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), 
shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -96,7 +98,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -106,7 +108,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -116,7 +118,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -126,7 +128,7 @@ def _geometry_geometry_type_check() -> Check: name="geometry_type", expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -136,7 +138,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -146,7 +148,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["places"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -156,7 +158,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -166,7 +168,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["place"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -176,7 +178,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -186,7 +188,7 @@ def 
_version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -196,7 +198,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -206,7 +208,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -216,7 +218,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -226,7 +228,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -236,7 +238,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -246,27 +248,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - 
field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -278,7 +280,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -290,7 +292,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -300,7 +302,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -313,7 +315,7 @@ def _operating_status_check() -> Check: ["open", "permanently_closed", "temporarily_closed"], ), shape=CheckShape.SCALAR, - root_field="operating_status", + read_columns=frozenset({"operating_status"}), ) @@ -325,7 +327,7 @@ def _categories_primary_required_check() -> Check: F.col("categories").isNotNull(), check_required(F.col("categories.primary")) ), shape=CheckShape.SCALAR, - root_field="categories", + read_columns=frozenset({"categories"}), ) @@ -339,7 +341,7 @@ def _categories_primary_snake_case_check() -> Check: label="Category in snake_case format", ), shape=CheckShape.SCALAR, - root_field="categories", + read_columns=frozenset({"categories"}), ) @@ -349,7 +351,7 @@ def _categories_alternate_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("categories.alternate")), shape=CheckShape.SCALAR, - root_field="categories", + read_columns=frozenset({"categories"}), ) @@ -364,7 +366,7 @@ def _categories_alternate_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="categories", + read_columns=frozenset({"categories"}), ) @@ -378,7 +380,7 @@ def _basic_category_check() -> Check: label="Category in snake_case format", ), shape=CheckShape.SCALAR, - root_field="basic_category", + read_columns=frozenset({"basic_category"}), ) @@ -390,7 +392,7 @@ def _taxonomy_primary_required_check() -> Check: F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.primary")) ), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -404,7 +406,7 @@ def _taxonomy_primary_snake_case_check() -> Check: label="Category in snake_case format", ), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -416,7 +418,7 @@ def _taxonomy_hierarchy_check() -> Check: F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.hierarchy")) ), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -426,7 +428,7 @@ def _taxonomy_hierarchy_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("taxonomy.hierarchy"), 1), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -436,7 +438,7 @@ def _taxonomy_hierarchy_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("taxonomy.hierarchy")), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -451,7 +453,7 @@ def _taxonomy_hierarchy_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -461,7 +463,7 @@ def _taxonomy_alternates_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("taxonomy.alternates"), 1), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -471,7 +473,7 @@ def _taxonomy_alternates_unique_check() -> Check: name="struct_unique", 
expr=check_struct_unique(F.col("taxonomy.alternates")), shape=CheckShape.SCALAR, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) @@ -486,27 +488,27 @@ def _taxonomy_alternates_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="taxonomy", + read_columns=frozenset({"taxonomy"}), ) def _confidence_bounds_check() -> Check: return Check( - field="confidence", + field="confidence_0", name="bounds", expr=check_bounds(F.col("confidence"), ge=0.0), shape=CheckShape.SCALAR, - root_field="confidence", + read_columns=frozenset({"confidence"}), ) def _confidence_bounds_check_1() -> Check: return Check( - field="confidence", + field="confidence_1", name="bounds", expr=check_bounds(F.col("confidence"), le=1.0), shape=CheckShape.SCALAR, - root_field="confidence", + read_columns=frozenset({"confidence"}), ) @@ -516,7 +518,7 @@ def _websites_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("websites"), 1), shape=CheckShape.SCALAR, - root_field="websites", + read_columns=frozenset({"websites"}), ) @@ -526,7 +528,7 @@ def _websites_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("websites")), shape=CheckShape.SCALAR, - root_field="websites", + read_columns=frozenset({"websites"}), ) @@ -536,7 +538,7 @@ def _websites_url_format_check() -> Check: name="url_format", expr=array_check("websites", lambda el: check_url_format(el)), shape=CheckShape.ARRAY, - root_field="websites", + read_columns=frozenset({"websites"}), ) @@ -546,7 +548,7 @@ def _websites_url_length_check() -> Check: name="url_length", expr=array_check("websites", lambda el: check_url_length(el)), shape=CheckShape.ARRAY, - root_field="websites", + read_columns=frozenset({"websites"}), ) @@ -556,7 +558,7 @@ def _socials_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("socials"), 1), shape=CheckShape.SCALAR, - root_field="socials", + read_columns=frozenset({"socials"}), ) @@ -566,7 +568,7 @@ 
def _socials_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("socials")), shape=CheckShape.SCALAR, - root_field="socials", + read_columns=frozenset({"socials"}), ) @@ -576,7 +578,7 @@ def _socials_url_format_check() -> Check: name="url_format", expr=array_check("socials", lambda el: check_url_format(el)), shape=CheckShape.ARRAY, - root_field="socials", + read_columns=frozenset({"socials"}), ) @@ -586,7 +588,7 @@ def _socials_url_length_check() -> Check: name="url_length", expr=array_check("socials", lambda el: check_url_length(el)), shape=CheckShape.ARRAY, - root_field="socials", + read_columns=frozenset({"socials"}), ) @@ -596,7 +598,7 @@ def _emails_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("emails"), 1), shape=CheckShape.SCALAR, - root_field="emails", + read_columns=frozenset({"emails"}), ) @@ -606,7 +608,7 @@ def _emails_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("emails")), shape=CheckShape.SCALAR, - root_field="emails", + read_columns=frozenset({"emails"}), ) @@ -616,7 +618,7 @@ def _emails_check() -> Check: name="email", expr=array_check("emails", lambda el: check_email(el)), shape=CheckShape.ARRAY, - root_field="emails", + read_columns=frozenset({"emails"}), ) @@ -626,7 +628,7 @@ def _phones_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("phones"), 1), shape=CheckShape.SCALAR, - root_field="phones", + read_columns=frozenset({"phones"}), ) @@ -636,7 +638,7 @@ def _phones_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("phones")), shape=CheckShape.SCALAR, - root_field="phones", + read_columns=frozenset({"phones"}), ) @@ -653,7 +655,7 @@ def _phones_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="phones", + read_columns=frozenset({"phones"}), ) @@ -666,7 +668,7 @@ def _brand_names_primary_required_check() -> Check: check_required(F.col("brand.names.primary")), ), 
shape=CheckShape.SCALAR, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -676,7 +678,7 @@ def _brand_names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("brand.names.primary"), 1), shape=CheckShape.SCALAR, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -686,7 +688,34 @@ def _brand_names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("brand.names.primary")), shape=CheckShape.SCALAR, - root_field="brand", + read_columns=frozenset({"brand"}), + ) + + +def _brand_names_common_key_check() -> Check: + return Check( + field="brand.names.common{key}", + name="language_tag", + expr=map_keys_check( + "brand.names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"brand"}), + ) + + +def _brand_names_common_value_check() -> Check: + return Check( + field="brand.names.common{value}", + name="stripped", + expr=map_values_check("brand.names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"brand"}), ) @@ -696,7 +725,7 @@ def _brand_names_rules_value_required_check() -> Check: name="required", expr=array_check("brand.names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -708,7 +737,7 @@ def _brand_names_rules_value_string_min_length_check() -> Check: "brand.names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -718,7 +747,7 @@ def _brand_names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("brand.names.rules", lambda 
el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -728,7 +757,7 @@ def _brand_names_rules_variant_required_check() -> Check: name="required", expr=array_check("brand.names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -743,7 +772,7 @@ def _brand_names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -760,7 +789,7 @@ def _brand_names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -776,7 +805,7 @@ def _brand_names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -791,7 +820,7 @@ def _brand_names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -807,7 +836,7 @@ def _brand_names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -820,7 +849,7 @@ def _brand_names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -833,7 +862,7 @@ def _brand_names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -851,7 +880,7 @@ def _brand_names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -863,7 +892,7 @@ def _brand_names_rules_between_linear_range_length_check() -> Check: 
"brand.names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -875,7 +904,7 @@ def _brand_names_rules_between_linear_range_bounds_check() -> Check: "brand.names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -887,7 +916,7 @@ def _brand_names_rules_between_linear_range_order_check() -> Check: "brand.names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -899,7 +928,7 @@ def _brand_names_rules_side_check() -> Check: "brand.names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -913,7 +942,7 @@ def _brand_wikidata_check() -> Check: label="Wikidata identifier (Q followed by digits)", ), shape=CheckShape.SCALAR, - root_field="brand", + read_columns=frozenset({"brand"}), ) @@ -923,7 +952,7 @@ def _addresses_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("addresses"), 1), shape=CheckShape.SCALAR, - root_field="addresses", + read_columns=frozenset({"addresses"}), ) @@ -940,7 +969,7 @@ def _addresses_region_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="addresses", + read_columns=frozenset({"addresses"}), ) @@ -955,7 +984,7 @@ def _addresses_country_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="addresses", + read_columns=frozenset({"addresses"}), ) @@ -965,7 +994,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -975,7 +1004,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", 
expr=check_string_min_length(F.col("names.primary"), 1), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -985,7 +1014,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -995,7 +1051,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1007,7 +1063,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1017,7 +1073,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1027,7 +1083,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: 
check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1042,7 +1098,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1059,7 +1115,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1075,7 +1131,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1090,7 +1146,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1106,7 +1162,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1119,7 +1175,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1132,7 +1188,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1150,7 +1206,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1162,7 +1218,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1174,7 +1230,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: 
check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1186,7 +1242,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1198,7 +1254,7 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1264,6 +1320,8 @@ def place_checks() -> list[Check]: _brand_names_primary_required_check(), _brand_names_primary_string_min_length_check(), _brand_names_primary_stripped_check(), + _brand_names_common_key_check(), + _brand_names_common_value_check(), _brand_names_rules_value_required_check(), _brand_names_rules_value_string_min_length_check(), _brand_names_rules_value_stripped_check(), @@ -1287,6 +1345,8 @@ def place_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -1498,7 +1558,7 @@ def place_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "places"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=PLACE_SCHEMA, checks=place_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py index 5813ca61b..63b614e41 100644 --- 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py @@ -15,7 +15,7 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) @@ -49,7 +49,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -59,7 +59,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -71,7 +71,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -81,7 +81,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -91,7 +91,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -101,7 +101,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -111,7 +111,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), 
) @@ -121,7 +121,7 @@ def _geometry_geometry_type_check() -> Check: name="geometry_type", expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -131,7 +131,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -141,7 +141,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["transportation"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -151,7 +151,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -161,7 +161,7 @@ def _type_enum_check() -> Check: name="enum", expr=check_enum(F.col("type"), ["connector"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -171,7 +171,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -181,7 +181,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -191,7 +191,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -201,7 +201,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -211,7 +211,7 @@ def _sources_property_required_check() -> Check: name="required", 
expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -221,7 +221,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -231,7 +231,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -241,27 +241,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -273,7 +273,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -285,7 +285,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -295,7 
+295,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -365,7 +365,7 @@ def connector_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "transportation"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=CONNECTOR_SCHEMA, checks=connector_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py index 539999f21..ad2311427 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -17,13 +17,15 @@ StructType, ) -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape, ModelValidation from overture.schema.pyspark.expressions._schema_structs import ( BBOX_STRUCT, ) from overture.schema.pyspark.expressions.column_patterns import ( array_check, check_struct_unique, + map_keys_check, + map_values_check, nested_array_check, ) from overture.schema.pyspark.expressions.constraint_expressions import ( @@ -55,7 +57,7 @@ def _id_required_check() -> Check: name="required", expr=check_required(F.col("id")), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -65,7 +67,7 @@ def _id_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("id"), 1), shape=CheckShape.SCALAR, - root_field="id", + 
read_columns=frozenset({"id"}), ) @@ -77,7 +79,7 @@ def _id_no_whitespace_check() -> Check: F.col("id"), "^\\S+\\z", label="String without whitespace characters" ), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) @@ -87,7 +89,7 @@ def _bbox_bbox_completeness_check() -> Check: name="bbox_completeness", expr=check_bbox_completeness(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -97,7 +99,7 @@ def _bbox_bbox_lat_ordering_check() -> Check: name="bbox_lat_ordering", expr=check_bbox_lat_ordering(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -107,7 +109,7 @@ def _bbox_bbox_lat_range_check() -> Check: name="bbox_lat_range", expr=check_bbox_lat_range(F.col("bbox")), shape=CheckShape.SCALAR, - root_field="bbox", + read_columns=frozenset({"bbox"}), ) @@ -117,7 +119,7 @@ def _geometry_required_check() -> Check: name="required", expr=check_required(F.col("geometry")), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -127,7 +129,7 @@ def _geometry_geometry_type_check() -> Check: name="geometry_type", expr=check_geometry_type(F.col("geometry"), GeometryType.LINE_STRING), shape=CheckShape.SCALAR, - root_field="geometry", + read_columns=frozenset({"geometry"}), ) @@ -137,7 +139,7 @@ def _theme_required_check() -> Check: name="required", expr=check_required(F.col("theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -147,7 +149,7 @@ def _theme_enum_check() -> Check: name="enum", expr=check_enum(F.col("theme"), ["transportation"]), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ) @@ -157,7 +159,7 @@ def _type_required_check() -> Check: name="required", expr=check_required(F.col("type")), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -167,7 +169,7 @@ def _type_enum_check() -> 
Check: name="enum", expr=check_enum(F.col("type"), ["segment"]), shape=CheckShape.SCALAR, - root_field="type", + read_columns=frozenset({"type"}), ) @@ -177,7 +179,7 @@ def _version_required_check() -> Check: name="required", expr=check_required(F.col("version")), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -187,7 +189,7 @@ def _version_bounds_check() -> Check: name="bounds", expr=check_bounds(F.col("version"), ge=0), shape=CheckShape.SCALAR, - root_field="version", + read_columns=frozenset({"version"}), ) @@ -197,7 +199,7 @@ def _sources_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("sources"), 1), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -207,7 +209,7 @@ def _sources_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("sources")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -217,7 +219,7 @@ def _sources_property_required_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -227,7 +229,7 @@ def _sources_property_json_pointer_check() -> Check: name="json_pointer", expr=array_check("sources", lambda el: check_json_pointer(el["property"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -237,7 +239,7 @@ def _sources_dataset_check() -> Check: name="required", expr=array_check("sources", lambda el: check_required(el["dataset"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -247,27 +249,27 @@ def _sources_license_check() -> Check: name="stripped", expr=array_check("sources", lambda el: check_stripped(el["license"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def 
_sources_confidence_bounds_check() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_0", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) def _sources_confidence_bounds_check_1() -> Check: return Check( - field="sources[].confidence", + field="sources[].confidence_1", name="bounds", expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -279,7 +281,7 @@ def _sources_between_linear_range_length_check() -> Check: "sources", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -291,7 +293,7 @@ def _sources_between_linear_range_bounds_check() -> Check: "sources", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -301,7 +303,7 @@ def _sources_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="sources", + read_columns=frozenset({"sources"}), ) @@ -311,7 +313,7 @@ def _subtype_required_check() -> Check: name="required", expr=check_required(F.col("subtype")), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -321,7 +323,7 @@ def _subtype_enum_check() -> Check: name="enum", expr=check_enum(F.col("subtype"), ["road", "rail", "water"]), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) @@ -331,7 +333,7 @@ def _access_restrictions_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("access_restrictions"), 1), shape=CheckShape.SCALAR, - 
root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -341,7 +343,7 @@ def _access_restrictions_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("access_restrictions")), shape=CheckShape.SCALAR, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -353,7 +355,7 @@ def _access_restrictions_access_type_required_check() -> Check: "access_restrictions", lambda el: check_required(el["access_type"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -368,7 +370,7 @@ def _access_restrictions_access_type_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -380,7 +382,7 @@ def _access_restrictions_between_linear_range_length_check() -> Check: "access_restrictions", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -392,7 +394,7 @@ def _access_restrictions_between_linear_range_bounds_check() -> Check: "access_restrictions", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -404,7 +406,7 @@ def _access_restrictions_between_linear_range_order_check() -> Check: "access_restrictions", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -417,7 +419,7 @@ def _access_restrictions_when_heading_check() -> Check: lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -430,7 +432,7 @@ def _access_restrictions_when_mode_min_length_check() -> 
Check: lambda el: check_array_min_length(el["when"]["mode"], 1), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -442,7 +444,7 @@ def _access_restrictions_when_mode_unique_check() -> Check: "access_restrictions", lambda el: check_struct_unique(el["when"]["mode"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -473,7 +475,7 @@ def _access_restrictions_when_mode_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -486,7 +488,7 @@ def _access_restrictions_when_using_min_length_check() -> Check: lambda el: check_array_min_length(el["when"]["using"], 1), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -498,7 +500,7 @@ def _access_restrictions_when_using_unique_check() -> Check: "access_restrictions", lambda el: check_struct_unique(el["when"]["using"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -523,7 +525,7 @@ def _access_restrictions_when_using_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -536,7 +538,7 @@ def _access_restrictions_when_recognized_min_length_check() -> Check: lambda el: check_array_min_length(el["when"]["recognized"], 1), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -549,7 +551,7 @@ def _access_restrictions_when_recognized_unique_check() -> Check: lambda el: check_struct_unique(el["when"]["recognized"]), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -574,7 +576,7 @@ def _access_restrictions_when_recognized_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -587,7 +589,7 @@ def _access_restrictions_when_vehicle_min_length_check() -> Check: lambda el: check_array_min_length(el["when"]["vehicle"], 1), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -599,7 +601,7 @@ def _access_restrictions_when_vehicle_unique_check() -> Check: "access_restrictions", lambda el: check_struct_unique(el["when"]["vehicle"]) ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -614,7 +616,7 @@ def _access_restrictions_when_vehicle_dimension_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -633,7 +635,7 @@ def _access_restrictions_when_vehicle_dimension_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -648,7 +650,7 @@ def _access_restrictions_when_vehicle_comparison_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -673,13 +675,13 @@ def _access_restrictions_when_vehicle_comparison_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def _access_restrictions_when_vehicle_value_check() -> Check: return Check( - field="access_restrictions[].when.vehicle[].value", + field="access_restrictions[].when.vehicle[].value_0", name="required", expr=nested_array_check( "access_restrictions", @@ -692,13 +694,13 @@ def _access_restrictions_when_vehicle_value_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def 
_access_restrictions_when_vehicle_value_required_check() -> Check: return Check( - field="access_restrictions[].when.vehicle[].value", + field="access_restrictions[].when.vehicle[].value_1", name="required", expr=nested_array_check( "access_restrictions", @@ -711,7 +713,7 @@ def _access_restrictions_when_vehicle_value_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -730,13 +732,13 @@ def _access_restrictions_when_vehicle_value_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def _access_restrictions_when_vehicle_unit_required_check() -> Check: return Check( - field="access_restrictions[].when.vehicle[].unit", + field="access_restrictions[].when.vehicle[].unit_0", name="required", expr=nested_array_check( "access_restrictions", @@ -749,13 +751,13 @@ def _access_restrictions_when_vehicle_unit_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def _access_restrictions_when_vehicle_unit_enum_check() -> Check: return Check( - field="access_restrictions[].when.vehicle[].unit", + field="access_restrictions[].when.vehicle[].unit_0", name="enum", expr=nested_array_check( "access_restrictions", @@ -770,13 +772,13 @@ def _access_restrictions_when_vehicle_unit_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def _access_restrictions_when_vehicle_unit_required_check_1() -> Check: return Check( - field="access_restrictions[].when.vehicle[].unit", + field="access_restrictions[].when.vehicle[].unit_1", name="required", expr=nested_array_check( "access_restrictions", @@ -788,13 +790,13 @@ def _access_restrictions_when_vehicle_unit_required_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - 
root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) def _access_restrictions_when_vehicle_unit_enum_check_1() -> Check: return Check( - field="access_restrictions[].when.vehicle[].unit", + field="access_restrictions[].when.vehicle[].unit_1", name="enum", expr=nested_array_check( "access_restrictions", @@ -807,7 +809,7 @@ def _access_restrictions_when_vehicle_unit_enum_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -817,7 +819,7 @@ def _connectors_min_length_check() -> Check: name="array_min_length", expr=check_array_min_length(F.col("connectors"), 2), shape=CheckShape.SCALAR, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) @@ -827,7 +829,7 @@ def _connectors_unique_check() -> Check: name="struct_unique", expr=check_struct_unique(F.col("connectors")), shape=CheckShape.SCALAR, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) @@ -837,7 +839,7 @@ def _connectors_connector_id_required_check() -> Check: name="required", expr=array_check("connectors", lambda el: check_required(el["connector_id"])), shape=CheckShape.ARRAY, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) @@ -849,7 +851,7 @@ def _connectors_connector_id_string_min_length_check() -> Check: "connectors", lambda el: check_string_min_length(el["connector_id"], 1) ), shape=CheckShape.ARRAY, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) @@ -866,27 +868,27 @@ def _connectors_connector_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) def _connectors_at_bounds_check() -> Check: return Check( - field="connectors[].at", + field="connectors[].at_0", name="bounds", expr=array_check("connectors", lambda el: check_bounds(el["at"], ge=0.0)), shape=CheckShape.ARRAY, - root_field="connectors", + 
read_columns=frozenset({"connectors"}), ) def _connectors_at_bounds_check_1() -> Check: return Check( - field="connectors[].at", + field="connectors[].at_1", name="bounds", expr=array_check("connectors", lambda el: check_bounds(el["at"], le=1.0)), shape=CheckShape.ARRAY, - root_field="connectors", + read_columns=frozenset({"connectors"}), ) @@ -896,7 +898,7 @@ def _level_rules_value_check() -> Check: name="required", expr=array_check("level_rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="level_rules", + read_columns=frozenset({"level_rules"}), ) @@ -908,7 +910,7 @@ def _level_rules_between_linear_range_length_check() -> Check: "level_rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="level_rules", + read_columns=frozenset({"level_rules"}), ) @@ -920,7 +922,7 @@ def _level_rules_between_linear_range_bounds_check() -> Check: "level_rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="level_rules", + read_columns=frozenset({"level_rules"}), ) @@ -932,7 +934,7 @@ def _level_rules_between_linear_range_order_check() -> Check: "level_rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="level_rules", + read_columns=frozenset({"level_rules"}), ) @@ -942,7 +944,7 @@ def _routes_name_string_min_length_check() -> Check: name="string_min_length", expr=array_check("routes", lambda el: check_string_min_length(el["name"], 1)), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -952,7 +954,7 @@ def _routes_name_stripped_check() -> Check: name="stripped", expr=array_check("routes", lambda el: check_stripped(el["name"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -964,7 +966,7 @@ def _routes_network_string_min_length_check() -> Check: "routes", lambda el: check_string_min_length(el["network"], 1) ), 
shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -974,7 +976,7 @@ def _routes_network_stripped_check() -> Check: name="stripped", expr=array_check("routes", lambda el: check_stripped(el["network"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -984,7 +986,7 @@ def _routes_ref_string_min_length_check() -> Check: name="string_min_length", expr=array_check("routes", lambda el: check_string_min_length(el["ref"], 1)), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -994,7 +996,7 @@ def _routes_ref_stripped_check() -> Check: name="stripped", expr=array_check("routes", lambda el: check_stripped(el["ref"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1004,7 +1006,7 @@ def _routes_symbol_string_min_length_check() -> Check: name="string_min_length", expr=array_check("routes", lambda el: check_string_min_length(el["symbol"], 1)), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1014,7 +1016,7 @@ def _routes_symbol_stripped_check() -> Check: name="stripped", expr=array_check("routes", lambda el: check_stripped(el["symbol"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1031,7 +1033,7 @@ def _routes_wikidata_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1041,7 +1043,7 @@ def _routes_between_linear_range_length_check() -> Check: name="linear_range_length", expr=array_check("routes", lambda el: check_linear_range_length(el["between"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1051,7 +1053,7 @@ def _routes_between_linear_range_bounds_check() -> Check: name="linear_range_bounds", expr=array_check("routes", lambda el: check_linear_range_bounds(el["between"])), shape=CheckShape.ARRAY, - 
root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1061,7 +1063,7 @@ def _routes_between_linear_range_order_check() -> Check: name="linear_range_order", expr=array_check("routes", lambda el: check_linear_range_order(el["between"])), shape=CheckShape.ARRAY, - root_field="routes", + read_columns=frozenset({"routes"}), ) @@ -1071,7 +1073,7 @@ def _subclass_rules_value_required_check() -> Check: name="required", expr=array_check("subclass_rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="subclass_rules", + read_columns=frozenset({"subclass_rules"}), ) @@ -1095,7 +1097,7 @@ def _subclass_rules_value_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="subclass_rules", + read_columns=frozenset({"subclass_rules"}), ) @@ -1107,7 +1109,7 @@ def _subclass_rules_between_linear_range_length_check() -> Check: "subclass_rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="subclass_rules", + read_columns=frozenset({"subclass_rules"}), ) @@ -1119,7 +1121,7 @@ def _subclass_rules_between_linear_range_bounds_check() -> Check: "subclass_rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="subclass_rules", + read_columns=frozenset({"subclass_rules"}), ) @@ -1131,7 +1133,7 @@ def _subclass_rules_between_linear_range_order_check() -> Check: "subclass_rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="subclass_rules", + read_columns=frozenset({"subclass_rules"}), ) @@ -1141,7 +1143,7 @@ def _names_primary_required_check() -> Check: name="required", expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1151,7 +1153,7 @@ def _names_primary_string_min_length_check() -> Check: name="string_min_length", expr=check_string_min_length(F.col("names.primary"), 1), 
shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1161,7 +1163,34 @@ def _names_primary_stripped_check() -> Check: name="stripped", expr=check_stripped(F.col("names.primary")), shape=CheckShape.SCALAR, - root_field="names", + read_columns=frozenset({"names"}), + ) + + +def _names_common_key_check() -> Check: + return Check( + field="names.common{key}", + name="language_tag", + expr=map_keys_check( + "names.common", + lambda k: check_pattern( + k, + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), + ) + + +def _names_common_value_check() -> Check: + return Check( + field="names.common{value}", + name="stripped", + expr=map_values_check("names.common", lambda v: check_stripped(v)), + shape=CheckShape.ARRAY, + read_columns=frozenset({"names"}), ) @@ -1171,7 +1200,7 @@ def _names_rules_value_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1183,7 +1212,7 @@ def _names_rules_value_string_min_length_check() -> Check: "names.rules", lambda el: check_string_min_length(el["value"], 1) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1193,7 +1222,7 @@ def _names_rules_value_stripped_check() -> Check: name="stripped", expr=array_check("names.rules", lambda el: check_stripped(el["value"])), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1203,7 +1232,7 @@ def _names_rules_variant_required_check() -> Check: name="required", expr=array_check("names.rules", lambda el: check_required(el["variant"])), shape=CheckShape.ARRAY, - root_field="names", + 
read_columns=frozenset({"names"}), ) @@ -1218,7 +1247,7 @@ def _names_rules_variant_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1235,7 +1264,7 @@ def _names_rules_language_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1251,7 +1280,7 @@ def _names_rules_perspectives_mode_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1266,7 +1295,7 @@ def _names_rules_perspectives_mode_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1282,7 +1311,7 @@ def _names_rules_perspectives_countries_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1295,7 +1324,7 @@ def _names_rules_perspectives_countries_min_length_check() -> Check: lambda el: check_array_min_length(el["perspectives"]["countries"], 1), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1308,7 +1337,7 @@ def _names_rules_perspectives_countries_unique_check() -> Check: lambda el: check_struct_unique(el["perspectives"]["countries"]), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1326,7 +1355,7 @@ def _names_rules_perspectives_countries_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1338,7 +1367,7 @@ def _names_rules_between_linear_range_length_check() -> Check: "names.rules", lambda el: check_linear_range_length(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1350,7 +1379,7 @@ def _names_rules_between_linear_range_bounds_check() -> Check: "names.rules", lambda el: check_linear_range_bounds(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + 
read_columns=frozenset({"names"}), ) @@ -1362,7 +1391,7 @@ def _names_rules_between_linear_range_order_check() -> Check: "names.rules", lambda el: check_linear_range_order(el["between"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) @@ -1374,23 +1403,23 @@ def _names_rules_side_check() -> Check: "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) ), shape=CheckShape.ARRAY, - root_field="names", + read_columns=frozenset({"names"}), ) def _class_required_check() -> Check: return Check( - field="class", + field="class_0", name="required", expr=F.when(F.col("subtype").isin(["road"]), check_required(F.col("class"))), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class", "subtype"}), ) def _class_enum_check() -> Check: return Check( - field="class", + field="class_0", name="enum", expr=F.when( F.col("subtype").isin(["road"]), @@ -1418,7 +1447,7 @@ def _class_enum_check() -> Check: ), ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class", "subtype"}), ) @@ -1433,7 +1462,7 @@ def _destinations_from_connector_id_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1449,7 +1478,7 @@ def _destinations_from_connector_id_string_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1469,7 +1498,7 @@ def _destinations_from_connector_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1484,7 +1513,7 @@ def _destinations_to_connector_id_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1500,7 +1529,7 @@ def _destinations_to_connector_id_string_min_length_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1520,7 +1549,7 @@ def _destinations_to_connector_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1533,7 +1562,7 @@ def _destinations_to_segment_id_required_check() -> Check: array_check("destinations", lambda el: check_required(el["to_segment_id"])), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1549,7 +1578,7 @@ def _destinations_to_segment_id_string_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1569,7 +1598,7 @@ def _destinations_to_segment_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1582,7 +1611,7 @@ def _destinations_final_heading_required_check() -> Check: array_check("destinations", lambda el: check_required(el["final_heading"])), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1598,7 +1627,7 @@ def _destinations_final_heading_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1613,7 +1642,7 @@ def _destinations_labels_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1626,7 +1655,7 @@ def _destinations_labels_unique_check() -> Check: array_check("destinations", lambda el: check_struct_unique(el["labels"])), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1644,7 +1673,7 @@ def _destinations_labels_value_required_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1663,7 +1692,7 @@ def _destinations_labels_value_string_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1681,7 +1710,7 @@ def _destinations_labels_value_stripped_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1699,7 +1728,7 @@ def _destinations_labels_type_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1727,7 +1756,7 @@ def _destinations_labels_type_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1740,7 +1769,7 @@ def _destinations_symbols_unique_check() -> Check: array_check("destinations", lambda el: check_struct_unique(el["symbols"])), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1783,7 +1812,7 @@ def _destinations_symbols_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1801,7 +1830,7 @@ def _destinations_when_heading_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1817,7 +1846,7 @@ def _destinations_when_heading_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations", "subtype"}), ) @@ -1832,7 +1861,7 @@ def _prohibited_transitions_sequence_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1848,7 +1877,7 @@ def 
_prohibited_transitions_sequence_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1863,7 +1892,7 @@ def _prohibited_transitions_sequence_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1881,7 +1910,7 @@ def _prohibited_transitions_sequence_connector_id_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1900,7 +1929,7 @@ def _prohibited_transitions_sequence_connector_id_string_min_length_check() -> C ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1923,7 +1952,7 @@ def _prohibited_transitions_sequence_connector_id_no_whitespace_check() -> Check ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1941,7 +1970,7 @@ def _prohibited_transitions_sequence_segment_id_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1960,7 +1989,7 @@ def _prohibited_transitions_sequence_segment_id_string_min_length_check() -> Che ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1983,7 +2012,7 @@ def _prohibited_transitions_sequence_segment_id_no_whitespace_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -1998,7 +2027,7 @@ def _prohibited_transitions_final_heading_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - 
root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2014,7 +2043,7 @@ def _prohibited_transitions_final_heading_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2030,7 +2059,7 @@ def _prohibited_transitions_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2046,7 +2075,7 @@ def _prohibited_transitions_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2062,7 +2091,7 @@ def _prohibited_transitions_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2078,7 +2107,7 @@ def _prohibited_transitions_when_heading_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2094,7 +2123,7 @@ def _prohibited_transitions_when_mode_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2110,7 +2139,7 @@ def _prohibited_transitions_when_mode_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2144,7 +2173,7 @@ def _prohibited_transitions_when_mode_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2160,7 +2189,7 @@ def _prohibited_transitions_when_using_min_length_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2176,7 +2205,7 @@ def _prohibited_transitions_when_using_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2204,7 +2233,7 @@ def _prohibited_transitions_when_using_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2220,7 +2249,7 @@ def _prohibited_transitions_when_recognized_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2236,7 +2265,7 @@ def _prohibited_transitions_when_recognized_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2264,7 +2293,7 @@ def _prohibited_transitions_when_recognized_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2280,7 +2309,7 @@ def _prohibited_transitions_when_vehicle_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2296,7 +2325,7 @@ def _prohibited_transitions_when_vehicle_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2315,7 +2344,7 @@ def _prohibited_transitions_when_vehicle_dimension_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2337,7 +2366,7 @@ def 
_prohibited_transitions_when_vehicle_dimension_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2356,7 +2385,7 @@ def _prohibited_transitions_when_vehicle_comparison_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2384,13 +2413,13 @@ def _prohibited_transitions_when_vehicle_comparison_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_value_check() -> Check: return Check( - field="prohibited_transitions[].when.vehicle[].value", + field="prohibited_transitions[].when.vehicle[].value_0", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -2406,13 +2435,13 @@ def _prohibited_transitions_when_vehicle_value_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_value_required_check() -> Check: return Check( - field="prohibited_transitions[].when.vehicle[].value", + field="prohibited_transitions[].when.vehicle[].value_1", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -2430,7 +2459,7 @@ def _prohibited_transitions_when_vehicle_value_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2454,13 +2483,13 @@ def _prohibited_transitions_when_vehicle_value_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_unit_required_check() -> Check: return Check( - 
field="prohibited_transitions[].when.vehicle[].unit", + field="prohibited_transitions[].when.vehicle[].unit_0", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -2476,13 +2505,13 @@ def _prohibited_transitions_when_vehicle_unit_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_unit_enum_check() -> Check: return Check( - field="prohibited_transitions[].when.vehicle[].unit", + field="prohibited_transitions[].when.vehicle[].unit_0", name="enum", expr=F.when( F.col("subtype").isin(["road"]), @@ -2500,13 +2529,13 @@ def _prohibited_transitions_when_vehicle_unit_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_unit_required_check_1() -> Check: return Check( - field="prohibited_transitions[].when.vehicle[].unit", + field="prohibited_transitions[].when.vehicle[].unit_1", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -2522,13 +2551,13 @@ def _prohibited_transitions_when_vehicle_unit_required_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) def _prohibited_transitions_when_vehicle_unit_enum_check_1() -> Check: return Check( - field="prohibited_transitions[].when.vehicle[].unit", + field="prohibited_transitions[].when.vehicle[].unit_1", name="enum", expr=F.when( F.col("subtype").isin(["road"]), @@ -2546,7 +2575,7 @@ def _prohibited_transitions_when_vehicle_unit_enum_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -2559,7 +2588,7 @@ def _road_flags_min_length_check() -> Check: check_array_min_length(F.col("road_flags"), 1), ), 
shape=CheckShape.SCALAR, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2571,7 +2600,7 @@ def _road_flags_unique_check() -> Check: F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_flags")) ), shape=CheckShape.SCALAR, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2584,7 +2613,7 @@ def _road_flags_values_check() -> Check: array_check("road_flags", lambda el: check_required(el["values"])), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2599,7 +2628,7 @@ def _road_flags_values_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2612,7 +2641,7 @@ def _road_flags_values_unique_check() -> Check: array_check("road_flags", lambda el: check_struct_unique(el["values"])), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2642,7 +2671,7 @@ def _road_flags_values_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2657,7 +2686,7 @@ def _road_flags_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2672,7 +2701,7 @@ def _road_flags_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2687,7 +2716,7 @@ def _road_flags_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_flags", + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -2700,7 +2729,7 @@ def _road_surface_min_length_check() -> Check: check_array_min_length(F.col("road_surface"), 1), ), shape=CheckShape.SCALAR, - root_field="road_surface", + 
read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2712,7 +2741,7 @@ def _road_surface_unique_check() -> Check: F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_surface")) ), shape=CheckShape.SCALAR, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2725,7 +2754,7 @@ def _road_surface_value_required_check() -> Check: array_check("road_surface", lambda el: check_required(el["value"])), ), shape=CheckShape.ARRAY, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2752,7 +2781,7 @@ def _road_surface_value_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2767,7 +2796,7 @@ def _road_surface_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2782,7 +2811,7 @@ def _road_surface_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2797,7 +2826,7 @@ def _road_surface_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="road_surface", + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -2810,7 +2839,7 @@ def _speed_limits_min_length_check() -> Check: check_array_min_length(F.col("speed_limits"), 1), ), shape=CheckShape.SCALAR, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2822,7 +2851,7 @@ def _speed_limits_unique_check() -> Check: F.col("subtype").isin(["road"]), check_struct_unique(F.col("speed_limits")) ), shape=CheckShape.SCALAR, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2841,13 +2870,13 @@ def _speed_limits_max_speed_value_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + 
read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_max_speed_value_bounds_check() -> Check: return Check( - field="speed_limits[].max_speed.value", + field="speed_limits[].max_speed.value_0", name="bounds", expr=F.when( F.col("subtype").isin(["road"]), @@ -2856,13 +2885,13 @@ def _speed_limits_max_speed_value_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_max_speed_value_bounds_check_1() -> Check: return Check( - field="speed_limits[].max_speed.value", + field="speed_limits[].max_speed.value_1", name="bounds", expr=F.when( F.col("subtype").isin(["road"]), @@ -2872,7 +2901,7 @@ def _speed_limits_max_speed_value_bounds_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2890,7 +2919,7 @@ def _speed_limits_max_speed_unit_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2906,7 +2935,7 @@ def _speed_limits_max_speed_unit_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2925,13 +2954,13 @@ def _speed_limits_min_speed_value_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_min_speed_value_bounds_check() -> Check: return Check( - field="speed_limits[].min_speed.value", + field="speed_limits[].min_speed.value_0", name="bounds", expr=F.when( F.col("subtype").isin(["road"]), @@ -2940,13 +2969,13 @@ def _speed_limits_min_speed_value_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_min_speed_value_bounds_check_1() -> Check: return Check( - 
field="speed_limits[].min_speed.value", + field="speed_limits[].min_speed.value_1", name="bounds", expr=F.when( F.col("subtype").isin(["road"]), @@ -2956,7 +2985,7 @@ def _speed_limits_min_speed_value_bounds_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2974,7 +3003,7 @@ def _speed_limits_min_speed_unit_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -2990,7 +3019,7 @@ def _speed_limits_min_speed_unit_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3005,7 +3034,7 @@ def _speed_limits_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3020,7 +3049,7 @@ def _speed_limits_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3035,7 +3064,7 @@ def _speed_limits_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3051,7 +3080,7 @@ def _speed_limits_when_heading_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3066,7 +3095,7 @@ def _speed_limits_when_mode_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3081,7 +3110,7 @@ def _speed_limits_when_mode_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3115,7 +3144,7 @@ def _speed_limits_when_mode_check() -> Check: ), ), 
shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3131,7 +3160,7 @@ def _speed_limits_when_using_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3146,7 +3175,7 @@ def _speed_limits_when_using_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3174,7 +3203,7 @@ def _speed_limits_when_using_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3190,7 +3219,7 @@ def _speed_limits_when_recognized_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3205,7 +3234,7 @@ def _speed_limits_when_recognized_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3233,7 +3262,7 @@ def _speed_limits_when_recognized_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3249,7 +3278,7 @@ def _speed_limits_when_vehicle_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3264,7 +3293,7 @@ def _speed_limits_when_vehicle_unique_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3283,7 +3312,7 @@ def _speed_limits_when_vehicle_dimension_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3305,7 +3334,7 @@ def _speed_limits_when_vehicle_dimension_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - 
root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3324,7 +3353,7 @@ def _speed_limits_when_vehicle_comparison_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3352,13 +3381,13 @@ def _speed_limits_when_vehicle_comparison_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_when_vehicle_value_check() -> Check: return Check( - field="speed_limits[].when.vehicle[].value", + field="speed_limits[].when.vehicle[].value_0", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -3374,13 +3403,13 @@ def _speed_limits_when_vehicle_value_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_when_vehicle_value_required_check() -> Check: return Check( - field="speed_limits[].when.vehicle[].value", + field="speed_limits[].when.vehicle[].value_1", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -3398,7 +3427,7 @@ def _speed_limits_when_vehicle_value_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3422,13 +3451,13 @@ def _speed_limits_when_vehicle_value_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_when_vehicle_unit_required_check() -> Check: return Check( - field="speed_limits[].when.vehicle[].unit", + field="speed_limits[].when.vehicle[].unit_0", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -3444,13 +3473,13 @@ def _speed_limits_when_vehicle_unit_required_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) 
def _speed_limits_when_vehicle_unit_enum_check() -> Check: return Check( - field="speed_limits[].when.vehicle[].unit", + field="speed_limits[].when.vehicle[].unit_0", name="enum", expr=F.when( F.col("subtype").isin(["road"]), @@ -3468,13 +3497,13 @@ def _speed_limits_when_vehicle_unit_enum_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_when_vehicle_unit_required_check_1() -> Check: return Check( - field="speed_limits[].when.vehicle[].unit", + field="speed_limits[].when.vehicle[].unit_1", name="required", expr=F.when( F.col("subtype").isin(["road"]), @@ -3490,13 +3519,13 @@ def _speed_limits_when_vehicle_unit_required_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) def _speed_limits_when_vehicle_unit_enum_check_1() -> Check: return Check( - field="speed_limits[].when.vehicle[].unit", + field="speed_limits[].when.vehicle[].unit_1", name="enum", expr=F.when( F.col("subtype").isin(["road"]), @@ -3514,7 +3543,7 @@ def _speed_limits_when_vehicle_unit_enum_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -3538,7 +3567,7 @@ def _subclass_check() -> Check: ), ), shape=CheckShape.SCALAR, - root_field="subclass", + read_columns=frozenset({"subclass", "subtype"}), ) @@ -3551,7 +3580,7 @@ def _width_rules_min_length_check() -> Check: check_array_min_length(F.col("width_rules"), 1), ), shape=CheckShape.SCALAR, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3563,7 +3592,7 @@ def _width_rules_unique_check() -> Check: F.col("subtype").isin(["road"]), check_struct_unique(F.col("width_rules")) ), shape=CheckShape.SCALAR, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3576,7 +3605,7 @@ def _width_rules_value_required_check() 
-> Check: array_check("width_rules", lambda el: check_required(el["value"])), ), shape=CheckShape.ARRAY, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3589,7 +3618,7 @@ def _width_rules_value_bounds_check() -> Check: array_check("width_rules", lambda el: check_bounds(el["value"], gt=0.0)), ), shape=CheckShape.ARRAY, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3604,7 +3633,7 @@ def _width_rules_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3619,7 +3648,7 @@ def _width_rules_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -3634,23 +3663,23 @@ def _width_rules_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="width_rules", + read_columns=frozenset({"subtype", "width_rules"}), ) def _class_required_check_1() -> Check: return Check( - field="class", + field="class_1", name="required", expr=F.when(F.col("subtype").isin(["rail"]), check_required(F.col("class"))), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class", "subtype"}), ) def _class_enum_check_1() -> Check: return Check( - field="class", + field="class_1", name="enum", expr=F.when( F.col("subtype").isin(["rail"]), @@ -3669,7 +3698,7 @@ def _class_enum_check_1() -> Check: ), ), shape=CheckShape.SCALAR, - root_field="class", + read_columns=frozenset({"class", "subtype"}), ) @@ -3682,7 +3711,7 @@ def _rail_flags_min_length_check() -> Check: check_array_min_length(F.col("rail_flags"), 1), ), shape=CheckShape.SCALAR, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3694,7 +3723,7 @@ def _rail_flags_unique_check() -> Check: F.col("subtype").isin(["rail"]), check_struct_unique(F.col("rail_flags")) ), 
shape=CheckShape.SCALAR, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3707,7 +3736,7 @@ def _rail_flags_values_check() -> Check: array_check("rail_flags", lambda el: check_required(el["values"])), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3722,7 +3751,7 @@ def _rail_flags_values_min_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3735,7 +3764,7 @@ def _rail_flags_values_unique_check() -> Check: array_check("rail_flags", lambda el: check_struct_unique(el["values"])), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3766,7 +3795,7 @@ def _rail_flags_values_check_1() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3781,7 +3810,7 @@ def _rail_flags_between_linear_range_length_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3796,7 +3825,7 @@ def _rail_flags_between_linear_range_bounds_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3811,7 +3840,7 @@ def _rail_flags_between_linear_range_order_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="rail_flags", + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -3831,7 +3860,7 @@ def _access_restrictions_when_vehicle_check_forbid_if_0_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3851,7 +3880,7 @@ def _access_restrictions_when_vehicle_check_require_if_1_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3871,7 +3900,7 
@@ def _access_restrictions_when_vehicle_check_require_if_2_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3891,7 +3920,7 @@ def _access_restrictions_when_vehicle_check_require_if_3_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3909,7 +3938,7 @@ def _access_restrictions_when_vehicle_check_require_if_4_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3935,7 +3964,7 @@ def _access_restrictions_when_check_require_any_of_5_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="access_restrictions", + read_columns=frozenset({"access_restrictions"}), ) @@ -3950,7 +3979,7 @@ def _destinations_check_require_any_of_6_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="destinations", + read_columns=frozenset({"destinations"}), ) @@ -3970,7 +3999,7 @@ def _prohibited_transitions_when_vehicle_check_forbid_if_7_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -3990,7 +4019,7 @@ def _prohibited_transitions_when_vehicle_check_require_if_8_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -4010,7 +4039,7 @@ def _prohibited_transitions_when_vehicle_check_require_if_9_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -4030,7 +4059,7 @@ def _prohibited_transitions_when_vehicle_check_require_if_10_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -4048,7 +4077,7 @@ def 
_prohibited_transitions_when_vehicle_check_require_if_11_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -4074,7 +4103,7 @@ def _prohibited_transitions_when_check_require_any_of_12_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="prohibited_transitions", + read_columns=frozenset({"prohibited_transitions"}), ) @@ -4094,7 +4123,7 @@ def _speed_limits_when_vehicle_check_forbid_if_13_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4114,7 +4143,7 @@ def _speed_limits_when_vehicle_check_require_if_14_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4134,7 +4163,7 @@ def _speed_limits_when_vehicle_check_require_if_15_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4154,7 +4183,7 @@ def _speed_limits_when_vehicle_check_require_if_16_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4172,7 +4201,7 @@ def _speed_limits_when_vehicle_check_require_if_17_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4198,7 +4227,7 @@ def _speed_limits_when_check_require_any_of_18_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4214,7 +4243,7 @@ def _speed_limits_check_require_any_of_19_check() -> Check: ), ), shape=CheckShape.ARRAY, - root_field="speed_limits", + read_columns=frozenset({"speed_limits"}), ) @@ -4226,7 +4255,7 @@ def _check_forbid_if_20_check() -> Check: F.col("class"), F.col("subtype") == "water", "subtype = 'water'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"class", 
"subtype"}), ) @@ -4238,7 +4267,7 @@ def _check_require_if_21_check() -> Check: F.col("class"), F.col("subtype") == "rail", "subtype = 'rail'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"class", "subtype"}), ) @@ -4250,7 +4279,7 @@ def _check_require_if_22_check() -> Check: F.col("class"), F.col("subtype") == "road", "subtype = 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"class", "subtype"}), ) @@ -4262,7 +4291,7 @@ def _check_forbid_if_23_check() -> Check: F.col("destinations"), F.col("subtype") != "road", "subtype != 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"destinations", "subtype"}), ) @@ -4276,7 +4305,7 @@ def _check_forbid_if_24_check() -> Check: "subtype != 'road'", ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"prohibited_transitions", "subtype"}), ) @@ -4288,7 +4317,7 @@ def _check_forbid_if_25_check() -> Check: F.col("road_flags"), F.col("subtype") != "road", "subtype != 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"road_flags", "subtype"}), ) @@ -4300,7 +4329,7 @@ def _check_forbid_if_26_check() -> Check: F.col("road_surface"), F.col("subtype") != "road", "subtype != 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"road_surface", "subtype"}), ) @@ -4312,7 +4341,7 @@ def _check_forbid_if_27_check() -> Check: F.col("speed_limits"), F.col("subtype") != "road", "subtype != 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"speed_limits", "subtype"}), ) @@ -4324,7 +4353,7 @@ def _check_forbid_if_28_check() -> Check: F.col("subclass"), F.col("subtype") != "road", "subtype != 'road'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"subclass", "subtype"}), ) @@ -4336,7 +4365,7 @@ def _check_forbid_if_29_check() -> Check: F.col("width_rules"), F.col("subtype") != "road", "subtype != 'road'" ), 
shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"subtype", "width_rules"}), ) @@ -4348,7 +4377,7 @@ def _check_forbid_if_30_check() -> Check: F.col("rail_flags"), F.col("subtype") != "rail", "subtype != 'rail'" ), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"rail_flags", "subtype"}), ) @@ -4443,6 +4472,8 @@ def segment_checks() -> list[Check]: _names_primary_required_check(), _names_primary_string_min_length_check(), _names_primary_stripped_check(), + _names_common_key_check(), + _names_common_value_check(), _names_rules_value_required_check(), _names_rules_value_string_min_length_check(), _names_rules_value_stripped_check(), @@ -5055,7 +5086,7 @@ def segment_checks() -> list[Check]: PARTITIONS: dict[str, str] = {"theme": "transportation"} -FEATURE_VALIDATION = FeatureValidation( +MODEL_VALIDATION = ModelValidation( schema=SEGMENT_SCHEMA, checks=segment_checks, geometry_types=GEOMETRY_TYPES, diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py index 8376ff5b0..d4f76eb55 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py @@ -6,6 +6,7 @@ from __future__ import annotations +import re from dataclasses import dataclass from pyspark.sql.types import ( @@ -15,6 +16,10 @@ StructType, ) +# First struct (`.`), array (`[]`), or map (`{key}`/`{value}`) step marker +# in an encoded path; everything before it is the top-level column. +_STEP_MARKER = re.compile(r"[.\[{]") + @dataclass(frozen=True) class SchemaMismatch: @@ -34,6 +39,16 @@ class SchemaMismatch: actual: str expected: str + @property + def root(self) -> str: + """Top-level schema column this mismatch belongs to. + + Strips the struct/array/map step markers `_compare` embeds in + `path` (e.g. 
`sources[].confidence` -> `sources`), leaving the + column name that matches a `Check.read_columns` entry. + """ + return _STEP_MARKER.split(self.path, maxsplit=1)[0] + def _type_name(dt: DataType) -> str: """Short display name for a DataType (e.g. `"StringType"`).""" diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py index 0274d6c18..f6161b4b0 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py @@ -1,7 +1,7 @@ -"""Validation pipeline for Overture feature data. +"""Validation pipeline for registered models. -`validate_feature()` is the primary entry point: it looks up the -feature type in the registry, compares schemas, filters checks, and +`validate_model()` is the primary entry point: it looks up the +model type in the registry, compares schemas, filters checks, and evaluates them in a single pass. Returns a `ValidationResult` carrying the evaluated DataFrame and metadata. @@ -31,13 +31,13 @@ from .schema_check import SchemaMismatch, compare_schemas -def feature_keys() -> list[str]: +def model_keys() -> list[str]: """Canonical entry-point keys registered in the validation registry.""" return sorted(REGISTRY) -def feature_names() -> list[str]: - """All names `validate_feature` accepts. +def model_names() -> list[str]: + """All names `validate_model` accepts. Includes canonical entry-point keys and the snake-case class-name aliases the resolver recognizes (only when an alias is unambiguous). @@ -85,18 +85,61 @@ def _normalize_suppress( # are preserved. _ERR_COLUMN = re.compile(r"^_err_\d+$") +# Working/output columns `explain_errors` introduces beyond `_err_`: +# `_idx`/`_errors` are UNPIVOT scratch, `field`/`check`/`message` are its +# output contract. 
An input column sharing any of these names produces +# duplicate attributes -> AMBIGUOUS_REFERENCE. +_EXPLAIN_RESERVED = ("_idx", "_errors", "field", "check", "message") + def _non_error_columns(evaluated: DataFrame) -> list[str]: """Column names excluding `_err_N` error columns appended by `evaluate_checks`.""" return [c for c in evaluated.columns if not _ERR_COLUMN.match(c)] +def _reject_reserved_collisions(collisions: Iterable[str], reserved_label: str) -> None: + """Raise if any input column collides with a reserved working/output name. + + Parameters + ---------- + collisions + Input column names that collide with the reserved set. + reserved_label + Human-readable description of the reserved names, completing the + sentence `... collide with {reserved_label}`. + + Raises + ------ + ValueError + If `collisions` is non-empty. The message names the offending + columns and the remediation (rename or drop them). + """ + names = sorted(collisions) + if names: + raise ValueError( + f"input columns {names} collide with {reserved_label}; " + f"rename or drop them before validating" + ) + + def evaluate_checks(df: DataFrame, checks: list[Check]) -> DataFrame: """Append `_err_N` columns for each check. Returns the input DataFrame with one `array` column per check, containing error messages (non-empty) or null/empty (no error). + + Raises + ------ + ValueError + If `df` already contains a `_err_` column. Appending the + working columns would shadow it (duplicate attributes), so the + collision is rejected -- most realistically a persisted + `result.evaluated` fed back through validation. """ + _reject_reserved_collisions( + (c for c in df.columns if _ERR_COLUMN.match(c)), + "the reserved '_err_' columns evaluate_checks appends", + ) error_cols = [] for i, chk in enumerate(checks): if chk.shape == CheckShape.SCALAR: @@ -155,8 +198,18 @@ def explain_errors(evaluated: DataFrame, checks: list[Check]) -> DataFrame: ------- DataFrame Schema: `, field, check, message`. 
+ + Raises + ------ + ValueError + If an original column collides with a working/output name + (`_idx`, `_errors`, `field`, `check`, `message`). """ orig_cols = _non_error_columns(evaluated) + _reject_reserved_collisions( + (c for c in orig_cols if c in _EXPLAIN_RESERVED), + f"explain_errors' working/output columns {list(_EXPLAIN_RESERVED)}", + ) n = len(checks) if n == 0: empty_schema = StructType( @@ -192,7 +245,7 @@ def explain_errors(evaluated: DataFrame, checks: list[Check]) -> DataFrame: @dataclass(frozen=True) class ValidationResult: - """Result of validate_feature(). + """Result of validate_model(). Consumer owns caching of `evaluated`. Call `error_rows()` for the filtered view; use `explain_errors(result.evaluated, @@ -203,6 +256,14 @@ class ValidationResult: checks: list[Check] schema_mismatches: list[SchemaMismatch] suppressed_checks: list[Check] + absent_columns: tuple[str, ...] = () + """Root columns present in the schema but absent from the data and not already skipped. + + Ordered by first appearance in `schema_mismatches`, deduplicated. + Matches the set of root fields whose checks `validate_model` silently + drops; callers use this to suggest `--skip-columns` without re-deriving + it from `schema_mismatches`. + """ def error_rows(self) -> DataFrame: """Rows with at least one violation. Original columns only.""" @@ -234,22 +295,22 @@ def row_counts(self) -> tuple[int, int]: return row["total"], row["errors"] -def validate_feature( +def validate_model( df: DataFrame, - feature_type: str, + model_type: str, *, skip_columns: Iterable[str] = (), ignore_extra_columns: Iterable[str] = (), suppress: Iterable[str | tuple[str, str] | Check] = (), ) -> ValidationResult: - """Validate a DataFrame against a registered feature type. + """Validate a DataFrame against a registered model type. Parameters ---------- df Input DataFrame to validate. - feature_type - Registered feature type name (e.g. `"building"`). + model_type + Registered model type name (e.g. 
`"building"`). skip_columns Columns declared absent from the data. Raises `ValueError` if any are present in `df.columns`. @@ -265,11 +326,11 @@ def validate_feature( Raises ------ ValueError - If `feature_type` isn't registered. Message includes the + If `model_type` isn't registered. Message includes the sorted list of known types. """ - feature_type = resolve_entry_point_key(feature_type, REGISTRY) - validation = REGISTRY[feature_type] + model_type = resolve_entry_point_key(model_type, REGISTRY) + validation = REGISTRY[model_type] skip = frozenset(skip_columns) ignore_extra = frozenset(ignore_extra_columns) suppress_roots, suppress_pairs = _normalize_suppress(suppress) @@ -287,16 +348,18 @@ def validate_feature( raw_mismatches = compare_schemas(df.schema, validation.schema) mismatches = [] for m in raw_mismatches: - root = m.path.split(".", 1)[0] - if root in skip: + if m.root in skip: continue - if m.expected == "missing" and root in ignore_extra: + if m.expected == "missing" and m.root in ignore_extra: continue mismatches.append(m) - # Validate suppress entries match real checks before filtering + # Validate suppress entries match real checks before filtering. A bare + # suppress string names a column; it is valid when some check reads it, + # which is exactly when suppressing it would drop a check -- the same + # `read_columns` set that drives exclusion below. 
all_checks = validation.checks() - valid_roots = {c.root_field for c in all_checks if c.root_field is not None} + valid_roots = {col for c in all_checks for col in c.read_columns} valid_pairs = {(c.field, c.name) for c in all_checks} unmatched_roots = suppress_roots - valid_roots unmatched_pairs = suppress_pairs - valid_pairs @@ -307,7 +370,7 @@ def validate_feature( if unmatched_pairs: parts.append(f"unknown (field, name) pairs {sorted(unmatched_pairs)}") raise ValueError( - f"suppress entries don't match any check for {feature_type!r}: " + f"suppress entries don't match any check for {model_type!r}: " + "; ".join(parts) ) @@ -320,23 +383,32 @@ def validate_feature( # mismatch stays in `mismatches` and is reported, so the caller (the # CLI) aborts unless --skip-schema-check. `--skip-columns` opts into # that suppression -- it is not a restatement of the default. - # `Check.root_field` is column-granular, so filtering is all-or-nothing: - # if the data has the `bbox` struct but is missing only `bbox.xmin`, - # every check whose root_field is `bbox` is dropped, including checks on - # sub-fields that are present. Finer granularity would require - # sub-column awareness in Check, which it deliberately lacks. - absent_columns = { - m.path.split(".", 1)[0] for m in mismatches if m.actual == "missing" - } + # Exclusion is column-granular, so filtering is all-or-nothing: if the + # data has the `bbox` struct but is missing only `bbox.xmin`, every check + # rooted at `bbox` is dropped, including checks on sub-fields that are + # present. Finer granularity would require sub-column awareness in Check, + # which it deliberately lacks. `m.root` strips the array/map step markers + # (`sources[].confidence` -> `sources`) so a nested absence still resolves + # to the top-level column. + absent_columns = tuple( + dict.fromkeys(m.root for m in mismatches if m.actual == "missing") + ) + + # Check filtering. 
A check is dropped when any column it reads is gone -- + # whether skipped or structurally absent -- so an unresolvable `F.col()` + # never reaches Spark. Suppression by column name is the same predicate + # over a different set: suppressing a column is treating it as absent. + excluded = skip | set(absent_columns) + + def _is_excluded(chk: Check) -> bool: + return not excluded.isdisjoint(chk.read_columns) - # Check filtering - excluded = skip | absent_columns kept: list[Check] = [] suppressed: list[Check] = [] for chk in all_checks: - if chk.root_field is not None and chk.root_field in excluded: + if _is_excluded(chk): continue # structurally absent, not tracked in suppressed - if chk.root_field is not None and chk.root_field in suppress_roots: + if not suppress_roots.isdisjoint(chk.read_columns): suppressed.append(chk) continue if (chk.field, chk.name) in suppress_pairs: @@ -350,4 +422,5 @@ def validate_feature( checks=kept, schema_mismatches=mismatches, suppressed_checks=suppressed, + absent_columns=absent_columns, ) diff --git a/packages/overture-schema-pyspark/tests/_support/harness.py b/packages/overture-schema-pyspark/tests/_support/harness.py index b21320bb3..03d7af483 100644 --- a/packages/overture-schema-pyspark/tests/_support/harness.py +++ b/packages/overture-schema-pyspark/tests/_support/harness.py @@ -1,6 +1,6 @@ """Validation harness for generated conformance tests. -Builds a single DataFrame per feature type from scenario mutations, +Builds a single DataFrame per model type from scenario mutations, runs validation once, and indexes violations by scenario ID. """ @@ -23,7 +23,7 @@ # Namespace for `_scenario_id` UUIDs. Distinct from # `overture.schema.codegen.pyspark.test_data.base_row._BASE_ROW_NAMESPACE` -# (which synthesizes feature `id` values) so a feature `id` can never +# (which synthesizes model `id` values) so a model `id` can never # collide with a scenario tag and confuse the violations index. 
_NAMESPACE = uuid.UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890") @@ -44,16 +44,16 @@ def scenario_uuid(scenario_id: str) -> str: def build_scenario_map( scenarios: Sequence[Scenario], *, - feature_name: str, + model_name: str, ) -> dict[str, str]: """Map _scenario_id values to human-readable scenario IDs. Parameters ---------- scenarios - All scenarios for a feature type. - feature_name - Feature name for the baseline row ID. + All scenarios for a model type. + model_name + Model name for the baseline row ID. Returns ------- @@ -65,7 +65,7 @@ def build_scenario_map( ValueError If two scenarios would produce the same UUID key. """ - baseline_id = f"{feature_name}::baseline" + baseline_id = f"{model_name}::baseline" scenario_map: dict[str, str] = {scenario_uuid(baseline_id): baseline_id} for s in scenarios: @@ -85,7 +85,7 @@ def build_scenario_rows( base_row: dict[str, Any], scenarios: Sequence[Scenario], *, - feature_name: str, + model_name: str, ) -> tuple[list[dict[str, Any]], dict[str, str], dict[str, str]]: """Build mutation rows and scenario mapping from scenarios. @@ -95,8 +95,8 @@ def build_scenario_rows( Valid base row dict from the example loader. scenarios Scenarios to apply. - feature_name - Feature name for baseline ID and UUID namespace. + model_name + Model name for baseline ID and UUID namespace. Returns ------- @@ -105,14 +105,14 @@ def build_scenario_rows( scenario_map maps _scenario_id values to scenario IDs, and skipped maps scenario IDs to skip reasons. """ - scenario_map = build_scenario_map(scenarios, feature_name=feature_name) + scenario_map = build_scenario_map(scenarios, model_name=model_name) base_row = sanitize_row(base_row) # Deep-copy every row so nested structures aren't aliased with base_row; # a future in-place mutation of one row would otherwise leak across rows. 
rows: list[dict[str, Any]] = [ { **copy.deepcopy(base_row), - "_scenario_id": scenario_uuid(f"{feature_name}::baseline"), + "_scenario_id": scenario_uuid(f"{model_name}::baseline"), } ] skipped: dict[str, str] = {} @@ -178,21 +178,21 @@ def _sanitize_in_place(d: dict[str, Any]) -> dict[str, Any]: def assert_schema_covers_checks(schema: StructType, checks: list[Check]) -> None: - """Assert every check's root field exists in the schema. + """Assert every column a check reads exists in the schema. - Synthetic model-level checks (`root_field=None`) pass - unconditionally. Otherwise the root must be a top-level schema - column. This is a fast sanity check; deeper field paths are the - codegen's responsibility and surface at Spark execution time. + Covers field and model-level checks alike: each top-level column in a + check's `read_columns` must be a schema column. This is a fast sanity + check; deeper field paths are the codegen's responsibility and surface + at Spark execution time. """ top_level = {f.name for f in schema.fields} for chk in checks: - if chk.root_field is None or chk.root_field in top_level: - continue - raise AssertionError( - f"Check references root field {chk.root_field!r} " - f"not found in schema. Available: {sorted(top_level)}" - ) + missing = chk.read_columns - top_level + if missing: + raise AssertionError( + f"Check reads columns {sorted(missing)} " + f"not found in schema. Available: {sorted(top_level)}" + ) def run_validation_pipeline( @@ -201,7 +201,7 @@ def run_validation_pipeline( checks: list[Check], base_row: dict[str, Any], scenarios: Sequence[Scenario], - feature_name: str, + model_name: str, ) -> ValidationResults: """Run the full validation pipeline. 
@@ -211,15 +211,16 @@ def run_validation_pipeline( """ assert_schema_covers_checks(schema, checks) rows, scenario_map, skipped = build_scenario_rows( - base_row, scenarios, feature_name=feature_name + base_row, scenarios, model_name=model_name ) augmented_schema = StructType( schema.fields + [StructField("_scenario_id", StringType(), True)] ) df = spark.createDataFrame(rows, schema=augmented_schema, verifySchema=False) # type: ignore[union-attr] violations = explain_errors(evaluate_checks(df, checks), checks) + indexed = violations.select("_scenario_id", "field", "check") return ValidationResults( - violations=index_violations(violations.collect(), scenario_map), + violations=index_violations(indexed.collect(), scenario_map), skipped=skipped, ) diff --git a/packages/overture-schema-pyspark/tests/_support/helpers.py b/packages/overture-schema-pyspark/tests/_support/helpers.py index 2551e4b8b..3905f8cea 100644 --- a/packages/overture-schema-pyspark/tests/_support/helpers.py +++ b/packages/overture-schema-pyspark/tests/_support/helpers.py @@ -31,11 +31,11 @@ def deep_merge(base: dict, scaffold: dict) -> dict: class PathTraversalError(Exception): - """Raised when set_at_path cannot traverse a path in the row dict.""" + """Raised when path traversal cannot proceed.""" def _scaffold_struct(target: dict, name: str) -> dict: - """Return target[name] as a dict, scaffolding `{}` when missing or None.""" + """Return `target[name]` as a dict, scaffolding `{}` when missing or None.""" child = target.get(name) if isinstance(target, dict) else None if child is None: child = {} @@ -44,7 +44,7 @@ def _scaffold_struct(target: dict, name: str) -> dict: def _scaffold_array(target: dict, name: str, path: FieldPath | str) -> list: - """Return target[name] as a list, scaffolding `[{}]` when None. + """Return `target[name]` as a list, scaffolding `[{}]` when None. Empty arrays raise — there is no element to mutate. 
""" @@ -64,10 +64,10 @@ def _scaffold_array(target: dict, name: str, path: FieldPath | str) -> list: def _descend_through_array( segment: ArraySegment, target: dict, path: FieldPath | str ) -> list: - """Enter an array segment and walk through its iter_count. + """Enter an array segment and walk through its `iter_count`. Scaffolds `[{}]` at the outer level when None; deeper levels - (`iter_count > 1`) must already be lists -- scaffolding into + (`iter_count > 1`) must already be lists — scaffolding into nested-list shapes isn't supported because no current schema needs it. diff --git a/packages/overture-schema-pyspark/tests/_support/mutations.py b/packages/overture-schema-pyspark/tests/_support/mutations.py index 4ed3466e3..0e0a12299 100644 --- a/packages/overture-schema-pyspark/tests/_support/mutations.py +++ b/packages/overture-schema-pyspark/tests/_support/mutations.py @@ -12,10 +12,9 @@ from typing import Any from overture.schema.system.field_path import ( - ArrayPath, ArraySegment, FieldPath, - PathSegment, + FieldSegment, ScalarPath, coerce, ) @@ -190,7 +189,7 @@ def mutate_unique_items(row_dict: dict, path: FieldPath | str) -> dict: result = copy.deepcopy(row_dict) segments = coerce(path).segments - parent: Any = _walk_strict(result, segments[:-1], path) + parent: Any = _walk_strict(result, path, segments[:-1]) last = segments[-1] if not isinstance(parent, dict) or last.name not in parent: raise PathTraversalError(f"Missing key '{last.name}' in path '{path}'") @@ -198,13 +197,15 @@ def mutate_unique_items(row_dict: dict, path: FieldPath | str) -> dict: # When the terminal is an array segment, descend `iter_count` levels of # `[0]`. Otherwise the terminal struct already references the list to # mutate. The final `container[key]` must itself be a list. 
- container: Any = parent - key: int | str = last.name - iter_count = last.iter_count if isinstance(last, ArraySegment) else 0 - for depth in range(iter_count): - inner = container[key] - _require_non_empty_array(inner, f"{last.name}{'[]' * depth}", path) - container, key = inner, 0 + container: Any + key: int | str + if isinstance(last, ArraySegment): + container, key = _descend_iter_count( + parent[last.name], last.iter_count, last.name, path + ) + else: + container = parent + key = last.name arr = container[key] if not isinstance(arr, list): raise PathTraversalError( @@ -214,16 +215,56 @@ def mutate_unique_items(row_dict: dict, path: FieldPath | str) -> dict: return result +def mutate_map_key(row_dict: dict, path: FieldPath | str, bad_key: object) -> dict: + """Replace the single map entry's key with *bad_key*, preserving its value. + + *path* is the struct-field path to the map column (e.g. `"names.common"`). + The base row / scaffold populates the map with one valid entry, so a + one-entry replacement suffices to trigger the key check. Raises + `PathTraversalError` when the map is missing or empty. + """ + _key, value = _single_map_entry(row_dict, path) + return _replace_map(row_dict, path, {bad_key: value}) + + +def mutate_map_value(row_dict: dict, path: FieldPath | str, bad_value: object) -> dict: + """Replace the single map entry's value with *bad_value*, preserving its key. + + Mirror of `mutate_map_key` for the value side. Raises + `PathTraversalError` when the map is missing or empty. 
+ """ + key, _value = _single_map_entry(row_dict, path) + return _replace_map(row_dict, path, {key: bad_value}) + + +def _single_map_entry(row_dict: dict, path: FieldPath | str) -> tuple[Any, Any]: + """Return the `(key, value)` of the sole entry of the map at *path*.""" + m = _get_nested(row_dict, path) + if not isinstance(m, dict) or not m: + raise PathTraversalError(f"Missing or empty map at path '{path}'") + return next(iter(m.items())) + + +def _replace_map(row_dict: dict, path: FieldPath | str, new_map: dict) -> dict: + """Return a deep copy of *row_dict* with the map at *path* replaced.""" + result = copy.deepcopy(row_dict) + _set_nested(result, path, new_map) + return result + + def _walk_strict( - target: Any, segments: tuple[PathSegment, ...], path: FieldPath | str + target: Any, path: FieldPath | str, segments: tuple[FieldSegment, ...] | None = None ) -> Any: - """Walk segments without scaffolding. + """Walk *path* without scaffolding, raising on missing or null nodes. Raises `PathTraversalError` on missing or null struct intermediates, - and on empty arrays encountered at array intermediates (each `[]` in - a segment's `iter_count` descends one element, which requires a - non-empty list). + and on empty arrays encountered at array segments (each `[]` in a + segment's `iter_count` descends one element, which requires a + non-empty list). When *segments* is provided it overrides the + segments derived from *path*; *path* still labels error messages. 
""" + if segments is None: + segments = coerce(path).segments for segment in segments: if not isinstance(target, dict) or target.get(segment.name) is None: raise PathTraversalError( @@ -231,14 +272,34 @@ def _walk_strict( ) target = target[segment.name] if isinstance(segment, ArraySegment): - for _ in range(segment.iter_count): - _require_non_empty_array(target, segment.name, path) - target = target[0] + container, key = _descend_iter_count( + target, segment.iter_count, segment.name, path + ) + target = container[key] return target +def _descend_iter_count( + arr: list, iter_count: int, name: str, path: FieldPath | str +) -> tuple[Any, int]: + """Descend *iter_count* levels into nested lists via element 0. + + Each level requires a non-empty list; the error label for depth `d` + is *name* followed by `d` `[]` markers. Returns the final + `(container, key)` write site so callers can read (`container[key]`) + or replace (`container[key] = ...`) the innermost element. + """ + container: Any = [arr] + key = 0 + for depth in range(iter_count): + inner = container[key] + _require_non_empty_array(inner, f"{name}{'[]' * depth}", path) + container, key = inner, 0 + return container, key + + def _require_non_empty_array(value: Any, name: str, path: FieldPath | str) -> None: - """Raise PathTraversalError unless *value* is a non-empty list.""" + """Raise `PathTraversalError` unless *value* is a non-empty list.""" if not isinstance(value, list) or len(value) == 0: raise PathTraversalError(f"Empty or missing array at '{name}' in path '{path}'") @@ -339,15 +400,15 @@ def _ensure_condition( def _as_scalar_path(path: FieldPath | str) -> ScalarPath: - """Coerce *path* to a ScalarPath, rejecting any array markers. + """Coerce *path* to a `ScalarPath`, rejecting array or map markers. - The dict-walking helpers operate only on struct fields; an array - marker indicates the caller wanted array-aware navigation and picked - the wrong helper. 
+ The dict-walking helpers operate only on struct fields; an array or + map-projection marker indicates the caller wanted array-/map-aware + navigation and picked the wrong helper. """ coerced = coerce(path) - if isinstance(coerced, ArrayPath): - raise ValueError(f"struct-only path expected, got array segment in {path!r}") + if not isinstance(coerced, ScalarPath): + raise ValueError(f"struct-only path expected, got {coerced!r} for {path!r}") return coerced @@ -370,7 +431,7 @@ def _set_nested( if child is None: if value is None: return - raise TypeError(f"None intermediate at '{part}' in path '{path}'") + raise PathTraversalError(f"None intermediate at '{part}' in path '{path}'") target = child target[segments[-1].name] = value @@ -378,7 +439,7 @@ def _set_nested( def _get_nested(d: dict, path: FieldPath | str) -> object: """Get a value from a nested dict using a struct-field path. - Returns None when any intermediate key is missing. + Returns None when any intermediate key is missing or not a dict. """ target: object = d for segment in _as_scalar_path(path).segments: diff --git a/packages/overture-schema-pyspark/tests/_support/registry.py b/packages/overture-schema-pyspark/tests/_support/registry.py new file mode 100644 index 000000000..e6c917539 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/_support/registry.py @@ -0,0 +1,43 @@ +"""Registry registration helper for tests. + +Provides a context manager that registers a model type in the runtime +`REGISTRY` and guarantees teardown on exit, even if the test body raises. 
+""" + +from collections.abc import Callable, Iterator +from contextlib import contextmanager + +from overture.schema.pyspark._registry import REGISTRY +from overture.schema.pyspark.check import Check, ModelValidation +from pyspark.sql.types import StructType + + +@contextmanager +def register_model( + model_type: str, + schema: StructType, + checks: Callable[[], list[Check]], +) -> Iterator[None]: + """Register a model type in `REGISTRY` for the duration of a test. + + Guarantees `del REGISTRY[model_type]` on exit so a failed test body + never leaks an entry into sibling tests. + + Parameters + ---------- + model_type + The registry key string (e.g. `"_test_cli"`). + schema + StructType to associate with the model type. + checks + Callable returning the list of `Check` objects for the model type. + + Yields + ------ + None + """ + REGISTRY[model_type] = ModelValidation(schema=schema, checks=checks) + try: + yield + finally: + del REGISTRY[model_type] diff --git a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py index 6720da35e..4082273ae 100644 --- a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py @@ -5,8 +5,14 @@ check_struct_unique, coalesce_errors, error_msg, + map_keys_check, + map_values_check, nested_array_check, ) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_require_any_of, + check_string_min_length, +) from pyspark.sql import Row, SparkSession from pyspark.sql import functions as F @@ -244,6 +250,132 @@ def test_nested_array_check_no_errors(spark: SparkSession) -> None: assert result[0]["errs"] == [] +def test_map_keys_check_flags_bad_key(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(tags={"good": "v", "bad": "v"})], + schema="tags map", + ) + result = df.select( + map_keys_check("tags", 
lambda k: F.when(k == "bad", F.lit("bad key"))).alias( + "errs" + ) + ).collect() + assert result[0]["errs"] == ["bad key"] + + +def test_map_values_check_flags_bad_value(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(tags={"a": "ok", "b": "bad"})], + schema="tags map", + ) + result = df.select( + map_values_check( + "tags", lambda v: F.when(v == "bad", F.lit("bad value")) + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["bad value"] + + +def test_map_values_check_descends_into_value_struct_field( + spark: SparkSession, +) -> None: + # A field check on a `dict[str, Model]` value navigates into the value + # struct: map_values_check over a struct-navigating lambda, the exact + # composition the renderer emits for a map-of-model value field. + df = spark.createDataFrame( + [Row(items={"a": Row(label="")})], + schema="items map>", + ) + result = df.select( + map_values_check( + "items", lambda v: check_string_min_length(v["label"], 1) + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["minimum length 1, got 0"] + + +def test_map_values_check_passes_valid_value_struct_field( + spark: SparkSession, +) -> None: + df = spark.createDataFrame( + [Row(items={"a": Row(label="ok")})], + schema="items map>", + ) + result = df.select( + map_values_check( + "items", lambda v: check_string_min_length(v["label"], 1) + ).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_map_values_check_enforces_value_model_constraint( + spark: SparkSession, +) -> None: + # A model-level constraint on a `dict[str, Model]` value: map_values_check + # wrapping check_require_any_of over the value struct's fields, the exact + # composition the renderer emits for a map-of-model value-model constraint. 
+ df = spark.createDataFrame( + [Row(subs={"a": Row(foo=None, bar=None)})], + schema="subs map>", + ) + result = df.select( + map_values_check( + "subs", + lambda v: check_require_any_of([v["foo"], v["bar"]], ["foo", "bar"]), + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["requires at least one of foo, bar"] + + +def test_map_values_check_passes_satisfied_value_model_constraint( + spark: SparkSession, +) -> None: + df = spark.createDataFrame( + [Row(subs={"a": Row(foo=1, bar=None)})], + schema="subs map>", + ) + result = df.select( + map_values_check( + "subs", + lambda v: check_require_any_of([v["foo"], v["bar"]], ["foo", "bar"]), + ).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_map_keys_check_null_column_returns_null(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(tags=None)], schema="tags map") + result = df.select( + map_keys_check("tags", lambda k: F.lit("err")).alias("errs") + ).collect() + assert result[0]["errs"] is None + + +def test_map_values_check_all_valid_empty(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(tags={"a": "ok"})], schema="tags map" + ) + result = df.select( + map_values_check("tags", lambda v: F.when(v == "bad", F.lit("err"))).alias( + "errs" + ) + ).collect() + assert result[0]["errs"] == [] + + +def test_map_keys_check_accepts_column(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(tags={"bad": "v"})], schema="tags map" + ) + result = df.select( + map_keys_check(F.col("tags"), lambda k: F.when(k == "bad", F.lit("err"))).alias( + "errs" + ) + ).collect() + assert result[0]["errs"] == ["err"] + + def test_coalesce_errors_null_becomes_empty(spark: SparkSession) -> None: df = spark.createDataFrame([Row(x=1)]) result = df.select( diff --git a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py index 6a4289e35..831dc8a8e 100644 --- 
a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py @@ -68,6 +68,46 @@ def test_check_bounds_null_passthrough(spark: SparkSession) -> None: assert result[0]["err"] is None +def test_check_bounds_nan_lower_bound_violation(spark: SparkSession) -> None: + """NaN satisfies no Pydantic bound, but Spark sorts NaN above all values, + so a lower bound (NaN < v) never fires. check_bounds must reject it.""" + df = spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select(check_bounds(F.col("val"), ge=0).alias("err")).collect() + assert result[0]["err"] is not None + assert "NaN" in result[0]["err"] + + +def test_check_bounds_nan_gt_violation(spark: SparkSession) -> None: + """Same lower-bound leak as ge, via the strict-greater comparison.""" + df = spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select(check_bounds(F.col("val"), gt=0).alias("err")).collect() + assert result[0]["err"] is not None + assert "NaN" in result[0]["err"] + + +def test_check_bounds_nan_upper_bound_violation(spark: SparkSession) -> None: + """An upper bound already rejects NaN in Spark (NaN > v is true); the + explicit NaN check keeps that behavior.""" + df = spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select(check_bounds(F.col("val"), le=1).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bounds_nan_no_bounds_passes(spark: SparkSession) -> None: + """With no bounds there is nothing to violate; NaN passes, matching + Pydantic's allow_inf_nan default for unconstrained floats.""" + df = spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select(check_bounds(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bounds_valid_float_passes(spark: SparkSession) -> None: + """A finite 
in-range float is unaffected by the NaN guard.""" + df = spark.createDataFrame([Row(val=0.5)], schema="val double") + result = df.select(check_bounds(F.col("val"), ge=0, le=1).alias("err")).collect() + assert result[0]["err"] is None + + def test_check_enum_valid(spark: SparkSession) -> None: df = spark.createDataFrame([Row(val="road")]) result = df.select( @@ -501,6 +541,64 @@ def test_big_endian_wkb(self, spark: SparkSession) -> None: ).collect() assert result[0]["err"] is None + def test_iso_wkb_z_point_accepted(self, spark: SparkSession) -> None: + """ISO WKB encodes Z by offsetting the type (PointZ=1001), shifting + the low byte to 0xE9. GeoParquet mandates ISO WKB, so 3D geometries + reach the check this way and must still validate by base type.""" + iso_point_z = struct.pack(" None: + iso_point_z_be = struct.pack(">bIddd", 0, 1001, 0.0, 0.0, 5.0) + df = spark.createDataFrame( + [Row(geometry=bytearray(iso_point_z_be))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_ewkb_z_point_accepted(self, spark: SparkSession) -> None: + """EWKB encodes Z as a high flag bit (0x80000001), leaving the low + byte at 0x01 -- shapely's `.wkb` default. 
Must keep validating.""" + ewkb_point_z = struct.pack(" None: + """shapely's native 3D WKB output validates as POINT.""" + df = spark.createDataFrame( + [Row(geometry=bytearray(Point(0, 0, 5).wkb))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_iso_wkb_z_wrong_type_rejected(self, spark: SparkSession) -> None: + """A 3D LineString (ISO 1002) is still rejected when POINT is expected + -- normalization strips the dimension offset, not the base type.""" + iso_linestring_z = struct.pack(" None: @@ -943,6 +1041,19 @@ def test_check_url_format_https_valid(spark: SparkSession) -> None: assert result[0]["err"] is None +def test_check_url_format_uppercase_scheme_valid(spark: SparkSession) -> None: + """Pydantic HttpUrl lowercases the scheme, so HTTP:// is accepted.""" + df = spark.createDataFrame([Row(val="HTTP://example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_format_mixed_case_scheme_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="HtTpS://example.com/path")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_check_url_format_no_scheme_invalid(spark: SparkSession) -> None: df = spark.createDataFrame([Row(val="example.com")]) result = df.select(check_url_format(F.col("val")).alias("err")).collect() diff --git a/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py index 937b8862d..ddd756a00 100644 --- a/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py @@ -266,3 +266,33 @@ def test_array_vs_primitive(self) -> None: ) result = compare_schemas(actual, 
expected) assert result == [SchemaMismatch("x", "StringType", "ArrayType")] + + +class TestSchemaMismatchRoot: + """`root` strips the step markers `_compare` embeds in `path`. + + The top-level column a mismatch belongs to is everything before the + first struct (`.`), array (`[]`), or map (`{key}`/`{value}`) step, so + it matches the column-granular `Check.read_columns`. + """ + + def test_top_level(self) -> None: + assert SchemaMismatch("theme", "missing", "StringType").root == "theme" + + def test_struct_field(self) -> None: + assert SchemaMismatch("bbox.xmin", "missing", "DoubleType").root == "bbox" + + def test_array_element_field(self) -> None: + assert ( + SchemaMismatch("sources[].confidence", "missing", "DoubleType").root + == "sources" + ) + + def test_array_element(self) -> None: + assert SchemaMismatch("tags[]", "IntegerType", "StringType").root == "tags" + + def test_map_key(self) -> None: + assert SchemaMismatch("tags{key}", "IntegerType", "StringType").root == "tags" + + def test_map_value(self) -> None: + assert SchemaMismatch("tags{value}", "IntegerType", "StringType").root == "tags" diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py index 3c5b66709..2a2ac1619 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -209,25 +209,25 @@ expected_check="stripped", ), Scenario( - id="address::sources[].confidence:bounds", + id="address::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - 
id="address::sources[].confidence:bounds_1", + id="address::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -391,7 +391,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="address", + model_name="address", ) @@ -403,7 +403,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="address", + model_name="address", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py index 168630de6..887a8899c 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py @@ -16,6 +16,7 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_map_key, mutate_map_value from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -33,7 +34,7 @@ "coverage_bbox": [0.0, 0.0, 0.0, 0.0], } ], - "license_priority": {}, + "license_priority": {"ODbL-1.0": 0}, } @@ -69,7 +70,7 @@ "requires_attribution": "", } ], - "license_priority": {}, + "license_priority": {"ODbL-1.0": 0}, } @@ -756,6 +757,20 @@ expected_field="license_priority", expected_check="required", ), + Scenario( + id="sources::license_priority{key}:pattern", + scaffold={}, + mutate=lambda row: mutate_map_key(row, "license_priority", "bad license!"), + expected_field="license_priority{key}", + expected_check="pattern", + ), + Scenario( + id="sources::license_priority{value}:bounds", + 
scaffold={}, + mutate=lambda row: mutate_map_value(row, "license_priority", -1), + expected_field="license_priority{value}", + expected_check="bounds", + ), ] @@ -772,7 +787,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="sources", + model_name="sources", ) @@ -784,7 +799,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="sources", + model_name="sources", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py index 9fdc1679d..f5e638217 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -204,25 +204,25 @@ expected_check="stripped", ), Scenario( - id="bathymetry::sources[].confidence:bounds", + id="bathymetry::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="bathymetry::sources[].confidence:bounds_1", + id="bathymetry::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -273,45 +273,45 @@ expected_check="bounds", ), Scenario( - id="bathymetry::cartography.prominence:bounds", + id="bathymetry::cartography.prominence_0:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 0), - 
expected_field="cartography.prominence", + expected_field="cartography.prominence_0", expected_check="bounds", ), Scenario( - id="bathymetry::cartography.prominence:bounds_1", + id="bathymetry::cartography.prominence_1:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 101), - expected_field="cartography.prominence", + expected_field="cartography.prominence_1", expected_check="bounds", ), Scenario( - id="bathymetry::cartography.min_zoom:bounds", + id="bathymetry::cartography.min_zoom_0:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", -1), - expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_0", expected_check="bounds", ), Scenario( - id="bathymetry::cartography.min_zoom:bounds_1", + id="bathymetry::cartography.min_zoom_1:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", 24), - expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_1", expected_check="bounds", ), Scenario( - id="bathymetry::cartography.max_zoom:bounds", + id="bathymetry::cartography.max_zoom_0:bounds", scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", -1), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_0", expected_check="bounds", ), Scenario( - id="bathymetry::cartography.max_zoom:bounds_1", + id="bathymetry::cartography.max_zoom_1:bounds", scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", 24), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_1", expected_check="bounds", ), ] @@ -330,7 +330,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="bathymetry", + model_name="bathymetry", ) @@ -342,7 +342,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, 
BASE_ROW_POPULATED, SCENARIOS, - feature_name="bathymetry", + model_name="bathymetry", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py index 33839d9ee..fa4a9c055 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -54,7 +58,7 @@ "surface": "asphalt", "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -224,25 +228,25 @@ expected_check="stripped", ), Scenario( - id="infrastructure::sources[].confidence:bounds", + id="infrastructure::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="infrastructure::sources[].confidence:bounds_1", + id="infrastructure::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -341,6 +345,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="infrastructure::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": 
"clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="infrastructure::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="infrastructure::names.rules[].value:required", scaffold={ @@ -579,7 +597,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="infrastructure", + model_name="infrastructure", ) @@ -591,7 +609,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="infrastructure", + model_name="infrastructure", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py index 2784a5806..45d14b4be 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -52,7 +56,7 @@ "surface": "asphalt", "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -222,25 +226,25 @@ expected_check="stripped", ), Scenario( - id="land::sources[].confidence:bounds", + id="land::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 
0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="land::sources[].confidence:bounds_1", + id="land::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -325,6 +329,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="land::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="land::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="land::names.rules[].value:required", scaffold={ @@ -563,7 +581,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="land", + model_name="land", ) @@ -575,7 +593,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="land", + model_name="land", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py index 962268634..4f892f5a1 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ 
-204,25 +204,25 @@ expected_check="stripped", ), Scenario( - id="land_cover::sources[].confidence:bounds", + id="land_cover::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="land_cover::sources[].confidence:bounds_1", + id="land_cover::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -273,45 +273,45 @@ expected_check="enum", ), Scenario( - id="land_cover::cartography.prominence:bounds", + id="land_cover::cartography.prominence_0:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 0), - expected_field="cartography.prominence", + expected_field="cartography.prominence_0", expected_check="bounds", ), Scenario( - id="land_cover::cartography.prominence:bounds_1", + id="land_cover::cartography.prominence_1:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 101), - expected_field="cartography.prominence", + expected_field="cartography.prominence_1", expected_check="bounds", ), Scenario( - id="land_cover::cartography.min_zoom:bounds", + id="land_cover::cartography.min_zoom_0:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", -1), - expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_0", expected_check="bounds", ), Scenario( - id="land_cover::cartography.min_zoom:bounds_1", + id="land_cover::cartography.min_zoom_1:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", 24), - 
expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_1", expected_check="bounds", ), Scenario( - id="land_cover::cartography.max_zoom:bounds", + id="land_cover::cartography.max_zoom_0:bounds", scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", -1), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_0", expected_check="bounds", ), Scenario( - id="land_cover::cartography.max_zoom:bounds_1", + id="land_cover::cartography.max_zoom_1:bounds", scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", 24), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_1", expected_check="bounds", ), ] @@ -330,7 +330,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="land_cover", + model_name="land_cover", ) @@ -342,7 +342,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="land_cover", + model_name="land_cover", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py index 19bd140a9..a2a3b12f6 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -54,7 +58,7 @@ "surface": "asphalt", "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ 
-224,25 +228,25 @@ expected_check="stripped", ), Scenario( - id="land_use::sources[].confidence:bounds", + id="land_use::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="land_use::sources[].confidence:bounds_1", + id="land_use::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -341,6 +345,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="land_use::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="land_use::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="land_use::names.rules[].value:required", scaffold={ @@ -579,7 +597,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="land_use", + model_name="land_use", ) @@ -591,7 +609,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="land_use", + model_name="land_use", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py index 9a1c9d57e..b5e2fc047 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -53,7 +57,7 @@ "level": 0, "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -222,25 +226,25 @@ expected_check="stripped", ), Scenario( - id="water::sources[].confidence:bounds", + id="water::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="water::sources[].confidence:bounds_1", + id="water::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -311,6 +315,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="water::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="water::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + 
mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="water::names.rules[].value:required", scaffold={ @@ -549,7 +567,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="water", + model_name="water", ) @@ -561,7 +579,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="water", + model_name="water", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py index 9102f235c..5815c9012 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -51,7 +55,7 @@ "has_parts": False, "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -233,25 +237,25 @@ expected_check="stripped", ), Scenario( - id="building::sources[].confidence:bounds", + id="building::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="building::sources[].confidence:bounds_1", + id="building::sources[].confidence_1:bounds", scaffold={ 
"sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -322,6 +326,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="building::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="building::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="building::names.rules[].value:required", scaffold={ @@ -594,17 +612,17 @@ expected_check="enum", ), Scenario( - id="building::roof_direction:bounds", + id="building::roof_direction_0:bounds", scaffold={"roof_direction": 0.0}, mutate=set_at_path("roof_direction", -1.0), - expected_field="roof_direction", + expected_field="roof_direction_0", expected_check="bounds", ), Scenario( - id="building::roof_direction:bounds_1", + id="building::roof_direction_1:bounds", scaffold={"roof_direction": 0.0}, mutate=set_at_path("roof_direction", 360.0), - expected_field="roof_direction", + expected_field="roof_direction_1", expected_check="bounds", ), Scenario( @@ -637,7 +655,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="building", + model_name="building", ) @@ -649,7 +667,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="building", + model_name="building", ) diff --git 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py index 45589511f..9daf3c2bc 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -50,7 +54,7 @@ "building_id": "a", "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -232,25 +236,25 @@ expected_check="stripped", ), Scenario( - id="building_part::sources[].confidence:bounds", + id="building_part::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="building_part::sources[].confidence:bounds_1", + id="building_part::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -328,6 +332,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="building_part::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + 
expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="building_part::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="building_part::names.rules[].value:required", scaffold={ @@ -600,17 +618,17 @@ expected_check="enum", ), Scenario( - id="building_part::roof_direction:bounds", + id="building_part::roof_direction_0:bounds", scaffold={"roof_direction": 0.0}, mutate=set_at_path("roof_direction", -1.0), - expected_field="roof_direction", + expected_field="roof_direction_0", expected_check="bounds", ), Scenario( - id="building_part::roof_direction:bounds_1", + id="building_part::roof_direction_1:bounds", scaffold={"roof_direction": 0.0}, mutate=set_at_path("roof_direction", 360.0), - expected_field="roof_direction", + expected_field="roof_direction_1", expected_check="bounds", ), Scenario( @@ -643,7 +661,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="building_part", + model_name="building_part", ) @@ -655,7 +673,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="building_part", + model_name="building_part", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py index 0c16e74d7..128455a44 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -18,6 +18,8 @@ from ....._support.helpers import set_at_path from ....._support.mutations import ( mutate_forbid_if, + 
mutate_map_key, + mutate_map_value, mutate_require_if, mutate_unique_items, ) @@ -41,7 +43,7 @@ "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -75,7 +77,7 @@ "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]], "admin_level": 0, "class": "megacity", - "local_type": {}, + "local_type": {"en": "clean"}, "region": "US-CA", "perspectives": {"mode": "accepted_by", "countries": ["US"]}, "norms": {"driving_side": "left"}, @@ -88,45 +90,45 @@ SCENARIOS: list[Scenario] = [ Scenario( - id="division::cartography.prominence:bounds", + id="division::cartography.prominence_0:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 0), - expected_field="cartography.prominence", + expected_field="cartography.prominence_0", expected_check="bounds", ), Scenario( - id="division::cartography.prominence:bounds_1", + id="division::cartography.prominence_1:bounds", scaffold={"cartography": {"prominence": 1}}, mutate=set_at_path("cartography.prominence", 101), - expected_field="cartography.prominence", + expected_field="cartography.prominence_1", expected_check="bounds", ), Scenario( - id="division::cartography.min_zoom:bounds", + id="division::cartography.min_zoom_0:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", -1), - expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_0", expected_check="bounds", ), Scenario( - id="division::cartography.min_zoom:bounds_1", + id="division::cartography.min_zoom_1:bounds", scaffold={"cartography": {"min_zoom": 0}}, mutate=set_at_path("cartography.min_zoom", 24), - expected_field="cartography.min_zoom", + expected_field="cartography.min_zoom_1", expected_check="bounds", ), Scenario( - id="division::cartography.max_zoom:bounds", + id="division::cartography.max_zoom_0:bounds", 
scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", -1), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_0", expected_check="bounds", ), Scenario( - id="division::cartography.max_zoom:bounds_1", + id="division::cartography.max_zoom_1:bounds", scaffold={"cartography": {"max_zoom": 0}}, mutate=set_at_path("cartography.max_zoom", 24), - expected_field="cartography.max_zoom", + expected_field="cartography.max_zoom_1", expected_check="bounds", ), Scenario( @@ -157,6 +159,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="division::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="division::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="division::names.rules[].value:required", scaffold={ @@ -523,25 +539,25 @@ expected_check="stripped", ), Scenario( - id="division::sources[].confidence:bounds", + id="division::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="division::sources[].confidence:bounds_1", + id="division::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( 
@@ -727,17 +743,17 @@ expected_check="no_whitespace", ), Scenario( - id="division::admin_level:bounds", + id="division::admin_level_0:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", -1), - expected_field="admin_level", + expected_field="admin_level_0", expected_check="bounds", ), Scenario( - id="division::admin_level:bounds_1", + id="division::admin_level_1:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", 17), - expected_field="admin_level", + expected_field="admin_level_1", expected_check="bounds", ), Scenario( @@ -747,6 +763,20 @@ expected_field="class", expected_check="enum", ), + Scenario( + id="division::local_type{key}:language_tag", + scaffold={"local_type": {"en": "clean"}}, + mutate=lambda row: mutate_map_key(row, "local_type", "123"), + expected_field="local_type{key}", + expected_check="language_tag", + ), + Scenario( + id="division::local_type{value}:stripped", + scaffold={"local_type": {"en": "clean"}}, + mutate=lambda row: mutate_map_value(row, "local_type", " has spaces "), + expected_field="local_type{value}", + expected_check="stripped", + ), Scenario( id="division::region:region_code", scaffold={"region": "US-CA"}, @@ -978,7 +1008,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="division", + model_name="division", ) @@ -990,7 +1020,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="division", + model_name="division", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py index d9170b23b..41c003074 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -17,6 +17,8 @@ ) from ....._support.helpers import set_at_path from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, mutate_radio_group, mutate_require_if, mutate_unique_items, @@ -42,7 +44,7 @@ BASE_ROW_POPULATED: dict = { "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -111,6 +113,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="division_area::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="division_area::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="division_area::names.rules[].value:required", scaffold={ @@ -477,25 +493,25 @@ expected_check="stripped", ), Scenario( - id="division_area::sources[].confidence:bounds", + id="division_area::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="division_area::sources[].confidence:bounds_1", + id="division_area::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -602,17 +618,17 @@ 
expected_check="region_code", ), Scenario( - id="division_area::admin_level:bounds", + id="division_area::admin_level_0:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", -1), - expected_field="admin_level", + expected_field="admin_level_0", expected_check="bounds", ), Scenario( - id="division_area::admin_level:bounds_1", + id="division_area::admin_level_1:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", 17), - expected_field="admin_level", + expected_field="admin_level_1", expected_check="bounds", ), Scenario( @@ -688,7 +704,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="division_area", + model_name="division_area", ) @@ -700,7 +716,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="division_area", + model_name="division_area", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py index 41f9d3d59..f42e72153 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -220,25 +220,25 @@ expected_check="stripped", ), Scenario( - id="division_boundary::sources[].confidence:bounds", + id="division_boundary::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="division_boundary::sources[].confidence:bounds_1", + id="division_boundary::sources[].confidence_1:bounds", scaffold={ "sources": 
[ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -359,17 +359,17 @@ expected_check="region_code", ), Scenario( - id="division_boundary::admin_level:bounds", + id="division_boundary::admin_level_0:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", -1), - expected_field="admin_level", + expected_field="admin_level_0", expected_check="bounds", ), Scenario( - id="division_boundary::admin_level:bounds_1", + id="division_boundary::admin_level_1:bounds", scaffold={"admin_level": 0}, mutate=set_at_path("admin_level", 17), - expected_field="admin_level", + expected_field="admin_level_1", expected_check="bounds", ), Scenario( @@ -503,7 +503,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="division_boundary", + model_name="division_boundary", ) @@ -515,7 +515,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="division_boundary", + model_name="division_boundary", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py index b8a128bb0..f906eb4e4 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -16,7 +16,11 @@ run_validation_pipeline, ) from ....._support.helpers import set_at_path -from ....._support.mutations import mutate_unique_items +from ....._support.mutations import ( + mutate_map_key, + mutate_map_value, + mutate_unique_items, +) from ....._support.scenarios import Scenario BASE_ROW_SPARSE: dict = { @@ -62,7 +66,7 
@@ "brand": { "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -87,7 +91,7 @@ ], "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -254,25 +258,25 @@ expected_check="stripped", ), Scenario( - id="place::sources[].confidence:bounds", + id="place::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="place::sources[].confidence:bounds_1", + id="place::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -432,17 +436,17 @@ expected_check="snake_case", ), Scenario( - id="place::confidence:bounds", + id="place::confidence_0:bounds", scaffold={"confidence": 0.0}, mutate=set_at_path("confidence", -1.0), - expected_field="confidence", + expected_field="confidence_0", expected_check="bounds", ), Scenario( - id="place::confidence:bounds_1", + id="place::confidence_1:bounds", scaffold={"confidence": 0.0}, mutate=set_at_path("confidence", 2.0), - expected_field="confidence", + expected_field="confidence_1", expected_check="bounds", ), Scenario( @@ -570,6 +574,20 @@ expected_field="brand.names.primary", expected_check="stripped", ), + Scenario( + id="place::brand.names.common{key}:language_tag", + scaffold={"brand": {"names": {"primary": "a", "common": {"en": "clean"}}}}, + mutate=lambda row: mutate_map_key(row, "brand.names.common", "123"), + expected_field="brand.names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="place::brand.names.common{value}:stripped", + 
scaffold={"brand": {"names": {"primary": "a", "common": {"en": "clean"}}}}, + mutate=lambda row: mutate_map_value(row, "brand.names.common", " has spaces "), + expected_field="brand.names.common{value}", + expected_check="stripped", + ), Scenario( id="place::brand.names.rules[].value:required", scaffold={ @@ -905,6 +923,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="place::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="place::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="place::names.rules[].value:required", scaffold={ @@ -1136,7 +1168,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="place", + model_name="place", ) @@ -1148,7 +1180,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="place", + model_name="place", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py index 7fb7739ad..f4b3738e5 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -201,25 +201,25 @@ expected_check="stripped", ), Scenario( - id="connector::sources[].confidence:bounds", + id="connector::sources[].confidence_0:bounds", scaffold={ "sources": [ 
{"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="connector::sources[].confidence:bounds_1", + id="connector::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -271,7 +271,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="connector", + model_name="connector", ) @@ -283,7 +283,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="connector", + model_name="connector", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py index fda44888d..ff866fe2a 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -18,6 +18,8 @@ from ....._support.helpers import set_at_path from ....._support.mutations import ( mutate_forbid_if, + mutate_map_key, + mutate_map_value, mutate_require_any_of, mutate_require_if, mutate_unique_items, @@ -38,7 +40,7 @@ BASE_ROW_POPULATED: dict = { "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -259,25 +261,25 @@ expected_check="stripped", ), Scenario( - id="segment::sources[].confidence:bounds", + id="segment::sources[].confidence_0:bounds", scaffold={ "sources": [ 
{"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="segment::sources[].confidence:bounds_1", + id="segment::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -591,7 +593,7 @@ expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required", + id="segment::access_restrictions[].when.vehicle[].value_0:required", scaffold={ "access_restrictions": [ { @@ -601,11 +603,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required_1", + id="segment::access_restrictions[].when.vehicle[].value_1:required", scaffold={ "access_restrictions": [ { @@ -615,7 +617,7 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_1", expected_check="required", ), Scenario( @@ -633,7 +635,7 @@ expected_check="bounds", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required", + id="segment::access_restrictions[].when.vehicle[].unit_0:required", scaffold={ "access_restrictions": [ { @@ -643,11 +645,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + 
expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum", + id="segment::access_restrictions[].when.vehicle[].unit_0:enum", scaffold={ "access_restrictions": [ { @@ -657,11 +659,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:required", scaffold={ "access_restrictions": [ { @@ -671,11 +673,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:enum", scaffold={ "access_restrictions": [ { @@ -685,7 +687,7 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="enum", ), Scenario( @@ -724,17 +726,17 @@ expected_check="no_whitespace", ), Scenario( - id="segment::connectors[].at:bounds", + id="segment::connectors[].at_0:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", -1.0), - expected_field="connectors[].at", + expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( - id="segment::connectors[].at:bounds_1", + id="segment::connectors[].at_1:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", 2.0), - 
expected_field="connectors[].at", + expected_field="connectors[].at_1", expected_check="bounds", ), Scenario( @@ -905,6 +907,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="segment::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="segment::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="segment::names.rules[].value:required", scaffold={ @@ -1121,17 +1137,17 @@ expected_check="enum", ), Scenario( - id="segment::class:required", + id="segment::class_1:required", scaffold={}, mutate=set_at_path("class", None), - expected_field="class", + expected_field="class_1", expected_check="required", ), Scenario( - id="segment::class:enum", + id="segment::class_1:enum", scaffold={}, mutate=set_at_path("class", "__INVALID__"), - expected_field="class", + expected_field="class_1", expected_check="enum", ), Scenario( @@ -1423,7 +1439,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="segment", + model_name="segment", ) @@ -1435,7 +1451,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="segment", + model_name="segment", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py index 137862634..da78cbfba 100644 --- 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -18,6 +18,8 @@ from ....._support.helpers import set_at_path from ....._support.mutations import ( mutate_forbid_if, + mutate_map_key, + mutate_map_value, mutate_require_any_of, mutate_require_if, mutate_unique_items, @@ -38,7 +40,7 @@ BASE_ROW_POPULATED: dict = { "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -318,25 +320,25 @@ expected_check="stripped", ), Scenario( - id="segment::sources[].confidence:bounds", + id="segment::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="segment::sources[].confidence:bounds_1", + id="segment::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -650,7 +652,7 @@ expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required", + id="segment::access_restrictions[].when.vehicle[].value_0:required", scaffold={ "access_restrictions": [ { @@ -660,11 +662,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required_1", + id="segment::access_restrictions[].when.vehicle[].value_1:required", 
scaffold={ "access_restrictions": [ { @@ -674,7 +676,7 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_1", expected_check="required", ), Scenario( @@ -692,7 +694,7 @@ expected_check="bounds", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required", + id="segment::access_restrictions[].when.vehicle[].unit_0:required", scaffold={ "access_restrictions": [ { @@ -702,11 +704,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum", + id="segment::access_restrictions[].when.vehicle[].unit_0:enum", scaffold={ "access_restrictions": [ { @@ -716,11 +718,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:required", scaffold={ "access_restrictions": [ { @@ -730,11 +732,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:enum", scaffold={ "access_restrictions": [ { @@ -744,7 +746,7 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - 
expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="enum", ), Scenario( @@ -783,17 +785,17 @@ expected_check="no_whitespace", ), Scenario( - id="segment::connectors[].at:bounds", + id="segment::connectors[].at_0:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", -1.0), - expected_field="connectors[].at", + expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( - id="segment::connectors[].at:bounds_1", + id="segment::connectors[].at_1:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", 2.0), - expected_field="connectors[].at", + expected_field="connectors[].at_1", expected_check="bounds", ), Scenario( @@ -964,6 +966,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="segment::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="segment::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="segment::names.rules[].value:required", scaffold={ @@ -1180,17 +1196,17 @@ expected_check="enum", ), Scenario( - id="segment::class:required", + id="segment::class_0:required", scaffold={}, mutate=set_at_path("class", None), - expected_field="class", + expected_field="class_0", expected_check="required", ), Scenario( - id="segment::class:enum", + id="segment::class_0:enum", scaffold={}, mutate=set_at_path("class", "__INVALID__"), - expected_field="class", + expected_field="class_0", expected_check="enum", ), Scenario( @@ 
-2032,7 +2048,7 @@ expected_check="enum", ), Scenario( - id="segment::prohibited_transitions[].when.vehicle[].value:required", + id="segment::prohibited_transitions[].when.vehicle[].value_0:required", scaffold={ "prohibited_transitions": [ { @@ -2043,11 +2059,11 @@ ] }, mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), - expected_field="prohibited_transitions[].when.vehicle[].value", + expected_field="prohibited_transitions[].when.vehicle[].value_0", expected_check="required", ), Scenario( - id="segment::prohibited_transitions[].when.vehicle[].value:required_1", + id="segment::prohibited_transitions[].when.vehicle[].value_1:required", scaffold={ "prohibited_transitions": [ { @@ -2058,7 +2074,7 @@ ] }, mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), - expected_field="prohibited_transitions[].when.vehicle[].value", + expected_field="prohibited_transitions[].when.vehicle[].value_1", expected_check="required", ), Scenario( @@ -2077,7 +2093,7 @@ expected_check="bounds", ), Scenario( - id="segment::prohibited_transitions[].when.vehicle[].unit:required", + id="segment::prohibited_transitions[].when.vehicle[].unit_0:required", scaffold={ "prohibited_transitions": [ { @@ -2088,11 +2104,11 @@ ] }, mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), - expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_field="prohibited_transitions[].when.vehicle[].unit_0", expected_check="required", ), Scenario( - id="segment::prohibited_transitions[].when.vehicle[].unit:enum", + id="segment::prohibited_transitions[].when.vehicle[].unit_0:enum", scaffold={ "prohibited_transitions": [ { @@ -2105,11 +2121,11 @@ mutate=set_at_path( "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" ), - expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_field="prohibited_transitions[].when.vehicle[].unit_0", expected_check="enum", ), Scenario( - 
id="segment::prohibited_transitions[].when.vehicle[].unit:required_1", + id="segment::prohibited_transitions[].when.vehicle[].unit_1:required", scaffold={ "prohibited_transitions": [ { @@ -2120,11 +2136,11 @@ ] }, mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), - expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_field="prohibited_transitions[].when.vehicle[].unit_1", expected_check="required", ), Scenario( - id="segment::prohibited_transitions[].when.vehicle[].unit:enum_1", + id="segment::prohibited_transitions[].when.vehicle[].unit_1:enum", scaffold={ "prohibited_transitions": [ { @@ -2137,7 +2153,7 @@ mutate=set_at_path( "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" ), - expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_field="prohibited_transitions[].when.vehicle[].unit_1", expected_check="enum", ), Scenario( @@ -2274,17 +2290,17 @@ expected_check="required", ), Scenario( - id="segment::speed_limits[].max_speed.value:bounds", + id="segment::speed_limits[].max_speed.value_0:bounds", scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, mutate=set_at_path("speed_limits[].max_speed.value", 0), - expected_field="speed_limits[].max_speed.value", + expected_field="speed_limits[].max_speed.value_0", expected_check="bounds", ), Scenario( - id="segment::speed_limits[].max_speed.value:bounds_1", + id="segment::speed_limits[].max_speed.value_1:bounds", scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, mutate=set_at_path("speed_limits[].max_speed.value", 351), - expected_field="speed_limits[].max_speed.value", + expected_field="speed_limits[].max_speed.value_1", expected_check="bounds", ), Scenario( @@ -2309,17 +2325,17 @@ expected_check="required", ), Scenario( - id="segment::speed_limits[].min_speed.value:bounds", + id="segment::speed_limits[].min_speed.value_0:bounds", scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, 
mutate=set_at_path("speed_limits[].min_speed.value", 0), - expected_field="speed_limits[].min_speed.value", + expected_field="speed_limits[].min_speed.value_0", expected_check="bounds", ), Scenario( - id="segment::speed_limits[].min_speed.value:bounds_1", + id="segment::speed_limits[].min_speed.value_1:bounds", scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, mutate=set_at_path("speed_limits[].min_speed.value", 351), - expected_field="speed_limits[].min_speed.value", + expected_field="speed_limits[].min_speed.value_1", expected_check="bounds", ), Scenario( @@ -2500,19 +2516,19 @@ expected_check="enum", ), Scenario( - id="segment::speed_limits[].when.vehicle[].value:required", + id="segment::speed_limits[].when.vehicle[].value_0:required", scaffold={ "speed_limits": [{"when": {"vehicle": [{"dimension": "axle_count"}]}}] }, mutate=set_at_path("speed_limits[].when.vehicle[].value", None), - expected_field="speed_limits[].when.vehicle[].value", + expected_field="speed_limits[].when.vehicle[].value_0", expected_check="required", ), Scenario( - id="segment::speed_limits[].when.vehicle[].value:required_1", + id="segment::speed_limits[].when.vehicle[].value_1:required", scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, mutate=set_at_path("speed_limits[].when.vehicle[].value", None), - expected_field="speed_limits[].when.vehicle[].value", + expected_field="speed_limits[].when.vehicle[].value_1", expected_check="required", ), Scenario( @@ -2523,31 +2539,31 @@ expected_check="bounds", ), Scenario( - id="segment::speed_limits[].when.vehicle[].unit:required", + id="segment::speed_limits[].when.vehicle[].unit_0:required", scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), - expected_field="speed_limits[].when.vehicle[].unit", + expected_field="speed_limits[].when.vehicle[].unit_0", expected_check="required", ), Scenario( - 
id="segment::speed_limits[].when.vehicle[].unit:enum", + id="segment::speed_limits[].when.vehicle[].unit_0:enum", scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), - expected_field="speed_limits[].when.vehicle[].unit", + expected_field="speed_limits[].when.vehicle[].unit_0", expected_check="enum", ), Scenario( - id="segment::speed_limits[].when.vehicle[].unit:required_1", + id="segment::speed_limits[].when.vehicle[].unit_1:required", scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), - expected_field="speed_limits[].when.vehicle[].unit", + expected_field="speed_limits[].when.vehicle[].unit_1", expected_check="required", ), Scenario( - id="segment::speed_limits[].when.vehicle[].unit:enum_1", + id="segment::speed_limits[].when.vehicle[].unit_1:enum", scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), - expected_field="speed_limits[].when.vehicle[].unit", + expected_field="speed_limits[].when.vehicle[].unit_1", expected_check="enum", ), Scenario( @@ -3014,7 +3030,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="segment", + model_name="segment", ) @@ -3026,7 +3042,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="segment", + model_name="segment", ) diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py index bfbe81702..08fafe741 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -18,6 +18,8 @@ from ....._support.helpers import set_at_path from ....._support.mutations import ( mutate_forbid_if, + mutate_map_key, + mutate_map_value, mutate_require_any_of, mutate_require_if, mutate_unique_items, @@ -37,7 +39,7 @@ BASE_ROW_POPULATED: dict = { "names": { "primary": "a", - "common": {}, + "common": {"en": "clean"}, "rules": [ { "value": "a", @@ -256,25 +258,25 @@ expected_check="stripped", ), Scenario( - id="segment::sources[].confidence:bounds", + id="segment::sources[].confidence_0:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", -1.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_0", expected_check="bounds", ), Scenario( - id="segment::sources[].confidence:bounds_1", + id="segment::sources[].confidence_1:bounds", scaffold={ "sources": [ {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} ] }, mutate=set_at_path("sources[].confidence", 2.0), - expected_field="sources[].confidence", + expected_field="sources[].confidence_1", expected_check="bounds", ), Scenario( @@ -588,7 +590,7 @@ expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required", + id="segment::access_restrictions[].when.vehicle[].value_0:required", scaffold={ "access_restrictions": [ { @@ -598,11 +600,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].value:required_1", + id="segment::access_restrictions[].when.vehicle[].value_1:required", scaffold={ "access_restrictions": [ { @@ -612,7 +614,7 @@ ] }, 
mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), - expected_field="access_restrictions[].when.vehicle[].value", + expected_field="access_restrictions[].when.vehicle[].value_1", expected_check="required", ), Scenario( @@ -630,7 +632,7 @@ expected_check="bounds", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required", + id="segment::access_restrictions[].when.vehicle[].unit_0:required", scaffold={ "access_restrictions": [ { @@ -640,11 +642,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum", + id="segment::access_restrictions[].when.vehicle[].unit_0:enum", scaffold={ "access_restrictions": [ { @@ -654,11 +656,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_0", expected_check="enum", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:required_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:required", scaffold={ "access_restrictions": [ { @@ -668,11 +670,11 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), - expected_field="access_restrictions[].when.vehicle[].unit", + expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="required", ), Scenario( - id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + id="segment::access_restrictions[].when.vehicle[].unit_1:enum", scaffold={ "access_restrictions": [ { @@ -682,7 +684,7 @@ ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), - expected_field="access_restrictions[].when.vehicle[].unit", + 
expected_field="access_restrictions[].when.vehicle[].unit_1", expected_check="enum", ), Scenario( @@ -721,17 +723,17 @@ expected_check="no_whitespace", ), Scenario( - id="segment::connectors[].at:bounds", + id="segment::connectors[].at_0:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", -1.0), - expected_field="connectors[].at", + expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( - id="segment::connectors[].at:bounds_1", + id="segment::connectors[].at_1:bounds", scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, mutate=set_at_path("connectors[].at", 2.0), - expected_field="connectors[].at", + expected_field="connectors[].at_1", expected_check="bounds", ), Scenario( @@ -902,6 +904,20 @@ expected_field="names.primary", expected_check="stripped", ), + Scenario( + id="segment::names.common{key}:language_tag", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_key(row, "names.common", "123"), + expected_field="names.common{key}", + expected_check="language_tag", + ), + Scenario( + id="segment::names.common{value}:stripped", + scaffold={"names": {"primary": "a", "common": {"en": "clean"}}}, + mutate=lambda row: mutate_map_value(row, "names.common", " has spaces "), + expected_field="names.common{value}", + expected_check="stripped", + ), Scenario( id="segment::names.rules[].value:required", scaffold={ @@ -1343,7 +1359,7 @@ def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_SPARSE, SCENARIOS, - feature_name="segment", + model_name="segment", ) @@ -1355,7 +1371,7 @@ def populated_results(spark: SparkSession, checks: list) -> ValidationResults: checks, BASE_ROW_POPULATED, SCENARIOS, - feature_name="segment", + model_name="segment", ) diff --git a/packages/overture-schema-pyspark/tests/test_check.py b/packages/overture-schema-pyspark/tests/test_check.py index 681add76b..0993411c2 100644 --- 
a/packages/overture-schema-pyspark/tests/test_check.py +++ b/packages/overture-schema-pyspark/tests/test_check.py @@ -14,7 +14,7 @@ def test_check_is_frozen(spark: SparkSession) -> None: name="required", expr=F.lit("error"), shape=CheckShape.SCALAR, - root_field="subtype", + read_columns=frozenset({"subtype"}), ) with pytest.raises(dataclasses.FrozenInstanceError): check.field = "other" # type: ignore[misc] diff --git a/packages/overture-schema-pyspark/tests/test_cli.py b/packages/overture-schema-pyspark/tests/test_cli.py index 2caaf558d..04e573efa 100644 --- a/packages/overture-schema-pyspark/tests/test_cli.py +++ b/packages/overture-schema-pyspark/tests/test_cli.py @@ -6,20 +6,34 @@ import pytest from click.testing import CliRunner from overture.schema.pyspark._registry import REGISTRY -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape from overture.schema.pyspark.cli import ( ReadSpec, _spark_config, + absent_column, read_feature, resolve_read, validate_cli, ) +from pyspark.errors import AnalysisException from pyspark.sql import Row, SparkSession from pyspark.sql import functions as F from pyspark.sql.types import StringType, StructField, StructType +from ._support.registry import register_model + _TEST_TYPE = "_test_cli" +# Shared schema for all test registrations that need the four base columns. +_BASE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("value", StringType(), True), + ] +) + # Tests that branch on registered geometry types require the runtime registry # to be populated (i.e. generated expression modules present). 
_requires_generated = pytest.mark.skipif( @@ -95,26 +109,15 @@ def _test_checks() -> list[Check]: name="enum", expr=F.when(F.col("value") != "good", F.lit("not good")), shape=CheckShape.SCALAR, - root_field="value", + read_columns=frozenset({"value"}), ), ] @pytest.fixture(autouse=True) def _register_test_checks() -> Iterator[None]: - REGISTRY[_TEST_TYPE] = FeatureValidation( - schema=StructType( - [ - StructField("id", StringType(), True), - StructField("theme", StringType(), True), - StructField("type", StringType(), True), - StructField("value", StringType(), True), - ] - ), - checks=_test_checks, - ) - yield - del REGISTRY[_TEST_TYPE] + with register_model(_TEST_TYPE, _BASE_SCHEMA, _test_checks): + yield def test_validate_missing_args() -> None: @@ -265,6 +268,120 @@ def test_validate_missing_column_suggests_skip_columns( assert "--skip-columns value" in result.output +def _unresolved(object_name: str, *, suggestion: bool = True) -> AnalysisException: + """An UNRESOLVED_COLUMN AnalysisException naming `object_name`.""" + suffix = "WITH_SUGGESTION" if suggestion else "WITHOUT_SUGGESTION" + return AnalysisException( + f"column {object_name} cannot be resolved", + errorClass=f"UNRESOLVED_COLUMN.{suffix}", + messageParameters={"objectName": object_name}, + ) + + +class TestAbsentColumn: + """Classification of AnalysisExceptions into absent-column vs. 
bug.""" + + def test_absent_top_level_column_is_named(self) -> None: + assert absent_column(_unresolved("`phantom`"), ["id", "value"]) == "phantom" + + def test_without_suggestion_is_also_named(self) -> None: + exc = _unresolved("`phantom`", suggestion=False) + assert absent_column(exc, ["id", "value"]) == "phantom" + + def test_dotted_reference_yields_top_level_column(self) -> None: + assert absent_column(_unresolved("`bbox`.`xmin`"), ["id"]) == "bbox" + + def test_present_column_is_not_named(self) -> None: + # An unresolved-column error naming a column that *is* present is not + # the missing-data case --skip-columns resolves; treat it as a bug. + assert absent_column(_unresolved("`value`"), ["id", "value"]) is None + + def test_non_unresolved_condition_is_a_bug(self) -> None: + exc = AnalysisException( + "cannot extract field from scalar", + errorClass="INVALID_EXTRACT_BASE_FIELD_TYPE", + messageParameters={"base": '"value"', "other": '"STRING"'}, + ) + assert absent_column(exc, ["id", "value"]) is None + + def test_missing_condition_is_a_bug(self) -> None: + assert absent_column(AnalysisException("opaque failure"), ["id"]) is None + + def test_missing_object_name_is_a_bug(self) -> None: + exc = AnalysisException( + "unresolved with no objectName", + errorClass="UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + messageParameters={}, + ) + assert absent_column(exc, ["id"]) is None + + +def test_validate_unresolvable_check_names_absent_column( + spark: SparkSession, tmp_path: Path +) -> None: + """An unresolved-column check names the absent column and hints --skip-columns.""" + unresolvable_type = "_test_cli_unresolvable" + # A check reading a column in neither the data nor the expected schema + # is invisible to absence detection (which only flags expected-but- + # missing columns), so it survives the read_columns drop, reaches + # evaluation, and raises UNRESOLVED_COLUMN -- the backstop the CLI + # must catch and convert into a column-named hint. 
+ with register_model( + unresolvable_type, + _BASE_SCHEMA, + checks=lambda: [ + Check( + field="phantom", + name="present", + expr=F.when(F.col("phantom").isNull(), F.lit("missing phantom")), + shape=CheckShape.SCALAR, + read_columns=frozenset({"phantom"}), + ) + ], + ): + input_path = str(tmp_path / "input.parquet") + spark.createDataFrame( + [Row(id="r1", theme="test", type="x", value="good")] + ).write.parquet(input_path) + runner = CliRunner() + result = runner.invoke(validate_cli, [unresolvable_type, input_path]) + assert result.exit_code != 0 + assert "phantom" in result.output + assert "--skip-columns phantom" in result.output + + +def test_validate_planning_bug_propagates(spark: SparkSession, tmp_path: Path) -> None: + """A non-column planning error surfaces as a bug, not a --skip-columns hint.""" + buggy_type = "_test_cli_planning_bug" + # `value` is a present string column, so the check survives the + # read_columns drop, but extracting a struct field from a string is a + # generator bug: it raises an AnalysisException that is *not* + # UNRESOLVED_COLUMN. `--skip-columns value` would not fix it, so the + # backstop must let it propagate rather than masking it. 
+ with register_model( + buggy_type, + _BASE_SCHEMA, + checks=lambda: [ + Check( + field="value", + name="struct_field", + expr=F.when(F.col("value").getField("missing").isNull(), F.lit("bad")), + shape=CheckShape.SCALAR, + read_columns=frozenset({"value"}), + ) + ], + ): + input_path = str(tmp_path / "input.parquet") + spark.createDataFrame( + [Row(id="r1", theme="test", type="x", value="good")] + ).write.parquet(input_path) + runner = CliRunner() + result = runner.invoke(validate_cli, [buggy_type, input_path]) + assert result.exit_code != 0 + assert isinstance(result.exception, AnalysisException) + assert "--skip-columns" not in result.output + + def test_validate_ignore_extra_columns(spark: SparkSession, tmp_path: Path) -> None: """--ignore-extra-columns suppresses 'expected missing' schema mismatches.""" input_path = str(tmp_path / "input.parquet") @@ -372,12 +489,15 @@ def test_leaf_partition(self) -> None: base_path="/data/release/2026-02-18.0", ) - def test_theme_partition_without_type(self) -> None: + def test_theme_partition_appends_type_leaf(self) -> None: + # A theme-level path is missing the `type=` leaf; resolve_read must + # append it so a single feature's checks aren't run against every + # type sharing the theme directory. 
spec = resolve_read( "/data/release/2026-02-18.0/theme=base/", _BATHYMETRY_PARTITIONS ) assert spec == ReadSpec( - data_path="/data/release/2026-02-18.0/theme=base/", + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry", base_path="/data/release/2026-02-18.0", ) @@ -478,6 +598,25 @@ def test_release_root_filters_to_type( assert df.count() == 1 assert df.collect()[0]["id"] == "r1" + def test_theme_partition_filters_to_type( + self, spark: SparkSession, tmp_path: Path + ) -> None: + """A theme-level path returns only the target type, not its siblings.""" + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [ + Row(id="r1", value="good", theme="test", type=_TEST_TYPE), + Row(id="r2", value="good", theme="test", type="other"), + ], + ) + theme_path = str(base / "theme=test") + spec = resolve_read(theme_path, {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert df.collect()[0]["id"] == "r1" + def test_validate_from_partitioned_release(spark: SparkSession, tmp_path: Path) -> None: """Full CLI round-trip reading from a Hive-partitioned release root.""" diff --git a/packages/overture-schema-pyspark/tests/test_harness.py b/packages/overture-schema-pyspark/tests/test_harness.py index 188bdc3ac..f77135dc0 100644 --- a/packages/overture-schema-pyspark/tests/test_harness.py +++ b/packages/overture-schema-pyspark/tests/test_harness.py @@ -57,7 +57,7 @@ def test_scenarios_get_valid_and_invalid_entries(self) -> None: expected_check="required", ), ] - scenario_map = build_scenario_map(scenarios, feature_name="f") + scenario_map = build_scenario_map(scenarios, model_name="f") assert scenario_uuid("f::x:required::valid") in scenario_map assert ( scenario_map[scenario_uuid("f::x:required::valid")] @@ -79,7 +79,7 @@ def test_baseline_plus_two_entries_per_scenario(self) -> None: expected_check="check", ), ] - scenario_map = build_scenario_map(scenarios, feature_name="f") + scenario_map = 
build_scenario_map(scenarios, model_name="f") # baseline + (::valid, ::invalid) for the one scenario assert len(scenario_map) == 3 @@ -101,14 +101,14 @@ def test_duplicate_id_values_raises(self) -> None: ), ] with pytest.raises(ValueError, match="Duplicate"): - build_scenario_map(scenarios, feature_name="f") + build_scenario_map(scenarios, model_name="f") class TestBuildScenarioRows: def test_baseline_row_included(self) -> None: base = {"id": "original-uuid", "theme": "buildings", "type": "building", "x": 1} rows, scenario_map, skipped = build_scenario_rows( - base, [], feature_name="building" + base, [], model_name="building" ) assert len(rows) == 1 assert rows[0]["theme"] == "buildings" @@ -131,7 +131,7 @@ def bad_mutation(row: dict) -> dict: ), ] rows, scenario_map, skipped = build_scenario_rows( - base, scenarios, feature_name="f" + base, scenarios, model_name="f" ) assert len(rows) == 1 assert "f::x:check" in skipped @@ -149,7 +149,7 @@ def test_scenario_creates_valid_and_invalid_rows(self) -> None: ), ] rows, scenario_map, skipped = build_scenario_rows( - base, scenarios, feature_name="f" + base, scenarios, model_name="f" ) # baseline + valid + invalid assert len(rows) == 3 @@ -171,7 +171,7 @@ def test_valid_row_uses_base_row_not_scaffold(self) -> None: ), ] rows, scenario_map, skipped = build_scenario_rows( - base, scenarios, feature_name="f" + base, scenarios, model_name="f" ) assert len(rows) == 3 # Valid row uses base_row (preserves all fields in items element) @@ -189,7 +189,7 @@ def test_scaffold_merged_onto_invalid_row(self) -> None: expected_check="check", ) rows, scenario_map, skipped = build_scenario_rows( - base_row, [s], feature_name="test" + base_row, [s], model_name="test" ) invalid_id = scenario_uuid("test::b:check::invalid") invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) @@ -207,7 +207,7 @@ def test_applies_scaffold_then_mutation(self) -> None: expected_check="required", ) rows, scenario_map, skipped = 
build_scenario_rows( - base_row, [s], feature_name="test" + base_row, [s], model_name="test" ) assert len(rows) == 3 assert not skipped @@ -258,7 +258,7 @@ def test_assert_schema_covers_checks_passes(self, spark: SparkSession) -> None: name="required", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field="id", + read_columns=frozenset({"id"}), ) ] assert_schema_covers_checks(schema, checks) # should not raise @@ -271,7 +271,7 @@ def test_assert_schema_covers_synthetic_field(self, spark: SparkSession) -> None name="min_length", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ) ] assert_schema_covers_checks(schema, checks) # should not raise @@ -286,24 +286,30 @@ def test_assert_schema_covers_checks_missing_field( name="required", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field="missing", + read_columns=frozenset({"missing"}), ) ] with pytest.raises(AssertionError, match="missing"): assert_schema_covers_checks(schema, checks) - def test_assert_schema_covers_synthetic_model_check( + def test_assert_schema_covers_model_check_columns( self, spark: SparkSession ) -> None: - """root_field=None passes regardless of schema (radio_group, etc.).""" - schema = StructType([StructField("id", StringType())]) + """A model check passes when every column it reads is in the schema.""" + schema = StructType( + [ + StructField("id", StringType()), + StructField("a", StringType()), + StructField("b", StringType()), + ] + ) checks = [ Check( field="radio_group", name="radio_group", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset({"a", "b"}), ) ] assert_schema_covers_checks(schema, checks) # should not raise diff --git a/packages/overture-schema-pyspark/tests/test_mutations.py b/packages/overture-schema-pyspark/tests/test_mutations.py index f4e233dad..b93ad16de 100644 --- a/packages/overture-schema-pyspark/tests/test_mutations.py +++ 
b/packages/overture-schema-pyspark/tests/test_mutations.py @@ -1,10 +1,17 @@ """Tests for model-level mutation functions.""" +from typing import Any + import pytest from ._support.helpers import PathTraversalError from ._support.mutations import ( + _get_nested, + _set_nested, + _walk_strict, mutate_forbid_if, + mutate_map_key, + mutate_map_value, mutate_min_fields_set, mutate_radio_group, mutate_require_any_of, @@ -13,6 +20,52 @@ ) +class TestMutateMapKey: + def test_replaces_key_preserving_value(self) -> None: + row = {"names": {"en": "clean"}} + result = mutate_map_key(row, "names", "123") + assert result["names"] == {"123": "clean"} + + def test_nested_path(self) -> None: + row = {"names": {"common": {"en": "clean"}}} + result = mutate_map_key(row, "names.common", "123") + assert result["names"]["common"] == {"123": "clean"} + + def test_does_not_mutate_original(self) -> None: + row = {"names": {"en": "clean"}} + mutate_map_key(row, "names", "123") + assert row["names"] == {"en": "clean"} + + def test_missing_map_raises(self) -> None: + with pytest.raises(PathTraversalError): + mutate_map_key({"other": 1}, "names", "123") + + def test_empty_map_raises(self) -> None: + with pytest.raises(PathTraversalError): + mutate_map_key({"names": {}}, "names", "123") + + +class TestMutateMapValue: + def test_replaces_value_preserving_key(self) -> None: + row = {"names": {"en": "clean"}} + result = mutate_map_value(row, "names", " has spaces ") + assert result["names"] == {"en": " has spaces "} + + def test_nested_path(self) -> None: + row = {"names": {"common": {"en": "clean"}}} + result = mutate_map_value(row, "names.common", " bad ") + assert result["names"]["common"] == {"en": " bad "} + + def test_does_not_mutate_original(self) -> None: + row = {"names": {"en": "clean"}} + mutate_map_value(row, "names", " bad ") + assert row["names"] == {"en": "clean"} + + def test_missing_map_raises(self) -> None: + with pytest.raises(PathTraversalError): + mutate_map_value({"other": 
1}, "names", " bad ") + + class TestMutateRequireAnyOf: def test_nulls_all_named_fields(self) -> None: row = {"a": 1, "b": 2, "c": 3} @@ -261,3 +314,129 @@ def test_terminal_bracket_non_list_inner_raises(self) -> None: row: dict = {"hierarchies": [{"a": 1}]} with pytest.raises(PathTraversalError): mutate_unique_items(row, "hierarchies[]") + + +class TestWalkStrict: + def test_simple_struct(self) -> None: + row = {"a": {"b": {"c": 42}}} + result = _walk_strict(row, "a.b") + assert result == {"c": 42} + + def test_root_returns_row(self) -> None: + row = {"x": 1} + assert _walk_strict(row, "") == row + + def test_missing_key_raises(self) -> None: + row = {"a": {"b": 1}} + with pytest.raises(PathTraversalError, match="Missing"): + _walk_strict(row, "a.c") + + def test_null_intermediate_raises(self) -> None: + row = {"a": None} + with pytest.raises(PathTraversalError, match="a"): + _walk_strict(row, "a.b") + + def test_error_message_includes_segment_name(self) -> None: + row = {"outer": {"inner": None}} + with pytest.raises(PathTraversalError, match="inner"): + _walk_strict(row, "outer.inner.leaf") + + def test_error_message_includes_full_path(self) -> None: + row = {"outer": None} + with pytest.raises(PathTraversalError, match="outer.inner"): + _walk_strict(row, "outer.inner") + + def test_array_segment_descends_to_element_zero(self) -> None: + row = {"items": [{"val": 5}]} + result = _walk_strict(row, "items[]") + assert result == {"val": 5} + + def test_array_segment_empty_raises(self) -> None: + row: dict[str, Any] = {"items": []} + with pytest.raises(PathTraversalError, match="items"): + _walk_strict(row, "items[]") + + def test_array_segment_with_struct_after(self) -> None: + row = {"rules": [{"when": {"mode": [{"type": "car"}]}}]} + result = _walk_strict(row, "rules[].when") + assert result == {"mode": [{"type": "car"}]} + + def test_nested_list_descends_each_bracket_level(self) -> None: + row = {"grid": [[{"val": 7}]]} + result = _walk_strict(row, "grid[][]") + 
assert result == {"val": 7} + + def test_nested_list_empty_inner_raises(self) -> None: + row: dict[str, Any] = {"grid": [[]]} + with pytest.raises(PathTraversalError, match="grid"): + _walk_strict(row, "grid[][]") + + +class TestGetNested: + def test_simple_lookup(self) -> None: + row = {"a": {"b": 3}} + assert _get_nested(row, "a.b") == 3 + + def test_missing_key_returns_none(self) -> None: + row = {"a": 1} + assert _get_nested(row, "b") is None + + def test_missing_nested_key_returns_none(self) -> None: + row = {"a": {"b": 1}} + assert _get_nested(row, "a.c") is None + + def test_none_intermediate_returns_none(self) -> None: + row = {"a": None} + assert _get_nested(row, "a.b") is None + + def test_non_dict_intermediate_returns_none(self) -> None: + row = {"a": [1, 2, 3]} + assert _get_nested(row, "a.b") is None + + def test_rejects_array_path(self) -> None: + with pytest.raises(ValueError, match="struct-only"): + _get_nested({"items": [{"v": 1}]}, "items[].v") + + +class TestSetNested: + def test_set_simple_field(self) -> None: + d = {"a": 1} + _set_nested(d, "a", 2) + assert d["a"] == 2 + + def test_set_nested_field(self) -> None: + d = {"outer": {"inner": "old"}} + _set_nested(d, "outer.inner", "new") + assert d["outer"]["inner"] == "new" + + def test_null_value_through_none_intermediate_silent(self) -> None: + """Nulling through a None intermediate is a no-op — already null.""" + d = {"a": None} + _set_nested(d, "a.b", None) + assert d["a"] is None + + def test_null_value_through_missing_intermediate_silent(self) -> None: + d: dict = {} + _set_nested(d, "a.b", None) + assert "a" not in d + + def test_non_null_through_none_intermediate_raises_path_traversal_error( + self, + ) -> None: + d = {"a": None} + with pytest.raises(PathTraversalError, match="a"): + _set_nested(d, "a.b", "value") + + def test_create_scaffolds_missing_intermediate(self) -> None: + d: dict = {} + _set_nested(d, "a.b", "v", create=True) + assert d["a"]["b"] == "v" + + def 
test_create_scaffolds_none_intermediate(self) -> None: + d: dict = {"a": None} + _set_nested(d, "a.b", "v", create=True) + assert d["a"]["b"] == "v" + + def test_rejects_array_path(self) -> None: + with pytest.raises(ValueError, match="struct-only"): + _set_nested({"items": []}, "items[].v", "x") diff --git a/packages/overture-schema-pyspark/tests/test_validate.py b/packages/overture-schema-pyspark/tests/test_validate.py index f15f4e806..9539b8977 100644 --- a/packages/overture-schema-pyspark/tests/test_validate.py +++ b/packages/overture-schema-pyspark/tests/test_validate.py @@ -1,46 +1,58 @@ """Tests for validation pipeline.""" +import re from collections.abc import Iterator import pytest from overture.schema.pyspark._registry import REGISTRY -from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.check import Check, CheckShape +from overture.schema.pyspark.expressions.column_patterns import map_values_check from overture.schema.pyspark.validate import ( ValidationResult, _normalize_suppress, evaluate_checks, explain_errors, - feature_keys, - feature_names, filter_errors, - validate_feature, + model_keys, + model_names, + validate_model, ) from pyspark.sql import DataFrame, Row, SparkSession from pyspark.sql import functions as F -from pyspark.sql.types import StringType, StructField, StructType +from pyspark.sql.types import ( + ArrayType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from ._support.registry import register_model def _scalar_check( - field: str, name: str, expr: F.Column, *, root_field: str | None = None + field: str, name: str, expr: F.Column, *, read_columns: frozenset[str] | None = None ) -> Check: return Check( field=field, name=name, expr=expr, shape=CheckShape.SCALAR, - root_field=root_field if root_field is not None else field, + read_columns=read_columns if read_columns is not None else frozenset({field}), ) def _array_check( - field: str, 
name: str, expr: F.Column, *, root_field: str | None = None + field: str, name: str, expr: F.Column, *, read_columns: frozenset[str] | None = None ) -> Check: return Check( field=field, name=name, expr=expr, shape=CheckShape.ARRAY, - root_field=root_field if root_field is not None else field, + read_columns=read_columns if read_columns is not None else frozenset({field}), ) @@ -211,6 +223,63 @@ def test_user_err_named_column_preserved(self, spark: SparkSession) -> None: assert explained.collect()[0]["_err_foo"] == "custom-data" +class TestReservedColumnCollisions: + """Working/output columns must not collide with user input columns. + + `evaluate_checks` appends `_err_` columns; `explain_errors` + materializes `_idx`/`_errors` scratch columns and emits its + `field`/`check`/`message` output. An input column sharing any of + these names yields duplicate attributes -> AMBIGUOUS_REFERENCE (or + silent loss via the `_err_` strip), so both entry points reject + the collision up front with a clear error. + """ + + def test_evaluate_checks_rejects_err_column(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row(_err_0="dup")]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + with pytest.raises(ValueError, match=r"_err_0.*rename or drop"): + evaluate_checks(df, checks) + + def test_evaluate_checks_allows_non_digit_err_column( + self, spark: SparkSession + ) -> None: + # Only `_err_` is reserved; `_err_foo` is a user column. + df = spark.createDataFrame([_row(_err_foo="ok")]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + assert "_err_foo" in evaluated.columns + + def test_evaluate_checks_rejects_reevaluation(self, spark: SparkSession) -> None: + # The realistic trigger: a persisted `result.evaluated` (which + # carries `_err_0..N`) fed back through validation. 
+ df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + with pytest.raises(ValueError, match="_err_0"): + evaluate_checks(evaluated, checks) + + @pytest.mark.parametrize( + "reserved", ["_idx", "_errors", "field", "check", "message"] + ) + def test_explain_errors_rejects_reserved_input_column( + self, spark: SparkSession, reserved: str + ) -> None: + df = spark.createDataFrame([_row(**{reserved: "dup"})]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + with pytest.raises(ValueError, match=re.escape(reserved)): + explain_errors(evaluated, checks) + + def test_explain_errors_rejects_reserved_with_no_checks( + self, spark: SparkSession + ) -> None: + # The n == 0 branch also emits field/check/message, so the guard + # must precede it. + df = spark.createDataFrame([_row(field="dup")]) + with pytest.raises(ValueError, match="field"): + explain_errors(df, []) + + class TestSinglePassPipeline: """Tests for the evaluate-once pattern used by the CLI.""" @@ -255,7 +324,7 @@ def test_check_objects(self, spark: SparkSession) -> None: name="radio_group", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset(), ) roots, pairs = _normalize_suppress([check]) assert roots == set() @@ -267,7 +336,7 @@ def test_mixed(self, spark: SparkSession) -> None: name="radio_group", expr=F.lit(None), shape=CheckShape.SCALAR, - root_field=None, + read_columns=frozenset(), ) roots, pairs = _normalize_suppress( [ @@ -290,8 +359,8 @@ def test_mixed(self, spark: SparkSession) -> None: @_requires_generated -def test_feature_names_includes_aliases() -> None: - result = feature_names() +def test_model_names_includes_aliases() -> None: + result = model_names() assert isinstance(result, list) assert result == sorted(result) assert "building" in result @@ -300,8 +369,8 @@ def test_feature_names_includes_aliases() -> None: 
@_requires_generated -def test_feature_keys_only_canonical() -> None: - result = feature_keys() +def test_model_keys_only_canonical() -> None: + result = model_keys() assert isinstance(result, list) assert result == sorted(result) assert "overture.schema.buildings:Building" in result @@ -342,6 +411,7 @@ def test_frozen(self) -> None: _VF_TYPE = "_test_validate_feature" +_VF_NESTED_TYPE = "_test_validate_nested" _VF_SCHEMA = StructType( [ StructField("id", StringType(), True), @@ -351,6 +421,115 @@ def test_frozen(self) -> None: StructField("sources", StringType(), True), ] ) +_VF_NESTED_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField( + "bbox", + StructType( + [ + StructField("xmin", StringType(), True), + StructField("xmax", StringType(), True), + ] + ), + True, + ), + ] +) + + +# `_VF_ARRAY_*` exercises a missing field *inside* an array element struct. +# `compare_schemas` encodes the array step as `sources[].confidence`; the +# root-derivation in validate_model must strip the `[]` marker so the +# dropped-check root matches the check's read column (`sources`). +_VF_ARRAY_TYPE = "_test_validate_array_nested" +_VF_ARRAY_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("dataset", StringType(), True), + StructField("confidence", DoubleType(), True), + ] + ) + ), + True, + ), + ] +) + + +def _vf_array_checks() -> list[Check]: + return [ + Check( + field="sources", + name="confidence_bounds", + expr=F.transform( + "sources", + lambda el: F.when(el["confidence"] > 1.0, F.lit("confidence too high")), + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"sources"}), + ), + ] + + +# `_VF_MODEL_*` exercises a model-level constraint that reads several columns +# directly. When any column it reads is skipped/absent, the exclusion filter +# must drop the model check too. 
+_VF_MODEL_TYPE = "_test_validate_model_constraint" +_VF_MODEL_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("primary_name", StringType(), True), + StructField("alt_name", StringType(), True), + ] +) + + +def _vf_model_checks() -> list[Check]: + return [ + Check( + field="require_any_of", + name="require_any_of", + expr=F.when( + F.col("primary_name").isNull() & F.col("alt_name").isNull(), + F.lit("at least one name required"), + ), + shape=CheckShape.SCALAR, + read_columns=frozenset({"primary_name", "alt_name"}), + ), + ] + + +# `_VF_MAP_*` exercises a map key/value check, whose expression dereferences +# the map column by name (`map_values_check("license_priority", ...)`). Skipping +# or omitting that column must drop the check, mirroring the array path, rather +# than leaving an unresolvable map projection behind. +_VF_MAP_TYPE = "_test_validate_map_check" +_VF_MAP_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("license_priority", MapType(StringType(), IntegerType()), True), + ] +) + + +def _vf_map_checks() -> list[Check]: + return [ + Check( + field="license_priority{value}", + name="bounds", + expr=map_values_check( + "license_priority", lambda v: F.when(v < 0, F.lit("negative")) + ), + shape=CheckShape.ARRAY, + read_columns=frozenset({"license_priority"}), + ), + ] def _vf_checks() -> list[Check]: @@ -360,21 +539,21 @@ def _vf_checks() -> list[Check]: name="enum", expr=F.when(F.col("theme") != "test", F.lit("bad theme")), shape=CheckShape.SCALAR, - root_field="theme", + read_columns=frozenset({"theme"}), ), Check( field="value", name="required", expr=F.when(F.col("value").isNull(), F.lit("missing")), shape=CheckShape.SCALAR, - root_field="value", + read_columns=frozenset({"value"}), ), Check( field="sources_min_length", name="min_length", expr=F.when(F.length("sources") < 1, F.lit("too short")), shape=CheckShape.SCALAR, - root_field="sources", + read_columns=frozenset({"sources"}), ), ] @@ 
-382,9 +561,28 @@ def _vf_checks() -> list[Check]: class TestValidateFeature: @pytest.fixture(autouse=True) def _register_vf_type(self) -> Iterator[None]: - REGISTRY[_VF_TYPE] = FeatureValidation(schema=_VF_SCHEMA, checks=_vf_checks) - yield - del REGISTRY[_VF_TYPE] + with register_model(_VF_TYPE, _VF_SCHEMA, _vf_checks): + yield + + @pytest.fixture() + def _register_nested_type(self) -> Iterator[None]: + with register_model(_VF_NESTED_TYPE, _VF_NESTED_SCHEMA, lambda: []): + yield + + @pytest.fixture() + def _register_map_type(self) -> Iterator[None]: + with register_model(_VF_MAP_TYPE, _VF_MAP_SCHEMA, _vf_map_checks): + yield + + @pytest.fixture() + def _register_array_type(self) -> Iterator[None]: + with register_model(_VF_ARRAY_TYPE, _VF_ARRAY_SCHEMA, _vf_array_checks): + yield + + @pytest.fixture() + def _register_model_type(self) -> Iterator[None]: + with register_model(_VF_MODEL_TYPE, _VF_MODEL_SCHEMA, _vf_model_checks): + yield @pytest.fixture() def vf_df(self, spark: SparkSession) -> DataFrame: @@ -398,10 +596,10 @@ def test_unknown_type_raises_value_error(self, spark: SparkSession) -> None: with pytest.raises( ValueError, match="Unknown entry-point alias.*nonexistent_type_xyz" ): - validate_feature(df, "nonexistent_type_xyz") + validate_model(df, "nonexistent_type_xyz") def test_basic_validation(self, vf_df: DataFrame) -> None: - result = validate_feature(vf_df, _VF_TYPE) + result = validate_model(vf_df, _VF_TYPE) assert isinstance(result, ValidationResult) assert result.schema_mismatches == [] assert len(result.checks) == 3 @@ -409,7 +607,7 @@ def test_basic_validation(self, vf_df: DataFrame) -> None: def test_skip_columns_errors_if_present(self, vf_df: DataFrame) -> None: with pytest.raises(ValueError, match="skip_columns.*theme.*present"): - validate_feature(vf_df, _VF_TYPE, skip_columns=["theme"]) + validate_model(vf_df, _VF_TYPE, skip_columns=["theme"]) def test_skip_columns_filters_checks(self, spark: SparkSession) -> None: schema_no_theme = 
StructType( @@ -419,7 +617,7 @@ def test_skip_columns_filters_checks(self, spark: SparkSession) -> None: [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], schema=schema_no_theme, ) - result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + result = validate_model(df, _VF_TYPE, skip_columns=["theme"]) check_fields = [c.field for c in result.checks] assert "theme" not in check_fields assert "value" in check_fields @@ -432,7 +630,7 @@ def test_skip_columns_filters_schema_mismatches(self, spark: SparkSession) -> No [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], schema=schema_no_theme, ) - result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + result = validate_model(df, _VF_TYPE, skip_columns=["theme"]) mismatch_fields = [m.path for m in result.schema_mismatches] assert "theme" not in mismatch_fields @@ -453,43 +651,43 @@ def test_ignore_extra_columns(self, spark: SparkSession) -> None: ], schema=schema_extra, ) - result = validate_feature(df, _VF_TYPE, ignore_extra_columns=["extra_score"]) + result = validate_model(df, _VF_TYPE, ignore_extra_columns=["extra_score"]) mismatch_paths = [m.path for m in result.schema_mismatches] assert "extra_score" not in mismatch_paths def test_suppress_unknown_root_raises(self, vf_df: DataFrame) -> None: with pytest.raises(ValueError, match="unknown root fields.*typo_field"): - validate_feature(vf_df, _VF_TYPE, suppress=["typo_field"]) + validate_model(vf_df, _VF_TYPE, suppress=["typo_field"]) def test_suppress_unknown_pair_raises(self, vf_df: DataFrame) -> None: with pytest.raises(ValueError, match=r"unknown \(field, name\) pairs"): - validate_feature(vf_df, _VF_TYPE, suppress=[("theme", "wrong_name")]) + validate_model(vf_df, _VF_TYPE, suppress=[("theme", "wrong_name")]) def test_suppress_mixed_unknown_lists_both(self, vf_df: DataFrame) -> None: with pytest.raises(ValueError, match="unknown root fields.*unknown"): - validate_feature( + validate_model( vf_df, _VF_TYPE, suppress=["typo_field", ("theme", 
"wrong_name")], ) def test_suppress_bare_string(self, vf_df: DataFrame) -> None: - result = validate_feature(vf_df, _VF_TYPE, suppress=["sources"]) + result = validate_model(vf_df, _VF_TYPE, suppress=["sources"]) check_fields = [c.field for c in result.checks] assert not any(f.startswith("sources") for f in check_fields) assert len(result.suppressed_checks) == 1 assert result.suppressed_checks[0].field == "sources_min_length" def test_suppress_tuple(self, vf_df: DataFrame) -> None: - result = validate_feature(vf_df, _VF_TYPE, suppress=[("value", "required")]) + result = validate_model(vf_df, _VF_TYPE, suppress=[("value", "required")]) check_fields_names = [(c.field, c.name) for c in result.checks] assert ("value", "required") not in check_fields_names assert len(result.suppressed_checks) == 1 def test_suppress_check_object(self, vf_df: DataFrame) -> None: - initial = validate_feature(vf_df, _VF_TYPE) + initial = validate_model(vf_df, _VF_TYPE) target = [c for c in initial.checks if c.name == "required"][0] - result = validate_feature(vf_df, _VF_TYPE, suppress=[target]) + result = validate_model(vf_df, _VF_TYPE, suppress=[target]) # Column objects can't be compared with ==, so compare by (field, name) result_pairs = [(c.field, c.name) for c in result.checks] suppressed_pairs = [(c.field, c.name) for c in result.suppressed_checks] @@ -497,17 +695,17 @@ def test_suppress_check_object(self, vf_df: DataFrame) -> None: assert (target.field, target.name) in suppressed_pairs def test_evaluated_has_err_columns(self, vf_df: DataFrame) -> None: - result = validate_feature(vf_df, _VF_TYPE) + result = validate_model(vf_df, _VF_TYPE) err_cols = [c for c in result.evaluated.columns if c.startswith("_err_")] assert len(err_cols) == len(result.checks) def test_suppressed_checks_not_in_checks(self, vf_df: DataFrame) -> None: - result = validate_feature(vf_df, _VF_TYPE, suppress=[("theme", "enum")]) + result = validate_model(vf_df, _VF_TYPE, suppress=[("theme", "enum")]) for sc in 
result.suppressed_checks: assert sc not in result.checks def test_all_checks_suppressed(self, vf_df: DataFrame) -> None: - result = validate_feature( + result = validate_model( vf_df, _VF_TYPE, suppress=["theme", "value", "sources"], @@ -517,7 +715,7 @@ def test_all_checks_suppressed(self, vf_df: DataFrame) -> None: def test_missing_column_does_not_raise(self, spark: SparkSession) -> None: # A DataFrame missing a required column causes AnalysisException when - # evaluate_checks references that column. validate_feature must detect + # evaluate_checks references that column. validate_model must detect # structurally absent columns via schema_mismatches and silently drop # the corresponding checks before calling evaluate_checks -- mirroring # the skip_columns path. @@ -528,15 +726,144 @@ def test_missing_column_does_not_raise(self, spark: SparkSession) -> None: [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], schema=schema_no_theme, ) - result = validate_feature(df, _VF_TYPE) + result = validate_model(df, _VF_TYPE) # Must not raise -- returns normally assert isinstance(result, ValidationResult) # Missing column is reported as a schema mismatch mismatch_paths = [m.path for m in result.schema_mismatches] assert "theme" in mismatch_paths - # No check may reference the absent root field - missing_root_fields = {c.root_field for c in result.checks} - assert "theme" not in missing_root_fields + # No kept check may read the absent column + assert all("theme" not in c.read_columns for c in result.checks) # Absent-column checks are silently dropped, not tracked in suppressed - suppressed_root_fields = {c.root_field for c in result.suppressed_checks} - assert "theme" not in suppressed_root_fields + assert all("theme" not in c.read_columns for c in result.suppressed_checks) + + def test_absent_columns_exposed_on_result(self, spark: SparkSession) -> None: + # validate_model must expose absent_columns as an ordered tuple so + # callers (e.g. 
CLI) don't need to re-derive it from schema_mismatches. + schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_model(df, _VF_TYPE) + assert result.absent_columns == ("theme",) + + def test_absent_columns_empty_when_schema_matches(self, vf_df: DataFrame) -> None: + result = validate_model(vf_df, _VF_TYPE) + assert result.absent_columns == () + + def test_absent_columns_ordered(self, spark: SparkSession) -> None: + # compare_schemas iterates actual fields first, then expected-only fields + # appended in their expected schema order. value precedes sources in + # _VF_SCHEMA, so absent_columns must be exactly ("value", "sources"). + schema_no_value_no_sources = StructType( + [f for f in _VF_SCHEMA.fields if f.name not in {"value", "sources"}] + ) + df = spark.createDataFrame( + [Row(id="1", theme="test", type=_VF_TYPE)], + schema=schema_no_value_no_sources, + ) + result = validate_model(df, _VF_TYPE) + assert result.absent_columns == ("value", "sources") + + def test_absent_columns_deduplicated( + self, spark: SparkSession, _register_nested_type: None + ) -> None: + # A nested struct column with multiple missing sub-fields must produce + # exactly one root entry in absent_columns, not one per sub-field. + # Schema: id + bbox(xmin, xmax) in the expected schema. + # Data: id + bbox(id) -- both xmin and xmax are absent sub-fields, + # so compare_schemas emits bbox.xmin and bbox.xmax as missing; both + # share root "bbox" and must collapse to a single entry. 
+ bbox_partial = StructType([StructField("id", StringType(), True)]) + data_schema = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", bbox_partial, True), + ] + ) + df = spark.createDataFrame( + [Row(id="1", bbox=Row(id="x"))], + schema=data_schema, + ) + result = validate_model(df, _VF_NESTED_TYPE) + # Two sub-field mismatches (bbox.xmin, bbox.xmax) collapse to one root + assert result.absent_columns == ("bbox",) + + def test_missing_array_nested_field_does_not_raise( + self, spark: SparkSession, _register_array_type: None + ) -> None: + # A field absent from an array element struct yields a mismatch path + # carrying the array step marker (`sources[].confidence`). The absent + # root must be derived by stripping that marker so it matches the + # top-level column (`sources`); the column's checks are then dropped, + # mirroring the top-level graceful-degradation path, rather than + # evaluating an expression that dereferences the absent sub-field. + data_schema = StructType( + [ + StructField("id", StringType(), True), + StructField( + "sources", + ArrayType(StructType([StructField("dataset", StringType(), True)])), + True, + ), + ] + ) + df = spark.createDataFrame( + [Row(id="1", sources=[Row(dataset="osm")])], + schema=data_schema, + ) + result = validate_model(df, _VF_ARRAY_TYPE) + result.row_counts() # forces evaluation; raises if the check is kept + assert result.absent_columns == ("sources",) + assert all("sources" not in c.read_columns for c in result.checks) + + def test_skip_columns_with_map_check_does_not_raise( + self, spark: SparkSession, _register_map_type: None + ) -> None: + # A map key/value check dereferences its map column by name, exactly + # like an array check. Skipping that column must drop the check so + # validation degrades gracefully instead of leaving an unresolvable + # map projection (`map_values_check("license_priority", ...)`) behind. 
+ data_schema = StructType( + [f for f in _VF_MAP_SCHEMA.fields if f.name != "license_priority"] + ) + df = spark.createDataFrame([Row(id="1")], schema=data_schema) + result = validate_model(df, _VF_MAP_TYPE, skip_columns=["license_priority"]) + result.row_counts() # forces evaluation; raises if the map check is kept + assert all("license_priority" not in c.read_columns for c in result.checks) + + def test_suppress_by_model_only_column( + self, spark: SparkSession, _register_model_type: None + ) -> None: + # A column read only by a model-level check is still a valid suppress + # target: suppression is symmetric with absence -- a column droppable + # when absent is suppressible by name. Suppressing it drops the model + # check (and records it as suppressed, not silently absent). + df = spark.createDataFrame( + [Row(id="1", primary_name="p", alt_name="a")], + schema=_VF_MODEL_SCHEMA, + ) + result = validate_model(df, _VF_MODEL_TYPE, suppress=["primary_name"]) + assert all(c.name != "require_any_of" for c in result.checks) + assert any(c.name == "require_any_of" for c in result.suppressed_checks) + + def test_skip_columns_with_model_constraint_does_not_raise( + self, spark: SparkSession, _register_model_type: None + ) -> None: + # Model-level checks read several columns directly. Skipping a column a + # model constraint reads must drop the model check too, so validation + # degrades gracefully instead of leaving an unresolvable column + # reference behind. 
+ data_schema = StructType( + [f for f in _VF_MODEL_SCHEMA.fields if f.name != "primary_name"] + ) + df = spark.createDataFrame( + [Row(id="1", alt_name="x")], + schema=data_schema, + ) + result = validate_model(df, _VF_MODEL_TYPE, skip_columns=["primary_name"]) + result.row_counts() # forces evaluation; raises if the model check is kept + assert all(c.name != "require_any_of" for c in result.checks) diff --git a/packages/overture-schema-system/src/overture/schema/system/field_constraint/field_constraint.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/field_constraint.py index d96e1cc4f..5967f391b 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/field_constraint.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/field_constraint.py @@ -8,15 +8,62 @@ one of the peer modules that implements a specific constraint type. """ +import re from abc import ABC, abstractmethod +from collections.abc import Mapping from typing import Any from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, ValidationInfo from pydantic_core import core_schema +def _normalized(value: object) -> object: + """Reduce a constraint attribute to a hashable, value-stable form. + + A compiled `re.Pattern` carries identity equality -- two patterns built + from the same source compare unequal -- so it reduces to `(pattern, flags)`. + Mappings and sets reduce to sorted tuples so equal contents hash equal + regardless of insertion order; lists and tuples keep their order. The sort + by repr is stable because `FieldConstraint` requires attribute values to be + value types (see its docstring), so every leaf reduces to a value-stable form. 
+ """ + if isinstance(value, re.Pattern): + return (value.pattern, value.flags) + if isinstance(value, Mapping): + return tuple(sorted(((k, _normalized(v)) for k, v in value.items()), key=repr)) + if isinstance(value, (list, tuple)): + return tuple(_normalized(v) for v in value) + if isinstance(value, (set, frozenset)): + return tuple(sorted((_normalized(v) for v in value), key=repr)) + return value + + class FieldConstraint(ABC): - """Base class for field-level constraints.""" + """Base class for field-level constraints. + + Constraints are value objects: two instances of the same concrete type + carrying the same attributes are equal and hash equal, so a set of + constraints deduplicates by rule. Equality keys on the concrete type, so a + fixed-pattern subclass never equals a raw `PatternConstraint` with the same + pattern. + + Subclass attributes participate in equality and hashing, so they must be + value types -- scalars, `re.Pattern`, or containers of these. An attribute + that compares by object identity leaves equality ill-defined. 
+ """ + + def __eq__(self, other: object) -> bool: + if not isinstance(other, FieldConstraint) or type(self) is not type(other): + return NotImplemented + return self._identity() == other._identity() + + def __hash__(self) -> int: + return hash((type(self), self._identity())) + + def _identity(self) -> tuple[tuple[str, object], ...]: + return tuple( + (name, _normalized(value)) for name, value in sorted(vars(self).items()) + ) def validate(self, value: Any, info: ValidationInfo) -> None: # noqa: B027 """Validate the value and raise `ValidationError` if invalid.""" diff --git a/packages/overture-schema-system/src/overture/schema/system/field_path.py b/packages/overture-schema-system/src/overture/schema/system/field_path.py index a63a0265a..7b60fea56 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_path.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_path.py @@ -1,6 +1,6 @@ """Structural representation of a field path through a nested schema. -A `FieldPath` is one of two variants: +A `FieldPath` is one of three variants: - `ScalarPath` -- a sequence of `StructSegment` values locating a value that requires no iteration to reach. @@ -11,6 +11,13 @@ segments encode nested-list iteration without an intervening struct, e.g. `list[list[X]]` parses as a single `ArraySegment` with `iter_count=2`). +- `MapPath` -- struct segments leading to a map column, a single + `MapSegment` projecting the map to its keys or values, then a struct-only + leaf (possibly empty). Locates a value reached by iterating a + `dict[K, V]`'s keys or values, encoded with a `{key}` / `{value}` marker + on the map column and the leaf appended after it (e.g. `names.common{key}` + for a scalar value, `subs{value}.label` for a field inside a + `dict[K, Model]` value). The canonical string form (`str(path)`) round-trips through `parse`. 
Code that needs to emit a path into source or labels calls `str(path)` @@ -20,18 +27,24 @@ from __future__ import annotations from dataclasses import dataclass +from enum import Enum from typing import TypeAlias __all__ = [ "ArrayPath", "ArraySegment", "FieldPath", + "FieldSegment", + "MapPath", + "MapProjection", + "MapSegment", "PathSegment", "ScalarPath", "StructSegment", "coerce", "parse", "promote_terminal_array", + "promote_terminal_map", ] @@ -55,6 +68,26 @@ class ArraySegment: iter_count: int = 1 +class MapProjection(Enum): + """Which side of a `dict[K, V]` a `MapSegment` iterates.""" + + KEY = "key" + VALUE = "value" + + +@dataclass(frozen=True, slots=True) +class MapSegment: + """A map column entered by projecting to its keys or values. + + `projection` selects keys or values; the projected side is iterated + like an array, so checks on a `MapSegment` render through the same + element machinery as `ArraySegment`. + """ + + name: str + projection: MapProjection + + PathSegment: TypeAlias = StructSegment | ArraySegment @@ -165,8 +198,8 @@ def element_relative_gate(self, gate: FieldPath) -> tuple[str, ...] | None: itself; the element variable IS the gated value. - ``None`` -- "not reachable": `gate` does not cross into this path's element scope (different outer array, scalar gate, - mismatched struct prefix, etc.). Callers must apply the gate - at column level instead. + mismatched struct prefix, mismatched boundary `iter_count`, + etc.). Callers must apply the gate at column level instead. Raises `NotImplementedError` when `gate` enters the same outer array but contains a nested `ArraySegment` past the boundary; @@ -188,11 +221,14 @@ def element_relative_gate(self, gate: FieldPath) -> tuple[str, ...] 
| None: return None if gate_segs[i].name != column_prefix[i].name: return None - target_first_array_name = self.segments[n_prefix].name + target_boundary = self.segments[n_prefix] + assert isinstance(target_boundary, ArraySegment) gate_boundary = gate_segs[n_prefix] if not isinstance(gate_boundary, ArraySegment): return None - if gate_boundary.name != target_first_array_name: + if gate_boundary.name != target_boundary.name: + return None + if gate_boundary.iter_count != target_boundary.iter_count: return None inner_segments = gate_segs[n_prefix + 1 :] for seg in inner_segments: @@ -230,7 +266,79 @@ def __str__(self) -> str: return ".".join(_segment_str(s) for s in self.segments) -FieldPath: TypeAlias = ScalarPath | ArrayPath +@dataclass(frozen=True, slots=True) +class MapPath: + """Locate a value inside a map's keys or values via one `MapSegment`. + + Invariant: `segments` is a struct prefix, exactly one `MapSegment` + boundary, then a struct-only leaf (possibly empty). The `MapSegment` + iterates the projected keys or values like an array; the leaf navigates + structs inside each projected element, mirroring `ArrayPath.leaf` for a + `list[Model]`. An empty leaf locates the projected scalar itself + (`dict[K, scalar]`); a non-empty leaf locates a field inside a + `dict[K, Model]` value (or key). + + The map must be reachable without array iteration, and the leaf must be + struct-only -- a map nested inside an array element or a container + nested inside a map element is not representable (and + `promote_terminal_map` / `promote_terminal_array` raise rather than + fabricate one). + """ + + segments: tuple[StructSegment | MapSegment, ...] 
+ + def __post_init__(self) -> None: + map_count = sum(isinstance(s, MapSegment) for s in self.segments) + if map_count != 1: + raise ValueError("MapPath must contain exactly one MapSegment") + if not all(isinstance(s, (StructSegment, MapSegment)) for s in self.segments): + raise ValueError("MapPath segments outside the map must be struct segments") + + @property + def _map_index(self) -> int: + return next(i for i, s in enumerate(self.segments) if isinstance(s, MapSegment)) + + @property + def projection(self) -> MapProjection: + seg = self.segments[self._map_index] + assert isinstance(seg, MapSegment) + return seg.projection + + @property + def map_column(self) -> str: + """Dotted name of the map column (struct prefix + map field name). + + This is what `F.col(...)` consumes; the `{key}` / `{value}` marker + and the leaf belong to `str(path)`, not to the column reference. + """ + return ".".join(s.name for s in self.segments[: self._map_index + 1]) + + @property + def leaf(self) -> tuple[str, ...]: + """Names of struct segments after the `MapSegment`. + + Empty for a bare key/value projection; the field path inside each + projected element otherwise. + """ + return tuple(s.name for s in self.segments[self._map_index + 1 :]) + + def append_struct(self, name: str) -> MapPath: + return MapPath(segments=self.segments + (StructSegment(name=name),)) + + def __str__(self) -> str: + base = f"{self.map_column}{{{self.projection.value}}}" + return base + "".join(f".{n}" for n in self.leaf) + + +FieldPath: TypeAlias = ScalarPath | ArrayPath | MapPath + + +# The element type of any `FieldPath.segments`, across all three variants. +# Broader than `PathSegment` (array/scalar paths only): a `MapPath` adds a +# single `MapSegment`. Consumers that walk an arbitrary `FieldPath`'s +# segments -- rather than a statically known `ArrayPath` -- annotate with +# this so a `MapSegment` is not a type error.
+FieldSegment: TypeAlias = StructSegment | ArraySegment | MapSegment def _segment_str(seg: PathSegment) -> str: @@ -239,35 +347,69 @@ def _segment_str(seg: PathSegment) -> str: return seg.name +def _strip_map_suffix(part: str) -> MapProjection | None: + """Return the `MapProjection` named by a trailing `{key}`/`{value}`, or None.""" + for proj in MapProjection: + if part.endswith(f"{{{proj.value}}}"): + return proj + return None + + def parse(encoded: str) -> FieldPath: """Parse a canonical encoded path like `"items[].nested.value"`. Trailing `[]` markers on a dotted part produce an `ArraySegment` - with matching `iter_count`. The empty string returns the empty - `ScalarPath`. Raises `ValueError` when any dotted part has an empty - name (e.g. `".a"`, `"a..b"`, `"[]"`). + with matching `iter_count`; a `{key}`/`{value}` marker produces a + `MapSegment` (and a `MapPath`), with any dotted parts after it forming + the map's struct leaf (e.g. `subs{value}.label`). The empty string + returns the empty `ScalarPath`. Raises `ValueError` when any dotted + part has an empty name (e.g. `".a"`, `"a..b"`, `"[]"`), when more than + one map marker appears, or when an array marker combines with a map + projection (`dict[K, list[V]]` is not representable as a `MapPath`). 
""" if not encoded: return ScalarPath() - segments: list[PathSegment] = [] + segments: list[StructSegment | ArraySegment | MapSegment] = [] struct_segments: list[StructSegment] = [] has_array = False - for part in encoded.split("."): + map_seen = False + parts = encoded.split(".") + for part in parts: + projection = _strip_map_suffix(part) + if projection is not None: + if map_seen: + raise ValueError(f"FieldPath has multiple map markers in {encoded!r}") + part = part[: -(len(projection.value) + 2)] depth = 0 while part.endswith("[]"): part = part[:-2] depth += 1 if not part: raise ValueError(f"FieldPath part has empty name in {encoded!r}") - if depth > 0: + if projection is not None: + if depth > 0: + raise ValueError( + f"map projection marker cannot follow array markers in {encoded!r}" + ) + map_seen = True + segments.append(MapSegment(name=part, projection=projection)) + elif depth > 0: has_array = True segments.append(ArraySegment(name=part, iter_count=depth)) else: struct = StructSegment(name=part) segments.append(struct) struct_segments.append(struct) + if map_seen: + if has_array: + raise ValueError( + f"map projection cannot combine with array markers in {encoded!r}" + ) + return MapPath(segments=tuple(segments)) # type: ignore[arg-type] if has_array: - return ArrayPath(segments=tuple(segments)) + # No MapSegment reached this branch (map_seen is False), so the + # tuple holds only Struct/Array segments. + return ArrayPath(segments=tuple(segments)) # type: ignore[arg-type] return ScalarPath(segments=tuple(struct_segments)) @@ -289,13 +431,38 @@ def promote_terminal_array(path: FieldPath) -> ArrayPath: build the multi-iteration terminal of a `list[list[X]]` field. Raises `ValueError` on an empty path: there is no terminal segment - to promote. + to promote. Raises `NotImplementedError` for a `MapPath`: a list nested + inside a map element has no representable path, so the gap stays loud. 
""" if not path.segments: raise ValueError("cannot promote the terminal of an empty path") + if isinstance(path, MapPath): + raise NotImplementedError("list nested inside a map element is not supported") *prefix, last = path.segments if isinstance(last, ArraySegment): promoted = ArraySegment(name=last.name, iter_count=last.iter_count + 1) else: promoted = ArraySegment(name=last.name, iter_count=1) return ArrayPath(segments=(*prefix, promoted)) + + +def promote_terminal_map(path: FieldPath, projection: MapProjection) -> MapPath: + """Promote *path*'s terminal struct segment to a `MapSegment`. + + Records a walker entering a `dict[K, V]` layer on the field it already + points at, projecting to keys or values. Raises `ValueError` on an + empty path and `NotImplementedError` when the map is reached through + array iteration or already projects another map -- a map nested inside + an array element or another map element has no schema field today and + no representable `MapPath`, so the gap stays loud. + """ + if not path.segments: + raise ValueError("cannot promote the terminal of an empty path") + if isinstance(path, ArrayPath): + raise NotImplementedError("map nested under a list layer is not supported") + if isinstance(path, MapPath): + raise NotImplementedError("map nested inside a map element is not supported") + *prefix, last = path.segments + return MapPath( + segments=(*prefix, MapSegment(name=last.name, projection=projection)) # type: ignore[arg-type] + ) diff --git a/packages/overture-schema-system/tests/field_constraint/test_constraint_equality.py b/packages/overture-schema-system/tests/field_constraint/test_constraint_equality.py new file mode 100644 index 000000000..c723df812 --- /dev/null +++ b/packages/overture-schema-system/tests/field_constraint/test_constraint_equality.py @@ -0,0 +1,109 @@ +"""Value-equality semantics for `FieldConstraint` subclasses. 
+ +Two constraints of the same concrete type with the same attributes are equal +and hash equal, so a set of constraints deduplicates by rule rather than by +object identity. Equality keys on the concrete type, so a subclass with a fixed +pattern never equals a raw `PatternConstraint` carrying the same pattern. +""" + +import re +from typing import Any + +from pydantic import GetCoreSchemaHandler +from pydantic_core import core_schema + +from overture.schema.system.field_constraint import ( + CountryCodeAlpha2Constraint, + FieldConstraint, + HexColorConstraint, + JsonPointerConstraint, + PatternConstraint, + UniqueItemsConstraint, +) + + +class TestMarkerConstraintEquality: + def test_equal_instances_compare_and_hash_equal(self) -> None: + a, b = UniqueItemsConstraint(), UniqueItemsConstraint() + assert a == b + assert hash(a) == hash(b) + assert len({a, b}) == 1 + + def test_distinct_marker_classes_unequal(self) -> None: + assert UniqueItemsConstraint() != JsonPointerConstraint() + + +class TestParametricConstraintEquality: + def test_fixed_pattern_subclass_instances_equal(self) -> None: + a, b = CountryCodeAlpha2Constraint(), CountryCodeAlpha2Constraint() + assert a == b + assert hash(a) == hash(b) + assert len({a, b}) == 1 + + def test_distinct_pattern_subclasses_unequal(self) -> None: + assert CountryCodeAlpha2Constraint() != HexColorConstraint() + + def test_equal_raw_patterns_collapse(self) -> None: + a = PatternConstraint(r"^[a-z]+$", "err") + b = PatternConstraint(r"^[a-z]+$", "err") + assert a == b + assert hash(a) == hash(b) + + def test_pattern_flags_distinguish(self) -> None: + a = PatternConstraint(r"^[a-z]+$", "err") + b = PatternConstraint(r"^[a-z]+$", "err", re.IGNORECASE) + assert a != b + + def test_subclass_not_equal_to_base_with_same_state(self) -> None: + """A fixed-pattern subclass is a distinct rule from a raw equivalent.""" + country = CountryCodeAlpha2Constraint() + raw = PatternConstraint( + country.pattern.pattern, + 
country.error_message, + description=country.description, + min_length=country.min_length, + max_length=country.max_length, + ) + assert country != raw + + +class _ListConstraint(FieldConstraint): + """Test-only constraint with a list-valued attribute.""" + + def __init__(self, items: list[str]) -> None: + self.items = list(items) + + def __get_pydantic_core_schema__( + self, source: type[Any], handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + return handler(source) + + +class _DictConstraint(FieldConstraint): + """Test-only constraint with a dict-valued attribute.""" + + def __init__(self, mapping: dict[str, int]) -> None: + self.mapping = dict(mapping) + + def __get_pydantic_core_schema__( + self, source: type[Any], handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + return handler(source) + + +class TestContainerValuedAttributes: + """A future constraint with a container attribute stays a hashable value.""" + + def test_equal_list_attr_instances_collapse(self) -> None: + a, b = _ListConstraint(["a", "b"]), _ListConstraint(["a", "b"]) + assert a == b + assert len({a, b}) == 1 + + def test_distinct_list_attr_instances_unequal(self) -> None: + assert _ListConstraint(["a", "b"]) != _ListConstraint(["a", "c"]) + + def test_equal_dict_attr_instances_collapse(self) -> None: + a = _DictConstraint({"x": 1, "y": 2}) + b = _DictConstraint({"y": 2, "x": 1}) + assert a == b + assert len({a, b}) == 1 diff --git a/packages/overture-schema-system/tests/test_field_path.py b/packages/overture-schema-system/tests/test_field_path.py index 0b3614ebf..4c10c5e3f 100644 --- a/packages/overture-schema-system/tests/test_field_path.py +++ b/packages/overture-schema-system/tests/test_field_path.py @@ -9,11 +9,15 @@ from overture.schema.system.field_path import ( ArrayPath, ArraySegment, + MapPath, + MapProjection, + MapSegment, ScalarPath, StructSegment, coerce, parse, promote_terminal_array, + promote_terminal_map, ) @@ -189,6 +193,10 @@ def 
test_empty_path_raises(self) -> None: with pytest.raises(ValueError, match="empty path"): promote_terminal_array(ScalarPath()) + def test_map_path_raises(self) -> None: + with pytest.raises(NotImplementedError, match="map"): + promote_terminal_array(parse("subs{value}.inner")) + class TestColumnPrefix: def test_array_at_start_has_empty_prefix(self) -> None: @@ -326,6 +334,21 @@ def test_inner_array_segment_raises(self) -> None: with pytest.raises(NotImplementedError, match="nested array segment"): target.element_relative_gate(gate) + def test_mismatched_iter_count_returns_none(self) -> None: + # target iterates items[] (iter_count=1); gate enters items[][] (iter_count=2) + # -- same name, different iteration depth -- not the same element scope + target = parse("items[].value") + gate = parse("items[][].nested") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) is None + + def test_matching_iter_count_still_returns_element_relative_tuple(self) -> None: + # regression: matching iter_count must remain reachable after the fix + target = parse("items[][].value") + gate = parse("items[][].nested") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) == ("nested",) + class TestArrayPathInvariant: def test_rejects_segments_without_array(self) -> None: @@ -364,6 +387,222 @@ def test_parses_string(self) -> None: assert coerce("items[].value") == parse("items[].value") +class TestMapPath: + def test_str_top_level_key(self) -> None: + path = MapPath( + segments=(MapSegment(name="tags", projection=MapProjection.KEY),) + ) + assert str(path) == "tags{key}" + + def test_str_top_level_value(self) -> None: + path = MapPath( + segments=(MapSegment(name="tags", projection=MapProjection.VALUE),) + ) + assert str(path) == "tags{value}" + + def test_str_nested_under_struct(self) -> None: + path = MapPath( + segments=( + StructSegment(name="names"), + MapSegment(name="common", projection=MapProjection.KEY), + ) + ) + assert 
str(path) == "names.common{key}" + + def test_projection_property(self) -> None: + path = MapPath( + segments=(MapSegment(name="tags", projection=MapProjection.VALUE),) + ) + assert path.projection is MapProjection.VALUE + + def test_map_column_top_level(self) -> None: + path = MapPath( + segments=(MapSegment(name="tags", projection=MapProjection.KEY),) + ) + assert path.map_column == "tags" + + def test_map_column_nested(self) -> None: + path = MapPath( + segments=( + StructSegment(name="names"), + MapSegment(name="common", projection=MapProjection.VALUE), + ) + ) + assert path.map_column == "names.common" + + def test_must_contain_a_map_segment(self) -> None: + with pytest.raises(ValueError, match="MapSegment"): + MapPath(segments=(StructSegment(name="names"),)) + + def test_rejects_array_segment_before_map(self) -> None: + with pytest.raises(ValueError, match="struct"): + MapPath( + segments=( # type: ignore[arg-type] # invalid by design: runtime guard under test + ArraySegment(name="items"), + MapSegment(name="tags", projection=MapProjection.KEY), + ) + ) + + def test_rejects_two_map_segments(self) -> None: + with pytest.raises(ValueError, match="MapSegment"): + MapPath( + segments=( + MapSegment(name="a", projection=MapProjection.KEY), + MapSegment(name="b", projection=MapProjection.VALUE), + ) + ) + + @pytest.mark.parametrize( + "encoded", + ["tags{key}", "tags{value}", "names.common{key}", "names.common{value}"], + ) + def test_str_round_trip(self, encoded: str) -> None: + assert str(parse(encoded)) == encoded + + def test_parse_returns_map_path(self) -> None: + assert isinstance(parse("names.common{key}"), MapPath) + + def test_parse_key(self) -> None: + assert parse("tags{key}") == MapPath( + segments=(MapSegment(name="tags", projection=MapProjection.KEY),) + ) + + def test_parse_nested_value(self) -> None: + assert parse("names.common{value}") == MapPath( + segments=( + StructSegment(name="names"), + MapSegment(name="common", 
projection=MapProjection.VALUE), + ) + ) + + +class TestMapPathLeaf: + """A `MapPath` may carry struct segments after the `MapSegment`. + + These name a value inside a `dict[K, Model]`'s value (or key) struct, + mirroring `ArrayPath.leaf` for `list[Model]`. The `MapSegment` is the + iteration boundary; the leaf is the struct navigation inside each + projected element. + """ + + def test_leaf_empty_for_bare_projection(self) -> None: + path = MapPath( + segments=(MapSegment(name="subs", projection=MapProjection.VALUE),) + ) + assert path.leaf == () + + def test_leaf_names_struct_segments_after_map(self) -> None: + path = MapPath( + segments=( + MapSegment(name="subs", projection=MapProjection.VALUE), + StructSegment(name="label"), + ) + ) + assert path.leaf == ("label",) + + def test_leaf_spans_nested_struct_navigation(self) -> None: + path = MapPath( + segments=( + MapSegment(name="subs", projection=MapProjection.VALUE), + StructSegment(name="inner"), + StructSegment(name="label"), + ) + ) + assert path.leaf == ("inner", "label") + + def test_map_column_excludes_leaf(self) -> None: + path = MapPath( + segments=( + StructSegment(name="names"), + MapSegment(name="common", projection=MapProjection.VALUE), + StructSegment(name="label"), + ) + ) + assert path.map_column == "names.common" + + def test_projection_found_with_leaf_present(self) -> None: + path = MapPath( + segments=( + MapSegment(name="subs", projection=MapProjection.KEY), + StructSegment(name="label"), + ) + ) + assert path.projection is MapProjection.KEY + + def test_str_appends_leaf_after_marker(self) -> None: + path = MapPath( + segments=( + MapSegment(name="subs", projection=MapProjection.VALUE), + StructSegment(name="label"), + ) + ) + assert str(path) == "subs{value}.label" + + def test_append_struct_extends_leaf(self) -> None: + path = MapPath( + segments=(MapSegment(name="subs", projection=MapProjection.VALUE),) + ) + extended = path.append_struct("label") + assert extended == MapPath( + segments=( 
+ MapSegment(name="subs", projection=MapProjection.VALUE), + StructSegment(name="label"), + ) + ) + + def test_rejects_array_segment_in_leaf(self) -> None: + with pytest.raises(ValueError, match="struct"): + MapPath( + segments=( # type: ignore[arg-type] # invalid by design: runtime guard under test + MapSegment(name="subs", projection=MapProjection.VALUE), + ArraySegment(name="items"), + ) + ) + + @pytest.mark.parametrize( + "encoded", + ["subs{value}.label", "names.common{key}.tag", "subs{value}.inner.label"], + ) + def test_str_round_trip_with_leaf(self, encoded: str) -> None: + assert str(parse(encoded)) == encoded + + def test_parse_value_with_leaf(self) -> None: + assert parse("subs{value}.label") == MapPath( + segments=( + MapSegment(name="subs", projection=MapProjection.VALUE), + StructSegment(name="label"), + ) + ) + + def test_parse_rejects_array_marker_in_leaf(self) -> None: + with pytest.raises(ValueError, match="map projection"): + parse("subs{value}.items[]") + + +class TestPromoteTerminalMap: + def test_top_level_struct_becomes_map_key(self) -> None: + assert promote_terminal_map(parse("tags"), MapProjection.KEY) == parse( + "tags{key}" + ) + + def test_struct_prefix_preserved_for_value(self) -> None: + assert promote_terminal_map( + parse("names.common"), MapProjection.VALUE + ) == parse("names.common{value}") + + def test_empty_path_raises(self) -> None: + with pytest.raises(ValueError, match="empty path"): + promote_terminal_map(ScalarPath(), MapProjection.KEY) + + def test_array_path_raises(self) -> None: + with pytest.raises(NotImplementedError, match="list"): + promote_terminal_map(parse("items[].tags"), MapProjection.KEY) + + def test_map_path_raises(self) -> None: + with pytest.raises(NotImplementedError, match="map"): + promote_terminal_map(parse("subs{value}.inner"), MapProjection.VALUE) + + class TestParseRejectsEmptyParts: @pytest.mark.parametrize("encoded", [".a", "a..b", "[]", "a.[]", ".[]"]) def 
test_raises_value_error_on_empty_part(self, encoded: str) -> None: diff --git a/uv.lock b/uv.lock index e1cfed99f..4c896c25c 100644 --- a/uv.lock +++ b/uv.lock @@ -842,6 +842,7 @@ dependencies = [ { name = "overture-schema-common" }, { name = "overture-schema-system" }, { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, ] [package.dev-dependencies] @@ -862,6 +863,7 @@ requires-dist = [ { name = "overture-schema-common", editable = "packages/overture-schema-common" }, { name = "overture-schema-system", editable = "packages/overture-schema-system" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, + { name = "typing-extensions", specifier = ">=4.0" }, ] [package.metadata.requires-dev] From d39b1c2607627027807a785f96599ba763ddb26f Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 24 Jun 2026 14:42:18 -0700 Subject: [PATCH 10/11] refactor(pyspark): unify decision logic, harden checks Consolidate duplicated decision logic behind single arbiters and close correctness gaps across the PySpark codegen and runtime. Runtime: - check_geometry_type flags a WKB blob too short to hold a full type word as a violation, gating on hex length (a valid header is 5 bytes: a 1-byte order flag plus a 4-byte type word). A length gate, not a null test, is required: conv() returns NULL only for a 0-1 byte blob, while a 2-4 byte blob parses a truncated header into a non-null bogus type (b"\x01\x01" reads as the Point code) that would otherwise pass. - check_bounds skips the NaN guard for integer columns via a check_nan flag. An integer column cannot be NaN, so the guard was dead work; this drops two casts and an isnan call at 54 integer bound sites across 16 generated files. Codegen correctness (latent on current schemas): - analyze_type preserves a list element's description when the element carries the field's only prose. 
- A model constraint on a dict[K, Model] value generates a test that mutates the map value rather than the row root, so the invalid row trips the violation it claims to test. Codegen consolidation (generated output unchanged): - Derive Check/ModelCheck read_columns from the IR instead of a regex over rendered source. Each FieldPath, Guard, and constraint variant names its column sources structurally and raises on any unhandled variant, replacing the regex's silent incompleteness. A test cross-checks the IR-derived columns against the rendered source across every real model, guarding the renderer/IR coupling the regex could not desync from. - Route every map-shape decision through classify_map_projection, the single arbiter of representable map projections. - Route map-side and NewType underlying-type linking through _scalar_identity, the single linkable-identity predicate. - Share primitive fill values through PRIMITIVE_FILL_TABLE across the three SparkCategory consumers. - Extract _top_level for dotted-name collapsing and _reject_struct_only_prefix for the struct-nested guards. Imports: - Hoist function-local imports to module top level project-wide and enforce it through ruff PLC0415. The lone deliberate cycle-breaker (extract_union in model_extraction) keeps a documented noqa. Also folds in deferred review nits: a stronger bounds-kwargs test, split map-projection rejection messages, and test-module reordering. 
Signed-off-by: Seth Fitzsimmons --- .../codegen/extraction/model_extraction.py | 2 +- .../codegen/extraction/type_analyzer.py | 7 +- .../schema/codegen/markdown/type_format.py | 37 +- .../schema/codegen/pyspark/_primitive_fill.py | 23 ++ .../schema/codegen/pyspark/check_builder.py | 142 +++++--- .../schema/codegen/pyspark/check_ir.py | 150 +++++++- .../codegen/pyspark/constraint_dispatch.py | 9 +- .../schema/codegen/pyspark/renderer.py | 52 +-- .../codegen/pyspark/test_data/base_row.py | 8 +- .../codegen/pyspark/test_data/scaffold.py | 19 +- .../schema/codegen/pyspark/test_renderer.py | 84 +++-- .../tests/test_markdown_type_format.py | 72 +++- .../tests/test_model_extractor.py | 13 +- .../tests/test_newtype_extraction.py | 7 +- .../tests/test_pyspark_base_row.py | 31 +- .../tests/test_pyspark_check_builder.py | 90 ++++- .../tests/test_pyspark_constraint_dispatch.py | 31 ++ .../tests/test_pyspark_e2e.py | 3 +- .../tests/test_pyspark_renderer.py | 339 +++++++++++++----- .../tests/test_pyspark_scaffold.py | 20 +- .../tests/test_pyspark_test_renderer.py | 143 ++++++++ .../tests/test_reverse_references.py | 13 +- .../tests/test_type_analyzer.py | 34 +- .../tests/test_type_collection.py | 5 +- .../expressions/constraint_expressions.py | 36 +- .../overture/schema/addresses/address.py | 2 +- .../overture/schema/annex/sources.py | 4 +- .../overture/schema/base/bathymetry.py | 16 +- .../overture/schema/base/infrastructure.py | 2 +- .../generated/overture/schema/base/land.py | 4 +- .../overture/schema/base/land_cover.py | 14 +- .../overture/schema/base/land_use.py | 4 +- .../generated/overture/schema/base/water.py | 2 +- .../overture/schema/buildings/building.py | 8 +- .../schema/buildings/building_part.py | 8 +- .../overture/schema/divisions/division.py | 20 +- .../schema/divisions/division_area.py | 6 +- .../schema/divisions/division_boundary.py | 6 +- .../generated/overture/schema/places/place.py | 2 +- .../schema/transportation/connector.py | 2 +- 
.../overture/schema/transportation/segment.py | 20 +- .../tests/_support/mutations.py | 101 ++++-- .../test_constraint_expressions.py | 64 ++++ .../tests/test_mutations.py | 75 ++++ pyproject.toml | 1 + 45 files changed, 1327 insertions(+), 404 deletions(-) create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_primitive_fill.py diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py index 809d9569e..1f4a28167 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -235,7 +235,7 @@ def resolve_union( ) -> UnionRef: # Late import: extract_union calls back into extract_model for # member classes. A module-level import would be a cycle. - from .union_extraction import extract_union + from .union_extraction import extract_union # noqa: PLC0415 # Recover the union alias name: `analyze_type` reaches the # union via `members[0].__name__` when the alias name is lost diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py index 55cd12715..ec1e5d353 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -343,8 +343,11 @@ def _recurse( args = get_args(annotation) if not args: raise TypeError("Bare list without type argument is not supported") - element, _, _ = _recurse(args[0], newtype_ctx) - return ArrayOf(element=element, constraints=()), False, None + element, _, desc = _recurse(args[0], newtype_ctx) + # A list field is never optional on account of element nullability, + # so 
the element's `is_optional` is dropped; its description is the + # field's fallback prose when no field-level description exists. + return ArrayOf(element=element, constraints=()), False, desc if origin is dict: args = get_args(annotation) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py index 4a445ebbc..55dde02cc 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -213,13 +213,13 @@ def _map_side(shape: FieldShape, ctx: LinkContext | None) -> tuple[str, bool]: def _map_side_link(shape: FieldShape, ctx: LinkContext | None) -> str | None: """Return a markdown link for a map key/value that has its own page. - Links a semantic NewType, a model (`ModelRef`), or an Enum / - BaseModel-sourced primitive when `ctx` resolves a page for it. - NewType and primitive sides link through `list<...>` layers; a model - side links only when it is the direct map side (`depth == 0`), so a - `list`-valued map keeps its `list<...>` wrapper from - `_bare_map_side_name` rather than collapsing to a bare model link. - Returns None when the side has no page; the caller renders a bare + Links a semantic NewType, a model (`ModelRef`), or a primitive whose + `source_type` is a linkable identity (`_scalar_identity`), when `ctx` + resolves a page for it. NewType and primitive sides link through + `list<...>` layers; a model side links only when it is the direct map + side (`depth == 0`), so a `list`-valued map keeps its `list<...>` + wrapper from `_bare_map_side_name` rather than collapsing to a bare model + link. Returns None when the side has no page; the caller renders a bare name instead. 
""" identity: TypeIdentity | None = None @@ -228,12 +228,8 @@ def _map_side_link(shape: FieldShape, ctx: LinkContext | None) -> str | None: identity = TypeIdentity(cur.ref, cur.name) elif depth == 0 and isinstance(cur, ModelRef): identity = _model_ref_identity(cur) - elif isinstance(cur, Primitive) and cur.source_type is not None: - src = cur.source_type - if isinstance(src, type) and ( - issubclass(src, Enum) or issubclass(src, BaseModel) - ): - identity = TypeIdentity(src, cur.base_type) + elif isinstance(cur, Primitive): + identity = _scalar_identity(cur) if identity and ctx: href = ctx.resolve_link(identity) if href: @@ -363,16 +359,13 @@ def format_underlying_type(shape: FieldShape, ctx: LinkContext | None = None) -> if isinstance(terminal, MapOf): return _format_map(terminal, ctx) - # For underlying-type rendering on a NewType's own page, skip the - # is_semantic_newtype path to avoid self-linking: this shape - # belongs to the NewType being rendered. + # Link by the terminal primitive's identity, not the enclosing NewType's: + # this shape belongs to the NewType being rendered, so linking its own + # identity would self-link. The terminal's identity is always its + # underlying primitive (Geometry/BBox, a pydantic type, etc.). 
identity: TypeIdentity | None = None - if isinstance(terminal, Primitive) and terminal.source_type is not None: - src = terminal.source_type - if isinstance(src, type) and ( - issubclass(src, Enum) or issubclass(src, BaseModel) - ): - identity = TypeIdentity.of(src) + if isinstance(terminal, Primitive): + identity = _scalar_identity(terminal) depth, _ = _peel_arrays(shape) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_primitive_fill.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_primitive_fill.py new file mode 100644 index 000000000..17f1595e0 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_primitive_fill.py @@ -0,0 +1,23 @@ +"""Shared fill-value table for non-string scalar Spark categories. + +Maps each SparkCategory that requires an explicit fill value to a +`(source_literal, runtime_value)` pair. The source literal is a valid +Python expression string; the runtime value is the corresponding Python +object. + +Consumers +--------- +- `constraint_dispatch._needs_explicit_fill`: category in PRIMITIVE_FILL_TABLE +- `test_renderer._fill_value_literal`: PRIMITIVE_FILL_TABLE[category][0] +- `test_data.base_row._primitive_default`: PRIMITIVE_FILL_TABLE[category][1] + +Adding a new numeric category here automatically wires it into all three. 
+""" + +from ..extraction.type_registry import SparkCategory + +PRIMITIVE_FILL_TABLE: dict[SparkCategory, tuple[str, object]] = { + "int": ("0", 0), + "float": ("0.0", 0.0), + "bool": ("False", False), +} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py index f2c3b6dda..b1dc017f6 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -323,6 +323,70 @@ def _terminal_scalar_checks( return [] +@dataclass(frozen=True) +class MapProjectionVerdict: + """Whether a map's projected key/value shape is representable as a `MapPath`. + + `reason` names why an unrepresentable shape was rejected (for the + `NotImplementedError` message); it is `None` when `representable` is True. + `has_value_to_validate` reports whether the projected shape carries a + constraint or descends into a model -- the loud/quiet discriminator: an + unrepresentable shape with something to validate raises, an unrepresentable + shape with nothing to validate is silently dropped. + """ + + representable: bool + reason: str | None + has_value_to_validate: bool + + +def classify_map_projection( + sub_shape: FieldShape, + map_path: FieldPath, +) -> MapProjectionVerdict: + """Classify a map's projected key/value shape against the representable bound. + + The single source of truth for which map projections a `MapPath` can + locate. The representable shape: a scalar terminal or `ModelRef`/`UnionRef` + terminal, reached WITHOUT array iteration (`map_path` is not an + `ArrayPath`), with no `ArrayOf` layer in the projected shape. 
Both + `_map_projection_checks` (shape-level) and the path-level guards in + `field_path.py` (`promote_terminal_map` rejecting an `ArrayPath`) enforce + this boundary; this classifier states it once so the prohibitions agree by + construction rather than by parallel maintenance. + + Two shapes fall outside the bound and have no `MapPath`: + + - a map reached through an array (`list[dict[K, V]]`, a `map_path` that is + an `ArrayPath`), whose key/value can't anchor a struct-prefixed `MapPath`; + - a key/value carrying an array layer (`dict[K, list[V]]`), whose scalar + terminal sits under an `ArrayOf` that `terminal_scalar` would unwrap. + """ + is_ref_terminal = isinstance(terminal_of(sub_shape), (ModelRef, UnionRef)) + has_value_to_validate = bool(all_constraints(sub_shape)) or is_ref_terminal + if isinstance(map_path, ArrayPath): + return MapProjectionVerdict( + representable=False, + reason="map reached through an array is not representable", + has_value_to_validate=has_value_to_validate, + ) + if has_array_layer(sub_shape): + return MapProjectionVerdict( + representable=False, + reason="map value carrying a list layer (dict[K, list[V]]) is not representable", + has_value_to_validate=has_value_to_validate, + ) + if not is_ref_terminal and terminal_scalar(sub_shape) is None: + return MapProjectionVerdict( + representable=False, + reason="constraint on a non-scalar terminal", + has_value_to_validate=has_value_to_validate, + ) + return MapProjectionVerdict( + representable=True, reason=None, has_value_to_validate=has_value_to_validate + ) + + def _map_projection_checks( sub_shape: FieldShape, map_path: FieldPath, @@ -336,35 +400,19 @@ def _map_projection_checks( `_ShapeTerminal` lets the caller descend into the model's fields and constraints on a `MapPath` leaf, mirroring a `list[Model]` element). 
- Two shapes fall outside that bound and have no representable `MapPath`: - - - a key/value carrying an array layer (`dict[K, list[V]]`), whose scalar - terminal sits under an `ArrayOf` that `terminal_scalar` would unwrap; - - a map reached through an array (`list[dict[K, V]]`, a `map_path` with - an `ArraySegment`), whose key/value can't anchor a struct-prefixed - `MapPath`. - - Each is handled the same way: an unsupported shape carrying a key/value - constraint (or a model to descend into) raises `NotImplementedError` to - keep the dropped check loud; an unconstrained, non-model shape yields no - checks, since there is nothing to validate. The constraint -- not the - shape alone -- is what stays loud, matching the silent treatment of - unconstrained maps. + `classify_map_projection` is the arbiter of which shapes are + representable. An unrepresentable shape carrying something to validate + (`has_value_to_validate`) raises `NotImplementedError` to keep the dropped + check loud; an unrepresentable shape with nothing to validate yields no + checks. The constraint -- not the shape alone -- is what stays loud, + matching the silent treatment of unconstrained maps. 
""" - reached_through_array = isinstance(map_path, ArrayPath) - is_ref_terminal = isinstance(terminal_of(sub_shape), (ModelRef, UnionRef)) - if reached_through_array or has_array_layer(sub_shape): - if all_constraints(sub_shape) or is_ref_terminal: - raise NotImplementedError( - f"map {projection.value} on an unsupported shape (list layer " - f"or map nested in an array) is not supported ({sub_shape!r})" - ) - return [], None - if not is_ref_terminal and terminal_scalar(sub_shape) is None: - if all_constraints(sub_shape): + verdict = classify_map_projection(sub_shape, map_path) + if not verdict.representable: + if verdict.has_value_to_validate: raise NotImplementedError( - f"map {projection.value} carrying a constraint on a non-scalar " - f"terminal is not supported ({sub_shape!r})" + f"map {projection.value} on an unsupported shape " + f"({verdict.reason}) is not supported ({sub_shape!r})" ) return [], None primitive = terminal_primitive(sub_shape) @@ -534,6 +582,18 @@ def _is_struct_only_prefix(prefix: FieldPath) -> bool: return not isinstance(prefix, ArrayPath) and bool(prefix.segments) +def _reject_struct_only_prefix(prefix: FieldPath, message: str) -> None: + """Raise `NotImplementedError(message)` when `prefix` is struct-only. + + Shared mechanism for the struct-nested guards: the renderer supports + neither model-constraint anchoring nor column-level discriminator + gating at a struct-only prefix, so reaching one with a real check is a + renderer gap rather than a normal case. + """ + if _is_struct_only_prefix(prefix): + raise NotImplementedError(message) + + def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: """Raise when emitting a model constraint at a struct-only prefix. @@ -545,12 +605,14 @@ def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: (`_model_constraint_target` keeps it, and the renderer wraps the check in `map_values_check`/`map_keys_check`). 
""" - if _is_struct_only_prefix(prefix) and not isinstance(prefix, MapPath): - raise NotImplementedError( - f"Model constraint on struct-nested {name!r} " - f"(reached at {prefix!r}) -- the renderer has no anchor " - "for nested-struct model constraints." - ) + if isinstance(prefix, MapPath): + return + _reject_struct_only_prefix( + prefix, + f"Model constraint on struct-nested {name!r} " + f"(reached at {prefix!r}) -- the renderer has no anchor " + "for nested-struct model constraints.", + ) def _guard_struct_nested_variant_fields(prefix: FieldPath, name: str) -> None: @@ -563,13 +625,13 @@ def _guard_struct_nested_variant_fields(prefix: FieldPath, name: str) -> None: column. Raising loudly is safer than emitting a mis-gated check; no current schema nests a discriminated union under a plain struct. """ - if _is_struct_only_prefix(prefix): - raise NotImplementedError( - f"Discriminated union {name!r} with variant-gated field checks " - f"at struct-nested prefix {prefix!r} -- `ColumnGuard` would " - "render the discriminator as a top-level column, not a " - "struct-qualified path." 
- ) + _reject_struct_only_prefix( + prefix, + f"Discriminated union {name!r} with variant-gated field checks " + f"at struct-nested prefix {prefix!r} -- `ColumnGuard` would " + "render the discriminator as a top-level column, not a " + "struct-qualified path.", + ) def _recurse_into_union( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py index d7e769c31..3a411d825 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py @@ -18,9 +18,24 @@ from dataclasses import dataclass from typing import TypeAlias -from overture.schema.system.field_path import FieldPath, ScalarPath - -from .constraint_dispatch import ExpressionDescriptor, ModelConstraintDescriptor +from overture.schema.system.field_path import ( + ArrayPath, + FieldPath, + MapPath, + ScalarPath, + StructSegment, +) +from overture.schema.system.model_constraint import FieldEqCondition, Not + +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + ModelConstraintDescriptor, + RadioGroup, + RequireAnyOf, + RequireIf, +) __all__ = [ "Check", @@ -50,6 +65,49 @@ class ElementGuard: Guard: TypeAlias = ColumnGuard | ElementGuard +def _top_level(name: str) -> str: + """Strip a dotted field name to its top-level column.""" + return name.split(".", 1)[0] + + +def _path_top_column(path: FieldPath) -> str | None: + """Top-level row column for a `FieldPath`, or `None` for an empty `ScalarPath`. + + Collapses dotted struct navigation to its first segment -- the granularity + at which `validate_model` detects column absence. `ArrayPath.column_path` + and `MapPath.map_column` may be dotted when the iterated column is nested + inside a struct (e.g. `names.rules`); this strips to `names`. 
+ """ + match path: + case ScalarPath(segments=(StructSegment(name=first), *_)): + return first + case ScalarPath(): + return None + case ArrayPath(): + return _top_level(path.column_path) + case MapPath(): + return _top_level(path.map_column) + case _: + raise TypeError(f"Unhandled FieldPath variant: {type(path).__name__}") + + +def _condition_field_name(condition: object) -> str: + """Extract the field name from a `FieldEqCondition` or `Not(FieldEqCondition)`. + + Raises `TypeError` for any other condition shape, so a new condition + type fails loudly here rather than silently omitting a column read. + """ + match condition: + case Not(inner=FieldEqCondition(field_name=fn)): + return fn + case FieldEqCondition(field_name=fn): + return fn + case _: + raise TypeError( + f"Unhandled condition type for read_columns: {type(condition).__name__}" + ) + + @dataclass(frozen=True, slots=True) class Check: """A field-level validation check.""" @@ -58,6 +116,38 @@ class Check: target: FieldPath guards: tuple[Guard, ...] = () + @property + def read_columns(self) -> frozenset[str]: + """Top-level row columns this check's expression dereferences. + + Includes the target's outermost column, any `ColumnGuard` discriminator + (rendered as `F.col(...)`), and any descriptor gate on a `ScalarPath` + target (rendered as `F.col("{gate}").isNotNull()`). `ElementGuard` + discriminators are excluded -- they reference `el[...]`, an + element-relative accessor, not a row-level column. Descriptor gates on + `ArrayPath` targets are also excluded -- they are applied element-relatively + via `element_relative_gate`. 
+ """ + cols: set[str] = set() + top = _path_top_column(self.target) + if top is not None: + cols.add(top) + for guard in self.guards: + match guard: + case ColumnGuard(discriminator=d): + cols.add(d) + case ElementGuard(): + pass # element-relative: not a row-level read + case _: + raise TypeError(f"Unhandled Guard variant: {type(guard).__name__}") + if isinstance(self.target, ScalarPath): + for desc in self.descriptors: + if desc.gate is not None: + gate_col = _path_top_column(desc.gate) + if gate_col is not None: + cols.add(gate_col) + return frozenset(cols) + @dataclass(frozen=True, slots=True) class ModelCheck: @@ -82,9 +172,63 @@ class ModelCheck: an optional field (`field: Model | None`). The renderer wraps the constraint expression in `F.when(.isNotNull(), ...)` so the check is skipped when the optional model is absent (NULL). + `gate` is always applied element-relatively for array targets and + must be `None` for scalar targets, so it never contributes a + top-level row column to `read_columns`. """ descriptor: ModelConstraintDescriptor target: FieldPath = ScalarPath() arm: str | None = None gate: FieldPath | None = None + + @property + def read_columns(self) -> frozenset[str]: + """Top-level row columns this model check's expression dereferences. + + For row-root constraints (`ScalarPath` target): all `field_names` from + the constraint (collapsed to top-level column) and, for `RequireIf`/ + `ForbidIf`, the condition field (both rendered as `F.col(...)`). + + For array/map targets: only the outermost container column is a + row-level read (`array_check("col", ...)` / `map_values_check("col", + ...)`). The `field_names` and condition field are accessed as + element-relative `el[...]` / `inner[...]` accessors inside the + lambda -- not as `F.col(...)` -- so they do not contribute top-level + column reads. + + `gate` is excluded: for array targets it is element-relative; for scalar + targets the renderer asserts it is `None`. 
The `arm` field carries no + column information. + """ + cols: set[str] = set() + desc = self.descriptor + # Array/map targets wrap everything in array_check/map_values_check; + # field references inside the lambda are element-relative, not row-level. + # Only the container column itself is a top-level read. + if not isinstance(self.target, ScalarPath): + container_col = _path_top_column(self.target) + if container_col is not None: + cols.add(container_col) + return frozenset(cols) + # Row-root target: field_names and condition field render as F.col(...). + match desc: + case ( + RequireAnyOf(field_names=names) + | RadioGroup(field_names=names) + | MinFieldsSet(field_names=names) + ): + for name in names: + cols.add(_top_level(name)) + case ( + RequireIf(field_names=names, condition=cond) + | ForbidIf(field_names=names, condition=cond) + ): + for name in names: + cols.add(_top_level(name)) + cols.add(_condition_field_name(cond)) + case _: + raise TypeError( + f"Unhandled ModelConstraintDescriptor variant: {type(desc).__name__}" + ) + return frozenset(cols) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py index ff8cc614c..8af6ba0a9 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py @@ -47,6 +47,7 @@ ) from ..extraction.specs import FieldSpec from ..extraction.type_registry import primitive_spark_category +from ._primitive_fill import PRIMITIVE_FILL_TABLE __all__ = [ "ExpressionDescriptor", @@ -93,6 +94,7 @@ class ExpressionDescriptor: gate: FieldPath | None = None label: str | None = None check_name: str | None = None + check_nan: bool | None = None _BASE_TYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { @@ -214,7 +216,10 @@ def _dispatch_bounds( if 
is_float and isinstance(value, int) and not isinstance(value, bool): value = float(value) kwargs.append((attr, value)) - return ExpressionDescriptor(function="check_bounds", kwargs=tuple(kwargs)) + check_nan: bool | None = False if base_type is not None and not is_float else None + return ExpressionDescriptor( + function="check_bounds", kwargs=tuple(kwargs), check_nan=check_nan + ) def _dispatch_pattern( @@ -507,7 +512,7 @@ def _needs_explicit_fill(shape: FieldShape) -> bool: return True if not isinstance(terminal, Primitive): return False - return primitive_spark_category(terminal.base_type) in ("int", "float", "bool") + return primitive_spark_category(terminal.base_type) in PRIMITIVE_FILL_TABLE def forbid_if_field_shapes( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py index a1acb8055..0911704d8 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -88,51 +88,6 @@ ) -# A generated expression dereferences a top-level row column through one of a -# fixed set of forms, each taking the column name as a string literal: `F.col`, -# the outermost `array_check`/`nested_array_check`, and `map_keys_check`/ -# `map_values_check`. Inner array iterations use element accessors (`el[...]`), -# whose first argument is never a string literal and so never matches here. A -# new column-consuming wrapper must be added to this alternation; `read_columns` -# fails loudly (see `_require_read_columns`) if a check's expr matches none. -_COLUMN_READ = re.compile( - r'(?:F\.col|(?:nested_)?array_check|map_(?:keys|values)_check)\("([^"]+)"' -) - - -def _read_columns(expr: str) -> frozenset[str]: - """Top-level columns a rendered check expression dereferences. 
- - Derived from the expression source itself rather than the check's - structure, so it stays correct as the renderer evolves: whatever - `F.col`/`array_check`/`map_*_check` the expression emits is what the - runtime reads. Dotted struct navigation (`bbox.xmin`, `names.rules`) - collapses to its top-level column, the granularity at which absence is - detected. - """ - return frozenset(m.group(1).split(".", 1)[0] for m in _COLUMN_READ.finditer(expr)) - - -def _require_read_columns(expr: str, field: str, name: str) -> frozenset[str]: - """Top-level columns a generated check reads -- guaranteed non-empty. - - Every generated check dereferences at least one row column. An empty - result means `_read_columns` did not recognize a form `expr` uses -- - typically a newly added column-consuming wrapper absent from - `_COLUMN_READ`. Left silent, the runtime could not drop the check when - its column is absent (`validate_model` keys on `read_columns`), so an - unresolvable reference would reach Spark. This converts that latent - crash into a generation-time error naming the offending check. 
- """ - columns = _read_columns(expr) - if not columns: - raise ValueError( - f"check {field!r} ({name!r}) reads no top-level column; " - f"_read_columns recognized no column form in: {expr}" - ) - return columns - - def _render_condition_desc(parsed: FieldEq) -> str: """Render a parsed condition to a human-readable error-message description.""" display = repr( @@ -223,6 +178,8 @@ def _render_expr_call( parts.append(_render_arg(arg)) for k, v in desc.kwargs: parts.append(f"{k}={py_literal(v)}") + if desc.check_nan is not None: + parts.append(f"check_nan={py_literal(desc.check_nan)}") if desc.label is not None: parts.append(f"label={py_literal(desc.label)}") return f"{desc.function}({', '.join(parts)})" @@ -468,6 +425,7 @@ def _check_function_context( field: str, name: str, expr: str, + read_columns: frozenset[str], ) -> dict[str, object]: """Assemble the template context dict for one check function.""" return { @@ -476,7 +434,7 @@ def _check_function_context( "check_name": name, "expr": expr, "shape": _check_shape_token(target), - "read_columns": _require_read_columns(expr, field, name), + "read_columns": read_columns, } @@ -492,6 +450,7 @@ def _render_check_function_context(row: FieldCheckRow) -> dict[str, object]: field=row.label, name=row.name, expr=_render_check_expr(row.check, row.descriptor_idx), + read_columns=row.check.read_columns, ) @@ -588,6 +547,7 @@ def _cols_and_names() -> tuple[str, str]: field=row.label, name=row.name, expr=expr, + read_columns=check.read_columns, ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py index 29ad6db24..cdb711d2e 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py @@ -49,6 +49,7 @@ from ...extraction.length_constraints import 
ArrayMinLen from ...extraction.specs import FieldSpec, ModelSpec, RecordSpec, UnionSpec from ...extraction.type_registry import primitive_spark_category +from .._primitive_fill import PRIMITIVE_FILL_TABLE from .._render_common import require_field_eq from ..constraint_dispatch import ExpressionDescriptor, dispatch_constraint from ..schema_builder import spark_type_rank @@ -653,11 +654,8 @@ def _primitive_default(base_type: str) -> object: if explicit is not None: return explicit category = primitive_spark_category(base_type) - if category == "float": - return 0.0 - if category == "int": - return 0 - return "" + entry = PRIMITIVE_FILL_TABLE.get(category) + return entry[1] if entry is not None else "" def _geometry_wkt_from_shape_constraints( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py index 040eca462..bbfa64e81 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py @@ -215,11 +215,20 @@ def generate_scaffold(check: Check, spec: ModelSpec) -> dict[str, Any]: def generate_model_scaffold(check: ModelCheck, spec: ModelSpec) -> dict[str, Any]: """Build a sparse dict for a model-level check's nesting structure. - Only top-level array columns are supported -- a `ScalarPath` target - returns `{}` (no scaffold needed at row root) and an `ArrayPath` - whose column lives inside a struct raises `NotImplementedError`. - No schema today places a list of model-constrained models inside a - struct field, so the case has no test coverage. + Two target shapes need no scaffold and return `{}`: + + - a `ScalarPath` target -- a top-level model constraint, whose fields + live at the row root; + - a `MapPath` target -- a `dict[K, Model]` value-model constraint. 
The + mutation (`map_path=`) owns map navigation: it corrupts the base row's + single map entry in place, or stubs one when the map is absent. Unlike + an array, a dict scaffold can't replace a base-row map entry under + `deep_merge`'s recursive dict merge, so there is nothing to add here. + + A top-level `ArrayPath` builds the array path; an `ArrayPath` whose + column lives inside a struct raises `NotImplementedError`. No schema + today places a list of model-constrained models inside a struct field, + so the case has no test coverage. """ match check.target: case ArrayPath() as target: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py index d027699df..166acbda0 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py @@ -12,6 +12,7 @@ from ..extraction.field_walk import has_array_layer, terminal_of from ..extraction.specs import ModelSpec from ..extraction.type_registry import primitive_spark_category +from ._primitive_fill import PRIMITIVE_FILL_TABLE from ._render_common import ( disambiguate, field_check_rows, @@ -329,14 +330,14 @@ def _render_mutation_call( mutation_fn, desc, check, fields_repr ) case RadioGroup(): - if isinstance(check.target, ArrayPath): + if isinstance(check.target, (ArrayPath, MapPath)): raise ValueError( - "mutate_radio_group does not accept array_path " + "mutate_radio_group does not accept array_path/map_path " f"(target={check.target!r})" ) return f"{mutation_fn}(row, {fields_repr})" case RequireAnyOf() | MinFieldsSet(): - parts = _array_kwargs_leaf(check, mutation_fn) + parts = _iter_kwargs_leaf(check, mutation_fn) suffix = ", " + ", ".join(parts) if parts else "" return f"{mutation_fn}(row, {fields_repr}{suffix})" assert_never(desc) @@ -363,7 +364,7 @@ def 
_render_conditional_mutation_call( kwarg_parts.append("negate=True") if fill: kwarg_parts.append(f"fill_values={fill}") - kwarg_parts.extend(_array_kwargs_inner(check, mutation_fn)) + kwarg_parts.extend(_iter_kwargs_inner(check, mutation_fn)) suffix = ", " + ", ".join(kwarg_parts) if kwarg_parts else "" return ( f"{mutation_fn}(row, {fields_repr}, " @@ -378,19 +379,9 @@ def _fill_value_literal(shape: FieldShape) -> str: terminal = terminal_of(shape) if isinstance(terminal, Primitive): category = primitive_spark_category(terminal.base_type) - match category: - case "bool": - return "False" - case "float": - return "0.0" - case "int": - return "0" - case "string" | "other": - raise ValueError( - f"unhandled Primitive base_type: {terminal.base_type!r}" - ) - case _: - assert_never(category) + if category in PRIMITIVE_FILL_TABLE: + return PRIMITIVE_FILL_TABLE[category][0] + raise ValueError(f"unhandled Primitive base_type: {terminal.base_type!r}") return "{}" @@ -405,13 +396,48 @@ def _render_fill_values(desc: ForbidIf) -> str | None: return "{" + ", ".join(items) + "}" -def _array_kwargs_leaf(check: ModelCheck, mutation_fn: str) -> list[str]: - """Array kwargs for mutations accepting `struct_path` (a trailing leaf). +def _map_kwargs(target: MapPath, mutation_fn: str, *, allow_leaf: bool) -> list[str]: + """Mutation kwargs for a `dict[K, Model]` value-model constraint. + + Emits `map_path=...` (the map column) and, when `allow_leaf`, an + optional single-segment `struct_path=...` for a sub-model reached + through one struct field inside the value model -- the map analogue of + `_iter_kwargs_leaf`'s array `struct_path`. A KEY projection is + unrepresentable (a model can't be a dict key) and raises; a multi-segment + leaf, or any leaf when `allow_leaf` is False, raises too. 
+ """ + if target.projection is not MapProjection.VALUE: + raise ValueError( + f"{mutation_fn} cannot target a map key (target={target!r}); a " + "model-level constraint on a map key is not representable as a row" + ) + kwargs = [f'map_path="{target.map_column}"'] + leaf = target.leaf + if leaf: + if not allow_leaf: + raise ValueError( + f"{mutation_fn} does not accept a map-value leaf (leaf={leaf!r})" + ) + if len(leaf) > 1: + raise ValueError( + f"multi-segment map-value leaf {leaf!r} not supported by " + f"{mutation_fn} (struct_path must be a single segment)" + ) + kwargs.append(f'struct_path="{leaf[0]}"') + return kwargs + + +def _iter_kwargs_leaf(check: ModelCheck, mutation_fn: str) -> list[str]: + """Container kwargs for mutations accepting `struct_path` (a trailing leaf). - Yields `array_path=...` and optionally `struct_path=...`. Inner array - iteration is rejected -- these mutations consume only the outermost - array level. + For an `ArrayPath`, yields `array_path=...` and optionally + `struct_path=...`; inner array iteration is rejected -- these mutations + consume only the outermost array level. For a `MapPath` (a + `dict[K, Model]` value-model constraint), delegates to `_map_kwargs`, + which yields `map_path=...` and an optional single-segment `struct_path`. """ + if isinstance(check.target, MapPath): + return _map_kwargs(check.target, mutation_fn, allow_leaf=True) if not isinstance(check.target, ArrayPath): return [] inner_struct_paths = check.target.iter_struct_paths @@ -434,13 +460,17 @@ def _array_kwargs_leaf(check: ModelCheck, mutation_fn: str) -> list[str]: return kwargs -def _array_kwargs_inner(check: ModelCheck, mutation_fn: str) -> list[str]: - """Array kwargs for mutations accepting `inner_array_path`. +def _iter_kwargs_inner(check: ModelCheck, mutation_fn: str) -> list[str]: + """Container kwargs for mutations accepting `inner_array_path`. - Yields `array_path=...` and optionally `inner_array_path=...`. 
A - trailing leaf path is rejected -- these mutations target an inner - array directly, not a struct field on its elements. + For an `ArrayPath`, yields `array_path=...` and optionally + `inner_array_path=...`; a trailing leaf path is rejected -- these + mutations target an inner array directly, not a struct field on its + elements. For a `MapPath`, delegates to `_map_kwargs` (no leaf: a map + value has no inner array layer to address). """ + if isinstance(check.target, MapPath): + return _map_kwargs(check.target, mutation_fn, allow_leaf=False) if not isinstance(check.target, ArrayPath): return [] inner_struct_paths = check.target.iter_struct_paths diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py index 46a918beb..b10a69451 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_type_format.py +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -8,6 +8,7 @@ from overture.schema.codegen.extraction.field import ( AnyScalar, ArrayOf, + FieldShape, LiteralScalar, NewTypeShape, Primitive, @@ -15,7 +16,7 @@ UnionRef, ) from overture.schema.codegen.extraction.model_extraction import extract_model -from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity +from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity, UnionSpec from overture.schema.codegen.extraction.type_analyzer import analyze_type from overture.schema.codegen.markdown.link_computation import LinkContext from overture.schema.codegen.markdown.type_format import ( @@ -24,7 +25,7 @@ format_type, format_underlying_type, ) -from overture.schema.system.primitive import int32 +from overture.schema.system.primitive import BBox, Geometry, int32 from pydantic import BaseModel, HttpUrl @@ -96,8 +97,6 @@ def test_registered_primitive_not_linked(self) -> None: assert "](int32.md)" not in result def test_geometry_links_to_aggregate_page(self) -> None: 
- from overture.schema.system.primitive import Geometry - field = _make_field(Geometry) ctx = LinkContext( page_path=PurePosixPath("buildings/building/building.md"), @@ -113,8 +112,6 @@ def test_geometry_links_to_aggregate_page(self) -> None: ) def test_bbox_links_to_aggregate_page(self) -> None: - from overture.schema.system.primitive import BBox - field = _make_field(BBox) ctx = LinkContext( page_path=PurePosixPath("base/feature/feature.md"), @@ -127,8 +124,6 @@ def test_bbox_links_to_aggregate_page(self) -> None: assert format_type(field, ctx) == "[`bbox`](../../system/primitive/geometry.md)" def test_geometry_without_context_renders_plain_code(self) -> None: - from overture.schema.system.primitive import Geometry - assert format_type(_make_field(Geometry)) == "`geometry`" @@ -140,8 +135,6 @@ def _make_field( is_optional: bool = False, ) -> FieldSpec: """Build a FieldSpec from an annotation for test convenience.""" - from overture.schema.codegen.extraction.field import FieldShape - if isinstance(annotation, (Scalar, ArrayOf, UnionRef)): shape: FieldShape = annotation # type: ignore[assignment] else: @@ -158,9 +151,6 @@ def _make_field( def _union_ref(members: list[type]) -> UnionRef: """Build a UnionRef for tests without running through extract_union.""" - from overture.schema.codegen.extraction.specs import UnionSpec - from pydantic import BaseModel - union_spec = UnionSpec( name=members[0].__name__, description=None, @@ -494,6 +484,62 @@ def test_map_key_and_value_newtypes_both_link(self) -> None: assert "stripped_string.md" in result assert "``" not in result + def test_map_value_geometry_links_in_field_cell(self) -> None: + """A Geometry-valued map links to the geometry page. + + Geometry is a class-based registered primitive, not Enum/BaseModel. + The map-side link decision shares `_scalar_identity`'s coverage, so a + Geometry value links rather than rendering bare. 
+ """ + ctx = _link_ctx((Geometry, "geometry", "system/primitive/geometry.md")) + result = format_type(_make_field(dict[str, Geometry]), ctx) + assert "[`geometry`]" in result + assert "system/primitive/geometry.md" in result + assert "``" not in result + + def test_map_value_pydantic_type_links_in_field_cell(self) -> None: + """A pydantic-sourced map value links to its page. + + HttpUrl is pydantic-sourced, not Enum/BaseModel; the shared map-side + decision links it where the narrow Enum/BaseModel-only check left it + bare. + """ + ctx = _link_ctx((HttpUrl, "HttpUrl", "pydantic/networks/http_url.md")) + result = format_type(_make_field(dict[str, HttpUrl]), ctx) + assert "[`HttpUrl`]" in result + assert "pydantic/networks/http_url.md" in result + assert "``" not in result + + +class TestFormatUnderlyingScalarType: + """Tests for scalar terminals in format_underlying_type.""" + + def test_geometry_underlying_type_links(self) -> None: + """A NewType whose underlying type is Geometry links to its page.""" + shape, _, _ = analyze_type(NewType("GeomAlias", Geometry)) + ctx = LinkContext( + page_path=PurePosixPath("system/types/geom_alias.md"), + registry={ + TypeIdentity(Geometry, "geometry"): PurePosixPath( + "system/primitive/geometry.md" + ) + }, + ) + result = format_underlying_type(shape, ctx) + assert "[`geometry`](../primitive/geometry.md)" in result + + def test_numeric_underlying_type_stays_bare(self) -> None: + """A NewType over a numeric primitive renders bare, not over-linked. + + The numeric branch keys on the builtin (`int`), which has no registry + entry, so the underlying type stays bare and keeps its markdown name. 
+ """ + shape, _, _ = analyze_type(int32) + ctx = LinkContext(page_path=PurePosixPath("system/types/x.md"), registry={}) + result = format_underlying_type(shape, ctx) + assert result == "`int32`" + assert "[" not in result + class TestFormatUnderlyingUnionType: """Tests for union FieldShape in format_underlying_type.""" diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py index 534685dac..981d7bf78 100644 --- a/packages/overture-schema-codegen/tests/test_model_extractor.py +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -13,7 +13,11 @@ find_field, ) from overture.schema.codegen.extraction.field import ModelRef, Primitive -from overture.schema.codegen.extraction.field_walk import has_array_layer, terminal_of +from overture.schema.codegen.extraction.field_walk import ( + all_constraints, + has_array_layer, + terminal_of, +) from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.system.field_constraint import UniqueItemsConstraint from overture.schema.system.model_constraint import ( @@ -456,8 +460,6 @@ class TestFieldInfoMetadataConstraints: def test_geometry_type_constraint_extracted(self) -> None: """GeometryTypeConstraint on geometry field should appear in constraints.""" - from overture.schema.codegen.extraction.field_walk import all_constraints - spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") @@ -468,8 +470,6 @@ def test_geometry_type_constraint_extracted(self) -> None: def test_geometry_type_constraint_has_null_source(self) -> None: """Constraints from field_info.metadata have source_ref=None (not from a NewType).""" - from overture.schema.codegen.extraction.field_walk import all_constraints - spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") @@ -487,8 +487,6 @@ def test_metadata_constraints_not_duplicated(self) -> None: When field_info.metadata is empty 
(Pydantic kept the Annotated wrapper), no extra constraints are added. """ - from overture.schema.codegen.extraction.field_walk import all_constraints - spec = extract_model(Instrument) tags_field = find_field(spec, "tags") @@ -502,7 +500,6 @@ def test_metadata_constraints_not_duplicated(self) -> None: def test_standalone_annotated_field_extracts_metadata(self) -> None: """Direct Annotated[Type, constraint] fields (non-optional, non-union) get their constraints from field_info.metadata.""" - from overture.schema.codegen.extraction.field_walk import all_constraints class Model(BaseModel): geo: Annotated[ diff --git a/packages/overture-schema-codegen/tests/test_newtype_extraction.py b/packages/overture-schema-codegen/tests/test_newtype_extraction.py index 150198668..9f28dce7e 100644 --- a/packages/overture-schema-codegen/tests/test_newtype_extraction.py +++ b/packages/overture-schema-codegen/tests/test_newtype_extraction.py @@ -3,7 +3,8 @@ from typing import Annotated, NewType from codegen_test_support import STR_TYPE -from overture.schema.codegen.extraction.field import ArrayOf +from overture.schema.codegen.extraction.field import ArrayOf, Primitive +from overture.schema.codegen.extraction.field_walk import terminal_scalar from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import NewTypeSpec from overture.schema.system.field_constraint import UniqueItemsConstraint @@ -21,8 +22,6 @@ def test_extract_hex_color(self) -> None: assert spec.name == "HexColor" # Outermost NewTypeShape stripped; shape is the underlying scalar. - from overture.schema.codegen.extraction.field_walk import terminal_scalar - assert terminal_scalar(spec.shape) is not None def test_extract_id(self) -> None: @@ -33,8 +32,6 @@ def test_extract_id(self) -> None: # Id wraps NoWhitespaceString, which is a registered semantic newtype # resolving to a Scalar. 
After stripping "Id", shape is Scalar with # base_type "NoWhitespaceString". - from overture.schema.codegen.extraction.field import Primitive - assert isinstance(spec.shape, Primitive) assert spec.shape.base_type == "NoWhitespaceString" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py index ab15f7509..a84bdf78b 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py @@ -2,6 +2,7 @@ import uuid from enum import Enum +from typing import Any import pytest from annotated_types import Gt, Lt @@ -18,6 +19,7 @@ ModelRef, Primitive, ) +from overture.schema.codegen.extraction.field_walk import terminal_of from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( FieldSpec, @@ -27,6 +29,8 @@ from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor from overture.schema.codegen.pyspark.test_data.base_row import ( _primitive_default, + _row_satisfies_condition, + _satisfy_model_constraints, _value_from_check_pattern, _value_from_scalar_constraints, generate_arm_rows, @@ -38,6 +42,7 @@ from overture.schema.system.model_constraint import ( FieldEqCondition, forbid_if, + min_fields_set, require_if, ) from pydantic import BaseModel, Field, HttpUrl, TypeAdapter @@ -174,8 +179,6 @@ def test_value_for_field_populate_includes_optional_children( def _list_of_model(shape: object) -> ModelRef: """Peel `ArrayOf` / `NewTypeShape` layers to reach the inner `ModelRef`.""" - from overture.schema.codegen.extraction.field_walk import terminal_of - terminal = terminal_of(shape) # type: ignore[arg-type] assert isinstance(terminal, ModelRef), ( f"Expected ModelRef terminal, got {type(terminal).__name__}" @@ -285,11 +288,6 @@ def test_any_valued_map_generates_empty(self) -> None: # `dict[str, Any]` (e.g. 
Infrastructure.source_tags) has no value # constraint -- hence no value check -- and `Any` has no value # strategy, so the map stays empty rather than crashing. - from typing import Any - - from overture.schema.codegen.extraction.model_extraction import extract_model - from pydantic import BaseModel - class TagsModel(BaseModel): source_tags: dict[str, Any] | None = None @@ -365,10 +363,6 @@ class TestMinFieldsSetSatisfied: """`_satisfy_model_constraints` populates optional fields for `min_fields_set`.""" def test_min_fields_set_populates_optional_fields(self) -> None: - from overture.schema.codegen.extraction.model_extraction import extract_model - from overture.schema.system.model_constraint import min_fields_set - from pydantic import BaseModel - @min_fields_set(2) class MinTwoModel(BaseModel): a: str | None = None @@ -387,10 +381,6 @@ def test_min_fields_set_counts_required_fields(self) -> None: # and `min_fields_set(2)`, the required field plus one optional # already satisfy the constraint, so the sparse row only needs # one additional optional fill. - from overture.schema.codegen.extraction.model_extraction import extract_model - from overture.schema.system.model_constraint import min_fields_set - from pydantic import BaseModel - @min_fields_set(2) class MixedMinModel(BaseModel): required_field: str @@ -416,10 +406,6 @@ def test_min_fields_set_all_required_needs_no_optional_fill(self) -> None: # When required fields alone satisfy `count`, no optional fills are # needed -- matching Pydantic, which counts required fields toward # `model_fields_set`. 
- from overture.schema.codegen.extraction.model_extraction import extract_model - from overture.schema.system.model_constraint import min_fields_set - from pydantic import BaseModel - @min_fields_set(2) class AllRequiredModel(BaseModel): req_a: str @@ -477,10 +463,6 @@ def test_require_if_not_condition_fills_field(self) -> None: def test_forbid_if_not_condition_removes_field(self) -> None: """forbid_if triggered by Not(FieldEqCondition) removes the forbidden field.""" - from overture.schema.codegen.pyspark.test_data.base_row import ( - _satisfy_model_constraints, - ) - spec = extract_model(_ModeModelForbidIf) row: dict[str, object] = { "mode": _ModeColor.RED.value, @@ -492,9 +474,6 @@ def test_forbid_if_not_condition_removes_field(self) -> None: def test_unknown_condition_type_raises(self) -> None: """_row_satisfies_condition must raise for unknown condition kinds.""" - from overture.schema.codegen.pyspark.test_data.base_row import ( - _row_satisfies_condition, - ) class _Unknown: pass diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py index 2a6d3140e..de36ee560 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -17,7 +17,10 @@ union_spec_for, ) from overture.schema.codegen.extraction.field import ( + ArrayOf, ConstraintSource, + FieldShape, + MapOf, Primitive, UnionRef, ) @@ -30,6 +33,7 @@ from overture.schema.codegen.pyspark._render_common import column_level_suffix from overture.schema.codegen.pyspark.check_builder import ( build_checks, + classify_map_projection, ) from overture.schema.codegen.pyspark.check_ir import ( Check, @@ -2006,8 +2010,6 @@ def test_optional_list_field_element_model_has_no_gate(self) -> None: def test_segment_speed_limits_when_has_gate(self) -> None: """Segment.speed_limits[].when is optional -- gate == path to when.""" - from 
codegen_test_support import discover_feature - spec = discover_feature("Segment") _, model_nodes = build_checks(spec) when_nodes = [ @@ -2096,10 +2098,92 @@ class _ListOfUnconstrainedMapModel(BaseModel): items: list[dict[str, str]] +class _PlainScalarMapModel(BaseModel): + items: dict[str, str] + + +class _ConstrainedScalarMapModel(BaseModel): + items: dict[str, Annotated[str, MinLen(1)]] + + +class TestClassifyMapProjection: + """`classify_map_projection` is the single arbiter of map-shape support. + + Every map-shape prohibition in `_map_projection_checks` routes through + this classifier rather than restating the rule inline. The classifier + names the representable shape (struct-prefix -> one MapSegment -> scalar + or model/union terminal, reached without array iteration, no array layer + in the projected shape) and the reason each unsupported shape is rejected. + """ + + def _scalar_shape(self, *, constrained: bool) -> FieldShape: + spec = spec_for_model( + _ConstrainedScalarMapModel if constrained else _PlainScalarMapModel + ) + assert isinstance(spec, RecordSpec) + shape = spec.fields[0].shape + assert isinstance(shape, MapOf) + return shape.value + + def test_scalar_terminal_reached_struct_only_is_representable(self) -> None: + verdict = classify_map_projection( + self._scalar_shape(constrained=True), _path("items{value}") + ) + assert verdict.representable + assert verdict.reason is None + + def test_map_reached_through_array_is_rejected(self) -> None: + # The classifier owns the path-structural rejection too: a map_path + # that is an ArrayPath cannot anchor a struct-prefixed MapPath. 
+ verdict = classify_map_projection( + self._scalar_shape(constrained=True), _path("items[]") + ) + assert not verdict.representable + assert verdict.reason is not None + + def test_array_layer_in_projected_shape_is_rejected(self) -> None: + spec = spec_for_model(_MapWithConstrainedListValueModel) + assert isinstance(spec, RecordSpec) + shape = spec.fields[0].shape + assert isinstance(shape, MapOf) + verdict = classify_map_projection(shape.value, _path("items{value}")) + assert not verdict.representable + assert verdict.reason is not None + + def test_classifier_rejects_dict_of_list_value(self) -> None: + # dict[K, list[V]]: the projected value shape carries an array layer. + # The classifier rejects it, and `_checks_for` raises -- the model + # raises iff the classifier rejects a shape with something to validate. + spec = spec_for_model(_MapWithConstrainedListValueModel) + assert isinstance(spec, RecordSpec) + shape = spec.fields[0].shape + assert isinstance(shape, MapOf) + verdict = classify_map_projection(shape.value, _path("items{value}")) + assert not verdict.representable + assert verdict.has_value_to_validate + with pytest.raises(NotImplementedError): + _checks_for(_MapWithConstrainedListValueModel) + + def test_classifier_rejects_map_reached_through_array(self) -> None: + # list[dict[K, V]]: the map is reached through an array, so the + # map_path is an ArrayPath. The classifier rejects on the path alone. 
+ spec = spec_for_model(_ListOfConstrainedMapModel) + assert isinstance(spec, RecordSpec) + outer = spec.fields[0].shape + assert isinstance(outer, ArrayOf) + inner_map = outer.element + assert isinstance(inner_map, MapOf) + verdict = classify_map_projection(inner_map.value, _path("items[]")) + assert not verdict.representable + assert verdict.has_value_to_validate + with pytest.raises(NotImplementedError): + _checks_for(_ListOfConstrainedMapModel) + + class TestMapProjectionUnsupportedShapes: """`_map_projection_checks` is bounded to a scalar terminal reached struct-only. - Three shapes fall outside that bound -- a map value/key with an array + Two shapes fall outside that bound -- a map value/key with an array layer (`dict[K, list[V]]`), and a map reached through an array (`list[dict[K, V]]`). For each, a key/value constraint raises to keep the dropped check loud, and an unconstrained one yields no checks (a diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py index bdee2511f..59b297594 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py @@ -93,6 +93,37 @@ def test_float_bounds_unchanged_for_float_type(self) -> None: assert desc.kwargs == (("ge", 0.0),) assert isinstance(dict(desc.kwargs)["ge"], float) + def test_float_bound_sets_check_nan_none(self) -> None: + """Float-typed bounds leave check_nan unset (runtime defaults to guarded).""" + desc = dispatch_constraint(Ge(ge=0), base_type="float64") + assert desc is not None + assert desc.check_nan is None + + def test_integer_bound_sets_check_nan_false(self) -> None: + """Integer-typed bounds set check_nan=False to skip the dead NaN guard.""" + desc = dispatch_constraint(Ge(ge=0), base_type="int32") + assert desc is not None + assert desc.check_nan is False + + def 
test_untyped_bound_sets_check_nan_none(self) -> None: + """Bounds without a base_type leave check_nan unset (safe default).""" + desc = dispatch_constraint(Ge(ge=0)) + assert desc is not None + assert desc.check_nan is None + + def test_kwargs_contains_only_bounds(self) -> None: + """check_nan does not appear in kwargs; only ge/gt/le/lt keys are present. + + Uses Interval(ge=0, le=1) so the descriptor has two bound kwargs, + making the assertion non-vacuous: a stray non-bound kwarg alongside + a real bound would cause the check to fail. + """ + desc = dispatch_constraint(Interval(ge=0, le=1), base_type="int32") + assert desc is not None + kwarg_keys = {k for k, _ in desc.kwargs} + assert kwarg_keys == {"ge", "le"} + assert kwarg_keys <= {"ge", "gt", "le", "lt"} + class TestLengthDispatch: def test_min_len_on_array(self) -> None: diff --git a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py index 6b7629ea1..7095f8895 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py @@ -7,6 +7,7 @@ import pytest from annotated_types import Ge from codegen_test_support import discover_feature +from overture.schema.codegen.cli import _generate_pyspark from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.pyspark.pipeline import ( GeneratedModule, @@ -192,8 +193,6 @@ def test_array_discriminator_outside_lambda( def test_cli_writes_init_modules(tmp_path: Path) -> None: - from overture.schema.codegen.cli import _generate_pyspark - spec = extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel") out = tmp_path / "src" test_out = tmp_path / "tests" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py index 8fc6c9db1..0857b1209 100644 --- 
a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -12,8 +12,11 @@ RadioModel, RequireAnyModel, TripleNestedArrayModel, + discover_feature, + flat_specs_from_discovery, spec_for_model, ) +from overture.schema.codegen.extraction.specs import ModelSpec from overture.schema.codegen.pyspark._render_common import ( FieldEq, field_check_rows, @@ -31,17 +34,18 @@ ) from overture.schema.codegen.pyspark.constraint_dispatch import ( ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, RequireAnyOf, RequireIf, ) from overture.schema.codegen.pyspark.renderer import ( - _read_columns, _render_check_function_context, _render_model_constraint_function_context, - _require_read_columns, render_model_module, ) -from overture.schema.codegen.pyspark.schema_builder import build_schema +from overture.schema.codegen.pyspark.schema_builder import SchemaField, build_schema from overture.schema.system.field_path import ( ScalarPath, parse, @@ -57,6 +61,7 @@ Geometry, GeometryType, GeometryTypeConstraint, + int32, ) from overture.schema.system.string import CountryCodeAlpha2 from pydantic import BaseModel @@ -65,97 +70,242 @@ _path = parse -class TestReadColumns: - """`_read_columns` derives a check's top-level reads from its rendered expr. +class TestCheckIRReadColumns: + """IR-derived `read_columns` on `Check` and `ModelCheck`. - Ground truth, not a structural proxy: the top-level column reads generated - code emits are `F.col("...")`, the outermost `array_check`/ - `nested_array_check` string argument, and the `map_keys_check`/ - `map_values_check` string argument. Element-relative accessors (`el[...]`, - `inner[...]`) read nothing at the row level. + Each variant enumerates top-level row columns from the IR structure + directly -- no regex over rendered source. 
`ColumnGuard` discriminators + are included (they produce `F.col(...)` at the row level); `ElementGuard` + discriminators are not (they reference `el[...]`, an element-relative + accessor). """ - def test_scalar_col(self) -> None: - assert _read_columns('check_bounds(F.col("speed"), ge=0)') == frozenset( - {"speed"} - ) - - def test_struct_leaf_strips_to_top_level(self) -> None: - # require_any_of over a struct field unwraps to a dotted required leaf; - # the read column is the top-level struct, not the dotted path. - expr = 'check_require_any_of([F.col("fast.value"), F.col("slow.value")], ["fast.value", "slow.value"])' - assert _read_columns(expr) == frozenset({"fast", "slow"}) - - def test_top_level_array_check(self) -> None: - assert _read_columns( - 'array_check("sources", lambda el: check_required(el["dataset"]))' - ) == frozenset({"sources"}) - - def test_dotted_array_check_strips_to_top_level(self) -> None: - assert _read_columns( - 'array_check("names.rules", lambda el: check_required(el["value"]))' - ) == frozenset({"names"}) - - def test_nested_array_check_reads_only_outer_column(self) -> None: - # The outer column is a string literal; inner iteration uses an - # element accessor (`el["when"]["vehicle"]`), which is not a row-level read. - expr = ( - 'nested_array_check("access_restrictions", lambda el: ' - 'array_check(el["when"]["vehicle"], lambda inner: ' - 'check_forbid_if(inner["unit"], inner["dimension"] == "axle_count", "...")))' - ) - assert _read_columns(expr) == frozenset({"access_restrictions"}) - - def test_map_keys_check_reads_map_column(self) -> None: - # A map key/value check dereferences the map column by name, exactly - # like array_check; the inner lambda reads a projected element, not a - # row column. The runtime must drop the check when the map is absent. 
- expr = 'map_keys_check("license_priority", lambda k: check_pattern(k, "^x$", label="pattern"))' - assert _read_columns(expr) == frozenset({"license_priority"}) - - def test_map_values_check_reads_map_column(self) -> None: - expr = 'map_values_check("license_priority", lambda v: check_bounds(v, ge=0))' - assert _read_columns(expr) == frozenset({"license_priority"}) - - def test_multiple_cols_with_condition(self) -> None: - # require_if reads its target column and the column its condition - # branches on; the description string is not a column read. - expr = ( - 'check_require_if(F.col("admin_level"), F.col("subtype") == "county", ' - "\"subtype = 'county'\")" + def test_scalar_field_read_columns(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("speed"), ) - assert _read_columns(expr) == frozenset({"admin_level", "subtype"}) + assert check.read_columns == frozenset({"speed"}) - def test_variant_gated_field_reads_discriminator(self) -> None: - # A variant-gated field check dereferences the discriminator column too, - # so an absent discriminator drops the check rather than crashing. 
- expr = 'F.when(F.col("subtype").isin(["road"]), check_required(F.col("class")))' - assert _read_columns(expr) == frozenset({"subtype", "class"}) + def test_struct_dotted_scalar_strips_to_top_level(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_bounds"),), + target=_path("bbox.xmin"), + ) + assert check.read_columns == frozenset({"bbox"}) - def test_no_row_level_reads(self) -> None: - assert _read_columns('F.lit(None).cast("string")') == frozenset() + def test_array_field_read_columns(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("sources[]"), + ) + assert check.read_columns == frozenset({"sources"}) + def test_dotted_array_strips_to_top_level(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("names.rules[]"), + ) + assert check.read_columns == frozenset({"names"}) -class TestRequireReadColumns: - """Every generated check must read at least one top-level column. + def test_map_field_read_columns(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_stripped"),), + target=_path("names.common{value}"), + ) + assert check.read_columns == frozenset({"names"}) - The guard turns an unrecognized render form -- which yields empty - `read_columns` and a check the runtime can never drop on absence -- - into a generation-time error instead of a latent Spark crash. 
- """ + def test_column_guard_discriminator_included(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("class"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ) + assert check.read_columns == frozenset({"class", "subtype"}) - def test_returns_columns_when_recognized(self) -> None: - assert _require_read_columns( - 'check_bounds(F.col("speed"), ge=0)', "speed", "bounds" - ) == frozenset({"speed"}) - - def test_raises_when_no_column_recognized(self) -> None: - with pytest.raises(ValueError, match="reads no top-level column"): - _require_read_columns( - 'unknown_wrapper("license_priority", lambda e: e)', - "license_priority", - "pattern", + def test_element_guard_discriminator_excluded(self) -> None: + # ElementGuard discriminators reference `el["subtype"]`, not F.col -- + # they are element-relative and do not constitute a top-level row read. + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[].value"), + guards=(ElementGuard(discriminator="subtype", values=("road",)),), + ) + assert check.read_columns == frozenset({"items"}) + + def test_model_check_require_any_of_reads_all_field_names(self) -> None: + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y", "z")), + ) + assert check.read_columns == frozenset({"x", "y", "z"}) + + def test_model_check_require_if_includes_condition_field(self) -> None: + check = ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "county"), + ), + ) + assert check.read_columns == frozenset({"admin_level", "subtype"}) + + def test_model_check_negated_condition_field_included(self) -> None: + check = ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=Not(FieldEqCondition("subtype", "county")), + ), + ) + assert check.read_columns == frozenset({"admin_level", "subtype"}) + + def 
test_model_check_array_target_reads_only_container_column(self) -> None: + # When the constrained model is inside an array, field references use + # element-relative el["x"] accessors (not F.col("x")), so only the outer + # array column is a top-level row read. + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y")), + target=_path("items[]"), + ) + assert check.read_columns == frozenset({"items"}) + + # IMPORTANT 1 — descriptor gate on scalar vs array target + def test_scalar_target_gate_column_included(self) -> None: + # A descriptor gate on a scalar target renders as F.col("{gate}").isNotNull(), + # a row-level read; the gate's top-level column must appear in read_columns. + check = Check( + descriptors=( + ExpressionDescriptor(function="check_required", gate=_path("parent")), + ), + target=_path("parent.value"), + ) + assert check.read_columns == frozenset({"parent"}) + + def test_array_target_gate_column_excluded(self) -> None: + # A descriptor gate on an array target is applied element-relatively via + # element_relative_gate (el[...]), not as F.col -- excluded from read_columns. + check = Check( + descriptors=( + ExpressionDescriptor(function="check_required", gate=_path("items[]")), + ), + target=_path("items[].value"), + ) + assert check.read_columns == frozenset({"items"}) + + # IMPORTANT 2 — RequireIf condition field exclusion on array target + def test_model_check_require_if_array_target_excludes_condition_field(self) -> None: + # On an array target, the condition is el["cond"] (element-relative), not + # F.col("cond"); only the outer array column is a row-level read. 
+ check = ModelCheck( + descriptor=RequireIf( + field_names=("x",), + condition=FieldEqCondition("cond", "v"), + ), + target=_path("items[]"), + ) + assert check.read_columns == frozenset({"items"}) + + def test_model_check_forbid_if_array_target_excludes_condition_field(self) -> None: + check = ModelCheck( + descriptor=ForbidIf( + field_names=("x",), + condition=FieldEqCondition("cond", "v"), + field_shapes=(), + ), + target=_path("items[]"), + ) + assert check.read_columns == frozenset({"items"}) + + # MINOR 3 — RadioGroup and MinFieldsSet share the RequireAnyOf match arm + @pytest.mark.parametrize( + "descriptor", + [ + RequireAnyOf(field_names=("a", "b")), + RadioGroup(field_names=("a", "b")), + MinFieldsSet(field_names=("a", "b"), count=1), + ], + ) + def test_model_check_row_root_field_names_in_read_columns( + self, descriptor: RequireAnyOf | RadioGroup | MinFieldsSet + ) -> None: + # All three variants carry field_names rendered as F.col(...) at the row root. + check = ModelCheck(descriptor=descriptor) + assert check.read_columns == frozenset({"a", "b"}) + + # MINOR 4 — ModelCheck on a MapPath target + def test_model_check_map_target_reads_only_map_column(self) -> None: + # A dict[K, Model] value-model constraint targets a MapPath; field references + # use the projected element variable (v["field"]), not F.col. Only the map + # column itself is a row-level read. + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("label", "value")), + target=_path("names.common{value}"), + ) + assert check.read_columns == frozenset({"names"}) + + +# Resurrected as a TEST ORACLE: the regex `read_columns` derivation deleted from +# renderer.py in this refactor. It recognized every top-level column form the +# renderer emits -- `F.col`, the outer `array_check`/`nested_array_check`, and +# `map_keys_check`/`map_values_check` -- by reading the rendered source directly. 
+_RENDERED_COLUMN_READ = re.compile( + r'(?:F\.col|(?:nested_)?array_check|map_(?:keys|values)_check)\("([^"]+)"' +) + + +def _columns_in_rendered_expr(expr: str) -> frozenset[str]: + """Top-level columns a rendered check expression dereferences (regex oracle).""" + return frozenset( + m.group(1).split(".", 1)[0] for m in _RENDERED_COLUMN_READ.finditer(expr) + ) + + +def _read_columns_mismatches(spec: ModelSpec) -> list[str]: + """Mismatches between IR `read_columns` and the rendered source, for one spec.""" + field_checks, model_checks = build_checks(spec) + mismatches: list[str] = [] + for check in field_checks: + for row in field_check_rows([check]): + expr = str(_render_check_function_context(row)["expr"]) + rendered = _columns_in_rendered_expr(expr) + expected = row.check.read_columns + if rendered != expected: + mismatches.append( + f"{spec.name}.{row.label}: rendered={sorted(rendered)} " + f"read_columns={sorted(expected)} expr={expr}" + ) + for model_row in model_check_rows(model_checks): + expr = str(_render_model_constraint_function_context(model_row)["expr"]) + rendered = _columns_in_rendered_expr(expr) + expected = model_row.check.read_columns + if rendered != expected: + mismatches.append( + f"{spec.name}.{model_row.label} (model): rendered={sorted(rendered)} " + f"read_columns={sorted(expected)} expr={expr}" ) + return mismatches + + +class TestReadColumnsMatchRenderedSource: + """IR-derived `read_columns` equals what the rendered expression dereferences. + + `read_columns` moved from a regex over rendered source to an IR-structural + derivation, so the two are now independent code paths that must agree: + `validate_model` drops a check only when none of its `read_columns` are + present in the input, so a column the rendered `F.col(...)` reads but + `read_columns` omits would reach Spark unresolved when that column is absent. + Neither the unit tests nor the regeneration diff catch such a desync (both + sides derive from the same code). 
This oracle re-derives the columns from + rendered source -- ground truth -- and asserts equality across every check + the real schemas produce, so a future renderer change that emits a new + column form without updating `read_columns` fails here. + """ + + def test_real_models_read_columns_match_rendered_source(self) -> None: + specs: list[ModelSpec] = list(flat_specs_from_discovery()) + specs.append(discover_feature("Segment")) + mismatches: list[str] = [] + for spec in specs: + mismatches.extend(_read_columns_mismatches(spec)) + assert not mismatches, "read_columns desync:\n" + "\n".join(mismatches) class TestRequireFieldEq: @@ -193,6 +343,11 @@ class BoundsModel(BaseModel): score: Annotated[float, Ge(0.0)] +# int32 is a non-float primitive; bounds on it must not emit the NaN guard. +class IntBoundsModel(BaseModel): + count: Annotated[int32, Ge(0)] + + class ArrayModel(BaseModel): tags: Annotated[list[str], MinLen(1)] @@ -335,6 +490,22 @@ def test_depth_3_renders_valid_python(self) -> None: assert "nested_array_check(" in source +class TestBoundsNanGuardRendering: + """The NaN guard flag is emitted only for non-float bound columns.""" + + def test_float_bound_omits_check_nan(self) -> None: + """Float-typed bounds produce a check_bounds call without check_nan.""" + source = _render(BoundsModel) + start = source.index("check_bounds(") + call = source[start : source.index(")", start) + 1] + assert "check_nan" not in call + + def test_integer_bound_emits_check_nan_false(self) -> None: + """Integer-typed bounds include check_nan=False to skip the dead guard.""" + source = _render(IntBoundsModel) + assert "check_nan=False" in source + + class TestBuilderFunction: def test_contains_builder_function(self, literal_subtype_source: str) -> None: assert "def simple_checks()" in literal_subtype_source @@ -363,8 +534,6 @@ def test_contains_struct_field(self, literal_subtype_source: str) -> None: def test_shared_struct_ref_emits_struct_field(self) -> None: """Shared struct 
refs (BBOX_STRUCT) render as the type of a StructField.""" - from overture.schema.codegen.pyspark.schema_builder import SchemaField - schema_fields = [SchemaField(name="bbox", type_expr="BBOX_STRUCT")] source = render_model_module("simple", [], [], schema_fields) assert 'StructField("bbox", BBOX_STRUCT, True)' in source diff --git a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py index 80cdfd739..cc000c559 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py @@ -10,7 +10,8 @@ ) from overture.schema.codegen.extraction.specs import ModelSpec from overture.schema.codegen.pyspark.check_builder import build_checks -from overture.schema.codegen.pyspark.check_ir import ElementGuard +from overture.schema.codegen.pyspark.check_ir import ElementGuard, ModelCheck +from overture.schema.codegen.pyspark.constraint_dispatch import RequireAnyOf from overture.schema.codegen.pyspark.test_data.scaffold import ( generate_model_scaffold, generate_scaffold, @@ -226,6 +227,23 @@ def test_top_level_model_constraint_produces_empty_scaffold( scaffold = generate_model_scaffold(node, division_area_spec) assert isinstance(scaffold, dict) + def test_map_value_model_constraint_produces_empty_scaffold( + self, connector_spec: ModelSpec + ) -> None: + """A `dict[K, Model]` value-model constraint needs no scaffold. + + The mutation (`map_path=`) owns map navigation -- it corrupts the + base row's single map entry in place, or stubs one when the map is + absent. A dict scaffold can't replace a base-row map entry under + deep_merge's recursive dict merge, so {} is correct, not the + row-root-mutation bug. 
+ """ + mc = ModelCheck( + descriptor=RequireAnyOf(field_names=("foo", "bar")), + target=_path("subs{value}"), + ) + assert generate_model_scaffold(mc, connector_spec) == {} + def test_array_nested_model_constraint_builds_path( self, segment_spec: ModelSpec ) -> None: diff --git a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py index 966686058..4867ea245 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py @@ -7,6 +7,7 @@ import pytest from overture.schema.codegen.extraction.field import ArrayOf, ModelRef, Primitive from overture.schema.codegen.extraction.specs import RecordSpec +from overture.schema.codegen.pyspark._primitive_fill import PRIMITIVE_FILL_TABLE from overture.schema.codegen.pyspark.check_ir import ( Check, ColumnGuard, @@ -20,8 +21,12 @@ RadioGroup, RequireAnyOf, RequireIf, + _needs_explicit_fill, ) from overture.schema.codegen.pyspark.renderer import render_model_module +from overture.schema.codegen.pyspark.test_data.base_row import ( + _primitive_default as _base_row_primitive_default, +) from overture.schema.codegen.pyspark.test_renderer import ( _fill_value_literal, ) @@ -44,6 +49,13 @@ # rather than executing it, so the import target need not be real. _TEST_EXPRESSION_IMPORT = "_placeholder.expression_module" +# Representative base_type for each SparkCategory in PRIMITIVE_FILL_TABLE. +_CATEGORY_BASE_TYPE: dict[str, str] = { + "int": "int32", + "float": "float64", + "bool": "bool", +} + def render_test_module(*args: object, **kwargs: object) -> str: """Invoke the renderer with placeholder `expression_import`/`support_prefix`. 
@@ -523,6 +535,90 @@ def test_require_if_not_condition_uses_negate(self) -> None: ast.parse(source) assert "negate=True" in source + def test_require_any_of_map_value_uses_map_path(self) -> None: + """require_any_of on a `dict[K, Model]` value model passes map_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("foo", "bar")), + target=_path("subs{value}"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'map_path="subs"' in source + assert "array_path" not in source + + def test_require_any_of_map_value_leaf_uses_struct_path(self) -> None: + """A struct-nested sub-model in a map value passes map_path + struct_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("foo", "bar")), + target=_path("subs{value}.inner"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'map_path="subs"' in source + assert 'struct_path="inner"' in source + + def test_min_fields_set_map_value_uses_map_path(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=MinFieldsSet(field_names=("foo", "bar"), count=1), + target=_path("subs{value}"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'map_path="subs"' in source + + def test_require_if_map_value_uses_map_path(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + target=_path("subs{value}"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'map_path="subs"' in source + + def test_radio_group_map_value_raises(self) -> None: + """radio_group has no map-aware mutation; raise rather than emit a vacuous test.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + target=_path("subs{value}"), + ), + ] + with pytest.raises(ValueError, match="map_path"): + 
render_test_module("test", [], model_nodes) + + def test_require_any_of_map_key_projection_raises(self) -> None: + """A model can't be a dict key, so a KEY-projection model check is untestable.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("foo", "bar")), + target=_path("subs{key}"), + ), + ] + with pytest.raises(ValueError, match="map key"): + render_test_module("test", [], model_nodes) + + def test_require_any_of_map_value_multi_segment_leaf_raises(self) -> None: + """The mutation struct_path is a single segment; a deeper leaf has no support.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("foo", "bar")), + target=_path("subs{value}.a.b"), + ), + ] + with pytest.raises(ValueError, match="single segment"): + render_test_module("test", [], model_nodes) + def test_model_scenario_uses_inline_lambda(self) -> None: """Model scenarios emit mutate=lambda row: ... directly.""" model_nodes = [ @@ -1177,3 +1273,50 @@ def test_no_geometry_type_no_shapely_imports(self) -> None: nodes = [make_check("check_required", _path("country"))] source = render_test_module("test", nodes, []) assert "shapely" not in source + + +class TestPrimitiveFillTableConsistency: + """The shared PRIMITIVE_FILL_TABLE drives all three fill-related functions. + + Every category in the table must be accepted by `_needs_explicit_fill`, + produce a non-raising `_fill_value_literal`, and yield the matching + `_primitive_default` runtime value. A future category added to the table + but not wired into a consumer will fail here. + """ + + def test_category_base_type_covers_table(self) -> None: + """_CATEGORY_BASE_TYPE must cover every key in PRIMITIVE_FILL_TABLE. + + If a category is added to the table without a representative base_type, + the other consistency tests would raise KeyError with a misleading trace + rather than a clear assertion. This test catches the gap loudly. 
+ """ + assert set(_CATEGORY_BASE_TYPE) == set(PRIMITIVE_FILL_TABLE), ( + "Add a representative base_type to _CATEGORY_BASE_TYPE for each " + "new PRIMITIVE_FILL_TABLE key (and remove entries for deleted keys)." + ) + + def test_table_covers_needs_explicit_fill(self) -> None: + for category in PRIMITIVE_FILL_TABLE: + shape = Primitive(base_type=_CATEGORY_BASE_TYPE[category]) + assert _needs_explicit_fill(shape), ( + f"category {category!r} not accepted by _needs_explicit_fill" + ) + + def test_table_covers_fill_value_literal(self) -> None: + for category, (literal, _) in PRIMITIVE_FILL_TABLE.items(): + shape = Primitive(base_type=_CATEGORY_BASE_TYPE[category]) + assert _fill_value_literal(shape) == literal, ( + f"category {category!r} literal mismatch" + ) + + def test_table_covers_primitive_default(self) -> None: + for category, (_, runtime_value) in PRIMITIVE_FILL_TABLE.items(): + base_type = _CATEGORY_BASE_TYPE[category] + result = _base_row_primitive_default(base_type) + assert result == runtime_value, ( + f"category {category!r} runtime value mismatch" + ) + assert type(result) is type(runtime_value), ( + f"category {category!r} runtime type mismatch" + ) diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py index ad1cf9fa7..02d7ffcc2 100644 --- a/packages/overture-schema-codegen/tests/test_reverse_references.py +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -17,8 +17,14 @@ spec_for_model, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.field import ( + ArrayOf, + ConstraintSource, + Primitive, +) from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import ( + NewTypeSpec, PydanticTypeSpec, RecordSpec, TypeIdentity, @@ -87,13 +93,6 @@ def 
test_newtype_inheriting_from_newtype_produces_used_by_entry() -> None: def test_newtype_inheriting_through_array_layer_produces_used_by_entry() -> None: """A NewType chaining through an array NewType inherits the inner NewType's provenance from the array layer, not just the terminal scalar.""" - from overture.schema.codegen.extraction.field import ( - ArrayOf, - ConstraintSource, - Primitive, - ) - from overture.schema.codegen.extraction.specs import NewTypeSpec - Inner = NewType("Inner", str) Outer = NewType("Outer", list) diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py index 003b94aba..ea3421f5a 100644 --- a/packages/overture-schema-codegen/tests/test_type_analyzer.py +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -8,11 +8,14 @@ from overture.schema.codegen.extraction.field import ( AnyScalar, ArrayOf, + ConstraintSource, FieldShape, LiteralScalar, MapOf, + ModelRef, NewTypeShape, Primitive, + UnionRef, ) from overture.schema.codegen.extraction.field_walk import ( all_constraints, @@ -22,13 +25,16 @@ ArrayMinLen, ScalarMinLen, ) +from overture.schema.codegen.extraction.specs import RecordSpec, UnionSpec from overture.schema.codegen.extraction.type_analyzer import ( UnresolvedForwardRefError, UnsupportedUnionError, analyze_type, + attach_constraints, single_literal_value, unwrap_list, ) +from overture.schema.common.scoping.vehicle import VehicleSelector from overture.schema.system.primitive import int32 from overture.schema.system.ref import Id from overture.schema.system.string import ( @@ -130,9 +136,16 @@ def test_optional_list_with_optional_element(self) -> None: assert isinstance(shape, ArrayOf) assert optional is True + def test_list_inherits_element_description(self) -> None: + # A list field with no field-level description inherits its + # element's description -- losing it would leave the field + # undocumented when the only prose lives on the 
element. + desc = _description(list[Annotated[str, Field(description="element prose")]]) + assert desc == "element prose" + def test_list_optional_element_desc_is_none(self) -> None: - # Description comes from Field(description=...) at the field layer, - # not from the element type. List branch returns None, matching dict. + # Element nullability alone introduces no description: list[str | None] + # has no prose on either layer, so the field description stays None. _, _, desc = analyze_type(list[str | None]) assert desc is None @@ -174,15 +187,9 @@ class TestAttachConstraintsOnModelTerminal: """Constraints destined for a model/union terminal are rejected loudly.""" def _model_ref(self) -> FieldShape: - from overture.schema.codegen.extraction.field import ModelRef - from overture.schema.codegen.extraction.specs import RecordSpec - return ModelRef(model=RecordSpec(name="Person", description=None)) def _union_ref(self) -> FieldShape: - from overture.schema.codegen.extraction.field import UnionRef - from overture.schema.codegen.extraction.specs import UnionSpec - return UnionRef( union=UnionSpec( name="U", @@ -198,9 +205,6 @@ def _union_ref(self) -> FieldShape: @pytest.mark.parametrize("ref_name", ["_model_ref", "_union_ref"]) def test_constraint_on_terminal_raises(self, ref_name: str) -> None: - from overture.schema.codegen.extraction.field import ConstraintSource - from overture.schema.codegen.extraction.type_analyzer import attach_constraints - shape = getattr(self, ref_name)() cs = (ConstraintSource(source_ref=None, source_name=None, constraint=Ge(0)),) with pytest.raises(NotImplementedError): @@ -208,8 +212,6 @@ def test_constraint_on_terminal_raises(self, ref_name: str) -> None: @pytest.mark.parametrize("ref_name", ["_model_ref", "_union_ref"]) def test_no_constraints_is_noop(self, ref_name: str) -> None: - from overture.schema.codegen.extraction.type_analyzer import attach_constraints - shape = getattr(self, ref_name)() assert attach_constraints(shape, ()) is 
shape @@ -491,8 +493,6 @@ def test_no_resolver_raises_on_multi_arm(self) -> None: analyze_type(union_type) def test_annotated_wrapped_members_unwrapped(self) -> None: - from overture.schema.codegen.extraction.type_analyzer import analyze_type as at - captured_members: list[tuple[type[BaseModel], ...]] = [] def resolver( @@ -507,7 +507,7 @@ def resolver( Annotated[UnionModelA, Tag("a")] | Annotated[UnionModelB, Tag("b")], Field(description="disc"), ] - at(union_type, union_resolver=resolver) + analyze_type(union_type, union_resolver=resolver) expected: set[type[BaseModel]] = {UnionModelA, UnionModelB} assert set(captured_members[0]) == expected @@ -547,8 +547,6 @@ def test_optional_list(self) -> None: assert unwrap_list(list[int] | None) is int def test_optional_list_preserves_annotated(self) -> None: - from overture.schema.common.scoping.vehicle import VehicleSelector - assert unwrap_list(list[VehicleSelector] | None) is VehicleSelector diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py index 22e412200..66cbbcbd5 100644 --- a/packages/overture-schema-codegen/tests/test_type_collection.py +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -21,6 +21,8 @@ from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) +from overture.schema.system.primitive import uint8 +from overture.schema.system.string import HexColor from pydantic import BaseModel @@ -127,9 +129,6 @@ def test_registered_primitive_newtype_not_collected(self) -> None: """A non-semantic NewType (uint8) belongs on the aggregate primitives page, not as a standalone NewTypeSpec whose path collides with it.""" - from overture.schema.system.primitive import uint8 - from overture.schema.system.string import HexColor - feature = type( "FeatureWithPrimitives", (BaseModel,), diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py index fc616fcbe..c7667402e 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py @@ -45,6 +45,13 @@ _EWKB_FLAG_MASK = 0x0FFFFFFF _ISO_DIMENSION_MODULUS = 1000 +# A WKB header is a 1-byte order flag plus a 4-byte type word: 5 bytes, or 10 +# hex characters under F.hex. F.conv returns NULL only when its input is the +# empty string (a 0-1 byte blob), so a 2-4 byte blob parses a truncated header +# into a non-null bogus type and would pass; gate on hex length so every blob +# too short to carry a full type word is a violation. +_MIN_WKB_HEADER_HEX_LEN = 10 + _BOUND_OPS: dict[str, tuple[str, Callable[[Column, float | int], Column]]] = { "ge": (">=", lambda c, v: c < v), @@ -61,8 +68,22 @@ def check_bounds( gt: float | int | None = None, le: float | int | None = None, lt: float | int | None = None, + check_nan: bool = True, ) -> Column: - """Numeric bounds check. Returns error string or null.""" + """Numeric bounds check. Returns error string or null. + + Parameters + ---------- + col + Column to validate. + ge, gt, le, lt + Inclusive/exclusive lower and upper bounds. + check_nan + When True (default), prepend an explicit NaN guard so that NaN values + are rejected even on lower bounds (Spark sorts NaN above all values, + so a lower bound never fires on NaN without this guard). Set to False + for integer columns, where NaN is impossible and the guard is dead work. 
+ """ checks: list[Column] = [] for key, value in (("ge", ge), ("gt", gt), ("le", le), ("lt", lt)): if value is None: @@ -79,6 +100,9 @@ def check_bounds( ) if not checks: return F.lit(None).cast("string") + if not check_nan: + # null col -> every bound comparison is null, coalesce yields null + return F.coalesce(*checks) # NaN satisfies no Pydantic bound (every comparison against it is False), # but Spark sorts NaN above all values, so lower bounds (NaN < v / NaN <= v) # never fire on it. Reject NaN explicitly whenever any bound applies. The @@ -484,9 +508,15 @@ def check_geometry_type( allowed_codes = [_WKB_TYPE_CODE[t] for t in allowed] names = " | ".join(t.geo_json_type for t in allowed) if len(allowed_codes) == 1: - violation = base_type != allowed_codes[0] + type_mismatch = base_type != allowed_codes[0] else: - violation = ~base_type.isin(allowed_codes) + type_mismatch = ~base_type.isin(allowed_codes) + # A blob too short to hold the full 4-byte type word cannot be parsed; treat + # it as a violation rather than validating a bogus type read from a truncated + # header. The length gate subsumes the conv()-returns-NULL case (0-1 byte + # blob) and also catches the 2-4 byte blobs that parse to a non-null garbage + # type and would otherwise slip through. 
+ violation = (F.length(hex_geom) < _MIN_WKB_HEADER_HEX_LEN) | type_mismatch return F.when( col.isNotNull() & violation, error_msg(f"expected {names} geometry"), diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py index cb9366dc5..a72ff00a3 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py @@ -180,7 +180,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py index 64e09d7d2..d6c98b482 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py @@ -407,7 +407,9 @@ def _license_priority_value_check() -> Check: return Check( field="license_priority{value}", name="bounds", - expr=map_values_check("license_priority", lambda v: check_bounds(v, ge=0)), + expr=map_values_check( + "license_priority", lambda v: check_bounds(v, ge=0, check_nan=False) + ), shape=CheckShape.ARRAY, read_columns=frozenset({"license_priority"}), ) diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py index 19b279a5a..0720ce5bc 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py @@ -181,7 +181,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -315,7 +315,7 @@ def _depth_bounds_check() -> Check: return Check( field="depth", name="bounds", - expr=check_bounds(F.col("depth"), ge=0), + expr=check_bounds(F.col("depth"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"depth"}), ) @@ -325,7 +325,7 @@ def _cartography_prominence_bounds_check() -> Check: return Check( field="cartography.prominence_0", name="bounds", - expr=check_bounds(F.col("cartography.prominence"), ge=1), + expr=check_bounds(F.col("cartography.prominence"), ge=1, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -335,7 +335,7 @@ def _cartography_prominence_bounds_check_1() -> Check: return Check( field="cartography.prominence_1", name="bounds", - expr=check_bounds(F.col("cartography.prominence"), le=100), + expr=check_bounds(F.col("cartography.prominence"), le=100, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -345,7 +345,7 @@ def _cartography_min_zoom_bounds_check() -> Check: return Check( field="cartography.min_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + expr=check_bounds(F.col("cartography.min_zoom"), ge=0, 
check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -355,7 +355,7 @@ def _cartography_min_zoom_bounds_check_1() -> Check: return Check( field="cartography.min_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), le=23), + expr=check_bounds(F.col("cartography.min_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -365,7 +365,7 @@ def _cartography_max_zoom_bounds_check() -> Check: return Check( field="cartography.max_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + expr=check_bounds(F.col("cartography.max_zoom"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -375,7 +375,7 @@ def _cartography_max_zoom_bounds_check_1() -> Check: return Check( field="cartography.max_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), le=23), + expr=check_bounds(F.col("cartography.max_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py index 8372383df..e6d89d350 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py @@ -189,7 +189,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py index aa229e53a..39ebf28ba 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py @@ -189,7 +189,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -396,7 +396,7 @@ def _elevation_check() -> Check: return Check( field="elevation", name="bounds", - expr=check_bounds(F.col("elevation"), le=9000), + expr=check_bounds(F.col("elevation"), le=9000, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"elevation"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py index 1e6b86777..9f16107d6 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py @@ -181,7 +181,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -339,7 +339,7 @@ def _cartography_prominence_bounds_check() -> Check: return Check( field="cartography.prominence_0", 
name="bounds", - expr=check_bounds(F.col("cartography.prominence"), ge=1), + expr=check_bounds(F.col("cartography.prominence"), ge=1, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -349,7 +349,7 @@ def _cartography_prominence_bounds_check_1() -> Check: return Check( field="cartography.prominence_1", name="bounds", - expr=check_bounds(F.col("cartography.prominence"), le=100), + expr=check_bounds(F.col("cartography.prominence"), le=100, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -359,7 +359,7 @@ def _cartography_min_zoom_bounds_check() -> Check: return Check( field="cartography.min_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + expr=check_bounds(F.col("cartography.min_zoom"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -369,7 +369,7 @@ def _cartography_min_zoom_bounds_check_1() -> Check: return Check( field="cartography.min_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), le=23), + expr=check_bounds(F.col("cartography.min_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -379,7 +379,7 @@ def _cartography_max_zoom_bounds_check() -> Check: return Check( field="cartography.max_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + expr=check_bounds(F.col("cartography.max_zoom"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -389,7 +389,7 @@ def _cartography_max_zoom_bounds_check_1() -> Check: return Check( field="cartography.max_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), le=23), + expr=check_bounds(F.col("cartography.max_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py index 0fd2e9e78..e06891663 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py @@ -189,7 +189,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -494,7 +494,7 @@ def _elevation_check() -> Check: return Check( field="elevation", name="bounds", - expr=check_bounds(F.col("elevation"), le=9000), + expr=check_bounds(F.col("elevation"), le=9000, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"elevation"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py index 59c8d0f6a..7ab4c2ff9 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py @@ -190,7 +190,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py index 672c4d6aa..b4ebfb025 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py @@ -186,7 +186,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -718,7 +718,7 @@ def _num_floors_check() -> Check: return Check( field="num_floors", name="bounds", - expr=check_bounds(F.col("num_floors"), gt=0), + expr=check_bounds(F.col("num_floors"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"num_floors"}), ) @@ -728,7 +728,7 @@ def _num_floors_underground_check() -> Check: return Check( field="num_floors_underground", name="bounds", - expr=check_bounds(F.col("num_floors_underground"), gt=0), + expr=check_bounds(F.col("num_floors_underground"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"num_floors_underground"}), ) @@ -738,7 +738,7 @@ def _min_floor_check() -> Check: return Check( field="min_floor", name="bounds", - expr=check_bounds(F.col("min_floor"), gt=0), + expr=check_bounds(F.col("min_floor"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"min_floor"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py index f7691c1d9..07ed06b19 100644 --- 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py @@ -186,7 +186,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -624,7 +624,7 @@ def _num_floors_check() -> Check: return Check( field="num_floors", name="bounds", - expr=check_bounds(F.col("num_floors"), gt=0), + expr=check_bounds(F.col("num_floors"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"num_floors"}), ) @@ -634,7 +634,7 @@ def _num_floors_underground_check() -> Check: return Check( field="num_floors_underground", name="bounds", - expr=check_bounds(F.col("num_floors_underground"), gt=0), + expr=check_bounds(F.col("num_floors_underground"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"num_floors_underground"}), ) @@ -644,7 +644,7 @@ def _min_floor_check() -> Check: return Check( field="min_floor", name="bounds", - expr=check_bounds(F.col("min_floor"), gt=0), + expr=check_bounds(F.col("min_floor"), gt=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"min_floor"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py index 75fb2def0..d9f8069b4 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py @@ -53,7 +53,7 
@@ def _cartography_prominence_bounds_check() -> Check: return Check( field="cartography.prominence_0", name="bounds", - expr=check_bounds(F.col("cartography.prominence"), ge=1), + expr=check_bounds(F.col("cartography.prominence"), ge=1, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -63,7 +63,7 @@ def _cartography_prominence_bounds_check_1() -> Check: return Check( field="cartography.prominence_1", name="bounds", - expr=check_bounds(F.col("cartography.prominence"), le=100), + expr=check_bounds(F.col("cartography.prominence"), le=100, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -73,7 +73,7 @@ def _cartography_min_zoom_bounds_check() -> Check: return Check( field="cartography.min_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + expr=check_bounds(F.col("cartography.min_zoom"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -83,7 +83,7 @@ def _cartography_min_zoom_bounds_check_1() -> Check: return Check( field="cartography.min_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.min_zoom"), le=23), + expr=check_bounds(F.col("cartography.min_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -93,7 +93,7 @@ def _cartography_max_zoom_bounds_check() -> Check: return Check( field="cartography.max_zoom_0", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + expr=check_bounds(F.col("cartography.max_zoom"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"cartography"}), ) @@ -103,7 +103,7 @@ def _cartography_max_zoom_bounds_check_1() -> Check: return Check( field="cartography.max_zoom_1", name="bounds", - expr=check_bounds(F.col("cartography.max_zoom"), le=23), + expr=check_bounds(F.col("cartography.max_zoom"), le=23, check_nan=False), shape=CheckShape.SCALAR, 
read_columns=frozenset({"cartography"}), ) @@ -525,7 +525,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -917,7 +917,7 @@ def _admin_level_bounds_check() -> Check: return Check( field="admin_level_0", name="bounds", - expr=check_bounds(F.col("admin_level"), ge=0), + expr=check_bounds(F.col("admin_level"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) @@ -927,7 +927,7 @@ def _admin_level_bounds_check_1() -> Check: return Check( field="admin_level_1", name="bounds", - expr=check_bounds(F.col("admin_level"), le=16), + expr=check_bounds(F.col("admin_level"), le=16, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) @@ -1071,7 +1071,7 @@ def _population_check() -> Check: return Check( field="population", name="bounds", - expr=check_bounds(F.col("population"), ge=0), + expr=check_bounds(F.col("population"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"population"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py index eca66a320..ac6425b1f 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py @@ -468,7 +468,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), 
shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -718,7 +718,7 @@ def _admin_level_bounds_check() -> Check: return Check( field="admin_level_0", name="bounds", - expr=check_bounds(F.col("admin_level"), ge=0), + expr=check_bounds(F.col("admin_level"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) @@ -728,7 +728,7 @@ def _admin_level_bounds_check_1() -> Check: return Check( field="admin_level_1", name="bounds", - expr=check_bounds(F.col("admin_level"), le=16), + expr=check_bounds(F.col("admin_level"), le=16, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py index f3e2bd07c..a2f261cc1 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py @@ -186,7 +186,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -457,7 +457,7 @@ def _admin_level_bounds_check() -> Check: return Check( field="admin_level_0", name="bounds", - expr=check_bounds(F.col("admin_level"), ge=0), + expr=check_bounds(F.col("admin_level"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) @@ -467,7 +467,7 @@ def _admin_level_bounds_check_1() -> Check: return Check( field="admin_level_1", name="bounds", - expr=check_bounds(F.col("admin_level"), le=16), + 
expr=check_bounds(F.col("admin_level"), le=16, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"admin_level"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py index 846cef63d..67d25c22d 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py @@ -186,7 +186,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py index 63b614e41..c6e40eb9b 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py @@ -179,7 +179,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py 
b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py index ad2311427..af4e625ef 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -187,7 +187,7 @@ def _version_bounds_check() -> Check: return Check( field="version", name="bounds", - expr=check_bounds(F.col("version"), ge=0), + expr=check_bounds(F.col("version"), ge=0, check_nan=False), shape=CheckShape.SCALAR, read_columns=frozenset({"version"}), ) @@ -2881,7 +2881,10 @@ def _speed_limits_max_speed_value_bounds_check() -> Check: expr=F.when( F.col("subtype").isin(["road"]), array_check( - "speed_limits", lambda el: check_bounds(el["max_speed"]["value"], ge=1) + "speed_limits", + lambda el: check_bounds( + el["max_speed"]["value"], ge=1, check_nan=False + ), ), ), shape=CheckShape.ARRAY, @@ -2897,7 +2900,9 @@ def _speed_limits_max_speed_value_bounds_check_1() -> Check: F.col("subtype").isin(["road"]), array_check( "speed_limits", - lambda el: check_bounds(el["max_speed"]["value"], le=350), + lambda el: check_bounds( + el["max_speed"]["value"], le=350, check_nan=False + ), ), ), shape=CheckShape.ARRAY, @@ -2965,7 +2970,10 @@ def _speed_limits_min_speed_value_bounds_check() -> Check: expr=F.when( F.col("subtype").isin(["road"]), array_check( - "speed_limits", lambda el: check_bounds(el["min_speed"]["value"], ge=1) + "speed_limits", + lambda el: check_bounds( + el["min_speed"]["value"], ge=1, check_nan=False + ), ), ), shape=CheckShape.ARRAY, @@ -2981,7 +2989,9 @@ def _speed_limits_min_speed_value_bounds_check_1() -> Check: F.col("subtype").isin(["road"]), array_check( "speed_limits", - lambda el: check_bounds(el["min_speed"]["value"], le=350), + lambda el: check_bounds( + el["min_speed"]["value"], le=350, check_nan=False + ), 
), ), shape=CheckShape.ARRAY, diff --git a/packages/overture-schema-pyspark/tests/_support/mutations.py b/packages/overture-schema-pyspark/tests/_support/mutations.py index 0e0a12299..572fe2085 100644 --- a/packages/overture-schema-pyspark/tests/_support/mutations.py +++ b/packages/overture-schema-pyspark/tests/_support/mutations.py @@ -23,6 +23,7 @@ _SENTINEL = "__FORBIDDEN_PRESENT__" _NOT_EQUAL_PREFIX = "__NOT_" +_STUB_MAP_KEY = "_stub" def mutate_require_any_of( @@ -31,6 +32,7 @@ def mutate_require_any_of( *, array_path: FieldPath | str | None = None, struct_path: str | None = None, + map_path: FieldPath | str | None = None, ) -> dict: """Null every named field so `require_any_of` fires. @@ -41,12 +43,19 @@ def mutate_require_any_of( fields live at the row root. struct_path Optional single intermediate struct field between the array - element and the target fields. + element (or map value) and the target fields. + map_path + Map column whose `dict[K, Model]` value carries the constraint. + Mutually exclusive with `array_path`. See `_null_all_named_fields` for the full nesting semantics. """ return _null_all_named_fields( - row_dict, field_names, array_path=array_path, struct_path=struct_path + row_dict, + field_names, + array_path=array_path, + struct_path=struct_path, + map_path=map_path, ) @@ -64,6 +73,7 @@ def mutate_min_fields_set( *, array_path: FieldPath | str | None = None, struct_path: str | None = None, + map_path: FieldPath | str | None = None, ) -> dict: """Null every named field so `min_fields_set(N)` fires (0 < N). @@ -73,12 +83,16 @@ def mutate_min_fields_set( `check_required` checks; the conformance test only asserts the expected violation is present, so the extra failures don't matter. - `array_path` / `struct_path` mirror `mutate_require_any_of` for the - case where the constrained model is reached through array iteration - (and optionally one intermediate struct field). 
+ `array_path` / `struct_path` / `map_path` mirror `mutate_require_any_of` + for the case where the constrained model is reached through array or map + iteration (and optionally one intermediate struct field). """ return _null_all_named_fields( - row_dict, field_names, array_path=array_path, struct_path=struct_path + row_dict, + field_names, + array_path=array_path, + struct_path=struct_path, + map_path=map_path, ) @@ -88,16 +102,26 @@ def _null_all_named_fields( *, array_path: FieldPath | str | None, struct_path: str | None, + map_path: FieldPath | str | None = None, ) -> dict: """Return a deep copy of *row_dict* with every named field set to None. - Without *array_path*, the fields live at the row root. With *array_path*, - the fields live inside elements of that array column; *struct_path* + Without *array_path* or *map_path*, the fields live at the row root. + With *array_path*, the fields live inside elements of that array column; + with *map_path*, inside the value model of that map column. *struct_path* names an optional single intermediate struct field between the array - element and the target fields. A null array is replaced with a single - stub element so the violation has a row to fire on. + element / map value and the target fields. A null array is replaced with + a single stub element so the violation has a row to fire on; a null map + is stubbed analogously. 
""" result = copy.deepcopy(row_dict) + if map_path is not None: + target = _map_value_to_mutate(result, map_path) + if struct_path: + target = _scaffold_struct_child(target, struct_path) + for name in field_names: + _set_nested(target, name, None) + return result if array_path is None: for name in field_names: _set_nested(result, name, None) @@ -112,18 +136,41 @@ def _null_all_named_fields( _set_nested(result, array_path, [element]) else: for element in arr: - if struct_path: - target = element.get(struct_path) - if target is None: - target = {} - element[struct_path] = target - else: - target = element + target = ( + _scaffold_struct_child(element, struct_path) if struct_path else element + ) for name in field_names: _set_nested(target, name, None) return result +def _scaffold_struct_child(parent: dict, name: str) -> dict: + """Return `parent[name]` as a dict, scaffolding `{}` when missing or None.""" + child = parent.get(name) + if child is None: + child = {} + parent[name] = child + return child + + +def _map_value_to_mutate(row: dict, map_path: FieldPath | str) -> dict: + """Return the value model of the map at *map_path*, stubbing if absent. + + A model-level constraint on a `dict[K, Model]` value targets the value + model. The base row supplies the map's single entry; a missing or empty + map is stubbed with one entry so the violation has a value to fire on -- + mirroring how a null array is stubbed in `_null_all_named_fields`. The + map carries exactly one entry (base-row generation emits a single + key/value pair), so the sole value is unambiguous. 
+ """ + m = _get_nested(row, map_path) + if isinstance(m, dict) and m: + return next(iter(m.values())) + stub: dict = {} + _set_nested(row, map_path, {_STUB_MAP_KEY: stub}, create=True) + return stub + + def mutate_require_if( row_dict: dict, field_names: list[FieldPath | str], @@ -133,6 +180,7 @@ def mutate_require_if( negate: bool = False, array_path: FieldPath | str | None = None, inner_array_path: FieldPath | str | None = None, + map_path: FieldPath | str | None = None, ) -> dict: """Set condition to trigger require_if, then null target fields.""" result = copy.deepcopy(row_dict) @@ -142,7 +190,7 @@ def _apply(target: dict) -> None: for name in field_names: _set_nested(target, name, None) - _apply_to_targets(result, _apply, array_path, inner_array_path) + _apply_to_targets(result, _apply, array_path, inner_array_path, map_path) return result @@ -156,6 +204,7 @@ def mutate_forbid_if( fill_values: dict[str, object] | None = None, array_path: FieldPath | str | None = None, inner_array_path: FieldPath | str | None = None, + map_path: FieldPath | str | None = None, ) -> dict: """Set condition to trigger forbid_if, ensure target fields are non-null. @@ -172,7 +221,7 @@ def _apply(target: dict) -> None: if _get_nested(target, name) is None: _set_nested(target, name, fills.get(name, _SENTINEL)) - _apply_to_targets(result, _apply, array_path, inner_array_path) + _apply_to_targets(result, _apply, array_path, inner_array_path, map_path) return result @@ -324,17 +373,23 @@ def _apply_to_targets( fn: _Applicator, array_path: FieldPath | str | None, inner_array_path: FieldPath | str | None, + map_path: FieldPath | str | None = None, ) -> None: """Apply a mutation function to target dicts at the appropriate nesting level. - Without array paths, applies directly to the row. With `array_path`, - iterates over elements of that array. 
With both `array_path` and - `inner_array_path`, iterates over outer elements, navigates the - inner struct path to a nested array, then iterates those elements. + Without array or map paths, applies directly to the row. With + `array_path`, iterates over elements of that array. With both + `array_path` and `inner_array_path`, iterates over outer elements, + navigates the inner struct path to a nested array, then iterates those + elements. With `map_path`, applies to the value model of that map column + (stubbing one entry when the map is absent). Creates stub array elements when the arrays are null so the mutation can populate them. """ + if map_path is not None: + fn(_map_value_to_mutate(row, map_path)) + return if array_path is None: fn(row) return diff --git a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py index 831dc8a8e..a71c12f11 100644 --- a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py @@ -2,6 +2,7 @@ import struct +import pytest from overture.schema.pyspark.expressions.constraint_expressions import ( check_array_max_length, check_array_min_length, @@ -108,6 +109,43 @@ def test_check_bounds_valid_float_passes(spark: SparkSession) -> None: assert result[0]["err"] is None +def test_check_bounds_nan_guard_off_passes_nan(spark: SparkSession) -> None: + """With check_nan=False the NaN guard is absent; NaN slips past a lower bound.""" + df = spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select( + check_bounds(F.col("val"), ge=0, check_nan=False).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_bounds_nan_guard_on_rejects_nan(spark: SparkSession) -> None: + """With check_nan=True (default) NaN is rejected even with a lower bound.""" + df = 
spark.createDataFrame([Row(val=float("nan"))], schema="val double") + result = df.select( + check_bounds(F.col("val"), ge=0, check_nan=True).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "NaN" in result[0]["err"] + + +def test_check_bounds_integer_column_rejects_violation(spark: SparkSession) -> None: + """check_nan=False is safe for integer columns; bound violations still fire.""" + df = spark.createDataFrame([Row(val=0)], schema="val int") + result = df.select( + check_bounds(F.col("val"), ge=1, check_nan=False).alias("err") + ).collect() + assert result[0]["err"] is not None + + +def test_check_bounds_integer_column_accepts_valid(spark: SparkSession) -> None: + """check_nan=False on an integer column: in-bound values pass.""" + df = spark.createDataFrame([Row(val=5)], schema="val int") + result = df.select( + check_bounds(F.col("val"), ge=1, le=10, check_nan=False).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_check_enum_valid(spark: SparkSession) -> None: df = spark.createDataFrame([Row(val="road")]) result = df.select( @@ -599,6 +637,32 @@ def test_iso_wkb_z_wrong_type_rejected(self, spark: SparkSession) -> None: assert result[0]["err"] is not None assert "Point" in result[0]["err"] + def test_truncated_wkb_flagged(self, spark: SparkSession) -> None: + """A non-null WKB blob too short to contain a type word is flagged as a violation.""" + truncated = bytearray(b"\x01") + df = spark.createDataFrame([Row(geometry=truncated)], schema="geometry binary") + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is not None + + @pytest.mark.parametrize("nbytes", [1, 2, 3, 4]) + def test_partial_header_wkb_flagged(self, spark: SparkSession, nbytes: int) -> None: + """A blob with a partial WKB header is flagged, even when conv() yields a non-null type. 
+ + A little-endian order flag followed by a partial type word (2-4 bytes) + parses to a non-null but bogus base type -- e.g. `b"\\x01\\x01"` reads as + type 1, the Point code, and would silently validate as a Point. Only a + 0-1 byte blob makes conv() return NULL, so a length gate (not a null + check) is what closes the truncation hole. + """ + partial = bytearray(b"\x01" * nbytes) + df = spark.createDataFrame([Row(geometry=partial)], schema="geometry binary") + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is not None + class TestCheckStripped: def test_clean_string(self, spark: SparkSession) -> None: diff --git a/packages/overture-schema-pyspark/tests/test_mutations.py b/packages/overture-schema-pyspark/tests/test_mutations.py index b93ad16de..e4aa0915b 100644 --- a/packages/overture-schema-pyspark/tests/test_mutations.py +++ b/packages/overture-schema-pyspark/tests/test_mutations.py @@ -224,6 +224,81 @@ def test_does_not_mutate_original(self) -> None: assert row["items"][0]["a"] == 1 +class TestMutateMapValueModelConstraint: + """`map_path` threads a model mutation into a `dict[K, Model]` value model. + + A model-level constraint on a map's value model targets the value, not + the row root. `map_path` names the map column; the mutation corrupts the + single entry's value (stubbing one when the map is absent), so the + generated `::invalid` row actually trips the value-model constraint. 
+ """ + + def test_require_any_of_nulls_fields_in_map_value(self) -> None: + row = {"subs": {"en": {"foo": 1, "bar": "x"}}} + result = mutate_require_any_of(row, ["foo", "bar"], map_path="subs") + assert result["subs"]["en"] == {"foo": None, "bar": None} + + def test_require_any_of_preserves_map_key(self) -> None: + row = {"subs": {"en": {"foo": 1, "bar": "x"}}} + result = mutate_require_any_of(row, ["foo", "bar"], map_path="subs") + assert list(result["subs"]) == ["en"] + + def test_require_any_of_stubs_missing_map(self) -> None: + row: dict = {"subs": None} + result = mutate_require_any_of(row, ["foo", "bar"], map_path="subs") + assert isinstance(result["subs"], dict) and result["subs"] + assert next(iter(result["subs"].values())) == {"foo": None, "bar": None} + + def test_struct_path_descends_into_map_value(self) -> None: + row = {"subs": {"en": {"inner": {"foo": 1, "bar": 2}}}} + result = mutate_require_any_of( + row, ["foo", "bar"], map_path="subs", struct_path="inner" + ) + assert result["subs"]["en"]["inner"] == {"foo": None, "bar": None} + + def test_nested_map_column_path(self) -> None: + row = {"outer": {"subs": {"en": {"foo": 1, "bar": 2}}}} + result = mutate_require_any_of(row, ["foo", "bar"], map_path="outer.subs") + assert result["outer"]["subs"]["en"] == {"foo": None, "bar": None} + + def test_require_any_of_does_not_mutate_original(self) -> None: + row = {"subs": {"en": {"foo": 1, "bar": 2}}} + mutate_require_any_of(row, ["foo", "bar"], map_path="subs") + assert row["subs"]["en"]["foo"] == 1 + + def test_min_fields_set_nulls_fields_in_map_value(self) -> None: + row = {"subs": {"en": {"a": 1, "b": 2}}} + result = mutate_min_fields_set(row, ["a", "b"], map_path="subs") + assert result["subs"]["en"] == {"a": None, "b": None} + + def test_require_if_sets_condition_and_nulls_in_map_value(self) -> None: + row = {"subs": {"en": {"subtype": "other", "admin_level": 5}}} + result = mutate_require_if( + row, ["admin_level"], "subtype", "country", 
map_path="subs" + ) + value = result["subs"]["en"] + assert value["subtype"] == "country" + assert value["admin_level"] is None + + def test_require_if_stubs_missing_map(self) -> None: + row: dict = {"subs": None} + result = mutate_require_if( + row, ["admin_level"], "subtype", "country", map_path="subs" + ) + value = next(iter(result["subs"].values())) + assert value["subtype"] == "country" + assert value["admin_level"] is None + + def test_forbid_if_sets_condition_and_ensures_non_null_in_map_value(self) -> None: + row = {"subs": {"en": {"subtype": "other", "admin_level": None}}} + result = mutate_forbid_if( + row, ["admin_level"], "subtype", "country", map_path="subs" + ) + value = result["subs"]["en"] + assert value["subtype"] == "country" + assert value["admin_level"] is not None + + class TestMutateForbidIfNegate: def test_negate_changes_condition_value(self) -> None: """negate=True sets condition_field to something != condition_value.""" diff --git a/pyproject.toml b/pyproject.toml index 154546081..ea86feca4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ select = [ "C4", # flake8-comprehensions "UP", # pyupgrade, "ANN", # flake8-annotations + "PLC0415", # import-outside-top-level (prefer top-level imports) ] [tool.ruff.lint.per-file-ignores] From 6396ee209af8fd1ac80b60a40abc27b0b8559d09 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Fri, 26 Jun 2026 10:56:47 -0700 Subject: [PATCH 11/11] fix(pyspark): close validation and conformance gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generated validation carried two latent runtime bugs. error_msg built its message with F.concat, which yields NULL when any interpolated value is NULL — array_compact then dropped that element along with the violation, so an out-of-bounds value read as valid whenever an interpolated value was itself NULL (a linear-reference range like [null, 1.5]). Each value now coalesces to a string before concatenation. 
Separately, _peel_union dropped every Literal arm wherever a concrete arm coexisted, so generated checks rejected model-valid literals — the empty string on annex *_url fields and "Global" on countries. The dropped literals now survive as a LiteralAlternatives constraint, and a runtime except_literals wrapper lets those exact values pass the concrete arm's content checks while a null still fails check_required. The markdown reference gains an "Also accepts" note. Both bugs were invisible to the conformance suite, which passed without exercising the check target. The per-scenario ::valid row was a plain base-row copy, so for the ~77% of scenarios whose target is reachable only through scaffolded nesting it asserted nothing. The valid row now merges the scaffold onto the base row with no mutation, carrying a constraint-satisfying value at the target. The scaffold generator builds every container on the path as a valid base row, descends a discriminated-union element into its seeded arm, and fills single-level arrays for min_length and uniqueness. coerce_to_schema casts each numeric to its declared column type before createDataFrame so a value scaffolded for a narrowed union arm stays valid. A forbid_if whose condition the base row triggers forbids its own target field, so the scaffold now flips that condition field to a value the forbid rejects and re-runs constraint satisfaction. An unreachable scaffold path raises at generation time instead of emitting a target-absent {}, and an unbuildable scenario fails with its id rather than routing to pytest.skip. Codegen needed correctness and ordering fixes. Restore UnionSpec's @dataclass(eq=False) — the rewrite dropped it, giving the spec value-equality over mutable field lists and leaving it unhashable where consumers key on object identity. schema_builder raises on a zero-field StructType and on union fields that share a name but resolve to differing non-widening Spark types. 
Output is now deterministic: reverse_references dedups referrers in insertion order, and _find_common_base breaks max-MRO-depth ties on module and qualname. The check field and name render through py_literal so a name carrying a quote or backslash stays valid Python, the float NaN guard derives from primitive_spark_category, and pipeline picks the no-arm test filename by arm is not None so a falsy discriminator cannot collide with it. A verify-pyspark-generated make target and a CI git diff --exit-code now gate the committed generated tree — make check regenerates it before tests, so stale output was overwritten and never verified. register_model teardown uses REGISTRY.pop instead of del, so a test that already dropped the key no longer raises KeyError inside finally and masks the body's own exception. Signed-off-by: Seth Fitzsimmons --- .github/workflows/check-python-code.yaml | 11 + Makefile | 10 +- .../codegen/extraction/field_constraints.py | 3 + .../schema/codegen/extraction/field_walk.py | 7 + .../extraction/literal_alternatives.py | 26 + .../schema/codegen/extraction/specs.py | 5 +- .../codegen/extraction/type_analyzer.py | 38 +- .../codegen/extraction/union_extraction.py | 2 +- .../codegen/markdown/reverse_references.py | 11 +- .../schema/codegen/pyspark/check_builder.py | 34 + .../codegen/pyspark/constraint_dispatch.py | 19 +- .../schema/codegen/pyspark/pipeline.py | 2 +- .../schema/codegen/pyspark/renderer.py | 28 +- .../schema/codegen/pyspark/schema_builder.py | 27 +- .../templates/_check_function.py.jinja2 | 4 +- .../pyspark/templates/test_module.py.jinja2 | 7 +- .../codegen/pyspark/test_data/base_row.py | 131 +- .../codegen/pyspark/test_data/scaffold.py | 232 ++-- .../schema/codegen/pyspark/test_renderer.py | 22 +- .../tests/test_constraint_description.py | 21 + .../tests/test_pyspark_base_row.py | 65 + .../tests/test_pyspark_check_builder.py | 59 + .../tests/test_pyspark_constraint_dispatch.py | 7 + .../tests/test_pyspark_e2e.py | 2 +- 
.../tests/test_pyspark_renderer.py | 54 +- .../tests/test_pyspark_scaffold.py | 124 +- .../tests/test_pyspark_test_renderer.py | 12 +- .../tests/test_type_analyzer.py | 34 +- .../pyspark/expressions/column_patterns.py | 13 +- .../expressions/constraint_expressions.py | 14 + .../overture/schema/annex/sources.py | 67 +- .../tests/_support/harness.py | 71 +- .../tests/_support/registry.py | 8 +- .../tests/_support/scenarios.py | 10 + .../tests/expressions/test_column_patterns.py | 10 + .../test_constraint_expressions.py | 32 +- .../overture/schema/addresses/test_address.py | 11 +- .../overture/schema/annex/test_sources.py | 220 ++- .../overture/schema/base/test_bathymetry.py | 11 +- .../schema/base/test_infrastructure.py | 21 +- .../overture/schema/base/test_land.py | 21 +- .../overture/schema/base/test_land_cover.py | 11 +- .../overture/schema/base/test_land_use.py | 21 +- .../overture/schema/base/test_water.py | 21 +- .../schema/buildings/test_building.py | 21 +- .../schema/buildings/test_building_part.py | 21 +- .../schema/divisions/test_division.py | 45 +- .../schema/divisions/test_division_area.py | 21 +- .../divisions/test_division_boundary.py | 17 +- .../overture/schema/places/test_place.py | 35 +- .../schema/transportation/test_connector.py | 11 +- .../transportation/test_segment_rail.py | 330 ++++- .../transportation/test_segment_road.py | 1175 ++++++++++++++--- .../transportation/test_segment_water.py | 330 ++++- .../tests/test_harness.py | 92 +- 55 files changed, 3131 insertions(+), 526 deletions(-) create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/literal_alternatives.py diff --git a/.github/workflows/check-python-code.yaml b/.github/workflows/check-python-code.yaml index ab555ca97..75ec1b145 100644 --- a/.github/workflows/check-python-code.yaml +++ b/.github/workflows/check-python-code.yaml @@ -84,3 +84,14 @@ jobs: - name: Run make check run: make check + + # `make check` regenerates the committed PySpark output in 
place. If a PR + # changed the schema or codegen without committing the regenerated files, + # the working tree now differs from what was committed -- fail loudly so + # stale generated output cannot pass CI. + - name: Verify generated PySpark output is committed + run: | + git diff --exit-code -- \ + packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated \ + packages/overture-schema-pyspark/tests/generated \ + || { echo "::error::Generated PySpark output is stale. Run 'make generate-pyspark' and commit."; exit 1; } diff --git a/Makefile b/Makefile index 201c77358..d73734fd1 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default uv-sync clean-pyspark generate-pyspark check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines +.PHONY: default uv-sync clean-pyspark generate-pyspark verify-pyspark-generated check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines TESTMON ?= --testmon @@ -25,6 +25,14 @@ generate-pyspark: uv-sync clean-pyspark check: uv-sync generate-pyspark @$(MAKE) -j test-only doctest-only lint-only mypy-only +# Regenerate and fail if the committed generated output differs. Catches PRs +# that change the schema or codegen without committing the regenerated files -- +# `check` itself regenerates, so without this guard stale committed output is +# silently overwritten before the tests run and never verified. +verify-pyspark-generated: generate-pyspark + @git diff --exit-code -- $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) \ + || { echo "Generated PySpark output is stale; run 'make generate-pyspark' and commit."; exit 1; } + # test-all is the unconditional full run -- testmon-independent, unlike the # incremental test/test-only targets -- so data-only changes (golden JSON, # [[examples]]) that testmon cannot see still get exercised. 
diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py index 3403d0e37..c62f4adbd 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -18,6 +18,7 @@ from .docstring import first_docstring_line from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen +from .literal_alternatives import LiteralAlternatives from .specs import TypeIdentity from .type_analyzer import ConstraintSource @@ -107,6 +108,8 @@ def describe_field_constraint( return f"Minimum length: {constraint.min_length}" if isinstance(constraint, (ArrayMaxLen, ScalarMaxLen)): return f"Maximum length: {constraint.max_length}" + if isinstance(constraint, LiteralAlternatives): + return "Also accepts: " + ", ".join(f"`{v!r}`" for v in constraint.values) if _is_opaque_constraint(constraint): return f"`{type(constraint).__name__}`" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py index baea83b7b..7384c61c2 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py @@ -41,6 +41,7 @@ "terminal_of", "terminal_primitive", "terminal_scalar", + "terminal_union_ref", "walk_shape", ] @@ -91,6 +92,12 @@ def terminal_model_ref(shape: FieldShape) -> ModelRef | None: return terminal if isinstance(terminal, ModelRef) else None +def terminal_union_ref(shape: FieldShape) -> UnionRef | None: + """Return the terminal `UnionRef`, or `None` for non-union terminals.""" + terminal = terminal_of(shape) + return terminal if isinstance(terminal, 
UnionRef) else None + + def enum_source(shape: FieldShape) -> type[Enum] | None: """Return the `Enum` class backing a `Primitive`, or `None`. diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/literal_alternatives.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/literal_alternatives.py new file mode 100644 index 000000000..5d48d1059 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/literal_alternatives.py @@ -0,0 +1,26 @@ +"""Internal constraint recording a union's literal alternatives. + +A field annotated `X | Literal[c, ...]` validates as "the concrete arm `X`'s +checks pass OR the value is one of `c, ...`". `type_analyzer._peel_union` keeps +the concrete arm as the field's shape (so downstream consumers still see a +`Primitive` / `NewTypeShape` rather than a union of scalar-and-literal) and +records the dropped literal values in this constraint on that shape's layer. + +Consumers read it to let those literal values bypass the concrete arm's +constraints: the PySpark dispatch emits a value-exact bypass, and the markdown +renderer notes the accepted literals. Codegen-internal -- schema authors write +the plain `X | Literal[c]` union; nothing constructs this class directly. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +__all__ = ["LiteralAlternatives"] + + +@dataclass(frozen=True, slots=True) +class LiteralAlternatives: + """Literal values a union field accepts alongside its concrete arm.""" + + values: tuple[object, ...] 
diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py index f4b676f7b..880e07d9a 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -170,7 +170,10 @@ class MemberSpec: spec: RecordSpec -@dataclass +# eq=False: contains mutable lists and a cached_property, so the +# dataclass-generated __eq__ would compare by value over mutable fields and +# __hash__ would be disabled (unhashable). Consumers key on object identity. +@dataclass(eq=False) class UnionSpec: """Specification for a discriminated union type alias.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py index ec1e5d353..7c3c6a71d 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -62,14 +62,21 @@ ) from .field_walk import terminal_of from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen +from .literal_alternatives import LiteralAlternatives @dataclass(frozen=True, slots=True) class _ContinueWith: - """`_peel_union` result: next annotation to keep peeling.""" + """`_peel_union` result: next annotation to keep peeling. + + `literal_alternatives` carries the values of any `Literal[...]` arms + dropped in favor of a single concrete arm, so the caller can attach a + `LiteralAlternatives` constraint to the recursed shape. + """ annotation: object is_optional: bool + literal_alternatives: tuple[object, ...] = () @dataclass(frozen=True, slots=True) @@ -301,6 +308,7 @@ def _recurse( # union short-circuits with the constraints attached. 
next_annotation = inner_annotation layer_optional = False + literal_alts: tuple[object, ...] = () if _is_union(get_origin(inner_annotation)): result = _peel_union( inner_annotation, @@ -315,11 +323,13 @@ def _recurse( result.is_optional, own_desc, ) - case _ContinueWith(next_annotation, layer_optional): + case _ContinueWith(next_annotation, layer_optional, literal_alts): pass case _: assert_never(result) + if literal_alts: + collected.append(_literal_alternatives_source(literal_alts)) inner, opt, desc = _recurse(next_annotation, newtype_ctx) inner = attach_constraints(inner, tuple(collected)) return ( @@ -333,8 +343,12 @@ def _recurse( match result: case _Resolved(shape): return shape, result.is_optional, None - case _ContinueWith(next_annotation, is_optional): + case _ContinueWith(next_annotation, is_optional, literal_alts): inner, opt, desc = _recurse(next_annotation, newtype_ctx) + if literal_alts: + inner = attach_constraints( + inner, (_literal_alternatives_source(literal_alts),) + ) return inner, opt or is_optional, desc case _: assert_never(result) @@ -402,6 +416,13 @@ def _constraint_source( ) +def _literal_alternatives_source(values: tuple[object, ...]) -> ConstraintSource: + """Wrap dropped union `Literal` values as a `LiteralAlternatives` source.""" + return ConstraintSource( + source_ref=None, source_name=None, constraint=LiteralAlternatives(values) + ) + + def _erase_inner_newtypes(shape: FieldShape) -> FieldShape: """Drop every `NewTypeShape` reachable through `ArrayOf` layers. @@ -555,6 +576,15 @@ def _peel_union( concrete_args = [a for a in non_none_args if get_origin(a) is not Literal] real_args = concrete_args if concrete_args else non_none_args + # A single concrete arm alongside `Literal[...]` arms keeps the concrete arm + # as the shape; the literal values ride along as a LiteralAlternatives + # constraint so they bypass the concrete arm's checks. Multi-arm and + # no-concrete-arm unions are unchanged (the literals stay dropped). 
+ literal_alternatives: tuple[object, ...] = () + if len(concrete_args) == 1: + literal_args = [a for a in non_none_args if get_origin(a) is Literal] + literal_alternatives = tuple(v for a in literal_args for v in get_args(a)) + if len(real_args) > 1: members: list[type[BaseModel]] = [] for arg in real_args: @@ -581,7 +611,7 @@ def _peel_union( if not real_args: raise UnsupportedUnionError(f"Union with no concrete types: {annotation}") - return _ContinueWith(real_args[0], is_optional) + return _ContinueWith(real_args[0], is_optional, literal_alternatives) def unwrap_list(annotation: object) -> object: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py index 922b2a887..45c8caca8 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py @@ -52,7 +52,7 @@ def _find_common_base(members: list[type[BaseModel]]) -> type[BaseModel]: def max_mro_index(cls: type) -> int: return max(mro.index(cls) for mro in filtered_mros) - return min(common, key=max_mro_index) + return min(common, key=lambda c: (max_mro_index(c), c.__module__, c.__qualname__)) def _find_field_by_alias(model: type[BaseModel], alias: str) -> FieldInfo | None: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py index b163280bd..91f2864bc 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -65,14 +65,17 @@ def compute_reverse_references( all_specs Supplementary types (enums, newtypes, sub-models). 
""" - references: dict[TypeIdentity, set[UsedByEntry]] = {} + # An insertion-ordered set (dict keys) per target: dedups like a set but + # iterates deterministically, so sorted()'s stable order breaks ties by + # insertion rather than by nondeterministic set-hash order. + references: dict[TypeIdentity, dict[UsedByEntry, None]] = {} def add_reference( target: TypeIdentity, referrer: TypeIdentity, kind: UsedByKind ) -> None: if target == referrer or target not in all_specs: return - references.setdefault(target, set()).add(UsedByEntry(referrer, kind)) + references.setdefault(target, {})[UsedByEntry(referrer, kind)] = None def collect_from_shape( shape: FieldShape, @@ -156,9 +159,9 @@ def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None # Sort into deterministic lists. result: dict[TypeIdentity, list[UsedByEntry]] = {} - for target, ref_set in references.items(): + for target, ref_map in references.items(): entries = sorted( - ref_set, + ref_map, key=lambda e: (e.kind.value, e.identity.name, e.identity.module), ) result[target] = entries diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py index b1dc017f6..56ab5499c 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -62,6 +62,7 @@ terminal_primitive, terminal_scalar, ) +from ..extraction.literal_alternatives import LiteralAlternatives from ..extraction.specs import FieldSpec, ModelSpec, RecordSpec, UnionSpec from ..extraction.type_registry import PRIMITIVE_TYPES from ._render_common import COLUMN_LEVEL_FUNCTIONS @@ -103,6 +104,36 @@ def _dispatch_layer_constraints( return descriptors +def _literal_alternatives(shape: Scalar | MapOf) -> tuple[object, ...]: + """Return the allowed literal values from a 
`LiteralAlternatives` constraint, or `()`. + + `MapOf` returns `()`: a map column carries no literal alternative of its + own (its key/value projections reach their own `Scalar` shapes, which are + handled there). + """ + if isinstance(shape, MapOf): + return () + for cs in shape.constraints: + if isinstance(cs.constraint, LiteralAlternatives): + return cs.constraint.values + return () + + +def _apply_literal_bypass( + descriptors: list[ExpressionDescriptor], + allow_literals: tuple[object, ...], +) -> list[ExpressionDescriptor]: + """Stamp `allow_literals` onto each content descriptor. + + Content descriptors are the non-required checks: enum, pattern, + bounds, base-type checks. `check_required` is excluded by callers + who pass only the content portion of the descriptor list. + """ + if not allow_literals: + return descriptors + return [replace(desc, allow_literals=allow_literals) for desc in descriptors] + + def _enum_values(scalar: Scalar) -> list[object] | None: """Return enum/literal values for a terminal `Scalar`, or `None`.""" if isinstance(scalar, LiteralScalar): @@ -310,6 +341,9 @@ def _terminal_scalar_checks( if base_descriptors is not None: element_descriptors.extend(base_descriptors) element_descriptors = list(dict.fromkeys(element_descriptors)) + element_descriptors = _apply_literal_bypass( + element_descriptors, _literal_alternatives(shape) + ) if required: return [ diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py index 8af6ba0a9..86e12911b 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py @@ -45,6 +45,7 @@ ScalarMaxLen, ScalarMinLen, ) +from ..extraction.literal_alternatives import LiteralAlternatives from ..extraction.specs import FieldSpec from 
..extraction.type_registry import primitive_spark_category from ._primitive_fill import PRIMITIVE_FILL_TABLE @@ -95,6 +96,16 @@ class ExpressionDescriptor: label: str | None = None check_name: str | None = None check_nan: bool | None = None + allow_literals: tuple[object, ...] = () + """Literal values that bypass this check. + + When non-empty, the renderer wraps the generated call in + `except_literals(col, call, list(allow_literals))` so that a column + value matching one of these literals is treated as valid regardless of + what the check would otherwise report. Populated by `check_builder` + from `LiteralAlternatives` constraints on terminal scalars (e.g. + `HttpUrl | Literal[""]`). Never set on `check_required` descriptors. + """ _BASE_TYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { @@ -196,8 +207,6 @@ def _pattern_label(constraint: PatternConstraint) -> str: _BOUND_ATTRS = ("ge", "gt", "le", "lt") -_FLOAT_BASE_TYPES = frozenset({"float", "float32", "float64"}) - def _dispatch_bounds( constraint: Ge | Gt | Le | Lt | Interval, @@ -208,7 +217,7 @@ def _dispatch_bounds( Coerces integer bound values to float on float-typed columns so that generated test mutations match the Spark DoubleType column. """ - is_float = base_type in _FLOAT_BASE_TYPES + is_float = base_type is not None and primitive_spark_category(base_type) == "float" kwargs: list[tuple[str, object]] = [] for attr in _BOUND_ATTRS: value = getattr(constraint, attr, None) @@ -276,7 +285,9 @@ def _raw_pattern(constraint: object) -> str | None: # subclasses PatternConstraint, so it must appear before the PatternConstraint # fallback entry. _CONSTRAINT_DISPATCH: list[tuple[type | tuple[type, ...], _ConstraintHandler]] = [ - ((Reference, Strict), lambda _c, _bt: None), + # LiteralAlternatives is a modifier threaded onto the field's other + # descriptors as allow_literals (by check_builder), not a standalone check. 
+ ((Reference, Strict, LiteralAlternatives), lambda _c, _bt: None), ((Ge, Gt, Le, Lt, Interval), _dispatch_bounds), ( ArrayMinLen, diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py index 781131120..9bb2a9daa 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py @@ -236,7 +236,7 @@ def _render_test_modules( modules: list[GeneratedModule] = [] for arm, (base_row_sparse, base_row_populated) in _select_arm_rows(spec).items(): - suffix = f"_{arm}" if arm else "" + suffix = f"_{arm}" if arm is not None else "" modules.append( GeneratedModule( content=render_test_module( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py index 0911704d8..fd000d957 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -182,7 +182,11 @@ def _render_expr_call( parts.append(f"check_nan={py_literal(desc.check_nan)}") if desc.label is not None: parts.append(f"label={py_literal(desc.label)}") - return f"{desc.function}({', '.join(parts)})" + call = f"{desc.function}({', '.join(parts)})" + if desc.allow_literals: + literals = py_literal(list(desc.allow_literals)) + return f"except_literals({col_expr}, {call}, {literals})" + return call def _element_accessor(var: str, path: tuple[str, ...]) -> str: @@ -563,12 +567,13 @@ def _collect_constraint_expr_imports( `check_radio_group`, ...) are disjoint from that set, so they pass through unfiltered. 
""" - names: set[str] = { - desc.function - for check in field_checks - for desc in check.descriptors - if desc.function not in _COLUMN_PATTERN_HELPERS - } + names: set[str] = set() + for check in field_checks: + for desc in check.descriptors: + if desc.function not in _COLUMN_PATTERN_HELPERS: + names.add(desc.function) + if desc.allow_literals: + names.add("except_literals") for mc in model_checks: names.add(model_constraint_function(mc.descriptor)) return names @@ -622,9 +627,12 @@ def _identifier_tokens(expr: str) -> set[str]: def _collect_spark_type_imports(schema_fields: list[SchemaField]) -> set[str]: - """Collect Spark type class names from schema field type expressions.""" - if not schema_fields: - return set() + """Collect Spark type class names from schema field type expressions. + + `StructType` and `StructField` are always included: the model module + template emits the schema constant as `StructType([...])` unconditionally, + so the import must be present even when there are no fields. + """ used: set[str] = {"StructType", "StructField"} for sf in schema_fields: used |= _identifier_tokens(sf.type_expr) & _SPARK_TYPES diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py index 00a999ca8..08469d67c 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py @@ -115,18 +115,41 @@ def _deduplicate_by_name(fields: list[FieldSpec]) -> list[FieldSpec]: type shapes (e.g. `value` as uint8 in one variant and float64 in another). Parquet stores one column per name, so the schema needs exactly one entry. When two fields share a name, the one with the - wider Spark type wins (matching Parquet's type-widening behavior). + wider numeric Spark type wins (matching Parquet's type-widening + behavior). 
Two same-named fields whose Spark types are non-numeric and + not identical cannot share a column, so the collision fails loudly + rather than silently keeping whichever arm came first. """ seen: dict[str, FieldSpec] = {} for f in fields: existing = seen.get(f.name) - if existing is None or spark_type_rank(f) > spark_type_rank(existing): + if existing is None: + seen[f.name] = f + continue + rank_f, rank_existing = spark_type_rank(f), spark_type_rank(existing) + if rank_f < 0 and rank_existing < 0: + spark_f = _shape_to_spark(f.shape) + spark_existing = _shape_to_spark(existing.shape) + if spark_f != spark_existing: + raise ValueError( + f"Union field {f.name!r} resolves to incompatible " + f"non-widening Spark types across arms " + f"({spark_existing} vs {spark_f}); a single Parquet " + "column cannot represent both." + ) + if rank_f > rank_existing: seen[f.name] = f return list(seen.values()) def _struct_type_expr(fields: list[FieldSpec]) -> str: """Build an inline `StructType([...])` expression from a list of fields.""" + if not fields: + raise ValueError( + "Cannot build a StructType for a model with no fields; an empty " + "struct column cannot carry data and signals an upstream " + "extraction problem." 
+ ) parts = [ f'StructField("{f.name}", {_shape_to_spark(f.shape)}, True)' for f in fields ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 index 078f02b97..beef4fdab 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 @@ -1,8 +1,8 @@ {%- macro check_function(c) -%} def {{ c.func_name }}() -> Check: return Check( - field="{{ c.field }}", - name="{{ c.check_name }}", + field={{ c.field | py_literal }}, + name={{ c.check_name | py_literal }}, expr={{ c.expr }}, shape=CheckShape.{{ c.shape }}, read_columns={{ c.read_columns | py_literal }}, diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 index 9ce436c6a..466103cf2 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 @@ -115,7 +115,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py index cdb711d2e..786ce0af9 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py @@ -50,7 +50,7 @@ from ...extraction.specs import FieldSpec, ModelSpec, RecordSpec, UnionSpec from ...extraction.type_registry import primitive_spark_category from .._primitive_fill import PRIMITIVE_FILL_TABLE -from .._render_common import require_field_eq +from .._render_common import FieldEq, require_field_eq from ..constraint_dispatch import ExpressionDescriptor, dispatch_constraint from ..schema_builder import spark_type_rank from .constraint_values import ( @@ -61,10 +61,12 @@ ) __all__ = [ + "condition_overrides_for_present_field", "generate_arm_rows", "generate_base_row", "generate_populated_arm_rows", "generate_populated_row", + "resolve_arm_spec", "value_for_field", ] @@ -318,6 +320,94 @@ def _satisfy_model_constraints(row: dict[str, Any], spec: ModelSpec) -> None: missing -= 1 +def _condition_disabling_value(field_eq: FieldEq, field_spec: FieldSpec) -> object: + """Return a value for a condition field that makes the condition false. + + `FieldEqCondition(f, X)` holds when `f == X`, so a different enum member + disables it; a negated condition (`Not(...)`, true when `f != X`) is + disabled by `X` itself. Every condition in the schema gates on an enum + field, so a non-enum condition field raises rather than guess a value. 
+ + Parameters + ---------- + field_eq + The unwrapped field-equality condition. + field_spec + Spec of the condition field, used to enumerate alternative values. + """ + forbidden = field_eq.value + if isinstance(forbidden, Enum): + forbidden = forbidden.value + if field_eq.negated: + return forbidden + terminal = terminal_primitive(field_spec.shape) + enum_cls = enum_source(terminal) if terminal is not None else None + if enum_cls is None: + raise TypeError( + f"condition field {field_eq.field_name!r} is not enum-backed; " + "cannot derive a value that disables its forbid_if condition" + ) + for member in enum_cls: + if member.value != forbidden: + return member.value + raise ValueError( + f"enum {enum_cls.__name__} has no member other than {forbidden!r}; " + "cannot disable its forbid_if condition" + ) + + +def condition_overrides_for_present_field( + spec: ModelSpec, field_name: str +) -> dict[str, Any]: + """Return overrides that let `field_name` be present on a valid base row. + + A `forbid_if` whose condition the base row satisfies forbids `field_name`, + so a scaffold that sets the field yields a row Pydantic rejects. Flip each + such condition field to a value the forbid rejects -- which also satisfies + the symmetric `require_if` that then mandates the field -- and re-satisfy + the model constraints, since a flipped condition can newly require other + fields. Returns only the fields whose value differs from the base row; + `field_name` itself is set by the scaffold and is excluded. + + Returns `{}` when no `forbid_if` gates `field_name`, the common case. + + Parameters + ---------- + spec + The model whose constraints govern `field_name`. + field_name + A direct field of `spec` the scaffold needs to set. 
+ """ + forbidding = [ + c + for c in spec.constraints + if isinstance(c, ForbidIfConstraint) and field_name in c.field_names + ] + if not forbidding: + return {} + base = generate_base_row(spec) + fields_by_name = {f.name: f for f in spec.fields} + flips: dict[str, Any] = {} + for constraint in forbidding: + if not _row_satisfies_condition(base, constraint.condition): + continue + field_eq = require_field_eq(constraint.condition) + cond_field = fields_by_name.get(field_eq.field_name) + if cond_field is not None: + flips[field_eq.field_name] = _condition_disabling_value( + field_eq, cond_field + ) + if not flips: + return {} + merged = {**base, **flips} + _satisfy_model_constraints(merged, spec) + return { + name: value + for name, value in merged.items() + if name != field_name and base.get(name) != value + } + + def value_for_field( field: FieldSpec, spec_name: str, @@ -401,6 +491,45 @@ def _widest_union_member(union: UnionSpec) -> RecordSpec: return best_spec +def resolve_arm_spec( + union: UnionSpec, discriminator_value: object | None = None +) -> RecordSpec: + """Return the member `RecordSpec` for one arm of a discriminated union. + + Without a discriminator value (a check not gated to a specific arm), + returns the widest member -- the one whose float types survive PySpark + column widening, per `_widest_union_member`. With a value, returns the + member that value selects, and raises when it selects none: a seeded + discriminator that matches no arm is a check_builder/scaffold inconsistency, + not a reason to fall back to an arm whose fields contradict the seed. + + Parameters + ---------- + union + The union to resolve an arm from. + discriminator_value + The discriminator value identifying the arm (e.g. a scaffold's seeded + `ElementGuard` value), matching a `discriminator_mapping` key. + + Raises + ------ + ValueError + When `discriminator_value` is given but selects no member arm. 
+ """ + if discriminator_value is None: + return _widest_union_member(union) + mapping = union.discriminator_mapping or {} + member_cls = mapping.get(discriminator_value) # type: ignore[call-overload] + if member_cls is not None: + for member in union.member_specs: + if member.member_cls is member_cls: + return member.spec + raise ValueError( + f"discriminator {discriminator_value!r} selects no arm of union " + f"{union.name!r} (arms: {sorted(mapping)})" + ) + + def _row_from_model_spec( spec: RecordSpec, *, diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py index bbfa64e81..3d8d6554e 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py @@ -19,14 +19,24 @@ FieldSegment, ) -from ...extraction.field_walk import has_array_layer, list_depth, terminal_model_ref -from ...extraction.specs import FieldSpec, ModelSpec +from ...extraction.field_walk import ( + has_array_layer, + list_depth, + terminal_model_ref, + terminal_union_ref, +) +from ...extraction.specs import FieldSpec, ModelSpec, RecordSpec from ..check_ir import ( Check, ElementGuard, ModelCheck, ) -from .base_row import value_for_field +from .base_row import ( + condition_overrides_for_present_field, + generate_base_row, + resolve_arm_spec, + value_for_field, +) __all__ = [ "generate_model_scaffold", @@ -34,6 +44,23 @@ "leaf_list_depth", ] +# Sentinel for "no leaf override": the terminal field keeps its synthesized +# value. A `None` / `""` leaf override is meaningful, so it cannot be the +# default. +_UNSET: object = object() + + +def _nest_leaf_value(value: object, field_spec: FieldSpec) -> object: + """Wrap a scalar leaf override to the field's list nesting depth. 
+ + `value_for_field` returns a list for a list-typed field, so a bare scalar + override (e.g. a literal alternative) is wrapped to the same depth: `[v]` + for `list[T]`, `[[v]]` for `list[list[T]]`, and `v` for a scalar field. + """ + for _ in range(list_depth(field_spec.shape)): + value = [value] + return value + @dataclass(frozen=True, slots=True) class _ElementDiscriminator: @@ -83,16 +110,23 @@ def leaf_list_depth(field_path: FieldPath, spec: ModelSpec) -> int: return max(0, list_depth(leaf.shape) - terminal_iter) -def _required_siblings( - fields: list[FieldSpec], exclude: str, spec_name: str -) -> dict[str, Any]: - """Populate required siblings at one nesting level, excluding the target.""" - result: dict[str, Any] = {} - for f in fields: - if f.name == exclude or not f.is_required: - continue - result[f.name] = value_for_field(f, spec_name) - return result +def _child_container_spec( + field_spec: FieldSpec, discriminator_value: object | None +) -> RecordSpec | None: + """Resolve the model a path field descends into. + + Returns the field's terminal `ModelRef` model, or -- for a discriminated + union -- the member arm the `discriminator_value` selects (the widest + member when the check is not arm-gated). `None` when the field has neither + a model nor a union terminal. + """ + model_ref = terminal_model_ref(field_spec.shape) + if model_ref is not None: + return model_ref.model + union_ref = terminal_union_ref(field_spec.shape) + if union_ref is not None: + return resolve_arm_spec(union_ref.union, discriminator_value) + return None def _walk_to_target( @@ -102,13 +136,26 @@ def _walk_to_target( *, discriminator: _ElementDiscriminator | None, current_depth: int = 0, + leaf_value: object = _UNSET, ) -> dict[str, Any]: - """Recursively build the scaffold dict along the path segments. + """Recursively build a constraint-satisfying scaffold along the path. 
+ + Each container model on the path is built as a valid base row + (`generate_base_row` -- required fields populated and model constraints + such as `require_any_of` satisfied), then the on-path child overrides its + field. A discriminated-union element resolves to the arm the seeded + discriminator selects (or the widest member when the check is not + arm-gated), so the element is a valid instance of a concrete arm rather + than an untagged `{}`. Accepts any `FieldSegment`: struct steps recurse, an `ArraySegment` wraps its inner value in lists, and a trailing `MapSegment` resolves via `value_for_field` (which populates the map with a valid entry), so a `MapPath` target scaffolds the same way as a struct terminal. + + `leaf_value`, when set, replaces the synthesized value at the terminal + field -- used to seed a specific valid value (e.g. a literal alternative) + at the check's target. """ if not segments: return {} @@ -117,25 +164,42 @@ def _walk_to_target( remaining = segments[1:] field_spec = _find_field_spec(fields, seg.name) + # A path segment that resolves to no field, or that tries to descend into a + # non-container, would leave the scaffold short of its target -- the + # `::valid` row would then assert nothing (the vacuous-valid-row bug this + # generator exists to prevent). Fail loud at generation time instead. 
+ if field_spec is None: + raise ValueError( + f"scaffold path segment {seg.name!r} matches no field " + f"(available: {sorted(f.name for f in fields)})" + ) + inner: Any - child_model = ( - terminal_model_ref(field_spec.shape) if field_spec is not None else None - ) - if remaining and child_model is not None: - child_fields = child_model.model.fields - inner = _walk_to_target( + if remaining: + discriminator_value = ( + discriminator.value + if discriminator is not None and current_depth == discriminator.depth + else None + ) + child_spec = _child_container_spec(field_spec, discriminator_value) + if child_spec is None: + raise ValueError( + f"scaffold cannot descend into non-container field {seg.name!r} " + f"with path remaining {[s.name for s in remaining]!r}" + ) + recursed = _walk_to_target( remaining, - child_fields, + child_spec.fields, spec_name, discriminator=discriminator, current_depth=current_depth + 1, + leaf_value=leaf_value, ) - siblings = _required_siblings(child_fields, remaining[0].name, spec_name) - inner = {**siblings, **inner} - elif not remaining and field_spec is not None: - inner = value_for_field(field_spec, spec_name) + inner = {**generate_base_row(child_spec), **recursed} + elif leaf_value is not _UNSET: + inner = _nest_leaf_value(leaf_value, field_spec) else: - inner = {} + inner = value_for_field(field_spec, spec_name) if ( isinstance(inner, dict) @@ -147,21 +211,43 @@ def _walk_to_target( # When the terminal segment is an array and the field itself is a list, # `value_for_field` already wrapped the value -- skip extra wrapping. 
if isinstance(seg, ArraySegment): - if ( - not remaining - and field_spec is not None - and has_array_layer(field_spec.shape) - ): + if not remaining and has_array_layer(field_spec.shape): return {seg.name: inner} + # A single-level array (iter_count == 1) gets a constraint-valid list; + # nested `list[list[...]]` levels (iter_count > 1) carry no min_length>1 + # or uniqueness constraint in any current schema, so minimal nesting + # suffices. Add per-level constraint handling here if one ever does -- + # the row would otherwise be short on the unmutated `::valid` row. + if seg.iter_count == 1: + return {seg.name: _array_with_target(inner, field_spec, spec_name)} wrapped: Any = inner for _ in range(seg.iter_count): wrapped = [wrapped] return {seg.name: wrapped} - if remaining and field_spec is not None and has_array_layer(field_spec.shape): - return {seg.name: [inner]} + if remaining and has_array_layer(field_spec.shape): + return {seg.name: _array_with_target(inner, field_spec, spec_name)} return {seg.name: inner} +def _array_with_target( + target_element: object, field_spec: FieldSpec, spec_name: str +) -> list[Any]: + """Return a constraint-valid single-level list holding the target element. + + `value_for_field` builds a list that satisfies the field's array + constraints (min length, unique items); the target-reaching element + replaces the first slot. A min_length>1 or uniqueness constraint then + holds on the unmutated `::valid` row -- a bare `[target_element]` would + leave the row short or, after `deep_merge` replaces the base row's list, + drop the elements that satisfied the constraint. + """ + full = value_for_field(field_spec, spec_name) + if isinstance(full, list) and full: + full[0] = target_element + return full + return [target_element] + + def _element_discriminator(check: Check) -> _ElementDiscriminator | None: """Return the element-level discriminator for a Check, or None. 
@@ -191,8 +277,15 @@ def _element_discriminator(check: Check) -> _ElementDiscriminator | None: return None -def generate_scaffold(check: Check, spec: ModelSpec) -> dict[str, Any]: - """Build a sparse dict from null to the target field of a Check.""" +def generate_scaffold( + check: Check, spec: ModelSpec, *, leaf_value: object = _UNSET +) -> dict[str, Any]: + """Build a sparse dict from null to the target field of a Check. + + `leaf_value`, when set, seeds that value at the target instead of the + field's synthesized value -- used to place a known-valid value (e.g. a + literal alternative) at the check's target for the `::valid` row. + """ segments = check.target.segments if not segments: return {} @@ -200,20 +293,28 @@ def generate_scaffold(check: Check, spec: ModelSpec) -> dict[str, Any]: if len(segments) == 1: seg0 = segments[0] field_spec = _find_field_spec(spec.fields, seg0.name) - if field_spec is None or field_spec.is_required: + if field_spec is None: + return {} + # A `forbid_if` the base row triggers forbids this field; disable the + # condition so the field can be set without invalidating the row. + overrides = condition_overrides_for_present_field(spec, seg0.name) + if leaf_value is not _UNSET: + return {**overrides, seg0.name: _nest_leaf_value(leaf_value, field_spec)} + if field_spec.is_required: return {} - return {seg0.name: value_for_field(field_spec, spec.name)} + return {**overrides, seg0.name: value_for_field(field_spec, spec.name)} return _walk_to_target( segments, spec.fields, spec.name, discriminator=_element_discriminator(check), + leaf_value=leaf_value, ) def generate_model_scaffold(check: ModelCheck, spec: ModelSpec) -> dict[str, Any]: - """Build a sparse dict for a model-level check's nesting structure. + """Build a constraint-satisfying scaffold for a model-level check. 
Two target shapes need no scaffold and return `{}`: @@ -225,55 +326,16 @@ def generate_model_scaffold(check: ModelCheck, spec: ModelSpec) -> dict[str, Any an array, a dict scaffold can't replace a base-row map entry under `deep_merge`'s recursive dict merge, so there is nothing to add here. - A top-level `ArrayPath` builds the array path; an `ArrayPath` whose - column lives inside a struct raises `NotImplementedError`. No schema - today places a list of model-constrained models inside a struct field, - so the case has no test coverage. + An `ArrayPath` walks the path with `_walk_to_target`: every model on the + way -- including the constrained model at the leaf -- is built as a valid + base row, so the constraint under test (e.g. a scope's `require_any_of`) + is satisfied on the unmutated `::valid` row and the only violation is the + one the mutation introduces. """ match check.target: case ArrayPath() as target: - pass + return _walk_to_target( + target.segments, spec.fields, spec.name, discriminator=None + ) case _: return {} - column_prefix = target.column_prefix - if column_prefix.segments: - raise NotImplementedError( - "Multi-segment column paths (struct fields containing arrays) " - "require walking the parent tree from the root to the array " - f"column; got {target!r}" - ) - - field_spec = _find_field_spec(spec.fields, target.column_path) - if field_spec is None: - return {} - - inner_levels = target.iter_struct_paths - leaf_path = target.leaf - - inner: dict[str, Any] = {} - root_model = terminal_model_ref(field_spec.shape) - current_fields: list[FieldSpec] = root_model.model.fields if root_model else [] - nested = inner - - for level in inner_levels: - for part in level: - child_spec = _find_field_spec(current_fields, part) - child_is_list = child_spec is not None and has_array_layer(child_spec.shape) - child_model = ( - terminal_model_ref(child_spec.shape) if child_spec is not None else None - ) - if child_is_list: - nested[part] = [{}] - nested = 
nested[part][0] - else: - nested[part] = {} - nested = nested[part] - current_fields = child_model.model.fields if child_model else [] - - for part in leaf_path: - nested[part] = {} - nested = nested[part] - - if has_array_layer(field_spec.shape): - return {target.column_path: [inner]} - return {target.column_path: inner} if inner else {} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py index 166acbda0..0e305e576 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py @@ -133,15 +133,23 @@ def _scenario_entry( mutate_expr: str, expected_field: str, expected_check: str, + valid_scaffold: dict[str, Any] | None = None, ) -> list[tuple[str, str]]: - """Build a rendered Scenario kwargs list for the test_module template.""" - return [ + """Build a rendered Scenario kwargs list for the test_module template. + + `valid_scaffold` is emitted only when set, so scenarios without one keep + the dataclass default (a vacuous base-row copy for the `::valid` row). + """ + entry = [ ("id", py_literal(scenario_id)), ("scaffold", py_literal(scaffold)), ("mutate", mutate_expr), ("expected_field", py_literal(expected_field)), ("expected_check", py_literal(expected_check)), ] + if valid_scaffold is not None: + entry.append(("valid_scaffold", py_literal(valid_scaffold))) + return entry class _MutateExpr(NamedTuple): @@ -213,6 +221,15 @@ def _render_field_check_scenarios( for row, scenario_id in zip(rows, scenario_ids, strict=True): desc = row.check.descriptors[row.descriptor_idx] scaffold = generate_scaffold(row.check, spec) if spec is not None else {} + # For an `X | Literal[c]` field, seed the literal alternative at the + # target so the `::valid` row proves the check accepts it. 
+ valid_scaffold: dict[str, Any] | None = None + if desc.allow_literals and spec is not None: + # generate_scaffold shapes the bare literal to the field's list + # nesting, so pass it unwrapped. + valid_scaffold = generate_scaffold( + row.check, spec, leaf_value=desc.allow_literals[0] + ) try: mutate = _field_mutate_expr(row.check, desc, spec) except ValueError as exc: @@ -227,6 +244,7 @@ def _render_field_check_scenarios( mutate_expr=mutate.expr, expected_field=row.label, expected_check=row.name, + valid_scaffold=valid_scaffold, ) ) diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py index e5b1b0ebc..e645bfddd 100644 --- a/packages/overture-schema-codegen/tests/test_constraint_description.py +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -13,6 +13,9 @@ ScalarMaxLen, ScalarMinLen, ) +from overture.schema.codegen.extraction.literal_alternatives import ( + LiteralAlternatives, +) from overture.schema.codegen.extraction.model_constraints import ( analyze_model_constraints, ) @@ -142,6 +145,24 @@ class FutureConstraint(ModelConstraint): assert result == ["`@future_thing`"] +class TestLiteralAlternativesProse: + """`X | Literal[c]` renders a faithful 'Also accepts' note in the docs.""" + + def test_empty_string_literal(self) -> None: + assert ( + describe_field_constraint(LiteralAlternatives(("",))) + == "Also accepts: `''`" + ) + + def test_named_literal(self) -> None: + result = describe_field_constraint(LiteralAlternatives(("Global",))) + assert result == "Also accepts: `'Global'`" + + def test_multiple_literals(self) -> None: + result = describe_field_constraint(LiteralAlternatives(("a", "b"))) + assert result == "Also accepts: `'a'`, `'b'`" + + class TestConsolidation: """Consolidation of same-field conditional constraints.""" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py 
b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py index a84bdf78b..4ada0c1a9 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py @@ -33,6 +33,7 @@ _satisfy_model_constraints, _value_from_check_pattern, _value_from_scalar_constraints, + condition_overrides_for_present_field, generate_arm_rows, generate_base_row, generate_populated_arm_rows, @@ -488,6 +489,70 @@ def test_not_field_eq_condition_base_row_passes_pydantic(self) -> None: TypeAdapter(_ModeModelRequireIf).validate_python(row) +class _Subtype(str, Enum): + COUNTRY = "country" + REGION = "region" + + +_IS_COUNTRY = FieldEqCondition("subtype", _Subtype.COUNTRY) + + +# Mirrors Division/DivisionBoundary: the base row's first enum member +# (`country`) triggers the forbid, and the symmetric require_if mandates the +# field once the condition is disabled. +@forbid_if(["parent"], _IS_COUNTRY) +@require_if(["parent"], ~_IS_COUNTRY) +class _DivisionLike(BaseModel): + subtype: _Subtype + parent: str | None = None + + +class _Flag(str, Enum): + A = "a" + B = "b" + + +# Disabling the forbid (mode != A) activates a require_if for a field the base +# row lacks, so the override must re-satisfy constraints and carry that field. 
+@forbid_if(["forbidden"], FieldEqCondition("mode", _Flag.A)) +@require_if(["needed"], FieldEqCondition("mode", _Flag.B)) +class _FlipActivatesRequire(BaseModel): + mode: _Flag + forbidden: str | None = None + needed: str | None = None + + +class TestConditionOverridesForPresentField: + """A forbid_if the base row triggers is disabled so its field can be set.""" + + def test_flips_condition_field_to_disable_forbid(self) -> None: + """The override sets the condition field to a value the forbid rejects.""" + spec = extract_model(_DivisionLike) + overrides = condition_overrides_for_present_field(spec, "parent") + assert overrides == {"subtype": "region"} + + def test_merged_row_with_present_field_is_valid(self) -> None: + """Base row + override + the forbidden field validates against Pydantic.""" + spec = extract_model(_DivisionLike) + overrides = condition_overrides_for_present_field(spec, "parent") + row = {**generate_base_row(spec), **overrides, "parent": "a"} + TypeAdapter(_DivisionLike).validate_python(row) + + def test_field_without_forbid_if_needs_no_override(self) -> None: + """A field no forbid_if gates returns an empty override.""" + spec = extract_model(_DivisionLike) + assert condition_overrides_for_present_field(spec, "subtype") == {} + + def test_resatisfies_constraints_newly_required_by_the_flip(self) -> None: + """Disabling the forbid can activate a require_if; the override fills it.""" + spec = extract_model(_FlipActivatesRequire) + overrides = condition_overrides_for_present_field(spec, "forbidden") + assert overrides["mode"] == "b" + assert overrides.get("needed") is not None + row = {**generate_base_row(spec), **overrides, "forbidden": "x"} + TypeAdapter(_FlipActivatesRequire).validate_python(row) + + class TestMultiBoundScalarConstraints: """_value_from_scalar_constraints merges multiple check_bounds before calling valid_bound.""" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py 
b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py index de36ee560..5fa49c72b 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -2289,6 +2289,65 @@ class _ListInsideMapValueModel(BaseModel): items: dict[str, _MapValueWithList] +class _UrlOrEmptyModel(BaseModel): + """Required field typed `HttpUrl | Literal[""]` -- literal bypass scenario.""" + + data_url: Annotated[HttpUrl | Literal[""], Field()] + + +class _OptionalCountryListModel(BaseModel): + """Optional list[CountryCodeAlpha2 | Literal["Global"]] -- array literal bypass.""" + + countries: ( + list[Annotated[CountryCodeAlpha2 | Literal["Global"], Field()]] | None + ) = None + + +class TestLiteralAlternativesBypass: + """check_builder threads allow_literals onto content descriptors for X | Literal[c] fields.""" + + def test_url_literal_bypass_on_content_descriptors(self) -> None: + """url_format and url_length carry allow_literals; check_required does not.""" + checks, _ = _checks_for(_UrlOrEmptyModel) + url_checks = [c for c in checks if str(c.target).endswith("data_url")] + assert url_checks, "expected a check targeting data_url" + # Required field: one Check with (check_required, url_format, url_length) + assert len(url_checks) == 1 + check = url_checks[0] + for desc in check.descriptors: + if desc.function == "check_required": + assert desc.allow_literals == (), ( + f"check_required must not carry allow_literals, got {desc.allow_literals}" + ) + else: + assert desc.allow_literals == ("",), ( + f"{desc.function} should carry allow_literals=('',), got {desc.allow_literals}" + ) + + def test_array_literal_bypass_on_element_descriptor(self) -> None: + """Array-element pattern check carries allow_literals for list[T | Literal[c]].""" + checks, _ = _checks_for(_OptionalCountryListModel) + country_checks = [c for c in checks if "countries" in str(c.target)] + assert country_checks, 
"expected a check targeting countries" + check = country_checks[0] + content_descs = [d for d in check.descriptors if d.function != "check_required"] + assert content_descs, "expected at least one content descriptor" + for desc in content_descs: + assert desc.allow_literals == ("Global",), ( + f"{desc.function} should carry allow_literals=('Global',), got {desc.allow_literals}" + ) + + def test_check_required_never_gets_allow_literals(self) -> None: + """check_required is excluded from the literal bypass even when coalesced.""" + checks, _ = _checks_for(_UrlOrEmptyModel) + for check in checks: + for desc in check.descriptors: + if desc.function == "check_required": + assert desc.allow_literals == (), ( + f"check_required at {check.target} carries unexpected allow_literals" + ) + + class TestMapValueModelDescentBoundary: """Descent raises where a `MapPath` cannot represent the shape. diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py index 59b297594..0c01736ba 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py @@ -11,6 +11,7 @@ ScalarMaxLen, ScalarMinLen, ) +from overture.schema.codegen.extraction.literal_alternatives import LiteralAlternatives from overture.schema.codegen.extraction.specs import FieldSpec from overture.schema.codegen.pyspark.constraint_dispatch import ( ExpressionDescriptor, @@ -321,6 +322,12 @@ def test_strict_returns_none(self) -> None: desc = dispatch_constraint(Strict()) assert desc is None + def test_literal_alternatives_returns_none(self) -> None: + # The literal-alternatives bypass is a modifier on the field's other + # checks (threaded as allow_literals), not a standalone check. 
+ desc = dispatch_constraint(LiteralAlternatives(("",))) + assert desc is None + class TestBaseTypeDispatch: def test_http_url_dispatches_to_check_url_format_and_length(self) -> None: diff --git a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py index 7095f8895..4d361b050 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py @@ -44,7 +44,7 @@ def test_checks_cover_expected_fields(self, generated: GeneratedModule) -> None: content = generated.content # Hand-written checks: subtype, class, country, region, radio_group (is_land, is_territorial), admin_level for field in ["subtype", "class", "country", "region"]: - assert f'field="{field}"' in content, f"Missing check for {field}" + assert f"field='{field}'" in content, f"Missing check for {field}" def test_schema_has_expected_fields(self, generated: GeneratedModule) -> None: """Schema should contain all expected DivisionArea fields.""" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py index 0857b1209..81cbaba40 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -64,7 +64,7 @@ int32, ) from overture.schema.system.string import CountryCodeAlpha2 -from pydantic import BaseModel +from pydantic import BaseModel, HttpUrl from pydantic.fields import FieldInfo _path = parse @@ -621,8 +621,8 @@ def test_per_field_function_exists(self, literal_subtype_source: str) -> None: def test_check_has_name_field(self, literal_subtype_source: str) -> None: """Rendered Check includes name= derived from constraint function.""" - assert 'name="required"' in literal_subtype_source - assert 'name="enum"' in literal_subtype_source + assert "name='required'" in literal_subtype_source + assert "name='enum'" 
in literal_subtype_source def test_no_field_in_check_calls(self, literal_subtype_source: str) -> None: """check_* calls should not include field string as second arg.""" @@ -643,8 +643,8 @@ def test_scalar_multi_descriptor_produces_separate_checks( ) -> None: """SimpleModel.subtype has check_required + check_enum -> two separate functions.""" assert "F.coalesce" not in literal_subtype_source - assert 'name="required"' in literal_subtype_source - assert 'name="enum"' in literal_subtype_source + assert "name='required'" in literal_subtype_source + assert "name='enum'" in literal_subtype_source def test_compound_checks_split(self, literal_subtype_source: str) -> None: """A field with required + enum produces two Check functions, not one coalesced.""" @@ -939,16 +939,16 @@ class TestModelConstraintFieldLabels: def test_require_if_single_constraint_no_suffix(self) -> None: source = _render(RequireIfEnumModel, "require_if_enum") - assert 'field="admin_level_required"' in source + assert "field='admin_level_required'" in source def test_forbid_if_single_constraint_no_suffix(self) -> None: source = _render(RequireForbidModel, "rf") - assert 'field="admin_level_forbidden"' in source + assert "field='admin_level_forbidden'" in source def test_require_and_forbid_have_distinct_labels(self) -> None: source = _render(RequireForbidModel, "rf") - assert 'field="admin_level_required"' in source - assert 'field="admin_level_forbidden"' in source + assert "field='admin_level_required'" in source + assert "field='admin_level_forbidden'" in source def test_multiple_require_if_same_target_disambiguated(self) -> None: """Multiple require_if on the same target get per-field numeric suffixes.""" @@ -960,7 +960,7 @@ class MultiRequireModel(BaseModel): level: int | None = None source = _render(MultiRequireModel, "multi_req") - labels = re.findall(r'field="(level_required[^"]*)"', source) + labels = re.findall(r"field='(level_required[^']*)'", source) assert len(labels) >= 2, f"Expected >=2 
unique labels, got {labels}" assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" assert all(re.search(r"_\d+$", lbl) for lbl in labels), ( @@ -1037,7 +1037,7 @@ def test_colliding_required_checks_get_distinct_labels(self) -> None: ) source = render_model_module("collide", [first, second], [], []) ast.parse(source) - labels = re.findall(r'field="(value[^"]*)"', source) + labels = re.findall(r"field='(value[^']*)'", source) assert labels == ["value_0", "value_1"], labels def test_noncolliding_field_check_stays_bare(self) -> None: @@ -1053,7 +1053,7 @@ def test_noncolliding_field_check_stays_bare(self) -> None: ) source = render_model_module("solo", [required, bounds], [], []) ast.parse(source) - labels = re.findall(r'field="(value[^"]*)"', source) + labels = re.findall(r"field='(value[^']*)'", source) assert labels == ["value", "value"], labels def test_multi_descriptor_collision_only_on_shared_name(self) -> None: @@ -1074,9 +1074,9 @@ def test_multi_descriptor_collision_only_on_shared_name(self) -> None: # The two `required` rows collide (-> value_0/value_1); the lone # `bounds` row stays bare. 
required_fields = re.findall( - r'field="(value[^"]*)",\n\s+name="required"', source + r"field='(value[^']*)',\n\s+name='required'", source ) - bounds_fields = re.findall(r'field="(value[^"]*)",\n\s+name="bounds"', source) + bounds_fields = re.findall(r"field='(value[^']*)',\n\s+name='bounds'", source) assert required_fields == ["value_0", "value_1"], required_fields assert bounds_fields == ["value"], bounds_fields @@ -1195,7 +1195,7 @@ def test_shape_is_array(self) -> None: def test_field_label_uses_prefix(self) -> None: source = _render(ArrayOfConstrained, "arr_constrained") - assert 'field="items[]' in source + assert "field='items[]" in source def test_imports_array_check(self) -> None: source = _render(ArrayOfConstrained, "arr_constrained") @@ -1787,3 +1787,27 @@ def test_array_require_if_condition_keeps_leaf(self) -> None: source = _render_model_node(self._require_if_check(ArrayValueRequireIfModel)) assert 'el["inner"]["admin_level"]' in source, source assert 'el["inner"]["subtype"] ==' in source, source + + +class _UrlOrEmptyRender(BaseModel): + """Required `HttpUrl | Literal[""]` -- the literal bypasses the URL checks.""" + + data_url: HttpUrl | Literal[""] + + +class TestLiteralAlternativesRendering: + """A descriptor carrying allow_literals renders an except_literals wrapper.""" + + def test_url_checks_wrapped_in_except_literals(self) -> None: + source = _render(_UrlOrEmptyRender) + # Both content checks are wrapped; the literal value is threaded in. + assert 'except_literals(F.col("data_url"), check_url_format(' in source, source + assert 'except_literals(F.col("data_url"), check_url_length(' in source, source + # py_literal emits the pre-ruff form (single quotes); ruff normalizes later. + assert ", ['']" in source, source + + def test_except_literals_imported(self) -> None: + source = _render(_UrlOrEmptyRender) + assert "except_literals" in source + # check_required is not wrapped: it is not threaded with allow_literals. 
+ assert 'except_literals(F.col("data_url"), check_required(' not in source diff --git a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py index cc000c559..dbff1f009 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py @@ -1,6 +1,8 @@ """Tests for sparse path scaffold generation.""" +import copy from dataclasses import replace +from typing import Any import pytest from codegen_test_support import ( @@ -8,20 +10,51 @@ discover_feature, spec_for_model, ) -from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.extraction.specs import ModelSpec, UnionSpec from overture.schema.codegen.pyspark.check_builder import build_checks -from overture.schema.codegen.pyspark.check_ir import ElementGuard, ModelCheck +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) from overture.schema.codegen.pyspark.constraint_dispatch import RequireAnyOf +from overture.schema.codegen.pyspark.test_data.base_row import ( + generate_arm_rows, + generate_base_row, +) from overture.schema.codegen.pyspark.test_data.scaffold import ( generate_model_scaffold, generate_scaffold, leaf_list_depth, ) from overture.schema.system.field_path import ArrayPath, parse +from pydantic import TypeAdapter _path = parse +def _deep_merge(base: dict, scaffold: dict) -> dict: + """Merge `scaffold` onto a deep copy of `base` (harness `deep_merge` semantics). + + Dicts merge recursively; every other value (including lists) replaces the + base value. Mirrors `overture.schema.pyspark`'s conformance harness so the + validated row matches what the generated suite builds. 
+ """ + result = copy.deepcopy(base) + for key, value in scaffold.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = copy.deepcopy(value) + return result + + +def _check_belongs_to_arm(check: Check, arm: str) -> bool: + """Whether a field check applies to a union arm (every `ColumnGuard` admits it).""" + return all(arm in g.values for g in check.guards if isinstance(g, ColumnGuard)) + + @pytest.fixture(scope="module") def connector_spec() -> ModelSpec: return discover_feature("Connector") @@ -216,6 +249,93 @@ def test_multiple_element_guards_raises(self, segment_spec: ModelSpec) -> None: generate_scaffold(bogus, segment_spec) +# Models whose scaffolds must merge onto a base row to form a valid instance. +# Spans a union with a union-in-array (`Segment`'s `when.vehicle[]`), record +# specs with `require_any_of` and optional nested-model arrays, a map field +# (`Infrastructure.source_tags`), and `list[list[...]]` arrays +# (`Division.hierarchies[][]`, so iter_count>1 wrapping is covered). The +# conformance suite only asserts each scenario's own expected violation is +# absent from its valid row -- whole-row validity of a scaffold is checked here, +# so a model-specific scaffold defect can't hide behind it. +_VALID_ROW_MODELS = [ + "Segment", + "Connector", + "Division", + "DivisionArea", + "DivisionBoundary", + "Place", + "Building", + "BuildingPart", + "Address", + "Infrastructure", + "Land", + "LandCover", + "LandUse", + "Water", + "Bathymetry", +] + + +def _base_rows_and_adapter(spec: ModelSpec) -> tuple[dict[str, dict[str, Any]], Any]: + """Return per-arm base rows and a Pydantic adapter for a spec. + + A `UnionSpec` yields one base row per discriminator arm, validated against + the union annotation. 
A record spec yields a single row keyed by `""` (a + sentinel arm carrying no `ColumnGuard`, so `_check_belongs_to_arm` admits + every record-spec check) and validates against the source class. + """ + if isinstance(spec, UnionSpec): + return generate_arm_rows(spec), TypeAdapter(spec.source_annotation) + assert spec.source_type is not None + return {"": generate_base_row(spec)}, TypeAdapter(spec.source_type) + + +class TestScaffoldsProduceValidRows: + """Ground truth for finding #1: a scaffold merged onto the base row is valid. + + The conformance harness builds the `::valid` row as + `deep_merge(base_row, scaffold)` with no mutation, then asserts the check + does not fire. That assertion is only meaningful when the merged row is a + genuinely valid instance -- otherwise unrelated `required` / + `require_any_of` violations (or a vacuous, target-absent row) let a check + that wrongly rejects a valid value ship green. These tests validate the + merged row against the Pydantic schema directly: the scaffold must reach + the target while keeping every model on the path valid. 
+ """ + + @pytest.fixture(scope="module", params=_VALID_ROW_MODELS) + def model_case( + self, request: pytest.FixtureRequest + ) -> tuple[ModelSpec, dict[str, dict[str, Any]], Any]: + spec = discover_feature(request.param) + arm_rows, adapter = _base_rows_and_adapter(spec) + return spec, arm_rows, adapter + + def test_field_scaffolds_validate( + self, model_case: tuple[ModelSpec, dict[str, dict[str, Any]], Any] + ) -> None: + spec, arm_rows, adapter = model_case + field_checks, _ = build_checks(spec) + for check in field_checks: + scaffold = generate_scaffold(check, spec) + for arm, base in arm_rows.items(): + if not _check_belongs_to_arm(check, arm): + continue + adapter.validate_python(_deep_merge(base, scaffold)) + + def test_model_scaffolds_validate( + self, model_case: tuple[ModelSpec, dict[str, dict[str, Any]], Any] + ) -> None: + spec, arm_rows, adapter = model_case + _, model_checks = build_checks(spec) + for check in model_checks: + scaffold = generate_model_scaffold(check, spec) + for arm, base in arm_rows.items(): + if not (check.arm is None or check.arm == arm): + continue + adapter.validate_python(_deep_merge(base, scaffold)) + + class TestGenerateModelScaffold: def test_top_level_model_constraint_produces_empty_scaffold( self, division_area_spec: ModelSpec diff --git a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py index 4867ea245..2622d392a 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py @@ -178,6 +178,16 @@ def test_empty_nodes_renders_valid_python(self) -> None: ast.parse(source) +class TestUnbuildableScenariosAreLoud: + """An unbuildable scenario must fail, not silently skip (false green).""" + + def test_skip_branch_fails_not_skips(self) -> None: + source = render_test_module("loud", [], []) + assert "validation_results.skipped" in source + assert 
"pytest.fail(" in source + assert "pytest.skip(" not in source + + class TestBaseRow: def test_default_base_rows_are_empty(self) -> None: source = render_test_module("test", [], []) @@ -737,7 +747,7 @@ def test_per_arm_label_matches_module_label(self) -> None: model_checks = [road, rail] module = render_model_module("seg", [], model_checks, []) - module_labels = re.findall(r'field="(class_required[^"]*)"', module) + module_labels = re.findall(r"field='(class_required[^']*)'", module) road_label = module_labels[0] test_source = render_test_module("seg", [], model_checks, arm="road") diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py index ea3421f5a..144214b75 100644 --- a/packages/overture-schema-codegen/tests/test_type_analyzer.py +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -25,6 +25,9 @@ ArrayMinLen, ScalarMinLen, ) +from overture.schema.codegen.extraction.literal_alternatives import ( + LiteralAlternatives, +) from overture.schema.codegen.extraction.specs import RecordSpec, UnionSpec from overture.schema.codegen.extraction.type_analyzer import ( UnresolvedForwardRefError, @@ -99,12 +102,41 @@ def test_pipe_none(self) -> None: def test_typing_optional(self) -> None: assert _is_optional(Optional[str]) is True # noqa: UP045 - def test_literal_arm_filtered_with_concrete(self) -> None: + def test_literal_arm_keeps_concrete_shape(self) -> None: + # The concrete arm is the field's shape; the literal rides along as a + # LiteralAlternatives constraint (see TestLiteralAlternatives). shape, optional, _ = analyze_type(str | Literal[""] | None) assert isinstance(shape, Primitive) and shape.base_type == "str" assert optional is True +class TestLiteralAlternatives: + """`X | Literal[c]` keeps the concrete arm but records the literal arms.""" + + def _alternatives(self, annotation: object) -> tuple[object, ...] 
| None: + for cs in all_constraints(_shape(annotation)): + if isinstance(cs.constraint, LiteralAlternatives): + return cs.constraint.values + return None + + def test_scalar_literal_arm_preserved(self) -> None: + assert self._alternatives(str | Literal[""] | None) == ("",) + + def test_literal_arm_preserved_inside_list(self) -> None: + shape = _shape(list[str | Literal["x"]]) + assert isinstance(shape, ArrayOf) and isinstance(shape.element, Primitive) + assert self._alternatives(list[str | Literal["x"]]) == ("x",) + + def test_multiple_literal_values_preserved(self) -> None: + assert self._alternatives(str | Literal["a", "b"]) == ("a", "b") + + def test_pure_literal_union_has_no_alternatives(self) -> None: + # No concrete arm -> stays a LiteralScalar, no bypass constraint. + shape = _shape(Literal["a"] | None) + assert isinstance(shape, LiteralScalar) + assert self._alternatives(Literal["a"] | None) is None + + class TestList: def test_simple_list(self) -> None: shape = _shape(list[str]) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py index efe1e6198..18acbb8ef 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py @@ -15,8 +15,17 @@ def error_msg(prefix: str, *value_cols: Column) -> Column: - """Build an error message: literal prefix followed by interpolated values.""" - return F.concat(F.lit(prefix), *value_cols) + """Build an error message: literal prefix followed by interpolated values. + + Each interpolated value is coalesced to a string before concatenation so + that a NULL value never makes the whole message NULL. 
`F.concat` returns + NULL if any argument is NULL, and a NULL message is silently dropped by + `array_compact` in the array-check path (or the scalar wrapper) -- which + would discard a real violation whenever the offending value is itself + NULL (e.g. a linear-reference range `[null, 1.5]`). + """ + safe = [F.coalesce(c.cast("string"), F.lit("null")) for c in value_cols] + return F.concat(F.lit(prefix), *safe) def _resolve_column(column: str | Column) -> Column: diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py index c7667402e..fe9e67002 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py @@ -160,6 +160,20 @@ def check_pattern(col: Column, pattern: str, *, label: str) -> Column: return F.when(col.isNotNull() & ~col.rlike(pattern), msg) +def except_literals(col: Column, error: Column, allowed: list[object]) -> Column: + """Suppress *error* when *col* equals one of the field's literal alternatives. + + A field typed `X | Literal[c, ...]` accepts the literals `c, ...` alongside + any value the concrete arm `X` validates. *error* is the concrete arm's + violation Column (error string or null). Returns null whenever *col* is one + of *allowed* (a permitted literal), and *error* unchanged otherwise. A null + *col* matches no literal and yields null -- the same result the wrapped + content checks already return for null, since presence is + `check_required`'s concern and is never wrapped here. + """ + return F.when(~col.isin(allowed), error) + + def check_url_format(col: Column) -> Column: """HTTP/HTTPS URL format check via pattern match. Returns error string or null. 
diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py index d6c98b482..1ff387791 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py @@ -31,6 +31,7 @@ check_required, check_url_format, check_url_length, + except_literals, ) @@ -80,7 +81,12 @@ def _datasets_data_url_url_format_check() -> Check: return Check( field="datasets[].data_url", name="url_format", - expr=array_check("datasets", lambda el: check_url_format(el["data_url"])), + expr=array_check( + "datasets", + lambda el: except_literals( + el["data_url"], check_url_format(el["data_url"]), [""] + ), + ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), ) @@ -90,7 +96,12 @@ def _datasets_data_url_url_length_check() -> Check: return Check( field="datasets[].data_url", name="url_length", - expr=array_check("datasets", lambda el: check_url_length(el["data_url"])), + expr=array_check( + "datasets", + lambda el: except_literals( + el["data_url"], check_url_length(el["data_url"]), [""] + ), + ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), ) @@ -113,7 +124,10 @@ def _datasets_data_url_archived_url_format_check() -> Check: field="datasets[].data_url_archived", name="url_format", expr=array_check( - "datasets", lambda el: check_url_format(el["data_url_archived"]) + "datasets", + lambda el: except_literals( + el["data_url_archived"], check_url_format(el["data_url_archived"]), [""] + ), ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), @@ -125,7 +139,10 @@ def _datasets_data_url_archived_url_length_check() -> Check: field="datasets[].data_url_archived", name="url_length", expr=array_check( - 
"datasets", lambda el: check_url_length(el["data_url_archived"]) + "datasets", + lambda el: except_literals( + el["data_url_archived"], check_url_length(el["data_url_archived"]), [""] + ), ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), @@ -146,7 +163,12 @@ def _datasets_license_url_url_format_check() -> Check: return Check( field="datasets[].license_url", name="url_format", - expr=array_check("datasets", lambda el: check_url_format(el["license_url"])), + expr=array_check( + "datasets", + lambda el: except_literals( + el["license_url"], check_url_format(el["license_url"]), [""] + ), + ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), ) @@ -156,7 +178,12 @@ def _datasets_license_url_url_length_check() -> Check: return Check( field="datasets[].license_url", name="url_length", - expr=array_check("datasets", lambda el: check_url_length(el["license_url"])), + expr=array_check( + "datasets", + lambda el: except_literals( + el["license_url"], check_url_length(el["license_url"]), [""] + ), + ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), ) @@ -179,7 +206,12 @@ def _datasets_license_url_archived_url_format_check() -> Check: field="datasets[].license_url_archived", name="url_format", expr=array_check( - "datasets", lambda el: check_url_format(el["license_url_archived"]) + "datasets", + lambda el: except_literals( + el["license_url_archived"], + check_url_format(el["license_url_archived"]), + [""], + ), ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), @@ -191,7 +223,12 @@ def _datasets_license_url_archived_url_length_check() -> Check: field="datasets[].license_url_archived", name="url_length", expr=array_check( - "datasets", lambda el: check_url_length(el["license_url_archived"]) + "datasets", + lambda el: except_literals( + el["license_url_archived"], + check_url_length(el["license_url_archived"]), + [""], + ), ), shape=CheckShape.ARRAY, read_columns=frozenset({"datasets"}), @@ -311,7 +348,8 @@ def 
_datasets_data_download_url_url_format_check() -> Check: expr=nested_array_check( "datasets", lambda el: array_check( - el["data_download_url"], lambda inner: check_url_format(inner) + el["data_download_url"], + lambda inner: except_literals(inner, check_url_format(inner), [""]), ), ), shape=CheckShape.ARRAY, @@ -326,7 +364,8 @@ def _datasets_data_download_url_url_length_check() -> Check: expr=nested_array_check( "datasets", lambda el: array_check( - el["data_download_url"], lambda inner: check_url_length(inner) + el["data_download_url"], + lambda inner: except_literals(inner, check_url_length(inner), [""]), ), ), shape=CheckShape.ARRAY, @@ -342,8 +381,12 @@ def _datasets_countries_check() -> Check: "datasets", lambda el: array_check( el["countries"], - lambda inner: check_pattern( - inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + lambda inner: except_literals( + inner, + check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ["Global"], ), ), ), diff --git a/packages/overture-schema-pyspark/tests/_support/harness.py b/packages/overture-schema-pyspark/tests/_support/harness.py index 03d7af483..99681807d 100644 --- a/packages/overture-schema-pyspark/tests/_support/harness.py +++ b/packages/overture-schema-pyspark/tests/_support/harness.py @@ -15,7 +15,16 @@ from overture.schema.pyspark.check import Check from overture.schema.pyspark.validate import evaluate_checks, explain_errors from pyspark.sql import SparkSession -from pyspark.sql.types import StringType, StructField, StructType +from pyspark.sql.types import ( + ArrayType, + DataType, + DoubleType, + FloatType, + MapType, + StringType, + StructField, + StructType, +) from shapely import wkb, wkt from .helpers import PathTraversalError, deep_merge @@ -121,12 +130,20 @@ def build_scenario_rows( try: invalid_row = sanitize_row(s.mutate(deep_merge(base_row, s.scaffold))) invalid_row["_scenario_id"] = scenario_uuid(f"{s.id}::invalid") - rows.append( - { - 
**copy.deepcopy(base_row), - "_scenario_id": scenario_uuid(f"{s.id}::valid"), - } + # The valid row exercises a real value at the check's target: it + # merges the scaffold (a constraint-satisfying structure reaching + # the target) onto the base row, with NO mutation. A scenario may + # override with `valid_scaffold` to seed a specific value -- e.g. + # the literal alternative of an `X | Literal[c]` field. Without a + # valid scaffold the assertion would be vacuous: a target reachable + # only through scaffolded nesting is absent from a plain base-row + # copy, so a check that wrongly rejects a valid value passes green. + valid_source = ( + s.valid_scaffold if s.valid_scaffold is not None else s.scaffold ) + valid_row = sanitize_row(deep_merge(base_row, valid_source)) + valid_row["_scenario_id"] = scenario_uuid(f"{s.id}::valid") + rows.append(valid_row) rows.append(invalid_row) except PathTraversalError as e: skipped[s.id] = str(e) @@ -195,6 +212,47 @@ def assert_schema_covers_checks(schema: StructType, checks: list[Check]) -> None ) +_FLOAT_TYPES = (DoubleType, FloatType) + + +def coerce_to_schema(value: Any, dtype: DataType) -> Any: + """Cast Python ints to floats where the schema declares a float column. + + A discriminated union widens a numeric field to the broadest member type + (e.g. a `uint8` value alongside `float64` values becomes a `DoubleType` + column). A scaffold built for the narrow arm carries a Python `int`, which + Spark stores as null in a `DoubleType` column (`createDataFrame` does not + coerce with `verifySchema=False`) -- a null that fires `required` on the + `::valid` row. Recursing the row against the schema aligns each numeric + value with its declared column type, so a valid row stays valid. `bool` is + excluded (it is an `int` subclass but maps to `BooleanType`). + + The struct branch keeps only keys the schema declares, mirroring how + `createDataFrame` reads a dict by field name -- so this also drops any + key absent from `dtype`. 
No row carries such keys today; the filtering is + incidental, not a guarantee. + """ + if value is None: + return None + if isinstance(dtype, StructType) and isinstance(value, dict): + return { + f.name: coerce_to_schema(value[f.name], f.dataType) + for f in dtype.fields + if f.name in value + } + if isinstance(dtype, ArrayType) and isinstance(value, list): + return [coerce_to_schema(item, dtype.elementType) for item in value] + if isinstance(dtype, MapType) and isinstance(value, dict): + return {k: coerce_to_schema(v, dtype.valueType) for k, v in value.items()} + if ( + isinstance(dtype, _FLOAT_TYPES) + and isinstance(value, int) + and not isinstance(value, bool) + ): + return float(value) + return value + + def run_validation_pipeline( spark: SparkSession, schema: StructType, @@ -216,6 +274,7 @@ def run_validation_pipeline( augmented_schema = StructType( schema.fields + [StructField("_scenario_id", StringType(), True)] ) + rows = [coerce_to_schema(row, augmented_schema) for row in rows] df = spark.createDataFrame(rows, schema=augmented_schema, verifySchema=False) # type: ignore[union-attr] violations = explain_errors(evaluate_checks(df, checks), checks) indexed = violations.select("_scenario_id", "field", "check") diff --git a/packages/overture-schema-pyspark/tests/_support/registry.py b/packages/overture-schema-pyspark/tests/_support/registry.py index e6c917539..5108bd4b9 100644 --- a/packages/overture-schema-pyspark/tests/_support/registry.py +++ b/packages/overture-schema-pyspark/tests/_support/registry.py @@ -20,8 +20,10 @@ def register_model( ) -> Iterator[None]: """Register a model type in `REGISTRY` for the duration of a test. - Guarantees `del REGISTRY[model_type]` on exit so a failed test body - never leaks an entry into sibling tests. + Removes `REGISTRY[model_type]` on exit so a failed test body never + leaks an entry into sibling tests. 
Uses `pop(..., None)` so a body that + already removed or rebound the key does not raise a `KeyError` that + would mask the body's own exception. Parameters ---------- @@ -40,4 +42,4 @@ def register_model( try: yield finally: - del REGISTRY[model_type] + REGISTRY.pop(model_type, None) diff --git a/packages/overture-schema-pyspark/tests/_support/scenarios.py b/packages/overture-schema-pyspark/tests/_support/scenarios.py index a2f58abbd..8ccfa0af4 100644 --- a/packages/overture-schema-pyspark/tests/_support/scenarios.py +++ b/packages/overture-schema-pyspark/tests/_support/scenarios.py @@ -32,3 +32,13 @@ class Scenario: mutate: Callable[[dict], dict] expected_field: str expected_check: str + valid_scaffold: dict[str, Any] | None = None + """Override scaffold for the `::valid` row, when it must differ from `scaffold`. + + The harness builds the `::valid` row from `scaffold` by default (merged + onto the base row, no mutation), which already places a constraint-valid + value at the check's target. Set this only when the valid row needs a + *different* value there -- e.g. an `X | Literal[c]` field, where it seeds + the literal alternative `c` to prove the check accepts it, distinct from + the synthesized `X` value the mutation scaffold carries. 
+ """ diff --git a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py index 4082273ae..027598b64 100644 --- a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py @@ -31,6 +31,16 @@ def test_error_msg_multiple_values(spark: SparkSession) -> None: assert result[0]["msg"] == "prefix x and y" +def test_error_msg_null_value_does_not_nullify_message(spark: SparkSession) -> None: + # A NULL interpolated value must not make the whole message NULL: F.concat + # would, and a NULL message is dropped by array_compact, silently swallowing + # the violation (e.g. an out-of-bounds linear-reference range [null, 1.5]). + # The null must render as a literal instead. + df = spark.createDataFrame([Row(val=None)], schema="val double") + result = df.select(error_msg("got ", F.col("val")).alias("msg")).collect() + assert result[0]["msg"] == "got null" + + def test_array_check_null_column_returns_null(spark: SparkSession) -> None: df = spark.createDataFrame( [Row(items=None)], diff --git a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py index a71c12f11..12a34a3c8 100644 --- a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py +++ b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py @@ -29,14 +29,44 @@ check_stripped, check_url_format, check_url_length, + except_literals, ) from overture.schema.system.primitive import GeometryType from pyspark.sql import Row, SparkSession from pyspark.sql import functions as F -from pyspark.sql.types import DoubleType, StructField, StructType +from pyspark.sql.types import DoubleType, StringType, StructField, StructType from shapely.geometry import LineString, MultiPolygon, 
Point, Polygon +def _except_literals_error(spark: SparkSession, value: str | None) -> str | None: + """Run `except_literals` over `check_url_format` for one string value.""" + df = spark.createDataFrame( + [Row(val=value)], schema=StructType([StructField("val", StringType(), True)]) + ) + col = F.col("val") + expr = except_literals(col, check_url_format(col), [""]) + # Spark Row field access is untyped (Any); the column holds an error string. + return df.select(expr.alias("err")).collect()[0]["err"] # type: ignore[no-any-return] + + +def test_except_literals_suppresses_allowed_literal(spark: SparkSession) -> None: + # "" is an allowed literal alternative -> the url_format error is suppressed. + assert _except_literals_error(spark, "") is None + + +def test_except_literals_passes_through_real_violation(spark: SparkSession) -> None: + # A non-literal invalid value still surfaces the inner check's error. + assert _except_literals_error(spark, "not a url") is not None + + +def test_except_literals_passes_through_valid_value(spark: SparkSession) -> None: + assert _except_literals_error(spark, "https://example.com/x") is None + + +def test_except_literals_null_is_not_an_error(spark: SparkSession) -> None: + assert _except_literals_error(spark, None) is None + + def test_check_bounds_ge_le_valid(spark: SparkSession) -> None: df = spark.createDataFrame([Row(val=5)]) result = df.select(check_bounds(F.col("val"), ge=1, le=10).alias("err")).collect() diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py index 2a2ac1619..3a62c0e36 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -178,14 +178,14 @@ ), Scenario( id="address::sources[].property:required", - scaffold={"sources": 
[{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="address::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -453,7 +453,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py index 887a8899c..b46940e9e 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py @@ -87,6 +87,7 @@ scaffold={ "datasets": [ { + "source_name": "", "source_dataset_name": "", "data_url": "https://example.com/", "data_url_archived": "https://example.com/", @@ -96,7 +97,6 @@ "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "source_name": "", } ] }, @@ -110,6 +110,7 @@ "datasets": [ { "source_name": "", + 
"source_dataset_name": "", "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", @@ -118,7 +119,6 @@ "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "source_dataset_name": "", } ] }, @@ -133,6 +133,7 @@ { "source_name": "", "source_dataset_name": "", + "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", @@ -140,7 +141,6 @@ "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url": "https://example.com/", } ] }, @@ -155,6 +155,7 @@ { "source_name": "", "source_dataset_name": "", + "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", @@ -162,13 +163,28 @@ "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url": "https://example.com/", } ] }, mutate=set_at_path("datasets[].data_url", "not-a-url"), expected_field="datasets[].data_url", expected_check="url_format", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].data_url:url_length", @@ -177,6 +193,7 @@ { "source_name": "", "source_dataset_name": "", + "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", @@ -184,7 +201,6 @@ "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url": 
"https://example.com/", } ] }, @@ -194,6 +210,22 @@ ), expected_field="datasets[].data_url", expected_check="url_length", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].data_url_archived:required", @@ -203,13 +235,13 @@ "source_name": "", "source_dataset_name": "", "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url_archived": "https://example.com/", } ] }, @@ -225,19 +257,35 @@ "source_name": "", "source_dataset_name": "", "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url_archived": "https://example.com/", } ] }, mutate=set_at_path("datasets[].data_url_archived", "not-a-url"), expected_field="datasets[].data_url_archived", expected_check="url_format", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].data_url_archived:url_length", @@ -247,13 +295,13 @@ "source_name": "", 
"source_dataset_name": "", "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "data_url_archived": "https://example.com/", } ] }, @@ -263,6 +311,22 @@ ), expected_field="datasets[].data_url_archived", expected_check="url_length", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].license_url:required", @@ -273,12 +337,12 @@ "source_dataset_name": "", "data_url": "https://example.com/", "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url": "https://example.com/", } ] }, @@ -295,18 +359,34 @@ "source_dataset_name": "", "data_url": "https://example.com/", "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url": "https://example.com/", } ] }, mutate=set_at_path("datasets[].license_url", "not-a-url"), expected_field="datasets[].license_url", expected_check="url_format", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "", + "license_url_archived": 
"https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].license_url:url_length", @@ -317,12 +397,12 @@ "source_dataset_name": "", "data_url": "https://example.com/", "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url": "https://example.com/", } ] }, @@ -332,6 +412,22 @@ ), expected_field="datasets[].license_url", expected_check="url_length", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].license_url_archived:required", @@ -343,11 +439,11 @@ "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url_archived": "https://example.com/", } ] }, @@ -365,17 +461,33 @@ "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url_archived": "https://example.com/", } ] }, mutate=set_at_path("datasets[].license_url_archived", "not-a-url"), expected_field="datasets[].license_url_archived", expected_check="url_format", + valid_scaffold={ + 
"datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].license_url_archived:url_length", @@ -387,11 +499,11 @@ "data_url": "https://example.com/", "data_url_archived": "https://example.com/", "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_url_archived": "https://example.com/", } ] }, @@ -401,6 +513,22 @@ ), expected_field="datasets[].license_url_archived", expected_check="url_length", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, ), Scenario( id="sources::datasets[].license_type:required", @@ -413,10 +541,10 @@ "data_url_archived": "https://example.com/", "license_url": "https://example.com/", "license_url_archived": "https://example.com/", + "license_type": "", "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_type": "", } ] }, @@ -436,9 +564,9 @@ "license_url": "https://example.com/", "license_url_archived": "https://example.com/", "license_type": "", + "license_text": "", "license_attribution": "", "coverage_bbox": [0.0, 0.0, 0.0, 0.0], - "license_text": "", } ] }, @@ -459,8 +587,8 @@ "license_url_archived": "https://example.com/", "license_type": "", "license_text": "", - "coverage_bbox": [0.0, 0.0, 0.0, 0.0], 
"license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], } ] }, @@ -654,6 +782,23 @@ mutate=set_at_path("datasets[].data_download_url[]", "not-a-url"), expected_field="datasets[].data_download_url[]", expected_check="url_format", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": [""], + } + ] + }, ), Scenario( id="sources::datasets[].data_download_url[]:url_length", @@ -680,6 +825,23 @@ ), expected_field="datasets[].data_download_url[]", expected_check="url_length", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": [""], + } + ] + }, ), Scenario( id="sources::datasets[].countries[]:country_code_alpha2", @@ -703,6 +865,23 @@ mutate=set_at_path("datasets[].countries[]", "99"), expected_field="datasets[].countries[]", expected_check="country_code_alpha2", + valid_scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "countries": ["Global"], + } + ] + }, ), Scenario( id="sources::datasets[].build_source:enum", @@ -849,7 +1028,12 @@ def _assert_scenario( ) 
-> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py index f5e638217..e334ab1d2 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -173,14 +173,14 @@ ), Scenario( id="bathymetry::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="bathymetry::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -392,7 +392,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a 
pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py index fa4a9c055..dcc713124 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -197,14 +197,14 @@ ), Scenario( id="infrastructure::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="infrastructure::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -362,7 +362,7 @@ Scenario( id="infrastructure::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -371,7 +371,7 @@ Scenario( id="infrastructure::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": 
"a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -380,7 +380,7 @@ Scenario( id="infrastructure::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -425,7 +425,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -443,7 +443,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -659,7 +659,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py index 45d14b4be..a28afea35 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -195,14 +195,14 @@ ), Scenario( id="land::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="land::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -346,7 +346,7 @@ Scenario( id="land::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -355,7 +355,7 @@ Scenario( id="land::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ 
-364,7 +364,7 @@ Scenario( id="land::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -409,7 +409,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -427,7 +427,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -643,7 +643,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py index 4f892f5a1..52432d163 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ -173,14 +173,14 @@ ), Scenario( id="land_cover::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="land_cover::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -392,7 +392,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py index a2a3b12f6..8275f92aa 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -197,14 +197,14 @@ ), Scenario( id="land_use::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="land_use::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -362,7 +362,7 @@ Scenario( id="land_use::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -371,7 +371,7 @@ Scenario( id="land_use::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), 
expected_field="names.rules[].value", @@ -380,7 +380,7 @@ Scenario( id="land_use::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -425,7 +425,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -443,7 +443,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -659,7 +659,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py index b5e2fc047..b60b6509e 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -195,14 +195,14 @@ ), Scenario( id="water::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="water::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -332,7 +332,7 @@ Scenario( id="water::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -341,7 +341,7 @@ Scenario( id="water::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), 
expected_field="names.rules[].value", @@ -350,7 +350,7 @@ Scenario( id="water::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -395,7 +395,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -413,7 +413,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -629,7 +629,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py index 5815c9012..01ecbb582 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -206,14 +206,14 @@ ), Scenario( id="building::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="building::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -343,7 +343,7 @@ Scenario( id="building::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -352,7 +352,7 @@ Scenario( id="building::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, 
mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -361,7 +361,7 @@ Scenario( id="building::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -406,7 +406,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -424,7 +424,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -717,7 +717,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py index 9daf3c2bc..464d22290 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -205,14 +205,14 @@ ), Scenario( id="building_part::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="building_part::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -349,7 +349,7 @@ Scenario( id="building_part::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -358,7 +358,7 @@ Scenario( id="building_part::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": 
"common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -367,7 +367,7 @@ Scenario( id="building_part::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -412,7 +412,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -430,7 +430,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -723,7 +723,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py index 128455a44..8371854fb 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -176,7 +176,7 @@ Scenario( id="division::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -185,7 +185,7 @@ Scenario( id="division::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -194,7 +194,7 @@ Scenario( id="division::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -239,7 +239,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -257,7 +257,7 @@ { "value": "a", "variant": "common", - "perspectives": 
{"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -508,14 +508,14 @@ ), Scenario( id="division::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="division::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -659,7 +659,7 @@ Scenario( id="division::hierarchies[][].division_id:required", scaffold={ - "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] }, mutate=set_at_path("hierarchies[][].division_id", None), expected_field="hierarchies[][].division_id", @@ -668,7 +668,7 @@ Scenario( id="division::hierarchies[][].division_id:string_min_length", scaffold={ - "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] }, mutate=set_at_path("hierarchies[][].division_id", ""), expected_field="hierarchies[][].division_id", @@ -677,7 +677,7 @@ Scenario( id="division::hierarchies[][].division_id:no_whitespace", scaffold={ - "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] }, mutate=set_at_path("hierarchies[][].division_id", "has whitespace"), expected_field="hierarchies[][].division_id", @@ -686,7 +686,7 @@ Scenario( id="division::hierarchies[][].subtype:required", scaffold={ - "hierarchies": [[{"division_id": "a", 
"name": "a", "subtype": "country"}]] + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] }, mutate=set_at_path("hierarchies[][].subtype", None), expected_field="hierarchies[][].subtype", @@ -695,7 +695,7 @@ Scenario( id="division::hierarchies[][].subtype:enum", scaffold={ - "hierarchies": [[{"division_id": "a", "name": "a", "subtype": "country"}]] + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] }, mutate=set_at_path("hierarchies[][].subtype", "__INVALID__"), expected_field="hierarchies[][].subtype", @@ -730,14 +730,14 @@ ), Scenario( id="division::parent_division_id:string_min_length", - scaffold={"parent_division_id": "a"}, + scaffold={"subtype": "dependency", "parent_division_id": "a"}, mutate=set_at_path("parent_division_id", ""), expected_field="parent_division_id", expected_check="string_min_length", ), Scenario( id="division::parent_division_id:no_whitespace", - scaffold={"parent_division_id": "a"}, + scaffold={"subtype": "dependency", "parent_division_id": "a"}, mutate=set_at_path("parent_division_id", "has whitespace"), expected_field="parent_division_id", expected_check="no_whitespace", @@ -786,14 +786,14 @@ ), Scenario( id="division::perspectives.mode:required", - scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, mutate=set_at_path("perspectives.mode", None), expected_field="perspectives.mode", expected_check="required", ), Scenario( id="division::perspectives.mode:enum", - scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, mutate=set_at_path("perspectives.mode", "__INVALID__"), expected_field="perspectives.mode", expected_check="enum", @@ -884,21 +884,21 @@ ), Scenario( id="division::capital_of_divisions[].division_id:required", - scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, 
+ scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, mutate=set_at_path("capital_of_divisions[].division_id", None), expected_field="capital_of_divisions[].division_id", expected_check="required", ), Scenario( id="division::capital_of_divisions[].division_id:string_min_length", - scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, mutate=set_at_path("capital_of_divisions[].division_id", ""), expected_field="capital_of_divisions[].division_id", expected_check="string_min_length", ), Scenario( id="division::capital_of_divisions[].division_id:no_whitespace", - scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, mutate=set_at_path("capital_of_divisions[].division_id", "has whitespace"), expected_field="capital_of_divisions[].division_id", expected_check="no_whitespace", @@ -1070,7 +1070,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py index 41c003074..c4b2bc880 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -130,7 +130,7 @@ Scenario( id="division_area::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -139,7 +139,7 @@ Scenario( id="division_area::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -148,7 +148,7 @@ Scenario( id="division_area::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -193,7 +193,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -211,7 +211,7 @@ { "value": "a", "variant": 
"common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -462,14 +462,14 @@ ), Scenario( id="division_area::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="division_area::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -766,7 +766,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py index f42e72153..8d4e66e10 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -189,14 +189,14 @@ ), Scenario( id="division_boundary::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="division_boundary::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -346,7 +346,7 @@ ), Scenario( id="division_boundary::country:country_code_alpha2", - scaffold={"country": "US"}, + scaffold={"subtype": "dependency", "country": "US"}, mutate=set_at_path("country", "99"), expected_field="country", expected_check="country_code_alpha2", @@ -374,14 +374,14 @@ ), Scenario( id="division_boundary::perspectives.mode:required", - scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, mutate=set_at_path("perspectives.mode", None), 
expected_field="perspectives.mode", expected_check="required", ), Scenario( id="division_boundary::perspectives.mode:enum", - scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, mutate=set_at_path("perspectives.mode", "__INVALID__"), expected_field="perspectives.mode", expected_check="enum", @@ -565,7 +565,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py index f906eb4e4..ea01ac84d 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -227,14 +227,14 @@ ), Scenario( id="place::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="place::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, 
mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -356,14 +356,14 @@ ), Scenario( id="place::taxonomy.primary:required", - scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, mutate=set_at_path("taxonomy.primary", None), expected_field="taxonomy.primary", expected_check="required", ), Scenario( id="place::taxonomy.primary:snake_case", - scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, mutate=set_at_path("taxonomy.primary", "HAS SPACES"), expected_field="taxonomy.primary", expected_check="snake_case", @@ -594,7 +594,7 @@ "brand": { "names": { "primary": "a", - "rules": [{"variant": "common", "value": "a"}], + "rules": [{"value": "a", "variant": "common"}], } } }, @@ -608,7 +608,7 @@ "brand": { "names": { "primary": "a", - "rules": [{"variant": "common", "value": "a"}], + "rules": [{"value": "a", "variant": "common"}], } } }, @@ -622,7 +622,7 @@ "brand": { "names": { "primary": "a", - "rules": [{"variant": "common", "value": "a"}], + "rules": [{"value": "a", "variant": "common"}], } } }, @@ -683,8 +683,8 @@ "value": "a", "variant": "common", "perspectives": { - "countries": ["US"], "mode": "accepted_by", + "countries": ["US"], }, } ], @@ -706,8 +706,8 @@ "value": "a", "variant": "common", "perspectives": { - "countries": ["US"], "mode": "accepted_by", + "countries": ["US"], }, } ], @@ -940,7 +940,7 @@ Scenario( id="place::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -949,7 +949,7 @@ Scenario( 
id="place::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -958,7 +958,7 @@ Scenario( id="place::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -1003,7 +1003,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1021,7 +1021,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1230,7 +1230,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py index f4b3738e5..a2b70c267 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -170,14 +170,14 @@ ), Scenario( id="connector::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="connector::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -333,7 +333,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py index ff866fe2a..b805c2291 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -230,14 +230,14 @@ ), Scenario( id="segment::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="segment::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -399,7 +399,10 @@ id="segment::access_restrictions[].when.mode_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode", []), @@ -410,7 +413,10 @@ id="segment::access_restrictions[].when.mode_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": 
{"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), @@ -421,7 +427,10 @@ id="segment::access_restrictions[].when.mode[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), @@ -432,7 +441,10 @@ id="segment::access_restrictions[].when.using_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using", []), @@ -443,7 +455,10 @@ id="segment::access_restrictions[].when.using_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), @@ -454,7 +469,10 @@ id="segment::access_restrictions[].when.using[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), @@ -465,7 +483,10 @@ id="segment::access_restrictions[].when.recognized_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized", []), @@ -476,7 +497,10 @@ 
id="segment::access_restrictions[].when.recognized_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=lambda row: mutate_unique_items( @@ -489,7 +513,10 @@ id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), @@ -503,6 +530,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -510,7 +538,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -526,6 +554,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -533,7 +562,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -548,7 +577,20 @@ id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), @@ -559,7 +601,20 @@ id="segment::access_restrictions[].when.vehicle[].dimension:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -572,7 +627,20 @@ 
id="segment::access_restrictions[].when.vehicle[].comparison:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), @@ -583,7 +651,20 @@ id="segment::access_restrictions[].when.vehicle[].comparison:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -598,7 +679,16 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "axle_count"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "axle_count", + "comparison": "greater_than", + "value": 0, + } + ], + }, } ] }, @@ -612,7 +702,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -626,7 +726,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -640,7 +750,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -654,7 
+774,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -668,7 +798,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -682,7 +822,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -706,35 +856,39 @@ ), Scenario( id="segment::connectors[].connector_id:required", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", None), expected_field="connectors[].connector_id", expected_check="required", ), Scenario( id="segment::connectors[].connector_id:string_min_length", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", ""), expected_field="connectors[].connector_id", expected_check="string_min_length", ), Scenario( id="segment::connectors[].connector_id:no_whitespace", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", "has whitespace"), expected_field="connectors[].connector_id", expected_check="no_whitespace", ), Scenario( id="segment::connectors[].at_0:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + 
"connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", -1.0), expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( id="segment::connectors[].at_1:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", 2.0), expected_field="connectors[].at_1", expected_check="bounds", @@ -924,7 +1078,7 @@ Scenario( id="segment::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -933,7 +1087,7 @@ Scenario( id="segment::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -942,7 +1096,7 @@ Scenario( id="segment::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -987,7 +1141,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1005,7 +1159,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1215,7 +1369,24 @@ ), Scenario( id="segment::model:forbid_if:0", - scaffold={"access_restrictions": [{"when": 
{"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_forbid_if( row, ["unit"], @@ -1229,7 +1400,24 @@ ), Scenario( id="segment::model:require_if:1", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1243,7 +1431,24 @@ ), Scenario( id="segment::model:require_if:2", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1257,7 +1462,24 @@ ), Scenario( id="segment::model:require_if:3", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1271,7 +1493,24 @@ ), Scenario( id="segment::model:require_if:4", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, 
mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1285,7 +1524,11 @@ ), Scenario( id="segment::model:require_any_of:5", - scaffold={"access_restrictions": [{"when": {}}]}, + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, mutate=lambda row: mutate_require_any_of( row, ["heading", "during", "mode", "using", "recognized", "vehicle"], @@ -1501,7 +1744,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py index da78cbfba..5f9971a73 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -289,14 +289,14 @@ ), Scenario( id="segment::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( id="segment::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -458,7 +458,10 @@ id="segment::access_restrictions[].when.mode_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode", []), @@ -469,7 +472,10 @@ id="segment::access_restrictions[].when.mode_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), @@ -480,7 +486,10 @@ id="segment::access_restrictions[].when.mode[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), @@ -491,7 +500,10 @@ id="segment::access_restrictions[].when.using_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using", []), @@ -502,7 +514,10 @@ id="segment::access_restrictions[].when.using_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), @@ -513,7 +528,10 @@ 
id="segment::access_restrictions[].when.using[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), @@ -524,7 +542,10 @@ id="segment::access_restrictions[].when.recognized_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized", []), @@ -535,7 +556,10 @@ id="segment::access_restrictions[].when.recognized_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=lambda row: mutate_unique_items( @@ -548,7 +572,10 @@ id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), @@ -562,6 +589,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -569,7 +597,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -585,6 +613,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -592,7 +621,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -607,7 +636,20 @@ id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + 
"access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), @@ -618,7 +660,20 @@ id="segment::access_restrictions[].when.vehicle[].dimension:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -631,7 +686,20 @@ id="segment::access_restrictions[].when.vehicle[].comparison:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), @@ -642,7 +710,20 @@ id="segment::access_restrictions[].when.vehicle[].comparison:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -657,7 +738,16 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "axle_count"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "axle_count", + "comparison": "greater_than", + "value": 0, + } + ], + }, } ] }, @@ -671,7 +761,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + 
"dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -685,7 +785,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -699,7 +809,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -713,7 +833,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -727,7 +857,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -741,7 +881,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -765,35 +915,39 @@ ), Scenario( id="segment::connectors[].connector_id:required", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", None), expected_field="connectors[].connector_id", expected_check="required", ), Scenario( id="segment::connectors[].connector_id:string_min_length", - scaffold={"connectors": 
[{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", ""), expected_field="connectors[].connector_id", expected_check="string_min_length", ), Scenario( id="segment::connectors[].connector_id:no_whitespace", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", "has whitespace"), expected_field="connectors[].connector_id", expected_check="no_whitespace", ), Scenario( id="segment::connectors[].at_0:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", -1.0), expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( id="segment::connectors[].at_1:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", 2.0), expected_field="connectors[].at_1", expected_check="bounds", @@ -983,7 +1137,7 @@ Scenario( id="segment::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -992,7 +1146,7 @@ Scenario( id="segment::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), expected_field="names.rules[].value", @@ -1001,7 +1155,7 @@ Scenario( id="segment::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": 
[{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -1046,7 +1200,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1064,7 +1218,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1214,10 +1368,11 @@ scaffold={ "destinations": [ { + "from_connector_id": "a", "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "from_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1230,10 +1385,11 @@ scaffold={ "destinations": [ { + "from_connector_id": "a", "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "from_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1246,10 +1402,11 @@ scaffold={ "destinations": [ { + "from_connector_id": "a", "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "from_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1263,9 +1420,10 @@ "destinations": [ { "from_connector_id": "a", + "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "to_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1279,9 +1437,10 @@ "destinations": [ { "from_connector_id": "a", + "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "to_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1295,9 +1454,10 @@ "destinations": [ { "from_connector_id": "a", + "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "to_connector_id": "a", + "labels": [{"value": "a", "type": "street"}], } ] 
}, @@ -1312,8 +1472,9 @@ { "from_connector_id": "a", "to_connector_id": "a", - "final_heading": "forward", "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1328,8 +1489,9 @@ { "from_connector_id": "a", "to_connector_id": "a", - "final_heading": "forward", "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1344,8 +1506,9 @@ { "from_connector_id": "a", "to_connector_id": "a", - "final_heading": "forward", "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1362,6 +1525,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1378,6 +1542,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1428,7 +1593,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "labels": [{"type": "street", "value": "a"}], + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1445,7 +1610,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "labels": [{"type": "street", "value": "a"}], + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1462,7 +1627,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", - "labels": [{"type": "street", "value": "a"}], + "labels": [{"value": "a", "type": "street"}], } ] }, @@ -1513,6 +1678,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], "symbols": ["motorway"], } ] @@ -1530,6 +1696,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], "symbols": ["motorway"], } ] @@ -1547,6 +1714,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": 
"a", "type": "street"}], "when": {"heading": "forward"}, } ] @@ -1564,6 +1732,7 @@ "to_connector_id": "a", "to_segment_id": "a", "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], "when": {"heading": "forward"}, } ] @@ -1577,8 +1746,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, @@ -1591,8 +1760,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, @@ -1605,8 +1774,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, @@ -1621,8 +1790,8 @@ scaffold={ "prohibited_transitions": [ { + "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "sequence": [{"segment_id": "a", "connector_id": "a"}], } ] }, @@ -1635,8 +1804,8 @@ scaffold={ "prohibited_transitions": [ { + "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "sequence": [{"segment_id": "a", "connector_id": "a"}], } ] }, @@ -1649,8 +1818,8 @@ scaffold={ "prohibited_transitions": [ { + "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "sequence": [{"segment_id": "a", "connector_id": "a"}], } ] }, @@ -1665,8 +1834,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, @@ -1679,8 +1848,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, @@ -1693,8 +1862,8 @@ scaffold={ "prohibited_transitions": [ { - "final_heading": "forward", "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", } ] }, 
@@ -1799,7 +1968,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"mode": ["vehicle"]}, + "when": {"heading": "forward", "mode": ["vehicle"]}, } ] }, @@ -1814,7 +1983,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"mode": ["vehicle"]}, + "when": {"heading": "forward", "mode": ["vehicle"]}, } ] }, @@ -1831,7 +2000,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"mode": ["vehicle"]}, + "when": {"heading": "forward", "mode": ["vehicle"]}, } ] }, @@ -1846,7 +2015,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"using": ["as_customer"]}, + "when": {"heading": "forward", "using": ["as_customer"]}, } ] }, @@ -1861,7 +2030,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"using": ["as_customer"]}, + "when": {"heading": "forward", "using": ["as_customer"]}, } ] }, @@ -1878,7 +2047,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"using": ["as_customer"]}, + "when": {"heading": "forward", "using": ["as_customer"]}, } ] }, @@ -1893,7 +2062,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"recognized": ["as_permitted"]}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, } ] }, @@ -1908,7 +2077,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"recognized": ["as_permitted"]}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, } ] }, @@ -1925,7 +2094,7 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"recognized": ["as_permitted"]}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, } ] }, @@ -1941,6 +2110,7 @@ "sequence": [{"connector_id": "a", "segment_id": 
"a"}], "final_heading": "forward", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -1948,7 +2118,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -1965,6 +2135,7 @@ "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -1972,7 +2143,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -1990,7 +2161,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2005,7 +2186,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2022,7 +2213,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2037,7 +2238,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2054,7 +2265,16 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "axle_count"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "axle_count", + "comparison": "greater_than", + "value": 0, + } + ], + }, } ] }, @@ -2069,7 +2289,17 @@ { "sequence": 
[{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2084,7 +2314,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2099,7 +2339,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2114,7 +2364,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -2131,7 +2391,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -2146,7 +2416,17 @@ { "sequence": [{"connector_id": "a", "segment_id": "a"}], "final_heading": "forward", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -2284,21 +2564,21 @@ ), Scenario( id="segment::speed_limits[].max_speed.value:required", 
- scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, mutate=set_at_path("speed_limits[].max_speed.value", None), expected_field="speed_limits[].max_speed.value", expected_check="required", ), Scenario( id="segment::speed_limits[].max_speed.value_0:bounds", - scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, mutate=set_at_path("speed_limits[].max_speed.value", 0), expected_field="speed_limits[].max_speed.value_0", expected_check="bounds", ), Scenario( id="segment::speed_limits[].max_speed.value_1:bounds", - scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, mutate=set_at_path("speed_limits[].max_speed.value", 351), expected_field="speed_limits[].max_speed.value_1", expected_check="bounds", @@ -2319,126 +2599,243 @@ ), Scenario( id="segment::speed_limits[].min_speed.value:required", - scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + } + ] + }, mutate=set_at_path("speed_limits[].min_speed.value", None), expected_field="speed_limits[].min_speed.value", expected_check="required", ), Scenario( id="segment::speed_limits[].min_speed.value_0:bounds", - scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + } + ] + }, mutate=set_at_path("speed_limits[].min_speed.value", 0), expected_field="speed_limits[].min_speed.value_0", expected_check="bounds", ), Scenario( id="segment::speed_limits[].min_speed.value_1:bounds", - scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + 
scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + } + ] + }, mutate=set_at_path("speed_limits[].min_speed.value", 351), expected_field="speed_limits[].min_speed.value_1", expected_check="bounds", ), Scenario( id="segment::speed_limits[].min_speed.unit:required", - scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + } + ] + }, mutate=set_at_path("speed_limits[].min_speed.unit", None), expected_field="speed_limits[].min_speed.unit", expected_check="required", ), Scenario( id="segment::speed_limits[].min_speed.unit:enum", - scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + } + ] + }, mutate=set_at_path("speed_limits[].min_speed.unit", "__INVALID__"), expected_field="speed_limits[].min_speed.unit", expected_check="enum", ), Scenario( id="segment::speed_limits[].between:linear_range_length", - scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + scaffold={ + "speed_limits": [ + {"max_speed": {"value": 1, "unit": "mph"}, "between": [0.0, 1.0]} + ] + }, mutate=set_at_path("speed_limits[].between", [0.5]), expected_field="speed_limits[].between", expected_check="linear_range_length", ), Scenario( id="segment::speed_limits[].between:linear_range_bounds", - scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + scaffold={ + "speed_limits": [ + {"max_speed": {"value": 1, "unit": "mph"}, "between": [0.0, 1.0]} + ] + }, mutate=set_at_path("speed_limits[].between", [1.5, 2.0]), expected_field="speed_limits[].between", expected_check="linear_range_bounds", ), Scenario( id="segment::speed_limits[].between:linear_range_order", - scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + scaffold={ + "speed_limits": 
[ + {"max_speed": {"value": 1, "unit": "mph"}, "between": [0.0, 1.0]} + ] + }, mutate=set_at_path("speed_limits[].between", [0.8, 0.2]), expected_field="speed_limits[].between", expected_check="linear_range_order", ), Scenario( id="segment::speed_limits[].when.heading:enum", - scaffold={"speed_limits": [{"when": {"heading": "forward"}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward"}, + } + ] + }, mutate=set_at_path("speed_limits[].when.heading", "__INVALID__"), expected_field="speed_limits[].when.heading", expected_check="enum", ), Scenario( id="segment::speed_limits[].when.mode_min_length:array_min_length", - scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "mode": ["vehicle"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.mode", []), expected_field="speed_limits[].when.mode_min_length", expected_check="array_min_length", ), Scenario( id="segment::speed_limits[].when.mode_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "mode": ["vehicle"]}, + } + ] + }, mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), expected_field="speed_limits[].when.mode_unique", expected_check="struct_unique", ), Scenario( id="segment::speed_limits[].when.mode[]:enum", - scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "mode": ["vehicle"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.mode[]", "__INVALID__"), expected_field="speed_limits[].when.mode[]", expected_check="enum", ), Scenario( id="segment::speed_limits[].when.using_min_length:array_min_length", - scaffold={"speed_limits": 
[{"when": {"using": ["as_customer"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "using": ["as_customer"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.using", []), expected_field="speed_limits[].when.using_min_length", expected_check="array_min_length", ), Scenario( id="segment::speed_limits[].when.using_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "using": ["as_customer"]}, + } + ] + }, mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.using"), expected_field="speed_limits[].when.using_unique", expected_check="struct_unique", ), Scenario( id="segment::speed_limits[].when.using[]:enum", - scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "using": ["as_customer"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.using[]", "__INVALID__"), expected_field="speed_limits[].when.using[]", expected_check="enum", ), Scenario( id="segment::speed_limits[].when.recognized_min_length:array_min_length", - scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.recognized", []), expected_field="speed_limits[].when.recognized_min_length", expected_check="array_min_length", ), Scenario( id="segment::speed_limits[].when.recognized_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } + ] + 
}, mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), expected_field="speed_limits[].when.recognized_unique", expected_check="struct_unique", ), Scenario( id="segment::speed_limits[].when.recognized[]:enum", - scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } + ] + }, mutate=set_at_path("speed_limits[].when.recognized[]", "__INVALID__"), expected_field="speed_limits[].when.recognized[]", expected_check="enum", @@ -2448,7 +2845,9 @@ scaffold={ "speed_limits": [ { + "max_speed": {"value": 1, "unit": "mph"}, "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -2456,8 +2855,8 @@ "value": 0.0, "unit": "in", } - ] - } + ], + }, } ] }, @@ -2470,7 +2869,9 @@ scaffold={ "speed_limits": [ { + "max_speed": {"value": 1, "unit": "mph"}, "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -2478,8 +2879,8 @@ "value": 0.0, "unit": "in", } - ] - } + ], + }, } ] }, @@ -2489,28 +2890,96 @@ ), Scenario( id="segment::speed_limits[].when.vehicle[].dimension:required", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].dimension", None), expected_field="speed_limits[].when.vehicle[].dimension", expected_check="required", ), Scenario( id="segment::speed_limits[].when.vehicle[].dimension:enum", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": 
"greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].dimension", "__INVALID__"), expected_field="speed_limits[].when.vehicle[].dimension", expected_check="enum", ), Scenario( id="segment::speed_limits[].when.vehicle[].comparison:required", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].comparison", None), expected_field="speed_limits[].when.vehicle[].comparison", expected_check="required", ), Scenario( id="segment::speed_limits[].when.vehicle[].comparison:enum", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].comparison", "__INVALID__"), expected_field="speed_limits[].when.vehicle[].comparison", expected_check="enum", @@ -2518,7 +2987,21 @@ Scenario( id="segment::speed_limits[].when.vehicle[].value_0:required", scaffold={ - "speed_limits": [{"when": {"vehicle": [{"dimension": "axle_count"}]}}] + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "axle_count", + "comparison": "greater_than", + "value": 0, + } + ], + }, + } + ] }, mutate=set_at_path("speed_limits[].when.vehicle[].value", None), expected_field="speed_limits[].when.vehicle[].value_0", @@ -2526,42 +3009,144 @@ ), Scenario( id="segment::speed_limits[].when.vehicle[].value_1:required", - scaffold={"speed_limits": [{"when": {"vehicle": 
[{"dimension": "height"}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].value", None), expected_field="speed_limits[].when.vehicle[].value_1", expected_check="required", ), Scenario( id="segment::speed_limits[].when.vehicle[].value:bounds", - scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].value", -1.0), expected_field="speed_limits[].when.vehicle[].value", expected_check="bounds", ), Scenario( id="segment::speed_limits[].when.vehicle[].unit_0:required", - scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, - mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), - expected_field="speed_limits[].when.vehicle[].unit_0", - expected_check="required", - ), - Scenario( - id="segment::speed_limits[].when.vehicle[].unit_0:enum", - scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, - mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), - expected_field="speed_limits[].when.vehicle[].unit_0", - expected_check="enum", - ), - Scenario( - id="segment::speed_limits[].when.vehicle[].unit_1:required", - scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, - mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), - expected_field="speed_limits[].when.vehicle[].unit_1", + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": 
"forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit_0", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit_0:enum", + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].unit_0", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit_1:required", + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, + } + ] + }, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit_1", expected_check="required", ), Scenario( id="segment::speed_limits[].when.vehicle[].unit_1:enum", - scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, + } + ] + }, mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), expected_field="speed_limits[].when.vehicle[].unit_1", expected_check="enum", @@ -2624,7 +3209,24 @@ ), Scenario( id="segment::model:forbid_if:0", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { 
+ "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_forbid_if( row, ["unit"], @@ -2638,7 +3240,24 @@ ), Scenario( id="segment::model:require_if:1", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2652,7 +3271,24 @@ ), Scenario( id="segment::model:require_if:2", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2666,7 +3302,24 @@ ), Scenario( id="segment::model:require_if:3", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2680,7 +3333,24 @@ ), Scenario( id="segment::model:require_if:4", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2694,7 
+3364,11 @@ ), Scenario( id="segment::model:require_any_of:5", - scaffold={"access_restrictions": [{"when": {}}]}, + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, mutate=lambda row: mutate_require_any_of( row, ["heading", "during", "mode", "using", "recognized", "vehicle"], @@ -2706,7 +3380,17 @@ ), Scenario( id="segment::model:require_any_of:6", - scaffold={"destinations": [{}]}, + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, mutate=lambda row: mutate_require_any_of( row, ["labels", "symbols"], array_path="destinations" ), @@ -2715,7 +3399,25 @@ ), Scenario( id="segment::model:forbid_if:7", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_forbid_if( row, ["unit"], @@ -2729,7 +3431,25 @@ ), Scenario( id="segment::model:require_if:8", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2743,7 +3463,25 @@ ), Scenario( id="segment::model:require_if:9", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + 
"final_heading": "forward", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2757,7 +3495,25 @@ ), Scenario( id="segment::model:require_if:10", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2771,7 +3527,25 @@ ), Scenario( id="segment::model:require_if:11", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2785,7 +3559,15 @@ ), Scenario( id="segment::model:require_any_of:12", - scaffold={"prohibited_transitions": [{"when": {}}]}, + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, mutate=lambda row: mutate_require_any_of( row, ["heading", "during", "mode", "using", "recognized", "vehicle"], @@ -2797,7 +3579,24 @@ ), Scenario( id="segment::model:forbid_if:13", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", 
+ "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_forbid_if( row, ["unit"], @@ -2811,7 +3610,24 @@ ), Scenario( id="segment::model:require_if:14", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2825,7 +3641,24 @@ ), Scenario( id="segment::model:require_if:15", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2839,7 +3672,24 @@ ), Scenario( id="segment::model:require_if:16", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2853,7 +3703,24 @@ ), Scenario( id="segment::model:require_if:17", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -2867,7 +3734,14 @@ ), Scenario( id="segment::model:require_any_of:18", - scaffold={"speed_limits": [{"when": {}}]}, + scaffold={ + 
"speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "when": {"heading": "forward"}, + } + ] + }, mutate=lambda row: mutate_require_any_of( row, ["heading", "during", "mode", "using", "recognized", "vehicle"], @@ -2879,7 +3753,7 @@ ), Scenario( id="segment::model:require_any_of:19", - scaffold={"speed_limits": [{}]}, + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, mutate=lambda row: mutate_require_any_of( row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" ), @@ -3092,7 +3966,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). + pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py index 08fafe741..6f109c2d2 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -227,14 +227,14 @@ ), Scenario( id="segment::sources[].property:required", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", None), expected_field="sources[].property", expected_check="required", ), Scenario( 
id="segment::sources[].property:json_pointer", - scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, mutate=set_at_path("sources[].property", "no-slash"), expected_field="sources[].property", expected_check="json_pointer", @@ -396,7 +396,10 @@ id="segment::access_restrictions[].when.mode_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode", []), @@ -407,7 +410,10 @@ id="segment::access_restrictions[].when.mode_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), @@ -418,7 +424,10 @@ id="segment::access_restrictions[].when.mode[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "mode": ["vehicle"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), @@ -429,7 +438,10 @@ id="segment::access_restrictions[].when.using_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using", []), @@ -440,7 +452,10 @@ id="segment::access_restrictions[].when.using_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": 
["as_customer"]}, + } ] }, mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), @@ -451,7 +466,10 @@ id="segment::access_restrictions[].when.using[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "using": ["as_customer"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), @@ -462,7 +480,10 @@ id="segment::access_restrictions[].when.recognized_min_length:array_min_length", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized", []), @@ -473,7 +494,10 @@ id="segment::access_restrictions[].when.recognized_unique:struct_unique", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=lambda row: mutate_unique_items( @@ -486,7 +510,10 @@ id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + { + "access_type": "allowed", + "when": {"heading": "forward", "recognized": ["as_permitted"]}, + } ] }, mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), @@ -500,6 +527,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -507,7 +535,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -523,6 +551,7 @@ { "access_type": "allowed", "when": { + "heading": "forward", "vehicle": [ { "dimension": "height", @@ -530,7 +559,7 @@ "value": 0.0, "unit": "in", } - ] + ], }, } ] @@ -545,7 +574,20 @@ 
id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), @@ -556,7 +598,20 @@ id="segment::access_restrictions[].when.vehicle[].dimension:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -569,7 +624,20 @@ id="segment::access_restrictions[].when.vehicle[].comparison:required", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), @@ -580,7 +648,20 @@ id="segment::access_restrictions[].when.vehicle[].comparison:enum", scaffold={ "access_restrictions": [ - {"access_type": "allowed", "when": {"vehicle": [{}]}} + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } ] }, mutate=set_at_path( @@ -595,7 +676,16 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "axle_count"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "axle_count", + "comparison": "greater_than", + "value": 0, + } + ], + }, } ] }, @@ -609,7 +699,17 @@ 
"access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -623,7 +723,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -637,7 +747,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -651,7 +771,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "height"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, } ] }, @@ -665,7 +795,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -679,7 +819,17 @@ "access_restrictions": [ { "access_type": "allowed", - "when": {"vehicle": [{"dimension": "weight"}]}, + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "weight", + "comparison": "greater_than", + "value": 0.0, + "unit": "oz", + } + ], + }, } ] }, @@ -703,35 +853,39 @@ ), Scenario( id="segment::connectors[].connector_id:required", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", None), 
expected_field="connectors[].connector_id", expected_check="required", ), Scenario( id="segment::connectors[].connector_id:string_min_length", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", ""), expected_field="connectors[].connector_id", expected_check="string_min_length", ), Scenario( id="segment::connectors[].connector_id:no_whitespace", - scaffold={"connectors": [{"connector_id": "a"}]}, + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, mutate=set_at_path("connectors[].connector_id", "has whitespace"), expected_field="connectors[].connector_id", expected_check="no_whitespace", ), Scenario( id="segment::connectors[].at_0:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", -1.0), expected_field="connectors[].at_0", expected_check="bounds", ), Scenario( id="segment::connectors[].at_1:bounds", - scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + scaffold={ + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1"}] + }, mutate=set_at_path("connectors[].at", 2.0), expected_field="connectors[].at_1", expected_check="bounds", @@ -921,7 +1075,7 @@ Scenario( id="segment::names.rules[].value:required", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", None), expected_field="names.rules[].value", @@ -930,7 +1084,7 @@ Scenario( id="segment::names.rules[].value:string_min_length", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", ""), 
expected_field="names.rules[].value", @@ -939,7 +1093,7 @@ Scenario( id="segment::names.rules[].value:stripped", scaffold={ - "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} }, mutate=set_at_path("names.rules[].value", " has spaces "), expected_field="names.rules[].value", @@ -984,7 +1138,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1002,7 +1156,7 @@ { "value": "a", "variant": "common", - "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, } ], } @@ -1135,7 +1289,24 @@ ), Scenario( id="segment::model:forbid_if:0", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_forbid_if( row, ["unit"], @@ -1149,7 +1320,24 @@ ), Scenario( id="segment::model:require_if:1", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1163,7 +1351,24 @@ ), Scenario( id="segment::model:require_if:2", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + 
] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1177,7 +1382,24 @@ ), Scenario( id="segment::model:require_if:3", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1191,7 +1413,24 @@ ), Scenario( id="segment::model:require_if:4", - scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "heading": "forward", + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ] + }, mutate=lambda row: mutate_require_if( row, ["unit"], @@ -1205,7 +1444,11 @@ ), Scenario( id="segment::model:require_any_of:5", - scaffold={"access_restrictions": [{"when": {}}]}, + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, mutate=lambda row: mutate_require_any_of( row, ["heading", "during", "mode", "using", "recognized", "vehicle"], @@ -1421,7 +1664,12 @@ def _assert_scenario( ) -> None: expected = (scenario.expected_field, scenario.expected_check) if scenario.id in validation_results.skipped: - pytest.skip(validation_results.skipped[scenario.id]) + # An unbuildable scenario exercises nothing; fail loud rather than skip + # (a skip reads as a pass and hides codegen/scaffold gaps). 
+ pytest.fail( + f"unbuildable scenario {scenario.id!r}: " + f"{validation_results.skipped[scenario.id]}" + ) valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) assert expected not in valid_violations invalid_violations = validation_results.violations.get( diff --git a/packages/overture-schema-pyspark/tests/test_harness.py b/packages/overture-schema-pyspark/tests/test_harness.py index f77135dc0..3fa07d628 100644 --- a/packages/overture-schema-pyspark/tests/test_harness.py +++ b/packages/overture-schema-pyspark/tests/test_harness.py @@ -10,7 +10,9 @@ from pyspark.sql import functions as F from pyspark.sql.types import ( ArrayType, + DoubleType, IntegerType, + MapType, StringType, StructField, StructType, @@ -20,6 +22,7 @@ assert_schema_covers_checks, build_scenario_map, build_scenario_rows, + coerce_to_schema, index_violations, sanitize_row, scenario_uuid, @@ -158,9 +161,13 @@ def test_scenario_creates_valid_and_invalid_rows(self) -> None: assert rows[1]["_scenario_id"] == scenario_uuid("f::x:required::valid") assert rows[2]["_scenario_id"] == scenario_uuid("f::x:required::invalid") - def test_valid_row_uses_base_row_not_scaffold(self) -> None: - """Valid row is a copy of base_row, not the scaffold-merged row.""" - base = {"id": "orig", "theme": "t", "type": "ty", "items": [{"a": 1, "b": 2}]} + def test_valid_row_uses_scaffold_merged_row(self) -> None: + """Valid row merges the scaffold so the target is present and exercised. + + The scaffold reaches a target the base row lacks; the valid row must + carry it (with no mutation) or the no-violation assertion is vacuous. 
+ """ + base = {"id": "orig", "theme": "t", "type": "ty"} scenarios = [ Scenario( id="f::items[].a:required", @@ -174,11 +181,38 @@ def test_valid_row_uses_base_row_not_scaffold(self) -> None: base, scenarios, model_name="f" ) assert len(rows) == 3 - # Valid row uses base_row (preserves all fields in items element) - assert rows[1]["items"] == [{"a": 1, "b": 2}] - # Invalid row uses scaffold-merged row + # Valid row carries the scaffold's target value (no mutation). + assert rows[1]["items"] == [{"a": 0}] + # Invalid row applies the mutation on top of the scaffold. assert rows[2]["items"][0]["a"] is None + def test_valid_scaffold_overrides_scaffold_for_valid_row(self) -> None: + """`valid_scaffold`, when set, builds the valid row instead of `scaffold`. + + The invalid row still uses `scaffold` -- only the valid row diverges, + e.g. to seed a literal alternative the mutation scaffold can't carry. + """ + base = {"id": "orig", "theme": "t", "type": "ty"} + scenarios = [ + Scenario( + id="f::kind:literal", + scaffold={"kind": "synthesized"}, + mutate=set_at_path("kind", None), + expected_field="kind", + expected_check="required", + valid_scaffold={"kind": "literal-alt"}, + ), + ] + rows, _scenario_map, _skipped = build_scenario_rows( + base, scenarios, model_name="f" + ) + valid_id = scenario_uuid("f::kind:literal::valid") + invalid_id = scenario_uuid("f::kind:literal::invalid") + valid_row = next(r for r in rows if r["_scenario_id"] == valid_id) + invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) + assert valid_row["kind"] == "literal-alt" + assert invalid_row["kind"] is None + def test_scaffold_merged_onto_invalid_row(self) -> None: base_row = {"id": "x", "a": 1} s = Scenario( @@ -219,6 +253,52 @@ def test_applies_scaffold_then_mutation(self) -> None: assert invalid_row["a"] is None +class TestCoerceToSchema: + """Ints land as floats in float columns; Spark would otherwise null them.""" + + def test_int_in_double_column_becomes_float(self) -> 
None: + schema = StructType([StructField("v", DoubleType(), True)]) + result = coerce_to_schema({"v": 0}, schema) + assert isinstance(result["v"], float) + assert result["v"] == 0.0 + + def test_bool_in_double_column_left_alone(self) -> None: + """`bool` is an `int` subclass but maps to BooleanType, not a float.""" + schema = StructType([StructField("v", DoubleType(), True)]) + assert coerce_to_schema({"v": True}, schema)["v"] is True + + def test_int_column_unchanged(self) -> None: + schema = StructType([StructField("v", IntegerType(), True)]) + assert coerce_to_schema({"v": 5}, schema)["v"] == 5 + + def test_nested_struct_array_and_map_coerced(self) -> None: + schema = StructType( + [ + StructField( + "items", + ArrayType(StructType([StructField("v", DoubleType(), True)])), + True, + ), + StructField("m", MapType(StringType(), DoubleType()), True), + ] + ) + result = coerce_to_schema({"items": [{"v": 1}], "m": {"k": 2}}, schema) + assert result["items"][0]["v"] == 1.0 + assert isinstance(result["items"][0]["v"], float) + assert result["m"]["k"] == 2.0 + + def test_none_and_missing_fields_preserved(self) -> None: + schema = StructType( + [ + StructField("v", DoubleType(), True), + StructField("w", DoubleType(), True), + ] + ) + # `w` absent from the value stays absent (Spark fills null). + result = coerce_to_schema({"v": None}, schema) + assert result == {"v": None} + + class TestSanitizeRow: def test_nested_geometry_converted(self) -> None: row = {