diff --git a/HISTORY.rst b/HISTORY.rst index 25abd5d..6fac461 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,6 +5,14 @@ History .. to_doc +--------------------- +0.10.12.dev0 +--------------------- + +* Add ``shed-tools validate`` to check a tools file structurally and against a + Tool Shed (every pinned ``changeset_revision``) without installing into Galaxy. + `Issue 236`_ + --------------------- 0.10.11 (2025-09-26) --------------------- @@ -282,6 +290,7 @@ History and adapt them for usage as a library. .. github_links +.. _Issue 236: https://github.com/galaxyproject/ephemeris/issues/236 .. _Pull Request 222: https://github.com/galaxyproject/ephemeris/pull/222 .. _Pull Request 220: https://github.com/galaxyproject/ephemeris/pull/220 .. _Pull Request 212: https://github.com/galaxyproject/ephemeris/pull/212 diff --git a/dev-requirements.txt b/dev-requirements.txt index 813b898..fe3595c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,10 +4,8 @@ coverage pytest pytest-cov galaxy-test-driver -# Pin galaxy-app to the same release line as the rest of the Galaxy stack. -# It is only pulled in transitively (via galaxy-test-driver), so pip is free to -# backtrack it to an older release than galaxy-data/galaxy-util, producing a -# skewed install whose galaxy.model import fails during test collection. +# Pulled in only transitively via galaxy-test-driver; pin it to keep the Galaxy +# stack on one release line (otherwise pip can backtrack it, skewing the install). 
def _read_yaml_mapping(path: StrOrPath) -> dict:
    """Load the YAML document at ``path`` and require a mapping at its top level.

    An empty document and any non-mapping root (list, scalar, ...) are both rejected
    with a ``ValueError`` that names the offending path and what was found instead.
    """
    loaded = _read_yaml(path)
    if isinstance(loaded, dict):
        return loaded
    # Describe what was actually found so the message is actionable.
    if loaded is None:
        kind = "empty file"
    else:
        kind = f"a {type(loaded).__name__}"
    raise ValueError(f"Expected a YAML mapping at the top level of '{path}', got {kind}.")
history_id = get_or_create_history(history_name or "Ephemeris Data Manager History", self.gi)["id"] def run_jobs(jobs, skipped_jobs): diff --git a/src/ephemeris/shed_tools.py b/src/ephemeris/shed_tools.py index 60f40c2..5bfaa37 100644 --- a/src/ephemeris/shed_tools.py +++ b/src/ephemeris/shed_tools.py @@ -2,12 +2,16 @@ A tool to automate installation of tool repositories from a Galaxy Tool Shed into an instance of Galaxy. -Shed-tools has three commands: update, test and install. +Shed-tools has four commands: update, test, install and validate. Update simply updates all the tools in a Galaxy given connection details on the command line. Test tests the specified tools in the Galaxy Instance. +Validate checks a tools file structurally and, unless ``--structural-only`` is given, +verifies every repository and pinned ``changeset_revision`` against a Tool Shed. It needs +no Galaxy connection, so it is suitable for CI / pre-commit on revision-pinned lock files. + Install allows installation of tools in multiple ways. 
def validate(args, log) -> int:
    """Validate a tools file structurally and (unless --structural-only) against a Tool Shed.

    Requires no Galaxy connection. The repositories are read from ``args.tools_file`` or
    ``args.tool_list_file`` when either is set, otherwise they are built from the
    command-line options via ``args_to_repos(args)``.

    :param args: parsed command-line arguments; ``tool_shed_url``, ``tools_file``,
        ``tool_list_file`` and ``structural_only`` are read here.
    :param log: logger that receives every error and the final summary line.
    :return: process exit code, 0 when validation passes and 1 otherwise.
    """
    default_toolshed_url = args.tool_shed_url or DEFAULT_TOOL_SHED_URL
    tool_file = args.tools_file or args.tool_list_file

    # Structural validation: parse into the RepositoryInstallTargets pydantic model.
    try:
        if tool_file:
            targets = read_tools(tool_file)
        else:
            repos = args_to_repos(args)
            if not repos:
                log.error("No tools to validate. Provide a tools file, --yaml-tool, or --name/--owner.")
                return 1
            targets = RepositoryInstallTargets(tools=repos)  # type: ignore[arg-type]
    except ValidationError as e:
        log.error(f"Structural validation failed for '{tool_file or 'provided tools'}':")
        for error in e.errors():
            location = ".".join(str(part) for part in error["loc"])
            log.error(f"  {location}: {error['msg']}")
        return 1
    except (OSError, yaml.YAMLError, ValueError) as e:
        # ValueError covers a non-mapping / empty YAML root (see read_tools). Pydantic's
        # ValidationError is a ValueError subclass but is handled by the clause above.
        if tool_file:
            log.error(f"Could not read tools file '{tool_file}': {unicodify(e)}")
        else:
            # The repositories came from command-line options, so there is no file name
            # to report; avoid printing a misleading "'None'".
            log.error(f"Could not read the provided tools: {unicodify(e)}")
        return 1

    repo_dicts = [target.model_dump(exclude_none=True) for target in targets.tools]
    count = len(repo_dicts)
    noun = "repository" if count == 1 else "repositories"
    if args.structural_only:
        log.info(f"Structure valid: {count} {noun}.")
        return 0

    errors = validate_against_tool_shed(repo_dicts, default_toolshed_url, log=log)
    if errors:
        log.error(f"Validation failed with {len(errors)} error(s).")
        return 1
    log.info(f"Validated {count} {noun} successfully.")
    return 0
( + add_log_file_argument, + add_verbosity_argument, get_common_args, HideUnderscoresHelpFormatter, ) @@ -37,6 +39,8 @@ def parser(): test_existing=False, parallel_tests=1, client_test_config=None, + tools_file=None, + structural_only=False, ) # SUBPARSERS @@ -61,17 +65,33 @@ def parser(): parents=[common_arguments], ) + # The validate subparser deliberately does not inherit the Galaxy connection + # arguments: it talks only to the Tool Shed (or nothing, with --structural-only). + validate_arguments = argparse.ArgumentParser(add_help=False) + validate_general_group = validate_arguments.add_argument_group("General options") + add_verbosity_argument(validate_general_group) + add_log_file_argument(validate_general_group) + validate_command_parser = subparsers.add_parser( + "validate", + help="Validate a tools file against a Tool Shed without installing into Galaxy. " + "Use shed-tools validate --help for more information", + formatter_class=HideUnderscoresHelpFormatter, + parents=[validate_arguments], + ) + # SUBPARSER DEFAULTS update_command_parser.set_defaults(action="update") test_command_parser.set_defaults(action="test") install_command_parser.set_defaults(action="install") + validate_command_parser.set_defaults(action="validate") # COMMON OPTIONS for command_parser in [ update_command_parser, install_command_parser, test_command_parser, + validate_command_parser, ]: command_parser.add_argument( "-t", @@ -281,4 +301,19 @@ def parser(): help="Annotate expectations about tools in client testing YAML " "configuration file.", ) + # OPTIONS UNIQUE TO VALIDATE + validate_command_parser.add_argument( + "tools_file", + nargs="?", + default=None, + help="Path to the tools YAML / .yml.lock file to validate " "(equivalent to passing it with --tools-file).", + ) + validate_command_parser.add_argument( + "--structural-only", + "--structural_only", + action="store_true", + dest="structural_only", + help="Only validate the structure of the tools file; do not query the Tool 
def validate_against_tool_shed(
    repositories: Iterable[dict],
    default_toolshed_url: str,
    log=None,
) -> list[str]:
    """Check each repository (and every pinned changeset revision) against the Tool Shed.

    Requires no Galaxy connection. Each repository is queried once; both ``revisions``
    (list) and a single ``changeset_revision`` are checked for membership in the
    installable revisions. A revision pinned through both keys is checked, and
    reported, only once.

    :param repositories: repository dicts; ``name`` and ``owner`` are required, while
        ``tool_shed_url``, ``revisions`` and ``changeset_revision`` are optional.
    :param default_toolshed_url: Tool Shed used for repositories without ``tool_shed_url``.
    :param log: optional logger; when given, every collected error is also logged.
    :return: human-readable error messages, empty when every repository exists and all
        pinned revisions are installable.
    """
    errors: list[str] = []
    for repo_info in repositories:
        # Work on a copy so the caller's dict is never modified.
        repo: InstallRepoDict = dict(repo_info)  # type: ignore[assignment]
        tool_shed_url = format_tool_shed_url(repo.get("tool_shed_url") or default_toolshed_url)
        repo["tool_shed_url"] = tool_shed_url
        name = repo["name"]
        owner = repo["owner"]
        label = f"{owner}/{name}"
        candidates = list(repo.get("revisions") or [])
        changeset_revision = repo.get("changeset_revision")
        if changeset_revision:
            candidates.append(changeset_revision)
        # Order-preserving de-duplication: the same pin given via both keys (or listed
        # twice) must not produce duplicate errors and inflate the error count.
        pinned_revisions = list(dict.fromkeys(candidates))
        try:
            installable_revisions = get_installable_revisions(repo)
        except Exception as e:
            # Deliberately broad: any failure talking to the Tool Shed becomes a
            # validation error for this repository instead of aborting the whole run.
            errors.append(f"{label}: failed to query tool shed {tool_shed_url}: {e}")
            continue
        if not installable_revisions:
            errors.append(f"{label}: repository does not exist on tool shed {tool_shed_url}")
            continue
        for revision in pinned_revisions:
            if revision not in installable_revisions:
                errors.append(
                    f"{label}: changeset_revision '{revision}' is not installable on {tool_shed_url} "
                    f"(installable revisions: {', '.join(installable_revisions)})"
                )
    if log:
        for error in errors:
            log.error(error)
    return errors
import textwrap

import pytest

from ephemeris import shed_tools_methods
from ephemeris.shed_tools import main as shed_tools_cli
from ephemeris.shed_tools_methods import validate_against_tool_shed

DEFAULT_TOOL_SHED_URL = "https://toolshed.g2.bx.psu.edu/"

# Installable revisions served by the fake Tool Shed, keyed by (name, owner).
FAKE_INSTALLABLE = {
    ("bwa", "devteam"): ["051eba708f43", "4d82cf59895e"],
    ("tabular_to_fasta", "devteam"): ["0b4e36026794"],
    ("empty_repo", "devteam"): [],  # exists but nothing installable
}


class _FakeRepositoriesClient:
    """Stand-in for the Tool Shed repositories client, backed by ``FAKE_INSTALLABLE``."""

    def get_ordered_installable_revisions(self, name, owner):
        revisions = FAKE_INSTALLABLE.get((name, owner))
        if revisions is None:
            raise Exception(f"No repository named {name} found with owner {owner}")
        return revisions


class _FakeToolShedInstance:
    """Stand-in for ``ToolShedInstance`` exposing only what the code under test uses."""

    def __init__(self, url):
        self.url = url
        self.repositories = _FakeRepositoriesClient()


@pytest.fixture(autouse=True)
def fake_tool_shed(monkeypatch):
    """Swap the real Tool Shed client for the fake one in every test of this module."""
    monkeypatch.setattr(shed_tools_methods, "ToolShedInstance", _FakeToolShedInstance)


def _write(tmp_path, content):
    """Dedent ``content`` into ``tmp_path/tools.yaml`` and return its path as a string."""
    target = tmp_path / "tools.yaml"
    target.write_text(textwrap.dedent(content))
    return str(target)


def test_validate_against_tool_shed_success():
    pinned = {"name": "bwa", "owner": "devteam", "revisions": ["051eba708f43", "4d82cf59895e"]}
    assert validate_against_tool_shed([pinned], DEFAULT_TOOL_SHED_URL) == []


def test_validate_against_tool_shed_no_pinned_revision_just_existence():
    unpinned = {"name": "bwa", "owner": "devteam"}
    assert validate_against_tool_shed([unpinned], DEFAULT_TOOL_SHED_URL) == []


def test_validate_against_tool_shed_bad_revision():
    bad_pin = {"name": "bwa", "owner": "devteam", "revisions": ["deadbeefdead"]}
    problems = validate_against_tool_shed([bad_pin], DEFAULT_TOOL_SHED_URL)
    assert len(problems) == 1
    assert "deadbeefdead" in problems[0]
    assert "not installable" in problems[0]


def test_validate_against_tool_shed_changeset_revision_key():
    # A single pinned `changeset_revision` (not a `revisions` list) is also checked.
    bad_pin = {"name": "bwa", "owner": "devteam", "changeset_revision": "deadbeefdead"}
    problems = validate_against_tool_shed([bad_pin], DEFAULT_TOOL_SHED_URL)
    assert len(problems) == 1
    assert "deadbeefdead" in problems[0]


def test_validate_against_tool_shed_empty_installable():
    nothing_installable = {"name": "empty_repo", "owner": "devteam", "revisions": ["abc"]}
    problems = validate_against_tool_shed([nothing_installable], DEFAULT_TOOL_SHED_URL)
    assert len(problems) == 1
    assert "does not exist" in problems[0]


def test_validate_against_tool_shed_query_error():
    unknown = {"name": "missing", "owner": "nobody"}
    problems = validate_against_tool_shed([unknown], DEFAULT_TOOL_SHED_URL)
    assert len(problems) == 1
    assert "failed to query tool shed" in problems[0]


def test_validate_against_tool_shed_queries_once_per_repo(monkeypatch):
    # Two pinned revisions of one repository must cost a single Tool Shed query.
    seen = []
    real_lookup = _FakeRepositoriesClient.get_ordered_installable_revisions

    def recording_lookup(self, name, owner):
        seen.append((name, owner))
        return real_lookup(self, name, owner)

    monkeypatch.setattr(_FakeRepositoriesClient, "get_ordered_installable_revisions", recording_lookup)
    validate_against_tool_shed(
        [{"name": "bwa", "owner": "devteam", "revisions": ["051eba708f43", "4d82cf59895e"]}],
        DEFAULT_TOOL_SHED_URL,
    )
    assert seen == [("bwa", "devteam")]


def test_cli_validate_structural_only_good(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - name: bwa
            owner: devteam
            revisions:
              - 'deadbeefdead'
        """,
    )
    # Bad revision, but --structural-only does not consult the shed.
    exit_code = shed_tools_cli(["validate", tools_yaml, "--structural-only"])
    assert exit_code == 0


def test_cli_validate_structural_only_bad_missing_name(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - owner: devteam
        """,
    )
    exit_code = shed_tools_cli(["validate", tools_yaml, "--structural-only"])
    assert exit_code == 1


def test_cli_validate_full_good(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - name: bwa
            owner: devteam
            revisions:
              - '051eba708f43'
          - name: tabular_to_fasta
            owner: devteam
            revisions:
              - '0b4e36026794'
        """,
    )
    exit_code = shed_tools_cli(["validate", tools_yaml])
    assert exit_code == 0


def test_cli_validate_full_bad_revision(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - name: bwa
            owner: devteam
            revisions:
              - 'deadbeefdead'
        """,
    )
    exit_code = shed_tools_cli(["validate", tools_yaml])
    assert exit_code == 1


def test_cli_validate_via_tools_file_flag(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - name: bwa
            owner: devteam
            revisions:
              - '051eba708f43'
        """,
    )
    exit_code = shed_tools_cli(["validate", "-t", tools_yaml])
    assert exit_code == 0


def test_cli_validate_missing_file(tmp_path):
    absent = tmp_path / "nope.yaml"
    assert shed_tools_cli(["validate", str(absent)]) == 1


def test_cli_validate_no_input():
    assert shed_tools_cli(["validate"]) == 1


def test_cli_validate_unknown_key_rejected(tmp_path):
    # A typo'd key (revision -> revisions) must fail structurally rather than
    # silently pass with the pin unchecked.
    tools_yaml = _write(
        tmp_path,
        """
        tools:
          - name: bwa
            owner: devteam
            revision:
              - '051eba708f43'
        """,
    )
    exit_code = shed_tools_cli(["validate", tools_yaml, "--structural-only"])
    assert exit_code == 1


def test_cli_validate_empty_file(tmp_path):
    empty = tmp_path / "empty.yaml"
    empty.write_text("")
    assert shed_tools_cli(["validate", str(empty), "--structural-only"]) == 1


def test_cli_validate_non_mapping_root(tmp_path):
    tools_yaml = _write(
        tmp_path,
        """
        - name: bwa
          owner: devteam
        """,
    )
    assert shed_tools_cli(["validate", str(tools_yaml), "--structural-only"]) == 1