Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions conda_recipe_v2_schema/cli.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, I'm wondering whether a tool that just does this for all YAML/JSON files would be more helpful. You could check at https://www.schemastore.org/ whether the filename recipe.yaml has a JSON schema and compare it against that. This would be more general than the conda-forge use case, and I can imagine using it in more places than only recipe.yaml.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

best check before building, maybe such a tool already exists somewhere

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree. If this is a general JSON schema validator, then this repo is probably not the right place for it

Copy link
Author

@bollwyvl bollwyvl Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, sure schema everywhere, hooray!

again, as mentioned, the intent here is to make it easier for contributors to this repo to describe reproducible schema issues and their fixes.

schemastore

while useful, schemastore is... kinda bad on many levels (privacy, accuracy).

general

yeah, there are a lot of related tools (see also #29)... but YAML is weird enough across implementations (see: executable !!tags, lack of correct anchor support, etc.) that the finer points of being valid data are sometimes missed. Anyhow, none of those tools know about pydantic (for good reasons), much less this repo's pydantic.

the conda-forge use case

i'll wager there are more github.com/conda-forge/.../recipe.yaml than anywhere else public, so doesn't seem like much of a stretch.

Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Command line interface for generating and checking instances of the schema."""

from __future__ import annotations

import argparse
from pathlib import Path

from . import __version__

# Program name: passed to `ArgumentParser` and shown in the `--version` string.
CLI = "conda-recipe-v2-schema"
# Sub-command names; `main` dispatches on these via the parsed `action` value.
GENERATE = "generate"
VALIDATE = "validate"


def get_parser() -> argparse.ArgumentParser:
    """Create the top-level parser with its ``generate`` and ``validate`` sub-commands.

    Returns:
        A parser that stores the chosen sub-command name under ``action`` and,
        for ``validate``, the ``recipes``, ``work_dir``, ``conda_forge``,
        ``no_pretty``, ``quiet`` and ``schema`` options.
    """
    root = argparse.ArgumentParser(CLI)
    root.add_argument("-v", "--version", action="version", version=f"{CLI} {__version__}")

    commands = root.add_subparsers(dest="action")
    commands.add_parser(GENERATE, help="print the schema")
    checker = commands.add_parser(
        VALIDATE, help="validate local paths and URLs against the schema and model"
    )

    # The `validate` options as (flags, settings) pairs; they are registered in
    # this order, which is also the order `--help` lists them in.
    validate_options = [
        (
            ("recipes",),
            {
                "nargs": "*",
                "help": "a relative path or URL for a `recipe.yaml`; may be given multiple times",
            },
        ),
        (
            ("-w", "--work-dir"),
            {"type": Path, "help": "a work folder to persist remote recipes between runs"},
        ),
        (
            ("-c", "--conda-forge"),
            {
                "action": "append",
                "help": (
                    "names of conda-forge recipe to check (no `-feedstock`);"
                    " may be given multiple times"
                ),
            },
        ),
        (
            ("-u", "--no-pretty"),
            {"action": "store_true", "help": "disable syntax highlighting for YAML findings"},
        ),
        (
            ("-q", "--quiet"),
            {"action": "store_true", "help": "minimize output"},
        ),
        (
            ("-s", "--schema"),
            {"type": Path, "help": "alternate path to the schema to use"},
        ),
    ]
    for flags, settings in validate_options:
        checker.add_argument(*flags, **settings)

    return root


def main(argv: list[str] | None = None) -> int:
    """Parse command line arguments and dispatch to the selected sub-command.

    Args:
        argv: argument strings to parse; ``None`` is handed to ``argparse``
            unchanged, which then reads the process arguments.

    Returns:
        The integer returned by the sub-command's ``main``, or ``1`` after
        printing the help text when no sub-command was given.
    """
    parser = get_parser()
    # Work on a copy of the namespace dict so popping `action` leaves the namespace intact.
    options = vars(parser.parse_args(argv)).copy()
    action = options.pop("action")

    # Each sub-command module is imported inside its branch, so only the
    # selected one is loaded here.
    if action == GENERATE:
        from . import model

        return model.main()
    if action == VALIDATE:
        from . import validate

        return validate.main(options)

    # No sub-command given. `parse_args(["--help"])` would print the help and
    # then raise `SystemExit(0)`, reporting success and never reaching the
    # `return` below; print the help directly so the failure status is returned.
    parser.print_help()
    return 1
134 changes: 134 additions & 0 deletions conda_recipe_v2_schema/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Command line utility for checking a recipe."""

from __future__ import annotations

import hashlib
import sys
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib import parse, request

import yaml
from jsonschema.validators import Draft7Validator
from pygments import highlight
from pygments.formatters import Terminal256Formatter
from pygments.lexers.templates import YamlJinjaLexer

from .model import ComplexRecipe, SimpleRecipe

if TYPE_CHECKING:
from collections.abc import Iterator

# Directory containing this module.
HERE = Path(__file__).parent
# Default schema location: `schema.json` one level above this module's directory.
SCHEMA = HERE.parent / "schema.json"
# Program name; used as the prefix of the temporary work directory.
CLI = "conda-recipe-v2-schema"
# URL template for a conda-forge feedstock's `recipe.yaml` on its `main` branch;
# `{recipe}` is replaced with the feedstock name without the `-feedstock` suffix.
CF_TEMPLATE = (
"https://raw.githubusercontent.com/conda-forge/{recipe}-feedstock/"
"refs/heads/main/recipe/recipe.yaml"
)

def _represent_str(dumper, data):
    """Represent a string, asking for literal block style (``|``) when it is long or multi-line."""
    wants_block = "\n" in data or len(data) > 80
    return dumper.represent_scalar(
        "tag:yaml.org,2002:str", data, style="|" if wants_block else None
    )


# force unescaped multiline string formatting; registered on the safe
# representer class at import time
yaml.representer.SafeRepresenter.add_representer(str, _represent_str)


def get_validator(schema: Path | None = None) -> Draft7Validator:
    """Build a Draft 7 JSON schema validator from a schema file.

    Args:
        schema: path to the schema document; ``SCHEMA`` is used when omitted.

    Returns:
        A ``Draft7Validator`` for the loaded document with the draft's format
        checker enabled.

    Raises:
        RuntimeError: if the file parses to an empty (falsy) document.
            Errors from reading the file are not caught here and propagate
            unchanged.
    """
    schema_path = schema if schema else SCHEMA
    # The file text is parsed with the YAML safe loader.
    document = yaml.safe_load(schema_path.read_text(encoding="utf-8"))
    if document:
        return Draft7Validator(document, format_checker=Draft7Validator.FORMAT_CHECKER)
    raise RuntimeError(
        f"could not retrieve the schema from {schema_path};"
        " maybe run `conda-recipe-v2-schema generate`"
    )


def check_one_local(path: Path, validator: Draft7Validator) -> Iterator[Any]:
    """Validate one local recipe file against the JSON schema and the pydantic model.

    Args:
        path: the recipe file, read as UTF-8 text and parsed as YAML.
        validator: the JSON schema validator to run over the parsed recipe.

    Yields:
        One dict per finding:

        - ``{"message": ...}`` if the file cannot be read or parsed (nothing
          else is checked in that case);
        - ``{"path": ..., "schema_path": ..., "message": ...}`` for each schema
          error, with both paths rendered as ``#/a/0/b/``;
        - ``{"pydantic": ...}`` if building the model raises.
    """
    try:
        recipe = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as err:
        # Report an unreadable or malformed file as a finding so one bad recipe
        # does not abort the check of all the others.
        yield {"message": f"Failed to load {path}: {err}"}
        return

    for error in validator.iter_errors(recipe):
        yield {
            # Path elements can be ints (array indices) as well as strings, and
            # `str.join` rejects non-strings, so stringify each element first.
            "path": "/".join(["#", *map(str, error.path), ""]),
            "schema_path": "/".join(["#", *map(str, error.absolute_schema_path), ""]),
            "message": error.message,
        }

    try:
        # A recipe with an `outputs` key is checked against the multi-output model.
        model_cls = ComplexRecipe if "outputs" in recipe else SimpleRecipe
        model_cls(**recipe)
    except Exception as err:
        # Deliberately broad: besides model validation errors this also covers a
        # recipe that is not a mapping (e.g. an empty file parsed as `None`).
        yield {"pydantic": f"{err}"}


def check_one_recipe(path_or_url: str, validator: Draft7Validator, work_dir: Path) -> Iterator[Any]:
    """Validate one recipe given as a local path, a ``file:`` URL or an ``http(s)`` URL.

    A remote recipe is downloaded to ``work_dir/<sha256 of the URL>/recipe.yaml``
    unless that file already exists, in which case the existing copy is used.

    Args:
        path_or_url: the recipe location as given by the user.
        validator: the JSON schema validator passed on to ``check_one_local``.
        work_dir: the folder under which downloaded recipes are stored.

    Yields:
        Finding dicts: a single ``{"message": ...}`` if the download fails or
        the location cannot be resolved to an existing path, otherwise whatever
        ``check_one_local`` yields for the resolved file.
    """
    url = parse.urlparse(path_or_url)
    path: Path | None = None
    if url.scheme == "file":
        # Convert the URL path to a local one: decodes percent-escapes such as
        # `%20` and applies the platform's path conventions.
        path = Path(request.url2pathname(url.path))
    elif url.scheme in {"http", "https"}:
        sha = hashlib.sha256(path_or_url.encode()).hexdigest()
        path = work_dir / sha / "recipe.yaml"
        if not path.is_file():
            path.parent.mkdir(parents=True, exist_ok=True)
            try:
                request.urlretrieve(path_or_url, path)
            except Exception as err:
                # Remove whatever may have been written so a later run does not
                # mistake a partial download for a cached recipe.
                path.unlink(missing_ok=True)
                yield {"message": f"Failed to download {path_or_url}: {err}"}
                # The failure is already reported; don't add a second finding.
                return
    elif not url.scheme:
        path = Path(path_or_url)

    # Unsupported scheme (path is still None) or a path that doesn't exist.
    if path is None or not path.exists():
        yield {"message": f"Couldn't figure out what to do with {path_or_url}"}
        return

    yield from check_one_local(path, validator)


def check_recipes(
    recipes: list[str],
    work_dir: Path,
    conda_forge: list[str] | None = None,
    schema: Path | None = None,
) -> dict[str, Any]:
    """Validate every requested recipe and collect the findings per recipe.

    Args:
        recipes: recipe paths or URLs.
        work_dir: folder passed on to ``check_one_recipe`` for each recipe.
        conda_forge: names expanded into URLs with ``CF_TEMPLATE``; may be ``None``.
        schema: optional schema path passed to ``get_validator``.

    Returns:
        A dict mapping each recipe location (in sorted order) to the list of
        its findings; the list is empty when nothing was found.
    """
    validator = get_validator(schema)

    targets = recipes + [CF_TEMPLATE.format(recipe=name) for name in (conda_forge or [])]
    targets.sort()

    findings: dict[str, Any] = {}
    for target in targets:
        findings[target] = list(check_one_recipe(target, validator, work_dir))
    return findings


def main(kwargs: dict[str, Any]) -> int:
    """Check the recipes named by the CLI arguments, print a report and return the finding count.

    Args:
        kwargs: the parsed ``validate`` options. ``work_dir``, ``no_pretty`` and
            ``quiet`` are popped from it (the dict is modified in place); the
            remaining entries are forwarded to ``check_recipes``.

    Returns:
        ``1`` if no recipes were checked, otherwise the total number of findings.
    """
    work_dir = kwargs.pop("work_dir")
    no_pretty = kwargs.pop("no_pretty")
    quiet = kwargs.pop("quiet")

    if work_dir is not None:
        findings_by_recipe = check_recipes(work_dir=work_dir, **kwargs)
    else:
        # No work folder given: use a temporary one that is removed after the check.
        with tempfile.TemporaryDirectory(prefix=f"{CLI}-") as scratch:
            findings_by_recipe = check_recipes(work_dir=Path(scratch), **kwargs)

    if not findings_by_recipe:
        print(
            "No recipes were checked; please provide some URLs or conda-forge names",
            file=sys.stderr,
        )
        return 1

    count = sum(len(found) for found in findings_by_recipe.values())

    if count and not quiet:
        # Only recipes that actually have findings appear in the report.
        failing = {recipe: found for recipe, found in findings_by_recipe.items() if found}
        text = yaml.safe_dump(failing, default_flow_style=False)
        if no_pretty:
            print(text)
        else:
            print(highlight(text, YamlJinjaLexer(), Terminal256Formatter()))

    # The summary line goes to stderr and is printed even with `quiet`.
    marker = "!!! " if count else ""
    print(f"{marker}{count} findings in {len(findings_by_recipe)} recipes", file=sys.stderr)

    # NOTE(review): if this value ends up as the process exit status, counts
    # above 255 may wrap on POSIX -- confirm how the entry point uses it.
    return count
Loading