From 45549febfa41129115884835580860085514bfe2 Mon Sep 17 00:00:00 2001 From: Aaron Schwartz Date: Tue, 26 May 2026 14:28:13 -0400 Subject: [PATCH] Add dottxt schema check cmd --- README.md | 1 + docs/cli.md | 9 +++ src/dottxt/cli.py | 99 ++++++++++++++++++++++++--- tests/test_cli.py | 169 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 267 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d42285f..a6cd8ec 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Use the `dottxt` CLI for login, model discovery, and one-off generation. - CLI reference: [docs/cli.md](docs/cli.md) - Client reference: [docs/client.md](docs/client.md) +- Schema validation: `dottxt schema check schema.json` ## Client Surfaces diff --git a/docs/cli.md b/docs/cli.md index 0a8897c..a5a8701 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -73,3 +73,12 @@ Output rules: - If the selected model is unavailable for your key, generate returns a targeted error with guidance to run `dottxt models` and set `DOTTXT_MODEL` or pass `--model` + +### `dottxt schema check` + +Validate a schema file as JSON Schema. + +- Usage (file): `dottxt schema check schema.json` +- `<schema_file>`: JSON file path to validate +- `--json`: emits structured payload including `status` and `schema_file` +- Errors follow the shared `--json` error envelope when enabled diff --git a/src/dottxt/cli.py b/src/dottxt/cli.py index 9128cfc..fdf3143 100644 --- a/src/dottxt/cli.py +++ b/src/dottxt/cli.py @@ -9,6 +9,8 @@ from typing import Any, NoReturn import click +from jsonschema import Draft202012Validator +from jsonschema.exceptions import SchemaError from openai import ( APIConnectionError, APIStatusError, @@ -198,6 +200,52 @@ def _read_stdin() -> str: return sys.stdin.read() +def _load_schema_file( + schema_file: Path, + *, + json_mode: bool, +) -> tuple[str, Any]: + """Read and validate a schema file. + + Args: + schema_file: Path to schema JSON file. 
+ json_mode: Whether machine-readable errors should be emitted. + + Returns: + Tuple of raw schema text and parsed JSON payload. + + Raises: + click.ClickException: When file is missing, invalid JSON, or invalid schema. + """ + try: + schema_text = schema_file.read_text(encoding="utf-8") + except FileNotFoundError: + _fail(f"Schema file not found: {schema_file}", json_mode=json_mode) + + try: + schema_payload = json.loads(schema_text) + except json.JSONDecodeError: + _fail( + "Schema file is invalid: schema file must contain valid JSON", + json_mode=json_mode, + ) + if not isinstance(schema_payload, dict): + _fail( + "Schema file is invalid: schema must contain a JSON object", + json_mode=json_mode, + ) + + try: + Draft202012Validator.check_schema(schema_payload) + except SchemaError as exc: + _fail( + f"Schema file is invalid: {exc}", + json_mode=json_mode, + ) + + return schema_text, schema_payload + + def _emit_verbose(ctx: click.Context, message: str, *, data: Any | None = None) -> None: """Print human-oriented verbose diagnostics to stderr.""" if not bool(ctx.obj["verbose"]): @@ -418,6 +466,43 @@ def models(ctx: click.Context, author: str | None) -> None: click.echo(model_id) +@main.group() +def schema() -> None: + """Schema utilities.""" + + +@schema.command(name="check") +@click.argument( + "schema_file", + required=True, + metavar="", + type=click.Path(dir_okay=False, path_type=Path), +) +@click.pass_context +def schema_check(ctx: click.Context, schema_file: Path) -> None: + """Validate a JSON Schema file.""" + json_mode = bool(ctx.obj["json_mode"]) + _load_schema_file( + schema_file, + json_mode=json_mode, + ) + _emit_verbose( + ctx, + "Validated schema file.", + data={"schema_file": str(schema_file)}, + ) + if json_mode: + _emit( + { + "status": "ok", + "schema_file": str(schema_file), + }, + json_mode=True, + ) + return + click.echo(f"Schema is valid JSON Schema: {schema_file}") + + @main.command(name="generate") @click.option( "-m", @@ -449,16 +534,10 
@@ def generate( The model resolves from --model, then DOTTXT_MODEL. """ json_mode = bool(ctx.obj["json_mode"]) - if not schema_file.exists() or not schema_file.is_file(): - message = f"Schema file not found: {schema_file}" - _fail(message, json_mode=json_mode) - - schema_text = schema_file.read_text(encoding="utf-8") - try: - schema_payload = json.loads(schema_text) - except json.JSONDecodeError as exc: - message = f"Schema file is not valid JSON: {exc.msg}" - _fail(message, json_mode=json_mode) + schema_text, schema_payload = _load_schema_file( + schema_file, + json_mode=json_mode, + ) if prompt_arg is not None: final_prompt = prompt_arg diff --git a/tests/test_cli.py b/tests/test_cli.py index cad84b4..c22cb67 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -180,6 +180,7 @@ def test_login_persists_credentials( ) -> None: """Login should persist credentials from stdin.""" monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path)) + monkeypatch.delenv("DOTTXT_API_KEY", raising=False) result = _invoke(runner, ["login"], input_text="test-key\n") assert result.exit_code == 0 @@ -199,6 +200,7 @@ def test_login_verbose_prints_full_payload( ) -> None: """JSON login should print payload details while verbose stays orthogonal.""" monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path)) + monkeypatch.delenv("DOTTXT_API_KEY", raising=False) payload = _invoke_json( runner, ["--json", "--verbose", "login"], @@ -420,6 +422,116 @@ def test_models_fails_without_available_api_key( assert "No API key available" in result.output +def test_schema_check_succeeds_with_valid_schema( + runner: CliRunner, + schema_file: Path, +) -> None: + """schema check should print success for valid JSON Schema files.""" + result = _invoke(runner, ["schema", "check", str(schema_file)]) + + assert result.exit_code == 0 + assert result.output.strip() == f"Schema is valid JSON Schema: {schema_file}" + + +def test_schema_check_json_mode_returns_summary_payload( + runner: CliRunner, + schema_file: Path, +) 
-> None: + """schema check JSON mode should return minimal success payload.""" + payload = _invoke_json(runner, ["--json", "schema", "check", str(schema_file)]) + + assert isinstance(payload, dict) + assert payload == { + "status": "ok", + "schema_file": str(schema_file), + } + + +def test_schema_check_fails_for_invalid_json_schema( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check should reject malformed JSON Schema documents.""" + invalid_schema = _create_schema(tmp_path, content='{"type": 1}') + + result = _invoke(runner, ["schema", "check", str(invalid_schema)]) + + assert result.exit_code == 1 + assert "Schema file is invalid:" in result.output + assert "is not valid under any of the given schemas" in result.output + + +def test_schema_check_fails_for_empty_schema_file( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check should reject empty schema files as invalid JSON.""" + empty_schema = _create_schema(tmp_path, content="") + + result = _invoke(runner, ["schema", "check", str(empty_schema)]) + + assert result.exit_code == 1 + assert "Schema file is invalid:" in result.output + assert "schema file must contain valid JSON" in result.output + + +def test_schema_check_fails_for_invalid_json_syntax( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check should reject schema files that are not valid JSON.""" + invalid_json_schema = _create_schema(tmp_path, content="{") + + result = _invoke(runner, ["schema", "check", str(invalid_json_schema)]) + + assert result.exit_code == 1 + assert "Schema file is invalid:" in result.output + assert "schema file must contain valid JSON" in result.output + + +def test_schema_check_fails_for_non_object_json( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check should reject JSON values that are not objects.""" + array_schema = _create_schema(tmp_path, content="[]") + + result = _invoke(runner, ["schema", "check", str(array_schema)]) + + assert result.exit_code == 1 
+ assert "Schema file is invalid:" in result.output + assert "schema must contain a JSON object" in result.output + + +def test_schema_check_missing_file_human_mode_error_message( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check should show a clean not-found message in human mode.""" + missing_schema = tmp_path / "missing-schema-human.json" + + result = _invoke(runner, ["schema", "check", str(missing_schema)]) + + assert result.exit_code == 1 + assert f"Schema file not found: {missing_schema}" in result.output + + +def test_schema_check_json_mode_errors_are_machine_readable( + runner: CliRunner, + tmp_path: Path, +) -> None: + """schema check errors should use the shared JSON error envelope.""" + missing_schema = tmp_path / "missing-schema.json" + result = _invoke(runner, ["--json", "schema", "check", str(missing_schema)]) + + assert result.exit_code == 1 + payload = _parse_json_output(result.output) + assert isinstance(payload, dict) + assert payload == { + "error": {"message": f"Schema file not found: {missing_schema}"}, + } + + @pytest.mark.parametrize( ("args", "expect_full_payload"), [ @@ -665,12 +777,67 @@ def test_generate_error_paths_and_usage_codes( usage_error = _invoke(runner, ["generate", "--unknown-flag"]) assert invalid_json.exit_code == 1 - assert "not valid JSON" in invalid_json.output + assert "Schema file is invalid:" in invalid_json.output + assert "must contain valid JSON" in invalid_json.output assert missing_schema_usage.exit_code == 2 assert "Missing option '-s' / '--schema'" in missing_schema_usage.output assert usage_error.exit_code == 2 +def test_generate_rejects_invalid_json_schema_before_sdk_call( + runner: CliRunner, + tmp_path: Path, +) -> None: + """Generate should fail fast for invalid JSON Schema documents.""" + invalid_schema = _create_schema( + tmp_path, + name="invalid-schema.json", + content='{"type": 1}', + ) + + result = _invoke( + runner, + ["generate", "-m", "openai/gpt-oss-20b", "-s", 
str(invalid_schema), "x y"], + ) + + assert result.exit_code == 1 + assert "Schema file is invalid:" in result.output + assert "is not valid under any of the given schemas" in result.output + assert FakeDotTxt.generate_calls == [] + + +def test_generate_invalid_json_schema_json_mode_is_machine_readable( + runner: CliRunner, + tmp_path: Path, +) -> None: + """Generate invalid schema errors should use the shared JSON envelope.""" + invalid_schema = _create_schema( + tmp_path, + name="invalid-schema.json", + content='{"type": 1}', + ) + + result = _invoke( + runner, + [ + "--json", + "generate", + "-m", + "openai/gpt-oss-20b", + "-s", + str(invalid_schema), + "x y", + ], + ) + + assert result.exit_code == 1 + payload = _parse_json_output(result.output) + assert isinstance(payload, dict) + assert payload["error"]["message"].startswith("Schema file is invalid:") + assert "is not valid under any of the given schemas" in payload["error"]["message"] + assert FakeDotTxt.generate_calls == [] + + def test_generate_requires_model_when_env_default_missing( runner: CliRunner, schema_file: Path,