Skip to content

Commit

Permalink
Extend the validate command to validate JSONL datasets (#131)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti authored Jul 22, 2024
1 parent b5551d1 commit b9576cf
Show file tree
Hide file tree
Showing 13 changed files with 343 additions and 22 deletions.
1 change: 0 additions & 1 deletion cmake/FindJSONToolkit.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Configure the vendored JSON Toolkit sources. The body is skipped when
# JSONToolkit_FOUND is already set, and the variable is set at the end of
# the body once the subdirectory has been added.
if(NOT JSONToolkit_FOUND)
# Cache options set before descending into the vendored project
set(JSONTOOLKIT_INSTALL OFF CACHE BOOL "disable installation")
set(JSONTOOLKIT_JSONL OFF CACHE BOOL "disable JSONL support")
# Use the copy that lives under vendor/ in this source tree
add_subdirectory("${PROJECT_SOURCE_DIR}/vendor/jsontoolkit")
set(JSONToolkit_FOUND ON)
endif()
14 changes: 7 additions & 7 deletions docs/validate.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ Validating
> Draft 2020-12 soon.
```sh
jsonschema validate <schema.json> <instance.json> [--http/-h] [--verbose/-v]
[--resolve/-r <schemas-or-directories> ...]
jsonschema validate <schema.json> <instance.json|.jsonl> [--http/-h]
[--verbose/-v] [--resolve/-r <schemas-or-directories> ...]
```

The most popular use case of JSON Schema is to validate JSON documents. The
JSON Schema CLI offers a `validate` command to evaluate a JSON instance against
a JSON Schema, presenting human-friendly information on unsuccessful
validation.
JSON Schema CLI offers a `validate` command to evaluate either a JSON instance
or a JSONL dataset against a JSON Schema, presenting human-friendly information
on unsuccessful validation.

**If you want to validate that a schema adheres to its metaschema, use the
[`metaschema`](./metaschema.markdown) command instead.**
Expand Down Expand Up @@ -55,10 +55,10 @@ error: The target document is expected to be of the given type
jsonschema validate path/to/my/schema.json path/to/my/instance.json
```

### Validate a JSON Schema against its meta-schema
### Validate a JSONL dataset against a schema

```sh
jsonschema validate path/to/my/schema.json
jsonschema validate path/to/my/schema.json path/to/my/dataset.jsonl
```

### Validate a JSON instance enabling HTTP resolution
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ noa_add_default_options(PRIVATE jsonschema_cli)
set_target_properties(jsonschema_cli PROPERTIES OUTPUT_NAME jsonschema)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::uri)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::json)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::jsonl)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::jsonschema)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::hydra::httpclient)

Expand Down
69 changes: 56 additions & 13 deletions src/command_validate.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <sourcemeta/jsontoolkit/json.h>
#include <sourcemeta/jsontoolkit/jsonl.h>
#include <sourcemeta/jsontoolkit/jsonschema.h>

#include <cstdlib> // EXIT_SUCCESS, EXIT_FAILURE
Expand Down Expand Up @@ -46,26 +47,68 @@ auto intelligence::jsonschema::cli::validate(
}

bool result{true};
const auto &instance_path{options.at("").at(1)};
const std::filesystem::path instance_path{options.at("").at(1)};
const auto schema_template{sourcemeta::jsontoolkit::compile(
schema, sourcemeta::jsontoolkit::default_schema_walker, custom_resolver,
sourcemeta::jsontoolkit::default_schema_compiler)};

const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};
if (instance_path.extension() == ".jsonl") {
log_verbose(options) << "Interpreting input as JSONL\n";
std::size_t index{0};

std::ostringstream error;
result = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error, sourcemeta::jsontoolkit::empty_pointer));
auto stream{sourcemeta::jsontoolkit::read_file(instance_path)};
try {
for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) {
std::ostringstream error;
const auto subresult = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
sourcemeta::jsontoolkit::empty_pointer));

if (result) {
log_verbose(options)
<< "ok: " << std::filesystem::weakly_canonical(instance_path).string()
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string() << "\n";
if (subresult) {
log_verbose(options)
<< "ok: "
<< std::filesystem::weakly_canonical(instance_path).string()
<< " (entry #" << index << ")"
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string()
<< "\n";
} else {
std::cerr << "fail: "
<< std::filesystem::weakly_canonical(instance_path).string()
<< " (entry #" << index << ")\n\n";
sourcemeta::jsontoolkit::prettify(instance, std::cerr);
std::cerr << "\n\n";
std::cerr << error.str();
result = false;
break;
}

index += 1;
}
} catch (const sourcemeta::jsontoolkit::ParseError &error) {
// For producing better error messages
throw sourcemeta::jsontoolkit::FileParseError(instance_path, error);
}
} else {
std::cerr << error.str();
const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};

std::ostringstream error;
result = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
sourcemeta::jsontoolkit::empty_pointer));

if (result) {
log_verbose(options)
<< "ok: " << std::filesystem::weakly_canonical(instance_path).string()
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string() << "\n";
} else {
std::cerr << error.str();
}
}

return result ? EXIT_SUCCESS : EXIT_FAILURE;
Expand Down
2 changes: 1 addition & 1 deletion src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Global Options:
Commands:
validate <schema.json> <instance.json> [--http/-h]
validate <schema.json> <instance.json|.jsonl> [--http/-h]
Validate an instance against the given schema.
Expand Down
7 changes: 7 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ add_jsonschema_test_unix(validate/pass_draft7)
add_jsonschema_test_unix(validate/fail_draft4)
add_jsonschema_test_unix(validate/fail_draft6)
add_jsonschema_test_unix(validate/fail_draft7)
add_jsonschema_test_unix(validate/pass_jsonl)
add_jsonschema_test_unix(validate/pass_jsonl_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry)
add_jsonschema_test_unix(validate/fail_jsonl_one)
add_jsonschema_test_unix(validate/fail_jsonl_one_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_all)
add_jsonschema_test_unix(validate/fail_jsonl_all_verbose)

# Test
add_jsonschema_test_unix(test/fail_true_single_resolve)
Expand Down
40 changes: 40 additions & 0 deletions test/validate/fail_jsonl_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/sh
# Test: `validate` on a JSONL dataset in which no entry satisfies the schema
# (the schema requires an array, every entry is an object). The command must
# exit with status 1 and its stderr must match the expected report, which
# contains the failure for entry #0 only.
#
# $1 is the executable under test; it is invoked with the `validate`
# subcommand (presumably the jsonschema CLI passed in by the test harness).

# Abort on any failing command and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted heredoc delimiter: "$schema" is written literally, not expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array"
}
EOF

# One JSON document per line
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
{ "foo": 2 }
{ "foo": 3 }
EOF

# Record the exit status in CODE whether the command succeeds or fails; the
# `&& ... || ...` list keeps errexit from aborting the script on failure
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Unquoted delimiter so that $(realpath "$TMP") is expanded into the
# expected file path
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
{
"foo": 1
}
error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Any difference makes diff exit non-zero, which fails the test via errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
41 changes: 41 additions & 0 deletions test/validate/fail_jsonl_all_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh

set -o errexit
set -o nounset

TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array"
}
EOF

cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
{ "foo": 2 }
{ "foo": 3 }
EOF

"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
{
"foo": 1
}
error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

diff "$TMP/stderr.txt" "$TMP/expected.txt"
32 changes: 32 additions & 0 deletions test/validate/fail_jsonl_invalid_entry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/sh
# Test: `validate` on a JSONL dataset whose second line is not valid JSON
# (the colon between key and value is missing). The command must exit with
# status 1 and stderr must match the expected parse error, which names
# line 2 and the dataset file path.
#
# $1 is the executable under test; it is invoked with the `validate`
# subcommand (presumably the jsonschema CLI passed in by the test harness).

# Abort on any failing command and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted heredoc delimiter: "$schema" is written literally, not expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# The first and third lines are well-formed; the second is the broken one
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": "first" }
{ "foo" "second" }
{ "foo": "third" }
EOF

# Record the exit status in CODE whether the command succeeds or fails; the
# `&& ... || ...` list keeps errexit from aborting the script on failure
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Unquoted delimiter so that $(realpath "$TMP") is expanded into the
# expected file path
cat << EOF > "$TMP/expected.txt"
error: Failed to parse the JSON document at line 2 and column 10
$(realpath "$TMP")/instance.jsonl
EOF

# Any difference makes diff exit non-zero, which fails the test via errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
42 changes: 42 additions & 0 deletions test/validate/fail_jsonl_one.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/sh
# Test: `validate` on a JSONL dataset in which exactly one entry violates
# the schema (the schema requires an object, the second entry is an array).
# The command must exit with status 1 and stderr must match the expected
# report, which identifies the offending entry as "#1".
#
# $1 is the executable under test; it is invoked with the `validate`
# subcommand (presumably the jsonschema CLI passed in by the test harness).

# Abort on any failing command and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted heredoc delimiter: "$schema" is written literally, not expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# Only the middle line is not an object
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
[ { "foo": 2 } ]
{ "foo": 3 }
EOF

# Record the exit status in CODE whether the command succeeds or fails; the
# `&& ... || ...` list keeps errexit from aborting the script on failure
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Unquoted delimiter so that $(realpath "$TMP") is expanded into the
# expected file path
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
[
{
"foo": 2
}
]
error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Any difference makes diff exit non-zero, which fails the test via errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
45 changes: 45 additions & 0 deletions test/validate/fail_jsonl_one_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh
# Test: `validate --verbose` on a JSONL dataset in which exactly one entry
# violates the schema (the schema requires an object, the second entry is
# an array). The command must exit with status 1 and stderr must match the
# expected report: the "Interpreting input as JSONL" notice, an "ok" record
# for entry #0, then the failure for entry #1.
#
# $1 is the executable under test; it is invoked with the `validate`
# subcommand (presumably the jsonschema CLI passed in by the test harness).

# Abort on any failing command and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted heredoc delimiter: "$schema" is written literally, not expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# Only the middle line is not an object
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
[ { "foo": 2 } ]
{ "foo": 3 }
EOF

# Record the exit status in CODE whether the command succeeds or fails; the
# `&& ... || ...` list keeps errexit from aborting the script on failure
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Unquoted delimiter so that $(realpath "$TMP") is expanded into the
# expected file paths
cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL
ok: $(realpath "$TMP")/instance.jsonl (entry #0)
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
[
{
"foo": 2
}
]
error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Any difference makes diff exit non-zero, which fails the test via errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
32 changes: 32 additions & 0 deletions test/validate/pass_jsonl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/sh
# Test: `validate` on a JSONL dataset in which every entry satisfies the
# schema ("foo" must be a string). The command must succeed and, without
# --verbose, print nothing on either stdout or stderr.
#
# $1 is the executable under test; it is invoked with the `validate`
# subcommand (presumably the jsonschema CLI passed in by the test harness).

# Abort on any failing command and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted heredoc delimiter: "$schema" is written literally, not expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# One JSON document per line
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": "first" }
{ "foo": "second" }
{ "foo": "third" }
EOF

# stderr goes to output.txt and stdout is then pointed at the same file, so
# both streams are captured together. A non-zero exit status aborts the
# script here because of errexit.
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2> "$TMP/output.txt" 1>&2

# The expected output is an empty file
cat << EOF > "$TMP/expected.txt"
EOF

# Any difference makes diff exit non-zero, which fails the test via errexit
diff "$TMP/output.txt" "$TMP/expected.txt"
Loading

0 comments on commit b9576cf

Please sign in to comment.