diff --git a/include/simfil/model/schema.h b/include/simfil/model/schema.h index 63cb0a6..e072b7f 100644 --- a/include/simfil/model/schema.h +++ b/include/simfil/model/schema.h @@ -3,6 +3,7 @@ #include "simfil/model/string-pool.h" #include #include +#include #include #include #include @@ -21,6 +22,28 @@ using SchemaId = std::uint16_t; constexpr SchemaId NoSchemaId = SchemaId{0}; constexpr SchemaId MaxSchemaId = SchemaId{std::numeric_limits::max()}; +/** + * One segment in a schema-derived query path. + * + * Field segments address object members. Array-element segments represent the + * non-recursive `*` operator needed to traverse array elements precisely. + */ +struct SchemaPathSegment +{ + enum class Kind { + Field, + ArrayElement, + }; + + Kind kind = Kind::Field; + StringId field = 0; + + auto operator<=>(const SchemaPathSegment&) const = default; +}; + +/** Sequence of schema path segments from a root schema to a reachable value. */ +using SchemaPath = std::vector; + /** * Concept defining a callback to query a Schema* by SchemaId. */ @@ -53,6 +76,8 @@ class Schema Clean, }; + using SchemaIdStack = sfl::small_vector; + virtual ~Schema() = default; /** @@ -107,6 +132,47 @@ class Schema return {}; } + /** + * Return enum-like string symbols accepted directly by this schema node. + * + * Unlike nestedEnumSymbols(), this does not include descendants and is used + * to derive precise schema paths for auto-wildcard rewrites. + */ + virtual auto directEnumSymbols() const & -> std::span + { + return {}; + } + + /** + * Enumerate precise paths to all fields with the requested name. + */ + static auto fieldPaths(SchemaId root, + const std::function& queryFn, + StringId field) -> std::vector + { + std::vector paths; + SchemaIdStack visited; + SchemaPath current; + collectFieldPaths(root, queryFn, field, visited, current, paths); + sortUniquePaths(paths); + return paths; + } + + /** + * Enumerate precise paths to all values that can hold the enum-like symbol. + */ + static auto enumSymbolPaths(SchemaId root, + const std::function& queryFn, + StringId symbol) -> std::vector + { + std::vector paths; + SchemaIdStack visited; + SchemaPath current; + collectEnumSymbolPaths(root, queryFn, symbol, visited, current, paths); + sortUniquePaths(paths); + return paths; + } + /** * Return true once `canHaveField` is backed by finalized field caches. */ @@ -124,8 +190,6 @@ class Schema } protected: - using SchemaIdStack = sfl::small_vector; - /** * Append all fields reachable from this schema without relying on cached * finalization state. This lets cyclic schema graphs still produce an exact @@ -145,6 +209,111 @@ class Schema { } + /** + * Visit fields declared directly by this schema and their possible child + * schemas. The default is empty for scalar schemas. + */ + virtual auto forEachDirectField( + const std::function)>&) const -> void + { + } + + /** + * Visit possible array element schemas. The default is empty for non-arrays. + */ + virtual auto forEachElementSchema(const std::function&) const -> void + { + } + + /** + * Recursively collect schema paths to matching fields. + */ + static auto collectFieldPaths(SchemaId schemaId, + const std::function& queryFn, + StringId field, + SchemaIdStack& visited, + SchemaPath& current, + std::vector& paths) -> void + { + if (schemaId == NoSchemaId || std::ranges::find(visited, schemaId) != visited.end()) + return; + + auto const* schema = queryFn(schemaId); + if (!schema) + return; + + visited.push_back(schemaId); + + schema->forEachDirectField([&](StringId directField, std::span childSchemas) { + current.push_back({SchemaPathSegment::Kind::Field, directField}); + if (directField == field) + paths.push_back(current); + for (auto childSchemaId : childSchemas) + collectFieldPaths(childSchemaId, queryFn, field, visited, current, paths); + current.pop_back(); + }); + + schema->forEachElementSchema([&](SchemaId elementSchemaId) { + current.push_back({SchemaPathSegment::Kind::ArrayElement, 0}); + collectFieldPaths(elementSchemaId, queryFn, field, visited, current, paths); + current.pop_back(); + }); + + visited.pop_back(); + } + + /** + * Recursively collect schema paths to values accepting a matching enum-like + * string symbol. + */ + static auto collectEnumSymbolPaths(SchemaId schemaId, + const std::function& queryFn, + StringId symbol, + SchemaIdStack& visited, + SchemaPath& current, + std::vector& paths) -> void + { + if (schemaId == NoSchemaId || std::ranges::find(visited, schemaId) != visited.end()) + return; + + auto const* schema = queryFn(schemaId); + if (!schema) + return; + + visited.push_back(schemaId); + + for (auto directSymbol : schema->directEnumSymbols()) { + if (directSymbol == symbol) + paths.push_back(current); + } + + schema->forEachDirectField([&](StringId directField, std::span childSchemas) { + current.push_back({SchemaPathSegment::Kind::Field, directField}); + for (auto childSchemaId : childSchemas) + collectEnumSymbolPaths(childSchemaId, queryFn, symbol, visited, current, paths); + current.pop_back(); + }); + + schema->forEachElementSchema([&](SchemaId elementSchemaId) { + current.push_back({SchemaPathSegment::Kind::ArrayElement, 0}); + collectEnumSymbolPaths(elementSchemaId, queryFn, symbol, visited, current, paths); + current.pop_back(); + }); + + visited.pop_back(); + } + + /** + * Keep path rewrites deterministic and avoid duplicate paths from combined + * schemas or shared references. + */ + static auto sortUniquePaths(std::vector& paths) -> void + { + std::ranges::sort(paths); + auto duplicates = std::ranges::unique(paths); + paths.erase(duplicates.begin(), duplicates.end()); + } + /** * Append reachable values through a schema id, using finalized child * caches when possible and falling back to raw graph traversal for cycles. @@ -327,6 +496,13 @@ class ObjectSchema : public Schema return {fields_.begin(), fields_.end()}; } + auto forEachDirectField( + const std::function)>& fn) const -> void override + { + for (auto const& field : fields_) + fn(field.field, {field.schemas.begin(), field.schemas.end()}); + } + auto nestedFields() const & -> std::span override { return {flatFields_.cbegin(), flatFields_.cend()}; @@ -438,6 +614,11 @@ class ValueSchema : public Schema return {enumSymbols_.cbegin(), enumSymbols_.cend()}; } + auto directEnumSymbols() const & -> std::span override + { + return {enumSymbols_.cbegin(), enumSymbols_.cend()}; + } + auto finalized() const -> bool override { return state_ == State::Clean; @@ -535,6 +716,12 @@ class ArraySchema : public Schema return {schemas_.begin(), schemas_.end()}; } + auto forEachElementSchema(const std::function& fn) const -> void override + { + for (auto schemaId : schemas_) + fn(schemaId); + } + private: auto collectNestedFields(const std::function& lookup, SchemaIdStack& visited, diff --git a/include/simfil/simfil.h b/include/simfil/simfil.h index 2b98016..b461266 100644 --- a/include/simfil/simfil.h +++ b/include/simfil/simfil.h @@ -11,12 +11,52 @@ #include "simfil/diagnostics.h" #include "simfil/value.h" #include "simfil/error.h" +#include "simfil/model/schema.h" namespace simfil { struct ModelNode; +/** + * Options used while parsing and rewriting a query. + */ +struct CompileOptions +{ + bool any = true; + bool autoWildcard = false; + SchemaId rootSchema = NoSchemaId; +}; + +/** + * One schema path referenced by a compiled expression. + * + * The path is expressed relative to the root schema supplied to + * `referencedSchemaPaths`. If `viaWildcard` is set, the path came from a + * recursive wildcard-field lookup such as `**.foo`. + */ +struct ReferencedSchemaPath +{ + SchemaPath path; + SourceLocation location; + bool viaWildcard = false; +}; + +/** + * Schema references discovered by static AST inspection. + * + * The flags make the result conservative: callers can reject automatic scope + * decisions when the query contains broad wildcards or field access that cannot + * be tied to concrete schema paths. + */ +struct ReferencedSchemaPaths +{ + std::vector paths; + bool hasDynamicAccess = false; + bool hasUnresolvedAccess = false; + bool hasBroadWildcardAccess = false; +}; + /** * Compile expression `src`. * Param: @@ -30,6 +70,23 @@ struct ModelNode; */ auto compile(Environment& env, std::string_view query, bool any = true, bool autoWildcard = false) -> tl::expected; +/** + * Compile expression `src` with explicit options. + * + * If rootSchema is set and autoWildcard is enabled, single field/enum queries + * are classified through the schema instead of legacy casing heuristics. + */ +auto compile(Environment& env, std::string_view query, CompileOptions options) -> tl::expected; + +/** + * Collect schema paths that are referenced by a compiled query. + * + * This is static analysis over the AST, not runtime evaluation: both sides of + * `and`/`or` are inspected, and schema-aware auto-wildcard rewrites are resolved + * to the exact paths they can touch. + */ +auto referencedSchemaPaths(Environment& env, const AST& ast, SchemaId rootSchema) -> tl::expected; + /** * Evaluate compiled expression. * Param: diff --git a/src/expressions.cpp b/src/expressions.cpp index 6dc410c..78e3929 100644 --- a/src/expressions.cpp +++ b/src/expressions.cpp @@ -325,6 +325,11 @@ ConstExpr::ConstExpr(Value value) : value_(std::move(value)) {} +ConstExpr::ConstExpr(Value value, const Token& token) + : Expr(token) + , value_(std::move(value)) +{} + auto ConstExpr::type() const -> Type { return Type::VALUE; @@ -708,6 +713,15 @@ PathExpr::PathExpr(ExprPtr left, ExprPtr right) assert(right_.get()); } +PathExpr::PathExpr(ExprPtr left, ExprPtr right, SourceLocation location) + : Expr(location) + , left_(std::move(left)) + , right_(std::move(right)) +{ + assert(left_.get()); + assert(right_.get()); +} + auto PathExpr::type() const -> Type { return Type::PATH; diff --git a/src/expressions.h b/src/expressions.h index 8c717a3..48364d9 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -129,12 +129,20 @@ class ConstExpr : public Expr : value_(Value::make(std::forward(value))) {} + template + requires (!std::is_base_of_v>) + ConstExpr(CType_&& value, const Token& token) + : Expr(token) + , value_(Value::make(std::forward(value))) + {} + ConstExpr(const ConstExpr&) = delete; ConstExpr(ConstExpr&&) = delete; auto operator=(const ConstExpr&) -> ConstExpr& = delete; auto operator=(ConstExpr&&) -> ConstExpr& = delete; explicit ConstExpr(Value value); + ConstExpr(Value value, const Token& token); auto type() const -> Type override; auto constant() const -> bool override; @@ -237,6 +245,7 @@ class PathExpr : public Expr { public: PathExpr(ExprPtr left, ExprPtr right); + PathExpr(ExprPtr left, ExprPtr right, SourceLocation location); auto type() const -> Type override; auto ieval(Context ctx, const Value& val, const ResultFn& ores) const -> tl::expected override; diff --git a/src/simfil.cpp b/src/simfil.cpp index 2ea026e..73872f5 100644 --- a/src/simfil.cpp +++ b/src/simfil.cpp @@ -94,6 +94,316 @@ static auto isSymbolWord(std::string_view sv) -> bool }) && numUpperCaseLetters > 0; } +/** + * Extract the user-facing string from a single field or string-literal query. + */ +static auto schemaLookupName(const Expr& expr) -> std::optional +{ + if (auto const* field = dynamic_cast(&expr)) { + return field->field(); + } + + if (auto const* constant = dynamic_cast(&expr)) { + auto const& value = constant->value(); + if (value.isa(ValueType::String)) { + return value.as(); + } + } + + return std::nullopt; +} + +/** + * Convert a schema path to a SIMFIL path expression. + */ +static auto pathExpressionFromSchemaPath(Environment& env, const SchemaPath& path, SourceLocation location) -> expected +{ + ExprPtr expr = std::make_unique("_"); + for (auto const& segment : path) { + ExprPtr next; + switch (segment.kind) { + case SchemaPathSegment::Kind::Field: { + auto fieldName = env.strings()->resolve(segment.field); + if (!fieldName) { + return unexpected(Error::ParserError, "Schema path contains an unknown field string id."); + } + next = std::make_unique(std::string(*fieldName)); + break; + } + case SchemaPathSegment::Kind::ArrayElement: + next = std::make_unique(); + break; + } + expr = std::make_unique(std::move(expr), std::move(next), location); + } + return expr; +} + +/** + * Build `exact.path == enumValue` expressions for all schema-derived paths. + */ +static auto enumPathExpression( + Environment& env, + std::vector const& paths, + std::string enumValue, + SourceLocation location) -> expected +{ + ExprPtr result; + for (auto const& path : paths) { + auto lhs = pathExpressionFromSchemaPath(env, path, location); + TRY_EXPECTED(lhs); + + auto comparison = std::make_unique>( + std::move(*lhs), + std::make_unique(Value::make(std::string(enumValue)))); + + if (!result) + result = std::move(comparison); + else + result = std::make_unique(std::move(result), std::move(comparison)); + } + return result; +} + +/** + * Rewrite a single field/enum query by using schema metadata as source of truth. + */ +static auto rewriteAutoWildcardBySchema(Environment& env, ExprPtr expr, SchemaId rootSchema) -> expected +{ + if (rootSchema == NoSchemaId || !expr) + return expr; + + auto name = schemaLookupName(*expr); + if (!name) + return expr; + + // Querying the root schema may materialize schema-owned strings in + // completion/compile-local environments. + (void) env.querySchema(rootSchema); + + auto stringId = env.strings()->get(*name); + if (stringId == StringPool::Empty) + return expr; + + auto querySchema = [&env](SchemaId schemaId) -> const Schema* { + return env.querySchema(schemaId); + }; + + auto enumPaths = Schema::enumSymbolPaths(rootSchema, querySchema, stringId); + auto fieldPaths = Schema::fieldPaths(rootSchema, querySchema, stringId); + + if (!enumPaths.empty() && !fieldPaths.empty()) { + return unexpected( + Error::ParserError, + fmt::format("Ambiguous schema auto-wildcard token '{}': it is both a field and an enum-like string symbol.", *name)); + } + + if (!enumPaths.empty()) + return enumPathExpression(env, enumPaths, std::move(*name), expr->sourceLocation()); + + if (!fieldPaths.empty()) + return std::make_unique(true, std::move(*name), expr->sourceLocation()); + + return expr; +} + +static auto fieldPathSegment(Environment& env, std::string_view fieldName) -> std::optional +{ + auto fieldId = env.strings()->get(fieldName); + if (fieldId == StringPool::Empty) { + return std::nullopt; + } + return SchemaPathSegment{SchemaPathSegment::Kind::Field, fieldId}; +} + +static auto stringConstValue(const Expr& expr) -> std::optional +{ + auto const* constant = dynamic_cast(&expr); + if (!constant) { + return std::nullopt; + } + auto const& value = constant->value(); + if (!value.isa(ValueType::String)) { + return std::nullopt; + } + return value.as(); +} + +/** + * Flatten a static field path expression to a schema path. Returns nullopt for + * dynamic expressions, broad wildcards, or operators that cannot name one path. + */ +static auto flattenReferencedPath(Environment& env, const Expr& expr) -> expected, Error> +{ + if (auto const* field = dynamic_cast(&expr)) { + if (field->isCurrent()) { + return SchemaPath{}; + } + auto segment = fieldPathSegment(env, field->field()); + if (!segment) { + return std::nullopt; + } + return SchemaPath{*segment}; + } + + if (auto const* path = dynamic_cast(&expr)) { + auto left = flattenReferencedPath(env, *path->left()); + TRY_EXPECTED(left); + if (!*left) { + return std::nullopt; + } + + SchemaPath result = std::move(**left); + if (auto const* field = dynamic_cast(path->right())) { + auto segment = fieldPathSegment(env, field->field()); + if (!segment) { + return std::nullopt; + } + result.push_back(*segment); + return result; + } + if (dynamic_cast(path->right())) { + result.push_back({SchemaPathSegment::Kind::ArrayElement, 0}); + return result; + } + if (auto const* subscript = dynamic_cast(path->right())) { + auto right = flattenReferencedPath(env, *subscript); + TRY_EXPECTED(right); + if (!*right) { + return std::nullopt; + } + result.insert(result.end(), (*right)->begin(), (*right)->end()); + return result; + } + return std::nullopt; + } + + if (auto const* subscript = dynamic_cast(&expr)) { + auto left = flattenReferencedPath(env, *subscript->left_); + TRY_EXPECTED(left); + if (!*left) { + return std::nullopt; + } + auto index = stringConstValue(*subscript->index_); + if (!index) { + return std::nullopt; + } + SchemaPath result = std::move(**left); + auto segment = fieldPathSegment(env, *index); + if (!segment) { + return std::nullopt; + } + result.push_back(*segment); + return result; + } + + return std::nullopt; +} + +static auto addReferencedPath( + ReferencedSchemaPaths& result, + SchemaPath path, + SourceLocation location, + bool viaWildcard) -> void +{ + if (path.empty()) { + return; + } + if (std::ranges::any_of(result.paths, [&](auto const& existing) { + return existing.path == path && existing.viaWildcard == viaWildcard; + })) { + return; + } + result.paths.push_back({std::move(path), location, viaWildcard}); +} + +static auto schemaPathIsReachable(Environment& env, SchemaId rootSchema, const SchemaPath& path) -> bool +{ + auto leafField = std::ranges::find_if( + path.rbegin(), + path.rend(), + [](auto const& segment) { + return segment.kind == SchemaPathSegment::Kind::Field; + }); + if (leafField == path.rend()) { + return true; + } + + auto querySchema = [&env](SchemaId schemaId) -> const Schema* { + return env.querySchema(schemaId); + }; + auto possiblePaths = Schema::fieldPaths(rootSchema, querySchema, leafField->field); + return std::ranges::find(possiblePaths, path) != possiblePaths.end(); +} + +static auto collectReferencedSchemaPaths( + Environment& env, + const Expr& expr, + SchemaId rootSchema, + ReferencedSchemaPaths& result) -> expected +{ + if (dynamic_cast(&expr)) { + result.hasBroadWildcardAccess = true; + return {}; + } + + if (auto const* wildcardField = dynamic_cast(&expr)) { + // Non-recursive child wildcards (`*.foo`) cannot currently be mapped + // to exact schema paths without exposing child traversal internals. + if (!wildcardField->recurse_) { + result.hasDynamicAccess = true; + return {}; + } + + auto fieldId = env.strings()->get(wildcardField->name_); + if (fieldId == StringPool::Empty) { + result.hasUnresolvedAccess = true; + return {}; + } + + auto querySchema = [&env](SchemaId schemaId) -> const Schema* { + return env.querySchema(schemaId); + }; + auto paths = Schema::fieldPaths(rootSchema, querySchema, fieldId); + if (paths.empty()) { + result.hasUnresolvedAccess = true; + return {}; + } + for (auto& path : paths) { + addReferencedPath(result, std::move(path), wildcardField->sourceLocation(), true); + } + return {}; + } + + if (dynamic_cast(&expr) + || dynamic_cast(&expr) + || dynamic_cast(&expr)) { + auto path = flattenReferencedPath(env, expr); + TRY_EXPECTED(path); + if (*path) { + if (schemaPathIsReachable(env, rootSchema, **path)) { + addReferencedPath(result, std::move(**path), expr.sourceLocation(), false); + } + else { + result.hasUnresolvedAccess = true; + } + return {}; + } + if (dynamic_cast(&expr)) { + result.hasDynamicAccess = true; + } + else { + result.hasUnresolvedAccess = true; + } + } + + for (auto i = 0u; i < expr.numChildren(); ++i) { + auto childResult = collectReferencedSchemaPaths(env, *expr.childAt(i), rootSchema, result); + TRY_EXPECTED(childResult); + } + return {}; +} + /** * RIIA Helper for calling function at destruction. */ @@ -421,7 +731,7 @@ class ScalarParser : public PrefixParselet { auto parse(Parser& p, Token t) const -> expected override { - return std::make_unique(std::get(t.value)); + return std::make_unique(std::get(t.value), t); } }; @@ -435,7 +745,7 @@ class RegExpParser : public PrefixParselet auto parse(Parser& p, Token t) const -> expected override { auto value = ReType::Type.make(std::get(t.value)); - return std::make_unique(std::move(value)); + return std::make_unique(std::move(value), t); } }; @@ -456,7 +766,7 @@ class ConstParser : public PrefixParselet auto parse(Parser& p, Token t) const -> expected override { - return std::make_unique(value_); + return std::make_unique(value_, t); } Value value_; @@ -589,11 +899,11 @@ class WordParser : public PrefixParselet } else if (!p.ctx.inPath) { /* Parse Symbols (words in upper-case) */ if (isSymbolWord(word)) { - return std::make_unique(Value::make(std::move(word))); + return std::make_unique(Value::make(std::move(word)), t); } /* Constant */ else if (auto constant = p.env->findConstant(word)) { - return std::make_unique(*constant); + return std::make_unique(*constant, t); } } @@ -651,7 +961,7 @@ class CompletionWordParser : public WordParser } /* Constant */ else if (auto constant = p.env->findConstant(word)) { - return std::make_unique(*constant); + return std::make_unique(*constant, t); } } @@ -839,6 +1149,11 @@ static auto setupParser(Parser& p) } auto compile(Environment& env, std::string_view query, bool any, bool autoWildcard) -> expected +{ + return compile(env, query, CompileOptions{.any = any, .autoWildcard = autoWildcard}); +} + +auto compile(Environment& env, std::string_view query, CompileOptions options) -> expected { auto tokens = tokenize(query); TRY_EXPECTED(tokens); @@ -850,8 +1165,13 @@ auto compile(Environment& env, std::string_view query, bool any, bool autoWildca auto root = p.parse(); TRY_EXPECTED(root); + if (options.autoWildcard && options.rootSchema != NoSchemaId) { + root = rewriteAutoWildcardBySchema(env, std::move(*root), options.rootSchema); + TRY_EXPECTED(root); + } + /* Expand a single value to `** == ` */ - if (autoWildcard && *root && (*root)->constant()) { + if (options.autoWildcard && options.rootSchema == NoSchemaId && *root && (*root)->constant()) { root = simplifyOrForward(p.env, std::make_unique>( std::make_unique(), std::move(*root))); } @@ -859,7 +1179,7 @@ auto compile(Environment& env, std::string_view query, bool any, bool autoWildca if (!*root) return unexpected(Error::ParserError, "Expression is null"); - if (any) { + if (options.any) { std::vector args; args.emplace_back(std::move(*root)); return simplifyOrForward(p.env, std::make_unique(std::move(args))); @@ -960,6 +1280,20 @@ auto complete(Environment& env, std::string_view query, size_t point, const Mode return candidates; } +auto referencedSchemaPaths(Environment& env, const AST& ast, SchemaId rootSchema) -> expected +{ + ReferencedSchemaPaths result; + if (rootSchema == NoSchemaId) { + result.hasUnresolvedAccess = true; + return result; + } + + (void) env.querySchema(rootSchema); + auto collected = collectReferencedSchemaPaths(env, ast.expr(), rootSchema, result); + TRY_EXPECTED(collected); + return result; +} + auto eval(Environment& env, const AST& ast, const ModelNode& node, Diagnostics* diag) -> expected, Error> { if (!node.model_) diff --git a/test/schema.cpp b/test/schema.cpp index 19b9e51..e4b2ce6 100644 --- a/test/schema.cpp +++ b/test/schema.cpp @@ -289,6 +289,141 @@ TEST_CASE("Array schema serialization", "[model.schema]") { REQUIRE((*recoveredRoot)->schema() == SchemaId{7}); } +TEST_CASE("Schema auto-wildcard rewrites enum symbols to exact paths", "[model.schema]") +{ + auto model = json::parse(R"json( + { + "status": "Other", + "items": [ + {"kind": "Other"} + ], + "unrelated": { + "value": "Carrier" + }, + "CARRIER": 7 + } + )json").value(); + + auto registry = SchemaRegistry{}; + auto strings = model->strings(); + auto status = strings->get("status"); + auto items = strings->get("items"); + auto kind = strings->get("kind"); + auto carrierField = strings->get("CARRIER"); + auto carrierEnum = strings->get("Carrier"); + + auto rootSchema = std::make_unique(); + rootSchema->addField(status, {SchemaId{2}}); + rootSchema->addField(items, {SchemaId{3}}); + rootSchema->addField(carrierField); + + auto enumSchema = std::make_unique(); + enumSchema->addEnumSymbol(carrierEnum); + + auto arraySchema = std::make_unique(); + arraySchema->addElementSchemas({SchemaId{4}}); + + auto itemSchema = std::make_unique(); + itemSchema->addField(kind, {SchemaId{2}}); + + registry.schemas[SchemaId{1}] = std::move(rootSchema); + registry.schemas[SchemaId{2}] = std::move(enumSchema); + registry.schemas[SchemaId{3}] = std::move(arraySchema); + registry.schemas[SchemaId{4}] = std::move(itemSchema); + registry.finalize(); + + auto root = model->root(0); + REQUIRE(root); + auto rootObj = model->resolve(**root); + REQUIRE(rootObj); + REQUIRE(rootObj->setSchema(SchemaId{1})); + + Environment env(strings); + env.querySchemaCallback = registry.asFunction(); + + auto enumAst = compile(env, "Carrier", CompileOptions{ + .any = false, + .autoWildcard = true, + .rootSchema = SchemaId{1}}); + REQUIRE(enumAst); + INFO((*enumAst)->expr().toString()); + REQUIRE((*enumAst)->expr().toString().find("**") == std::string::npos); + REQUIRE((*enumAst)->expr().toString().find("status") != std::string::npos); + REQUIRE((*enumAst)->expr().toString().find("kind") != std::string::npos); + + auto enumResult = eval(env, **enumAst, **root, nullptr); + REQUIRE(enumResult); + REQUIRE(enumResult->size() == 1); + REQUIRE(enumResult->front().isa(ValueType::Bool)); + REQUIRE_FALSE(enumResult->front().as()); + + auto fieldAst = compile(env, "CARRIER", CompileOptions{ + .any = false, + .autoWildcard = true, + .rootSchema = SchemaId{1}}); + REQUIRE(fieldAst); + REQUIRE((*fieldAst)->expr().toString() == "**.CARRIER"); + + auto enumRefs = referencedSchemaPaths(env, **enumAst, SchemaId{1}); + REQUIRE(enumRefs); + REQUIRE_FALSE(enumRefs->hasBroadWildcardAccess); + REQUIRE_FALSE(enumRefs->hasDynamicAccess); + REQUIRE(enumRefs->paths.size() == 2); + REQUIRE(std::ranges::all_of(enumRefs->paths, [](auto const& ref) { + return ref.location == SourceLocation{0, 7}; + })); + REQUIRE(std::ranges::any_of(enumRefs->paths, [&](auto const& ref) { + return ref.path.size() == 1 && ref.path[0].field == status && !ref.viaWildcard; + })); + REQUIRE(std::ranges::any_of(enumRefs->paths, [&](auto const& ref) { + return ref.path.size() == 3 + && ref.path[0].field == items + && ref.path[1].kind == SchemaPathSegment::Kind::ArrayElement + && ref.path[2].field == kind + && !ref.viaWildcard; + })); + + auto fieldRefs = referencedSchemaPaths(env, **fieldAst, SchemaId{1}); + REQUIRE(fieldRefs); + REQUIRE(fieldRefs->paths.size() == 1); + REQUIRE(fieldRefs->paths.front().viaWildcard); + REQUIRE(fieldRefs->paths.front().location == SourceLocation{0, 7}); + REQUIRE(fieldRefs->paths.front().path.size() == 1); + REQUIRE(fieldRefs->paths.front().path.front().field == carrierField); + + auto unresolvedAst = compile(env, "unrelated.value", CompileOptions{ + .any = false, + .autoWildcard = false, + .rootSchema = SchemaId{1}}); + REQUIRE(unresolvedAst); + auto unresolvedRefs = referencedSchemaPaths(env, **unresolvedAst, SchemaId{1}); + REQUIRE(unresolvedRefs); + REQUIRE(unresolvedRefs->paths.empty()); + REQUIRE(unresolvedRefs->hasUnresolvedAccess); + + REQUIRE(strings->get("absent") == StringPool::Empty); + auto absentAst = compile(env, "absent", CompileOptions{ + .any = false, + .autoWildcard = false, + .rootSchema = SchemaId{1}}); + REQUIRE(absentAst); + auto absentRefs = referencedSchemaPaths(env, **absentAst, SchemaId{1}); + REQUIRE(absentRefs); + REQUIRE(absentRefs->paths.empty()); + REQUIRE(absentRefs->hasUnresolvedAccess); + REQUIRE(strings->get("absent") == StringPool::Empty); + + auto childWildcardAst = compile(env, "*.CARRIER", CompileOptions{ + .any = false, + .autoWildcard = false, + .rootSchema = SchemaId{1}}); + REQUIRE(childWildcardAst); + auto childWildcardRefs = referencedSchemaPaths(env, **childWildcardAst, SchemaId{1}); + REQUIRE(childWildcardRefs); + REQUIRE(childWildcardRefs->paths.empty()); + REQUIRE(childWildcardRefs->hasDynamicAccess); +} + // A minimal test that makes sure a field not in the schema // is pruned if we query for it via **.field. TEST_CASE("WildcardFieldExpr Field Pruning", "[model.schema]")