diff --git a/cmake/deps.cmake b/cmake/deps.cmake index 96fa2d50..a69e25b3 100644 --- a/cmake/deps.cmake +++ b/cmake/deps.cmake @@ -35,7 +35,7 @@ if (NOT "${_mapget_simfil_source_dir}" STREQUAL "") "SIMFIL_SHARED OFF") else() CPMAddPackage( - URI "gh:Klebert-Engineering/simfil#schema-field-pruning@06c9fab" + URI "gh:Klebert-Engineering/simfil#issue-146-schema-enums@65482b8" OPTIONS "SIMFIL_WITH_MODEL_JSON ON" "SIMFIL_SHARED OFF") diff --git a/libs/model/include/mapget/model/schemaregistry.h b/libs/model/include/mapget/model/schemaregistry.h index 89bac3b8..cc22ea07 100644 --- a/libs/model/include/mapget/model/schemaregistry.h +++ b/libs/model/include/mapget/model/schemaregistry.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -55,6 +56,18 @@ class SchemaRegistry /** Return true if the schema can contain the field directly or through descendants. */ [[nodiscard]] bool canHaveField(simfil::SchemaId schemaId, std::string_view fieldName) const; + /** Return true if the schema can contain the enum-like string symbol directly or through descendants. */ + [[nodiscard]] bool canHaveEnumSymbol(simfil::SchemaId schemaId, std::string_view symbolName) const; + + /** Return field names directly declared by this schema node. */ + [[nodiscard]] std::span directFields(simfil::SchemaId schemaId) const; + + /** Return field names reachable from this schema node. */ + [[nodiscard]] std::span nestedFields(simfil::SchemaId schemaId) const; + + /** Return enum-like string symbols reachable from this schema node. */ + [[nodiscard]] std::span nestedEnumSymbols(simfil::SchemaId schemaId) const; + /** Resolve the Feature object schema for a concrete mapget feature type. 
*/ [[nodiscard]] simfil::SchemaId featureSchema(std::string_view featureType) const; diff --git a/libs/model/include/mapget/model/simfilutil.h b/libs/model/include/mapget/model/simfilutil.h index d083d2c3..9a95fc74 100644 --- a/libs/model/include/mapget/model/simfilutil.h +++ b/libs/model/include/mapget/model/simfilutil.h @@ -18,6 +18,12 @@ void installSchemaRegistry( std::shared_ptr registry, std::shared_ptr strings); +/** Attach a completion-only SchemaRegistry callback which materializes schema strings locally. */ +void installCompletionSchemaRegistry( + simfil::Environment& env, + std::shared_ptr registry, + std::shared_ptr strings); + template std::unique_ptr makeEnvironment(Args&& ...args) { diff --git a/libs/model/src/featurelayer.cpp b/libs/model/src/featurelayer.cpp index 2ac18e47..4ca2bdcf 100644 --- a/libs/model/src/featurelayer.cpp +++ b/libs/model/src/featurelayer.cpp @@ -277,6 +277,15 @@ struct TileFeatureLayer::Impl { return env; } + static std::unique_ptr makeSchemaAwareCompletionEnvironment( + std::shared_ptr stringPool, + std::shared_ptr schemaRegistry) + { + auto env = makeEnvironment(stringPool); + installCompletionSchemaRegistry(*env, std::move(schemaRegistry), std::move(stringPool)); + return env; + } + // (De-)Serialization template void readWrite(S& s) { @@ -1363,7 +1372,9 @@ TileFeatureLayer::collectQueryDiagnostics(std::string_view query, const simfil:: tl::expected, simfil::Error> TileFeatureLayer::complete(std::string_view query, int point, ModelNode const& node, simfil::CompletionOptions const& opts) { - return impl_->expressionCache_.completions(query, point, node, opts); + auto completionStrings = std::make_shared(*strings()); + auto completionEnv = Impl::makeSchemaAwareCompletionEnvironment(std::move(completionStrings), impl_->schemaRegistry_); + return simfil::complete(*completionEnv, query, point, node, opts); } void TileFeatureLayer::setIdPrefix(const KeyValueViewPairs& prefix) diff --git a/libs/model/src/schemaregistry.cpp 
b/libs/model/src/schemaregistry.cpp index 3f90e75f..ee7aa6f1 100644 --- a/libs/model/src/schemaregistry.cpp +++ b/libs/model/src/schemaregistry.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,59 @@ bool isArraySchema(nlohmann::json const& schema) return schema.contains("items") || hasType(schema, "array"); } +/** Return whether this schema branch should be treated as a scalar value schema. */ +bool isValueSchema(nlohmann::json const& schema) +{ + return schema.contains("const") + || schema.contains("enum") + || hasType(schema, "null") + || hasType(schema, "boolean") + || hasType(schema, "integer") + || hasType(schema, "number") + || hasType(schema, "string"); +} + +/** Collect string-valued const/enum entries from a JSON Schema branch. */ +std::vector stringEnumSymbols(nlohmann::json const& schema) +{ + std::vector symbols; + if (auto constIt = schema.find("const"); constIt != schema.end() && constIt->is_string()) { + symbols.push_back(constIt->get()); + } + + if (auto enumIt = schema.find("enum"); enumIt != schema.end() && enumIt->is_array()) { + for (auto const& value : *enumIt) { + if (value.is_string()) { + symbols.push_back(value.get()); + } + } + } + + std::ranges::sort(symbols); + auto duplicates = std::ranges::unique(symbols); + symbols.erase(duplicates.begin(), duplicates.end()); + return symbols; +} + +/** Stable suffix for memoizing the same JSON branch under different schema kinds. */ +std::string_view kindMemoSuffix(std::optional kind) +{ + if (!kind) { + return "n"; + } + + switch (*kind) { + case Kind::Object: + return "o"; + case Kind::Array: + return "a"; + case Kind::Value: + return "v"; + } + + return "n"; +} + /** Return whether a oneOf object/array wrapper represents a mapget multimap view. 
*/ bool isMapgetMultimap(nlohmann::json const& schema) { @@ -193,16 +247,18 @@ std::string attributeLayerMapKey(std::string_view featureType) struct SchemaRegistry::Impl { - /** Logical object/array schema independent of any StringPool numbering. */ + /** Logical object/array/value schema independent of any StringPool numbering. */ struct LogicalSchema { simfil::SchemaId id_ = simfil::NoSchemaId; Kind kind_ = Kind::Object; Entry entry_; std::vector directFields_; + std::vector directEnumSymbols_; std::map, std::less<>> childSchemas_; std::vector elementSchemas_; std::vector flatFields_; + std::vector flatEnumSymbols_; bool finalized_ = false; }; @@ -261,6 +317,22 @@ struct SchemaRegistry::Impl fields.emplace_back(fieldName); } + void addEnumSymbol(simfil::SchemaId parent, std::string_view symbolName) + { + if (!valid(parent)) { + return; + } + auto& symbols = schemas_[parent].directEnumSymbols_; + symbols.emplace_back(symbolName); + } + + void addEnumSymbols(simfil::SchemaId parent, std::span symbolNames) + { + for (auto const& symbolName : symbolNames) { + addEnumSymbol(parent, symbolName); + } + } + void addChild(simfil::SchemaId parent, std::string_view fieldName, simfil::SchemaId child) { if (!valid(parent) || !valid(child)) { @@ -297,12 +369,21 @@ struct SchemaRegistry::Impl } std::vector fields; - std::vector visited; - collectFields(id, visited, fields); + std::vector visitedFields; + collectFields(id, visitedFields, fields); std::ranges::sort(fields); auto duplicates = std::ranges::unique(fields); fields.erase(duplicates.begin(), duplicates.end()); schemas_[id].flatFields_ = std::move(fields); + + std::vector symbols; + std::vector visitedEnumSymbols; + collectEnumSymbols(id, visitedEnumSymbols, symbols); + std::ranges::sort(symbols); + auto symbolDuplicates = std::ranges::unique(symbols); + symbols.erase(symbolDuplicates.begin(), symbolDuplicates.end()); + schemas_[id].flatEnumSymbols_ = std::move(symbols); + schemas_[id].finalized_ = true; } @@ -328,6 
+409,28 @@ struct SchemaRegistry::Impl } } + void collectEnumSymbols( + simfil::SchemaId id, + std::vector& visited, + std::vector& symbols) const + { + if (!valid(id) || std::ranges::find(visited, id) != visited.end()) { + return; + } + visited.push_back(id); + + auto const& schema = schemas_[id]; + symbols.insert(symbols.end(), schema.directEnumSymbols_.begin(), schema.directEnumSymbols_.end()); + for (auto const& [_, children] : schema.childSchemas_) { + for (auto child : children) { + collectEnumSymbols(child, visited, symbols); + } + } + for (auto child : schema.elementSchemas_) { + collectEnumSymbols(child, visited, symbols); + } + } + [[nodiscard]] bool canHaveField(simfil::SchemaId id, std::string_view fieldName) { if (!valid(id)) { @@ -338,6 +441,40 @@ struct SchemaRegistry::Impl return std::ranges::binary_search(fields, fieldName); } + [[nodiscard]] bool canHaveEnumSymbol(simfil::SchemaId id, std::string_view symbolName) + { + if (!valid(id)) { + return false; + } + finalize(id); + auto const& symbols = schemas_[id].flatEnumSymbols_; + return std::ranges::binary_search(symbols, symbolName); + } + + [[nodiscard]] std::span directFields(simfil::SchemaId id) const + { + if (!valid(id)) { + return {}; + } + return schemas_[id].directFields_; + } + + [[nodiscard]] std::span nestedFields(simfil::SchemaId id) const + { + if (!valid(id)) { + return {}; + } + return schemas_[id].flatFields_; + } + + [[nodiscard]] std::span nestedEnumSymbols(simfil::SchemaId id) const + { + if (!valid(id)) { + return {}; + } + return schemas_[id].flatEnumSymbols_; + } + [[nodiscard]] simfil::SchemaId childSchema( simfil::SchemaId parent, std::string_view fieldName, @@ -433,7 +570,7 @@ class RegistryBuilder } auto const key = annotatedKey(schema, pointer, context); - auto const memoKey = pointer + "|" + (preferredKind ? (*preferredKind == Kind::Object ? 
"o" : "a") : "n"); + auto const memoKey = pointer + "|" + std::string(kindMemoSuffix(preferredKind)); if (auto memoIt = memo_.find(memoKey); memoIt != memo_.end()) { registry_.registerKey(key, memoIt->second); return memoIt->second; @@ -448,6 +585,9 @@ class RegistryBuilder if (isArraySchema(schema) && (!preferredKind || *preferredKind == Kind::Array)) { return buildArray(schema, std::move(pointer), std::move(context), std::move(key), std::move(metaType), memoKey); } + if (isValueSchema(schema) && (!preferredKind || *preferredKind == Kind::Value)) { + return buildValue(schema, std::move(pointer), std::move(key), std::move(metaType), memoKey); + } return simfil::NoSchemaId; } @@ -487,6 +627,10 @@ class RegistryBuilder registerCombinedAliases(schema, pointer, context, selected); return selected; } + if (auto selected = choose(Kind::Value); selected != simfil::NoSchemaId) { + registerCombinedAliases(schema, pointer, context, selected); + return selected; + } return simfil::NoSchemaId; } @@ -580,6 +724,25 @@ class RegistryBuilder return id; } + simfil::SchemaId buildValue( + nlohmann::json const& schema, + std::string pointer, + std::string key, + std::string metaType, + std::string const& memoKey) + { + auto id = registry_.allocate( + Kind::Value, + std::move(key), + std::move(pointer), + std::move(metaType)); + memo_[memoKey] = id; + + auto symbols = stringEnumSymbols(schema); + registry_.addEnumSymbols(id, symbols); + return id; + } + SchemaRegistry::Impl& registry_; nlohmann::json const& root_; std::map memo_; @@ -592,12 +755,17 @@ class BoundSchema final : public simfil::Schema BoundSchema( std::shared_ptr registry, std::shared_ptr strings, - simfil::SchemaId id) + simfil::SchemaId id, + bool materializeSchemaStrings = false) : registry_(std::move(registry)), strings_(std::move(strings)), id_(id) { + if (materializeSchemaStrings) { + auto mutableStrings = std::const_pointer_cast(strings_); + materializeStringIds(mutableStrings); + } } - /** Return the object/array 
kind for the stable mapget SchemaId. */ + /** Return the object/array/value kind for the stable mapget SchemaId. */ auto kind() const -> Kind override { return registry_ ? registry_->kind(id_) : Kind::Object; @@ -613,13 +781,61 @@ class BoundSchema final : public simfil::Schema return !fieldName || registry_->canHaveField(id_, *fieldName); } - /** This adapter intentionally avoids materializing StringIds for schema-only field names. */ + /** Resolve enum-like string symbols through the datasource-owned pool and match by name. */ + auto canHaveEnumSymbol(simfil::StringId symbolId) const -> bool override + { + if (!registry_ || !strings_) { + return false; + } + auto symbolName = strings_->resolve(symbolId); + return symbolName && registry_->canHaveEnumSymbol(id_, *symbolName); + } + + /** Return nested schema fields when this adapter was built for completion. */ auto nestedFields() const& -> std::span override { - return {}; + return nestedFields_; + } + + /** Return completion-local ids for direct schema fields, if materialized. */ + auto directFields() const& -> std::span override + { + return directFields_; + } + + /** Return nested schema enum symbols when this adapter was built for completion. */ + auto nestedEnumSymbols() const& -> std::span override + { + return nestedEnumSymbols_; } private: + /** Insert schema-owned strings into the completion-local pool. */ + auto materializeStringIds(std::shared_ptr const& strings) -> void + { + if (!registry_ || !strings) { + return; + } + + materialize(registry_->directFields(id_), *strings, directFields_); + materialize(registry_->nestedFields(id_), *strings, nestedFields_); + materialize(registry_->nestedEnumSymbols(id_), *strings, nestedEnumSymbols_); + } + + /** Convert schema-owned strings into StringIds in the provided temporary pool. 
*/ + static auto materialize( + std::span names, + simfil::StringPool& strings, + std::vector& ids) -> void + { + ids.reserve(names.size()); + for (auto const& name : names) { + if (auto id = strings.emplace(name)) { + ids.push_back(*id); + } + } + } + /** Runtime pruning calls canHaveField directly, so no recursive StringId cache is built here. */ auto collectNestedFields( const std::function&, @@ -631,10 +847,19 @@ class BoundSchema final : public simfil::Schema std::shared_ptr registry_; std::shared_ptr strings_; simfil::SchemaId id_ = simfil::NoSchemaId; + std::vector directFields_; + std::vector nestedFields_; + std::vector nestedEnumSymbols_; }; } // namespace +void installSchemaRegistryImpl( + simfil::Environment& env, + std::shared_ptr registry, + std::shared_ptr strings, + bool materializeSchemaStrings); + SchemaRegistry::SchemaRegistry(nlohmann::json const& schema) : impl_(std::make_shared()) { @@ -680,6 +905,26 @@ bool SchemaRegistry::canHaveField(simfil::SchemaId schemaId, std::string_view fi return impl_->canHaveField(schemaId, fieldName); } +bool SchemaRegistry::canHaveEnumSymbol(simfil::SchemaId schemaId, std::string_view symbolName) const +{ + return impl_->canHaveEnumSymbol(schemaId, symbolName); +} + +std::span SchemaRegistry::directFields(simfil::SchemaId schemaId) const +{ + return impl_->directFields(schemaId); +} + +std::span SchemaRegistry::nestedFields(simfil::SchemaId schemaId) const +{ + return impl_->nestedFields(schemaId); +} + +std::span SchemaRegistry::nestedEnumSymbols(simfil::SchemaId schemaId) const +{ + return impl_->nestedEnumSymbols(schemaId); +} + simfil::SchemaId SchemaRegistry::featureSchema(std::string_view featureType) const { return schemaId(featureKey(featureType)); @@ -707,17 +952,35 @@ void installSchemaRegistry( simfil::Environment& env, std::shared_ptr registry, std::shared_ptr strings) +{ + installSchemaRegistryImpl(env, std::move(registry), std::move(strings), false); +} + +void installCompletionSchemaRegistry( + 
simfil::Environment& env, + std::shared_ptr registry, + std::shared_ptr strings) +{ + installSchemaRegistryImpl(env, std::move(registry), std::move(strings), true); +} + +void installSchemaRegistryImpl( + simfil::Environment& env, + std::shared_ptr registry, + std::shared_ptr strings, + bool materializeSchemaStrings) { auto schemas = std::make_shared>>(); env.querySchemaCallback = [registry = std::move(registry), strings = std::move(strings), - schemas = std::move(schemas)](simfil::SchemaId schemaId) { + schemas = std::move(schemas), + materializeSchemaStrings](simfil::SchemaId schemaId) { if (!registry || schemaId == simfil::NoSchemaId) { return static_cast(nullptr); } auto [it, inserted] = schemas->try_emplace(schemaId); if (inserted) { - it->second = std::make_unique(registry, strings, schemaId); + it->second = std::make_unique(registry, strings, schemaId, materializeSchemaStrings); } return static_cast(it->second.get()); }; diff --git a/test/unit/test-info.cpp b/test/unit/test-info.cpp index 6d3ab36d..c1b333d1 100644 --- a/test/unit/test-info.cpp +++ b/test/unit/test-info.cpp @@ -1,7 +1,10 @@ #include +#include + #include "mapget/model/featurelayer.h" #include "mapget/model/info.h" +#include "mapget/model/simfilutil.h" #include "mapget/model/stream.h" #include "mapget/log.h" @@ -88,7 +91,7 @@ nlohmann::json schemaAnnotatedLayerInfoJson() "attributeTypeCode": "speed" }, "properties": { - "unit": {"type": "string"}, + "unit": {"type": "string", "enum": ["km/h", "mph"]}, "value": {"type": "number"} } }, @@ -103,6 +106,16 @@ nlohmann::json schemaAnnotatedLayerInfoJson() })"_json; } +bool hasCompletion( + std::vector const& completions, + std::string_view text, + simfil::CompletionCandidate::Type type) +{ + return std::ranges::any_of(completions, [&](auto const& candidate) { + return candidate.text == text && candidate.type == type; + }); +} + } // namespace TEST_CASE("InfoToJson", "[DataSourceInfo]") @@ -258,6 +271,35 @@ TEST_CASE("LayerInfo builds SchemaRegistry 
from x-mapget annotations", "[DataSou REQUIRE(registry->canHaveField(carrierSchema->id_, "properties")); REQUIRE(registry->canHaveField(carrierSchema->id_, "value")); REQUIRE_FALSE(registry->canHaveField(carrierSchema->id_, "notDeclaredBySchema")); + + auto const typeIdId = registry->childSchema( + carrierSchema->id_, + "typeId", + simfil::Schema::Kind::Value); + auto const unitId = registry->childSchema( + speedId, + "unit", + simfil::Schema::Kind::Value); + REQUIRE(typeIdId != simfil::NoSchemaId); + REQUIRE(unitId != simfil::NoSchemaId); + REQUIRE(registry->kind(typeIdId) == simfil::Schema::Kind::Value); + REQUIRE(registry->kind(unitId) == simfil::Schema::Kind::Value); + REQUIRE(registry->canHaveEnumSymbol(carrierSchema->id_, "Carrier")); + REQUIRE(registry->canHaveEnumSymbol(carrierSchema->id_, "km/h")); + REQUIRE(registry->canHaveEnumSymbol(unitId, "mph")); + REQUIRE_FALSE(registry->canHaveEnumSymbol(carrierSchema->id_, "notAnEnum")); + + auto strings = std::make_shared("SchemaRegistryEnums"); + auto carrierSymbol = strings->emplace("Carrier").value(); + auto speedUnitSymbol = strings->emplace("km/h").value(); + auto missingSymbol = strings->emplace("missing").value(); + simfil::Environment env(strings); + installSchemaRegistry(env, registry, strings); + auto schema = env.querySchema(carrierSchema->id_); + REQUIRE(schema != nullptr); + REQUIRE(schema->canHaveEnumSymbol(carrierSymbol)); + REQUIRE(schema->canHaveEnumSymbol(speedUnitSymbol)); + REQUIRE_FALSE(schema->canHaveEnumSymbol(missingSymbol)); } TEST_CASE("SchemaRegistry does not mutate datasource StringPool", "[DataSourceInfo]") @@ -283,6 +325,40 @@ TEST_CASE("SchemaRegistry does not mutate datasource StringPool", "[DataSourceIn REQUIRE(strings->size() == sizeBefore); } +TEST_CASE("TileFeatureLayer completes schema fields and enum symbols without mutating datasource strings", "[DataSourceInfo]") +{ + auto layerInfo = LayerInfo::fromJson(schemaAnnotatedLayerInfoJson()); + auto strings = 
std::make_shared("SchemaCompletionNode"); + auto tile = std::make_shared( + TileId::fromWgs84(42., 11., 13), + "SchemaCompletionNode", + "SchemaCompletionMap", + layerInfo, + strings); + + auto feature = tile->newFeature("Carrier", {{"carrierId", 7}}); + auto layer = feature->attributeLayers()->newLayer("limits"); + auto speed = layer->newAttribute("speed"); + + REQUIRE(strings->get("unit") == simfil::StringPool::Empty); + REQUIRE(strings->get("km/h") == simfil::StringPool::Empty); + + simfil::CompletionOptions opts; + opts.showWildcardHints = false; + + simfil::ModelNode::Ptr speedNode = speed; + auto fieldCompletions = tile->complete("u", 1, *speedNode, opts); + REQUIRE(fieldCompletions); + REQUIRE(hasCompletion(*fieldCompletions, "unit", simfil::CompletionCandidate::Type::FIELD)); + + auto enumCompletions = tile->complete("k", 1, *speedNode, opts); + REQUIRE(enumCompletions); + REQUIRE(hasCompletion(*enumCompletions, "\"km/h\"", simfil::CompletionCandidate::Type::CONSTANT)); + + REQUIRE(strings->get("unit") == simfil::StringPool::Empty); + REQUIRE(strings->get("km/h") == simfil::StringPool::Empty); +} + TEST_CASE("TileFeatureLayer exposes SchemaIds on feature-model nodes", "[DataSourceInfo]") { auto layerInfo = LayerInfo::fromJson(schemaAnnotatedLayerInfoJson());