Skip to content

Commit acbd372

Browse files
Luuk from Paraplu.cloud and claude committed
fix: Resolve xs:include within imported schemas
load_imported_schema() now processes pending xs:include and xs:redefine locations using the same iterative worklist pattern as parse_file_internal().

Previously, imported schemas that used xs:include internally (e.g., MathML's mathml3.xsd which includes mathml3-content.xsd, mathml3-presentation.xsd, and mathml3-common.xsd) would report 0 elements, because parse_schema_element() collected include locations but load_imported_schema() never processed them.

Also adds an early return for xs:import with no namespace and no schemaLocation, which is a no-op declaration.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b63bad1 commit acbd372

File tree

2 files changed

+81
-41
lines changed

2 files changed

+81
-41
lines changed

src/validators/parsing.rs

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,32 +1191,60 @@ fn load_imported_schema(
11911191
expected_namespace: Option<&str>,
11921192
catalog: Option<Arc<XmlCatalog>>,
11931193
) -> Result<XsdSchema> {
1194-
// Read the file
1195-
let content = std::fs::read_to_string(path).map_err(|e| {
1196-
Error::Resource(format!("Failed to read imported schema '{}': {}", path.display(), e))
1197-
})?;
1194+
// Track which files have been loaded to prevent circular includes
1195+
let mut loaded_files = std::collections::HashSet::new();
1196+
1197+
// Parse the root imported schema file
1198+
let mut imported_schema = parse_single_schema_file(path, catalog.clone())?;
1199+
1200+
// Mark root as loaded
1201+
if let Ok(canonical) = path.canonicalize() {
1202+
loaded_files.insert(canonical);
1203+
}
1204+
1205+
// Process pending includes iteratively (same worklist pattern as parse_file_internal).
1206+
// Without this, imported schemas that use xs:include (e.g., MathML's mathml3.xsd
1207+
// which includes mathml3-content.xsd, mathml3-presentation.xsd, etc.) would have
1208+
// 0 elements visible.
1209+
let mut pending_includes: Vec<String> = imported_schema.pending_include_locations.drain(..).collect();
1210+
pending_includes.extend(imported_schema.pending_redefine_locations.drain(..));
1211+
1212+
while let Some(include_location) = pending_includes.pop() {
1213+
let resolved_path = resolve_schema_location(
1214+
&include_location,
1215+
imported_schema.source.base_url.as_deref(),
1216+
imported_schema.source.catalog.as_ref().map(|c| c.as_ref()),
1217+
);
1218+
1219+
// Skip already-loaded files
1220+
if let Ok(canonical) = resolved_path.canonicalize() {
1221+
if loaded_files.contains(&canonical) {
1222+
continue;
1223+
}
1224+
loaded_files.insert(canonical);
1225+
}
11981226

1199-
// Parse as document
1200-
let doc = Document::from_string(&content)?;
1201-
let root = doc.root().ok_or_else(|| Error::Parse(ParseError::new("Empty imported document")))?;
1227+
// Parse the included file (this may discover more pending includes)
1228+
match parse_single_schema_file(&resolved_path, catalog.clone()) {
1229+
Ok(include_schema) => {
1230+
// Queue any nested includes from this file
1231+
for nested_loc in &include_schema.pending_include_locations {
1232+
pending_includes.push(nested_loc.clone());
1233+
}
1234+
for nested_loc in &include_schema.pending_redefine_locations {
1235+
pending_includes.push(nested_loc.clone());
1236+
}
12021237

1203-
// Verify this is a schema element
1204-
if root.local_name() != xsd_elements::SCHEMA {
1205-
return Err(Error::Parse(ParseError::new(format!(
1206-
"Expected xs:schema root element in imported schema, got {}",
1207-
root.local_name()
1208-
))));
1238+
// Merge globals into the root imported schema
1239+
imported_schema.maps.global_maps.merge(&include_schema.maps.global_maps);
1240+
}
1241+
Err(_) => {
1242+
// Skip includes that fail to load (missing optional files)
1243+
continue;
1244+
}
1245+
}
12091246
}
12101247

1211-
// Create a new schema for parsing
1212-
let mut imported_schema = XsdSchema::new();
1213-
imported_schema.source.url = Some(path.to_string_lossy().to_string());
1214-
imported_schema.source.base_url = path.parent().map(|p| p.to_string_lossy().to_string());
1215-
imported_schema.source.catalog = catalog;
1216-
1217-
// Parse the schema element
1218-
parse_schema_element(&mut imported_schema, root)?;
1219-
12201248
// Verify namespace matches if expected
12211249
if let Some(expected_ns) = expected_namespace {
12221250
let actual_ns = imported_schema.target_namespace.as_deref();
@@ -1229,12 +1257,42 @@ fn load_imported_schema(
12291257
}
12301258
}
12311259

1232-
// Build the imported schema
1260+
// Build the imported schema (after all includes are merged)
12331261
imported_schema.build()?;
12341262

12351263
Ok(imported_schema)
12361264
}
12371265

1266+
/// Parse a single schema file without processing includes or building.
1267+
/// Returns the schema with pending_include_locations populated but not processed.
1268+
fn parse_single_schema_file(
1269+
path: &Path,
1270+
catalog: Option<Arc<XmlCatalog>>,
1271+
) -> Result<XsdSchema> {
1272+
let content = std::fs::read_to_string(path).map_err(|e| {
1273+
Error::Resource(format!("Failed to read schema '{}': {}", path.display(), e))
1274+
})?;
1275+
1276+
let doc = Document::from_string(&content)?;
1277+
let root = doc.root().ok_or_else(|| Error::Parse(ParseError::new("Empty schema document")))?;
1278+
1279+
if root.local_name() != xsd_elements::SCHEMA {
1280+
return Err(Error::Parse(ParseError::new(format!(
1281+
"Expected xs:schema root element, got {}",
1282+
root.local_name()
1283+
))));
1284+
}
1285+
1286+
let mut schema = XsdSchema::new();
1287+
schema.source.url = Some(path.to_string_lossy().to_string());
1288+
schema.source.base_url = path.parent().map(|p| p.to_string_lossy().to_string());
1289+
schema.source.catalog = catalog;
1290+
1291+
parse_schema_element(&mut schema, root)?;
1292+
1293+
Ok(schema)
1294+
}
1295+
12381296
/// Parse an include declaration
12391297
///
12401298
/// This only collects the include location. The actual loading and merging

src/validators/schemas.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,31 +1510,13 @@ impl Validator for XsdSchema {
15101510
self.register_builtins()?;
15111511
}
15121512

1513-
// Resolve complex type derivations (extension/restriction)
1514-
// This merges base type content into derived types
15151513
self.resolve_complex_type_derivations();
1516-
1517-
// Resolve group references in complex types
1518-
// This resolves <xs:group ref="..."/> to their actual content
15191514
self.resolve_group_references();
1520-
1521-
// Resolve derivations for inline element types AFTER group resolution
1522-
// This ensures the base types have fully resolved content
15231515
self.resolve_inline_element_type_derivations();
1524-
1525-
// Resolve attribute group references
15261516
self.resolve_attribute_group_references();
1527-
1528-
// Resolve type references in global elements
15291517
self.resolve_element_types();
1530-
1531-
// Resolve element types in complex type content models (forward references)
15321518
self.resolve_element_particle_types();
1533-
1534-
// Resolve attribute types in complex types (forward references)
15351519
self.resolve_attribute_types();
1536-
1537-
// Refresh global element types with the fully resolved versions from global_maps.types
15381520
self.refresh_element_types();
15391521

15401522
// Validate redefinitions have proper self-references

0 commit comments

Comments
 (0)