Skip to content

Commit 19f0ada

Browse files
authored
Fix generate_unions_case for Rust case (#1677)
* Fix generate_unions_case for Rust case
* Add test
1 parent f72df51 commit 19f0ada

File tree

5 files changed

+193
-0
lines changed

5 files changed

+193
-0
lines changed

arrow/src/datatypes/datatype.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,52 @@ impl DataType {
499499
))
500500
}
501501
}
502+
Some(s) if s == "union" => {
503+
if let Some(Value::String(mode)) = map.get("mode") {
504+
let union_mode = if mode == "SPARSE" {
505+
UnionMode::Sparse
506+
} else if mode == "DENSE" {
507+
UnionMode::Dense
508+
} else {
509+
return Err(ArrowError::ParseError(format!(
510+
"Unknown union mode {:?} for union",
511+
mode
512+
)));
513+
};
514+
if let Some(type_ids) = map.get("typeIds") {
515+
let type_ids = type_ids
516+
.as_array()
517+
.unwrap()
518+
.iter()
519+
.map(|t| t.as_i64().unwrap())
520+
.collect::<Vec<_>>();
521+
522+
let default_fields = type_ids
523+
.iter()
524+
.map(|t| {
525+
Field::new("", DataType::Boolean, true).with_metadata(
526+
Some(
527+
[("type_id".to_string(), t.to_string())]
528+
.iter()
529+
.cloned()
530+
.collect(),
531+
),
532+
)
533+
})
534+
.collect::<Vec<_>>();
535+
536+
Ok(DataType::Union(default_fields, union_mode))
537+
} else {
538+
Err(ArrowError::ParseError(
539+
"Expecting a typeIds for union ".to_string(),
540+
))
541+
}
542+
} else {
543+
Err(ArrowError::ParseError(
544+
"Expecting a mode for union".to_string(),
545+
))
546+
}
547+
}
502548
Some(other) => Err(ArrowError::ParseError(format!(
503549
"invalid or unsupported type name: {} in {:?}",
504550
other, json

arrow/src/datatypes/field.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,30 @@ impl Field {
390390
}
391391
}
392392
}
393+
DataType::Union(fields, mode) => match map.get("children") {
394+
Some(Value::Array(values)) => {
395+
let mut union_fields: Vec<Field> =
396+
values.iter().map(Field::from).collect::<Result<_>>()?;
397+
fields.iter().zip(union_fields.iter_mut()).for_each(
398+
|(f, union_field)| {
399+
union_field.set_metadata(Some(
400+
f.metadata().unwrap().clone(),
401+
));
402+
},
403+
);
404+
DataType::Union(union_fields, mode)
405+
}
406+
Some(_) => {
407+
return Err(ArrowError::ParseError(
408+
"Field 'children' must be an array".to_string(),
409+
))
410+
}
411+
None => {
412+
return Err(ArrowError::ParseError(
413+
"Field missing 'children' attribute".to_string(),
414+
));
415+
}
416+
},
393417
_ => data_type,
394418
};
395419

arrow/src/datatypes/mod.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,70 @@ mod tests {
392392
assert_eq!(expected, dt);
393393
}
394394

395+
// Checks that `Field::from` builds a sparse union field from its JSON
// description. The JSON lists `typeIds` 5 and 7 alongside two children
// (`f1`: 32-bit signed int, `f2`: utf8); the expected field carries each
// id, in order, as a "type_id" metadata entry on the corresponding child.
#[test]
396+
fn parse_union_from_json() {
397+
let json = r#"
398+
{
399+
"name": "my_union",
400+
"nullable": false,
401+
"type": {
402+
"name": "union",
403+
"mode": "SPARSE",
404+
"typeIds": [
405+
5,
406+
7
407+
]
408+
},
409+
"children": [
410+
{
411+
"name": "f1",
412+
"type": {
413+
"name": "int",
414+
"isSigned": true,
415+
"bitWidth": 32
416+
},
417+
"nullable": true,
418+
"children": []
419+
},
420+
{
421+
"name": "f2",
422+
"type": {
423+
"name": "utf8"
424+
},
425+
"nullable": true,
426+
"children": []
427+
}
428+
]
429+
}
430+
"#;
431+
// Parse the text into a serde_json `Value`, then convert that into a `Field`.
// Note: despite its name, `dt` holds the parsed `Field`, not a `DataType`.
let value: Value = serde_json::from_str(json).unwrap();
432+
let dt = Field::from(&value).unwrap();
433+
434+
// Expected result: a non-nullable field named "my_union" whose type is a
// sparse union of the two nullable children, each tagged with its type id
// stored as a string under the "type_id" metadata key.
let expected = Field::new(
435+
"my_union",
436+
DataType::Union(
437+
vec![
438+
Field::new("f1", DataType::Int32, true).with_metadata(Some(
439+
[("type_id".to_string(), "5".to_string())]
440+
.iter()
441+
.cloned()
442+
.collect(),
443+
)),
444+
Field::new("f2", DataType::Utf8, true).with_metadata(Some(
445+
[("type_id".to_string(), "7".to_string())]
446+
.iter()
447+
.cloned()
448+
.collect(),
449+
)),
450+
],
451+
UnionMode::Sparse,
452+
),
453+
false,
454+
);
455+
456+
// The comparison covers the whole field: name, nullability, union mode,
// and the children together with their metadata.
assert_eq!(expected, dt);
457+
}
458+
395459
#[test]
396460
fn parse_utf8_from_json() {
397461
let json = "{\"name\":\"utf8\"}";

arrow/src/util/integration_util.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ pub struct ArrowJsonColumn {
132132
pub data: Option<Vec<Value>>,
133133
#[serde(rename = "OFFSET")]
134134
pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
135+
#[serde(rename = "TYPE_ID")]
136+
pub type_id: Option<Vec<i8>>,
135137
pub children: Option<Vec<ArrowJsonColumn>>,
136138
}
137139

@@ -472,6 +474,7 @@ impl ArrowJsonBatch {
472474
validity: Some(validity),
473475
data: Some(data),
474476
offset: None,
477+
type_id: None,
475478
children: None,
476479
}
477480
}
@@ -481,6 +484,7 @@ impl ArrowJsonBatch {
481484
validity: None,
482485
data: None,
483486
offset: None,
487+
type_id: None,
484488
children: None,
485489
},
486490
};

integration-testing/src/lib.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,61 @@ fn array_from_json(
632632
let array = MapArray::from(array_data);
633633
Ok(Arc::new(array))
634634
}
635+
DataType::Union(fields, _) => {
636+
let field_type_ids = fields
637+
.iter()
638+
.enumerate()
639+
.into_iter()
640+
.map(|(idx, f)| {
641+
(
642+
f.metadata()
643+
.and_then(|m| m.get("type_id"))
644+
.unwrap()
645+
.parse::<i8>()
646+
.unwrap(),
647+
idx,
648+
)
649+
})
650+
.collect::<HashMap<_, _>>();
651+
652+
let type_ids = if let Some(type_id) = json_col.type_id {
653+
type_id
654+
.iter()
655+
.map(|t| {
656+
if field_type_ids.contains_key(t) {
657+
Ok(*(field_type_ids.get(t).unwrap()) as i8)
658+
} else {
659+
Err(ArrowError::JsonError(format!(
660+
"Unable to find type id {:?}",
661+
t
662+
)))
663+
}
664+
})
665+
.collect::<Result<_>>()?
666+
} else {
667+
vec![]
668+
};
669+
670+
let offset: Option<Buffer> = json_col.offset.map(|offsets| {
671+
let offsets: Vec<i32> =
672+
offsets.iter().map(|v| v.as_i64().unwrap() as i32).collect();
673+
Buffer::from(&offsets.to_byte_slice())
674+
});
675+
676+
let mut children: Vec<(Field, Arc<dyn Array>)> = vec![];
677+
for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
678+
let array = array_from_json(field, col, dictionaries)?;
679+
children.push((field.clone(), array));
680+
}
681+
682+
let array = UnionArray::try_new(
683+
Buffer::from(&type_ids.to_byte_slice()),
684+
offset,
685+
children,
686+
)
687+
.unwrap();
688+
Ok(Arc::new(array))
689+
}
635690
t => Err(ArrowError::JsonError(format!(
636691
"data type {:?} not supported",
637692
t

0 commit comments

Comments
 (0)