Skip to content

Commit 89e4b83

Browse files
mferretti and claude committed
feat(formats): add CbeffSerializer — CBEFF-like JSON envelope format
Wraps any generated record in a CBEFF-inspired JSON envelope with cbeff_version, format_owner, format_type, creation_date, and payload fields. subject_id is promoted to the envelope level when present in the record.

- CbeffSerializer: stateless, thread-safe, configurable owner/type
- CLI: --format cbeff wired; reads cbeff_format_owner / cbeff_format_type from job conf if present, falls back to ISO/IEC-JTC1-SC37 defaults
- 10 unit tests (CbeffSerializerTest), all passing
- Directly enables biometric pipeline testing against the fingerprint and face YAML structures added in the previous commit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 7044098 commit 89e4b83

File tree

6 files changed

+300
-5
lines changed

6 files changed

+300
-5
lines changed

cli/src/main/java/com/datagenerator/cli/ExecuteCommand.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import com.datagenerator.destinations.kafka.KafkaDestination;
3434
import com.datagenerator.destinations.kafka.KafkaDestinationConfig;
3535
import com.datagenerator.formats.FormatSerializer;
36+
import com.datagenerator.formats.cbeff.CbeffSerializer;
3637
import com.datagenerator.formats.csv.CsvSerializer;
3738
import com.datagenerator.formats.json.JsonSerializer;
3839
import com.datagenerator.formats.protobuf.ProtobufSerializer;
@@ -406,7 +407,7 @@ public Integer call() throws Exception {
406407
dataStructure.getData().size());
407408

408409
// 4. Create format serializer
409-
FormatSerializer serializer = createSerializer(format);
410+
FormatSerializer serializer = createSerializer(format, jobConfig);
410411
log.info("Created serializer: {}", serializer.getFormatName());
411412

412413
// 5. Create destination adapter
@@ -509,15 +510,28 @@ private long resolveSeed(JobConfig jobConfig) {
509510
/**
510511
* Creates a format serializer based on the specified format string.
511512
*
512-
* @param format format name ("json" or "csv", case-insensitive)
513+
* @param format format name ("json", "csv", "protobuf", "cbeff", case-insensitive)
514+
* @param jobConfig job configuration (used for cbeff-specific conf values)
513515
* @return serializer instance for the specified format
514516
* @throws IllegalArgumentException if format is unsupported
515517
*/
516-
private FormatSerializer createSerializer(String format) {
518+
private FormatSerializer createSerializer(String format, JobConfig jobConfig) {
517519
return switch (format.toLowerCase(Locale.ROOT)) {
518520
case "json" -> new JsonSerializer();
519521
case "csv" -> new CsvSerializer();
520522
case "protobuf" -> new ProtobufSerializer();
523+
case "cbeff" -> {
524+
JsonNode conf = jobConfig.getConf();
525+
String owner =
526+
conf != null && conf.has("cbeff_format_owner")
527+
? conf.get("cbeff_format_owner").asText()
528+
: CbeffSerializer.DEFAULT_FORMAT_OWNER;
529+
String type =
530+
conf != null && conf.has("cbeff_format_type")
531+
? conf.get("cbeff_format_type").asText()
532+
: CbeffSerializer.DEFAULT_FORMAT_TYPE;
533+
yield new CbeffSerializer(owner, type);
534+
}
521535
default -> throw new IllegalArgumentException("Unsupported format: " + format);
522536
};
523537
}

docs/internal/tasks/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,12 @@ Each task lists dependencies on other tasks. Always complete dependencies before
9898
- [TASK-036: Security - File Permission Checks](TASK-036-security-permissions.md) ⏸️
9999
- [TASK-044: Extras Directory — External JARs and Custom Datafaker Providers](TASK-044-extras-directory-plugin-loading.md)
100100

101-
### Phase 10: Future Enhancements (⏸️ Not Started)
101+
### Phase 10: Future Enhancements (🔄 In Progress)
102102
- [TASK-037: API - REST Interface](TASK-037-api-rest.md) ⏸️ (future enhancement)
103103
- [TASK-038: API - gRPC Interface](TASK-038-api-grpc.md) ⏸️ (future enhancement)
104+
- [TASK-047: Biometric YAML Structure Definitions](TASK-047-biometric-yaml-structures.md)
105+
- [TASK-048: CBEFF JSON Wrapper Serializer](TASK-048-cbeff-json-serializer.md)
106+
- [TASK-049: BiometricValidator and validate CLI command](TASK-049-biometric-validator.md) ⏸️
104107

105108
---
106109

docs/internal/tasks/TASK-048-cbeff-json-serializer.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# TASK-048: CBEFF JSON Wrapper Serializer
22

3-
**Status:** Deferred ⏸️ (Future Enhancement)
3+
**Status:** ✅ Complete
4+
**Completion Date:** March 15, 2026
45
**Priority:** P3
56
**Phase:** Phase 10 (Biometric Data Generation)
67
**Effort:** 3–4h
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Copyright 2026 Marco Ferretti
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.datagenerator.formats.cbeff;
18+
19+
import com.datagenerator.formats.FormatSerializer;
import com.datagenerator.formats.SerializationException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import java.time.Clock;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
30+
31+
/**
32+
* Serializes generated records to a CBEFF-like JSON envelope format.
33+
*
34+
* <p>Wraps any generated record in a Common Biometric Exchange Formats Framework (CBEFF) inspired
35+
* JSON envelope, enabling testing of biometric exchange pipelines that expect a {@code
36+
* format_owner} / {@code format_type} metadata wrapper around the payload.
37+
*
38+
* <p><b>Envelope structure:</b>
39+
*
40+
* <pre>{@code
41+
* {
42+
* "cbeff_version": "1.1",
43+
* "format_owner": "ISO/IEC-JTC1-SC37",
44+
* "format_type": "biometric-json",
45+
* "creation_date": "2026-03-15T10:00:00Z",
46+
* "subject_id": "<promoted from payload if present>",
47+
* "payload": { ...original record... }
48+
* }
49+
* }</pre>
50+
*
51+
* <p><b>Thread Safety:</b> Stateless — {@code Instant.now()} is called per record. ObjectMapper is
52+
* thread-safe after configuration.
53+
*/
54+
@Slf4j
55+
public class CbeffSerializer implements FormatSerializer {
56+
57+
public static final String CBEFF_VERSION = "1.1";
58+
public static final String DEFAULT_FORMAT_OWNER = "ISO/IEC-JTC1-SC37";
59+
public static final String DEFAULT_FORMAT_TYPE = "biometric-json";
60+
61+
private final String formatOwner;
62+
private final String formatType;
63+
private final ObjectMapper mapper;
64+
65+
/** Create CBEFF serializer with default format owner and type. */
66+
public CbeffSerializer() {
67+
this(DEFAULT_FORMAT_OWNER, DEFAULT_FORMAT_TYPE);
68+
}
69+
70+
/**
71+
* Create CBEFF serializer with configurable format owner and type.
72+
*
73+
* @param formatOwner CBEFF format owner identifier (e.g. "ISO/IEC-JTC1-SC37")
74+
* @param formatType CBEFF format type identifier (e.g. "19794-2-json")
75+
*/
76+
public CbeffSerializer(String formatOwner, String formatType) {
77+
this.formatOwner = formatOwner;
78+
this.formatType = formatType;
79+
this.mapper = createObjectMapper();
80+
}
81+
82+
private static ObjectMapper createObjectMapper() {
83+
ObjectMapper m = new ObjectMapper();
84+
m.registerModule(new JavaTimeModule());
85+
m.disable(SerializationFeature.INDENT_OUTPUT);
86+
m.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
87+
return m;
88+
}
89+
90+
@Override
91+
public String serialize(Map<String, Object> record) {
92+
Map<String, Object> envelope = new LinkedHashMap<>();
93+
envelope.put("cbeff_version", CBEFF_VERSION);
94+
envelope.put("format_owner", formatOwner);
95+
envelope.put("format_type", formatType);
96+
envelope.put("creation_date", DateTimeFormatter.ISO_INSTANT.format(Instant.now()));
97+
98+
Object subjectId = record.get("subject_id");
99+
if (subjectId != null) {
100+
envelope.put("subject_id", subjectId);
101+
}
102+
103+
envelope.put("payload", record);
104+
105+
try {
106+
return mapper.writeValueAsString(envelope);
107+
} catch (JsonProcessingException e) {
108+
log.error("Failed to serialize record to CBEFF JSON: {}", record, e);
109+
throw new SerializationException("CBEFF serialization failed", e);
110+
}
111+
}
112+
113+
@Override
114+
public String getFormatName() {
115+
return "cbeff";
116+
}
117+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright 2026 Marco Ferretti
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/**
18+
* CBEFF-like JSON envelope serialization for biometric records.
19+
*
20+
* <p>Wraps generated records in a Common Biometric Exchange Formats Framework inspired JSON
21+
* envelope with configurable {@code format_owner} and {@code format_type} metadata.
22+
*/
23+
package com.datagenerator.formats.cbeff;
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright 2026 Marco Ferretti
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.datagenerator.formats.cbeff;
18+
19+
import static org.assertj.core.api.Assertions.*;
20+
21+
import com.fasterxml.jackson.databind.JsonNode;
22+
import com.fasterxml.jackson.databind.ObjectMapper;
23+
import java.util.LinkedHashMap;
24+
import java.util.Map;
25+
import org.junit.jupiter.api.BeforeEach;
26+
import org.junit.jupiter.api.Test;
27+
28+
class CbeffSerializerTest {
29+
30+
private CbeffSerializer serializer;
31+
private ObjectMapper mapper;
32+
33+
@BeforeEach
34+
void setUp() {
35+
serializer = new CbeffSerializer();
36+
mapper = new ObjectMapper();
37+
}
38+
39+
@Test
40+
void shouldReturnFormatName() {
41+
assertThat(serializer.getFormatName()).isEqualTo("cbeff");
42+
}
43+
44+
@Test
45+
void shouldIncludeRequiredEnvelopeFields() throws Exception {
46+
Map<String, Object> record = Map.of("name", "Alice");
47+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
48+
49+
assertThat(envelope.has("cbeff_version")).isTrue();
50+
assertThat(envelope.has("format_owner")).isTrue();
51+
assertThat(envelope.has("format_type")).isTrue();
52+
assertThat(envelope.has("creation_date")).isTrue();
53+
assertThat(envelope.has("payload")).isTrue();
54+
}
55+
56+
@Test
57+
void shouldUseDefaultFormatOwnerAndType() throws Exception {
58+
Map<String, Object> record = Map.of("field", "value");
59+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
60+
61+
assertThat(envelope.get("format_owner").asText())
62+
.isEqualTo(CbeffSerializer.DEFAULT_FORMAT_OWNER);
63+
assertThat(envelope.get("format_type").asText()).isEqualTo(CbeffSerializer.DEFAULT_FORMAT_TYPE);
64+
assertThat(envelope.get("cbeff_version").asText()).isEqualTo(CbeffSerializer.CBEFF_VERSION);
65+
}
66+
67+
@Test
68+
void shouldApplyCustomFormatOwnerAndType() throws Exception {
69+
CbeffSerializer custom = new CbeffSerializer("ACME-Corp", "19794-2-json");
70+
Map<String, Object> record = Map.of("field", "value");
71+
JsonNode envelope = mapper.readTree(custom.serialize(record));
72+
73+
assertThat(envelope.get("format_owner").asText()).isEqualTo("ACME-Corp");
74+
assertThat(envelope.get("format_type").asText()).isEqualTo("19794-2-json");
75+
}
76+
77+
@Test
78+
void shouldContainOriginalRecordUnderPayload() throws Exception {
79+
Map<String, Object> record = Map.of("finger_position", "right_index", "quality", 85);
80+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
81+
82+
JsonNode payload = envelope.get("payload");
83+
assertThat(payload.get("finger_position").asText()).isEqualTo("right_index");
84+
assertThat(payload.get("quality").asInt()).isEqualTo(85);
85+
}
86+
87+
@Test
88+
void shouldPromoteSubjectIdToEnvelope() throws Exception {
89+
Map<String, Object> record = new LinkedHashMap<>();
90+
record.put("subject_id", "SUBJ-001");
91+
record.put("quality", 90);
92+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
93+
94+
assertThat(envelope.get("subject_id").asText()).isEqualTo("SUBJ-001");
95+
// subject_id also remains in payload
96+
assertThat(envelope.get("payload").get("subject_id").asText()).isEqualTo("SUBJ-001");
97+
}
98+
99+
@Test
100+
void shouldNotIncludeSubjectIdInEnvelopeWhenAbsentFromRecord() throws Exception {
101+
Map<String, Object> record = Map.of("name", "Alice");
102+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
103+
104+
assertThat(envelope.has("subject_id")).isFalse();
105+
}
106+
107+
@Test
108+
void shouldProduceValidJsonForEmptyRecord() throws Exception {
109+
Map<String, Object> record = Map.of();
110+
String output = serializer.serialize(record);
111+
112+
JsonNode envelope = mapper.readTree(output);
113+
assertThat(envelope.get("payload").isEmpty()).isTrue();
114+
}
115+
116+
@Test
117+
void shouldIncludeValidIso8601CreationDate() throws Exception {
118+
Map<String, Object> record = Map.of("field", "value");
119+
JsonNode envelope = mapper.readTree(serializer.serialize(record));
120+
121+
String creationDate = envelope.get("creation_date").asText();
122+
// ISO-8601 instant ends with Z
123+
assertThat(creationDate).endsWith("Z");
124+
assertThat(creationDate).matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?Z");
125+
}
126+
127+
@Test
128+
void shouldProduceValidJsonRoundTrip() throws Exception {
129+
Map<String, Object> record = Map.of("x", 100, "y", 200, "type", "ending");
130+
String output = serializer.serialize(record);
131+
132+
// Must parse without error
133+
JsonNode envelope = mapper.readTree(output);
134+
assertThat(envelope.isObject()).isTrue();
135+
assertThat(envelope.get("payload").get("x").asInt()).isEqualTo(100);
136+
}
137+
}

0 commit comments

Comments
 (0)