Skip to content

Commit 2562384

Browse files
authored
fix(TTML): Correctly handle multiple samples in a segment (#8088)
Fixes #8087 Implements handling of multiple samples in an MP4/ISOBMFF/DASH TTML segment/fragment. Such segments are allowed by ISO 14496-12 and ISO 23000-19, and gpac creates them. The prior code treated the full MDAT as one TTML XML document and tried to parse it as a whole, without accounting for the individual sample(s). A test case is included, which was created by taking the test data from ttml-segment.mp4 and splitting the subtitles into two independent TTML XML documents, which were then stored as individual samples. The test data for the previously existing multiple-MDAT test case was invalid: it was created by taking the same ttml-segment.mp4 as a source and just duplicating the MDAT box, without also fixing the TRUN box, so the duplicated data was not referenced. The test case still worked because the prior code did not look at the TRUN box or the sample specification at all, and simply handled each full MDAT box as one sample. The test data was replaced with a new file, which is basically the same as for the multiple-samples case, but with the two samples split into two MDAT boxes.
1 parent a614786 commit 2562384

File tree

6 files changed

+110
-34
lines changed

6 files changed

+110
-34
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Jesper Haug Karsrud <jesper.karsrud@gmail.com>
5656
Johan Sundström <oyasumi@gmail.com>
5757
Jonas Birmé <jonas.birme@eyevinn.se>
5858
Jozef Chúťka <jozefchutka@gmail.com>
59+
Juliane Holzt <juliane@box.fqdn.org>
5960
Jun Hong Chong <chongjunhong@gmail.com>
6061
Jürgen Kartnaller <kartnaller@lovelysystems.com>
6162
Justin Swaney <justin.mark.swaney@gmail.com>

CONTRIBUTORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Jono Ward <jonoward@gmail.com>
8686
Jozef Chúťka <jozefchutka@gmail.com>
8787
Juan Manuel Tomás <juant@qualabs.com>
8888
Julian Domingo <juliandomingo@google.com>
89+
Juliane Holzt <juliane@box.fqdn.org>
8990
Jun Hong Chong <chongjunhong@gmail.com>
9091
Jürgen Kartnaller <kartnaller@lovelysystems.com>
9192
Justin Swaney <justin.mark.swaney@gmail.com>

lib/text/mp4_ttml_parser.js

Lines changed: 81 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66

77
goog.provide('shaka.text.Mp4TtmlParser');
88

9+
goog.require('goog.asserts');
910
goog.require('shaka.text.TextEngine');
1011
goog.require('shaka.text.TtmlTextParser');
1112
goog.require('shaka.util.BufferUtils');
1213
goog.require('shaka.util.Error');
14+
goog.require('shaka.util.Mp4BoxParsers');
1315
goog.require('shaka.util.Mp4Parser');
1416
goog.require('shaka.util.Uint8ArrayUtils');
1517

@@ -80,67 +82,117 @@ shaka.text.Mp4TtmlParser = class {
8082
parseMedia(data, time, uri) {
8183
const Mp4Parser = shaka.util.Mp4Parser;
8284

83-
let sawMDAT = false;
8485
let payload = [];
86+
let defaultSampleSize = null;
87+
88+
/** @type {!Array<Uint8Array>} */
89+
const mdats = [];
90+
91+
/* @type {!Map<number,!Array<number>>} */
92+
const subSampleSizesPerSample = new Map();
8593

8694
/** @type {!Array<number>} */
87-
let subSizes = [];
95+
const sampleSizes = [];
8896

8997
const parser = new Mp4Parser()
9098
.box('moof', Mp4Parser.children)
9199
.box('traf', Mp4Parser.children)
100+
.fullBox('tfhd', (box) => {
101+
goog.asserts.assert(
102+
box.flags != null,
103+
'A TFHD box should have a valid flags value');
104+
const parsedTFHDBox = shaka.util.Mp4BoxParsers.parseTFHD(
105+
box.reader, box.flags);
106+
defaultSampleSize = parsedTFHDBox.defaultSampleSize;
107+
})
108+
.fullBox('trun', (box) => {
109+
goog.asserts.assert(
110+
box.version != null,
111+
'A TRUN box should have a valid version value');
112+
goog.asserts.assert(
113+
box.flags != null,
114+
'A TRUN box should have a valid flags value');
115+
116+
const parsedTRUNBox = shaka.util.Mp4BoxParsers.parseTRUN(
117+
box.reader, box.version, box.flags);
118+
119+
for (const sample of parsedTRUNBox.sampleData) {
120+
const sampleSize =
121+
sample.sampleSize || defaultSampleSize || 0;
122+
sampleSizes.push(sampleSize);
123+
}
124+
})
92125
.fullBox('subs', (box) => {
93-
subSizes = [];
94126
const reader = box.reader;
95127
const entryCount = reader.readUint32();
128+
let currentSampleNum = -1;
96129
for (let i = 0; i < entryCount; i++) {
97-
reader.readUint32(); // sample_delta
130+
const sampleDelta = reader.readUint32();
131+
currentSampleNum += sampleDelta;
98132
const subsampleCount = reader.readUint16();
133+
const subsampleSizes = [];
99134
for (let j = 0; j < subsampleCount; j++) {
100135
if (box.version == 1) {
101-
subSizes.push(reader.readUint32());
136+
subsampleSizes.push(reader.readUint32());
102137
} else {
103-
subSizes.push(reader.readUint16());
138+
subsampleSizes.push(reader.readUint16());
104139
}
105140
reader.readUint8(); // priority
106141
reader.readUint8(); // discardable
107142
reader.readUint32(); // codec_specific_parameters
108143
}
144+
subSampleSizesPerSample.set(currentSampleNum, subsampleSizes);
109145
}
110146
})
111147
.box('mdat', Mp4Parser.allData((data) => {
112-
sawMDAT = true;
113-
// Join this to any previous payload, in case the mp4 has multiple
114-
// mdats.
115-
if (subSizes.length) {
116-
const contentData =
117-
shaka.util.BufferUtils.toUint8(data, 0, subSizes[0]);
118-
const images = [];
119-
let offset = subSizes[0];
120-
for (let i = 1; i < subSizes.length; i++) {
121-
const imageData =
122-
shaka.util.BufferUtils.toUint8(data, offset, subSizes[i]);
123-
const raw =
124-
shaka.util.Uint8ArrayUtils.toStandardBase64(imageData);
125-
images.push('data:image/png;base64,' + raw);
126-
offset += subSizes[i];
127-
}
128-
payload = payload.concat(
129-
this.parser_.parseMedia(contentData, time, uri, images));
130-
} else {
131-
payload = payload.concat(
132-
this.parser_.parseMedia(data, time, uri, /* images= */ []));
133-
}
148+
// We collect all of the mdats first, before parsing any of them.
149+
// This is necessary in case the mp4 has multiple mdats.
150+
mdats.push(data);
134151
}));
135152
parser.parse(data, /* partialOkay= */ false);
136153

137-
if (!sawMDAT) {
154+
if (mdats.length == 0) {
138155
throw new shaka.util.Error(
139156
shaka.util.Error.Severity.CRITICAL,
140157
shaka.util.Error.Category.TEXT,
141158
shaka.util.Error.Code.INVALID_MP4_TTML);
142159
}
143160

161+
const fullData =
162+
shaka.util.Uint8ArrayUtils.concat(...mdats);
163+
164+
let sampleOffset = 0;
165+
for (let sampleNum = 0; sampleNum < sampleSizes.length; sampleNum++) {
166+
const sampleData =
167+
shaka.util.BufferUtils.toUint8(fullData, sampleOffset,
168+
sampleSizes[sampleNum]);
169+
sampleOffset += sampleSizes[sampleNum];
170+
171+
const subSampleSizes = subSampleSizesPerSample.get(sampleNum);
172+
173+
if (subSampleSizes && subSampleSizes.length) {
174+
const contentData =
175+
shaka.util.BufferUtils.toUint8(sampleData, 0, subSampleSizes[0]);
176+
const images = [];
177+
let subOffset = subSampleSizes[0];
178+
for (let i = 1; i < subSampleSizes.length; i++) {
179+
const imageData =
180+
shaka.util.BufferUtils.toUint8(data, subOffset,
181+
subSampleSizes[i]);
182+
const raw =
183+
shaka.util.Uint8ArrayUtils.toStandardBase64(imageData);
184+
images.push('data:image/png;base64,' + raw);
185+
subOffset += subSampleSizes[i];
186+
}
187+
payload = payload.concat(
188+
this.parser_.parseMedia(contentData, time, uri, images));
189+
} else {
190+
payload = payload.concat(
191+
this.parser_.parseMedia(sampleData, time, uri,
192+
/* images= */ []));
193+
}
194+
}
195+
144196
return payload;
145197
}
146198
};
-1.16 KB
Binary file not shown.
2.97 KB
Binary file not shown.

test/text/mp4_ttml_parser_unit.js

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ describe('Mp4TtmlParser', () => {
99
const ttmlSegmentUri = '/base/test/test/assets/ttml-segment.mp4';
1010
const ttmlSegmentMultipleMDATUri =
1111
'/base/test/test/assets/ttml-segment-multiple-mdat.mp4';
12+
const ttmlSegmentMultipleSampleUri =
13+
'/base/test/test/assets/ttml-segment-multiple-sample.mp4';
1214
const imscImageInitSegmentUri =
1315
'/base/test/test/assets/imsc-image-init.cmft';
1416
const imscImageSegmentUri =
@@ -22,6 +24,8 @@ describe('Mp4TtmlParser', () => {
2224
/** @type {!Uint8Array} */
2325
let ttmlSegmentMultipleMDAT;
2426
/** @type {!Uint8Array} */
27+
let ttmlSegmentMultipleSample;
28+
/** @type {!Uint8Array} */
2529
let imscImageInitSegment;
2630
/** @type {!Uint8Array} */
2731
let imscImageSegment;
@@ -33,16 +37,18 @@ describe('Mp4TtmlParser', () => {
3337
shaka.test.Util.fetch(ttmlInitSegmentUri),
3438
shaka.test.Util.fetch(ttmlSegmentUri),
3539
shaka.test.Util.fetch(ttmlSegmentMultipleMDATUri),
40+
shaka.test.Util.fetch(ttmlSegmentMultipleSampleUri),
3641
shaka.test.Util.fetch(imscImageInitSegmentUri),
3742
shaka.test.Util.fetch(imscImageSegmentUri),
3843
shaka.test.Util.fetch(audioInitSegmentUri),
3944
]);
4045
ttmlInitSegment = shaka.util.BufferUtils.toUint8(responses[0]);
4146
ttmlSegment = shaka.util.BufferUtils.toUint8(responses[1]);
4247
ttmlSegmentMultipleMDAT = shaka.util.BufferUtils.toUint8(responses[2]);
43-
imscImageInitSegment = shaka.util.BufferUtils.toUint8(responses[3]);
44-
imscImageSegment = shaka.util.BufferUtils.toUint8(responses[4]);
45-
audioInitSegment = shaka.util.BufferUtils.toUint8(responses[5]);
48+
ttmlSegmentMultipleSample = shaka.util.BufferUtils.toUint8(responses[3]);
49+
imscImageInitSegment = shaka.util.BufferUtils.toUint8(responses[4]);
50+
imscImageSegment = shaka.util.BufferUtils.toUint8(responses[5]);
51+
audioInitSegment = shaka.util.BufferUtils.toUint8(responses[6]);
4652
});
4753

4854
it('parses init segment', () => {
@@ -62,8 +68,24 @@ describe('Mp4TtmlParser', () => {
6268
expect(ret[0].nestedCues.length).toBe(1);
6369
expect(ret[1].nestedCues.length).toBe(1);
6470
// Cues.
65-
expect(ret[0].nestedCues[0].nestedCues.length).toBe(10);
66-
expect(ret[1].nestedCues[0].nestedCues.length).toBe(10);
71+
expect(ret[0].nestedCues[0].nestedCues.length).toBe(5);
72+
expect(ret[1].nestedCues[0].nestedCues.length).toBe(5);
73+
});
74+
75+
it('handles media segments with multiple sample', () => {
76+
const parser = new shaka.text.Mp4TtmlParser();
77+
parser.parseInit(ttmlInitSegment);
78+
const time =
79+
{periodStart: 0, segmentStart: 0, segmentEnd: 60, vttOffset: 0};
80+
const ret = parser.parseMedia(ttmlSegmentMultipleSample, time, null);
81+
// Bodies.
82+
expect(ret.length).toBe(2);
83+
// Divs.
84+
expect(ret[0].nestedCues.length).toBe(1);
85+
expect(ret[1].nestedCues.length).toBe(1);
86+
// Cues.
87+
expect(ret[0].nestedCues[0].nestedCues.length).toBe(5);
88+
expect(ret[1].nestedCues[0].nestedCues.length).toBe(5);
6789
});
6890

6991
it('accounts for offset', () => {

0 commit comments

Comments
 (0)