Skip to content

Commit a4e0f1e

Browse files
julijane authored and joeyparrish committed
fix(TTML): Correctly handle multiple samples in a segment (#8088)
Fixes #8087.
Implements handling of multiple samples in an MP4/ISOBMFF/DASH TTML segment/fragment. Such segments are allowed by ISO 14496-12 and ISO 23000-19, and gpac creates such segments. The prior code just treated the full MDAT as one TTML XML document and tried to parse it as a whole, without accounting for the individual sample(s).
A test case is included which was created by taking the test data from ttml-segment.mp4 and splitting the subtitles into two independent TTML XML documents, which were then stored as individual samples.
The test data for the pre-existing multiple-MDAT test case was invalid. It was created by taking the same ttml-segment.mp4 as a source and just duplicating the MDAT box, but without then also fixing the TRUN box. The duplicated data was thus not referenced. The test case still worked, because the prior code did not look at the TRUN box or the sample specification at all and simply handled each full MDAT box as one sample. The test data was replaced with a new file, which is basically the same as for the multiple-samples case, but with the two samples split into two MDAT boxes.
Backported to v4.9.x.
1 parent bb23fad commit a4e0f1e

File tree

7 files changed

+122
-10
lines changed

7 files changed

+122
-10
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ Jesper Haug Karsrud <jesper.karsrud@gmail.com>
5656
Johan Sundström <oyasumi@gmail.com>
5757
Jonas Birmé <jonas.birme@eyevinn.se>
5858
Jozef Chúťka <jozefchutka@gmail.com>
59+
Juliane Holzt <juliane@box.fqdn.org>
5960
Jun Hong Chong <chongjunhong@gmail.com>
6061
Jürgen Kartnaller <kartnaller@lovelysystems.com>
6162
Justin Swaney <justin.mark.swaney@gmail.com>

CONTRIBUTORS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ Jonas Birmé <jonas.birme@eyevinn.se>
8484
Jono Ward <jonoward@gmail.com>
8585
Jozef Chúťka <jozefchutka@gmail.com>
8686
Julian Domingo <juliandomingo@google.com>
87+
Juliane Holzt <juliane@box.fqdn.org>
8788
Jun Hong Chong <chongjunhong@gmail.com>
8889
Jürgen Kartnaller <kartnaller@lovelysystems.com>
8990
Justin Swaney <justin.mark.swaney@gmail.com>

lib/text/mp4_ttml_parser.js

Lines changed: 94 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,14 @@
66

77
goog.provide('shaka.text.Mp4TtmlParser');
88

9+
goog.require('goog.asserts');
910
goog.require('shaka.text.TextEngine');
1011
goog.require('shaka.text.TtmlTextParser');
12+
goog.require('shaka.util.BufferUtils');
1113
goog.require('shaka.util.Error');
14+
goog.require('shaka.util.Mp4BoxParsers');
1215
goog.require('shaka.util.Mp4Parser');
16+
goog.require('shaka.util.Uint8ArrayUtils');
1317

1418

1519
/**
@@ -78,25 +82,109 @@ shaka.text.Mp4TtmlParser = class {
7882
parseMedia(data, time, uri) {
7983
const Mp4Parser = shaka.util.Mp4Parser;
8084

81-
let sawMDAT = false;
8285
let payload = [];
86+
let defaultSampleSize = null;
87+
88+
/** @type {!Array<Uint8Array>} */
89+
const mdats = [];
90+
91+
/** @type {!Map<number,!Array<number>>} */
92+
const subSampleSizesPerSample = new Map();
93+
94+
/** @type {!Array<number>} */
95+
const sampleSizes = [];
8396

8497
const parser = new Mp4Parser()
98+
.box('moof', Mp4Parser.children)
99+
.box('traf', Mp4Parser.children)
100+
.fullBox('tfhd', (box) => {
101+
goog.asserts.assert(
102+
box.flags != null,
103+
'A TFHD box should have a valid flags value');
104+
const parsedTFHDBox = shaka.util.Mp4BoxParsers.parseTFHD(
105+
box.reader, box.flags);
106+
defaultSampleSize = parsedTFHDBox.defaultSampleSize;
107+
})
108+
.fullBox('trun', (box) => {
109+
goog.asserts.assert(
110+
box.version != null,
111+
'A TRUN box should have a valid version value');
112+
goog.asserts.assert(
113+
box.flags != null,
114+
'A TRUN box should have a valid flags value');
115+
116+
const parsedTRUNBox = shaka.util.Mp4BoxParsers.parseTRUN(
117+
box.reader, box.version, box.flags);
118+
119+
for (const sample of parsedTRUNBox.sampleData) {
120+
const sampleSize =
121+
sample.sampleSize || defaultSampleSize || 0;
122+
sampleSizes.push(sampleSize);
123+
}
124+
})
125+
.fullBox('subs', (box) => {
126+
const reader = box.reader;
127+
const entryCount = reader.readUint32();
128+
let currentSampleNum = -1;
129+
for (let i = 0; i < entryCount; i++) {
130+
const sampleDelta = reader.readUint32();
131+
currentSampleNum += sampleDelta;
132+
const subsampleCount = reader.readUint16();
133+
const subsampleSizes = [];
134+
for (let j = 0; j < subsampleCount; j++) {
135+
if (box.version == 1) {
136+
subsampleSizes.push(reader.readUint32());
137+
} else {
138+
subsampleSizes.push(reader.readUint16());
139+
}
140+
reader.readUint8(); // priority
141+
reader.readUint8(); // discardable
142+
reader.readUint32(); // codec_specific_parameters
143+
}
144+
subSampleSizesPerSample.set(currentSampleNum, subsampleSizes);
145+
}
146+
})
85147
.box('mdat', Mp4Parser.allData((data) => {
86-
sawMDAT = true;
87-
// Join this to any previous payload, in case the mp4 has multiple
88-
// mdats.
89-
payload = payload.concat(this.parser_.parseMedia(data, time, uri));
148+
// We collect all of the mdats first, before parsing any of them.
149+
// This is necessary in case the mp4 has multiple mdats.
150+
mdats.push(data);
90151
}));
91152
parser.parse(data, /* partialOkay= */ false);
92153

93-
if (!sawMDAT) {
154+
if (mdats.length == 0) {
94155
throw new shaka.util.Error(
95156
shaka.util.Error.Severity.CRITICAL,
96157
shaka.util.Error.Category.TEXT,
97158
shaka.util.Error.Code.INVALID_MP4_TTML);
98159
}
99160

161+
const fullData =
162+
shaka.util.Uint8ArrayUtils.concat(...mdats);
163+
164+
let sampleOffset = 0;
165+
for (let sampleNum = 0; sampleNum < sampleSizes.length; sampleNum++) {
166+
const sampleData =
167+
shaka.util.BufferUtils.toUint8(fullData, sampleOffset,
168+
sampleSizes[sampleNum]);
169+
sampleOffset += sampleSizes[sampleNum];
170+
171+
const subSampleSizes = subSampleSizesPerSample.get(sampleNum);
172+
173+
if (subSampleSizes && subSampleSizes.length) {
174+
const contentData =
175+
shaka.util.BufferUtils.toUint8(sampleData, 0, subSampleSizes[0]);
176+
let subOffset = subSampleSizes[0];
177+
for (let i = 1; i < subSampleSizes.length; i++) {
178+
subOffset += subSampleSizes[i];
179+
}
180+
payload = payload.concat(
181+
this.parser_.parseMedia(contentData, time, uri));
182+
} else {
183+
payload = payload.concat(
184+
this.parser_.parseMedia(sampleData, time, uri));
185+
}
186+
}
187+
100188
return payload;
101189
}
102190
};
2.98 KB
Binary file not shown.
2.97 KB
Binary file not shown.
-4.13 KB
Binary file not shown.

test/text/mp4_ttml_parser_unit.js

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@ describe('Mp4TtmlParser', () => {
88
const ttmlInitSegmentUri = '/base/test/test/assets/ttml-init.mp4';
99
const ttmlSegmentUri = '/base/test/test/assets/ttml-segment.mp4';
1010
const ttmlSegmentMultipleMDATUri =
11-
'/base/test/test/assets/ttml-segment-multiplemdat.mp4';
11+
'/base/test/test/assets/ttml-segment-multiple-mdat.mp4';
12+
const ttmlSegmentMultipleSampleUri =
13+
'/base/test/test/assets/ttml-segment-multiple-sample.mp4';
1214
const audioInitSegmentUri = '/base/test/test/assets/sintel-audio-init.mp4';
1315

1416
/** @type {!Uint8Array} */
@@ -18,19 +20,23 @@ describe('Mp4TtmlParser', () => {
1820
/** @type {!Uint8Array} */
1921
let ttmlSegmentMultipleMDAT;
2022
/** @type {!Uint8Array} */
23+
let ttmlSegmentMultipleSample;
24+
/** @type {!Uint8Array} */
2125
let audioInitSegment;
2226

2327
beforeAll(async () => {
2428
const responses = await Promise.all([
2529
shaka.test.Util.fetch(ttmlInitSegmentUri),
2630
shaka.test.Util.fetch(ttmlSegmentUri),
2731
shaka.test.Util.fetch(ttmlSegmentMultipleMDATUri),
32+
shaka.test.Util.fetch(ttmlSegmentMultipleSampleUri),
2833
shaka.test.Util.fetch(audioInitSegmentUri),
2934
]);
3035
ttmlInitSegment = shaka.util.BufferUtils.toUint8(responses[0]);
3136
ttmlSegment = shaka.util.BufferUtils.toUint8(responses[1]);
3237
ttmlSegmentMultipleMDAT = shaka.util.BufferUtils.toUint8(responses[2]);
33-
audioInitSegment = shaka.util.BufferUtils.toUint8(responses[3]);
38+
ttmlSegmentMultipleSample = shaka.util.BufferUtils.toUint8(responses[3]);
39+
audioInitSegment = shaka.util.BufferUtils.toUint8(responses[4]);
3440
});
3541

3642
it('parses init segment', () => {
@@ -50,8 +56,24 @@ describe('Mp4TtmlParser', () => {
5056
expect(ret[0].nestedCues.length).toBe(1);
5157
expect(ret[1].nestedCues.length).toBe(1);
5258
// Cues.
53-
expect(ret[0].nestedCues[0].nestedCues.length).toBe(10);
54-
expect(ret[1].nestedCues[0].nestedCues.length).toBe(10);
59+
expect(ret[0].nestedCues[0].nestedCues.length).toBe(5);
60+
expect(ret[1].nestedCues[0].nestedCues.length).toBe(5);
61+
});
62+
63+
it('handles media segments with multiple sample', () => {
64+
const parser = new shaka.text.Mp4TtmlParser();
65+
parser.parseInit(ttmlInitSegment);
66+
const time =
67+
{periodStart: 0, segmentStart: 0, segmentEnd: 60, vttOffset: 0};
68+
const ret = parser.parseMedia(ttmlSegmentMultipleSample, time, null);
69+
// Bodies.
70+
expect(ret.length).toBe(2);
71+
// Divs.
72+
expect(ret[0].nestedCues.length).toBe(1);
73+
expect(ret[1].nestedCues.length).toBe(1);
74+
// Cues.
75+
expect(ret[0].nestedCues[0].nestedCues.length).toBe(5);
76+
expect(ret[1].nestedCues[0].nestedCues.length).toBe(5);
5577
});
5678

5779
it('accounts for offset', () => {

0 commit comments

Comments
 (0)