Skip to content

Commit 80d33da

Browse files
committed
test assets
1 parent 9f3c82c commit 80d33da

File tree

4 files changed

+239
-1
lines changed

4 files changed

+239
-1
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,5 +107,4 @@ env2/
107 107
ocrd.egg-info
108 108
/src
109 109
spec
110-
test/assets
111 110
.pytest_cache

test/assets/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from test.base import PWD
2+
3+
_PREFIX = 'file://' + PWD + '/'
4+
5+
METS_HEROLD = _PREFIX + 'assets/herold/mets.xml'
6+
METS_HEROLD_SMALL = _PREFIX + 'assets/herold/mets_one_file.xml'
7+
METS_HEROLD_PAGE_5 = _PREFIX + 'assets/herold/00000005.xml'

test/assets/herold/mets.xml

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd" xmlns:xlink="http://www.w3.org/1999/xlink">
3+
<mets:metsHdr CREATEDATE="2017-11-30T16:18:26">
4+
<mets:agent OTHERTYPE="SOFTWARE" ROLE="CREATOR" TYPE="OTHER">
5+
<mets:name>DFG-Koordinierungsprojekt zur Weiterentwicklung von Verfahren der Optical Character Recognition (OCR-D)</mets:name>
6+
<mets:note>OCR-D</mets:note>
7+
</mets:agent>
8+
</mets:metsHdr>
9+
<mets:dmdSec ID="DMDLOG_0001">
10+
<mets:mdWrap MDTYPE="MODS">
11+
<mets:xmlData>
12+
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
13+
<mods:location>
14+
<mods:physicalLocation authority="marcorg" displayLabel="Staatsbibliothek zu Berlin - Preußischer Kulturbesitz, Berlin, Germany">DE-1</mods:physicalLocation>
15+
<mods:shelfLocator>Gq 14350;Beil.3-1839</mods:shelfLocator>
16+
</mods:location>
17+
<mods:originInfo eventType="publication">
18+
<mods:dateIssued encoding="iso8601" keyDate="yes">1839</mods:dateIssued>
19+
</mods:originInfo>
20+
<mods:originInfo eventType="digitization">
21+
<mods:place>
22+
<mods:placeTerm type="text">Berlin</mods:placeTerm>
23+
</mods:place>
24+
<mods:dateCaptured encoding="iso8601">2013</mods:dateCaptured>
25+
<mods:publisher>Staatsbibliothek zu Berlin – Preußischer Kulturbesitz, Germany</mods:publisher>
26+
<mods:edition>[Electronic ed.]</mods:edition>
27+
</mods:originInfo>
28+
<mods:classification authority="ZVDD">Rechtswissenschaft</mods:classification>
29+
<mods:classification authority="ZVDD">Historische Drucke</mods:classification>
30+
<mods:relatedItem type="host">
31+
<mods:recordInfo>
32+
<mods:recordIdentifier source="gbv-ppn">PPN767122410</mods:recordIdentifier>
33+
</mods:recordInfo>
34+
</mods:relatedItem>
35+
<mods:recordInfo>
36+
<mods:recordIdentifier source="gbv-ppn">PPN767137728</mods:recordIdentifier>
37+
</mods:recordInfo>
38+
<mods:identifier type="purl">http://resolver.staatsbibliothek-berlin.de/SBB0000F29300010000</mods:identifier>
39+
<mods:titleInfo>
40+
<mods:title>Der Herold</mods:title>
41+
</mods:titleInfo>
42+
<mods:note type="source characteristics">P_Drucke_Territorialrecht</mods:note>
43+
<mods:part order="1839000000" type="host">
44+
<mods:detail>
45+
<mods:number>1839</mods:number>
46+
</mods:detail>
47+
</mods:part>
48+
<mods:language>
49+
<mods:languageTerm authority="iso639-2b" type="code">ger</mods:languageTerm>
50+
</mods:language>
51+
<mods:relatedItem type="series">
52+
<mods:titleInfo>
53+
<mods:title>Deutsches Territorialrecht des 19. Jahrhunderts</mods:title>
54+
</mods:titleInfo>
55+
</mods:relatedItem>
56+
<mods:physicalDescription>
57+
<mods:digitalOrigin>reformatted digital</mods:digitalOrigin>
58+
</mods:physicalDescription>
59+
<mods:accessCondition type="use and reproduction">CC BY-NC-SA 4.0 International</mods:accessCondition>
60+
<mods:typeOfResource>text</mods:typeOfResource>
61+
</mods:mods>
62+
</mets:xmlData>
63+
</mets:mdWrap>
64+
</mets:dmdSec>
65+
<mets:dmdSec ID="DMDLOG_0002">
66+
<mets:mdWrap MDTYPE="MODS">
67+
<mets:xmlData>
68+
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
69+
<mods:titleInfo>
70+
<mods:title>4. Januar-30. November = No. 1-20</mods:title>
71+
</mods:titleInfo>
72+
</mods:mods>
73+
</mets:xmlData>
74+
</mets:mdWrap>
75+
</mets:dmdSec>
76+
<mets:amdSec ID="AMD">
77+
<mets:rightsMD ID="RIGHTS">
78+
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVRIGHTS">
79+
<mets:xmlData>
80+
<dv:rights xmlns:dv="http://dfg-viewer.de/">
81+
<dv:owner>Staatsbibliothek zu Berlin - Preußischer Kulturbesitz</dv:owner>
82+
<dv:ownerLogo>http://resolver.staatsbibliothek-berlin.de/SBB0000000100000000</dv:ownerLogo>
83+
<dv:ownerSiteURL>http://www.staatsbibliothek-berlin.de</dv:ownerSiteURL>
84+
<dv:ownerContact>mailto:info@sbb.spk-berlin.de</dv:ownerContact>
85+
</dv:rights>
86+
</mets:xmlData>
87+
</mets:mdWrap>
88+
</mets:rightsMD>
89+
<mets:digiprovMD ID="DIGIPROV">
90+
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVLINKS">
91+
<mets:xmlData>
92+
<dv:links xmlns:dv="http://dfg-viewer.de/">
93+
<dv:reference>http://www.stabikat.de/DB=1/PPN?PPN=767137728 </dv:reference>
94+
<dv:presentation>http://digital.staatsbibliothek-berlin.de/dms/werkansicht/?PPN=PPN767137728</dv:presentation>
95+
</dv:links>
96+
</mets:xmlData>
97+
</mets:mdWrap>
98+
</mets:digiprovMD>
99+
</mets:amdSec>
100+
<mets:fileSec>
101+
<mets:fileGrp USE="INPUT">
102+
<mets:file ID="FILE_0001_IMAGE" MIMETYPE="image/tif">
103+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000001.tif" /></mets:file>
104+
<mets:file ID="FILE_0002_IMAGE" MIMETYPE="image/tif">
105+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000002.tif" /></mets:file>
106+
<mets:file ID="FILE_0003_IMAGE" MIMETYPE="image/tif">
107+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000003.tif" /></mets:file>
108+
<mets:file ID="FILE_0004_IMAGE" MIMETYPE="image/tif">
109+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000004.tif" /></mets:file>
110+
<mets:file ID="FILE_0005_IMAGE" MIMETYPE="image/tif">
111+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000005.tif" /></mets:file>
112+
</mets:fileGrp>
113+
<mets:fileGrp USE="FULLTEXT">
114+
<mets:file ID="FILE_0001_FULLTEXT" MIMETYPE="text/xml">
115+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000001.xml" /></mets:file>
116+
<mets:file ID="FILE_0002_FULLTEXT" MIMETYPE="text/xml">
117+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000002.xml" /></mets:file>
118+
<mets:file ID="FILE_0003_FULLTEXT" MIMETYPE="text/xml">
119+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000003.xml" /></mets:file>
120+
<mets:file ID="FILE_0004_FULLTEXT" MIMETYPE="text/xml">
121+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000004.xml" /></mets:file>
122+
<mets:file ID="FILE_0005_FULLTEXT" MIMETYPE="text/xml">
123+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000005.xml" /></mets:file>
124+
</mets:fileGrp>
125+
</mets:fileSec>
126+
</mets:mets>

test/assets/herold/mets_one_file.xml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd" xmlns:xlink="http://www.w3.org/1999/xlink">
3+
<mets:metsHdr CREATEDATE="2017-11-30T16:18:26">
4+
<mets:agent OTHERTYPE="SOFTWARE" ROLE="CREATOR" TYPE="OTHER">
5+
<mets:name>DFG-Koordinierungsprojekt zur Weiterentwicklung von Verfahren der Optical Character Recognition (OCR-D)</mets:name>
6+
<mets:note>OCR-D</mets:note>
7+
</mets:agent>
8+
</mets:metsHdr>
9+
<mets:dmdSec ID="DMDLOG_0001">
10+
<mets:mdWrap MDTYPE="MODS">
11+
<mets:xmlData>
12+
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
13+
<mods:location>
14+
<mods:physicalLocation authority="marcorg" displayLabel="Staatsbibliothek zu Berlin - Preußischer Kulturbesitz, Berlin, Germany">DE-1</mods:physicalLocation>
15+
<mods:shelfLocator>Gq 14350;Beil.3-1839</mods:shelfLocator>
16+
</mods:location>
17+
<mods:originInfo eventType="publication">
18+
<mods:dateIssued encoding="iso8601" keyDate="yes">1839</mods:dateIssued>
19+
</mods:originInfo>
20+
<mods:originInfo eventType="digitization">
21+
<mods:place>
22+
<mods:placeTerm type="text">Berlin</mods:placeTerm>
23+
</mods:place>
24+
<mods:dateCaptured encoding="iso8601">2013</mods:dateCaptured>
25+
<mods:publisher>Staatsbibliothek zu Berlin – Preußischer Kulturbesitz, Germany</mods:publisher>
26+
<mods:edition>[Electronic ed.]</mods:edition>
27+
</mods:originInfo>
28+
<mods:classification authority="ZVDD">Rechtswissenschaft</mods:classification>
29+
<mods:classification authority="ZVDD">Historische Drucke</mods:classification>
30+
<mods:relatedItem type="host">
31+
<mods:recordInfo>
32+
<mods:recordIdentifier source="gbv-ppn">PPN767122410</mods:recordIdentifier>
33+
</mods:recordInfo>
34+
</mods:relatedItem>
35+
<mods:recordInfo>
36+
<mods:recordIdentifier source="gbv-ppn">PPN767137728</mods:recordIdentifier>
37+
</mods:recordInfo>
38+
<mods:identifier type="purl">http://resolver.staatsbibliothek-berlin.de/SBB0000F29300010000</mods:identifier>
39+
<mods:titleInfo>
40+
<mods:title>Der Herold</mods:title>
41+
</mods:titleInfo>
42+
<mods:note type="source characteristics">P_Drucke_Territorialrecht</mods:note>
43+
<mods:part order="1839000000" type="host">
44+
<mods:detail>
45+
<mods:number>1839</mods:number>
46+
</mods:detail>
47+
</mods:part>
48+
<mods:language>
49+
<mods:languageTerm authority="iso639-2b" type="code">ger</mods:languageTerm>
50+
</mods:language>
51+
<mods:relatedItem type="series">
52+
<mods:titleInfo>
53+
<mods:title>Deutsches Territorialrecht des 19. Jahrhunderts</mods:title>
54+
</mods:titleInfo>
55+
</mods:relatedItem>
56+
<mods:physicalDescription>
57+
<mods:digitalOrigin>reformatted digital</mods:digitalOrigin>
58+
</mods:physicalDescription>
59+
<mods:accessCondition type="use and reproduction">CC BY-NC-SA 4.0 International</mods:accessCondition>
60+
<mods:typeOfResource>text</mods:typeOfResource>
61+
</mods:mods>
62+
</mets:xmlData>
63+
</mets:mdWrap>
64+
</mets:dmdSec>
65+
<mets:dmdSec ID="DMDLOG_0002">
66+
<mets:mdWrap MDTYPE="MODS">
67+
<mets:xmlData>
68+
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
69+
<mods:titleInfo>
70+
<mods:title>4. Januar-30. November = No. 1-20</mods:title>
71+
</mods:titleInfo>
72+
</mods:mods>
73+
</mets:xmlData>
74+
</mets:mdWrap>
75+
</mets:dmdSec>
76+
<mets:amdSec ID="AMD">
77+
<mets:rightsMD ID="RIGHTS">
78+
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVRIGHTS">
79+
<mets:xmlData>
80+
<dv:rights xmlns:dv="http://dfg-viewer.de/">
81+
<dv:owner>Staatsbibliothek zu Berlin - Preußischer Kulturbesitz</dv:owner>
82+
<dv:ownerLogo>http://resolver.staatsbibliothek-berlin.de/SBB0000000100000000</dv:ownerLogo>
83+
<dv:ownerSiteURL>http://www.staatsbibliothek-berlin.de</dv:ownerSiteURL>
84+
<dv:ownerContact>mailto:info@sbb.spk-berlin.de</dv:ownerContact>
85+
</dv:rights>
86+
</mets:xmlData>
87+
</mets:mdWrap>
88+
</mets:rightsMD>
89+
<mets:digiprovMD ID="DIGIPROV">
90+
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="DVLINKS">
91+
<mets:xmlData>
92+
<dv:links xmlns:dv="http://dfg-viewer.de/">
93+
<dv:reference>http://www.stabikat.de/DB=1/PPN?PPN=767137728 </dv:reference>
94+
<dv:presentation>http://digital.staatsbibliothek-berlin.de/dms/werkansicht/?PPN=PPN767137728</dv:presentation>
95+
</dv:links>
96+
</mets:xmlData>
97+
</mets:mdWrap>
98+
</mets:digiprovMD>
99+
</mets:amdSec>
100+
<mets:fileSec>
101+
<mets:fileGrp USE="INPUT">
102+
<mets:file ID="FILE_0005_IMAGE" MIMETYPE="image/tif">
103+
<mets:FLocat LOCTYPE="URL" xlink:href="https://github.com/OCR-D/spec/raw/master/io/example/00000005.tif" /></mets:file>
104+
</mets:fileGrp>
105+
</mets:fileSec>
106+
</mets:mets>

0 commit comments

Comments
 (0)