Skip to content

Commit 693440a

Browse files
committed
[Frontend] Parser update, wip, not called yet
1 parent 2cb82ad commit 693440a

File tree

5 files changed

+274
-0
lines changed

5 files changed

+274
-0
lines changed

src/ts/dataImport/importers/text.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ export const importText = async (
2222
return {
2323
'title': textName,
2424
'backgroundImage': uint8ArrayToBase64( await ( await loadFileFromDisk( image ) ).bytes() ),
25+
'language': 'undefined', //TODO: language lookup
2526
'characterBoundingBoxes': boundingBoxes.characters,
2627
'wordBoundingBoxes': boundingBoxes.words,
2728
'foreignId': Number( textId )
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import {
2+
InvalidIndexNameError,
3+
MultipleTextIDsWithoutSpecifiedTextIDError
4+
} from '../util/errors';
5+
import type {
6+
ImportAnnotation
7+
} from '@/types/import-annotation';
8+
9+
/**
 * Parses a CSV of algorithm-generated fixation-to-character-box assignments.
 *
 * Every data row contributes one annotation to the entry of the algorithm
 * named in that row's algorithm column. Header names and cell values are
 * trimmed before use. Lines are split on plain commas, so quoted fields that
 * contain commas are not supported.
 *
 * @param text - Raw CSV content; the first non-blank line is the header.
 * @param title - Title stored on every algorithm entry that gets created.
 * @param textId - When given, only rows whose text column equals this value
 *   are imported. When omitted (or empty), every row must carry the same
 *   text ID.
 * @param textName - Header name of the text ID column.
 * @param algorithm_id - Header name of the algorithm ID column.
 * @param fixationName - Header name of the fixation ID column.
 * @param boxName - Header name of the character box ID column.
 * @param dGeomName - Header name of the optional `dGeom` column.
 * @param pShareName - Header name of the optional `pShare` column.
 * @returns Annotations keyed by algorithm ID; empty when there are no data rows.
 * @throws InvalidIndexNameError when a required column is missing from the
 *   header (this includes completely empty input).
 * @throws MultipleTextIDsWithoutSpecifiedTextIDError when no `textId` is given
 *   and the rows reference more than one text ID.
 */
export const parseAlgorithmAnnotationsCSV = (
    text: string,
    title: string,
    textId?: string,
    textName: string = 'text',
    algorithm_id: string = 'algorithm_id',
    fixationName: string = 'fix_uid',
    boxName: string = 'char_uid',
    dGeomName: string = 'D_geom',
    pShareName: string = 'P_share'
): ImportAnnotation => {
    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    // Empty input produces an empty header, which the column checks below reject.
    const header = ( lines.shift() ?? '' ).split( ',' )
        .map( h => h.trim() );
    const algorithmIndex = header.indexOf( algorithm_id );
    const textIndex = header.indexOf( textName );
    const fixIndex = header.indexOf( fixationName );
    const boxIndex = header.indexOf( boxName );
    const dGeomIndex = header.indexOf( dGeomName );
    const pShareIndex = header.indexOf( pShareName );

    if ( fixIndex < 0 )
        throw new InvalidIndexNameError( 'fixation ID' );
    else if ( boxIndex < 0 )
        throw new InvalidIndexNameError( 'character ID' );
    else if ( algorithmIndex < 0 )
        throw new InvalidIndexNameError( 'algorithm ID' );
    else if ( textIndex < 0 )
        throw new InvalidIndexNameError( 'text ID' );

    const rows = lines.map( line => line.split( ',' ).map( cell => cell.trim() ) );
    // Undefined for a header-only file; the loop below then never runs.
    const firstEncounteredTextID = rows[0]?.[ textIndex ];
    // Keyed by algorithm ID.
    const annotations: ImportAnnotation = {};

    for ( const cols of rows ) {
        const rowTextId = cols[ textIndex ];

        // Without an explicit text ID the file may only describe a single text.
        if ( !textId && rowTextId !== firstEncounteredTextID ) {
            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
        }

        // With an explicit text ID, rows belonging to other texts are ignored.
        if ( textId !== undefined && rowTextId !== textId ) {
            continue;
        }

        const algorithm = cols[ algorithmIndex ]!;
        let group = annotations[ algorithm ];

        if ( !group ) {
            group = {
                'title': title,
                'annotations': []
            };
            annotations[ algorithm ] = group;
        }

        // The optional columns are only emitted when present and non-empty.
        const dGeomRaw = dGeomIndex >= 0 ? cols[ dGeomIndex ] : undefined;
        const pShareRaw = pShareIndex >= 0 ? cols[ pShareIndex ] : undefined;
        const preAnnotation = {
            'foreignFixationId': parseInt( cols[ fixIndex ]!, 10 ),
            'foreignCharacterBoxId': parseInt( cols[ boxIndex ]!, 10 ),
            ...( dGeomRaw ? { 'dGeom': parseFloat( dGeomRaw ) } : {} ),
            ...( pShareRaw ? { 'pShare': parseFloat( pShareRaw ) } : {} )
        };

        // NOTE(review): the element type of `annotations` is declared outside
        // this file, so the cast from the original code is kept — confirm and
        // replace it with the proper DTO type.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        ( group.annotations as any ).push( preAnnotation );
    }

    return annotations;
};
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import {
2+
InvalidIndexNameError,
3+
MultipleTextIDsWithoutSpecifiedTextIDError
4+
} from '../util/errors';
5+
import type {
6+
ImportCharacterBoundingBoxDto
7+
} from '@/types/dtos/ImportCharacterBoundingBoxDto';
8+
9+
/**
 * Parses a CSV of character bounding boxes into import DTOs.
 *
 * Each data row yields one box whose min/max coordinates are normalised so
 * that `xMin <= xMax` and `yMin <= yMax`. Header names are trimmed, but cell
 * values are deliberately left untouched because the character column may
 * legitimately contain whitespace. Lines are split on plain commas, so quoted
 * fields (e.g. a literal comma character) are not supported.
 *
 * @param text - Raw CSV content; the first non-blank line is the header.
 * @param textId - When given, only rows whose resolved text ID equals this
 *   value are imported. When omitted (or empty), every row must resolve to
 *   the same text ID.
 * @param xMinName - Header name of the first X coordinate column.
 * @param xMaxName - Header name of the second X coordinate column.
 * @param yMinName - Header name of the first Y coordinate column.
 * @param yMaxName - Header name of the second Y coordinate column.
 * @param char_text - Header name of the character column.
 * @param char_uid - Header name of the ID column.
 * @param uidLookupMap - Optional map from the ID column's value to a text ID.
 *   When absent, the ID column's value itself is used as the text ID.
 * @returns One DTO per imported row; empty when there are no data rows.
 * @throws InvalidIndexNameError when a required column is missing from the
 *   header (this includes completely empty input).
 * @throws MultipleTextIDsWithoutSpecifiedTextIDError when no `textId` is given
 *   and the rows resolve to more than one text ID.
 */
export const parseCharacterBoundingBoxesCSV = (
    text: string,
    textId?: string,
    xMinName: string = 'x_min',
    xMaxName: string = 'x_max',
    yMinName: string = 'y_min',
    yMaxName: string = 'y_max',
    char_text: string = 'char_text',
    char_uid: string = 'char_uid',
    uidLookupMap?: Map<string, string>
): ImportCharacterBoundingBoxDto[] => {
    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    // Empty input produces an empty header, which the column checks below reject.
    const header = ( lines.shift() ?? '' ).split( ',' )
        .map( h => h.trim() );
    const charName = header.indexOf( char_text );
    const charId = header.indexOf( char_uid );
    const xMinIndex = header.indexOf( xMinName );
    const xMaxIndex = header.indexOf( xMaxName );
    const yMinIndex = header.indexOf( yMinName );
    const yMaxIndex = header.indexOf( yMaxName );

    if ( xMinIndex < 0 )
        throw new InvalidIndexNameError( 'smaller X coordinate' );
    else if ( xMaxIndex < 0 )
        throw new InvalidIndexNameError( 'larger X coordinate' );
    else if ( yMinIndex < 0 )
        throw new InvalidIndexNameError( 'smaller Y coordinate' );
    else if ( yMaxIndex < 0 )
        throw new InvalidIndexNameError( 'larger Y coordinate' );
    else if ( charName < 0 )
        throw new InvalidIndexNameError( 'character' );
    else if ( charId < 0 )
        throw new InvalidIndexNameError( 'character ID' );

    // Maps a raw ID cell to the text ID used for both the single-text check
    // and the filter, so the two can never disagree.
    const resolveTextId = ( uid: string | undefined ): string | undefined =>
        uidLookupMap ? uidLookupMap.get( uid! ) : uid;

    const rows = lines.map( line => line.split( ',' ) );
    // Undefined for a header-only file; the loop below then never runs.
    const firstEncounteredTextID = resolveTextId( rows[0]?.[ charId ] );
    const boxes: ImportCharacterBoundingBoxDto[] = [];

    for ( const cols of rows ) {
        const actualTextId = resolveTextId( cols[ charId ] );

        // Without an explicit text ID the file may only describe a single text.
        if ( !textId && actualTextId !== firstEncounteredTextID ) {
            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
        }

        // With an explicit text ID, rows belonging to other texts are ignored.
        if ( textId !== undefined && actualTextId !== textId ) {
            continue;
        }

        const x1 = Number( cols[ xMinIndex ] );
        const x2 = Number( cols[ xMaxIndex ] );
        const y1 = Number( cols[ yMinIndex ] );
        const y2 = Number( cols[ yMaxIndex ] );

        boxes.push( {
            // Swap the pair when the file lists the larger coordinate first.
            'xMin': x1 < x2 ? x1 : x2,
            'xMax': x1 < x2 ? x2 : x1,
            'yMin': y1 < y2 ? y1 : y2,
            'yMax': y1 < y2 ? y2 : y1,
            'character': String( cols[ charName ] ),
            // NOTE(review): this stores the resolved text ID, so with a lookup
            // map every box of a text shares one foreignId. It looks like the
            // box's own ID was intended — confirm before this parser is wired up.
            'foreignId': Number( actualTextId )
        } );
    }

    return boxes;
};
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import {
2+
InvalidIndexNameError,
3+
MultipleTextIDsWithoutSpecifiedTextIDError
4+
} from '../util/errors';
5+
import type {
6+
ImportReadingSessionDto
7+
} from '@/types/dtos/ImportReadingSessionDto';
8+
9+
/**
 * Parses a CSV of fixations into one reading session per reader.
 *
 * Coordinates are multiplied by `factor` and rounded to integers. Header
 * names and cell values are trimmed before use. Lines are split on plain
 * commas, so quoted fields that contain commas are not supported.
 *
 * @param text - Raw CSV content; the first non-blank line is the header.
 * @param textId - Text ID stored on every session; with
 *   `fileHasMultipleTextIDs` it also selects which rows are imported.
 * @param fileHasMultipleTextIDs - When true, the text column is required and
 *   only rows matching `textId` are imported. When false, all rows must share
 *   the same value in the text column (or the column may be absent).
 * @param fileHasMultipleReaderIDs - When true, the reader column is required
 *   and rows are grouped by its value; otherwise all rows go to `currentReader`.
 * @param currentReader - Reader ID used when `fileHasMultipleReaderIDs` is false.
 * @param factor - Multiplier applied to the raw x/y values before rounding.
 * @param xName - Header name of the X coordinate column.
 * @param yName - Header name of the Y coordinate column.
 * @param readerName - Header name of the reader ID column.
 * @param textName - Header name of the text ID column.
 * @param idName - Header name of the fixation ID column.
 * @param lang - Header name of the language column; looked up but not used yet.
 * @returns The sessions that received at least one fixation; empty when there
 *   are no matching data rows.
 * @throws InvalidIndexNameError when a required column is missing from the
 *   header (this includes completely empty input).
 * @throws MultipleTextIDsWithoutSpecifiedTextIDError when
 *   `fileHasMultipleTextIDs` is false but the rows reference several text IDs.
 */
export const parseFixationsCSV = (
    text: string,
    textId: string,
    fileHasMultipleTextIDs: boolean,
    fileHasMultipleReaderIDs: boolean,
    currentReader: string = '0', // only used if fileHasMultipleReaderIDs = false
    factor: number = 100,
    xName: string = 'x',
    yName: string = 'y',
    readerName: string = 'reader',
    textName: string = 'text',
    idName: string = 'fixid',
    lang: string = 'lang'
): ImportReadingSessionDto[] => {
    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    // Empty input produces an empty header, which the column checks below reject.
    const header = ( lines.shift() ?? '' ).split( ',' )
        .map( h => h.trim() );
    const readerIndex = header.indexOf( readerName );
    const textIndex = header.indexOf( textName );
    const xIndex = header.indexOf( xName );
    const yIndex = header.indexOf( yName );
    const idIndex = header.indexOf( idName );
    // TODO: the language column is located but not consumed yet.
    const language = header.indexOf( lang );

    if ( xIndex < 0 )
        throw new InvalidIndexNameError( 'X coordinate' );
    else if ( yIndex < 0 )
        throw new InvalidIndexNameError( 'Y coordinate' );
    else if ( readerIndex < 0 && fileHasMultipleReaderIDs )
        throw new InvalidIndexNameError( 'reader ID' );
    else if ( textIndex < 0 && fileHasMultipleTextIDs )
        throw new InvalidIndexNameError( 'text ID' );
    else if ( idIndex < 0 )
        throw new InvalidIndexNameError( 'fixation ID' );

    const rows = lines.map( line => line.split( ',' ).map( cell => cell.trim() ) );
    // A missing text column is allowed in single-text mode; every row then
    // reads as `undefined` and the consistency check below passes trivially.
    const textOf = ( cols: string[] ): string | undefined =>
        textIndex >= 0 ? cols[ textIndex ] : undefined;
    const firstRow = rows[0];
    const firstEncounteredTextID = firstRow ? textOf( firstRow ) : undefined;
    const points: {
        [reader: string]: ImportReadingSessionDto
    } = {};

    for ( const cols of rows ) {
        if ( fileHasMultipleTextIDs ) {
            // Rows belonging to other texts are ignored.
            if ( textOf( cols ) !== textId ) {
                continue;
            }
        } else if ( textOf( cols ) !== firstEncounteredTextID ) {
            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
        }

        const reader = fileHasMultipleReaderIDs ? cols[ readerIndex ]! : currentReader;
        let session = points[ reader ];

        if ( !session ) {
            session = {
                'textForeignId': Number( textId ),
                'readerForeignId': Number( reader ),
                'fixations': [],
                'preAnnotations': []
            };
            points[ reader ] = session;
        }

        session.fixations?.push( {
            'x': Math.round( Number( cols[ xIndex ] ) * factor ),
            'y': Math.round( Number( cols[ yIndex ] ) * factor ),
            'foreignId': Number( cols[ idIndex ]! )
        } );
    }

    return Object.values( points );
};
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
 * Builds a lookup from a per-file UID column to the text ID column of a CSV.
 *
 * Header names and the stored keys/values are trimmed. Rows that are too
 * short to contain both columns are skipped; when the same UID occurs more
 * than once the last row wins. Lines are split on plain commas, so quoted
 * fields that contain commas are not supported.
 *
 * @param csvText - Raw CSV content; the first non-blank line is the header.
 * @param uidName - Header name of the column whose values become map keys.
 * @param textIdName - Header name of the column whose values become map values.
 * @returns The lookup map; empty when the input has no header, lacks one of
 *   the two columns, or has no usable data rows.
 */
export const createUidLookupMap = (
    csvText: string,
    uidName: string = 'text_uid',
    textIdName: string = 'text_id'
): Map<string, string> => {
    const lookup = new Map<string, string>();
    const lines = csvText.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    const headerLine = lines.shift();

    // Nothing to index when the input is empty or blank.
    if ( headerLine === undefined ) {
        return lookup;
    }

    const header = headerLine.split( ',' ).map( h => h.trim() );
    const uidIndex = header.indexOf( uidName );
    const textIdIndex = header.indexOf( textIdName );

    // A missing column yields an empty map rather than an error.
    if ( uidIndex < 0 || textIdIndex < 0 ) {
        return lookup;
    }

    for ( const line of lines ) {
        const cols = line.split( ',' );
        const uid = cols[ uidIndex ];
        const mappedTextId = cols[ textIdIndex ];

        if ( uid !== undefined && mappedTextId !== undefined ) {
            lookup.set( uid.trim(), mappedTextId.trim() );
        }
    }

    return lookup;
};

0 commit comments

Comments
 (0)