Skip to content

Commit b173688

Browse files
committed
[Import] Start update to parsers
1 parent f49b5bc commit b173688

File tree

3 files changed

+180
-181
lines changed

3 files changed

+180
-181
lines changed
Lines changed: 90 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,95 @@
1-
import {
2-
InvalidIndexNameError,
3-
MultipleTextIDsWithoutSpecifiedTextIDError
4-
} from '../util/errors';
5-
import type {
6-
ImportAnnotation
7-
} from '@/types/import-annotation';
8-
9-
export const parseAlgorithmAnnotationsCSV = (
10-
text: string,
11-
title: string,
12-
textId?: string,
13-
textName: string = 'text',
14-
algorithm_id: string = 'algorithm_id',
15-
fixationName: string = 'fix_uid',
16-
boxName: string = 'char_uid',
17-
dGeomName: string = 'D_geom',
18-
pShareName: string = 'P_share'
19-
): ImportAnnotation => {
20-
const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
21-
const header = lines.shift()!.split( ',' )
22-
.map( h => h.trim() );
23-
const algorithmName = header.indexOf( algorithm_id );
24-
const textIndex = header.indexOf( textName );
25-
const fixIndex = header.indexOf( fixationName );
26-
const boxIndex = header.indexOf( boxName );
27-
const dGeomIndex = header.indexOf( dGeomName );
28-
const pShareIndex = header.indexOf( pShareName );
29-
30-
if ( fixIndex < 0 )
31-
throw new InvalidIndexNameError( 'X coordinate' );
32-
else if ( boxIndex < 0 )
33-
throw new InvalidIndexNameError( 'Y coordinate' );
34-
else if ( algorithmName < 0 )
35-
throw new InvalidIndexNameError( 'algorithm ID' );
36-
else if ( textIndex < 0 )
37-
throw new InvalidIndexNameError( 'text ID' );
38-
39-
const firstCols = lines[0]!.split( ',' );
40-
const firstEncounteredTextID = firstCols[ textIndex ];
41-
// First index is text, second id is reader
42-
const annotations: ImportAnnotation = {};
43-
44-
for ( let i = 0; i < lines.length; i++ ) {
45-
const cols = lines[i]!.split( ',' );
46-
47-
if ( firstEncounteredTextID !== cols[ textIndex ] && !textId ) {
48-
throw new MultipleTextIDsWithoutSpecifiedTextIDError();
49-
}
50-
51-
if ( textId === undefined ) {
52-
const algorithm = cols[ algorithmName ]!;
53-
54-
if ( !annotations[ algorithm ] ) {
55-
annotations[ algorithm ] = {
56-
'title': title,
57-
'annotations': []
58-
};
59-
}
60-
61-
const preAnnotation = {
62-
'foreignFixationId': parseInt( cols[ fixIndex ]! ),
63-
'foreignCharacterBoxId': parseInt( cols[ boxIndex ]! ),
1+
import {
2+
InvalidIndexNameError,
3+
MultipleTextIDsWithoutSpecifiedTextIDError
4+
} from '../util/errors';
5+
import type {
6+
ImportAnnotation
7+
} from '@/types/import-annotation';
8+
import type {
9+
PreAnnotationValueDto
10+
} from '@/types/dtos/PreAnnotationValueDto';
11+
12+
/**
 * Parses a CSV of algorithm-generated pre-annotations (one fixation → character-box
 * assignment per row) and groups the rows by the value of the algorithm column.
 *
 * Rows are split on `,` only; quoted fields containing commas are not supported.
 * Header names are trimmed before lookup, cell values are used as-is.
 *
 * Row selection depends on `textFilter`:
 * - `undefined`: every row is imported, and all rows must share the text ID of the
 *   first data row.
 * - `string`: only rows whose text column equals the string exactly.
 * - number array of length 2: exclusive range, a row is kept when
 *   `textFilter[0] < Number( text ) < textFilter[1]`.
 * - number array of any other length: a row is kept when `Number( text )` is in the list.
 *
 * NOTE(review): a two-element ID list cannot be told apart from a range at runtime and
 * is therefore always handled as an exclusive range — confirm this is intended.
 *
 * @param text - Raw CSV content, first non-blank line is the header.
 * @param title - Title stored on every algorithm entry that is created.
 * @param textFilter - Text selection, see above.
 * @param textName - Header name of the text ID column.
 * @param algorithmName - Header name of the algorithm ID column.
 * @param fixationName - Header name of the fixation ID column.
 * @param boxName - Header name of the character box ID column.
 * @param dGeomName - Header name of the optional `dGeom` column.
 * @param pShareName - Header name of the optional `pShare` column.
 * @returns Map from algorithm ID to `{ title, annotations }`; empty when the file has
 *   no data rows.
 * @throws InvalidIndexNameError when the fixation, character box, algorithm or text
 *   column is missing from the header (this includes completely empty input).
 * @throws MultipleTextIDsWithoutSpecifiedTextIDError when `textFilter` is `undefined`
 *   and a row's text ID differs from the first row's.
 */
export const parseAlgorithmAnnotationsCSV = (
    text: string,
    title: string,
    textFilter: undefined | string | [number, number] | number[],
    textName: string = 'text',
    algorithmName: string = 'algorithm_id',
    fixationName: string = 'fix_uid',
    boxName: string = 'char_uid',
    dGeomName: string = 'D_geom',
    pShareName: string = 'P_share'
): ImportAnnotation => {
    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    // Empty input yields a header with no usable names, so the column checks below
    // report the problem instead of a TypeError on `undefined.split`.
    const header = ( lines.shift() ?? '' ).split( ',' )
        .map( h => h.trim() );
    const algorithmIndex = header.indexOf( algorithmName );
    const textIndex = header.indexOf( textName );
    const fixIndex = header.indexOf( fixationName );
    const boxIndex = header.indexOf( boxName );
    const dGeomIndex = header.indexOf( dGeomName );
    const pShareIndex = header.indexOf( pShareName );

    if ( fixIndex < 0 )
        throw new InvalidIndexNameError( 'fixation ID' );
    else if ( boxIndex < 0 )
        throw new InvalidIndexNameError( 'character box ID' );
    else if ( algorithmIndex < 0 )
        throw new InvalidIndexNameError( 'algorithm ID' );
    else if ( textIndex < 0 )
        throw new InvalidIndexNameError( 'text ID' );

    // Keyed by algorithm ID
    const annotations: ImportAnnotation = {};

    // Header-only file: nothing to import
    if ( lines.length === 0 )
        return annotations;

    const firstEncounteredTextID = lines[0]!.split( ',' )[ textIndex ];

    // Appends one row to the entry of its algorithm, creating the entry on first use
    const addData = ( cols: string[] ) => {
        const algorithm = cols[ algorithmIndex ]!;

        if ( !annotations[ algorithm ] ) {
            annotations[ algorithm ] = {
                'title': title,
                'annotations': []
            };
        }

        // Optional columns are only emitted when present in the header and non-empty
        const dGeomRaw = dGeomIndex >= 0 ? cols[ dGeomIndex ] : undefined;
        const pShareRaw = pShareIndex >= 0 ? cols[ pShareIndex ] : undefined;

        const preAnnotation: PreAnnotationValueDto = {
            'foreignFixationId': parseInt( cols[ fixIndex ]!, 10 ),
            'foreignCharacterBoxId': parseInt( cols[ boxIndex ]!, 10 ),
            ...( dGeomRaw ? {
                'dGeom': parseFloat( dGeomRaw )
            } : {} ),
            ...( pShareRaw ? {
                'pShare': parseFloat( pShareRaw )
            } : {} )
        };

        annotations[ algorithm ]!.annotations!.push( preAnnotation );
    };

    for ( const line of lines ) {
        const cols = line.split( ',' );
        const rowTextId = cols[ textIndex ];

        if ( textFilter === undefined ) {
            // Without a filter the file must describe exactly one text
            if ( firstEncounteredTextID !== rowTextId )
                throw new MultipleTextIDsWithoutSpecifiedTextIDError();

            addData( cols );
        } else if ( typeof textFilter === 'string' ) {
            if ( rowTextId === textFilter )
                addData( cols );
        } else {
            const textNumber = Number( rowTextId );

            if ( textFilter.length === 2 ) {
                // Two entries are read as an exclusive ( lower, upper ) range
                if ( textNumber < textFilter[1]! && textNumber > textFilter[0]! )
                    addData( cols );
            } else if ( textFilter.includes( textNumber ) ) {
                addData( cols );
            }
        }
    }

    return annotations;
};
Lines changed: 76 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,77 @@
1-
import {
2-
InvalidIndexNameError,
3-
MultipleTextIDsWithoutSpecifiedTextIDError
4-
} from '../util/errors';
5-
import type {
6-
ImportCharacterBoundingBoxDto
7-
} from '@/types/dtos/ImportCharacterBoundingBoxDto';
8-
9-
export const parseCharacterBoundingBoxesCSV = (
10-
text: string,
11-
textId?: string,
12-
xMinName: string = 'x_min',
13-
xMaxName: string = 'x_max',
14-
yMinName: string = 'y_min',
15-
yMaxName: string = 'y_max',
16-
char_text: string = 'char_text',
17-
char_uid: string = 'char_uid',
18-
uidLookupMap?: Map<string, string> // MAP for char_uid to text_id
19-
): ImportCharacterBoundingBoxDto[] => {
20-
const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
21-
const header = lines.shift()!.split( ',' )
22-
.map( h => h.trim() );
23-
const charName = header.indexOf( char_text );
24-
const charId = header.indexOf( char_uid );
25-
const xMinIndex = header.indexOf( xMinName );
26-
const xMaxIndex = header.indexOf( xMaxName );
27-
const yMinIndex = header.indexOf( yMinName );
28-
const yMaxIndex = header.indexOf( yMaxName );
29-
30-
31-
32-
if ( xMinIndex < 0 )
33-
throw new InvalidIndexNameError( 'smaller X coordinate' );
34-
else if ( xMaxIndex < 0 )
35-
throw new InvalidIndexNameError( 'larger X coordinate' );
36-
else if ( yMinIndex < 0 )
37-
throw new InvalidIndexNameError( 'smaller Y coordinate' );
38-
else if ( yMaxIndex < 0 )
39-
throw new InvalidIndexNameError( 'larger Y coordinate' );
40-
else if ( charName < 0 )
41-
throw new InvalidIndexNameError( 'character' );
42-
else if ( charId < 0 )
43-
throw new InvalidIndexNameError( 'character ID' );
44-
45-
const boxes: ImportCharacterBoundingBoxDto[] = [];
46-
const firstCols = lines[0]!.split( ',' );
47-
const firstEncounteredTextID = firstCols[ charId ];
48-
49-
for ( let i = 0; i < lines.length; i++ ) {
50-
const cols = lines[i]!.split( ',' );
51-
const currentCharUid = cols[charId]!;
52-
const actualTextId = uidLookupMap ? uidLookupMap.get( currentCharUid ) : currentCharUid;
53-
54-
if ( firstEncounteredTextID !== currentCharUid && !textId ) {
55-
throw new MultipleTextIDsWithoutSpecifiedTextIDError();
56-
}
57-
1+
import {
2+
InvalidIndexNameError,
3+
MultipleTextIDsWithoutSpecifiedTextIDError
4+
} from '../util/errors';
5+
import type {
6+
ImportCharacterBoundingBoxDto
7+
} from '@/types/dtos/ImportCharacterBoundingBoxDto';
8+
9+
// NOTE: I am not quite sure this is actually needed... I also don't really have a good understanding
10+
// of what data we are now using to import.
11+
/**
 * Parses a CSV of character bounding boxes into import DTOs.
 *
 * Rows are split on `,` only; quoted fields containing commas are not supported.
 * Each row's text ID is `uidLookupMap.get( char_uid )` when a lookup map is given,
 * otherwise the raw `char_uid` value itself. Coordinates are normalised so that
 * `xMin <= xMax` and `yMin <= yMax` regardless of column order.
 *
 * NOTE(review): `foreignId` is set to the numeric form of the resolved text ID, not of
 * the character UID, and becomes `NaN` when the UID is missing from the lookup map —
 * confirm this is what the importer expects.
 *
 * @param text - Raw CSV content, first non-blank line is the header.
 * @param textId - When given, only rows whose resolved text ID equals it are returned.
 * @param xMinName - Header name of the first X coordinate column.
 * @param xMaxName - Header name of the second X coordinate column.
 * @param yMinName - Header name of the first Y coordinate column.
 * @param yMaxName - Header name of the second Y coordinate column.
 * @param char_text - Header name of the character column.
 * @param char_uid - Header name of the character UID column.
 * @param uidLookupMap - Optional map from character UID to text ID.
 * @returns One DTO per selected row; empty when the file has no data rows.
 * @throws InvalidIndexNameError when a required column is missing from the header
 *   (this includes completely empty input).
 * @throws MultipleTextIDsWithoutSpecifiedTextIDError when no `textId` is given and a
 *   row resolves to a different text ID than the first data row.
 */
export const parseCharacterBoundingBoxesCSV = (
    text: string,
    textId?: string,
    xMinName: string = 'x_min',
    xMaxName: string = 'x_max',
    yMinName: string = 'y_min',
    yMaxName: string = 'y_max',
    char_text: string = 'char_text',
    char_uid: string = 'char_uid',
    uidLookupMap?: Map<string, string> // MAP for char_uid to text_id
): ImportCharacterBoundingBoxDto[] => {
    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
    // Empty input yields a header with no usable names, so the column checks below
    // report the problem instead of a TypeError on `undefined.split`.
    const header = ( lines.shift() ?? '' ).split( ',' )
        .map( h => h.trim() );
    const charName = header.indexOf( char_text );
    const charId = header.indexOf( char_uid );
    const xMinIndex = header.indexOf( xMinName );
    const xMaxIndex = header.indexOf( xMaxName );
    const yMinIndex = header.indexOf( yMinName );
    const yMaxIndex = header.indexOf( yMaxName );

    if ( xMinIndex < 0 )
        throw new InvalidIndexNameError( 'smaller X coordinate' );
    else if ( xMaxIndex < 0 )
        throw new InvalidIndexNameError( 'larger X coordinate' );
    else if ( yMinIndex < 0 )
        throw new InvalidIndexNameError( 'smaller Y coordinate' );
    else if ( yMaxIndex < 0 )
        throw new InvalidIndexNameError( 'larger Y coordinate' );
    else if ( charName < 0 )
        throw new InvalidIndexNameError( 'character' );
    else if ( charId < 0 )
        throw new InvalidIndexNameError( 'character ID' );

    const boxes: ImportCharacterBoundingBoxDto[] = [];

    // Header-only file: nothing to import
    if ( lines.length === 0 )
        return boxes;

    // Text ID of a row: looked up through the map when one is given, else the UID itself
    const resolveTextId = ( charUid: string ): string | undefined => {
        return uidLookupMap ? uidLookupMap.get( charUid ) : charUid;
    };

    // Compare resolved text IDs (not raw character UIDs), otherwise any file with more
    // than one character would be rejected when a lookup map is supplied.
    const firstEncounteredTextID = resolveTextId( lines[0]!.split( ',' )[ charId ]! );

    for ( const line of lines ) {
        const cols = line.split( ',' );
        const actualTextId = resolveTextId( cols[charId]! );

        if ( firstEncounteredTextID !== actualTextId && !textId ) {
            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
        }

        if ( textId === undefined || actualTextId === textId ) {
            const x1 = Number( cols[xMinIndex] );
            const x2 = Number( cols[xMaxIndex] );
            const y1 = Number( cols[yMinIndex] );
            const y2 = Number( cols[yMaxIndex] );

            boxes.push( {
                'xMin': x1 < x2 ? x1 : x2,
                'xMax': x1 < x2 ? x2 : x1,
                'yMin': y1 < y2 ? y1 : y2,
                'yMax': y1 < y2 ? y2 : y1,
                'character': String( cols[charName] ),
                'foreignId': Number( actualTextId )
            } );
        }
    }

    return boxes;
};

src/ts/dataImport/parsers/fixations_new.ts

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ export const parseFixationsCSV = (
1111
textId: string,
1212
fileHasMultipleTextIDs: boolean,
1313
fileHasMultipleReaderIDs: boolean,
14+
language?: string,
1415
currentReader: string = '0', // only used if fileHasMultipleReaderIDs = false
1516
factor: number = 100,
1617
xName: string = 'x',
1718
yName: string = 'y',
1819
readerName: string = 'reader',
1920
textName: string = 'text',
20-
idName: string = 'fixid'
21-
// lang: string ='lang'
21+
idName: string = 'fixid',
22+
langName: string = 'lang'
2223
): ImportReadingSessionDto[] => {
2324
const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
2425
const header = lines.shift()!.split( ',' )
@@ -28,7 +29,7 @@ export const parseFixationsCSV = (
2829
const xIndex = header.indexOf( xName );
2930
const yIndex = header.indexOf( yName );
3031
const idIndex = header.indexOf( idName );
31-
// const language = header.indexOf( lang );
32+
const langIndex = header.indexOf( langName );
3233

3334
if ( xIndex < 0 )
3435
throw new InvalidIndexNameError( 'X coordinate' );
@@ -71,14 +72,17 @@ export const parseFixationsCSV = (
7172
for ( let i = 0; i < lines.length; i++ ) {
7273
const cols = lines[i]!.split( ',' );
7374

74-
if ( !fileHasMultipleTextIDs ) {
75-
if ( firstEncounteredTextID !== cols[ textIndex ] ) {
76-
throw new MultipleTextIDsWithoutSpecifiedTextIDError();
77-
}
75+
// Language filtering
76+
if ( !language || cols[ langIndex ] === language ) {
77+
if ( !fileHasMultipleTextIDs ) {
78+
if ( firstEncounteredTextID !== cols[ textIndex ] ) {
79+
throw new MultipleTextIDsWithoutSpecifiedTextIDError();
80+
}
7881

79-
addPointForReader( cols );
80-
} else if ( cols[textIndex] === textId ) {
81-
addPointForReader( cols );
82+
addPointForReader( cols );
83+
} else if ( cols[textIndex] === textId ) {
84+
addPointForReader( cols );
85+
}
8286
}
8387
}
8488

0 commit comments

Comments
 (0)