[Import] Start update to parsers

janishutz · janishutz · commit b17368876afa · 2026-02-23T13:39:38.000+01:00
diff --git a/src/ts/dataImport/parsers/algorithm_annotations.ts b/src/ts/dataImport/parsers/algorithm_annotations.ts
@@ -1,99 +1,95 @@
-import {
-    InvalidIndexNameError,
-    MultipleTextIDsWithoutSpecifiedTextIDError
-} from '../util/errors';
-import type {
-    ImportAnnotation
-} from '@/types/import-annotation';
-
-export const parseAlgorithmAnnotationsCSV = (
-    text: string,
-    title: string,
-    textId?: string,
-    textName: string = 'text',
-    algorithm_id: string = 'algorithm_id',
-    fixationName: string = 'fix_uid',
-    boxName: string = 'char_uid',
-    dGeomName: string = 'D_geom',
-    pShareName: string = 'P_share'
-): ImportAnnotation => {
-    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
-    const header = lines.shift()!.split( ',' )
-        .map( h => h.trim() );
-    const algorithmName = header.indexOf( algorithm_id );
-    const textIndex = header.indexOf( textName );
-    const fixIndex = header.indexOf( fixationName );
-    const boxIndex = header.indexOf( boxName );
-    const dGeomIndex = header.indexOf( dGeomName );
-    const pShareIndex = header.indexOf( pShareName );
-
-    if ( fixIndex < 0 )
-        throw new InvalidIndexNameError( 'X coordinate' );
-    else if ( boxIndex < 0 )
-        throw new InvalidIndexNameError( 'Y coordinate' );
-    else if ( algorithmName < 0 )
-        throw new InvalidIndexNameError( 'algorithm ID' );
-    else if ( textIndex < 0 )
-        throw new InvalidIndexNameError( 'text ID' );
-
-    const firstCols = lines[0]!.split( ',' );
-    const firstEncounteredTextID = firstCols[ textIndex ];
-    // First index is text, second id is reader
-    const annotations: ImportAnnotation = {};
-
-    for ( let i = 0; i < lines.length; i++ ) {
-        const cols = lines[i]!.split( ',' );
-
-        if ( firstEncounteredTextID !== cols[ textIndex ] && !textId ) {
-            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
-        }
-
-        if ( textId === undefined ) {
-            const algorithm = cols[ algorithmName ]!;
-
-            if ( !annotations[ algorithm ] ) {
-                annotations[ algorithm ] = {
-                    'title': title,
-                    'annotations': []
-                };
-            }
-
-            const preAnnotation = {
-                'foreignFixationId': parseInt( cols[ fixIndex ]! ),
-                'foreignCharacterBoxId': parseInt( cols[ boxIndex ]! ),
+import {
+    InvalidIndexNameError,
+    MultipleTextIDsWithoutSpecifiedTextIDError
+} from '../util/errors';
+import type {
+    ImportAnnotation
+} from '@/types/import-annotation';
+import type {
+    PreAnnotationValueDto
+} from '@/types/dtos/PreAnnotationValueDto';
+
+export const parseAlgorithmAnnotationsCSV = (
+    text: string,
+    title: string,
+    textFilter: undefined | string | [number, number] | number[],
+    textName: string = 'text',
+    algorithmName: string = 'algorithm_id',
+    fixationName: string = 'fix_uid',
+    boxName: string = 'char_uid',
+    dGeomName: string = 'D_geom',
+    pShareName: string = 'P_share'
+): ImportAnnotation => {
+    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    const header = lines.shift()!.split( ',' )
+        .map( h => h.trim() );
+    const algorithmIndex = header.indexOf( algorithmName );
+    const textIndex = header.indexOf( textName );
+    const fixIndex = header.indexOf( fixationName );
+    const boxIndex = header.indexOf( boxName );
+    const dGeomIndex = header.indexOf( dGeomName );
+    const pShareIndex = header.indexOf( pShareName );
+
+    if ( fixIndex < 0 )
+        throw new InvalidIndexNameError( 'X coordinate' );
+    else if ( boxIndex < 0 )
+        throw new InvalidIndexNameError( 'Y coordinate' );
+    else if ( algorithmIndex < 0 )
+        throw new InvalidIndexNameError( 'algorithm ID' );
+    else if ( textIndex < 0 )
+        throw new InvalidIndexNameError( 'text ID' );
+
+    const firstCols = lines[0]!.split( ',' );
+    const firstEncounteredTextID = firstCols[ textIndex ];
+    // Result map keyed by algorithm ID; each entry holds the title and its list of annotations
+    const annotations: ImportAnnotation = {};
+
+    for ( let i = 0; i < lines.length; i++ ) {
+        const cols = lines[i]!.split( ',' );
+
+        if ( firstEncounteredTextID !== cols[ textIndex ] && textFilter === undefined ) {
+            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+        }
+
+        const addData = ( cols: string[] ) => {
+            const algorithm = cols[ algorithmIndex ]!;
+
+            if ( !annotations[ algorithm ] ) {
+                annotations[ algorithm ] = {
+                    'title': title,
+                    'annotations': []
+                };
+            }
+
+            const preAnnotation: PreAnnotationValueDto = {
+                'foreignFixationId': parseInt( cols[ fixIndex ]! ),
+                'foreignCharacterBoxId': parseInt( cols[ boxIndex ]! ),
                 ...dGeomIndex >= 0 && cols[ dGeomIndex ] && {
                     'dGeom': parseFloat( cols[ dGeomIndex ] )
-                },
+                },
                 ...pShareIndex >= 0 && cols[ pShareIndex ] && {
                     'pShare': parseFloat( cols[ pShareIndex ] )
-                }
+                }
             };
-
-            annotations[ algorithm ]!.annotations!.push( preAnnotation );
-        } else if ( cols[ textIndex ] === textId ) {
-            const algorithm = cols[ algorithmName ]!;
-
-            if ( !annotations[ algorithm ] ) {
-                annotations[ algorithm ] = {
-                    'title': title,
-                    'annotations': []
-                };
-            }
-
-            const preAnnotation = {
-                'foreignFixationId': parseInt( cols[ fixIndex ]! ),
-                'foreignCharacterBoxId': parseInt( cols[ boxIndex ]! ),
-                ...dGeomIndex >= 0 && cols[ dGeomIndex ] && {
-                    'dGeom': parseFloat( cols[ dGeomIndex ] )
-                },
-                ...pShareIndex >= 0 && cols[ pShareIndex ] && {
-                    'pShare': parseFloat( cols[ pShareIndex ] )
-                }
-            };
-
-            annotations[ algorithm ]!.annotations!.push( preAnnotation );
-        }
-    }
-
-    return annotations;
-};
+
+            annotations[ algorithm ]!.annotations!.push( preAnnotation );
+        };
+
+        if ( textFilter === undefined ) {
+            addData( cols );
+        } else if ( typeof textFilter === 'string' && cols[ textIndex ] === textFilter ) {
+            addData( cols );
+        } else if ( typeof textFilter === 'object' ) {
+            const text = Number( cols[ textIndex ] );
+
+            if ( textFilter.length === 2 ) {
+                if ( text < textFilter[1] && text > textFilter[0] )
+                    addData( cols );
+            } else if ( textFilter.includes( text ) ) {
+                addData( cols );
+            }
+        }
+    }
+
+    return annotations;
+};
diff --git a/src/ts/dataImport/parsers/characterBoxes_new.ts b/src/ts/dataImport/parsers/characterBoxes_new.ts
@@ -1,78 +1,77 @@
-import {
-    InvalidIndexNameError,
-    MultipleTextIDsWithoutSpecifiedTextIDError
-} from '../util/errors';
-import type {
-    ImportCharacterBoundingBoxDto
-} from '@/types/dtos/ImportCharacterBoundingBoxDto';
-
-export const parseCharacterBoundingBoxesCSV = (
-    text: string,
-    textId?: string,
-    xMinName: string = 'x_min',
-    xMaxName: string = 'x_max',
-    yMinName: string = 'y_min',
-    yMaxName: string = 'y_max',
-    char_text: string = 'char_text',
-    char_uid: string = 'char_uid',
-    uidLookupMap?: Map<string, string> // MAP for char_uid to text_id
-): ImportCharacterBoundingBoxDto[] => {
-    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
-    const header = lines.shift()!.split( ',' )
-        .map( h => h.trim() );
-    const charName = header.indexOf( char_text );
-    const charId = header.indexOf( char_uid );
-    const xMinIndex = header.indexOf( xMinName );
-    const xMaxIndex = header.indexOf( xMaxName );
-    const yMinIndex = header.indexOf( yMinName );
-    const yMaxIndex = header.indexOf( yMaxName );
-
-
-
-    if ( xMinIndex < 0 )
-        throw new InvalidIndexNameError( 'smaller X coordinate' );
-    else if ( xMaxIndex < 0 )
-        throw new InvalidIndexNameError( 'larger X coordinate' );
-    else if ( yMinIndex < 0 )
-        throw new InvalidIndexNameError( 'smaller Y coordinate' );
-    else if ( yMaxIndex < 0 )
-        throw new InvalidIndexNameError( 'larger Y coordinate' );
-    else if ( charName < 0 )
-        throw new InvalidIndexNameError( 'character' );
-    else if ( charId < 0 )
-        throw new InvalidIndexNameError( 'character ID' );
-
-    const boxes: ImportCharacterBoundingBoxDto[] = [];
-    const firstCols = lines[0]!.split( ',' );
-    const firstEncounteredTextID = firstCols[ charId ];
-
-    for ( let i = 0; i < lines.length; i++ ) {
-        const cols = lines[i]!.split( ',' );
-        const currentCharUid = cols[charId]!;
-        const actualTextId = uidLookupMap ? uidLookupMap.get( currentCharUid ) : currentCharUid;
-
-        if ( firstEncounteredTextID !== currentCharUid && !textId ) {
-            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
-        }
-
+import {
+    InvalidIndexNameError,
+    MultipleTextIDsWithoutSpecifiedTextIDError
+} from '../util/errors';
+import type {
+    ImportCharacterBoundingBoxDto
+} from '@/types/dtos/ImportCharacterBoundingBoxDto';
+
+// NOTE(review): It is unclear whether this parser is actually still needed; it is also not well
+// understood which data is currently being used for the import.
+export const parseCharacterBoundingBoxesCSV = (
+    text: string,
+    textId?: string,
+    xMinName: string = 'x_min',
+    xMaxName: string = 'x_max',
+    yMinName: string = 'y_min',
+    yMaxName: string = 'y_max',
+    char_text: string = 'char_text',
+    char_uid: string = 'char_uid',
+    uidLookupMap?: Map<string, string> // MAP for char_uid to text_id
+): ImportCharacterBoundingBoxDto[] => {
+    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    const header = lines.shift()!.split( ',' )
+        .map( h => h.trim() );
+    const charName = header.indexOf( char_text );
+    const charId = header.indexOf( char_uid );
+    const xMinIndex = header.indexOf( xMinName );
+    const xMaxIndex = header.indexOf( xMaxName );
+    const yMinIndex = header.indexOf( yMinName );
+    const yMaxIndex = header.indexOf( yMaxName );
+
+
+    if ( xMinIndex < 0 )
+        throw new InvalidIndexNameError( 'smaller X coordinate' );
+    else if ( xMaxIndex < 0 )
+        throw new InvalidIndexNameError( 'larger X coordinate' );
+    else if ( yMinIndex < 0 )
+        throw new InvalidIndexNameError( 'smaller Y coordinate' );
+    else if ( yMaxIndex < 0 )
+        throw new InvalidIndexNameError( 'larger Y coordinate' );
+    else if ( charName < 0 )
+        throw new InvalidIndexNameError( 'character' );
+    else if ( charId < 0 )
+        throw new InvalidIndexNameError( 'character ID' );
+
+    const boxes: ImportCharacterBoundingBoxDto[] = [];
+    const firstCols = lines[0]!.split( ',' );
+    const firstEncounteredTextID = firstCols[ charId ];
+
+    for ( let i = 0; i < lines.length; i++ ) {
+        const cols = lines[i]!.split( ',' );
+        const currentCharUid = cols[charId]!;
+        const actualTextId = uidLookupMap ? uidLookupMap.get( currentCharUid ) : currentCharUid;
+
+        if ( firstEncounteredTextID !== currentCharUid && !textId ) {
+            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+        }
+
         if ( textId === undefined || actualTextId === textId ) {
-            const x1 = Number( cols[xMinIndex] );
-            const x2 = Number( cols[xMaxIndex] );
-            const y1 = Number( cols[yMinIndex] );
-            const y2 = Number( cols[yMaxIndex] );
-
-            boxes.push( {
-                'xMin': x1 < x2 ? x1 : x2,
-                'xMax': x1 < x2 ? x2 : x1,
-                'yMin': y1 < y2 ? y1 : y2,
-                'yMax': y1 < y2 ? y2 : y1,
-                'character': String( cols[charName] ),
-                'foreignId': Number( actualTextId )
-            } );
-        }
-    }
-
-
-
-    return boxes;
-};
+            const x1 = Number( cols[xMinIndex] );
+            const x2 = Number( cols[xMaxIndex] );
+            const y1 = Number( cols[yMinIndex] );
+            const y2 = Number( cols[yMaxIndex] );
+
+            boxes.push( {
+                'xMin': x1 < x2 ? x1 : x2,
+                'xMax': x1 < x2 ? x2 : x1,
+                'yMin': y1 < y2 ? y1 : y2,
+                'yMax': y1 < y2 ? y2 : y1,
+                'character': String( cols[charName] ),
+                'foreignId': Number( actualTextId )
+            } );
+        }
+    }
+
+    return boxes;
+};
diff --git a/src/ts/dataImport/parsers/fixations_new.ts b/src/ts/dataImport/parsers/fixations_new.ts
@@ -11,14 +11,15 @@ export const parseFixationsCSV = (
     textId: string,
     fileHasMultipleTextIDs: boolean,
     fileHasMultipleReaderIDs: boolean,
+    language?: string,
     currentReader: string = '0', // only used if fileHasMultipleReaderIDs = false
     factor: number = 100,
     xName: string = 'x',
     yName: string = 'y',
     readerName: string = 'reader',
     textName: string = 'text',
-    idName: string = 'fixid'
-    // lang: string ='lang'
+    idName: string = 'fixid',
+    langName: string = 'lang'
 ): ImportReadingSessionDto[] => {
     const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
     const header = lines.shift()!.split( ',' )
@@ -28,7 +29,7 @@ export const parseFixationsCSV = (
     const xIndex = header.indexOf( xName );
     const yIndex = header.indexOf( yName );
     const idIndex = header.indexOf( idName );
-    // const language = header.indexOf( lang );
+    const langIndex = header.indexOf( langName );
 
     if ( xIndex < 0 )
         throw new InvalidIndexNameError( 'X coordinate' );
@@ -71,14 +72,17 @@ export const parseFixationsCSV = (
     for ( let i = 0; i < lines.length; i++ ) {
         const cols = lines[i]!.split( ',' );
 
-        if ( !fileHasMultipleTextIDs ) {
-            if ( firstEncounteredTextID !== cols[ textIndex ] ) {
-                throw new MultipleTextIDsWithoutSpecifiedTextIDError();
-            }
+        // Language filtering: when a language is given, only rows whose language column equals it are processed
+        if ( !language || cols[ langIndex ] === language ) {
+            if ( !fileHasMultipleTextIDs ) {
+                if ( firstEncounteredTextID !== cols[ textIndex ] ) {
+                    throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+                }
 
-            addPointForReader( cols );
-        } else if ( cols[textIndex] === textId ) {
-            addPointForReader( cols );
+                addPointForReader( cols );
+            } else if ( cols[textIndex] === textId ) {
+                addPointForReader( cols );
+            }
         }
     }