[Frontend] Parser update (WIP): new parsers added, not called yet

bickerda · bickerda · commit 693440a4a484 · 2026-02-23T10:00:07.000+01:00
diff --git a/src/ts/dataImport/importers/text.ts b/src/ts/dataImport/importers/text.ts
@@ -22,6 +22,7 @@ export const importText = async (
     return {
         'title': textName,
         'backgroundImage': uint8ArrayToBase64( await ( await loadFileFromDisk( image ) ).bytes() ),
+        'language': 'undefined', //TODO: language lookup
         'characterBoundingBoxes': boundingBoxes.characters,
         'wordBoundingBoxes': boundingBoxes.words,
         'foreignId': Number( textId )
diff --git a/src/ts/dataImport/parsers/algorithm_annotations.ts b/src/ts/dataImport/parsers/algorithm_annotations.ts
@@ -0,0 +1,89 @@
+import {
+    InvalidIndexNameError,
+    MultipleTextIDsWithoutSpecifiedTextIDError
+} from '../util/errors';
+import type {
+    ImportAnnotation
+} from '@/types/import-annotation';
+
+/**
+ * Parses a CSV of algorithm-generated annotations and groups the rows by algorithm ID.
+ *
+ * The CSV is split naively on `,` (no quoting support). Header names and cells are trimmed.
+ * Every kept row contributes one entry with `foreignFixationId` and `foreignCharacterBoxId`,
+ * plus `dGeom` / `pShare` when the respective column exists and the cell is non-empty.
+ *
+ * @param text - Raw CSV content; the first non-blank line is the header.
+ * @param title - Title stored on every per-algorithm group.
+ * @param textId - When given, only rows whose text column equals it are kept. When omitted
+ *   (or empty), every row must carry the same text ID as the first data row.
+ * @param textName - Header name of the text ID column.
+ * @param algorithm_id - Header name of the algorithm ID column.
+ * @param fixationName - Header name of the fixation ID column.
+ * @param boxName - Header name of the character box ID column.
+ * @param dGeomName - Header name of the optional `dGeom` column.
+ * @param pShareName - Header name of the optional `pShare` column.
+ * @returns Annotation groups keyed by the value of the algorithm ID column; empty when the
+ *   file has no data rows.
+ * @throws InvalidIndexNameError when a required column is missing from the header
+ *   (this includes a completely empty file).
+ * @throws MultipleTextIDsWithoutSpecifiedTextIDError when no `textId` is given and the rows
+ *   reference more than one text ID.
+ */
+export const parseAlgorithmAnnotationsCSV = (
+    text: string,
+    title: string,
+    textId?: string,
+    textName: string = 'text',
+    algorithm_id: string = 'algorithm_id',
+    fixationName: string = 'fix_uid',
+    boxName: string = 'char_uid',
+    dGeomName: string = 'D_geom',
+    pShareName: string = 'P_share'
+): ImportAnnotation => {
+    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    // An empty file yields an empty header, which is reported as a missing column below
+    // instead of crashing on `undefined.split`.
+    const header = ( lines.shift() ?? '' ).split( ',' )
+        .map( h => h.trim() );
+    const algorithmIndex = header.indexOf( algorithm_id );
+    const textIndex = header.indexOf( textName );
+    const fixIndex = header.indexOf( fixationName );
+    const boxIndex = header.indexOf( boxName );
+    const dGeomIndex = header.indexOf( dGeomName );
+    const pShareIndex = header.indexOf( pShareName );
+
+    if ( fixIndex < 0 )
+        throw new InvalidIndexNameError( 'fixation ID' );
+    else if ( boxIndex < 0 )
+        throw new InvalidIndexNameError( 'character box ID' );
+    else if ( algorithmIndex < 0 )
+        throw new InvalidIndexNameError( 'algorithm ID' );
+    else if ( textIndex < 0 )
+        throw new InvalidIndexNameError( 'text ID' );
+
+    // Cells are trimmed like the header so that "a, b" style files match `textId`.
+    const rows = lines.map( l => l.split( ',' ).map( c => c.trim() ) );
+    // `undefined` when the file only contains a header; the loop below then never runs.
+    const firstEncounteredTextID = rows[0]?.[ textIndex ];
+    // Result is keyed by algorithm ID.
+    const annotations: ImportAnnotation = {};
+
+    for ( const cols of rows ) {
+        const rowTextId = cols[ textIndex ];
+
+        if ( !textId && rowTextId !== firstEncounteredTextID ) {
+            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+        }
+
+        // With an explicit text ID, rows that belong to other texts are skipped.
+        if ( textId !== undefined && rowTextId !== textId ) {
+            continue;
+        }
+
+        const algorithm = cols[ algorithmIndex ]!;
+
+        if ( !annotations[ algorithm ] ) {
+            annotations[ algorithm ] = {
+                'title': title,
+                'annotations': []
+            };
+        }
+
+        const dGeomCell = dGeomIndex >= 0 ? cols[ dGeomIndex ] : undefined;
+        const pShareCell = pShareIndex >= 0 ? cols[ pShareIndex ] : undefined;
+        const preAnnotation = {
+            'foreignFixationId': parseInt( cols[ fixIndex ]!, 10 ),
+            'foreignCharacterBoxId': parseInt( cols[ boxIndex ]!, 10 ),
+            // Optional metrics are only emitted when the cell is present and non-empty.
+            ...( dGeomCell ? { 'dGeom': parseFloat( dGeomCell ) } : {} ),
+            ...( pShareCell ? { 'pShare': parseFloat( pShareCell ) } : {} )
+        };
+
+        // The element type of `annotations` is declared outside this file; cast kept from the original.
+        ( annotations[ algorithm ]!.annotations as any ).push( preAnnotation );
+    }
+
+    return annotations;
+};
diff --git a/src/ts/dataImport/parsers/characterBoxes_new.ts b/src/ts/dataImport/parsers/characterBoxes_new.ts
@@ -0,0 +1,79 @@
+import {
+    InvalidIndexNameError,
+    MultipleTextIDsWithoutSpecifiedTextIDError
+} from '../util/errors';
+import type {
+    ImportCharacterBoundingBoxDto
+} from '@/types/dtos/ImportCharacterBoundingBoxDto';
+
+/**
+ * Parses a CSV of character bounding boxes into import DTOs.
+ *
+ * The CSV is split naively on `,` (no quoting support). Coordinates are normalised so that
+ * `xMin <= xMax` and `yMin <= yMax` regardless of the column order in the file. The character
+ * cell is taken verbatim (not trimmed).
+ *
+ * @param text - Raw CSV content; the first non-blank line is the header.
+ * @param textId - When given, only rows whose resolved text ID equals it are kept. When omitted
+ *   (or empty), every row must resolve to the same text ID as the first data row.
+ * @param xMinName - Header name of the first X coordinate column.
+ * @param xMaxName - Header name of the second X coordinate column.
+ * @param yMinName - Header name of the first Y coordinate column.
+ * @param yMaxName - Header name of the second Y coordinate column.
+ * @param char_text - Header name of the character column.
+ * @param char_uid - Header name of the character UID column.
+ * @param uidLookupMap - Optional map from character UID to text ID. Without it the character
+ *   UID itself is used as the text ID.
+ * @returns One DTO per kept row; empty when the file has no data rows.
+ * @throws InvalidIndexNameError when a required column is missing from the header
+ *   (this includes a completely empty file).
+ * @throws MultipleTextIDsWithoutSpecifiedTextIDError when no `textId` is given and the rows
+ *   resolve to more than one text ID.
+ */
+export const parseCharacterBoundingBoxesCSV = (
+    text: string,
+    textId?: string,
+    xMinName: string = 'x_min',
+    xMaxName: string = 'x_max',
+    yMinName: string = 'y_min',
+    yMaxName: string = 'y_max',
+    char_text: string = 'char_text',
+    char_uid: string = 'char_uid',
+    uidLookupMap?: Map<string, string>, //MAP for char_uid to text_id
+): ImportCharacterBoundingBoxDto[] => {
+    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    // An empty file yields an empty header, which is reported as a missing column below
+    // instead of crashing on `undefined.split`.
+    const header = ( lines.shift() ?? '' ).split( ',' )
+        .map( h => h.trim() );
+    const charName = header.indexOf( char_text );
+    const charId = header.indexOf( char_uid );
+    const xMinIndex = header.indexOf( xMinName );
+    const xMaxIndex = header.indexOf( xMaxName );
+    const yMinIndex = header.indexOf( yMinName );
+    const yMaxIndex = header.indexOf( yMaxName );
+
+    if ( xMinIndex < 0 )
+        throw new InvalidIndexNameError( 'smaller X coordinate' );
+    else if ( xMaxIndex < 0 )
+        throw new InvalidIndexNameError( 'larger X coordinate' );
+    else if ( yMinIndex < 0 )
+        throw new InvalidIndexNameError( 'smaller Y coordinate' );
+    else if ( yMaxIndex < 0 )
+        throw new InvalidIndexNameError( 'larger Y coordinate' );
+    else if ( charName < 0 )
+        throw new InvalidIndexNameError( 'character' );
+    else if ( charId < 0 )
+        throw new InvalidIndexNameError( 'character ID' );
+
+    // Resolves the text ID a row belongs to. The UID is trimmed before the lookup.
+    const resolveTextId = ( cols: string[] ): string | undefined => {
+        const charUid = cols[ charId ]?.trim();
+
+        return uidLookupMap && charUid !== undefined ? uidLookupMap.get( charUid ) : charUid;
+    };
+
+    const boxes: ImportCharacterBoundingBoxDto[] = [];
+    const firstLine = lines[0];
+    // `undefined` when the file only contains a header; the loop below then never runs.
+    const firstEncounteredTextID = firstLine === undefined ? undefined : resolveTextId( firstLine.split( ',' ) );
+
+    for ( const line of lines ) {
+        const cols = line.split( ',' );
+        const actualTextId = resolveTextId( cols );
+
+        // Compare resolved text IDs, not raw character UIDs: UIDs differ per row, so comparing
+        // them rejected every multi-character file even when all rows map to the same text.
+        // NOTE(review): without a lookup map the UID still doubles as text ID, so a file with
+        // several UIDs and no `textId` is rejected — confirm this is intended.
+        if ( !textId && actualTextId !== firstEncounteredTextID ) {
+            throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+        }
+
+        // With an explicit text ID, rows that belong to other texts are skipped.
+        if ( textId !== undefined && actualTextId !== textId ) {
+            continue;
+        }
+
+        const x1 = Number( cols[ xMinIndex ] );
+        const x2 = Number( cols[ xMaxIndex ] );
+        const y1 = Number( cols[ yMinIndex ] );
+        const y2 = Number( cols[ yMaxIndex ] );
+
+        boxes.push( {
+            'xMin': x1 < x2 ? x1 : x2,
+            'xMax': x1 < x2 ? x2 : x1,
+            'yMin': y1 < y2 ? y1 : y2,
+            'yMax': y1 < y2 ? y2 : y1,
+            'character': String( cols[ charName ] ),
+            // NOTE(review): this stores the resolved text ID, not the character UID — verify
+            // against the DTO contract before this parser is wired in.
+            'foreignId': Number( actualTextId )
+        } );
+    }
+
+    return boxes;
+};
diff --git a/src/ts/dataImport/parsers/fixations_new.ts b/src/ts/dataImport/parsers/fixations_new.ts
@@ -0,0 +1,86 @@
+import {
+    InvalidIndexNameError,
+    MultipleTextIDsWithoutSpecifiedTextIDError
+} from '../util/errors';
+import type {
+    ImportReadingSessionDto
+} from '@/types/dtos/ImportReadingSessionDto';
+
+/**
+ * Parses a CSV of fixations into one reading session per reader.
+ *
+ * The CSV is split naively on `,` (no quoting support). Header names and cells are trimmed.
+ * Coordinates are multiplied by `factor` and rounded to integers.
+ *
+ * @param text - Raw CSV content; the first non-blank line is the header.
+ * @param textId - Text ID stored on every session (as a number). When
+ *   `fileHasMultipleTextIDs` is true, only rows whose text column equals it are kept.
+ * @param fileHasMultipleTextIDs - Whether the file may contain rows for several texts. When
+ *   false, every row must carry the same text ID as the first data row.
+ * @param fileHasMultipleReaderIDs - Whether the reader is read from the reader column.
+ * @param currentReader - Reader ID used for all rows when `fileHasMultipleReaderIDs` is false.
+ * @param factor - Multiplier applied to x and y before rounding.
+ * @param xName - Header name of the X coordinate column.
+ * @param yName - Header name of the Y coordinate column.
+ * @param readerName - Header name of the reader ID column.
+ * @param textName - Header name of the text ID column.
+ * @param idName - Header name of the fixation ID column.
+ * @param lang - Header name of the language column; currently not used by the parser.
+ * @returns The sessions collected per reader; empty when the file has no data rows.
+ * @throws InvalidIndexNameError when a required column is missing from the header
+ *   (this includes a completely empty file).
+ * @throws MultipleTextIDsWithoutSpecifiedTextIDError when `fileHasMultipleTextIDs` is false
+ *   but the rows reference more than one text ID.
+ */
+export const parseFixationsCSV = (
+    text: string,
+    textId: string,
+    fileHasMultipleTextIDs: boolean,
+    fileHasMultipleReaderIDs: boolean,
+    currentReader: string = '0', // only used if fileHasMultipleReaderIDs = false
+    factor: number = 100,
+    xName: string = 'x',
+    yName: string = 'y',
+    readerName: string = 'reader',
+    textName: string = 'text',
+    idName: string = 'fixid',
+    lang: string ='lang'
+): ImportReadingSessionDto[] => {
+    const lines = text.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    // An empty file yields an empty header, which is reported as a missing column below
+    // instead of crashing on `undefined.split`.
+    const header = ( lines.shift() ?? '' ).split( ',' )
+        .map( h => h.trim() );
+    const readerIndex = header.indexOf( readerName );
+    const textIndex = header.indexOf( textName );
+    const xIndex = header.indexOf( xName );
+    const yIndex = header.indexOf( yName );
+    const idIndex = header.indexOf( idName );
+
+    if ( xIndex < 0 )
+        throw new InvalidIndexNameError( 'X coordinate' );
+    else if ( yIndex < 0 )
+        throw new InvalidIndexNameError( 'Y coordinate' );
+    else if ( readerIndex < 0 && fileHasMultipleReaderIDs )
+        throw new InvalidIndexNameError( 'reader ID' );
+    else if ( textIndex < 0 && fileHasMultipleTextIDs )
+        throw new InvalidIndexNameError( 'text ID' );
+    else if ( idIndex < 0 )
+        throw new InvalidIndexNameError( 'fixation ID' );
+
+    // Cells are trimmed like the header so that "a, b" style files match `textId`.
+    const rows = lines.map( l => l.split( ',' ).map( c => c.trim() ) );
+    // `undefined` when the file only contains a header (or has no text column); the
+    // single-text check below then compares `undefined` with `undefined`.
+    const firstEncounteredTextID = rows[0]?.[ textIndex ];
+    const points: {
+        [reader: string]: ImportReadingSessionDto
+    } = {};
+
+    // Appends the fixation of one row to the session of the row's reader,
+    // creating the session on first use.
+    const addPointForReader = ( cols: string[] ) => {
+        const reader = fileHasMultipleReaderIDs ? cols[ readerIndex ]! : currentReader;
+
+        if ( !points[ reader ] ) {
+            points[ reader ] = {
+                'textForeignId': Number( textId ),
+                'readerForeignId': Number( reader ),
+                'fixations': [],
+                'preAnnotations': []
+            };
+        }
+
+        points[ reader ]!.fixations!.push( {
+            'x': Math.round( Number( cols[ xIndex ] ) * factor ),
+            'y': Math.round( Number( cols[ yIndex ] ) * factor ),
+            'foreignId': Number( cols[ idIndex ]! )
+        } );
+    };
+
+    for ( const cols of rows ) {
+        if ( !fileHasMultipleTextIDs ) {
+            if ( firstEncounteredTextID !== cols[ textIndex ] ) {
+                throw new MultipleTextIDsWithoutSpecifiedTextIDError();
+            }
+
+            addPointForReader( cols );
+        } else if ( cols[ textIndex ] === textId ) {
+            addPointForReader( cols );
+        }
+    }
+
+    return Object.values( points );
+};
diff --git a/src/ts/dataImport/util/char_text_map.ts b/src/ts/dataImport/util/char_text_map.ts
@@ -0,0 +1,19 @@
+/**
+ * Builds a lookup map from the `text_uid` column to the `text_id` column of a CSV.
+ *
+ * The CSV is split naively on `,` (no quoting support); header names, keys and values are
+ * trimmed. Rows that lack either cell are skipped. When several rows share a key, the last
+ * one wins.
+ *
+ * @param csvText - Raw CSV content; the first non-blank line is the header.
+ * @returns The lookup map; empty when the input is empty or a required column is missing.
+ */
+export const createUidLookupMap = (csvText: string): Map<string, string> => {
+    const lookup = new Map<string, string>();
+    const lines = csvText.split( /\r?\n/ ).filter( l => l.trim() !== '' );
+    const headerLine = lines.shift();
+
+    // Empty or blank input: nothing to map (previously crashed on `undefined.split`).
+    if (headerLine === undefined) {
+        return lookup;
+    }
+
+    const header = headerLine.split( ',' )
+        .map( h => h.trim() );
+
+    // NOTE(review): the character-box parser looks keys up by `char_uid` values — confirm
+    // that 'text_uid' is the intended key column.
+    const keyIndex = header.indexOf('text_uid'); //Target for characterid lookup
+    const textIdIndex = header.indexOf('text_id');   //global text ID
+
+    // Without both columns no row can contribute an entry.
+    if (keyIndex < 0 || textIdIndex < 0) {
+        return lookup;
+    }
+
+    for (const line of lines) {
+        const cols = line.split(',');
+        const key = cols[keyIndex];
+        const value = cols[textIdIndex];
+
+        if (key !== undefined && value !== undefined) {
+            lookup.set(key.trim(), value.trim());
+        }
+    }
+
+    return lookup;
+};