diff --git a/config/custom-environment-variables.json b/config/custom-environment-variables.json index 701712e..7c5e03d 100644 --- a/config/custom-environment-variables.json +++ b/config/custom-environment-variables.json @@ -68,9 +68,16 @@ "NFS": { "pvPath": "PV_SOURCE_PATH" }, + "crawling": { + "extension": "CRAWLING_EXTENSION", + "nestedJsonPath": "CRAWLING_NESTED_JSON_PATH", + "ignoreNotFound": { + "__name": "CRAWLING_IGNORE_NOT_FOUND", + "__format": "boolean" + } + }, "ingestion": { - "provider": "PROVIDER_FROM", - "blackList": "BLACK_LIST" + "provider": "PROVIDER_FROM" }, "jobManager": { "url": "JOB_MANAGER_URL", diff --git a/config/default.json b/config/default.json index 4f2f48e..6e292f8 100644 --- a/config/default.json +++ b/config/default.json @@ -46,9 +46,13 @@ "NFS": { "pvPath": "/Path/To/Models" }, + "crawling": { + "extension": ".json", + "nestedJsonPath": "$..['uri','url']", + "ignoreNotFound": true + }, "ingestion": { - "provider": "NFS", - "blackList": ["tar", "zip", "rar", "7z"] + "provider": "NFS" }, "jobManager": { "url": "http://127.0.0.1:8080", diff --git a/config/test.json b/config/test.json index 81766b3..d5a18f5 100644 --- a/config/test.json +++ b/config/test.json @@ -1,4 +1,9 @@ { + "crawling": { + "extension": ".json", + "nestedJsonPath": "$..['uri','url']", + "ignoreNotFound": true + }, "S3": { "accessKeyId": "minioadmin", "secretAccessKey": "minioadmin", @@ -13,8 +18,7 @@ "pvPath": "./tests/helpers/3DModels" }, "ingestion": { - "provider": "S3", - "blackList": ["tar", "zip", "rar", "7z"] + "provider": "S3" }, "jobManager": { "url": "http://127.0.0.1:8080", diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 9b53e28..ee81463 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -26,7 +26,6 @@ data: REQUESTS_CA_BUNDLE: {{ printf "%s/%s" $ca.path $ca.key | quote }} NODE_EXTRA_CA_CERTS: {{ printf "%s/%s" $ca.path $ca.key | quote }} {{- end }} - BLACK_LIST: {{ .Values.env.blackList | quote }} MAX_CONCURRENCY: {{ .Values.env.maxConcurrency | quote }} {{ if eq $provider "S3" }} {{- $S3 := (include "merged.S3" . ) | fromYaml }} @@ -50,3 +49,6 @@ data: INGESTION_TASK_BATCHES: {{ $jobManager.ingestion.batches | quote}} JOB_DELETE_TYPE: {{ $jobManager.delete.jobType | quote }} TASK_DELETE_TYPE: {{ $jobManager.delete.taskType | quote }} + CRAWLING_EXTENSION: {{ .Values.env.crawling.extension | quote }} + CRAWLING_NESTED_JSON_PATH: {{ .Values.env.crawling.nestedJsonPath | quote }} + CRAWLING_IGNORE_NOT_FOUND: {{ .Values.env.crawling.ignoreNotFound | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index a260449..312d159 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -116,6 +116,7 @@ jobManager: delete: jobType: taskType: + env: port: 80 targetPort: 8080 @@ -130,9 +131,13 @@ env: metrics: enabled: false url: '' - blackList: ["tar", "zip", "rar", "7z"] maxConcurrency: 5 + crawling: + extension: '.json' + nestedJsonPath: "$..['uri','url']" + ignoreNotFound: true + resources: enabled: true value: diff --git a/package-lock.json b/package-lock.json index 2fd760a..7c31397 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.3.0", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -49,6 +50,7 @@ "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", @@ -10071,6 +10073,13 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/jsonpath": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@types/jsonpath/-/jsonpath-0.2.4.tgz", + "integrity": "sha512-K3hxB8Blw0qgW6ExKgMbXQv2UPZBoE2GqLpVY+yr7nMD2Pq86lsuIzyAaiQ7eMqFL5B6di6pxSkogLJEyEHoGA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/keygrip": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/@types/keygrip/-/keygrip-1.0.6.tgz", @@ -13614,6 +13623,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, "node_modules/eslint": { "version": "8.53.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.53.0.tgz", @@ -14546,7 +14576,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, "bin": { "esparse": "bin/esparse.js", "esvalidate": "bin/esvalidate.js" @@ -14583,7 +14612,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, "engines": { "node": ">=4.0" } @@ -14592,7 +14620,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, "engines": { "node": ">=0.10.0" } @@ -17533,6 +17560,17 @@ "node >= 0.2.0" ] }, + "node_modules/jsonpath": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.3.0.tgz", + "integrity": "sha512-0kjkYHJBkAy50Z5QzArZ7udmvxrJzkpKYW27fiF//BrMY7TQibYLl+FYIXN2BiYmwMIVzSfD8aDRj6IzgBX2/w==", + "license": "MIT", + "dependencies": { + "esprima": "1.2.5", + "static-eval": "2.1.1", + "underscore": "1.13.6" + } + }, "node_modules/jsonpath-plus": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz", @@ -17552,6 +17590,18 @@ "node": ">=18.0.0" } }, + "node_modules/jsonpath/node_modules/esprima": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-1.2.5.tgz", + "integrity": "sha512-S9VbPDU0adFErpDai3qDkjq8+G05ONtKzcyNrPKg/ZKa+tf879nX2KexNU95b31UoTJjRLInNBHHHjFPoCd7lQ==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/jsonpointer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", @@ -21323,6 +21373,15 @@ "node": ">=10" } }, + "node_modules/static-eval": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.1.1.tgz", + "integrity": "sha512-MgWpQ/ZjGieSVB3eOJVs4OA2LT/q1vx98KPCTTQPzq/aLr0YUXTsgryTXr4SLfR0ZfUUCiedM9n/ABeDIyy4mA==", + "license": "MIT", + "dependencies": { + "escodegen": "^2.1.0" + } + }, "node_modules/statuses": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", @@ -22248,6 +22307,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/underscore": { + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.6.tgz", + "integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==", + "license": "MIT" + }, "node_modules/undici": { "version": "6.21.3", "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", diff --git a/package.json b/package.json index cdba821..795cd01 100644 --- a/package.json +++ b/package.json @@ -48,12 +48,12 @@ "@map-colonies/error-express-handler": "^2.1.0", "@map-colonies/express-access-log-middleware": "^2.0.1", "@map-colonies/js-logger": "^1.0.1", - "@map-colonies/types": "^1.7.0", "@map-colonies/mc-model-types": "^19.0.0", "@map-colonies/mc-priority-queue": "^8.2.1", "@map-colonies/openapi-express-viewer": "^3.0.0", "@map-colonies/read-pkg": "0.0.1", "@map-colonies/telemetry": "^6.1.0", + "@map-colonies/types": "^1.7.0", "@opentelemetry/api": "1.7.0", "@opentelemetry/api-metrics": "0.23.0", "@opentelemetry/context-async-hooks": "^1.24.1", @@ -65,6 +65,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.3.0", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -76,12 +77,13 @@ "@faker-js/faker": "^8.4.1", "@map-colonies/eslint-config": "^4.0.0", "@map-colonies/prettier-config": "0.0.1", - "@redocly/openapi-cli": "^1.0.0-beta.94", "@redocly/cli": "^1.34.3", + "@redocly/openapi-cli": "^1.0.0-beta.94", "@types/compression": "^1.7.2", "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 273d0c1..2b3033f 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -31,6 +31,7 @@ export interface DeletePayload { export interface Provider { streamModelPathsToQueueFile: (modelId: string, pathToTileset: string, productName: string) => Promise; + getFile: (filePath: string) => Promise; } export interface IngestionJobParameters { @@ -56,7 +57,7 @@ export interface DeleteTaskParameters { blockDuplication?: boolean; } -export interface S3Config { +export interface S3Config extends BaseProviderConfig { accessKeyId: string; secretAccessKey: string; endpointUrl: string; @@ -66,10 +67,16 @@ export interface S3Config { forcePathStyle: boolean; } -export interface NFSConfig { +export interface NFSConfig extends BaseProviderConfig { pvPath: string; } +export interface BaseProviderConfig { + extension: string; + nestedJsonPath: string; + ignoreNotFound: boolean; +} + export type ProviderConfig = S3Config | NFSConfig; export interface JobOperationResponse { diff --git a/src/containerConfig.ts b/src/containerConfig.ts index 5a88d99..02516e4 100644 --- a/src/containerConfig.ts +++ b/src/containerConfig.ts @@ -7,7 +7,6 @@ import jsLogger, { LoggerOptions } from '@map-colonies/js-logger'; import client from 'prom-client'; import { JobManagerClient } from '@map-colonies/mc-priority-queue'; import { SERVICES, SERVICE_NAME } from './common/constants'; -import { Provider, ProviderConfig } from './common/interfaces'; import { tracing } from './common/tracing'; import { jobOperationsRouterFactory, JOB_OPERATIONS_ROUTER_SYMBOL } from './jobOperations/routes/jobOperationsRouter'; import { InjectionObject, registerDependencies } from './common/dependencyRegistration'; @@ -62,18 +61,14 @@ export const registerExternalValues = (options?: RegisterOptions): DependencyCon { token: SERVICES.PROVIDER_CONFIG, provider: { - useFactory: (): ProviderConfig => { - return getProviderConfig(provider); - }, + useFactory: () => getProviderConfig(provider), }, }, { token: SERVICES.QUEUE_FILE_HANDLER, provider: { useClass: QueueFileHandler } }, { token: SERVICES.PROVIDER, provider: { - useFactory: (): Provider => { - return getProvider(provider); - }, + useFactory: (container) => getProvider(provider, container), }, }, { diff --git a/src/handlers/queueFileHandler.ts b/src/handlers/queueFileHandler.ts index ce6445d..9d62257 100644 --- a/src/handlers/queueFileHandler.ts +++ b/src/handlers/queueFileHandler.ts @@ -1,5 +1,5 @@ -import fs from 'fs/promises'; -import os from 'os'; +import fs from 'node:fs/promises'; +import os from 'node:os'; import LineByLine from 'n-readlines'; import { singleton } from 'tsyringe'; diff --git a/src/jobOperations/models/jobOperationsManager.ts b/src/jobOperations/models/jobOperationsManager.ts index f273a25..24134d2 100644 --- a/src/jobOperations/models/jobOperationsManager.ts +++ b/src/jobOperations/models/jobOperationsManager.ts @@ -268,27 +268,17 @@ export class JobOperationsManager { @withSpanV4 private createTasks(batchSize: number, modelId: string): ICreateTaskBody[] { - const logContext = { ...this.logContext, function: this.createTasks.name }; const tasks: ICreateTaskBody[] = []; let chunk: string[] = []; let data: string | null = this.queueFileHandler.readline(modelId); while (data !== null) { - if (this.isFileInBlackList(data)) { - this.logger.warn({ - msg: 'The file is is the black list! Ignored...', - logContext, - file: data, - modelId, - }); - } else { - chunk.push(data); - - if (chunk.length === batchSize) { - const task = this.buildTaskFromChunk(chunk, modelId); - tasks.push(task); - chunk = []; - } + chunk.push(data); + + if (chunk.length === batchSize) { + const task = this.buildTaskFromChunk(chunk, modelId); + tasks.push(task); + chunk = []; } data = this.queueFileHandler.readline(modelId); @@ -314,11 +304,4 @@ export class JobOperationsManager { const parameters: IngestionTaskParameters = { paths: chunk, modelId, lastIndexError: -1 }; return { type: INGESTION_TASK_TYPE, parameters }; } - - private isFileInBlackList(data: string): boolean { - const blackList = this.config.get('ingestion.blackList'); - // eslint-disable-next-line @typescript-eslint/no-magic-numbers - const fileExtension = data.split('.').slice(-1)[0]; - return blackList.includes(fileExtension); - } } diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts new file mode 100644 index 0000000..911e10d --- /dev/null +++ b/src/providers/baseProvider.ts @@ -0,0 +1,139 @@ +import Path from 'node:path'; +import { Logger } from '@map-colonies/js-logger'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; +import jsonpath from 'jsonpath'; +import { AppError } from '../common/appError'; +import { BaseProviderConfig, LogContext, Provider } from '../common/interfaces'; +import { QueueFileHandler } from '../handlers/queueFileHandler'; + +export abstract class BaseProvider implements Provider { + protected readonly logContext: LogContext; + private readonly crawlingExtension: string; + + public constructor( + protected readonly logger: Logger, + public readonly tracer: Tracer, + protected readonly config: T, + protected readonly queueFileHandler: QueueFileHandler + ) { + this.logContext = { + fileName: __filename, + class: BaseProvider.name, + }; + + const extension = this.config.extension; + this.crawlingExtension = extension.startsWith('.') ? extension : `.${extension}`; + } + + @withSpanAsyncV4 + public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { + const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; + + let initialPath = pathToTileset; + if (!initialPath.endsWith(this.crawlingExtension)) { + initialPath = Path.join(initialPath, `tileset${this.crawlingExtension}`); + + initialPath = initialPath.replace(/\\/g, '/').replace(/^\//, ''); + } + + this.logger.info({ + msg: 'Started streaming model paths to queue file', + logContext, + modelName, + modelId, + pathToTileset: initialPath, + }); + + const visitedFiles = new Set(); + const processingQueue: string[] = [initialPath]; + let totalFilesAdded = 0; + + while (processingQueue.length > 0) { + const currentPath = processingQueue.shift(); + + if (currentPath === undefined) { + continue; + } + + if (visitedFiles.has(currentPath)) { + continue; + } + + visitedFiles.add(currentPath); + + try { + const buffer = await this.getFile(currentPath); + + await this.queueFileHandler.writeFileNameToQueueFile(modelId, currentPath); + totalFilesAdded++; + + if (currentPath.endsWith(this.crawlingExtension)) { + const nestedPaths = this.extractPathsFromJson(buffer, currentPath); + + for (const nestedPath of nestedPaths) { + if (visitedFiles.has(nestedPath)) { + continue; + } + + if (nestedPath.endsWith(this.crawlingExtension)) { + processingQueue.push(nestedPath); + } else { + await this.queueFileHandler.writeFileNameToQueueFile(modelId, nestedPath); + visitedFiles.add(nestedPath); + totalFilesAdded++; + } + } + } + } catch (err) { + if (this.config.ignoreNotFound && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + this.logger.warn({ msg: 'File not found, skipping...', logContext, path: currentPath, modelName }); + continue; + } + + this.logger.error({ + msg: 'Failed to stream model paths to queue file', + logContext, + modelName, + modelId, + path: currentPath, + err, + }); + + throw err; + } + } + + this.logger.info({ + msg: 'Finished streaming model paths to queue file', + logContext, + modelName, + modelId, + totalFilesAdded, + }); + + return totalFilesAdded; + } + + private extractPathsFromJson(buffer: Buffer, currentPath: string): string[] { + try { + const fileContent = buffer.toString(); + const json = JSON.parse(fileContent) as object; + const nestedJsonPath = this.config.nestedJsonPath; + const results = jsonpath.query(json, nestedJsonPath) as string[]; + + const dirname = Path.dirname(currentPath); + + return results.map((child) => { + const joinedPath = dirname === '.' ? child : Path.join(dirname, child); + return joinedPath.replace(/\\/g, '/').replace(/^\//, ''); + }); + } catch (err) { + this.logger.error({ msg: 'Failed to parse JSON', path: currentPath, err }); + return []; + } + } + + public abstract getFile(filePath: string): Promise; +} diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 01d796f..f7a6625 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -1,12 +1,12 @@ import config from 'config'; import httpStatus from 'http-status-codes'; -import { container } from 'tsyringe'; +import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; -import { ProviderConfig } from '../common/interfaces'; +import { BaseProviderConfig, Provider, ProviderConfig } from '../common/interfaces'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; -function getProvider(provider: string): S3Provider | NFSProvider { +function getProvider(provider: string, container: DependencyContainer): Provider { switch (provider.toLowerCase()) { case 'nfs': return container.resolve(NFSProvider); @@ -19,9 +19,17 @@ function getProvider(provider: string): S3Provider | NFSProvider { function getProviderConfig(provider: string): ProviderConfig { try { - return config.get(provider); + const providerConfig: ProviderConfig = config.get(provider); + const crawlingConfig: BaseProviderConfig = config.get('crawling'); + const fullConfig = { ...providerConfig, ...crawlingConfig }; + + return fullConfig; } catch (err) { - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Invalid config provider received: ${provider}. Consult documentation for available values`, + false + ); } } diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index a871e1e..0781e8c 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -1,17 +1,20 @@ -import fs from 'fs/promises'; -import { Logger } from '@map-colonies/js-logger'; +import fs from 'node:fs/promises'; +import Path from 'node:path'; import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; +import { Logger } from '@map-colonies/js-logger'; import { Tracer } from '@opentelemetry/api'; import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; -import { Provider, NFSConfig, LogContext } from '../common/interfaces'; +import { NFSConfig, LogContext } from '../common/interfaces'; +import { AppError } from '../common/appError'; +import { BaseProvider } from './baseProvider'; @injectable() -export class NFSProvider implements Provider { - private readonly logContext: LogContext; +export class NFSProvider extends BaseProvider { + protected override readonly logContext: LogContext; + private readonly pvPath: string; public constructor( @inject(SERVICES.LOGGER) protected readonly logger: Logger, @@ -19,73 +22,40 @@ export class NFSProvider implements Provider { @inject(SERVICES.PROVIDER_CONFIG) protected readonly config: NFSConfig, @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler ) { + super(logger, tracer, config, queueFileHandler); + this.logContext = { fileName: __filename, class: NFSProvider.name, }; + this.pvPath = this.config.pvPath; } @withSpanAsyncV4 - public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { - const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - let filesCount = 0; - const modelPath = `${this.config.pvPath}/${pathToTileset}`; + public override async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + this.logger.debug({ + msg: 'Starting to get file', + logContext, + filePath, + }); + + const fullPath = Path.join(this.pvPath, filePath); + try { - await fs.access(modelPath); + const data = await fs.readFile(fullPath); + return data; } catch (err) { - this.logger.error({ - msg: 'failed to access the folder', - logContext, - modelId, - modelName, - err, - }); - throw new AppError(httpStatus.NOT_FOUND, `Model ${modelName} doesn't exists in the agreed folder. Path: ${modelPath}`, true); - } + const error = err as NodeJS.ErrnoException; - const folders: string[] = [pathToTileset]; - - while (folders.length > 0) { - const files = await fs.readdir(`${this.config.pvPath}/${folders[0]}`); - this.logger.debug({ - msg: 'Listing folder', - logContext, - folder: folders[0], - filesCount, - modelId, - modelName, - }); - for (const file of files) { - const fileStats = await fs.stat(`${this.config.pvPath}/${folders[0]}/${file}`); - if (fileStats.isDirectory()) { - folders.push(`${folders[0]}/${file}`); - } else { - try { - await this.queueFileHandler.writeFileNameToQueueFile(modelId, `${folders[0]}/${file}`); - filesCount++; - } catch (err) { - this.logger.error({ - msg: `Didn't write the file: '${folders[0]}/${file}' in FS.`, - logContext, - modelId, - modelName, - err, - }); - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, 'problem with queueFileHandler', false); - } - } + if (error.code === 'ENOENT') { + throw new AppError(httpStatus.NOT_FOUND, `File ${filePath} not found`, true); + } + if (error.code === 'EISDIR') { + throw new AppError(httpStatus.BAD_REQUEST, `${filePath} is a directory, expected a file`, true); } - folders.shift(); + throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Error reading file: ${error.message}`, true); } - - this.logger.info({ - msg: 'Finished listing the files', - logContext, - filesCount: filesCount, - modelName, - modelId, - }); - return filesCount; } } diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index d4162d6..9fe1877 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,19 +1,19 @@ -import { CommonPrefix, ListObjectsCommand, ListObjectsRequest, S3Client, S3ClientConfig, S3ServiceException, _Object } from '@aws-sdk/client-s3'; -import { Logger } from '@map-colonies/js-logger'; import httpStatus from 'http-status-codes'; +import { GetObjectCommand, S3Client, S3ClientConfig } from '@aws-sdk/client-s3'; +import { Logger } from '@map-colonies/js-logger'; import { inject, injectable } from 'tsyringe'; import { Tracer } from '@opentelemetry/api'; -import { withSpanAsyncV4, withSpanV4 } from '@map-colonies/telemetry'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; -import { LogContext, Provider, S3Config } from '../common/interfaces'; +import { LogContext, S3Config } from '../common/interfaces'; +import { AppError } from '../common/appError'; +import { BaseProvider } from './baseProvider'; @injectable() -export class S3Provider implements Provider { +export class S3Provider extends BaseProvider { + protected override readonly logContext: LogContext; private readonly s3: S3Client; - private filesCount: number; - private readonly logContext: LogContext; public constructor( @inject(SERVICES.LOGGER) protected readonly logger: Logger, @@ -21,6 +21,8 @@ export class S3Provider implements Provider { @inject(SERVICES.PROVIDER_CONFIG) protected readonly s3Config: S3Config, @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler ) { + super(logger, tracer, s3Config, queueFileHandler); + const s3ClientConfig: S3ClientConfig = { endpoint: this.s3Config.endpointUrl, forcePathStyle: this.s3Config.forcePathStyle, @@ -32,7 +34,6 @@ export class S3Provider implements Provider { }; this.s3 = new S3Client(s3ClientConfig); - this.filesCount = 0; this.logContext = { fileName: __filename, @@ -41,110 +42,38 @@ export class S3Provider implements Provider { } @withSpanAsyncV4 - public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { - const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - /* eslint-disable @typescript-eslint/naming-convention */ - const params: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: pathToTileset + '/', - }; - - await this.listS3Recursively(modelId, params); - - if (await this.queueFileHandler.checkIfTempFileEmpty(modelId)) { - throw new AppError(httpStatus.NOT_FOUND, `Model ${modelName} doesn't exists in bucket ${this.s3Config.bucket}! Path: ${pathToTileset}`, true); - } - - this.logger.info({ - msg: 'Finished listing the files', + public override async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + this.logger.debug({ + msg: 'Starting to get file', logContext, - filesCount: this.filesCount, - modelName, - modelId, + filePath, }); - const lastFileCount = this.filesCount; - this.filesCount = 0; - return lastFileCount; - } + const getObjectCommand = new GetObjectCommand({ + /* eslint-disable @typescript-eslint/naming-convention */ + Bucket: this.s3Config.bucket, + Key: filePath, + /* eslint-disable @typescript-eslint/naming-convention */ + }); - @withSpanAsyncV4 - private async listS3Recursively(modelId: string, params: ListObjectsRequest): Promise { - const logContext = { ...this.logContext, function: this.listS3Recursively.name }; try { - const listObject = new ListObjectsCommand(params); - const data = await this.s3.send(listObject); - - if (data.Contents) { - await this.writeFileContent(modelId, data.Contents); - } - - if (data.CommonPrefixes) { - await this.writeFolderContent(modelId, data.CommonPrefixes); - } - - if (data.IsTruncated === true) { - const nextParams: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: data.Prefix, - Marker: data.NextMarker, - }; - await this.listS3Recursively(modelId, nextParams); - } - - this.logger.debug({ - msg: `Listed ${this.filesCount} files`, - logContext, - modelId, - }); + const response = await this.s3.send(getObjectCommand); + const responseArray = await response.Body?.transformToByteArray(); + return Buffer.from(responseArray as Uint8Array); } catch (err) { this.logger.error({ - msg: 'failed in listing the model', - logContext, - modelId, + msg: 'an error occurred during getting file', err, + endpoint: this.s3Config.endpointUrl, + bucketName: this.s3Config.bucket, + key: filePath, }); - this.handleS3Error(this.s3Config.bucket, err); - } - } - @withSpanAsyncV4 - private async writeFileContent(modelId: string, contents: _Object[]): Promise { - for (const content of contents) { - if (content.Key == undefined) { - throw new AppError(httpStatus.NO_CONTENT, 'found content without file name', true); - } - await this.queueFileHandler.writeFileNameToQueueFile(modelId, content.Key); - this.filesCount++; - } - } + const s3Error = err as Error; + const statusCode = (s3Error as unknown as { name: string }).name === 'NoSuchKey' ? httpStatus.NOT_FOUND : httpStatus.INTERNAL_SERVER_ERROR; - @withSpanAsyncV4 - private async writeFolderContent(modelId: string, CommonPrefixes: CommonPrefix[]): Promise { - for (const commonPrefix of CommonPrefixes) { - if (commonPrefix.Prefix != undefined) { - const nextParams: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: commonPrefix.Prefix, - }; - await this.listS3Recursively(modelId, nextParams); - } + throw new AppError(statusCode, `an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`, true); } } - - @withSpanV4 - private handleS3Error(s3Bucket: string, error: unknown): never { - let statusCode = httpStatus.INTERNAL_SERVER_ERROR; - let message = "Didn't throw a S3 exception in file"; - - if (error instanceof S3ServiceException) { - statusCode = error.$metadata.httpStatusCode ?? statusCode; - message = `${error.name}, message: ${error.message}, bucket: ${s3Bucket}`; - } - - throw new AppError(statusCode, message, true); - } } diff --git a/tests/configurations/initJestOpenapi.setup.ts b/tests/configurations/initJestOpenapi.setup.ts index e26f701..66745c4 100644 --- a/tests/configurations/initJestOpenapi.setup.ts +++ b/tests/configurations/initJestOpenapi.setup.ts @@ -1,4 +1,4 @@ -import path from 'path'; +import path from 'node:path'; import jestOpenApi from 'jest-openapi'; jestOpenApi(path.join(process.cwd(), 'bundledApi.yaml')); diff --git a/tests/helpers/mockCreator.ts b/tests/helpers/mockCreator.ts index cdfbceb..b442201 100644 --- a/tests/helpers/mockCreator.ts +++ b/tests/helpers/mockCreator.ts @@ -146,4 +146,5 @@ export const jobManagerClientMock = { export const configProviderMock = { streamModelPathsToQueueFile: jest.fn(), + getFile: jest.fn(), }; diff --git a/tests/helpers/nfsHelper.ts b/tests/helpers/nfsHelper.ts index add96b4..78fec74 100644 --- a/tests/helpers/nfsHelper.ts +++ b/tests/helpers/nfsHelper.ts @@ -1,21 +1,23 @@ -import fs from 'fs'; -import path from 'path'; +import fs from 'node:fs'; +import path from 'node:path'; import { faker } from '@faker-js/faker'; import { NFSConfig } from '../../src/common/interfaces'; export class NFSHelper { public constructor(private readonly config: NFSConfig) {} - public async createFileOfModel(modelName: string, file: string): Promise { + public async createFileOfModel(modelName: string, file: string, data?: string): Promise { const subFolders = path.dirname(file); const fileName = path.basename(file); - const dirPath = `${this.config.pvPath}/${modelName}/${subFolders}`; + const dirPath = path.join(this.config.pvPath, modelName, subFolders); + if (!fs.existsSync(dirPath)) { await this.createFolder(dirPath); } - const data = faker.word.words(); - await fs.promises.writeFile(`${dirPath}/${fileName}`, data); - return data; + + const content = data ?? faker.word.words(); + await fs.promises.writeFile(path.join(dirPath, fileName), content); + return content; } public async createFolder(path: string): Promise { diff --git a/tests/helpers/s3Helper.ts b/tests/helpers/s3Helper.ts index 45479ca..6f45dce 100644 --- a/tests/helpers/s3Helper.ts +++ b/tests/helpers/s3Helper.ts @@ -51,14 +51,21 @@ export class S3Helper { await this.s3.send(command); } - public async createFileOfModel(model: string, file: string): Promise { + public async createFileOfModel(model: string, file: string, data?: string | Buffer): Promise { + const content = data ?? faker.word.words(); + const bufferData = Buffer.isBuffer(content) ? content : Buffer.from(content); + + const key = model !== '' ? `${model}/${file}` : file; + const params: PutObjectCommandInput = { Bucket: this.s3Config.bucket, - Key: `${model}/${file}`, - Body: Buffer.from(faker.word.words()), + Key: key, + Body: bufferData, }; + const command = new PutObjectCommand(params); await this.s3.send(command); + return bufferData; } public async clearBucket(bucket = this.s3Config.bucket): Promise { diff --git a/tests/integration/handlers/queueFileHandler.spec.ts b/tests/integration/handlers/queueFileHandler.spec.ts index 6c60766..6130835 100644 --- a/tests/integration/handlers/queueFileHandler.spec.ts +++ b/tests/integration/handlers/queueFileHandler.spec.ts @@ -1,4 +1,4 @@ -import fs from 'fs'; +import fs from 'node:fs'; import { faker } from '@faker-js/faker'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; diff --git a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts index 219b7c7..14d7190 100644 --- a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts +++ b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts @@ -28,7 +28,7 @@ describe('JobOperationsController on S3', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('s3'); + return getProvider('S3', container); }, }, }, @@ -189,7 +189,7 @@ describe('IngestionController on NFS', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('nfs'); + return getProvider('NFS', container); }, }, }, diff --git a/tests/integration/providers/baseProvider.spec.ts b/tests/integration/providers/baseProvider.spec.ts new file mode 100644 index 0000000..fec9f86 --- /dev/null +++ b/tests/integration/providers/baseProvider.spec.ts @@ -0,0 +1,140 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import jsLogger, { Logger } from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { faker } from '@faker-js/faker'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; +import { NFSConfig } from '../../../src/common/interfaces'; +import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; +import { BaseProvider } from '../../../src/providers/baseProvider'; +import { AppError } from '../../../src/common/appError'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; + +// ToDo those are UNIT tests, NOT INTEGRATION!! But CI requires integration coverage +describe('Crawling tests', () => { + let crawler: BaseProvider; + let queueFileHandler: QueueFileHandler; + const logger: Logger = jsLogger({ enabled: false }); + + const queueFilePath = os.tmpdir(); + const config: NFSConfig = { + extension: '.json', + nestedJsonPath: "$..['uri','url']", + ignoreNotFound: false, + pvPath: 'test_pv_path', + }; + + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: logger } }, + { + token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...config, + }, + }, + }, + ], + }); + queueFileHandler = container.resolve(QueueFileHandler); + const tracer = container.resolve(SERVICES.TRACER); + crawler = new NFSProvider(logger, tracer, config, queueFileHandler); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + describe('streamModelPathsToQueueFile', () => { + const json0 = { + root: { + content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, + children: [ + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { url: '../1.json' }, children: [] }, + ], + }, + }; + const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { url: '2.json' } }] } }; + const json2 = {}; + const pathToTileset = '/x/y/0.json'; + + it('should returns all the files', async () => { + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const getFileSpy = jest.spyOn(crawler, 'getFile'); + + // eslint-disable-next-line @typescript-eslint/require-await + getFileSpy.mockImplementation(async (path) => { + const normalizedPath = path.replace(/\\/g, '/').replace(/^\//, ''); + + if (normalizedPath === 'x/y/0.json') { + return Buffer.from(JSON.stringify(json0)); + } + if (normalizedPath === 'x/1.json') { + return Buffer.from(JSON.stringify(json1)); + } + if (normalizedPath === 'x/2.json') { + return Buffer.from(JSON.stringify(json2)); + } + return Buffer.from('content'); + }); + + await queueFileHandler.createQueueFile(modelId); + const total = await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trim().split('\n'); + + expect(total).toBe(6); + expect(result).toEqual( + expect.arrayContaining([expect.stringContaining('x/y/0.json'), expect.stringContaining('x/1.json'), expect.stringContaining('x/2.json')]) + ); + getFileSpy.mockRestore(); + }); + + describe('getFile errors', () => { + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const createCrawler = (overrides: Partial = {}) => + new NFSProvider(logger, container.resolve(SERVICES.TRACER), { ...config, ...overrides }, queueFileHandler); + + it('should throw on a general getFile error', async () => { + const getFileSpy = jest + .spyOn(crawler, 'getFile') + .mockRejectedValueOnce(new AppError(StatusCodes.INTERNAL_SERVER_ERROR, 'Internal error', false)); + + await expect(crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).rejects.toThrow(AppError); + + getFileSpy.mockRestore(); + }); + + it('should throw on NOT_FOUND when ignoreNotFound is false', async () => { + const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + + await expect(crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).rejects.toThrow(AppError); + + getFileSpy.mockRestore(); + }); + + it('should skip NOT_FOUND files when ignoreNotFound is true', async () => { + const ignoringCrawler = createCrawler({ ignoreNotFound: true }); + const getFileSpy = jest.spyOn(ignoringCrawler, 'getFile').mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + + await expect(ignoringCrawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).resolves.toBe(0); + + getFileSpy.mockRestore(); + }); + }); + }); +}); diff --git a/tests/integration/providers/getProvider.spec.ts b/tests/integration/providers/getProvider.spec.ts index f4fcebc..a39d138 100644 --- a/tests/integration/providers/getProvider.spec.ts +++ b/tests/integration/providers/getProvider.spec.ts @@ -1,21 +1,27 @@ import config from 'config'; +import { container } from 'tsyringe'; import { AppError } from '../../../src/common/appError'; -import { NFSConfig, S3Config } from '../../../src/common/interfaces'; import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; describe('getProviderConfig tests', () => { - it('should return the NFS config when the provider is NFS', () => { + it('should return the NFS config merged with crawling config when the provider is NFS', () => { const provider = 'NFS'; - const expected = config.get('NFS'); + /* eslint-disable @typescript-eslint/no-unnecessary-type-assertion */ + const nfsConfig = config.get('NFS') as Record; + const crawlingConfig = config.get('crawling') as Record; + const expected = { ...nfsConfig, ...crawlingConfig }; const response = getProviderConfig(provider); expect(response).toStrictEqual(expected); }); - it('should return the S3 config when the provider is S3', () => { + it('should return the S3 config merged with crawling config when the provider is S3', () => { const provider = 'S3'; - const expected = config.get('S3'); + const s3Config = config.get('S3') as Record; + const crawlingConfig = config.get('crawling') as Record; + /* eslint-enable @typescript-eslint/no-unnecessary-type-assertion */ + const expected = { ...s3Config, ...crawlingConfig }; const response = getProviderConfig(provider); @@ -35,7 +41,7 @@ describe('getProvider tests', () => { it('should throw an error when the provider is nor S3 or NFS', () => { const provider = 'bla'; - const response = () => getProvider(provider); + const response = () => getProvider(provider, container); expect(response).toThrow(AppError); }); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index 8362b50..29ba143 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import os from 'os'; +import fs from 'node:fs'; +import os from 'node:os'; import config from 'config'; import { container } from 'tsyringe'; import httpStatus from 'http-status-codes'; @@ -9,7 +9,7 @@ import { faker } from '@faker-js/faker'; import { getApp } from '../../../src/app'; import { NFSProvider } from '../../../src/providers/nfsProvider'; import { SERVICES } from '../../../src/common/constants'; -import { NFSConfig } from '../../../src/common/interfaces'; +import { BaseProviderConfig, NFSConfig } from '../../../src/common/interfaces'; import { AppError } from '../../../src/common/appError'; import { createFile, queueFileHandlerMock } from '../../helpers/mockCreator'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; @@ -19,7 +19,7 @@ describe('NFSProvider tests', () => { let provider: NFSProvider; let queueFileHandler: QueueFileHandler; const queueFilePath = os.tmpdir(); - const nfsConfig = config.get('NFS'); + const nfsConfig = { ...config.get('NFS'), ...config.get('crawling') }; let nfsHelper: NFSHelper; beforeAll(() => { @@ -44,23 +44,48 @@ describe('NFSProvider tests', () => { jest.clearAllMocks(); }); + describe('getFile', () => { + it('When calling getFile, should get the file content from pv path', async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const fileContent = await nfsHelper.createFileOfModel(model, file); + + const bufferResult = await provider.getFile(`${model}/${file}`); + const result = bufferResult.toString(); + + expect(result).toStrictEqual(fileContent); + }); + }); + describe('streamModelPathsToQueueFile Function', () => { - it('if model exists in the agreed folder, returns all the file paths of the model', async () => { + it('if model exists and contains valid JSON, returns linked file paths', async () => { const modelId = faker.string.uuid(); + const modelName = 'interconnect'; + const entryFile = 'tileset.json'; + const pathToTileset = `${modelName}/${entryFile}`; + await queueFileHandler.createQueueFile(modelId); - const pathToTileset = faker.word.sample(); - const modelName = faker.word.sample(); - let expected = ''; - for (let i = 0; i < 4; i++) { - const file = i === 3 ? `${i}${createFile(false, true)}` : `${i}${createFile()}`; - await nfsHelper.createFileOfModel(pathToTileset, file); - expected = `${expected}${pathToTileset}/${file}\n`; - } + + const textureFile = 'text1.png'; + const childTileset = 'child.json'; + + const tilesetContent = JSON.stringify({ + root: { + content: { uri: childTileset }, + children: [{ content: { uri: textureFile } }], + }, + }); + + await nfsHelper.createFileOfModel('', pathToTileset, tilesetContent); + + await nfsHelper.createFileOfModel(modelName, textureFile, 'data'); + await nfsHelper.createFileOfModel(modelName, childTileset, JSON.stringify({ asset: { version: '1.0' } })); await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); - expect(result).toStrictEqual(expected); + expect(result).toContain(pathToTileset); await queueFileHandler.deleteQueueFile(modelId); }); @@ -69,6 +94,8 @@ describe('NFSProvider tests', () => { const modelName = faker.word.sample(); const modelId = faker.string.uuid(); + (provider as unknown as { config: BaseProviderConfig }).config.ignoreNotFound = false; + const result = async () => { await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); }; diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index a42a293..2c4ce60 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import os from 'os'; +import fs from 'node:fs'; +import os from 'node:os'; import config from 'config'; import jsLogger from '@map-colonies/js-logger'; import { container } from 'tsyringe'; @@ -21,10 +21,21 @@ describe('S3Provider tests', () => { const s3Config = config.get('S3'); beforeAll(async () => { + container.reset(); getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, - { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: s3Config } }, + { + token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...s3Config, + ignoreNotFound: false, + extension: '.json', + nestedJsonPath: "$..['uri','url']", + }, + }, + }, ], }); provider = container.resolve(S3Provider); @@ -44,28 +55,69 @@ describe('S3Provider tests', () => { s3Helper.killS3(); }); + describe('getFile', () => { + it(`When calling getFile, should see the file content from source bucket`, async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const expected = await s3Helper.createFileOfModel(model, file); + + const result = await provider.getFile(`${model}/${file}`); + + expect(result).toStrictEqual(expected); + }); + + it(`When the file is not exists in the bucket, throws error`, async () => { + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + + const result = async () => { + await provider.getFile(file); + }; + + await expect(result).rejects.toThrow(Error); + }); + }); + describe('streamModelPathsToQueueFile', () => { - it('returns all the files from S3', async () => { - const modelId = faker.word.sample(); - const modelName = faker.word.sample(); - const pathToTileset = faker.word.sample(); - const fileLength = faker.number.int({ min: 1, max: 5 }); - const expectedFiles: string[] = []; - for (let i = 0; i < fileLength; i++) { - const file = faker.word.sample(); - await s3Helper.createFileOfModel(pathToTileset, file); - expectedFiles.push(`${pathToTileset}/${file}`); - } + it('should recursively discover nested files across multiple directories and levels', async () => { + const modelId = faker.string.uuid(); + const modelName = 'complex-model'; + + const rootTileset = 'tileset.json'; + const subDir = 'folderA'; + const secondLevelJson = `${subDir}/sub-tileset.json`; + const leafFileJson = `${subDir}/data.json`; + const leafFileBinary = `${subDir}/geometry.b3dm`; + + const rootContent = JSON.stringify({ + root: { uri: secondLevelJson, url: secondLevelJson }, + }); + + const subTilesetContent = JSON.stringify({ + buffers: [{ uri: 'data.json' }, { url: 'geometry.b3dm' }], + }); + + await s3Helper.createFileOfModel('', rootTileset, rootContent); + await s3Helper.createFileOfModel('', secondLevelJson, subTilesetContent); + await s3Helper.createFileOfModel('', leafFileJson, JSON.stringify({})); + await s3Helper.createFileOfModel('', leafFileBinary, Buffer.from('fake-binary-data')); + await queueFileHandler.createQueueFile(modelId); - await s3Helper.createFileOfModel(pathToTileset, 'subDir/file'); - expectedFiles.push(`${pathToTileset}/subDir/file`); - await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const totalAdded = await provider.streamModelPathsToQueueFile(modelId, rootTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); + const filesInQueue = result + .trim() + .split('\n') + .map((l) => l.trim()); + + expect(totalAdded).toBe(4); + + expect(filesInQueue).toContain(rootTileset); + expect(filesInQueue).toContain(secondLevelJson); + expect(filesInQueue).toContain(leafFileJson); + expect(filesInQueue).toContain(leafFileBinary); - for (const file of expectedFiles) { - expect(result).toContain(file); - } await queueFileHandler.deleteQueueFile(modelId); }); diff --git a/tests/unit/jobStatus/models/jobStatusManager.spec.ts b/tests/unit/jobStatus/models/jobStatusManager.spec.ts index 6e3b65b..0225201 100644 --- a/tests/unit/jobStatus/models/jobStatusManager.spec.ts +++ b/tests/unit/jobStatus/models/jobStatusManager.spec.ts @@ -7,7 +7,7 @@ import { AppError } from '../../../../src/common/appError'; import { SERVICES } from '../../../../src/common/constants'; import { JobStatusResponse } from '../../../../src/common/interfaces'; import { JobStatusManager } from '../../../../src/jobStatus/models/jobStatusManager'; -import { jobManagerClientMock } from '../../../helpers/mockCreator'; +import { configProviderMock, jobManagerClientMock } from '../../../helpers/mockCreator'; describe('jobStatusManager', () => { let jobStatusManager: JobStatusManager; @@ -16,6 +16,7 @@ describe('jobStatusManager', () => { getApp({ override: [ { token: SERVICES.JOB_MANAGER_CLIENT, provider: { useValue: jobManagerClientMock } }, + { token: SERVICES.PROVIDER, provider: { useValue: configProviderMock } }, { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, ], }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts new file mode 100644 index 0000000..bfc7df0 --- /dev/null +++ b/tests/unit/providers/getProvider.spec.ts @@ -0,0 +1,73 @@ +import jsLogger from '@map-colonies/js-logger'; +import { trace } from '@opentelemetry/api'; +import config from 'config'; +import { container } from 'tsyringe'; +import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; +import { SERVICES, SERVICE_NAME } from '../../../src/common/constants'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; +import { S3Provider } from '../../../src/providers/s3Provider'; +import { configProviderMock, jobManagerClientMock, queueFileHandlerMock } from '../../helpers/mockCreator'; + +jest.mock('config', () => ({ + get: jest.fn((key: string) => { + switch (key) { + case 'NFS': + return { pvPath: '/tmp' }; + case 'S3': + return { bucket: 'test-bucket' }; + case 'crawling': + return { extension: '.json', nestedJsonPath: "$..['uri','url']", ignoreNotFound: true }; + default: + return {}; + } + }), +})); + +describe('getProvider tests', () => { + beforeEach(() => { + container.reset(); + jest.clearAllMocks(); + + const tracer = trace.getTracer(SERVICE_NAME); + + container.register(SERVICES.LOGGER, { useValue: jsLogger({ enabled: false }) }); + container.register(SERVICES.TRACER, { useValue: tracer }); + container.register(SERVICES.QUEUE_FILE_HANDLER, { useValue: queueFileHandlerMock }); + container.register(SERVICES.JOB_MANAGER_CLIENT, { useValue: jobManagerClientMock }); + container.register(SERVICES.PROVIDER, { useValue: configProviderMock }); + + container.register(SERVICES.PROVIDER_CONFIG, { + useFactory: () => getProviderConfig('NFS'), + }); + }); + + describe('getProvider nfs', () => { + it('should load an instance of the nfs provider', () => { + const provider = getProvider('nfs', container); + expect(provider).toBeInstanceOf(NFSProvider); + }); + }); + + describe('getProvider s3', () => { + it('should load an instance of the s3 provider', () => { + const provider = getProvider('s3', container); + expect(provider).toBeInstanceOf(S3Provider); + }); + }); + + describe('getProvider invalid', () => { + it('should throw an AppError for an unknown provider', () => { + expect(() => getProvider('invalid', container)).toThrow('Invalid config provider received: invalid - available values: "nfs" or "s3"'); + }); + }); + + describe('config failures', () => { + it('should throw when config.get fails', () => { + (config.get as jest.Mock).mockImplementationOnce(() => { + throw new Error('config failure'); + }); + + expect(() => getProviderConfig('NFS')).toThrow('Invalid config provider received: NFS. Consult documentation for available values'); + }); + }); +});