Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5b0f0d1
feat(providers): add function to get file (copied from file-syncer)
nirolevy Sep 16, 2025
562de30
feat(providers): add JSON crawling provider
nirolevy Sep 16, 2025
b11b250
feat(providers): add crawling provider loading
nirolevy Sep 16, 2025
f91272d
refactor: removed blacklist and updated crawling
lirantul123 Apr 29, 2026
8d8540e
refactor: removed underlying and made crawling abstracted
lirantul123 May 5, 2026
e8b4a3d
refactor: added node:
lirantul123 May 5, 2026
aa26f5b
refactor: changed tests
lirantul123 May 5, 2026
7a29330
refactor: changed tests
lirantul123 May 5, 2026
2acca55
refactor: changed tests
lirantul123 May 6, 2026
cdace38
test: fix last test and fix uri-url
lirantul123 May 10, 2026
1723407
test: fix test issues and helm configuration
lirantul123 May 16, 2026
4483ff1
chore: update baseProvider test name
lirantul123 May 16, 2026
fd17b4c
chore: small updates
lirantul123 May 17, 2026
4c32288
chore: remove unnecessary as type
lirantul123 May 17, 2026
b2d0541
chore: remove duplicated configuration and fix test
lirantul123 May 17, 2026
e2619b9
chore: remove crawling from tplValues
lirantul123 May 18, 2026
8c38480
refactor: added crawling configuration into the providers through the…
lirantul123 May 18, 2026
9502ba6
refactor: change interface nullable
May 18, 2026
64ebaaf
fix: fixed pathToTileset to reach to tileset.json
lirantul123 May 18, 2026
ee5ad2a
chore: handling empty crawling extension and nestedJsonPath, even th…
lirantul123 May 19, 2026
12b421b
chore: revert default and update test
lirantul123 May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions config/custom-environment-variables.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,16 @@
"NFS": {
"pvPath": "PV_SOURCE_PATH"
},
"crawling": {
"extension": "CRAWLING_EXTENSION",
"nestedJsonPath": "CRAWLING_NESTED_JSON_PATH",
"ignoreNotFound": {
"__name": "CRAWLING_IGNORE_NOT_FOUND",
"__format": "boolean"
}
},
"ingestion": {
"provider": "PROVIDER_FROM",
"blackList": "BLACK_LIST"
"provider": "PROVIDER_FROM"
},
"jobManager": {
"url": "JOB_MANAGER_URL",
Expand Down
8 changes: 6 additions & 2 deletions config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@
"NFS": {
"pvPath": "/Path/To/Models"
},
"crawling": {
"extension": ".json",
"nestedJsonPath": "$..['uri','url']",
"ignoreNotFound": true
Comment thread
asafMasa marked this conversation as resolved.
Comment thread
asafMasa marked this conversation as resolved.
Comment thread
asafMasa marked this conversation as resolved.
},
"ingestion": {
"provider": "NFS",
"blackList": ["tar", "zip", "rar", "7z"]
"provider": "NFS"
},
"jobManager": {
"url": "http://127.0.0.1:8080",
Expand Down
8 changes: 6 additions & 2 deletions config/test.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
{
"crawling": {
"extension": ".json",
"nestedJsonPath": "$..['uri','url']",
"ignoreNotFound": true
},
"S3": {
"accessKeyId": "minioadmin",
"secretAccessKey": "minioadmin",
Expand All @@ -13,8 +18,7 @@
"pvPath": "./tests/helpers/3DModels"
},
"ingestion": {
"provider": "S3",
"blackList": ["tar", "zip", "rar", "7z"]
"provider": "S3"
},
"jobManager": {
"url": "http://127.0.0.1:8080",
Expand Down
4 changes: 3 additions & 1 deletion helm/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ data:
REQUESTS_CA_BUNDLE: {{ printf "%s/%s" $ca.path $ca.key | quote }}
NODE_EXTRA_CA_CERTS: {{ printf "%s/%s" $ca.path $ca.key | quote }}
{{- end }}
BLACK_LIST: {{ .Values.env.blackList | quote }}
MAX_CONCURRENCY: {{ .Values.env.maxConcurrency | quote }}
{{ if eq $provider "S3" }}
{{- $S3 := (include "merged.S3" . ) | fromYaml }}
Expand All @@ -50,3 +49,6 @@ data:
INGESTION_TASK_BATCHES: {{ $jobManager.ingestion.batches | quote}}
JOB_DELETE_TYPE: {{ $jobManager.delete.jobType | quote }}
TASK_DELETE_TYPE: {{ $jobManager.delete.taskType | quote }}
CRAWLING_EXTENSION: {{ .Values.env.crawling.extension | quote }}
CRAWLING_NESTED_JSON_PATH: {{ .Values.env.crawling.nestedJsonPath | quote }}
CRAWLING_IGNORE_NOT_FOUND: {{ .Values.env.crawling.ignoreNotFound | quote }}
7 changes: 6 additions & 1 deletion helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ jobManager:
delete:
jobType:
taskType:

env:
port: 80
targetPort: 8080
Expand All @@ -130,9 +131,13 @@ env:
metrics:
enabled: false
url: ''
blackList: ["tar", "zip", "rar", "7z"]
maxConcurrency: 5

crawling:
extension: '.json'
nestedJsonPath: "$..['uri','url']"
ignoreNotFound: true

resources:
enabled: true
value:
Expand Down
71 changes: 68 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@
"@map-colonies/error-express-handler": "^2.1.0",
"@map-colonies/express-access-log-middleware": "^2.0.1",
"@map-colonies/js-logger": "^1.0.1",
"@map-colonies/types": "^1.7.0",
"@map-colonies/mc-model-types": "^19.0.0",
"@map-colonies/mc-priority-queue": "^8.2.1",
"@map-colonies/openapi-express-viewer": "^3.0.0",
"@map-colonies/read-pkg": "0.0.1",
"@map-colonies/telemetry": "^6.1.0",
"@map-colonies/types": "^1.7.0",
"@opentelemetry/api": "1.7.0",
"@opentelemetry/api-metrics": "0.23.0",
"@opentelemetry/context-async-hooks": "^1.24.1",
Expand All @@ -65,6 +65,7 @@
"express": "^4.18.2",
"express-openapi-validator": "^5.0.4",
"http-status-codes": "^2.2.0",
"jsonpath": "^1.3.0",
"n-readlines": "^1.0.1",
"prom-client": "^15.1.1",
"reflect-metadata": "^0.1.13",
Expand All @@ -76,12 +77,13 @@
"@faker-js/faker": "^8.4.1",
"@map-colonies/eslint-config": "^4.0.0",
"@map-colonies/prettier-config": "0.0.1",
"@redocly/openapi-cli": "^1.0.0-beta.94",
"@redocly/cli": "^1.34.3",
"@redocly/openapi-cli": "^1.0.0-beta.94",
"@types/compression": "^1.7.2",
"@types/config": "^3.3.0",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.2",
"@types/jsonpath": "^0.2.4",
"@types/multer": "^1.4.7",
"@types/n-readlines": "^1.0.3",
"@types/supertest": "^2.0.12",
Expand Down
11 changes: 9 additions & 2 deletions src/common/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export interface DeletePayload {

/**
 * Operations a storage provider exposes to the rest of the service.
 */
export interface Provider {
/**
 * Takes a model id, the path to its tileset and a product name, and resolves to a number.
 * NOTE(review): presumably streams the model's file paths into the queue file and resolves to
 * how many paths were written — confirm against the provider implementations.
 */
streamModelPathsToQueueFile: (modelId: string, pathToTileset: string, productName: string) => Promise<number>;
/**
 * Resolves to a Buffer for the given file path.
 * NOTE(review): presumably the raw content of the file as read from the provider's storage — confirm.
 */
getFile: (filePath: string) => Promise<Buffer>;
}

export interface IngestionJobParameters {
Expand All @@ -56,7 +57,7 @@ export interface DeleteTaskParameters {
blockDuplication?: boolean;
}

export interface S3Config {
export interface S3Config extends BaseProviderConfig {
accessKeyId: string;
secretAccessKey: string;
endpointUrl: string;
Expand All @@ -66,10 +67,16 @@ export interface S3Config {
forcePathStyle: boolean;
}

export interface NFSConfig {
export interface NFSConfig extends BaseProviderConfig {
pvPath: string;
}

/**
 * Settings shared by every provider configuration; both S3Config and NFSConfig extend this interface.
 * NOTE(review): the field names mirror the keys of the `crawling` configuration section —
 * confirm that this is where the values are populated from.
 */
export interface BaseProviderConfig {
/** NOTE(review): presumably the extension of the files that get crawled (configured default is '.json') — confirm. */
extension: string;
/** NOTE(review): presumably a JSONPath expression locating nested references inside a crawled file (configured default is "$..['uri','url']") — confirm. */
nestedJsonPath: string;
/** NOTE(review): presumably whether a referenced file that cannot be found is skipped instead of raising an error — confirm. */
ignoreNotFound: boolean;
}

export type ProviderConfig = S3Config | NFSConfig;

export interface JobOperationResponse {
Expand Down
9 changes: 2 additions & 7 deletions src/containerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import jsLogger, { LoggerOptions } from '@map-colonies/js-logger';
import client from 'prom-client';
import { JobManagerClient } from '@map-colonies/mc-priority-queue';
import { SERVICES, SERVICE_NAME } from './common/constants';
import { Provider, ProviderConfig } from './common/interfaces';
import { tracing } from './common/tracing';
import { jobOperationsRouterFactory, JOB_OPERATIONS_ROUTER_SYMBOL } from './jobOperations/routes/jobOperationsRouter';
import { InjectionObject, registerDependencies } from './common/dependencyRegistration';
Expand Down Expand Up @@ -62,18 +61,14 @@ export const registerExternalValues = (options?: RegisterOptions): DependencyCon
{
token: SERVICES.PROVIDER_CONFIG,
provider: {
useFactory: (): ProviderConfig => {
return getProviderConfig(provider);
},
useFactory: () => getProviderConfig(provider),
},
},
{ token: SERVICES.QUEUE_FILE_HANDLER, provider: { useClass: QueueFileHandler } },
{
token: SERVICES.PROVIDER,
provider: {
useFactory: (): Provider => {
return getProvider(provider);
},
useFactory: (container) => getProvider(provider, container),
Comment thread
asafMasa marked this conversation as resolved.
},
},
{
Expand Down
4 changes: 2 additions & 2 deletions src/handlers/queueFileHandler.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import fs from 'fs/promises';
import os from 'os';
import fs from 'node:fs/promises';
import os from 'node:os';
import LineByLine from 'n-readlines';
import { singleton } from 'tsyringe';

Expand Down
29 changes: 6 additions & 23 deletions src/jobOperations/models/jobOperationsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -268,27 +268,17 @@ export class JobOperationsManager {

@withSpanV4
private createTasks(batchSize: number, modelId: string): ICreateTaskBody<IngestionTaskParameters>[] {
const logContext = { ...this.logContext, function: this.createTasks.name };
const tasks: ICreateTaskBody<IngestionTaskParameters>[] = [];
let chunk: string[] = [];
let data: string | null = this.queueFileHandler.readline(modelId);

while (data !== null) {
if (this.isFileInBlackList(data)) {
this.logger.warn({
msg: 'The file is is the black list! Ignored...',
logContext,
file: data,
modelId,
});
} else {
chunk.push(data);

if (chunk.length === batchSize) {
const task = this.buildTaskFromChunk(chunk, modelId);
tasks.push(task);
chunk = [];
}
chunk.push(data);

if (chunk.length === batchSize) {
const task = this.buildTaskFromChunk(chunk, modelId);
tasks.push(task);
chunk = [];
}

data = this.queueFileHandler.readline(modelId);
Expand All @@ -314,11 +304,4 @@ export class JobOperationsManager {
const parameters: IngestionTaskParameters = { paths: chunk, modelId, lastIndexError: -1 };
return { type: INGESTION_TASK_TYPE, parameters };
}

/**
 * Checks whether a queue-file entry ends with an extension listed under `ingestion.blackList`.
 *
 * The extension is the text after the last '.' in the entry; an entry without any '.' is
 * compared against the list as a whole.
 *
 * @param data - a single entry (file path) read from the queue file
 * @returns true when the entry's extension appears in the configured black list
 */
private isFileInBlackList(data: string): boolean {
  const forbiddenExtensions = this.config.get<string[]>('ingestion.blackList');
  // lastIndexOf yields -1 when there is no '.', so the slice then starts at 0 and keeps the whole entry.
  // eslint-disable-next-line @typescript-eslint/no-magic-numbers
  const extension = data.slice(data.lastIndexOf('.') + 1);
  return forbiddenExtensions.includes(extension);
}
}
Loading
Loading