Skip to content
Merged
Prev Previous commit
Next Next commit
[backend] Refactor to extract attachment props from engine.ts
  • Loading branch information
fellowseb committed Mar 5, 2026
commit e4faeb9d68751166abda311c06c6e672b4028482
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import type { Mutable } from '../types/type-utils';

/**
 * Fields extracted by the attachment ingest processor (Elasticsearch).
 *
 * The full list of fields is available in the Elasticsearch docs:
 * https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields
 *
 * Fields from that list that are not part of this tuple are kept below as
 * comments, marked "NOT EXTRACTED".
 */
export const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH = [
  'content',
  'title',
  'author',
  'keywords',
  'date',
  'content_type',
  'content_length',
  'language',
  'modified',
  'format',
  // NOT EXTRACTED: identifier, contributor, coverage
  'modifier',
  'creator_tool',
  // NOT EXTRACTED: publisher, relation, rights, source, type
  'description',
  'print_date',
  'metadata_date',
  // NOT EXTRACTED: latitude, longitude, altitude, rating
  'comments',
] as const;

/**
 * Fields extracted by the attachment ingest processor, for OpenSearch.
 *
 * The full list is available in the OpenSearch docs:
 * https://docs.opensearch.org/latest/install-and-configure/additional-plugins/ingest-attachment-plugin/#extracted-information
 *
 * The plugin code shows that unknown fields are rejected with an exception:
 * https://github.com/opensearch-project/OpenSearch/blob/315481148edaa43410e2e9f1801ec903fd62ec20/plugins/ingest-attachment/src/main/java/org/opensearch/ingest/attachment/AttachmentProcessor.java#L277
 */
export const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH = [
  'content',
  'title',
  'author',
  'keywords',
  'date',
  'content_type',
  'content_length',
  'language',
] as const;

/**
 * Union of every property name listed in either the Elasticsearch or the
 * OpenSearch attachment processor tuple.
 */
export type AttachmentProcessorExtractedProp =
  | Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH>[number]
  | Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH>[number];
53 changes: 0 additions & 53 deletions opencti-platform/opencti-graphql/src/database/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,59 +299,6 @@ const oebp = (queryResult: any): any => {
return queryResult.body;
};

// List of fields extracted by the attachment ingest processor.
// The full list is available in the Elasticsearch docs:
// (https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields).
const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH = [
'content',
'title',
'author',
'keywords',
'date',
'content_type',
'content_length',
'language',
'modified',
'format',
// identifier, NOT EXTRACTED
// contributor, NOT EXTRACTED
// coverage, NOT EXTRACTED
'modifier',
'creator_tool',
// publisher, NOT EXTRACTED
// relation, NOT EXTRACTED
// rights, NOT EXTRACTED
// source, NOT EXTRACTED
// type, NOT EXTRACTED
'description',
'print_date',
'metadata_date',
// latitude, NOT EXTRACTED
// longitude, NOT EXTRACTED
// altitude, NOT EXTRACTED
// rating, NOT EXTRACTED
'comments',
] as const;

// List of fields extracted by the attachment ingest processor, for OpenSearch.
// The full list is available in the OS docs:
// (https://docs.opensearch.org/latest/install-and-configure/additional-plugins/ingest-attachment-plugin/#extracted-information),
// and code shows the check rejects unknown fields with an exception:
// https://github.com/opensearch-project/OpenSearch/blob/315481148edaa43410e2e9f1801ec903fd62ec20/plugins/ingest-attachment/src/main/java/org/opensearch/ingest/attachment/AttachmentProcessor.java#L277
const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH = [
'content',
'title',
'author',
'keywords',
'date',
'content_type',
'content_length',
'language',
] as const;

export type AttachmentProcessorExtractedProp = Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH>[number]
& Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH>[number];

export const elConfigureAttachmentProcessor = async (): Promise<boolean> => {
let success = true;
if (engine instanceof ElkClient) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { AttachmentProcessorExtractedProp } from '../../../database/engine';
import type { AttachmentProcessorExtractedProp } from '../../../database/attachment-processor-props';
import { ENTITY_TYPE_INTERNAL_FILE } from '../../../schema/internalObject';
import { schemaAttributesDefinition } from '../../../schema/schema-attributes';
import {
Expand Down Expand Up @@ -40,12 +40,19 @@ export const ATTACHMENT_MAPPINGS = [
name: AttachmentProcessorExtractedProp;
} & MappingDefinition<BasicStoreAttribute>)[];

type KeysInArray = typeof ATTACHMENT_MAPPINGS[number]['name'];
type AttributesDefinitionWithCheck = Exclude<AttachmentProcessorExtractedProp, KeysInArray> extends never
? Array<AttributeDefinition>
// Compile-time shenanigans to make sure we don't forget to update
// ATTACHMENT_MAPPINGS when/if we start extracting new fields
// via the ES/OS attachment ingest pipeline.
type AttachmentMappingsWithCheck = Exclude<
AttachmentProcessorExtractedProp,
typeof ATTACHMENT_MAPPINGS[number]['name']
> extends never
? MappingDefinition<BasicStoreAttribute>[]
: 'Make sure ATTACHMENT_MAPPINGS defines one mapping for each AttachmentProcessorExtractedProp';

const attributes: AttributesDefinitionWithCheck = [
const TYPE_CHECKED_ATTACHMENT_MAPPINGS: AttachmentMappingsWithCheck = ATTACHMENT_MAPPINGS;

const attributes: Array<AttributeDefinition> = [
id,
internalId,
standardId,
Expand Down Expand Up @@ -110,7 +117,7 @@ const attributes: AttributesDefinitionWithCheck = [
multiple: false,
upsert: false,
isFilterable: false,
mappings: ATTACHMENT_MAPPINGS,
mappings: TYPE_CHECKED_ATTACHMENT_MAPPINGS,
},
{ name: 'uploaded_at', label: 'Upload date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },
{ name: 'file_id', label: 'File identifier', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },
Expand Down