From a473c0b5cb5b89e9ed581185e66762c1475e932f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Tue, 15 Jul 2025 21:56:34 +0200 Subject: [PATCH 01/27] NAE-2136 - Pridana logika pre Reindex case-ov a taskov cez controller a scheduler --- pom.xml | 7 + .../engine/elastic/domain/CaseField.java | 31 ++++ .../engine/elastic/domain/ElasticCase.java | 9 +- .../LocalDateTimeJsonDeserializer.java | 25 +++ .../LocalDateTimeJsonSerializer.java | 18 +++ .../engine/elastic/service/BulkService.java | 149 ++++++++++++++++++ .../service/ElasticCaseMappingService.java | 7 + .../service/ElasticSearchJsonpMapper.java | 28 ++++ .../elastic/service/ElasticsearchConfig.java | 27 ++++ .../elastic/service/ReindexingTask.java | 84 +++++++--- .../service/interfaces/IBulkService.java | 12 ++ .../engine/elastic/web/ElasticController.java | 14 ++ .../domain/repositories/TaskRepository.java | 2 + src/main/resources/application.properties | 2 +- 14 files changed, 387 insertions(+), 28 deletions(-) create mode 100644 src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonSerializer.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java diff --git a/pom.xml b/pom.xml index 1ff7da9f49f..60d38994579 100644 --- a/pom.xml +++ b/pom.xml @@ -366,6 +366,13 @@ spring-boot-starter-data-elasticsearch + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + com.google.protobuf 
diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java new file mode 100644 index 00000000000..2e35d67265b --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java @@ -0,0 +1,31 @@ +package com.netgrif.application.engine.elastic.domain; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import org.springframework.data.elasticsearch.annotations.Field; + +import java.util.List; + +import static org.springframework.data.elasticsearch.annotations.FieldType.*; + +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +public class CaseField extends DataField { + + @Field(type = Text) + public List caseValue; + + public CaseField(List value) { + super(value.toString()); + this.caseValue = value; + } + + @AllArgsConstructor + private static class FileNameAndExtension { + public String name; + public String extension; + } +} diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java index 1a13ad93484..89dda11f55f 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java @@ -1,9 +1,5 @@ package com.netgrif.application.engine.elastic.domain; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import com.fasterxml.jackson.datatype.jsr310.deser.LocalDateTimeDeserializer; -import com.fasterxml.jackson.datatype.jsr310.ser.LocalDateTimeSerializer; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.TaskPair; import lombok.AllArgsConstructor; @@ -18,6 +14,7 @@ import java.sql.Timestamp; 
import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -56,8 +53,6 @@ public class ElasticCase { private String title; - @JsonSerialize(using = LocalDateTimeSerializer.class) - @JsonDeserialize(using = LocalDateTimeDeserializer.class) @Field(type = FieldType.Date, format = DateFormat.date_hour_minute_second_millis) private LocalDateTime creationDate; @@ -121,7 +116,7 @@ public ElasticCase(Case useCase) { processId = useCase.getPetriNetId(); visualId = useCase.getVisualId(); title = useCase.getTitle(); - creationDate = useCase.getCreationDate(); + creationDate = useCase.getCreationDate().truncatedTo(ChronoUnit.MILLIS); creationDateSortable = Timestamp.valueOf(useCase.getCreationDate()).getTime(); author = useCase.getAuthor().getId(); authorName = useCase.getAuthor().getFullName(); diff --git a/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java new file mode 100644 index 00000000000..c0ae2d1114e --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java @@ -0,0 +1,25 @@ +package com.netgrif.application.engine.elastic.serializer; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; + +public class LocalDateTimeJsonDeserializer extends JsonDeserializer { + private static final DateTimeFormatter FORMATTER = new DateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd'T'HH:mm:ss") + .optionalStart() + .appendFraction(ChronoField.MILLI_OF_SECOND, 1, 3, true) + 
.optionalEnd() + .toFormatter(); + + @Override + public LocalDateTime deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + return LocalDateTime.parse(p.getValueAsString(), FORMATTER); + } +} \ No newline at end of file diff --git a/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonSerializer.java b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonSerializer.java new file mode 100644 index 00000000000..a500adb5268 --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonSerializer.java @@ -0,0 +1,18 @@ +package com.netgrif.application.engine.elastic.serializer; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +public class LocalDateTimeJsonSerializer extends JsonSerializer { + private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"); + + @Override + public void serialize(LocalDateTime value, JsonGenerator gen, SerializerProvider serializers) throws IOException { + gen.writeString(FORMATTER.format(value)); + } +} \ No newline at end of file diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java new file mode 100644 index 00000000000..af3ba848188 --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java @@ -0,0 +1,149 @@ +package com.netgrif.application.engine.elastic.service; + +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch.core.BulkRequest; +import co.elastic.clients.elasticsearch.core.BulkResponse; +import 
com.netgrif.application.engine.elastic.domain.ElasticCase; +import com.netgrif.application.engine.elastic.domain.ElasticTask; +import com.netgrif.application.engine.elastic.service.interfaces.*; +import com.netgrif.application.engine.workflow.domain.Case; +import com.netgrif.application.engine.workflow.domain.Task; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.ElasticsearchException; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.time.LocalDateTime; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Service responsible for bulk indexing of {@link Case} and {@link Task} entities into Elasticsearch. + * Uses transformation services to map domain objects to their corresponding Elastic representations. + * + * Indexing is performed using upsert operations. + */ +@Service +@Slf4j +public class BulkService implements IBulkService { + @Value("${spring.data.elasticsearch.index.task}") + private String taskIndex; + + @Value("${spring.data.elasticsearch.index.case}") + private String caseIndex; + + private final ElasticsearchClient esClient; + + private final IElasticCaseMappingService elasticCaseMappingService; + + private final IElasticTaskMappingService elasticTaskMappingService; + + + BulkService (@Qualifier("elasticsearchClient") ElasticsearchClient elasticsearchClient, + IElasticCaseMappingService elasticCaseMappingService, + IElasticTaskMappingService elasticTaskMappingService) { + this.esClient = elasticsearchClient; + this.elasticCaseMappingService = elasticCaseMappingService; + this.elasticTaskMappingService = elasticTaskMappingService; + } + + /** + * Performs bulk indexing of a list of {@link Case} objects into the Elasticsearch case index. + * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. 
+ * + * @param cases the list of case entities to be indexed + */ + @Override + public void bulkIndexCases(List cases) { + BulkRequest.Builder builder = new BulkRequest.Builder(); + + for (Case c : cases) { + try { + if (c.getLastModified() == null) + c.setLastModified(LocalDateTime.now()); + + ElasticCase doc = elasticCaseMappingService.transform(c); + + builder.operations(op -> op + .update(u -> u + .index(caseIndex) + .id(doc.getStringId()) + .action(a -> a + .doc(doc) + .docAsUpsert(true) + ) + ) + ); + } catch (Exception e) { + log.error("Failed to prepare bulk operation for case [{}]: {}", c.getStringId(), e.getMessage()); + } + } + + executeAndValidate(builder.build()); + } + + /** + * Performs bulk indexing of a list of {@link Task} objects into the Elasticsearch task index. + * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. + * + * @param tasks the list of task entities to be indexed + */ + @Override + public void bulkIndexTasks(List tasks) { + if (tasks == null || tasks.isEmpty()) return; + + log.info("Indexing {} tasks", tasks.size()); + + BulkRequest.Builder requestBuilder = new BulkRequest.Builder(); + + for (Task task : tasks) { + try { + ElasticTask elasticTask = elasticTaskMappingService.transform(task); + + requestBuilder.operations(op -> op + .update(u -> u + .index(taskIndex) + .id(elasticTask.getStringId()) + .action(a -> a + .doc(elasticTask) + .docAsUpsert(true) + ) + ) + ); + } catch (Exception e) { + log.error("Failed to create upsert request for task [{}]: {}", task.getStringId(), e.getMessage()); + } + } + + executeAndValidate(requestBuilder.build()); + } + + private void executeAndValidate(BulkRequest request) { + try { + BulkResponse response = esClient.bulk(request); + checkForBulkUpdateFailure(response); + } catch (Exception e) { + log.error("Failed to index bulk " + e.getMessage(), e); + } + } + + private void checkForBulkUpdateFailure(BulkResponse response) { + Map failedDocuments = new 
HashMap<>(); + + response.items().forEach(item -> { + if (item.error() != null) { + failedDocuments.put(item.id(), item.error().reason()); + } + }); + + if (!failedDocuments.isEmpty()) { + throw new ElasticsearchException( + "Bulk indexing has failures. Use ElasticsearchException.getFailedDocuments() for details [" + + failedDocuments + "]", + failedDocuments + ); + } + } +} diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java index 18666dfb5f0..ee50866e3ae 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java @@ -3,6 +3,7 @@ import com.netgrif.application.engine.elastic.domain.BooleanField; import com.netgrif.application.engine.elastic.domain.ButtonField; +import com.netgrif.application.engine.elastic.domain.CaseField; import com.netgrif.application.engine.elastic.domain.DateField; import com.netgrif.application.engine.elastic.domain.FileField; import com.netgrif.application.engine.elastic.domain.I18nField; @@ -77,6 +78,8 @@ protected Optional transformDataField(String fieldId, Case useCase) { return this.transformFileListField(caseField); } else if (netField instanceof com.netgrif.application.engine.petrinet.domain.dataset.UserListField) { return this.transformUserListField(caseField); + } else if (netField instanceof com.netgrif.application.engine.petrinet.domain.dataset.CaseField) { + return this.transformCaseField(caseField); } else if (netField instanceof com.netgrif.application.engine.petrinet.domain.dataset.I18nField) { return this.transformI18nField(caseField, (com.netgrif.application.engine.petrinet.domain.dataset.I18nField) netField); } else { @@ -283,6 +286,10 @@ protected Optional transformFileListField(com.netgrif.application.eng return Optional.of(new 
FileField(((FileListFieldValue) fileListField.getValue()).getNamesPaths().toArray(new FileFieldValue[0]))); } + protected Optional transformCaseField(com.netgrif.application.engine.workflow.domain.DataField caseField) { + return Optional.of(new CaseField((List) caseField.getValue())); + } + protected Optional transformOtherFields(com.netgrif.application.engine.workflow.domain.DataField otherField, Field netField) { log.warn("Field of type " + netField.getClass().getCanonicalName() + " is not supported for indexation by default. Indexing the toString() representation of its value..."); return Optional.of(new TextField(otherField.getValue().toString())); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java new file mode 100644 index 00000000000..aa88227a758 --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java @@ -0,0 +1,28 @@ +package com.netgrif.application.engine.elastic.service; + +import co.elastic.clients.json.jackson.JacksonJsonpMapper; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonDeserializer; +import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonSerializer; + +import java.time.LocalDateTime; + +public class ElasticSearchJsonpMapper extends JacksonJsonpMapper { + public ElasticSearchJsonpMapper() { + super(configureMapper()); + } + + private static ObjectMapper configureMapper() { + ObjectMapper mapper = new ObjectMapper(); + JavaTimeModule javaTimeModule = new JavaTimeModule(); + + mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + javaTimeModule.addSerializer(LocalDateTime.class, new LocalDateTimeJsonSerializer()); + 
javaTimeModule.addDeserializer(LocalDateTime.class, new LocalDateTimeJsonDeserializer()); + mapper.registerModule(javaTimeModule); + + return mapper; + } +} diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java new file mode 100644 index 00000000000..2287d0e031e --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java @@ -0,0 +1,27 @@ +package com.netgrif.application.engine.elastic.service; + +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.transport.ElasticsearchTransport; +import co.elastic.clients.transport.rest_client.RestClientTransport; +import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; +import org.apache.http.HttpHost; +import org.elasticsearch.client.RestClient; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class ElasticsearchConfig { + private final ElasticsearchProperties elasticsearchProperties; + + ElasticsearchConfig(ElasticsearchProperties elasticsearchProperties) { + this.elasticsearchProperties = elasticsearchProperties; + } + + @Bean + public ElasticsearchClient elasticsearchClient() { + RestClient restClient = RestClient.builder(new HttpHost(elasticsearchProperties.getUrl(), elasticsearchProperties.getSearchPort())).build(); + ElasticsearchTransport transport = new RestClientTransport(restClient, new ElasticSearchJsonpMapper()); + return new ElasticsearchClient(transport); + + } +} diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index ae21422527c..d509467a0ee 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ 
b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -1,10 +1,8 @@ package com.netgrif.application.engine.elastic.service; import com.netgrif.application.engine.elastic.domain.ElasticCaseRepository; -import com.netgrif.application.engine.elastic.service.interfaces.IElasticCaseMappingService; -import com.netgrif.application.engine.elastic.service.interfaces.IElasticCaseService; -import com.netgrif.application.engine.elastic.service.interfaces.IElasticTaskMappingService; -import com.netgrif.application.engine.elastic.service.interfaces.IElasticTaskService; +import com.netgrif.application.engine.elastic.service.interfaces.*; +import com.netgrif.application.engine.petrinet.service.PetriNetService; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.QCase; import com.netgrif.application.engine.workflow.domain.Task; @@ -13,6 +11,7 @@ import com.netgrif.application.engine.workflow.service.interfaces.IWorkflowService; import com.querydsl.core.types.Predicate; import com.querydsl.core.types.dsl.BooleanExpression; +import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -21,6 +20,9 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Criteria; +import org.springframework.data.mongodb.core.query.Query; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; @@ -28,6 +30,7 @@ import java.time.Duration; import java.time.LocalDateTime; import java.util.List; +import java.util.stream.Collectors; @Component @ConditionalOnExpression("'${spring.data.elasticsearch.reindex}'!= 'null'") @@ -35,17 +38,19 @@ 
public class ReindexingTask { private static final Logger log = LoggerFactory.getLogger(ReindexingTask.class); - private int pageSize; - private CaseRepository caseRepository; - private TaskRepository taskRepository; - private ElasticCaseRepository elasticCaseRepository; - private IElasticCaseService elasticCaseService; - private IElasticTaskService elasticTaskService; - private IElasticCaseMappingService caseMappingService; - private IElasticTaskMappingService taskMappingService; - private IWorkflowService workflowService; - + private final int pageSize; + private final CaseRepository caseRepository; + private final TaskRepository taskRepository; + private final ElasticCaseRepository elasticCaseRepository; + private final IElasticCaseService elasticCaseService; + private final IElasticTaskService elasticTaskService; + private final IElasticCaseMappingService caseMappingService; + private final IElasticTaskMappingService taskMappingService; + private final IWorkflowService workflowService; + private final MongoTemplate mongoTemplate; + private final PetriNetService petriNetService; private LocalDateTime lastRun; + private final IBulkService bulkService; @Autowired public ReindexingTask( @@ -59,6 +64,9 @@ public ReindexingTask( IElasticCaseMappingService caseMappingService, IElasticTaskMappingService taskMappingService, IWorkflowService workflowService, + IBulkService bulkService, + MongoTemplate mongoTemplate, + PetriNetService petriNetService, @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int pageSize, @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { this.caseRepository = caseRepository; @@ -69,7 +77,10 @@ public ReindexingTask( this.caseMappingService = caseMappingService; this.taskMappingService = taskMappingService; this.workflowService = workflowService; + this.mongoTemplate = mongoTemplate; + this.petriNetService = petriNetService; this.pageSize = pageSize; + this.bulkService = bulkService; lastRun = 
LocalDateTime.now(); if (from != null) { @@ -81,23 +92,56 @@ public ReindexingTask( public void reindex() { log.info("Reindexing stale cases: started reindexing after " + lastRun); - BooleanExpression predicate = QCase.case$.lastModified.before(LocalDateTime.now()).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); - + LocalDateTime now = LocalDateTime.now(); + BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); + LocalDateTime lastRunOld = lastRun; lastRun = LocalDateTime.now(); + long count = caseRepository.count(predicate); if (count > 0) { - reindexAllPages(predicate, count); + reindexAllPages(count, now, lastRunOld); } log.info("Reindexing stale cases: end"); } - private void reindexAllPages(BooleanExpression predicate, long count) { + private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRunOld) { long numOfPages = ((count / pageSize) + 1); log.info("Reindexing " + numOfPages + " pages"); + ObjectId lastId = null; + + long page = 0; + while (true) { + page++; + log.info("Reindexing " + page + " / " + numOfPages); + Query query = new Query(); + + if (lastId != null) { + query.addCriteria(Criteria.where("_id").gt(lastId)); + } + query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + query.limit(pageSize); + + List cases = mongoTemplate.find(query, Case.class); + List casesToIndex = cases.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + if (casesToIndex.isEmpty()) { + break; + } + + casesToIndex.forEach(c -> { + if (c.getPetriNet() == null) { + c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); + } + }); + + bulkService.bulkIndexCases(casesToIndex); + + List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); + List tasksToReindex = 
taskRepository.findAllByCaseIdIn(caseIds); + + bulkService.bulkIndexTasks(tasksToReindex); - for (int page = 0; page < numOfPages; page++) { - reindexPage(predicate, page, numOfPages, false); + lastId = cases.get(cases.size() - 1).get_id(); } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java new file mode 100644 index 00000000000..95c53fa6daf --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java @@ -0,0 +1,12 @@ +package com.netgrif.application.engine.elastic.service.interfaces; + +import com.netgrif.application.engine.workflow.domain.Case; +import com.netgrif.application.engine.workflow.domain.Task; + +import java.util.List; + +public interface IBulkService { + void bulkIndexCases(List cases); + + void bulkIndexTasks(List tasks); +} \ No newline at end of file diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java index 9a738c8ab58..fc3e4f3b23c 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java @@ -85,4 +85,18 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth return MessageResource.errorMessage(e.getMessage()); } } + + @PreAuthorize("hasRole('ADMIN')") + @PostMapping(value = "/index/cursor", produces = MediaType.APPLICATION_JSON_UTF8_VALUE) + public MessageResource cursorAllReindex() { + try { + + reindexingTask.reindex(); + return MessageResource.successMessage("Success"); + + } catch (Exception e) { + log.error("Could not index: ", e); + return MessageResource.errorMessage(e.getMessage()); + } + } } diff --git 
a/src/main/java/com/netgrif/application/engine/workflow/domain/repositories/TaskRepository.java b/src/main/java/com/netgrif/application/engine/workflow/domain/repositories/TaskRepository.java index 841b720f503..9ee7b41ab56 100644 --- a/src/main/java/com/netgrif/application/engine/workflow/domain/repositories/TaskRepository.java +++ b/src/main/java/com/netgrif/application/engine/workflow/domain/repositories/TaskRepository.java @@ -16,6 +16,8 @@ public interface TaskRepository extends MongoRepository, QuerydslP List findAllByCaseId(String id); + List findAllByCaseIdIn(Collection ids); + Page findByCaseIdIn(Pageable pageable, Collection ids); Page findByTransitionIdIn(Pageable pageable, Collection ids); diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index ea5b8cf5e65..29a957a3fda 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -41,7 +41,7 @@ spring.data.elasticsearch.drop=false spring.data.elasticsearch.executors.size=500 spring.data.elasticsearch.executors.timeout=5 spring.data.elasticsearch.reindex=0 0 * * * * -spring.data.elasticsearch.reindexExecutor.size=20 +spring.data.elasticsearch.reindexExecutor.size=150 spring.data.elasticsearch.reindexExecutor.timeout=60 # Mail Service From 2480a25eaeb6011f81b4bb142a4fa2315e5aae0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Mon, 21 Jul 2025 09:47:34 +0200 Subject: [PATCH 02/27] NAE-2136 - cursor next approach --- .../elastic/service/ReindexingTask.java | 128 +++++++++++++++++- 1 file changed, 125 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index d509467a0ee..422d500d849 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ 
b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -21,14 +21,15 @@ import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.mongodb.core.MongoTemplate; -import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.util.CloseableIterator; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; import java.sql.Timestamp; import java.time.Duration; import java.time.LocalDateTime; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; @@ -93,7 +94,8 @@ public void reindex() { log.info("Reindexing stale cases: started reindexing after " + lastRun); LocalDateTime now = LocalDateTime.now(); - BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); + //BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); + BooleanExpression predicate = QCase.case$.lastModified.isNotNull(); LocalDateTime lastRunOld = lastRun; lastRun = LocalDateTime.now(); @@ -111,7 +113,106 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu ObjectId lastId = null; long page = 0; - while (true) { + long pageWhile = 0; + + /*MongoCursor cursor = mongoTemplate + .getCollection("case") + .find() + .iterator(); + + try { + while (cursor.hasNext()) { + page++; + log.info("Reindexing " + page + " / " + numOfPages); + *//*Query query = new Query(); + + if (lastId != null) { + query.addCriteria(Criteria.where("_id").gt(lastId)); + } + query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + query.limit(pageSize);*//* + + MongoDatabase + + new ArrayList<>(cursor.next().values()); + List cases = mongoTemplate.find(query, 
Case.class); + List casesToIndex = cases.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + if (casesToIndex.isEmpty()) { + break; + } + + casesToIndex.forEach(c -> { + if (c.getPetriNet() == null) { + c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); + } + }); + + bulkService.bulkIndexCases(casesToIndex); + + List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); + List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); + + bulkService.bulkIndexTasks(tasksToReindex); + + lastId = cases.get(cases.size() - 1).get_id(); + } + } finally { + cursor.close(); + }*/ + + Query query = new Query(); + + query.cursorBatchSize(pageSize); + + //query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + + List batch = new ArrayList<>(pageSize); + + try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { + while (cursor.hasNext()) { + /*pageWhile++; + log.info("Reindexing -> " + pageWhile);*/ + batch.add(cursor.next()); + + if (batch.size() == pageSize) { + + page++; + log.info("Reindexing " + page + " / " + numOfPages); + + + reindexCasesBatch(batch); + + + + + + batch.clear(); + } + } + + // posledný batch + if (!batch.isEmpty()) { + reindexCasesBatch(batch); + } + } + + + /*try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { + while (cursor.hasNext()) { + batch.add(cursor.next()); + + if (batch.size() == pageSize) { + batch.clear(); + } + } + if (!batch.isEmpty()) { + + } + }*/ + + + + /* while (true) { page++; log.info("Reindexing " + page + " / " + numOfPages); Query query = new Query(); @@ -142,7 +243,28 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu bulkService.bulkIndexTasks(tasksToReindex); lastId = cases.get(cases.size() - 1).get_id(); + }*/ + } + + private void 
reindexCasesBatch(List casesBatch) { + List casesToIndex = casesBatch.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + if (casesToIndex.isEmpty()) { + log.info("No cases to reindex"); + return; } + + casesToIndex.forEach(c -> { + if (c.getPetriNet() == null) { + c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); + } + }); + + bulkService.bulkIndexCases(casesToIndex); + + List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); + List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); + + bulkService.bulkIndexTasks(tasksToReindex); } public void forceReindexPage(Predicate predicate, int page, long numOfPages) { From 990fe7b573b36bc04ca60be25a55509896fa3a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Mon, 21 Jul 2025 13:49:14 +0200 Subject: [PATCH 03/27] NAE-2136 - cursor improved indexing and improved loop --- .../engine/elastic/service/BulkService.java | 95 +++++++---- .../elastic/service/ReindexingTask.java | 151 +----------------- 2 files changed, 70 insertions(+), 176 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java index af3ba848188..5f01ef53f90 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java @@ -8,6 +8,7 @@ import com.netgrif.application.engine.elastic.service.interfaces.*; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.Task; +import com.netgrif.application.engine.workflow.domain.repositories.TaskRepository; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; import 
org.springframework.beans.factory.annotation.Qualifier; @@ -15,9 +16,7 @@ import org.springframework.stereotype.Service; import java.time.LocalDateTime; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Service responsible for bulk indexing of {@link Case} and {@link Task} entities into Elasticsearch. @@ -34,54 +33,69 @@ public class BulkService implements IBulkService { @Value("${spring.data.elasticsearch.index.case}") private String caseIndex; + @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") + private int batchSize; + private final ElasticsearchClient esClient; private final IElasticCaseMappingService elasticCaseMappingService; private final IElasticTaskMappingService elasticTaskMappingService; + private final TaskRepository taskRepository; + + private final List bulkCases = new ArrayList<>(); + private final List bulkCaseIds = new ArrayList<>(); + + private BulkRequest.Builder builder = new BulkRequest.Builder(); + BulkService (@Qualifier("elasticsearchClient") ElasticsearchClient elasticsearchClient, IElasticCaseMappingService elasticCaseMappingService, - IElasticTaskMappingService elasticTaskMappingService) { + IElasticTaskMappingService elasticTaskMappingService, + TaskRepository taskRepository) { this.esClient = elasticsearchClient; this.elasticCaseMappingService = elasticCaseMappingService; this.elasticTaskMappingService = elasticTaskMappingService; + this.taskRepository = taskRepository; } /** - * Performs bulk indexing of a list of {@link Case} objects into the Elasticsearch case index. - * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. + * Creates elastic upsert operation for given case — if a document exists, it is updated; otherwise, it is created. 
+ * calls indexCases if size of case list in cache equals batch size * - * @param cases the list of case entities to be indexed + * @param aCase the case entities to be indexed */ @Override - public void bulkIndexCases(List cases) { - BulkRequest.Builder builder = new BulkRequest.Builder(); - - for (Case c : cases) { - try { - if (c.getLastModified() == null) - c.setLastModified(LocalDateTime.now()); + public void bulkIndexCase(Case aCase) { + if (aCase == null) return; - ElasticCase doc = elasticCaseMappingService.transform(c); + bulkCases.add(aCase); + bulkCaseIds.add(aCase.getStringId()); - builder.operations(op -> op - .update(u -> u - .index(caseIndex) - .id(doc.getStringId()) - .action(a -> a - .doc(doc) - .docAsUpsert(true) - ) - ) - ); - } catch (Exception e) { - log.error("Failed to prepare bulk operation for case [{}]: {}", c.getStringId(), e.getMessage()); - } + try { + if (aCase.getLastModified() == null) + aCase.setLastModified(LocalDateTime.now()); + + ElasticCase doc = elasticCaseMappingService.transform(aCase); + + builder.operations(op -> op + .update(u -> u + .index(caseIndex) + .id(doc.getStringId()) + .action(a -> a + .doc(doc) + .docAsUpsert(true) + ) + ) + ); + } catch (Exception e) { + log.error("Failed to prepare bulk operation for case [{}]: {}", aCase.getStringId(), e.getMessage()); } - executeAndValidate(builder.build()); + if (bulkCases.size() == batchSize) { + indexCases(); + } } /** @@ -94,8 +108,6 @@ public void bulkIndexCases(List cases) { public void bulkIndexTasks(List tasks) { if (tasks == null || tasks.isEmpty()) return; - log.info("Indexing {} tasks", tasks.size()); - BulkRequest.Builder requestBuilder = new BulkRequest.Builder(); for (Task task : tasks) { @@ -120,12 +132,31 @@ public void bulkIndexTasks(List tasks) { executeAndValidate(requestBuilder.build()); } + /** + * Performs bulk indexing of a list of {@link Case} objects from cache into the Elasticsearch case index. 
+ * Clears cache lists and recreates {@link BulkRequest.Builder} + */ + @Override + public void indexCases() { + if (bulkCases.isEmpty()) { + return; + } + + executeAndValidate(builder.build()); + List tasksToReindex = taskRepository.findAllByCaseIdIn(bulkCaseIds); + bulkIndexTasks(tasksToReindex); + + bulkCases.clear(); + bulkCaseIds.clear(); + this.builder = new BulkRequest.Builder(); + } + private void executeAndValidate(BulkRequest request) { try { BulkResponse response = esClient.bulk(request); checkForBulkUpdateFailure(response); } catch (Exception e) { - log.error("Failed to index bulk " + e.getMessage(), e); + log.error("Failed to index bulk {}", e.getMessage(), e); } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index 422d500d849..cb84b50dd64 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -11,7 +11,6 @@ import com.netgrif.application.engine.workflow.service.interfaces.IWorkflowService; import com.querydsl.core.types.Predicate; import com.querydsl.core.types.dsl.BooleanExpression; -import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -29,9 +28,7 @@ import java.sql.Timestamp; import java.time.Duration; import java.time.LocalDateTime; -import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; @Component @ConditionalOnExpression("'${spring.data.elasticsearch.reindex}'!= 'null'") @@ -110,161 +107,27 @@ public void reindex() { private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRunOld) { long numOfPages = ((count / pageSize) + 1); log.info("Reindexing " + numOfPages + " pages"); - ObjectId lastId = null; - - long page = 0; - long pageWhile = 
0; - - /*MongoCursor cursor = mongoTemplate - .getCollection("case") - .find() - .iterator(); - - try { - while (cursor.hasNext()) { - page++; - log.info("Reindexing " + page + " / " + numOfPages); - *//*Query query = new Query(); - - if (lastId != null) { - query.addCriteria(Criteria.where("_id").gt(lastId)); - } - query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); - query.limit(pageSize);*//* - - MongoDatabase - - new ArrayList<>(cursor.next().values()); - List cases = mongoTemplate.find(query, Case.class); - List casesToIndex = cases.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); - if (casesToIndex.isEmpty()) { - break; - } - - casesToIndex.forEach(c -> { - if (c.getPetriNet() == null) { - c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); - } - }); - - bulkService.bulkIndexCases(casesToIndex); - - List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); - List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); - - bulkService.bulkIndexTasks(tasksToReindex); - - lastId = cases.get(cases.size() - 1).get_id(); - } - } finally { - cursor.close(); - }*/ - Query query = new Query(); query.cursorBatchSize(pageSize); //query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); - List batch = new ArrayList<>(pageSize); - try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { - while (cursor.hasNext()) { - /*pageWhile++; - log.info("Reindexing -> " + pageWhile);*/ - batch.add(cursor.next()); - - if (batch.size() == pageSize) { - - page++; - log.info("Reindexing " + page + " / " + numOfPages); - - - reindexCasesBatch(batch); - - - - - - batch.clear(); + cursor.stream().forEach(aCase -> { + if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), 
Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { + return; } - } - // posledný batch - if (!batch.isEmpty()) { - reindexCasesBatch(batch); - } - } - - - /*try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { - while (cursor.hasNext()) { - batch.add(cursor.next()); - - if (batch.size() == pageSize) { - batch.clear(); + if (aCase.getPetriNet() == null) { + aCase.setPetriNet(petriNetService.get(aCase.getPetriNetObjectId())); } - } - if (!batch.isEmpty()) { - - } - }*/ - - - - /* while (true) { - page++; - log.info("Reindexing " + page + " / " + numOfPages); - Query query = new Query(); - - if (lastId != null) { - query.addCriteria(Criteria.where("_id").gt(lastId)); - } - query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); - query.limit(pageSize); - List cases = mongoTemplate.find(query, Case.class); - List casesToIndex = cases.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); - if (casesToIndex.isEmpty()) { - break; - } - - casesToIndex.forEach(c -> { - if (c.getPetriNet() == null) { - c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); - } + bulkService.bulkIndexCase(aCase); }); - - bulkService.bulkIndexCases(casesToIndex); - - List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); - List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); - - bulkService.bulkIndexTasks(tasksToReindex); - - lastId = cases.get(cases.size() - 1).get_id(); - }*/ - } - - private void reindexCasesBatch(List casesBatch) { - List casesToIndex = casesBatch.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); - if (casesToIndex.isEmpty()) { - log.info("No cases to reindex"); - return; } - casesToIndex.forEach(c -> { - 
if (c.getPetriNet() == null) { - c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); - } - }); - - bulkService.bulkIndexCases(casesToIndex); - - List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); - List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); - - bulkService.bulkIndexTasks(tasksToReindex); + bulkService.indexCases(); } public void forceReindexPage(Predicate predicate, int page, long numOfPages) { From dfc51175ceb581cd0defa1bd1c5b36c0e0275f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Tue, 22 Jul 2025 12:26:01 +0200 Subject: [PATCH 04/27] NAE-2136 - fix pr logging, indexing algorithm, fix configuration, null checks --- pom.xml | 6 ++ .../ElasticsearchConfiguration.java | 13 +++ .../engine/elastic/domain/CaseField.java | 2 +- .../LocalDateTimeJsonDeserializer.java | 6 +- .../engine/elastic/service/BulkService.java | 81 +++++++++++++------ .../elastic/service/ElasticsearchConfig.java | 27 ------- .../elastic/service/ReindexingTask.java | 19 +++-- .../service/interfaces/IBulkService.java | 5 +- .../engine/elastic/web/ElasticController.java | 11 +-- 9 files changed, 101 insertions(+), 69 deletions(-) delete mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java diff --git a/pom.xml b/pom.xml index 60d38994579..bdc8ce62468 100644 --- a/pom.xml +++ b/pom.xml @@ -372,6 +372,12 @@ jackson-datatype-jsr310 + + org.glassfish + jakarta.json + 2.0.1 + + diff --git a/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java b/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java index dbd923e5a51..9444649e314 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java +++ b/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java @@ -1,6 +1,11 @@ package com.netgrif.application.engine.configuration; 
+import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.transport.ElasticsearchTransport; +import co.elastic.clients.transport.rest_client.RestClientTransport; +import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; import com.netgrif.application.engine.configuration.properties.UriProperties; +import com.netgrif.application.engine.elastic.service.ElasticSearchJsonpMapper; import com.netgrif.application.engine.workflow.service.CaseEventHandler; import org.apache.http.HttpHost; import org.elasticsearch.client.RestClient; @@ -79,4 +84,12 @@ public ElasticsearchOperations elasticsearchTemplate() { public CaseEventHandler caseEventHandler() { return new CaseEventHandler(); } + + @Bean + public ElasticsearchClient elasticsearchClient() { + RestClient restClient = RestClient.builder(new HttpHost(url, port)).build(); + ElasticsearchTransport transport = new RestClientTransport(restClient, new ElasticSearchJsonpMapper()); + return new ElasticsearchClient(transport); + + } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java index 2e35d67265b..de138061175 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java @@ -16,7 +16,7 @@ public class CaseField extends DataField { @Field(type = Text) - public List caseValue; + private List caseValue; public CaseField(List value) { super(value.toString()); diff --git a/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java index c0ae2d1114e..1d7b2e8c333 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java +++ 
b/src/main/java/com/netgrif/application/engine/elastic/serializer/LocalDateTimeJsonDeserializer.java @@ -20,6 +20,10 @@ public class LocalDateTimeJsonDeserializer extends JsonDeserializer bulkCases = new ArrayList<>(); private final List bulkCaseIds = new ArrayList<>(); + private List bulkTasks = new ArrayList<>(); private BulkRequest.Builder builder = new BulkRequest.Builder(); @@ -93,11 +97,21 @@ public void bulkIndexCase(Case aCase) { log.error("Failed to prepare bulk operation for case [{}]: {}", aCase.getStringId(), e.getMessage()); } - if (bulkCases.size() == batchSize) { + if (bulkCases.size() == caseBatchSize) { indexCases(); } } + /** + * Calls bulkIndexTasks with empty list. + * + */ + @Override + public void bulkIndexTasks() { + bulkIndexTasks(List.of()); + bulkTasks.clear(); + } + /** * Performs bulk indexing of a list of {@link Task} objects into the Elasticsearch task index. * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. @@ -108,28 +122,20 @@ public void bulkIndexCase(Case aCase) { public void bulkIndexTasks(List tasks) { if (tasks == null || tasks.isEmpty()) return; - BulkRequest.Builder requestBuilder = new BulkRequest.Builder(); + tasks.addAll(0, bulkTasks); + int totalSize = tasks.size(); - for (Task task : tasks) { - try { - ElasticTask elasticTask = elasticTaskMappingService.transform(task); + for (int i = 0; i < totalSize; i += taskBatchSize) { + int end = Math.min(i + taskBatchSize, totalSize); + List batch = tasks.subList(i, end); - requestBuilder.operations(op -> op - .update(u -> u - .index(taskIndex) - .id(elasticTask.getStringId()) - .action(a -> a - .doc(elasticTask) - .docAsUpsert(true) - ) - ) - ); - } catch (Exception e) { - log.error("Failed to create upsert request for task [{}]: {}", task.getStringId(), e.getMessage()); + if (batch.size() < taskBatchSize && !tasks.isEmpty()) { + bulkTasks = batch; + break; } - } - executeAndValidate(requestBuilder.build()); + indexTaskBatch(batch); + } } 
/** @@ -154,6 +160,7 @@ public void indexCases() { private void executeAndValidate(BulkRequest request) { try { BulkResponse response = esClient.bulk(request); + checkForBulkUpdateFailure(response); } catch (Exception e) { log.error("Failed to index bulk {}", e.getMessage(), e); @@ -163,6 +170,7 @@ private void executeAndValidate(BulkRequest request) { private void checkForBulkUpdateFailure(BulkResponse response) { Map failedDocuments = new HashMap<>(); + response.items().forEach(item -> { if (item.error() != null) { failedDocuments.put(item.id(), item.error().reason()); @@ -170,11 +178,32 @@ private void checkForBulkUpdateFailure(BulkResponse response) { }); if (!failedDocuments.isEmpty()) { - throw new ElasticsearchException( - "Bulk indexing has failures. Use ElasticsearchException.getFailedDocuments() for details [" + - failedDocuments + "]", - failedDocuments - ); + throw new ElasticsearchException("Bulk indexing has failures. Use ElasticsearchException.getFailedDocuments() for details [{}]", failedDocuments); + } + } + + private void indexTaskBatch(List tasks) { + BulkRequest.Builder requestBuilder = new BulkRequest.Builder(); + + for (Task task : tasks) { + try { + ElasticTask elasticTask = elasticTaskMappingService.transform(task); + + requestBuilder.operations(op -> op + .update(u -> u + .index(taskIndex) + .id(elasticTask.getStringId()) + .action(a -> a + .doc(elasticTask) + .docAsUpsert(true) + ) + ) + ); + } catch (Exception e) { + log.error("Failed to create upsert request for task [{}]: {}", task.getStringId(), e.getMessage()); + } } + + executeAndValidate(requestBuilder.build()); } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java deleted file mode 100644 index 2287d0e031e..00000000000 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticsearchConfig.java +++ /dev/null @@ -1,27 +0,0 @@ 
-package com.netgrif.application.engine.elastic.service; - -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.transport.ElasticsearchTransport; -import co.elastic.clients.transport.rest_client.RestClientTransport; -import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; -import org.apache.http.HttpHost; -import org.elasticsearch.client.RestClient; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -@Configuration -public class ElasticsearchConfig { - private final ElasticsearchProperties elasticsearchProperties; - - ElasticsearchConfig(ElasticsearchProperties elasticsearchProperties) { - this.elasticsearchProperties = elasticsearchProperties; - } - - @Bean - public ElasticsearchClient elasticsearchClient() { - RestClient restClient = RestClient.builder(new HttpHost(elasticsearchProperties.getUrl(), elasticsearchProperties.getSearchPort())).build(); - ElasticsearchTransport transport = new RestClientTransport(restClient, new ElasticSearchJsonpMapper()); - return new ElasticsearchClient(transport); - - } -} diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index cb84b50dd64..e5bd482fbe0 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -50,6 +50,8 @@ public class ReindexingTask { private LocalDateTime lastRun; private final IBulkService bulkService; + private final IElasticCaseMappingService elasticCaseMappingService; + @Autowired public ReindexingTask( CaseRepository caseRepository, @@ -65,8 +67,9 @@ public ReindexingTask( IBulkService bulkService, MongoTemplate mongoTemplate, PetriNetService petriNetService, - @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int 
pageSize, - @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { + @Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") int pageSize, + @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from, + IElasticCaseMappingService elasticCaseMappingService) { this.caseRepository = caseRepository; this.taskRepository = taskRepository; this.elasticCaseRepository = elasticCaseRepository; @@ -79,6 +82,7 @@ public ReindexingTask( this.petriNetService = petriNetService; this.pageSize = pageSize; this.bulkService = bulkService; + this.elasticCaseMappingService = elasticCaseMappingService; lastRun = LocalDateTime.now(); if (from != null) { @@ -88,7 +92,7 @@ public ReindexingTask( @Scheduled(cron = "#{springElasticsearchReindex}") public void reindex() { - log.info("Reindexing stale cases: started reindexing after " + lastRun); + log.info("Reindexing stale cases: started reindexing after {}", lastRun); LocalDateTime now = LocalDateTime.now(); //BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); @@ -106,7 +110,7 @@ public void reindex() { private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRunOld) { long numOfPages = ((count / pageSize) + 1); - log.info("Reindexing " + numOfPages + " pages"); + log.info("Reindexing {} pages", numOfPages); Query query = new Query(); query.cursorBatchSize(pageSize); @@ -115,9 +119,9 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { cursor.stream().forEach(aCase -> { - if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { + /*if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { return; - } + }*/ if (aCase.getPetriNet() 
== null) { aCase.setPetriNet(petriNetService.get(aCase.getPetriNetObjectId())); @@ -128,6 +132,7 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu } bulkService.indexCases(); + bulkService.bulkIndexTasks(); } public void forceReindexPage(Predicate predicate, int page, long numOfPages) { @@ -135,7 +140,7 @@ public void forceReindexPage(Predicate predicate, int page, long numOfPages) { } private void reindexPage(Predicate predicate, int page, long numOfPages, boolean forced) { - log.info("Reindexing " + (page + 1) + " / " + numOfPages); + log.info("Reindexing {} / {}", (page + 1), numOfPages); Page cases = this.workflowService.search(predicate, PageRequest.of(page, pageSize)); for (Case aCase : cases) { diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java index 95c53fa6daf..8aae214e5f1 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java @@ -6,7 +6,8 @@ import java.util.List; public interface IBulkService { - void bulkIndexCases(List cases); - + void bulkIndexCase(Case cases); + void bulkIndexTasks(); void bulkIndexTasks(List tasks); + void indexCases(); } \ No newline at end of file diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java index fc3e4f3b23c..436469bb0e0 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java @@ -69,11 +69,11 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth if (count == 0) { log.info("No cases to reindex"); } else { - long numOfPages = (long) ((count / pageSize) + 
1); - log.info("Reindexing cases: " + numOfPages + " pages"); + long numOfPages = (count / pageSize) + 1; + log.info("Reindexing cases: {} pages", numOfPages); for (int page = 0; page < numOfPages; page++) { - log.info("Indexing page " + (page + 1)); + log.info("Indexing page {}", (page + 1)); Predicate predicate = searchService.buildQuery(searchBody, user, locale); reindexingTask.forceReindexPage(predicate, page, numOfPages); } @@ -86,8 +86,9 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth } } - @PreAuthorize("hasRole('ADMIN')") - @PostMapping(value = "/index/cursor", produces = MediaType.APPLICATION_JSON_UTF8_VALUE) + //@PreAuthorize("hasRole('ADMIN')") + @PreAuthorize("@authorizationService.hasAuthority('ADMIN')") + @PostMapping(value = "/index/cursor", produces = MediaType.APPLICATION_JSON_VALUE) public MessageResource cursorAllReindex() { try { From 9eef8e0d1eb65c433abb1e06cc574628de389937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Tue, 22 Jul 2025 12:28:14 +0200 Subject: [PATCH 05/27] NAE-2136 - restrict indexing only for past till now - 2 minutes --- .../engine/elastic/service/ReindexingTask.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index e5bd482fbe0..ea9ac47f201 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -20,6 +20,7 @@ import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Query; import org.springframework.data.util.CloseableIterator; import 
org.springframework.scheduling.annotation.Scheduled; @@ -95,8 +96,7 @@ public void reindex() { log.info("Reindexing stale cases: started reindexing after {}", lastRun); LocalDateTime now = LocalDateTime.now(); - //BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); - BooleanExpression predicate = QCase.case$.lastModified.isNotNull(); + BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); LocalDateTime lastRunOld = lastRun; lastRun = LocalDateTime.now(); @@ -115,13 +115,13 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu query.cursorBatchSize(pageSize); - //query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { cursor.stream().forEach(aCase -> { - /*if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { + if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { return; - }*/ + } if (aCase.getPetriNet() == null) { aCase.setPetriNet(petriNetService.get(aCase.getPetriNetObjectId())); From f286be955931f3e719564b552ca2398e284676ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Tue, 22 Jul 2025 14:32:56 +0200 Subject: [PATCH 06/27] NAE-2136 - revert usage of unsecure global lists and new numbers of of bulk indexes set --- .../engine/elastic/service/BulkService.java | 99 +++++-------------- .../elastic/service/ReindexingTask.java | 48 ++++++--- .../service/interfaces/IBulkService.java | 4 +- src/main/resources/application.properties | 3 +- 4 files changed, 63 insertions(+), 91 deletions(-) diff --git 
a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java index 77aeae87c41..a33a0e286c4 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java @@ -8,7 +8,6 @@ import com.netgrif.application.engine.elastic.service.interfaces.*; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.Task; -import com.netgrif.application.engine.workflow.domain.repositories.TaskRepository; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; import org.springframework.beans.factory.annotation.Qualifier; @@ -45,71 +44,48 @@ public class BulkService implements IBulkService { private final IElasticTaskMappingService elasticTaskMappingService; - private final TaskRepository taskRepository; - - private final List bulkCases = new ArrayList<>(); - private final List bulkCaseIds = new ArrayList<>(); - private List bulkTasks = new ArrayList<>(); - - private BulkRequest.Builder builder = new BulkRequest.Builder(); - BulkService (@Qualifier("elasticsearchClient") ElasticsearchClient elasticsearchClient, IElasticCaseMappingService elasticCaseMappingService, - IElasticTaskMappingService elasticTaskMappingService, - TaskRepository taskRepository) { + IElasticTaskMappingService elasticTaskMappingService) { this.esClient = elasticsearchClient; this.elasticCaseMappingService = elasticCaseMappingService; this.elasticTaskMappingService = elasticTaskMappingService; - this.taskRepository = taskRepository; } /** - * Creates elastic upsert operation for given case — if a document exists, it is updated; otherwise, it is created. - * calls indexCases if size of case list in cache equals batch size + * Performs bulk indexing of a list of {@link Case} objects into the Elasticsearch case index. 
+ * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. * - * @param aCase the case entities to be indexed + * @param cases the list of case entities to be indexed */ @Override - public void bulkIndexCase(Case aCase) { - if (aCase == null) return; + public void bulkIndexCases(List cases) { + BulkRequest.Builder builder = new BulkRequest.Builder(); - bulkCases.add(aCase); - bulkCaseIds.add(aCase.getStringId()); + for (Case c : cases) { + try { + if (c.getLastModified() == null) + c.setLastModified(LocalDateTime.now()); - try { - if (aCase.getLastModified() == null) - aCase.setLastModified(LocalDateTime.now()); - - ElasticCase doc = elasticCaseMappingService.transform(aCase); - - builder.operations(op -> op - .update(u -> u - .index(caseIndex) - .id(doc.getStringId()) - .action(a -> a - .doc(doc) - .docAsUpsert(true) - ) - ) - ); - } catch (Exception e) { - log.error("Failed to prepare bulk operation for case [{}]: {}", aCase.getStringId(), e.getMessage()); - } + ElasticCase doc = elasticCaseMappingService.transform(c); - if (bulkCases.size() == caseBatchSize) { - indexCases(); + builder.operations(op -> op + .update(u -> u + .index(caseIndex) + .id(doc.getStringId()) + .action(a -> a + .doc(doc) + .docAsUpsert(true) + ) + ) + ); + } catch (Exception e) { + log.error("Failed to prepare bulk operation for case [{}]: {}", c.getStringId(), e.getMessage()); + } } - } - /** - * Calls bulkIndexTasks with empty list. 
- * - */ - @Override - public void bulkIndexTasks() { - bulkIndexTasks(List.of()); - bulkTasks.clear(); + executeAndValidate(builder.build()); } /** @@ -122,41 +98,18 @@ public void bulkIndexTasks() { public void bulkIndexTasks(List tasks) { if (tasks == null || tasks.isEmpty()) return; - tasks.addAll(0, bulkTasks); int totalSize = tasks.size(); for (int i = 0; i < totalSize; i += taskBatchSize) { int end = Math.min(i + taskBatchSize, totalSize); List batch = tasks.subList(i, end); - if (batch.size() < taskBatchSize && !tasks.isEmpty()) { - bulkTasks = batch; - break; - } + log.info("Reindexing task page {} / {}", i / taskBatchSize, totalSize / taskBatchSize); indexTaskBatch(batch); } } - /** - * Performs bulk indexing of a list of {@link Case} objects from cache into the Elasticsearch case index. - * Clears cache lists and recreates {@link BulkRequest.Builder} - */ - @Override - public void indexCases() { - if (bulkCases.isEmpty()) { - return; - } - - executeAndValidate(builder.build()); - List tasksToReindex = taskRepository.findAllByCaseIdIn(bulkCaseIds); - bulkIndexTasks(tasksToReindex); - - bulkCases.clear(); - bulkCaseIds.clear(); - this.builder = new BulkRequest.Builder(); - } - private void executeAndValidate(BulkRequest request) { try { BulkResponse response = esClient.bulk(request); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index ea9ac47f201..dd3ef8b52fe 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -29,7 +29,10 @@ import java.sql.Timestamp; import java.time.Duration; import java.time.LocalDateTime; +import java.util.ArrayList; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; @Component 
@ConditionalOnExpression("'${spring.data.elasticsearch.reindex}'!= 'null'") @@ -51,8 +54,6 @@ public class ReindexingTask { private LocalDateTime lastRun; private final IBulkService bulkService; - private final IElasticCaseMappingService elasticCaseMappingService; - @Autowired public ReindexingTask( CaseRepository caseRepository, @@ -69,8 +70,7 @@ public ReindexingTask( MongoTemplate mongoTemplate, PetriNetService petriNetService, @Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") int pageSize, - @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from, - IElasticCaseMappingService elasticCaseMappingService) { + @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { this.caseRepository = caseRepository; this.taskRepository = taskRepository; this.elasticCaseRepository = elasticCaseRepository; @@ -83,7 +83,6 @@ public ReindexingTask( this.petriNetService = petriNetService; this.pageSize = pageSize; this.bulkService = bulkService; - this.elasticCaseMappingService = elasticCaseMappingService; lastRun = LocalDateTime.now(); if (from != null) { @@ -112,33 +111,54 @@ private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRu long numOfPages = ((count / pageSize) + 1); log.info("Reindexing {} pages", numOfPages); Query query = new Query(); + AtomicInteger page = new AtomicInteger(); query.cursorBatchSize(pageSize); query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + List batch = new ArrayList<>(pageSize); try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { cursor.stream().forEach(aCase -> { - if (elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { - return; - } + batch.add(aCase); + + if (batch.size() == pageSize) { + page.getAndIncrement(); + log.info("Reindexing {} / {}", page, numOfPages); - if (aCase.getPetriNet() == null) { - 
aCase.setPetriNet(petriNetService.get(aCase.getPetriNetObjectId())); + reindexCasesBatch(batch); + batch.clear(); } - bulkService.bulkIndexCase(aCase); }); } - - bulkService.indexCases(); - bulkService.bulkIndexTasks(); } public void forceReindexPage(Predicate predicate, int page, long numOfPages) { reindexPage(predicate, page, numOfPages, true); } + private void reindexCasesBatch(List casesBatch) { + List casesToIndex = casesBatch.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + if (casesToIndex.isEmpty()) { + log.info("No cases to reindex"); + return; + } + + casesToIndex.forEach(c -> { + if (c.getPetriNet() == null) { + c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); + } + }); + + bulkService.bulkIndexCases(casesToIndex); + + List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); + List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); + + bulkService.bulkIndexTasks(tasksToReindex); + } + private void reindexPage(Predicate predicate, int page, long numOfPages, boolean forced) { log.info("Reindexing {} / {}", (page + 1), numOfPages); Page cases = this.workflowService.search(predicate, PageRequest.of(page, pageSize)); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java index 8aae214e5f1..3dfaf6fd5d8 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java @@ -6,8 +6,6 @@ import java.util.List; public interface IBulkService { - void bulkIndexCase(Case cases); - void bulkIndexTasks(); + void bulkIndexCases(List cases); void bulkIndexTasks(List tasks); - void indexCases(); } \ No newline at end of 
file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 29a957a3fda..8d996df2fc1 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -41,7 +41,8 @@ spring.data.elasticsearch.drop=false spring.data.elasticsearch.executors.size=500 spring.data.elasticsearch.executors.timeout=5 spring.data.elasticsearch.reindex=0 0 * * * * -spring.data.elasticsearch.reindexExecutor.size=150 +spring.data.elasticsearch.reindexExecutor.caseSize=5100 +spring.data.elasticsearch.reindexExecutor.taskSize=20000 spring.data.elasticsearch.reindexExecutor.timeout=60 # Mail Service From 714790d309292d8dd115d1bdcf03dd154bbca8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Wed, 23 Jul 2025 11:40:26 +0200 Subject: [PATCH 07/27] NAE-2136 - modified solution based on dividing the number of operations in bulk requests --- .../engine/elastic/service/BulkService.java | 48 ++++++++++++++----- .../elastic/service/ReindexingTask.java | 24 +++++----- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java index a33a0e286c4..d19e239be39 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java @@ -3,6 +3,7 @@ import co.elastic.clients.elasticsearch.ElasticsearchClient; import co.elastic.clients.elasticsearch.core.BulkRequest; import co.elastic.clients.elasticsearch.core.BulkResponse; +import co.elastic.clients.elasticsearch.core.bulk.BulkOperation; import com.netgrif.application.engine.elastic.domain.ElasticCase; import com.netgrif.application.engine.elastic.domain.ElasticTask; import com.netgrif.application.engine.elastic.service.interfaces.*; @@ -61,7 +62,7 @@ public class BulkService implements 
IBulkService { */ @Override public void bulkIndexCases(List cases) { - BulkRequest.Builder builder = new BulkRequest.Builder(); + List operations = new ArrayList<>(); for (Case c : cases) { try { @@ -70,7 +71,7 @@ public void bulkIndexCases(List cases) { ElasticCase doc = elasticCaseMappingService.transform(c); - builder.operations(op -> op + operations.add(BulkOperation.of(op -> op .update(u -> u .index(caseIndex) .id(doc.getStringId()) @@ -78,14 +79,13 @@ public void bulkIndexCases(List cases) { .doc(doc) .docAsUpsert(true) ) - ) - ); + ))); } catch (Exception e) { log.error("Failed to prepare bulk operation for case [{}]: {}", c.getStringId(), e.getMessage()); } } - executeAndValidate(builder.build()); + executeAndValidate(operations); } /** @@ -110,13 +110,34 @@ public void bulkIndexTasks(List tasks) { } } - private void executeAndValidate(BulkRequest request) { - try { - BulkResponse response = esClient.bulk(request); + private void executeAndValidate(List operations) { + if (operations.isEmpty()) { + return; + } + BulkRequest.Builder builder = new BulkRequest.Builder(); + builder.operations(operations); + + try { + BulkResponse response = esClient.bulk(builder.build()); checkForBulkUpdateFailure(response); + log.info("Batch indexed successfully with {} ops", operations.size()); } catch (Exception e) { - log.error("Failed to index bulk {}", e.getMessage(), e); + log.warn("Failed for {} ops to index bulk {}", operations.size(), e.getMessage(), e); + + if (operations.size() == 1) { + log.error("Single operation failed. Skipping. 
{}", operations.get(0), e); + return; + } + + log.warn("Dividing the requirement."); + + int mid = operations.size() / 2; + List left = operations.subList(0, mid); + List right = operations.subList(mid, operations.size()); + + executeAndValidate(left); + executeAndValidate(right); } } @@ -136,13 +157,14 @@ private void checkForBulkUpdateFailure(BulkResponse response) { } private void indexTaskBatch(List tasks) { - BulkRequest.Builder requestBuilder = new BulkRequest.Builder(); + + List operations = new ArrayList<>(); for (Task task : tasks) { try { ElasticTask elasticTask = elasticTaskMappingService.transform(task); - requestBuilder.operations(op -> op + operations.add(BulkOperation.of(op -> op .update(u -> u .index(taskIndex) .id(elasticTask.getStringId()) @@ -150,13 +172,13 @@ private void indexTaskBatch(List tasks) { .doc(elasticTask) .docAsUpsert(true) ) - ) + )) ); } catch (Exception e) { log.error("Failed to create upsert request for task [{}]: {}", task.getStringId(), e.getMessage()); } } - executeAndValidate(requestBuilder.build()); + executeAndValidate(operations); } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index dd3ef8b52fe..ffcb0489fd6 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -40,7 +40,7 @@ public class ReindexingTask { private static final Logger log = LoggerFactory.getLogger(ReindexingTask.class); - private final int pageSize; + private final int caseBatchSize; private final CaseRepository caseRepository; private final TaskRepository taskRepository; private final ElasticCaseRepository elasticCaseRepository; @@ -69,7 +69,7 @@ public ReindexingTask( IBulkService bulkService, MongoTemplate mongoTemplate, PetriNetService petriNetService, - 
@Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") int pageSize, + @Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") int caseBatchSize, @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { this.caseRepository = caseRepository; this.taskRepository = taskRepository; @@ -81,7 +81,7 @@ public ReindexingTask( this.workflowService = workflowService; this.mongoTemplate = mongoTemplate; this.petriNetService = petriNetService; - this.pageSize = pageSize; + this.caseBatchSize = caseBatchSize; this.bulkService = bulkService; lastRun = LocalDateTime.now(); @@ -95,7 +95,8 @@ public void reindex() { log.info("Reindexing stale cases: started reindexing after {}", lastRun); LocalDateTime now = LocalDateTime.now(); - BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); + //BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); + BooleanExpression predicate = QCase.case$.creationDate.isNotNull(); LocalDateTime lastRunOld = lastRun; lastRun = LocalDateTime.now(); @@ -108,21 +109,21 @@ public void reindex() { } private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRunOld) { - long numOfPages = ((count / pageSize) + 1); + long numOfPages = ((count / caseBatchSize) + 1); log.info("Reindexing {} pages", numOfPages); Query query = new Query(); AtomicInteger page = new AtomicInteger(); - query.cursorBatchSize(pageSize); + query.cursorBatchSize(caseBatchSize); - query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); + //query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); - List batch = new ArrayList<>(pageSize); + List batch = new ArrayList<>(caseBatchSize); try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { cursor.stream().forEach(aCase -> { batch.add(aCase); - if 
(batch.size() == pageSize) { + if (batch.size() == caseBatchSize) { page.getAndIncrement(); log.info("Reindexing {} / {}", page, numOfPages); @@ -139,7 +140,8 @@ public void forceReindexPage(Predicate predicate, int page, long numOfPages) { } private void reindexCasesBatch(List casesBatch) { - List casesToIndex = casesBatch.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + //List casesToIndex = casesBatch.stream().filter(it -> elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); + List casesToIndex = casesBatch; if (casesToIndex.isEmpty()) { log.info("No cases to reindex"); return; @@ -161,7 +163,7 @@ private void reindexCasesBatch(List casesBatch) { private void reindexPage(Predicate predicate, int page, long numOfPages, boolean forced) { log.info("Reindexing {} / {}", (page + 1), numOfPages); - Page cases = this.workflowService.search(predicate, PageRequest.of(page, pageSize)); + Page cases = this.workflowService.search(predicate, PageRequest.of(page, caseBatchSize)); for (Case aCase : cases) { if (forced || elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { From 01bea6037e8a5380fd0618b339cb24ad09c96dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Wed, 23 Jul 2025 11:43:56 +0200 Subject: [PATCH 08/27] NAE-2136 - ConcurrentModificationException prevention --- .../application/engine/elastic/service/BulkService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java index d19e239be39..e7742a559f2 100644 --- 
a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java @@ -136,8 +136,8 @@ private void executeAndValidate(List operations) { List left = operations.subList(0, mid); List right = operations.subList(mid, operations.size()); - executeAndValidate(left); - executeAndValidate(right); + executeAndValidate(new ArrayList<>(left)); + executeAndValidate(new ArrayList<>(right)); } } From 6ea44cc6c817b23a9f0482b7ec76793949f178e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Voz=C3=A1r?= Date: Wed, 23 Jul 2025 12:10:50 +0200 Subject: [PATCH 09/27] NAE-2136 - Remove unused inner class --- .../application/engine/elastic/domain/CaseField.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java index de138061175..e661a947d83 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/CaseField.java @@ -1,6 +1,5 @@ package com.netgrif.application.engine.elastic.domain; -import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; @@ -22,10 +21,4 @@ public CaseField(List value) { super(value.toString()); this.caseValue = value; } - - @AllArgsConstructor - private static class FileNameAndExtension { - public String name; - public String extension; - } } From eaec9dc3047dffa2f69ee55b94844d499156635a Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 09:52:38 +0200 Subject: [PATCH 10/27] **Enhance reindexing capabilities with bulk indexing improvements** - **Add bulk reindexing support**: - Introduced the `bulkIndex` method in `IElasticIndexService`, enabling reindexing of all or stale cases and tasks. 
- Created a dedicated `IndexParams` class to encapsulate parameters for the reindexing process (e.g., `indexAll`, `caseBatchSize`, `taskBatchSize`). - Optimized the index batching process for cases and tasks by leveraging configurable batch sizes. - **Remove deprecated methods**: - Removed the old `bulkIndex` method from `IElasticIndexService`. - Eliminated the obsolete `IBulkService` interface. - **Update ElasticController**: - Changed the `/reindex/bulk` endpoint used for initiating bulk reindexing so that it accepts configurable parameters. - Updated controller logic to leverage the new `bulkIndex` method for improved performance and scalability. - **Configuration enhancements**: - Extended `ElasticsearchProperties` to include `IndexProperties`, allowing batch sizes for cases and tasks to be configured via properties. - **Code cleanup**: - Removed unused imports and annotated services with `@RequiredArgsConstructor`. - Streamlined reindexing tasks by integrating advanced filtering and improved logging. This commit enhances the maintainability and scalability of Elasticsearch reindexing while introducing configurability and improved documentation for reindexing processes.
--- .../properties/ElasticsearchProperties.java | 14 + .../engine/elastic/service/BulkService.java | 184 ------------- .../elastic/service/ElasticIndexService.java | 244 ++++++++++++++++-- .../elastic/service/ReindexingTask.java | 108 ++------ .../service/interfaces/IBulkService.java | 11 - .../interfaces/IElasticIndexService.java | 6 +- .../engine/elastic/web/ElasticController.java | 26 +- .../web/requestbodies/IndexParams.java | 10 + .../engine/workflow/service/TaskService.java | 1 - 9 files changed, 277 insertions(+), 327 deletions(-) delete mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java delete mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java create mode 100644 src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java diff --git a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java index 4f77f278cb8..6475cfbab06 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java +++ b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java @@ -1,6 +1,7 @@ package com.netgrif.application.engine.configuration.properties; import lombok.Data; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.core.io.Resource; import org.springframework.stereotype.Component; @@ -53,6 +54,8 @@ public class ElasticsearchProperties { private List defaultSearchFilters = new ArrayList<>(); + private IndexProperties indexProperties = new IndexProperties(); + @PostConstruct public void init() { indexSettings.putIfAbsent("max_result_window", 10000000); @@ -72,4 +75,15 @@ public void init() { public Map 
getClassSpecificSettings(String className) { return classSpecificIndexSettings.getOrDefault(className, new HashMap<>()); } + + @Data + public static class IndexProperties { + private String taskIndex; + + private String caseIndex; + + private int caseBatchSize; + + private int taskBatchSize; + } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java deleted file mode 100644 index e7742a559f2..00000000000 --- a/src/main/java/com/netgrif/application/engine/elastic/service/BulkService.java +++ /dev/null @@ -1,184 +0,0 @@ -package com.netgrif.application.engine.elastic.service; - -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch.core.BulkRequest; -import co.elastic.clients.elasticsearch.core.BulkResponse; -import co.elastic.clients.elasticsearch.core.bulk.BulkOperation; -import com.netgrif.application.engine.elastic.domain.ElasticCase; -import com.netgrif.application.engine.elastic.domain.ElasticTask; -import com.netgrif.application.engine.elastic.service.interfaces.*; -import com.netgrif.application.engine.workflow.domain.Case; -import com.netgrif.application.engine.workflow.domain.Task; -import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.ElasticsearchException; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Service; - -import java.time.LocalDateTime; -import java.util.*; - -/** - * Service responsible for bulk indexing of {@link Case} and {@link Task} entities into Elasticsearch. - * Uses transformation services to map domain objects to their corresponding Elastic representations. - * - * Indexing is performed using upsert operations. 
- */ -@Service -@Slf4j -public class BulkService implements IBulkService { - @Value("${spring.data.elasticsearch.index.task}") - private String taskIndex; - - @Value("${spring.data.elasticsearch.index.case}") - private String caseIndex; - - @Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") - private int caseBatchSize; - - @Value("${spring.data.elasticsearch.reindexExecutor.taskSize:20}") - private int taskBatchSize; - - private final ElasticsearchClient esClient; - - private final IElasticCaseMappingService elasticCaseMappingService; - - private final IElasticTaskMappingService elasticTaskMappingService; - - - BulkService (@Qualifier("elasticsearchClient") ElasticsearchClient elasticsearchClient, - IElasticCaseMappingService elasticCaseMappingService, - IElasticTaskMappingService elasticTaskMappingService) { - this.esClient = elasticsearchClient; - this.elasticCaseMappingService = elasticCaseMappingService; - this.elasticTaskMappingService = elasticTaskMappingService; - } - - /** - * Performs bulk indexing of a list of {@link Case} objects into the Elasticsearch case index. - * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. - * - * @param cases the list of case entities to be indexed - */ - @Override - public void bulkIndexCases(List cases) { - List operations = new ArrayList<>(); - - for (Case c : cases) { - try { - if (c.getLastModified() == null) - c.setLastModified(LocalDateTime.now()); - - ElasticCase doc = elasticCaseMappingService.transform(c); - - operations.add(BulkOperation.of(op -> op - .update(u -> u - .index(caseIndex) - .id(doc.getStringId()) - .action(a -> a - .doc(doc) - .docAsUpsert(true) - ) - ))); - } catch (Exception e) { - log.error("Failed to prepare bulk operation for case [{}]: {}", c.getStringId(), e.getMessage()); - } - } - - executeAndValidate(operations); - } - - /** - * Performs bulk indexing of a list of {@link Task} objects into the Elasticsearch task index. 
- * Uses upsert semantics — if a document exists, it is updated; otherwise, it is created. - * - * @param tasks the list of task entities to be indexed - */ - @Override - public void bulkIndexTasks(List tasks) { - if (tasks == null || tasks.isEmpty()) return; - - int totalSize = tasks.size(); - - for (int i = 0; i < totalSize; i += taskBatchSize) { - int end = Math.min(i + taskBatchSize, totalSize); - List batch = tasks.subList(i, end); - - log.info("Reindexing task page {} / {}", i / taskBatchSize, totalSize / taskBatchSize); - - indexTaskBatch(batch); - } - } - - private void executeAndValidate(List operations) { - if (operations.isEmpty()) { - return; - } - - BulkRequest.Builder builder = new BulkRequest.Builder(); - builder.operations(operations); - - try { - BulkResponse response = esClient.bulk(builder.build()); - checkForBulkUpdateFailure(response); - log.info("Batch indexed successfully with {} ops", operations.size()); - } catch (Exception e) { - log.warn("Failed for {} ops to index bulk {}", operations.size(), e.getMessage(), e); - - if (operations.size() == 1) { - log.error("Single operation failed. Skipping. {}", operations.get(0), e); - return; - } - - log.warn("Dividing the requirement."); - - int mid = operations.size() / 2; - List left = operations.subList(0, mid); - List right = operations.subList(mid, operations.size()); - - executeAndValidate(new ArrayList<>(left)); - executeAndValidate(new ArrayList<>(right)); - } - } - - private void checkForBulkUpdateFailure(BulkResponse response) { - Map failedDocuments = new HashMap<>(); - - - response.items().forEach(item -> { - if (item.error() != null) { - failedDocuments.put(item.id(), item.error().reason()); - } - }); - - if (!failedDocuments.isEmpty()) { - throw new ElasticsearchException("Bulk indexing has failures. 
Use ElasticsearchException.getFailedDocuments() for details [{}]", failedDocuments); - } - } - - private void indexTaskBatch(List tasks) { - - List operations = new ArrayList<>(); - - for (Task task : tasks) { - try { - ElasticTask elasticTask = elasticTaskMappingService.transform(task); - - operations.add(BulkOperation.of(op -> op - .update(u -> u - .index(taskIndex) - .id(elasticTask.getStringId()) - .action(a -> a - .doc(elasticTask) - .docAsUpsert(true) - ) - )) - ); - } catch (Exception e) { - log.error("Failed to create upsert request for task [{}]: {}", task.getStringId(), e.getMessage()); - } - } - - executeAndValidate(operations); - } -} diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index e4ff155a89b..0b33a299f70 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -1,10 +1,24 @@ package com.netgrif.application.engine.elastic.service; +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch.core.BulkRequest; +import co.elastic.clients.elasticsearch.core.BulkResponse; +import co.elastic.clients.elasticsearch.core.bulk.BulkOperation; import com.fasterxml.jackson.databind.ObjectMapper; import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; +import com.netgrif.application.engine.elastic.domain.ElasticCase; +import com.netgrif.application.engine.elastic.domain.ElasticTask; import com.netgrif.application.engine.elastic.service.interfaces.IElasticIndexService; +import com.netgrif.application.engine.petrinet.service.PetriNetService; +import com.netgrif.application.engine.workflow.domain.Case; +import com.netgrif.application.engine.workflow.domain.QCase; +import 
com.netgrif.application.engine.workflow.domain.Task; +import com.netgrif.application.engine.workflow.domain.repositories.CaseRepository; +import com.querydsl.core.types.Predicate; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.open.OpenIndexRequest; import org.elasticsearch.action.admin.indices.open.OpenIndexResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; @@ -13,7 +27,6 @@ import org.elasticsearch.client.indices.CloseIndexResponse; import org.elasticsearch.client.indices.PutIndexTemplateRequest; import org.elasticsearch.xcontent.XContentType; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.core.io.Resource; import org.springframework.data.annotation.Id; @@ -24,33 +37,46 @@ import org.springframework.data.elasticsearch.core.SearchScrollHits; import org.springframework.data.elasticsearch.core.document.Document; import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates; -import org.springframework.data.elasticsearch.core.query.IndexQuery; import org.springframework.data.elasticsearch.core.query.IndexQueryBuilder; import org.springframework.data.elasticsearch.core.query.Query; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Criteria; +import org.springframework.data.util.CloseableIterator; import org.springframework.stereotype.Service; import org.springframework.util.Assert; import java.io.InputStream; import java.lang.reflect.Field; +import java.time.LocalDateTime; import java.util.*; @Slf4j @Service +@RequiredArgsConstructor public class ElasticIndexService implements IElasticIndexService { private static final String PLACEHOLDERS = "petriNetIndex, caseIndex, taskIndex"; - @Autowired - private ApplicationContext context; + private final 
ApplicationContext context; - @Autowired - private ElasticsearchRestTemplate elasticsearchTemplate; + private final ElasticsearchRestTemplate elasticsearchTemplate; - @Autowired - private ElasticsearchOperations operations; + private final ElasticsearchClient elasticsearchClient; + + private final ElasticsearchOperations operations; + + private final ElasticsearchProperties elasticsearchProperties; + + private final CaseRepository caseRepository; + + private final PetriNetService petriNetService; + + private final MongoTemplate mongoTemplate; + + private final ElasticCaseMappingService caseMappingService; + + private final ElasticTaskMappingService taskMappingService; - @Autowired - private ElasticsearchProperties elasticsearchProperties; @Override public boolean indexExists(String indexName) { @@ -69,24 +95,6 @@ public String index(Class clazz, T source, String... placeholders) { .withObject(source).build(), IndexCoordinates.of(indexName)); } - - @Override - public boolean bulkIndex(List list, Class clazz, String... placeholders) { - String indexName = getIndexName(clazz, placeholders); - try { - if (list != null && !list.isEmpty()) { - List indexQueries = new ArrayList<>(); - list.forEach(source -> - indexQueries.add(new IndexQueryBuilder().withId(getIdFromSource(source)).withObject(source).build())); - elasticsearchTemplate.bulkIndex(indexQueries, IndexCoordinates.of(indexName)); - } - } catch (Exception e) { - log.error("bulkIndex:", e); - return false; - } - return true; - } - @Override public boolean createIndex(Class clazz, String... 
placeholders) { try { @@ -304,6 +312,186 @@ public void clearScrollHits(List scrollIds) { } } + @Override + public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSize, Integer taskBatchSize) { + log.info("Reindexing stale cases: started reindexing after {}", after); + LocalDateTime now = LocalDateTime.now(); + + if (caseBatchSize == null) { + caseBatchSize = elasticsearchProperties.getIndexProperties().getCaseBatchSize(); + } + if (taskBatchSize == null) { + taskBatchSize = elasticsearchProperties.getIndexProperties().getTaskBatchSize(); + } + + Predicate predicate; + if (indexAll || after == null) { + predicate = QCase.case$.lastModified.before(now); + log.info("Reindexing stale cases: force all"); + } else { + predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(after.minusMinutes(2))); + } + + long count = caseRepository.count(predicate); + if (count > 0) { + reindexQueried(count, now, after, indexAll, caseBatchSize, taskBatchSize); + } + log.info("Reindexing stale cases: end"); + } + + private void reindexQueried(long count, LocalDateTime now, LocalDateTime after, boolean indexAll, int caseBatchSize, int taskBatchSize) { + long numOfPages = ((count / caseBatchSize) + 1); + log.info("Reindexing {} pages", numOfPages); + + org.springframework.data.mongodb.core.query.Query query; + if (indexAll) { + query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now)); + } else { + query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now).gt(after.minusMinutes(2))); + } + query.cursorBatchSize(caseBatchSize); + + long page = 1, currentBatchSize = 0; + List caseOperations = new ArrayList<>(); + List caseIds = new ArrayList<>(); + + try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { + while (cursor.hasNext()) { + Case aCase = cursor.next(); + prepareCase(aCase); + ElasticCase doc = 
caseMappingService.transform(aCase); + prepareCaseBulkOperation(doc, caseOperations); + caseIds.add(aCase.getStringId()); + + if (++currentBatchSize == caseBatchSize || !cursor.hasNext()) { + log.info("Reindexing case page {} / {}", page, numOfPages); + executeAndValidate(caseOperations); + bulkIndexTasks(caseIds, taskBatchSize); + caseOperations.clear(); + caseIds.clear(); + currentBatchSize = 0; + page++; + } + } + } + } + + private void bulkIndexTasks(List caseIds, int taskBatchSize) { + if (caseIds == null || caseIds.isEmpty()) { + return; + } + org.springframework.data.mongodb.core.query.Query query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("caseId").in(caseIds)).cursorBatchSize(taskBatchSize); + long totalSize = mongoTemplate.count(query, Task.class); + long numOfPages = ((totalSize / taskBatchSize) + 1); + + long page = 1, currentBatchSize = 0; + List taskOperations = new ArrayList<>(); + + try (CloseableIterator cursor = mongoTemplate.stream(query, Task.class)) { + while (cursor.hasNext()) { + Task task = cursor.next(); + ElasticTask elasticTask = taskMappingService.transform(task); + prepareTaskBulkOperation(elasticTask, taskOperations); + + if (++currentBatchSize == taskBatchSize || !cursor.hasNext()) { + log.info("Reindexing task page {} / {}", page, numOfPages); + executeAndValidate(taskOperations); + taskOperations.clear(); + currentBatchSize = 0; + page++; + } + } + } + } + + private void prepareCase(Case useCase) { + if (useCase.getPetriNet() == null) { + useCase.setPetriNet(petriNetService.get(useCase.getPetriNetObjectId())); + } + if (useCase.getLastModified() == null) { + useCase.setLastModified(LocalDateTime.now()); + } + } + + private void prepareCaseBulkOperation(ElasticCase doc, List operations) { + try { + operations.add(BulkOperation.of(op -> op + .update(u -> u + .index(elasticsearchProperties.getIndexProperties().getCaseIndex()) + .id(doc.getStringId()) + .action(a -> a + .doc(doc) + .docAsUpsert(true) + ) + 
))); + } catch (Exception e) { + log.error("Failed to prepare bulk operation for case [{}]: {}", doc.getStringId(), e.getMessage()); + } + } + + private void prepareTaskBulkOperation(ElasticTask doc, List operations) { + try { + operations.add(BulkOperation.of(op -> op + .update(u -> u + .index(elasticsearchProperties.getIndexProperties().getTaskIndex()) + .id(doc.getStringId()) + .action(a -> a + .doc(doc) + .docAsUpsert(true) + ) + )) + ); + } catch (Exception e) { + log.error("Failed to prepare bulk operation for task [{}]: {}", doc.getStringId(), e.getMessage()); + } + } + + private void executeAndValidate(List operations) { + if (operations.isEmpty()) { + return; + } + + BulkRequest.Builder builder = new BulkRequest.Builder(); + builder.operations(operations); + + try { + BulkResponse response = elasticsearchClient.bulk(builder.build()); + checkForBulkUpdateFailure(response); + log.info("Batch indexed successfully with {} ops", operations.size()); + } catch (ElasticsearchException e) { + log.warn("Failed for {} ops to index bulk {}", operations.size(), e.getMessage(), e); + + if (operations.size() == 1) { + log.error("Single operation failed. Skipping. {}", operations.get(0), e); + return; + } + + log.warn("Dividing the requirement."); + + int mid = operations.size() / 2; + List left = operations.subList(0, mid); + List right = operations.subList(mid, operations.size()); + + executeAndValidate(new ArrayList<>(left)); + executeAndValidate(new ArrayList<>(right)); + } catch (Exception e) { + log.error("Failed to index bulk: {}", e.getMessage(), e); + } + } + + private void checkForBulkUpdateFailure(BulkResponse response) { + Map failedDocuments = new HashMap<>(); + response.items().forEach(item -> { + if (item.error() != null) { + failedDocuments.put(item.id(), item.error().reason()); + } + }); + + if (!failedDocuments.isEmpty()) { + throw new ElasticsearchException("Bulk indexing has failures. 
Use ElasticsearchException.getFailedDocuments() for details [{}]", failedDocuments); + } + } + private String getIdFromSource(Object source) { if (source == null) { return null; diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index ffcb0489fd6..5a545381646 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -2,7 +2,6 @@ import com.netgrif.application.engine.elastic.domain.ElasticCaseRepository; import com.netgrif.application.engine.elastic.service.interfaces.*; -import com.netgrif.application.engine.petrinet.service.PetriNetService; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.QCase; import com.netgrif.application.engine.workflow.domain.Task; @@ -19,20 +18,13 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; -import org.springframework.data.mongodb.core.MongoTemplate; -import org.springframework.data.mongodb.core.query.Criteria; -import org.springframework.data.mongodb.core.query.Query; -import org.springframework.data.util.CloseableIterator; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; import java.sql.Timestamp; import java.time.Duration; import java.time.LocalDateTime; -import java.util.ArrayList; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; @Component @ConditionalOnExpression("'${spring.data.elasticsearch.reindex}'!= 'null'") @@ -40,19 +32,18 @@ public class ReindexingTask { private static final Logger log = LoggerFactory.getLogger(ReindexingTask.class); - private final int 
caseBatchSize; - private final CaseRepository caseRepository; - private final TaskRepository taskRepository; - private final ElasticCaseRepository elasticCaseRepository; - private final IElasticCaseService elasticCaseService; - private final IElasticTaskService elasticTaskService; - private final IElasticCaseMappingService caseMappingService; - private final IElasticTaskMappingService taskMappingService; - private final IWorkflowService workflowService; - private final MongoTemplate mongoTemplate; - private final PetriNetService petriNetService; + private int pageSize; + private CaseRepository caseRepository; + private TaskRepository taskRepository; + private ElasticCaseRepository elasticCaseRepository; + private IElasticCaseService elasticCaseService; + private IElasticTaskService elasticTaskService; + private IElasticCaseMappingService caseMappingService; + private IElasticTaskMappingService taskMappingService; + private IWorkflowService workflowService; + private IElasticIndexService elasticIndexService; + private LocalDateTime lastRun; - private final IBulkService bulkService; @Autowired public ReindexingTask( @@ -66,10 +57,7 @@ public ReindexingTask( IElasticCaseMappingService caseMappingService, IElasticTaskMappingService taskMappingService, IWorkflowService workflowService, - IBulkService bulkService, - MongoTemplate mongoTemplate, - PetriNetService petriNetService, - @Value("${spring.data.elasticsearch.reindexExecutor.caseSize:20}") int caseBatchSize, + @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int pageSize, @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { this.caseRepository = caseRepository; this.taskRepository = taskRepository; @@ -79,10 +67,7 @@ public ReindexingTask( this.caseMappingService = caseMappingService; this.taskMappingService = taskMappingService; this.workflowService = workflowService; - this.mongoTemplate = mongoTemplate; - this.petriNetService = petriNetService; - this.caseBatchSize = 
caseBatchSize; - this.bulkService = bulkService; + this.pageSize = pageSize; lastRun = LocalDateTime.now(); if (from != null) { @@ -92,78 +77,19 @@ public ReindexingTask( @Scheduled(cron = "#{springElasticsearchReindex}") public void reindex() { - log.info("Reindexing stale cases: started reindexing after {}", lastRun); - - LocalDateTime now = LocalDateTime.now(); - //BooleanExpression predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(lastRun.minusMinutes(2))); - BooleanExpression predicate = QCase.case$.creationDate.isNotNull(); - LocalDateTime lastRunOld = lastRun; + log.info("Reindexing stale cases: started reindexing after " + lastRun); + elasticIndexService.bulkIndex(false, lastRun, null, null); lastRun = LocalDateTime.now(); - - long count = caseRepository.count(predicate); - if (count > 0) { - reindexAllPages(count, now, lastRunOld); - } - log.info("Reindexing stale cases: end"); } - private void reindexAllPages(long count, LocalDateTime now, LocalDateTime lastRunOld) { - long numOfPages = ((count / caseBatchSize) + 1); - log.info("Reindexing {} pages", numOfPages); - Query query = new Query(); - AtomicInteger page = new AtomicInteger(); - - query.cursorBatchSize(caseBatchSize); - - //query.addCriteria(Criteria.where("lastModified").lt(now).gt(lastRunOld.minusMinutes(2))); - - List batch = new ArrayList<>(caseBatchSize); - try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { - cursor.stream().forEach(aCase -> { - batch.add(aCase); - - if (batch.size() == caseBatchSize) { - page.getAndIncrement(); - log.info("Reindexing {} / {}", page, numOfPages); - - reindexCasesBatch(batch); - batch.clear(); - } - - }); - } - } - public void forceReindexPage(Predicate predicate, int page, long numOfPages) { reindexPage(predicate, page, numOfPages, true); } - private void reindexCasesBatch(List casesBatch) { - //List casesToIndex = casesBatch.stream().filter(it -> 
elasticCaseRepository.countByStringIdAndLastModified(it.getStringId(), Timestamp.valueOf(it.getLastModified()).getTime()) == 0).collect(Collectors.toList()); - List casesToIndex = casesBatch; - if (casesToIndex.isEmpty()) { - log.info("No cases to reindex"); - return; - } - - casesToIndex.forEach(c -> { - if (c.getPetriNet() == null) { - c.setPetriNet(petriNetService.get(c.getPetriNetObjectId())); - } - }); - - bulkService.bulkIndexCases(casesToIndex); - - List caseIds = casesToIndex.stream().map(Case::getStringId).collect(Collectors.toList()); - List tasksToReindex = taskRepository.findAllByCaseIdIn(caseIds); - - bulkService.bulkIndexTasks(tasksToReindex); - } - private void reindexPage(Predicate predicate, int page, long numOfPages, boolean forced) { - log.info("Reindexing {} / {}", (page + 1), numOfPages); - Page cases = this.workflowService.search(predicate, PageRequest.of(page, caseBatchSize)); + log.info("Reindexing " + (page + 1) + " / " + numOfPages); + Page cases = this.workflowService.search(predicate, PageRequest.of(page, pageSize)); for (Case aCase : cases) { if (forced || elasticCaseRepository.countByStringIdAndLastModified(aCase.getStringId(), Timestamp.valueOf(aCase.getLastModified()).getTime()) == 0) { diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java deleted file mode 100644 index 3dfaf6fd5d8..00000000000 --- a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IBulkService.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.netgrif.application.engine.elastic.service.interfaces; - -import com.netgrif.application.engine.workflow.domain.Case; -import com.netgrif.application.engine.workflow.domain.Task; - -import java.util.List; - -public interface IBulkService { - void bulkIndexCases(List cases); - void bulkIndexTasks(List tasks); -} \ No newline at end of file diff --git 
a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IElasticIndexService.java index 5660a6db477..f23036457b4 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/interfaces/IElasticIndexService.java @@ -1,10 +1,12 @@ package com.netgrif.application.engine.elastic.service.interfaces; +import com.querydsl.core.types.Predicate; import org.springframework.data.elasticsearch.core.SearchHits; import org.springframework.data.elasticsearch.core.SearchScrollHits; import org.springframework.data.elasticsearch.core.document.Document; import org.springframework.data.elasticsearch.core.query.Query; +import java.time.LocalDateTime; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,8 +33,6 @@ public interface IElasticIndexService { String index(Class clazz, T source, String... placeholders); - boolean bulkIndex(List list, Class clazz, String... placeholders); - SearchScrollHits scrollFirst(Query query, Class clazz, String... placeholders); SearchScrollHits scroll(String scrollId, Class clazz, String... 
placeholders); @@ -42,4 +42,6 @@ public interface IElasticIndexService { void applySettings(HashMap settingMap, Class clazz); void clearScrollHits(List scrollIds); + + void bulkIndex(boolean indexAll, LocalDateTime lastRun, Integer caseBatchSize, Integer taskBatchSize); } diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java index 436469bb0e0..96c0318466f 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java @@ -2,6 +2,8 @@ import com.netgrif.application.engine.auth.domain.LoggedUser; import com.netgrif.application.engine.elastic.service.ReindexingTask; +import com.netgrif.application.engine.elastic.service.interfaces.IElasticIndexService; +import com.netgrif.application.engine.elastic.web.requestbodies.IndexParams; import com.netgrif.application.engine.workflow.service.CaseSearchService; import com.netgrif.application.engine.workflow.service.interfaces.IWorkflowService; import com.netgrif.application.engine.workflow.web.responsebodies.MessageResource; @@ -20,10 +22,7 @@ import org.springframework.http.MediaType; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.security.core.Authentication; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestBody; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.*; import java.util.Locale; import java.util.Map; @@ -49,6 +48,9 @@ public class ElasticController { @Autowired private ReindexingTask reindexingTask; + @Autowired + private IElasticIndexService indexService; + @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") private int pageSize; @@ 
-86,15 +88,19 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth } } - //@PreAuthorize("hasRole('ADMIN')") @PreAuthorize("@authorizationService.hasAuthority('ADMIN')") - @PostMapping(value = "/index/cursor", produces = MediaType.APPLICATION_JSON_VALUE) - public MessageResource cursorAllReindex() { + @Operation(summary = "Reindex all or stale cases with bulk index", + description = "Reindex all or stale cases (specified by IndexParams.indexAll param) with bulk index. Caller must have the ADMIN role", + security = {@SecurityRequirement(name = "BasicAuth")}) + @ApiResponses(value = { + @ApiResponse(responseCode = "200", description = "OK"), + @ApiResponse(responseCode = "403", description = "Caller doesn't fulfill the authorisation requirements"), + }) + @PostMapping(value = "/reindex/bulk", produces = MediaType.APPLICATION_JSON_VALUE) + public MessageResource bulkIndex(@RequestParam(required = false) IndexParams indexParams) { try { - - reindexingTask.reindex(); + indexService.bulkIndex(indexParams.isIndexAll(), null, indexParams.getCaseBatchSize(), indexParams.getTaskBatchSize()); return MessageResource.successMessage("Success"); - } catch (Exception e) { log.error("Could not index: ", e); return MessageResource.errorMessage(e.getMessage()); diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java b/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java new file mode 100644 index 00000000000..d63e62ae067 --- /dev/null +++ b/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java @@ -0,0 +1,10 @@ +package com.netgrif.application.engine.elastic.web.requestbodies; + +import lombok.Data; + +@Data +public class IndexParams { + private boolean indexAll = false; + private Integer caseBatchSize = 5000; + private Integer taskBatchSize = 20000; +} diff --git a/src/main/java/com/netgrif/application/engine/workflow/service/TaskService.java 
b/src/main/java/com/netgrif/application/engine/workflow/service/TaskService.java index 2a045ffad93..bde3ec1fad7 100644 --- a/src/main/java/com/netgrif/application/engine/workflow/service/TaskService.java +++ b/src/main/java/com/netgrif/application/engine/workflow/service/TaskService.java @@ -36,7 +36,6 @@ import com.netgrif.application.engine.workflow.domain.eventoutcomes.dataoutcomes.SetDataEventOutcome; import com.netgrif.application.engine.workflow.domain.eventoutcomes.taskoutcomes.*; import com.netgrif.application.engine.workflow.domain.repositories.TaskRepository; -import com.netgrif.application.engine.workflow.domain.triggers.AutoTrigger; import com.netgrif.application.engine.workflow.domain.triggers.TimeTrigger; import com.netgrif.application.engine.workflow.domain.triggers.Trigger; import com.netgrif.application.engine.workflow.service.interfaces.IDataService; From f8817945a5a2cf04e23bffc5808e3304a1d74a74 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 10:16:26 +0200 Subject: [PATCH 11/27] - updated index resolution - updated request param --- .../properties/ElasticsearchProperties.java | 9 ++------- .../engine/elastic/service/ElasticIndexService.java | 12 ++++++------ .../engine/elastic/web/ElasticController.java | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java index 6475cfbab06..4e4b2131347 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java +++ b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java @@ -1,7 +1,6 @@ package com.netgrif.application.engine.configuration.properties; import lombok.Data; -import org.springframework.beans.factory.annotation.Value; import 
org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.core.io.Resource; import org.springframework.stereotype.Component; @@ -54,7 +53,7 @@ public class ElasticsearchProperties { private List defaultSearchFilters = new ArrayList<>(); - private IndexProperties indexProperties = new IndexProperties(); + private BatchProperties batch = new BatchProperties(); @PostConstruct public void init() { @@ -77,11 +76,7 @@ public Map getClassSpecificSettings(String className) { } @Data - public static class IndexProperties { - private String taskIndex; - - private String caseIndex; - + public static class BatchProperties { private int caseBatchSize; private int taskBatchSize; diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 0b33a299f70..687a65f837f 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -15,7 +15,7 @@ import com.netgrif.application.engine.workflow.domain.QCase; import com.netgrif.application.engine.workflow.domain.Task; import com.netgrif.application.engine.workflow.domain.repositories.CaseRepository; -import com.querydsl.core.types.Predicate; +import com.querydsl.core.types.dsl.BooleanExpression; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; @@ -318,13 +318,13 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi LocalDateTime now = LocalDateTime.now(); if (caseBatchSize == null) { - caseBatchSize = elasticsearchProperties.getIndexProperties().getCaseBatchSize(); + caseBatchSize = elasticsearchProperties.getBatch().getCaseBatchSize(); } if (taskBatchSize == null) { - taskBatchSize = 
elasticsearchProperties.getIndexProperties().getTaskBatchSize(); + taskBatchSize = elasticsearchProperties.getBatch().getTaskBatchSize(); } - Predicate predicate; + BooleanExpression predicate; if (indexAll || after == null) { predicate = QCase.case$.lastModified.before(now); log.info("Reindexing stale cases: force all"); @@ -417,7 +417,7 @@ private void prepareCaseBulkOperation(ElasticCase doc, List opera try { operations.add(BulkOperation.of(op -> op .update(u -> u - .index(elasticsearchProperties.getIndexProperties().getCaseIndex()) + .index(elasticsearchProperties.getIndex().get("case")) .id(doc.getStringId()) .action(a -> a .doc(doc) @@ -433,7 +433,7 @@ private void prepareTaskBulkOperation(ElasticTask doc, List opera try { operations.add(BulkOperation.of(op -> op .update(u -> u - .index(elasticsearchProperties.getIndexProperties().getTaskIndex()) + .index(elasticsearchProperties.getIndex().get("task")) .id(doc.getStringId()) .action(a -> a .doc(doc) diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java index 96c0318466f..f9049f190a3 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java @@ -97,7 +97,7 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth @ApiResponse(responseCode = "403", description = "Caller doesn't fulfill the authorisation requirements"), }) @PostMapping(value = "/reindex/bulk", produces = MediaType.APPLICATION_JSON_VALUE) - public MessageResource bulkIndex(@RequestParam(required = false) IndexParams indexParams) { + public MessageResource bulkIndex(IndexParams indexParams) { try { indexService.bulkIndex(indexParams.isIndexAll(), null, indexParams.getCaseBatchSize(), indexParams.getTaskBatchSize()); return MessageResource.successMessage("Success"); From 
2e124b191d5308ebe506bc4f17d114c38fac3506 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 10:25:22 +0200 Subject: [PATCH 12/27] Add Javadoc comments to Elasticsearch indexing methods Added detailed Javadoc comments to improve the clarity of key methods and fields in `ElasticIndexService` and `IndexParams`. This documentation provides insights into parameter usage, default values, and functionalities, facilitating better understanding and maintenance. --- .../elastic/service/ElasticIndexService.java | 53 +++++++++++++++++++ .../web/requestbodies/IndexParams.java | 18 +++++++ 2 files changed, 71 insertions(+) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 687a65f837f..73526382024 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -312,6 +312,15 @@ public void clearScrollHits(List scrollIds) { } } + + /** + * Performs bulk indexing of cases and tasks into Elasticsearch. + * + * @param indexAll if true, indexes all cases and tasks, regardless of modification time + * @param after the time after which cases and tasks should be considered for reindexing + * @param caseBatchSize number of cases to process per batch. If null, defaults from Elasticsearch properties + * @param taskBatchSize number of tasks to process per batch. 
If null, defaults from Elasticsearch properties + */ @Override public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSize, Integer taskBatchSize) { log.info("Reindexing stale cases: started reindexing after {}", after); @@ -339,6 +348,16 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi log.info("Reindexing stale cases: end"); } + /** + * Reindexes queried cases and tasks into Elasticsearch in batches. + * + * @param count total number of cases to reindex + * @param now current timestamp for filtering cases + * @param after reindexing cases modified after this time + * @param indexAll when true, reindexes all cases + * @param caseBatchSize batch size for cases + * @param taskBatchSize batch size for tasks + */ private void reindexQueried(long count, LocalDateTime now, LocalDateTime after, boolean indexAll, int caseBatchSize, int taskBatchSize) { long numOfPages = ((count / caseBatchSize) + 1); log.info("Reindexing {} pages", numOfPages); @@ -376,6 +395,12 @@ private void reindexQueried(long count, LocalDateTime now, LocalDateTime after, } } + /** + * Reindexes tasks into Elasticsearch in batches corresponding to the provided case IDs. + * + * @param caseIds list of case IDs whose tasks need to be reindexed + * @param taskBatchSize size of the batch for tasks + */ private void bulkIndexTasks(List caseIds, int taskBatchSize) { if (caseIds == null || caseIds.isEmpty()) { return; @@ -404,6 +429,11 @@ private void bulkIndexTasks(List caseIds, int taskBatchSize) { } } + /** + * Prepares the case object by ensuring necessary dependencies and last modified timestamp are set. 
+ * + * @param useCase case object to prepare + */ private void prepareCase(Case useCase) { if (useCase.getPetriNet() == null) { useCase.setPetriNet(petriNetService.get(useCase.getPetriNetObjectId())); @@ -413,6 +443,12 @@ private void prepareCase(Case useCase) { } } + /** + * Prepares a bulk operation for indexing or updating a case in Elasticsearch. + * + * @param doc transformed ElasticCase object + * @param operations collection of BulkOperations to add this operation to + */ private void prepareCaseBulkOperation(ElasticCase doc, List operations) { try { operations.add(BulkOperation.of(op -> op @@ -429,6 +465,12 @@ private void prepareCaseBulkOperation(ElasticCase doc, List opera } } + /** + * Prepares a bulk operation for indexing or updating a task in Elasticsearch. + * + * @param doc transformed ElasticTask object + * @param operations collection of BulkOperations to add this operation to + */ private void prepareTaskBulkOperation(ElasticTask doc, List operations) { try { operations.add(BulkOperation.of(op -> op @@ -446,6 +488,11 @@ private void prepareTaskBulkOperation(ElasticTask doc, List opera } } + /** + * Executes the bulk operations and validates the results, retrying on partial failures. + * + * @param operations list of bulk operations to execute + */ private void executeAndValidate(List operations) { if (operations.isEmpty()) { return; @@ -479,6 +526,12 @@ private void executeAndValidate(List operations) { } } + /** + * Checks the results of a bulk indexing operation for failures. 
+ * + * @param response the BulkResponse from Elasticsearch + * @throws ElasticsearchException if there are failures in the bulk response + */ private void checkForBulkUpdateFailure(BulkResponse response) { Map failedDocuments = new HashMap<>(); response.items().forEach(item -> { diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java b/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java index d63e62ae067..9e3adab68c6 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/requestbodies/IndexParams.java @@ -2,9 +2,27 @@ import lombok.Data; + +/** + * Represents the parameters to configure the indexing operation. + * This class allows customization of batch sizes for cases and tasks, + * as well as the option to index all data. + */ @Data public class IndexParams { + + /** + * Determines whether to index all available data. Default is {@code false}. + */ private boolean indexAll = false; + + /** + * Specifies the batch size for cases during indexing. Default is {@code 5000}. + */ private Integer caseBatchSize = 5000; + + /** + * Specifies the batch size for tasks during indexing. Default is {@code 20000}. + */ private Integer taskBatchSize = 20000; } From 692610ab4ac6e17109a2a65968ed6b5eb87d362b Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 11:28:25 +0200 Subject: [PATCH 13/27] Update repositories and QRGen dependency in pom.xml Enable and configure JitPack repository for dependency resolution. Update QRGen dependency to version 3.0.1 with updated groupId and artifactId for compatibility. 
--- pom.xml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index bdc8ce62468..574550d8ac9 100644 --- a/pom.xml +++ b/pom.xml @@ -66,7 +66,7 @@ https://sonarcloud.io - + @@ -90,7 +90,17 @@ - + + jitpack.io + https://jitpack.io + + true + + + false + + + @@ -355,9 +365,9 @@ - com.github.kenglxn.qrgen - javase - 2.6.0 + com.github.kenglxn + QRGen + 3.0.1 From 4762d94cf8e4b4b4dc94e1d26f5179a9acac226b Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 11:39:02 +0200 Subject: [PATCH 14/27] Refactor reindex logic to use MongoDB queries. Updated the reindexing logic to replace BooleanExpression predicates with MongoDB query objects, improving clarity and alignment with MongoDB operations. Adjusted method signatures and internal calls to support the new query-based approach. Renamed `bulkIndex` to `bulkReindex` for consistency with functionality. --- .../elastic/service/ElasticIndexService.java | 19 ++++++------------- .../engine/elastic/web/ElasticController.java | 2 +- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 73526382024..9eef9ba552d 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -333,17 +333,17 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi taskBatchSize = elasticsearchProperties.getBatch().getTaskBatchSize(); } - BooleanExpression predicate; + org.springframework.data.mongodb.core.query.Query query; if (indexAll || after == null) { - predicate = QCase.case$.lastModified.before(now); + query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now)); log.info("Reindexing stale 
cases: force all"); } else { - predicate = QCase.case$.lastModified.before(now).and(QCase.case$.lastModified.after(after.minusMinutes(2))); + query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now).gt(after.minusMinutes(2))); } - long count = caseRepository.count(predicate); + long count = mongoTemplate.count(query, Case.class); if (count > 0) { - reindexQueried(count, now, after, indexAll, caseBatchSize, taskBatchSize); + reindexQueried(query, count, now, after, indexAll, caseBatchSize, taskBatchSize); } log.info("Reindexing stale cases: end"); } @@ -358,18 +358,11 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi * @param caseBatchSize batch size for cases * @param taskBatchSize batch size for tasks */ - private void reindexQueried(long count, LocalDateTime now, LocalDateTime after, boolean indexAll, int caseBatchSize, int taskBatchSize) { + private void reindexQueried(org.springframework.data.mongodb.core.query.Query query, long count, LocalDateTime now, LocalDateTime after, boolean indexAll, int caseBatchSize, int taskBatchSize) { long numOfPages = ((count / caseBatchSize) + 1); log.info("Reindexing {} pages", numOfPages); - org.springframework.data.mongodb.core.query.Query query; - if (indexAll) { - query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now)); - } else { - query = org.springframework.data.mongodb.core.query.Query.query(Criteria.where("lastModified").lt(now).gt(after.minusMinutes(2))); - } query.cursorBatchSize(caseBatchSize); - long page = 1, currentBatchSize = 0; List caseOperations = new ArrayList<>(); List caseIds = new ArrayList<>(); diff --git a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java index f9049f190a3..c50c37df036 100644 --- 
a/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java +++ b/src/main/java/com/netgrif/application/engine/elastic/web/ElasticController.java @@ -97,7 +97,7 @@ public MessageResource reindex(@RequestBody Map searchBody, Auth @ApiResponse(responseCode = "403", description = "Caller doesn't fulfill the authorisation requirements"), }) @PostMapping(value = "/reindex/bulk", produces = MediaType.APPLICATION_JSON_VALUE) - public MessageResource bulkIndex(IndexParams indexParams) { + public MessageResource bulkReindex(IndexParams indexParams) { try { indexService.bulkIndex(indexParams.isIndexAll(), null, indexParams.getCaseBatchSize(), indexParams.getTaskBatchSize()); return MessageResource.successMessage("Success"); From 12f50fc0ce60791e6058d2983bd07b2f1afac860 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 11:41:28 +0200 Subject: [PATCH 15/27] Refactor reindexQueried method to simplify parameters Removed unused parameters `now`, `after`, and `indexAll` from the `reindexQueried` method and its invocation. This streamlines the method signature and improves clarity by reducing unnecessary complexity. 
--- .../engine/elastic/service/ElasticIndexService.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 9eef9ba552d..a3d039d0ec0 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -12,10 +12,8 @@ import com.netgrif.application.engine.elastic.service.interfaces.IElasticIndexService; import com.netgrif.application.engine.petrinet.service.PetriNetService; import com.netgrif.application.engine.workflow.domain.Case; -import com.netgrif.application.engine.workflow.domain.QCase; import com.netgrif.application.engine.workflow.domain.Task; import com.netgrif.application.engine.workflow.domain.repositories.CaseRepository; -import com.querydsl.core.types.dsl.BooleanExpression; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; @@ -343,7 +341,7 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi long count = mongoTemplate.count(query, Case.class); if (count > 0) { - reindexQueried(query, count, now, after, indexAll, caseBatchSize, taskBatchSize); + reindexQueried(query, count, caseBatchSize, taskBatchSize); } log.info("Reindexing stale cases: end"); } @@ -352,13 +350,10 @@ public void bulkIndex(boolean indexAll, LocalDateTime after, Integer caseBatchSi * Reindexes queried cases and tasks into Elasticsearch in batches. 
* * @param count total number of cases to reindex - * @param now current timestamp for filtering cases - * @param after reindexing cases modified after this time - * @param indexAll when true, reindexes all cases * @param caseBatchSize batch size for cases * @param taskBatchSize batch size for tasks */ - private void reindexQueried(org.springframework.data.mongodb.core.query.Query query, long count, LocalDateTime now, LocalDateTime after, boolean indexAll, int caseBatchSize, int taskBatchSize) { + private void reindexQueried(org.springframework.data.mongodb.core.query.Query query, long count, int caseBatchSize, int taskBatchSize) { long numOfPages = ((count / caseBatchSize) + 1); log.info("Reindexing {} pages", numOfPages); From 668dc94f61dfceaff44b5a2f519d975e0b82d7c1 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 12:13:57 +0200 Subject: [PATCH 16/27] Add @Min validation to batch size properties Introduce the @Min annotation to enforce a minimum value of 1 for `caseBatchSize` and `taskBatchSize` in `BatchProperties`. This ensures valid configurations and prevents potential errors caused by invalid or zero values. 
--- .../configuration/properties/ElasticsearchProperties.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java index 4e4b2131347..5084cbf5b64 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java +++ b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java @@ -6,6 +6,7 @@ import org.springframework.stereotype.Component; import javax.annotation.PostConstruct; +import javax.validation.constraints.Min; import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; @@ -77,8 +78,10 @@ public Map getClassSpecificSettings(String className) { @Data public static class BatchProperties { + @Min(1) private int caseBatchSize; + @Min(1) private int taskBatchSize; } } From faa66e28300e548b1feab912ecea5b7461b23767 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 12:21:04 +0200 Subject: [PATCH 17/27] Remove unused QRGen dependency from pom.xml The QRGen library is no longer required and has been removed to improve project maintainability. This cleanup helps reduce redundancies and ensures only necessary dependencies are included. --- pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pom.xml b/pom.xml index 574550d8ac9..9fb08fb81bd 100644 --- a/pom.xml +++ b/pom.xml @@ -362,7 +362,6 @@ ${querydsl.version} - com.github.kenglxn @@ -418,13 +417,6 @@ ${drools.version} - - - net.glxn.qrgen - core - 2.0 - - org.apache.commons commons-lang3 From 62ee1e5f04544c3d2a71152f4e565dc97ffd415b Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 12:26:56 +0200 Subject: [PATCH 18/27] Use property for Jackson version management Centralized the Jackson version using the `jackson.version` property to ensure consistency across dependencies. 
This improves maintainability by avoiding hardcoded version references. --- pom.xml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index 9fb08fb81bd..06357d40181 100644 --- a/pom.xml +++ b/pom.xml @@ -64,6 +64,7 @@ 7.70.0.Final netgrif-oss https://sonarcloud.io + 2.15.0-rc1 @@ -379,6 +380,7 @@ com.fasterxml.jackson.datatype jackson-datatype-jsr310 + ${jackson.version} @@ -507,38 +509,38 @@ com.fasterxml.jackson jackson-base - 2.15.0-rc1 + ${jackson.version} pom com.fasterxml.jackson.core jackson-core - 2.15.0-rc1 + ${jackson.version} com.fasterxml.jackson.core jackson-databind - 2.15.0-rc1 + ${jackson.version} com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider - 2.15.0-rc1 + ${jackson.version} com.fasterxml.jackson.core jackson-annotations - 2.15.0-rc1 + ${jackson.version} com.fasterxml.jackson.dataformat jackson-dataformat-xml - 2.15.0-rc1 + ${jackson.version} com.fasterxml.jackson.module jackson-module-jsonSchema - 2.15.0-rc1 + ${jackson.version} io.minio From 6788ac26482e63568f6f6efac100c259ff7af60a Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 12:53:17 +0200 Subject: [PATCH 19/27] Add ElasticIndexService dependency to ReindexingTask The ElasticIndexService was added as a new dependency to the ReindexingTask class. This enables better integration and ensures required services are available for indexing operations. 
--- .../application/engine/elastic/service/ReindexingTask.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index 5a545381646..2150e9d6df4 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -58,7 +58,8 @@ public ReindexingTask( IElasticTaskMappingService taskMappingService, IWorkflowService workflowService, @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int pageSize, - @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from) { + @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from, + ElasticIndexService elasticIndexService) { this.caseRepository = caseRepository; this.taskRepository = taskRepository; this.elasticCaseRepository = elasticCaseRepository; @@ -67,6 +68,7 @@ public ReindexingTask( this.caseMappingService = caseMappingService; this.taskMappingService = taskMappingService; this.workflowService = workflowService; + this.elasticIndexService = elasticIndexService; this.pageSize = pageSize; lastRun = LocalDateTime.now(); From 7edf68f559c764791ca50da3bc2f56a0a022ce03 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 12:53:43 +0200 Subject: [PATCH 20/27] Remove unused CaseRepository dependency from ReindexingTask. The CaseRepository field and constructor dependency were eliminated as they are not utilized in the class. This simplifies the code, reduces clutter, and improves maintainability. No functional changes have been introduced. 
--- .../application/engine/elastic/service/ReindexingTask.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index 2150e9d6df4..ef2b3a74dd5 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -3,13 +3,11 @@ import com.netgrif.application.engine.elastic.domain.ElasticCaseRepository; import com.netgrif.application.engine.elastic.service.interfaces.*; import com.netgrif.application.engine.workflow.domain.Case; -import com.netgrif.application.engine.workflow.domain.QCase; import com.netgrif.application.engine.workflow.domain.Task; import com.netgrif.application.engine.workflow.domain.repositories.CaseRepository; import com.netgrif.application.engine.workflow.domain.repositories.TaskRepository; import com.netgrif.application.engine.workflow.service.interfaces.IWorkflowService; import com.querydsl.core.types.Predicate; -import com.querydsl.core.types.dsl.BooleanExpression; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -33,7 +31,6 @@ public class ReindexingTask { private static final Logger log = LoggerFactory.getLogger(ReindexingTask.class); private int pageSize; - private CaseRepository caseRepository; private TaskRepository taskRepository; private ElasticCaseRepository elasticCaseRepository; private IElasticCaseService elasticCaseService; @@ -47,7 +44,6 @@ public class ReindexingTask { @Autowired public ReindexingTask( - CaseRepository caseRepository, TaskRepository taskRepository, ElasticCaseRepository elasticCaseRepository, @Qualifier("reindexingTaskElasticCaseService") @@ -60,7 +56,6 @@ public ReindexingTask( @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int pageSize, 
@Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from, ElasticIndexService elasticIndexService) { - this.caseRepository = caseRepository; this.taskRepository = taskRepository; this.elasticCaseRepository = elasticCaseRepository; this.elasticCaseService = elasticCaseService; From d0de75c0a677c261f699f8d24018fbc96fef398b Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Wed, 13 Aug 2025 15:46:07 +0200 Subject: [PATCH 21/27] Refactor Elasticsearch indexing and update entity handling. Standardize ID usage for ElasticTask and ElasticCase by setting IDs during transformation. Update indexing logic to handle existing cases and tasks via lookup and merge, ensuring up-to-date data in Elasticsearch. Adjust batch size defaults and improve validation for Elasticsearch properties. --- .../properties/ElasticsearchProperties.java | 6 ++-- .../engine/elastic/domain/ElasticCase.java | 1 + .../engine/elastic/domain/ElasticTask.java | 1 + .../elastic/service/ElasticIndexService.java | 29 ++++++++++++++----- .../elastic/service/ReindexingTask.java | 2 +- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java index 5084cbf5b64..99c2e1c180b 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java +++ b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java @@ -6,6 +6,7 @@ import org.springframework.stereotype.Component; import javax.annotation.PostConstruct; +import javax.validation.Valid; import javax.validation.constraints.Min; import java.time.Duration; import java.util.ArrayList; @@ -54,6 +55,7 @@ public class ElasticsearchProperties { private List defaultSearchFilters = new ArrayList<>(); + @Valid private BatchProperties batch = new BatchProperties(); 
@PostConstruct @@ -79,9 +81,9 @@ public Map getClassSpecificSettings(String className) { @Data public static class BatchProperties { @Min(1) - private int caseBatchSize; + private int caseBatchSize = 5000; @Min(1) - private int taskBatchSize; + private int taskBatchSize = 20000; } } diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java index 89dda11f55f..34c9c6f2124 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java @@ -108,6 +108,7 @@ public class ElasticCase { * @param useCase the data object that should be turned into elasticsearch data object */ public ElasticCase(Case useCase) { + id = useCase.getStringId(); stringId = useCase.getStringId(); uriNodeId = useCase.getUriNodeId(); mongoId = useCase.getStringId(); //TODO: Duplication diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java index 9eac5ee86b5..f3cea15a177 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java @@ -105,6 +105,7 @@ public class ElasticTask { private Map tags; public ElasticTask(Task task) { + this.id = task.getStringId(); this.stringId = task.getStringId(); this.processId = task.getProcessId(); this.taskId = task.getStringId(); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index a3d039d0ec0..81a4a7603a6 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java 
@@ -8,12 +8,13 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; import com.netgrif.application.engine.elastic.domain.ElasticCase; +import com.netgrif.application.engine.elastic.domain.ElasticCaseRepository; import com.netgrif.application.engine.elastic.domain.ElasticTask; +import com.netgrif.application.engine.elastic.domain.ElasticTaskRepository; import com.netgrif.application.engine.elastic.service.interfaces.IElasticIndexService; import com.netgrif.application.engine.petrinet.service.PetriNetService; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.Task; -import com.netgrif.application.engine.workflow.domain.repositories.CaseRepository; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; @@ -65,7 +66,9 @@ public class ElasticIndexService implements IElasticIndexService { private final ElasticsearchProperties elasticsearchProperties; - private final CaseRepository caseRepository; + private final ElasticCaseRepository elasticCaseRepository; + + private final ElasticTaskRepository elasticTaskRepository; private final PetriNetService petriNetService; @@ -366,8 +369,14 @@ private void reindexQueried(org.springframework.data.mongodb.core.query.Query qu while (cursor.hasNext()) { Case aCase = cursor.next(); prepareCase(aCase); - ElasticCase doc = caseMappingService.transform(aCase); - prepareCaseBulkOperation(doc, caseOperations); + ElasticCase elasticCase = caseMappingService.transform(aCase); + ElasticCase savedCase = elasticCaseRepository.findByStringId(aCase.getStringId()); + if (savedCase == null) { + savedCase = elasticCase; + } else { + savedCase.update(elasticCase); + } + prepareCaseBulkOperation(savedCase, caseOperations); caseIds.add(aCase.getStringId()); if (++currentBatchSize == caseBatchSize || !cursor.hasNext()) { @@ -404,7 
+413,13 @@ private void bulkIndexTasks(List caseIds, int taskBatchSize) { while (cursor.hasNext()) { Task task = cursor.next(); ElasticTask elasticTask = taskMappingService.transform(task); - prepareTaskBulkOperation(elasticTask, taskOperations); + ElasticTask savedTask = elasticTaskRepository.findByStringId(task.getStringId()); + if (savedTask == null) { + savedTask = elasticTask; + } else { + savedTask.update(elasticTask); + } + prepareTaskBulkOperation(savedTask, taskOperations); if (++currentBatchSize == taskBatchSize || !cursor.hasNext()) { log.info("Reindexing task page {} / {}", page, numOfPages); @@ -442,7 +457,7 @@ private void prepareCaseBulkOperation(ElasticCase doc, List opera operations.add(BulkOperation.of(op -> op .update(u -> u .index(elasticsearchProperties.getIndex().get("case")) - .id(doc.getStringId()) + .id(doc.getId()) .action(a -> a .doc(doc) .docAsUpsert(true) @@ -464,7 +479,7 @@ private void prepareTaskBulkOperation(ElasticTask doc, List opera operations.add(BulkOperation.of(op -> op .update(u -> u .index(elasticsearchProperties.getIndex().get("task")) - .id(doc.getStringId()) + .id(doc.getId()) .action(a -> a .doc(doc) .docAsUpsert(true) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java index ef2b3a74dd5..3b9426c74fc 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ReindexingTask.java @@ -55,7 +55,7 @@ public ReindexingTask( IWorkflowService workflowService, @Value("${spring.data.elasticsearch.reindexExecutor.size:20}") int pageSize, @Value("${spring.data.elasticsearch.reindex-from:#{null}}") Duration from, - ElasticIndexService elasticIndexService) { + IElasticIndexService elasticIndexService) { this.taskRepository = taskRepository; this.elasticCaseRepository = elasticCaseRepository; 
this.elasticCaseService = elasticCaseService; From 523f1b0586f0e776e7a0f6f6f3af45c417567346 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Thu, 14 Aug 2025 11:10:08 +0200 Subject: [PATCH 22/27] Handle duplicate records during ElasticSearch reindexing Added logic to handle and reindex duplicate cases and tasks by catching InvalidDataAccessApiUsageException and deleting duplicates. Removed redundant assignment of `id` fields in ElasticTask and ElasticCase constructors to avoid potential inconsistencies. --- .../engine/elastic/domain/ElasticCase.java | 1 - .../engine/elastic/domain/ElasticTask.java | 1 - .../elastic/service/ElasticIndexService.java | 33 +++++++++++++------ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java index 34c9c6f2124..89dda11f55f 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticCase.java @@ -108,7 +108,6 @@ public class ElasticCase { * @param useCase the data object that should be turned into elasticsearch data object */ public ElasticCase(Case useCase) { - id = useCase.getStringId(); stringId = useCase.getStringId(); uriNodeId = useCase.getUriNodeId(); mongoId = useCase.getStringId(); //TODO: Duplication diff --git a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java index f3cea15a177..9eac5ee86b5 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java +++ b/src/main/java/com/netgrif/application/engine/elastic/domain/ElasticTask.java @@ -105,7 +105,6 @@ public class ElasticTask { private Map tags; public ElasticTask(Task task) { - this.id = task.getStringId(); this.stringId = task.getStringId(); this.processId = 
task.getProcessId(); this.taskId = task.getStringId(); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 81a4a7603a6..f208abe4fdd 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -28,6 +28,7 @@ import org.elasticsearch.xcontent.XContentType; import org.springframework.context.ApplicationContext; import org.springframework.core.io.Resource; +import org.springframework.dao.InvalidDataAccessApiUsageException; import org.springframework.data.annotation.Id; import org.springframework.data.elasticsearch.annotations.Setting; import org.springframework.data.elasticsearch.core.ElasticsearchOperations; @@ -370,13 +371,19 @@ private void reindexQueried(org.springframework.data.mongodb.core.query.Query qu Case aCase = cursor.next(); prepareCase(aCase); ElasticCase elasticCase = caseMappingService.transform(aCase); - ElasticCase savedCase = elasticCaseRepository.findByStringId(aCase.getStringId()); - if (savedCase == null) { - savedCase = elasticCase; + ElasticCase existingCase = null; + try { + existingCase = elasticCaseRepository.findByStringId(aCase.getStringId()); + } catch (InvalidDataAccessApiUsageException ignored) { + log.debug("[{}]: Case \"{}\" has duplicates, will reindex.", aCase.getStringId(), aCase.getTitle()); + elasticCaseRepository.deleteAllByStringId(aCase.getStringId()); + } + if (existingCase == null) { + existingCase = elasticCase; } else { - savedCase.update(elasticCase); + existingCase.update(elasticCase); } - prepareCaseBulkOperation(savedCase, caseOperations); + prepareCaseBulkOperation(existingCase, caseOperations); caseIds.add(aCase.getStringId()); if (++currentBatchSize == caseBatchSize || !cursor.hasNext()) { @@ -413,13 +420,19 @@ private void bulkIndexTasks(List 
caseIds, int taskBatchSize) { while (cursor.hasNext()) { Task task = cursor.next(); ElasticTask elasticTask = taskMappingService.transform(task); - ElasticTask savedTask = elasticTaskRepository.findByStringId(task.getStringId()); - if (savedTask == null) { - savedTask = elasticTask; + ElasticTask existingTask = null; + try { + existingTask = elasticTaskRepository.findByStringId(task.getStringId()); + } catch (InvalidDataAccessApiUsageException ignored) { + log.debug("[{}]: Task \"{}\" has duplicates, will reindex.", task.getStringId(), task.getTitle()); + elasticCaseRepository.deleteAllByStringId(task.getStringId()); + } + if (existingTask == null) { + existingTask = elasticTask; } else { - savedTask.update(elasticTask); + existingTask.update(elasticTask); } - prepareTaskBulkOperation(savedTask, taskOperations); + prepareTaskBulkOperation(existingTask, taskOperations); if (++currentBatchSize == taskBatchSize || !cursor.hasNext()) { log.info("Reindexing task page {} / {}", page, numOfPages); From 1788209bfb7fefcbacbc119d29b1282124980977 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Thu, 14 Aug 2025 12:00:29 +0200 Subject: [PATCH 23/27] Fix incorrect repository usage in task reindexing logic Replaced elasticCaseRepository with elasticTaskRepository when deleting tasks by stringId. This ensures the correct repository operation is invoked and resolves potential data consistency issues during reindexing. 
--- .../application/engine/elastic/service/ElasticIndexService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index f208abe4fdd..41142a1c6e9 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -425,7 +425,7 @@ private void bulkIndexTasks(List caseIds, int taskBatchSize) { existingTask = elasticTaskRepository.findByStringId(task.getStringId()); } catch (InvalidDataAccessApiUsageException ignored) { log.debug("[{}]: Task \"{}\" has duplicates, will reindex.", task.getStringId(), task.getTitle()); - elasticCaseRepository.deleteAllByStringId(task.getStringId()); + elasticTaskRepository.deleteAllByStringId(task.getStringId()); } if (existingTask == null) { existingTask = elasticTask; From 37f2023206c39279cf3ac3bd5192e7e107ed8487 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Thu, 14 Aug 2025 17:34:13 +0200 Subject: [PATCH 24/27] Refactor Elasticsearch client integration and authentication. Removed ElasticSearchJsonpMapper and transitioned to using RestHighLevelClient with configurable credentials support. Refactored bulk operation handling and mapped serializer setup to ObjectMapper bean. Adjusted LocalDateTime handling in date field transformations. 
--- .../ElasticsearchConfiguration.java | 65 +++++++----- .../properties/ElasticsearchProperties.java | 4 + .../service/ElasticCaseMappingService.java | 4 +- .../elastic/service/ElasticIndexService.java | 98 +++++++++++-------- .../service/ElasticSearchJsonpMapper.java | 28 ------ 5 files changed, 108 insertions(+), 91 deletions(-) delete mode 100644 src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java diff --git a/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java b/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java index 9444649e314..9bbb202adf2 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java +++ b/src/main/java/com/netgrif/application/engine/configuration/ElasticsearchConfiguration.java @@ -1,14 +1,21 @@ package com.netgrif.application.engine.configuration; -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.transport.ElasticsearchTransport; -import co.elastic.clients.transport.rest_client.RestClientTransport; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; import com.netgrif.application.engine.configuration.properties.UriProperties; -import com.netgrif.application.engine.elastic.service.ElasticSearchJsonpMapper; +import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonDeserializer; +import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonSerializer; import com.netgrif.application.engine.workflow.service.CaseEventHandler; +import lombok.RequiredArgsConstructor; import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import 
org.apache.http.client.CredentialsProvider; +import org.apache.http.impl.client.BasicCredentialsProvider; import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.RestClientBuilder; import org.elasticsearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -16,15 +23,12 @@ import org.springframework.data.elasticsearch.core.ElasticsearchOperations; import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate; +import java.time.LocalDateTime; + @Configuration +@RequiredArgsConstructor public class ElasticsearchConfiguration { - @Value("${spring.data.elasticsearch.url}") - private String url; - - @Value("${spring.data.elasticsearch.searchport}") - private int port; - @Value("${spring.data.elasticsearch.index.petriNet}") private String petriNetIndex; @@ -37,11 +41,9 @@ public class ElasticsearchConfiguration { @Value("${spring.data.elasticsearch.reindex}") private String cron; - private final UriProperties uriProperties; + private final ElasticsearchProperties elasticsearchProperties; - public ElasticsearchConfiguration(UriProperties uriProperties) { - this.uriProperties = uriProperties; - } + private final UriProperties uriProperties; @Bean public String springElasticsearchReindex() { @@ -70,9 +72,18 @@ public String elasticUriIndex() { @Bean public RestHighLevelClient client() { - - return new RestHighLevelClient( - RestClient.builder(new HttpHost(url, port, "http"))); + RestClientBuilder builder = RestClient.builder(new HttpHost(elasticsearchProperties.getUrl(), elasticsearchProperties.getSearchPort())); + if (hasCredentials()) { + CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials(AuthScope.ANY, + new UsernamePasswordCredentials( + elasticsearchProperties.getUsername(), + elasticsearchProperties.getPassword() + ) + ); + builder.setHttpClientConfigCallback(httpClientBuilder -> 
httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } + return new RestHighLevelClient(builder); } @Bean @@ -85,11 +96,21 @@ public CaseEventHandler caseEventHandler() { return new CaseEventHandler(); } - @Bean - public ElasticsearchClient elasticsearchClient() { - RestClient restClient = RestClient.builder(new HttpHost(url, port)).build(); - ElasticsearchTransport transport = new RestClientTransport(restClient, new ElasticSearchJsonpMapper()); - return new ElasticsearchClient(transport); + @Bean(name = "elasticCaseObjectMapper") + public ObjectMapper configureMapper() { + ObjectMapper mapper = new ObjectMapper(); + + JavaTimeModule javaTimeModule = new JavaTimeModule(); + javaTimeModule.addSerializer(LocalDateTime.class, new LocalDateTimeJsonSerializer()); + javaTimeModule.addDeserializer(LocalDateTime.class, new LocalDateTimeJsonDeserializer()); + + mapper.registerModule(javaTimeModule); + mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + return mapper; + } + private boolean hasCredentials() { + return elasticsearchProperties.getUsername() != null && !elasticsearchProperties.getUsername().isBlank() && + elasticsearchProperties.getPassword() != null && !elasticsearchProperties.getPassword().isBlank(); } } diff --git a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java index 99c2e1c180b..f3b1269b094 100644 --- a/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java +++ b/src/main/java/com/netgrif/application/engine/configuration/properties/ElasticsearchProperties.java @@ -35,6 +35,10 @@ public class ElasticsearchProperties { private String url; + private String username; + + private String password; + private Map index; private boolean analyzerEnabled = false; diff --git 
a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java index ee50866e3ae..11286761dae 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticCaseMappingService.java @@ -230,11 +230,11 @@ private StringBuilder buildFullName(String name, String surname) { protected Optional transformDateField(com.netgrif.application.engine.workflow.domain.DataField dateField, com.netgrif.application.engine.petrinet.domain.dataset.DateField netField) { if (dateField.getValue() instanceof LocalDate) { LocalDate date = (LocalDate) dateField.getValue(); - return formatDateField(LocalDateTime.of(date, LocalTime.NOON)); + return formatDateField(LocalDateTime.of(date, LocalTime.MIDNIGHT)); } else if (dateField.getValue() instanceof Date) { // log.warn(String.format("DateFields should have LocalDate values! DateField (%s) with Date value found! Value will be converted for indexation.", netField.getImportId())); LocalDateTime transformed = this.transformDateValueField(dateField); - return formatDateField(LocalDateTime.of(transformed.toLocalDate(), LocalTime.NOON)); + return formatDateField(LocalDateTime.of(transformed.toLocalDate(), LocalTime.MIDNIGHT)); } else { // TODO throw error? log.error(String.format("Unsupported DateField value type (%s)! 
Skipping indexation...", dateField.getValue().getClass().getCanonicalName())); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 41142a1c6e9..d4152cbd8be 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -1,10 +1,6 @@ package com.netgrif.application.engine.elastic.service; -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch.core.BulkRequest; -import co.elastic.clients.elasticsearch.core.BulkResponse; -import co.elastic.clients.elasticsearch.core.bulk.BulkOperation; import com.fasterxml.jackson.databind.ObjectMapper; import com.netgrif.application.engine.configuration.properties.ElasticsearchProperties; import com.netgrif.application.engine.elastic.domain.ElasticCase; @@ -15,17 +11,21 @@ import com.netgrif.application.engine.petrinet.service.PetriNetService; import com.netgrif.application.engine.workflow.domain.Case; import com.netgrif.application.engine.workflow.domain.Task; -import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.open.OpenIndexRequest; import org.elasticsearch.action.admin.indices.open.OpenIndexResponse; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.indices.CloseIndexRequest; import org.elasticsearch.client.indices.CloseIndexResponse; import 
org.elasticsearch.client.indices.PutIndexTemplateRequest; import org.elasticsearch.xcontent.XContentType; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.ApplicationContext; import org.springframework.core.io.Resource; import org.springframework.dao.InvalidDataAccessApiUsageException; @@ -52,7 +52,6 @@ @Slf4j @Service -@RequiredArgsConstructor public class ElasticIndexService implements IElasticIndexService { private static final String PLACEHOLDERS = "petriNetIndex, caseIndex, taskIndex"; @@ -61,7 +60,7 @@ public class ElasticIndexService implements IElasticIndexService { private final ElasticsearchRestTemplate elasticsearchTemplate; - private final ElasticsearchClient elasticsearchClient; + private final RestHighLevelClient elasticsearchClient; private final ElasticsearchOperations operations; @@ -79,6 +78,34 @@ public class ElasticIndexService implements IElasticIndexService { private final ElasticTaskMappingService taskMappingService; + private final ObjectMapper objectMapper; + + public ElasticIndexService(ApplicationContext context, + ElasticsearchRestTemplate elasticsearchTemplate, + RestHighLevelClient elasticsearchClient, + ElasticsearchOperations operations, + ElasticsearchProperties elasticsearchProperties, + ElasticCaseRepository elasticCaseRepository, + ElasticTaskRepository elasticTaskRepository, + PetriNetService petriNetService, + MongoTemplate mongoTemplate, + ElasticCaseMappingService caseMappingService, + ElasticTaskMappingService taskMappingService, + @Qualifier("elasticCaseObjectMapper") + ObjectMapper objectMapper) { + this.context = context; + this.elasticsearchTemplate = elasticsearchTemplate; + this.elasticsearchClient = elasticsearchClient; + this.operations = operations; + this.elasticsearchProperties = elasticsearchProperties; + this.elasticCaseRepository = elasticCaseRepository; + this.elasticTaskRepository = elasticTaskRepository; + this.petriNetService = petriNetService; + 
this.mongoTemplate = mongoTemplate; + this.caseMappingService = caseMappingService; + this.taskMappingService = taskMappingService; + this.objectMapper = objectMapper; + } @Override public boolean indexExists(String indexName) { @@ -363,7 +390,7 @@ private void reindexQueried(org.springframework.data.mongodb.core.query.Query qu query.cursorBatchSize(caseBatchSize); long page = 1, currentBatchSize = 0; - List caseOperations = new ArrayList<>(); + List caseOperations = new ArrayList<>(); List caseIds = new ArrayList<>(); try (CloseableIterator cursor = mongoTemplate.stream(query, Case.class)) { @@ -414,7 +441,7 @@ private void bulkIndexTasks(List caseIds, int taskBatchSize) { long numOfPages = ((totalSize / taskBatchSize) + 1); long page = 1, currentBatchSize = 0; - List taskOperations = new ArrayList<>(); + List taskOperations = new ArrayList<>(); try (CloseableIterator cursor = mongoTemplate.stream(query, Task.class)) { while (cursor.hasNext()) { @@ -465,17 +492,14 @@ private void prepareCase(Case useCase) { * @param doc transformed ElasticCase object * @param operations collection of BulkOperations to add this operation to */ - private void prepareCaseBulkOperation(ElasticCase doc, List operations) { + private void prepareCaseBulkOperation(ElasticCase doc, List operations) { try { - operations.add(BulkOperation.of(op -> op - .update(u -> u - .index(elasticsearchProperties.getIndex().get("case")) - .id(doc.getId()) - .action(a -> a - .doc(doc) - .docAsUpsert(true) - ) - ))); + UpdateRequest updateRequest = new UpdateRequest() + .id(doc.getId()) + .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) + .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) + .index(elasticsearchProperties.getIndex().get("case")); + operations.add(updateRequest); } catch (Exception e) { log.error("Failed to prepare bulk operation for case [{}]: {}", doc.getStringId(), e.getMessage()); } @@ -487,18 +511,14 @@ private void prepareCaseBulkOperation(ElasticCase doc, 
List opera * @param doc transformed ElasticTask object * @param operations collection of BulkOperations to add this operation to */ - private void prepareTaskBulkOperation(ElasticTask doc, List operations) { + private void prepareTaskBulkOperation(ElasticTask doc, List operations) { try { - operations.add(BulkOperation.of(op -> op - .update(u -> u - .index(elasticsearchProperties.getIndex().get("task")) - .id(doc.getId()) - .action(a -> a - .doc(doc) - .docAsUpsert(true) - ) - )) - ); + UpdateRequest updateRequest = new UpdateRequest() + .id(doc.getId()) + .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) + .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) + .index(elasticsearchProperties.getIndex().get("task")); + operations.add(updateRequest); } catch (Exception e) { log.error("Failed to prepare bulk operation for task [{}]: {}", doc.getStringId(), e.getMessage()); } @@ -509,16 +529,16 @@ private void prepareTaskBulkOperation(ElasticTask doc, List opera * * @param operations list of bulk operations to execute */ - private void executeAndValidate(List operations) { + private void executeAndValidate(List operations) { if (operations.isEmpty()) { return; } - BulkRequest.Builder builder = new BulkRequest.Builder(); - builder.operations(operations); + BulkRequest request = new BulkRequest(); + operations.forEach(request::add); try { - BulkResponse response = elasticsearchClient.bulk(builder.build()); + BulkResponse response = elasticsearchClient.bulk(request, RequestOptions.DEFAULT); checkForBulkUpdateFailure(response); log.info("Batch indexed successfully with {} ops", operations.size()); } catch (ElasticsearchException e) { @@ -532,8 +552,8 @@ private void executeAndValidate(List operations) { log.warn("Dividing the requirement."); int mid = operations.size() / 2; - List left = operations.subList(0, mid); - List right = operations.subList(mid, operations.size()); + List left = operations.subList(0, mid); + List right = 
operations.subList(mid, operations.size()); executeAndValidate(new ArrayList<>(left)); executeAndValidate(new ArrayList<>(right)); @@ -550,9 +570,9 @@ private void executeAndValidate(List operations) { */ private void checkForBulkUpdateFailure(BulkResponse response) { Map failedDocuments = new HashMap<>(); - response.items().forEach(item -> { - if (item.error() != null) { - failedDocuments.put(item.id(), item.error().reason()); + Arrays.stream(response.getItems()).forEach(item -> { + if (item.getFailure() != null) { + failedDocuments.put(item.getId(), item.getFailure().getMessage()); } }); diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java deleted file mode 100644 index aa88227a758..00000000000 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticSearchJsonpMapper.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.netgrif.application.engine.elastic.service; - -import co.elastic.clients.json.jackson.JacksonJsonpMapper; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; -import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonDeserializer; -import com.netgrif.application.engine.elastic.serializer.LocalDateTimeJsonSerializer; - -import java.time.LocalDateTime; - -public class ElasticSearchJsonpMapper extends JacksonJsonpMapper { - public ElasticSearchJsonpMapper() { - super(configureMapper()); - } - - private static ObjectMapper configureMapper() { - ObjectMapper mapper = new ObjectMapper(); - JavaTimeModule javaTimeModule = new JavaTimeModule(); - - mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); - javaTimeModule.addSerializer(LocalDateTime.class, new LocalDateTimeJsonSerializer()); - javaTimeModule.addDeserializer(LocalDateTime.class, new 
LocalDateTimeJsonDeserializer()); - mapper.registerModule(javaTimeModule); - - return mapper; - } -} From d1600974369c2809dfd15a7a18b960ef5e122be1 Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Thu, 14 Aug 2025 17:45:34 +0200 Subject: [PATCH 25/27] Fix null ID handling in Elastic bulk operations Updated `prepareCaseBulkOperation` and `prepareTaskBulkOperation` methods to handle cases where `doc.getId()` is null by using `doc.getStringId()` as a fallback. This ensures robust ID assignment during bulk operations and prevents potential null pointer issues. --- .../engine/elastic/service/ElasticIndexService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index d4152cbd8be..278b330df68 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -495,7 +495,7 @@ private void prepareCase(Case useCase) { private void prepareCaseBulkOperation(ElasticCase doc, List operations) { try { UpdateRequest updateRequest = new UpdateRequest() - .id(doc.getId()) + .id(doc.getId() == null ? doc.getStringId() : doc.getId()) .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) .index(elasticsearchProperties.getIndex().get("case")); @@ -514,7 +514,7 @@ private void prepareCaseBulkOperation(ElasticCase doc, List opera private void prepareTaskBulkOperation(ElasticTask doc, List operations) { try { UpdateRequest updateRequest = new UpdateRequest() - .id(doc.getId()) + .id(doc.getId() == null ? 
doc.getStringId() : doc.getId()) .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) .index(elasticsearchProperties.getIndex().get("task")); From b88fc8a2dcef01ce5f80cdfc81350f24ac13b36a Mon Sep 17 00:00:00 2001 From: renczesstefan Date: Thu, 14 Aug 2025 17:46:45 +0200 Subject: [PATCH 26/27] Refactor bulk operation JSON handling in ElasticIndexService Extract the JSON serialization of documents into a variable to improve code readability and reduce redundancy. This change applies to both case and task bulk operations. --- .../engine/elastic/service/ElasticIndexService.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java index 278b330df68..ec41ec1f7f0 100644 --- a/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java +++ b/src/main/java/com/netgrif/application/engine/elastic/service/ElasticIndexService.java @@ -494,10 +494,11 @@ private void prepareCase(Case useCase) { */ private void prepareCaseBulkOperation(ElasticCase doc, List operations) { try { + String json = objectMapper.writeValueAsString(doc); UpdateRequest updateRequest = new UpdateRequest() .id(doc.getId() == null ? 
doc.getStringId() : doc.getId()) - .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) - .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) + .doc(json, XContentType.JSON) + .upsert(json, XContentType.JSON) .index(elasticsearchProperties.getIndex().get("case")); operations.add(updateRequest); } catch (Exception e) { @@ -513,10 +514,11 @@ private void prepareCaseBulkOperation(ElasticCase doc, List opera */ private void prepareTaskBulkOperation(ElasticTask doc, List operations) { try { + String json = objectMapper.writeValueAsString(doc); UpdateRequest updateRequest = new UpdateRequest() .id(doc.getId() == null ? doc.getStringId() : doc.getId()) - .doc(objectMapper.writeValueAsString(doc), XContentType.JSON) - .upsert(objectMapper.writeValueAsString(doc), XContentType.JSON) + .doc(json, XContentType.JSON) + .upsert(json, XContentType.JSON) .index(elasticsearchProperties.getIndex().get("task")); operations.add(updateRequest); } catch (Exception e) { From 6f102aab75c546565e57db5dbbfa4a8c0fbff019 Mon Sep 17 00:00:00 2001 From: Machac Date: Thu, 14 Aug 2025 18:25:34 +0200 Subject: [PATCH 27/27] [NAE-2136] Speed up Elasticsearch reindex Update `DataSearchRequestTest` to use `MIDNIGHT` instead of `NOON` for `LocalTime` in timestamp creation --- .../application/engine/elastic/DataSearchRequestTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/groovy/com/netgrif/application/engine/elastic/DataSearchRequestTest.groovy b/src/test/groovy/com/netgrif/application/engine/elastic/DataSearchRequestTest.groovy index 08017730977..56cd0c04d9b 100644 --- a/src/test/groovy/com/netgrif/application/engine/elastic/DataSearchRequestTest.groovy +++ b/src/test/groovy/com/netgrif/application/engine/elastic/DataSearchRequestTest.groovy @@ -158,7 +158,7 @@ class DataSearchRequestTest { new AbstractMap.SimpleEntry("user.emailValue.keyword" as String, "${testUser1.email}" as String), new AbstractMap.SimpleEntry("user.fullNameValue.keyword" 
as String, "${testUser1.fullName}" as String), new AbstractMap.SimpleEntry("user.userIdValue" as String, "${testUser1.getId()}" as String), - new AbstractMap.SimpleEntry("date.timestampValue" as String, "${Timestamp.valueOf(LocalDateTime.of(date, LocalTime.NOON)).getTime()}" as String), + new AbstractMap.SimpleEntry("date.timestampValue" as String, "${Timestamp.valueOf(LocalDateTime.of(date, LocalTime.MIDNIGHT)).getTime()}" as String), new AbstractMap.SimpleEntry("datetime.timestampValue" as String, "${Timestamp.valueOf(date.atTime(13, 37)).getTime()}" as String), new AbstractMap.SimpleEntry("enumeration" as String, "Alice" as String), new AbstractMap.SimpleEntry("enumeration" as String, "Alica" as String),