From 18c46e50617d15a607a1d14310f8bbcfceb61d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 24 Oct 2024 19:13:57 +0200 Subject: [PATCH 01/51] datastore: add facet support in mongodb datastore, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBCollection.java | 34 ++--- .../datastore/mongodb/MongoDBQueryUtils.java | 127 +++++++++++++++++- .../mongodb/MongoDBCollectionTest.java | 76 ++++++++++- 3 files changed, 204 insertions(+), 33 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java index ed76bdcb2..c4a42f362 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java @@ -331,31 +331,25 @@ public DataResult aggregate(List operations, ComplexTypeC QueryOptions options) { long start = startQuery(); - DataResult queryResult; - MongoDBIterator iterator = mongoDBNativeQuery.aggregate(operations, converter, options); -// MongoCursor iterator = output.iterator(); List list = new LinkedList<>(); - if (queryResultWriter != null) { - try { - queryResultWriter.open(); + if (operations != null && operations.size() > 0) { + MongoDBIterator iterator = mongoDBNativeQuery.aggregate(operations, converter, options); + if (queryResultWriter != null) { + try { + queryResultWriter.open(); + while (iterator.hasNext()) { + queryResultWriter.write(iterator.next()); + } + queryResultWriter.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } else { while (iterator.hasNext()) { - queryResultWriter.write(iterator.next()); + list.add((T) iterator.next()); } - queryResultWriter.close(); - } catch 
(IOException e) { - throw new RuntimeException(e.getMessage(), e); - } - } else { -// if (converter != null) { -// while (iterator.hasNext()) { -// list.add(converter.convertToDataModelType(iterator.next())); -// } -// } else { - while (iterator.hasNext()) { - list.add((T) iterator.next()); } -// } } queryResult = endQuery(list, start); return queryResult; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index e33ced350..48b927442 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -18,6 +18,8 @@ import com.mongodb.client.model.*; import org.apache.commons.lang3.StringUtils; +import org.bson.BsonDocument; +import org.bson.BsonInt32; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.commons.datastore.core.Query; @@ -27,13 +29,12 @@ import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; + /** * Created by imedina on 17/01/16. 
*/ @@ -41,12 +42,16 @@ public class MongoDBQueryUtils { @Deprecated private static final String REGEX_SEPARATOR = "(\\w+|\\^)"; -// private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); + // private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~/?|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_NUMERIC_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_BOOLEAN_PATTERN = Pattern.compile("^(!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)"); + private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); + private static final Pattern RANGE_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+):([.0-9]+)\\]:([.0-9]+)"); + public static final String TO_REPLACE_DOTS = "."; + // TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated private static final Pattern DEPRECATED_PATTERN = Pattern.compile("^(=?\\^|=?\\$)([^=/<>~!]+[.]*)$"); @@ -80,6 +85,15 @@ public enum ComparisonOperator { BETWEEN } + public enum Accumulator { + count, + avg, + min, + max, + stdDevPop, + stdDevSamp, + bucket + } public static Bson createFilter(String mongoDbField, String queryParam, Query query) { return createFilter(mongoDbField, queryParam, query, QueryParam.Type.TEXT, ComparisonOperator.EQUALS, LogicalOperator.OR); @@ -497,7 +511,7 @@ public static Bson createFilter(String mongoDbField, List queryValues, Co * @return the Bson query. 
*/ protected static Bson createDateFilter(String mongoDbField, List dateValues, ComparisonOperator comparator, - QueryParam.Type type) { + QueryParam.Type type) { Bson filter = null; Object date = null; @@ -641,6 +655,107 @@ public static List createGroupBy(Bson query, List groupByField, St } } + public static List createFacet(Bson query, String facetField) { + if (facetField == null || StringUtils.isEmpty(facetField.trim())) { + return new ArrayList<>(); + } + String cleanFacetField = facetField.replace(" ", ""); + ArrayList facetFields = new ArrayList<>(Arrays.asList(cleanFacetField.split(";"))); + return createFacet(query, facetFields); + } + + private static List createFacet(Bson query, List facetFields) { + Set includeFields = new HashSet<>(); + + List boundaries = new ArrayList<>(); + List facets = new ArrayList<>(); + for (String facetField : facetFields) { + Facet facet = null; + if (facetField.contains(",")) { + Document id = new Document(); + for (String field : facetField.split(",")) { + String cleanField = field.replace(".", TO_REPLACE_DOTS); + id.append(cleanField, "$" + field); + includeFields.add(field); + } + facet = new Facet(facetField.replace(".", TO_REPLACE_DOTS).replace(",", "_"), Arrays.asList(Aggregates.group(id, + Accumulators.sum("count", 1)))); + } else { + Accumulator accumulator; + String field; + Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); + if (matcher.matches()) { + accumulator = Accumulator.valueOf(matcher.group(1)); + field = matcher.group(2); + } else { + matcher = RANGE_PATTERN.matcher(facetField); + if (matcher.matches()) { + accumulator = bucket; + field = matcher.group(1); + double start = Double.parseDouble(matcher.group(2)); + double end = Double.parseDouble(matcher.group(3)); + double step = Double.parseDouble(matcher.group(4)); + for (double i = start; i <= end; i += step) { + boundaries.add(i); + } + } else { + accumulator = count; + field = facetField; + } + } + includeFields.add(field); + + String 
cleanField = field.replace(".", TO_REPLACE_DOTS); + String id = "$" + field; + switch (accumulator) { + case count: { + facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + break; + } + case avg: { + facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); + break; + } + case min: { + facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); + break; + } + case max: { + facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); + break; + } + case stdDevPop: { + facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevPop(stdDevPop.name(), id)))); + break; + } + case stdDevSamp: { + facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevSamp("stdDevSamp", id)))); + break; + } + case bucket: { + facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries, + new BucketOptions() + .defaultBucket("Other") + .output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1)))))); + break; + } + default: { + break; + } + } + } + if (facet != null) { + facets.add(facet); + } + } + + Bson match = Aggregates.match(query); + Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); + return Arrays.asList(match, project, Aggregates.facet(facets)); + } + public static void parseQueryOptions(List operations, QueryOptions options) { if (options != null) { Bson projection = getProjection(options); diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 377d610c5..f2ae97c9e 100644 --- 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -55,6 +55,7 @@ public class MongoDBCollectionTest { public ExpectedException thrown = ExpectedException.none(); public static final List NAMES = Arrays.asList("John", "Jack", "Javi"); public static final List SURNAMES = Arrays.asList("Doe", "Davis", null); + public static final List COLORS = Arrays.asList("red", "green", "yellow", "blue"); @BeforeClass public static void beforeClass() throws Exception { @@ -88,16 +89,35 @@ public static class User { public String surname; public int age; public int number; + public House house; + + public static class House { + public String color; + public int numRooms; + public int m2; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("House{"); + sb.append("color='").append(color).append('\''); + sb.append(", numRooms=").append(numRooms); + sb.append(", m2=").append(m2); + sb.append('}'); + return sb.toString(); + } + } @Override public String toString() { - return "User{" - + "id:" + id - + ", name:\"" + name + '"' - + ", surname:\"" + surname + '"' - + ", age:" + age - + ", number:" + number - + '}'; + final StringBuilder sb = new StringBuilder("User{"); + sb.append("id=").append(id); + sb.append(", name='").append(name).append('\''); + sb.append(", surname='").append(surname).append('\''); + sb.append(", age=").append(age); + sb.append(", number=").append(number); + sb.append(", house=").append(house); + sb.append('}'); + return sb.toString(); } } @@ -111,6 +131,11 @@ private static MongoDBCollection createTestCollection(String test, int size) { document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size()))); document.put("age", (int) i % 5); document.put("number", (int) i * i); + Document house = new Document(); + 
house.put("color", COLORS.get(random.nextInt(COLORS.size()))); + house.put("numRooms", (int) (i % 7) + 1); + house.put("m2", (int) i * 23); + document.put("house", house); mongoDBCollection.nativeQuery().insert(document, null); } return mongoDBCollection; @@ -450,6 +475,43 @@ public void testAggregate() { assertTrue(result.contains(queryResult.getResults().get(0))); } + @Test + public void testFacet() { + DataResult allResults = mongoDBCollection.find(new Document(), null); + System.out.println("allResults.getNumResults() = " + allResults.getNumResults()); + + Document match = new Document("age", new BasicDBObject("$gt", 2)); +// List facets = MongoDBQueryUtils.createFacet(match, ""); + List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "name,surname"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)"); +// List facets = MongoDBQueryUtils.createFacet(match, "name,house.color"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:1000000]:100000"); + System.out.println("facets = " + facets); + DataResult aggregate = mongoDBCollection.aggregate(facets, null); + System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); + System.out.println(">>>>>>>>> facet results"); + for (Document result : aggregate.getResults()) { + System.out.println("result = " + result); + } + + int counter = 0; + for (Document result : allResults.getResults()) { + if (result.getInteger("age") > 2) { + counter++; + } + } + System.out.println(">>>>>>>>> all results age > 2: " + counter + " of " + allResults.getNumResults()); + for (Document result 
: allResults.getResults()) { + if (result.getInteger("age") > 2) { + System.out.println("result = " + result); + } + } + } + + @Test public void testInsert() throws Exception { Long countBefore = mongoDBCollectionInsertTest.count().getNumMatches(); From 5495d809263b8513d22869216d771a0280687a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 25 Oct 2024 08:20:48 +0200 Subject: [PATCH 02/51] datastore: improve code, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBQueryUtils.java | 99 +++++++++++-------- 1 file changed, 57 insertions(+), 42 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 48b927442..4e3d961d0 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -656,22 +656,27 @@ public static List createGroupBy(Bson query, List groupByField, St } public static List createFacet(Bson query, String facetField) { + // Sanity check if (facetField == null || StringUtils.isEmpty(facetField.trim())) { return new ArrayList<>(); } String cleanFacetField = facetField.replace(" ", ""); + + // Multiple facets separated by ; ArrayList facetFields = new ArrayList<>(Arrays.asList(cleanFacetField.split(";"))); return createFacet(query, facetFields); } private static List createFacet(Bson query, List facetFields) { Set includeFields = new HashSet<>(); - List boundaries = new ArrayList<>(); List facets = new ArrayList<>(); + for (String facetField : facetFields) { - Facet facet = null; + Facet facet; + if (facetField.contains(",")) { + // Facet combining fields (i.e., AND logical) Document id = new Document(); 
for (String field : facetField.split(",")) { String cleanField = field.replace(".", TO_REPLACE_DOTS); @@ -681,6 +686,7 @@ private static List createFacet(Bson query, List facetFields) { facet = new Facet(facetField.replace(".", TO_REPLACE_DOTS).replace(",", "_"), Arrays.asList(Aggregates.group(id, Accumulators.sum("count", 1)))); } else { + // Facet with accumulators (count, avg, min...) or range Accumulator accumulator; String field; Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); @@ -705,57 +711,66 @@ private static List createFacet(Bson query, List facetFields) { } includeFields.add(field); - String cleanField = field.replace(".", TO_REPLACE_DOTS); - String id = "$" + field; - switch (accumulator) { - case count: { - facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); - break; - } - case avg: { - facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); - break; - } - case min: { - facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); - break; - } - case max: { - facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); - break; - } - case stdDevPop: { - facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field, - Accumulators.stdDevPop(stdDevPop.name(), id)))); - break; - } - case stdDevSamp: { - facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field, - Accumulators.stdDevSamp("stdDevSamp", id)))); - break; - } - case bucket: { - facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries, - new BucketOptions() - .defaultBucket("Other") - .output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1)))))); - break; - } - default: { - break; - } - } + // Get MongoDB facet + facet = getMongoDBFacet(field, accumulator, boundaries); } if (facet != 
null) { facets.add(facet); } } + // Build MongoDB pipeline for facets Bson match = Aggregates.match(query); Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); return Arrays.asList(match, project, Aggregates.facet(facets)); } + private static Facet getMongoDBFacet(String field, Accumulator accumulator, List boundaries) { + String id = "$" + field; + String cleanField = field.replace(".", TO_REPLACE_DOTS); + + Facet facet = null; + switch (accumulator) { + case count: { + facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + break; + } + case avg: { + facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); + break; + } + case min: { + facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); + break; + } + case max: { + facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); + break; + } + case stdDevPop: { + facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevPop(stdDevPop.name(), id)))); + break; + } + case stdDevSamp: { + facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevSamp("stdDevSamp", id)))); + break; + } + case bucket: { + facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries, + new BucketOptions() + .defaultBucket("Other") + .output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1)))))); + break; + } + default: { + break; + } + } + return facet; + } + public static void parseQueryOptions(List operations, QueryOptions options) { if (options != null) { Bson projection = getProjection(options); From 304603e063209541365198edc18c6697cf6021c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 25 Oct 2024 13:44:09 
+0200 Subject: [PATCH 03/51] datastore: implement the MongoDB to FacetField converter, #TASK-7151, #TASK-7134 --- .../MongoDBFacetToFacetFieldsConverter.java | 124 ++++++++++++++++++ .../datastore/mongodb/MongoDBQueryUtils.java | 45 ++++--- .../mongodb/MongoDBCollectionTest.java | 55 ++++++-- 3 files changed, 199 insertions(+), 25 deletions(-) create mode 100644 commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java new file mode 100644 index 000000000..c1defe766 --- /dev/null +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java @@ -0,0 +1,124 @@ +package org.opencb.commons.datastore.mongodb; + +import org.apache.commons.lang3.StringUtils; +import org.bson.Document; +import org.opencb.commons.datastore.core.ComplexTypeConverter; +import org.opencb.commons.datastore.core.FacetField; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.count; + +public class MongoDBFacetToFacetFieldsConverter implements ComplexTypeConverter, Document> { + + @Override + public List convertToDataModelType(Document document) { + if (document == null || document.entrySet().size() == 0) { + return Collections.emptyList(); + } + + List facets = new ArrayList<>(); + for (Map.Entry entry : document.entrySet()) { + String key = entry.getKey(); + List documentValues = (List) entry.getValue(); + if (key.endsWith(COUNTS_SUFFIX)) { + List 
buckets = new ArrayList<>(documentValues.size()); + long total = 0; + for (Document documentValue : documentValues) { + long counter = documentValue.getInteger(count.name()); + String bucketValue; + if (documentValue.get(INTERNAL_ID) instanceof String) { + bucketValue = documentValue.getString(INTERNAL_ID); + } else if (documentValue.get(INTERNAL_ID) instanceof Boolean) { + bucketValue = documentValue.getBoolean(INTERNAL_ID).toString(); + } else { + Document combined = (Document) documentValue.get(INTERNAL_ID); + bucketValue = StringUtils.join(combined.values(), AND_SEPARATOR); + } + buckets.add(new FacetField.Bucket(bucketValue, counter, null)); + total += counter; + } + key = key.substring(0, key.length() - COUNTS_SUFFIX.length()); + facets.add(new FacetField(key, total, buckets)); + } else if (key.endsWith(RANGES_SUFFIX)) { + List facetFieldValues = new ArrayList<>(); + Number start = null; + Number end = null; + Number step = null; + Double other = null; + for (Document value : documentValues) { + if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { + other = 1.0d * value.getInteger(count.name()); + } else { + Double range = value.getDouble(INTERNAL_ID); + Integer counter = value.getInteger(count.name()); + facetFieldValues.add(1.0d * counter); + if (start == null) { + start = range; + } + end = range; + if (step == null && start != end) { + step = end.doubleValue() - start.doubleValue(); + } + } + } + System.out.println("entry = " + entry); + key = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); + if (other != null) { + key += " (out of range = " + other + ")"; + } + FacetField facetField = new FacetField(key, "range", facetFieldValues) + .setStart(start) + .setEnd(end) + .setStep(step); + facets.add(facetField); + } else { + Document documentValue = ((List) entry.getValue()).get(0); + MongoDBQueryUtils.Accumulator accumulator = 
getAccumulator(documentValue); + switch (accumulator) { + case max: + case min: + case avg: + case stdDevPop: + case stdDevSamp: { + Double fieldValue; + if (documentValue.get(accumulator.name()) instanceof Integer) { + fieldValue = 1.0d * documentValue.getInteger(accumulator.name()); + } else { + fieldValue = documentValue.getDouble(accumulator.name()); + } + facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), + Collections.singletonList(fieldValue))); + break; + } + default: { + // Nothing to do + break; + } + } + } + } + return facets; + } + + private MongoDBQueryUtils.Accumulator getAccumulator(Document document) { + for (Map.Entry entry : document.entrySet()) { + try { + MongoDBQueryUtils.Accumulator accumulator = MongoDBQueryUtils.Accumulator.valueOf(entry.getKey()); + return accumulator; + } catch (IllegalArgumentException e) { + // Do nothing + } + } + throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ",")); + } + + @Override + public Document convertToStorageType(List facetFields) { + throw new RuntimeException("Not yet implemented"); + } +} diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 4e3d961d0..9937e7563 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -50,7 +50,18 @@ public class MongoDBQueryUtils { private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); private static final Pattern RANGE_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+):([.0-9]+)\\]:([.0-9]+)"); - public static final 
String TO_REPLACE_DOTS = "."; + + public static final String INTERNAL_ID = "_id"; + public static final String AND_SEPARATOR = "_and_"; + public static final String OTHER = "Other"; + + public static final String COUNTS_SUFFIX = "Counts"; + public static final String AVG_SUFFIX = "Avg"; + public static final String MIN_SUFFIX = "Min"; + public static final String MAX_SUFFIX = "Max"; + public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; + public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; + public static final String RANGES_SUFFIX = "Ranges"; // TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated @@ -679,12 +690,11 @@ private static List createFacet(Bson query, List facetFields) { // Facet combining fields (i.e., AND logical) Document id = new Document(); for (String field : facetField.split(",")) { - String cleanField = field.replace(".", TO_REPLACE_DOTS); - id.append(cleanField, "$" + field); + id.append(field, "$" + field); includeFields.add(field); } - facet = new Facet(facetField.replace(".", TO_REPLACE_DOTS).replace(",", "_"), Arrays.asList(Aggregates.group(id, - Accumulators.sum("count", 1)))); + facet = new Facet(facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, + Arrays.asList(Aggregates.group(id, Accumulators.sum("count", 1)))); } else { // Facet with accumulators (count, avg, min...) 
or range Accumulator accumulator; @@ -722,46 +732,49 @@ private static List createFacet(Bson query, List facetFields) { // Build MongoDB pipeline for facets Bson match = Aggregates.match(query); Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); - return Arrays.asList(match, project, Aggregates.facet(facets)); + // Dot notation management for facets + Document aggregates = GenericDocumentComplexConverter.replaceDots(Document.parse(Aggregates.facet(facets).toBsonDocument() + .toJson())); + + return Arrays.asList(match, project, aggregates); } private static Facet getMongoDBFacet(String field, Accumulator accumulator, List boundaries) { String id = "$" + field; - String cleanField = field.replace(".", TO_REPLACE_DOTS); Facet facet = null; switch (accumulator) { case count: { - facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); break; } case avg: { - facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); + facet = new Facet(field + AVG_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); break; } case min: { - facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); + facet = new Facet(field + MIN_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); break; } case max: { - facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); + facet = new Facet(field + MAX_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); break; } case stdDevPop: { - facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field, + facet = new Facet(field + STD_DEV_POP_SUFFIX, 
Arrays.asList(Aggregates.group(field, Accumulators.stdDevPop(stdDevPop.name(), id)))); break; } case stdDevSamp: { - facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field, + facet = new Facet(field + STD_DEV_SAMP_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.stdDevSamp("stdDevSamp", id)))); break; } case bucket: { - facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries, + facet = new Facet(field + RANGES_SUFFIX, Aggregates.bucket(id, boundaries, new BucketOptions() - .defaultBucket("Other") - .output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1)))))); + .defaultBucket(OTHER) + .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); break; } default: { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index f2ae97c9e..9f51cee6d 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -25,10 +25,7 @@ import org.hamcrest.CoreMatchers; import org.junit.*; import org.junit.rules.ExpectedException; -import org.opencb.commons.datastore.core.DataResult; -import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.commons.datastore.core.QueryResultWriter; +import org.opencb.commons.datastore.core.*; import java.io.DataOutputStream; import java.io.FileOutputStream; @@ -481,36 +478,76 @@ public void testFacet() { System.out.println("allResults.getNumResults() = " + allResults.getNumResults()); Document match = new Document("age", new BasicDBObject("$gt", 2)); +// Document match = new 
Document("house.m2", new BasicDBObject("$gt", 10000)); // List facets = MongoDBQueryUtils.createFacet(match, ""); - List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); // List facets = MongoDBQueryUtils.createFacet(match, "name,surname"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)"); // List facets = MongoDBQueryUtils.createFacet(match, "name,house.color"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:1000000]:100000"); -// List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:1000000]:100000"); - System.out.println("facets = " + facets); + List facetsWithDots = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:20000]:1000"); + // List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + + System.out.println("facetsWithDots = " + facetsWithDots); + + List facetsWithoutDots = new ArrayList<>(); + for (Bson facet : facetsWithDots) { + Document facetDocument = GenericDocumentComplexConverter.replaceDots(Document.parse(facet.toBsonDocument().toJson())); + facetsWithoutDots.add(facetDocument); + } + System.out.println("facetsWithoutDots = " + facetsWithoutDots); + + List facets = facetsWithoutDots; + DataResult aggregate = mongoDBCollection.aggregate(facets, null); System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); - System.out.println(">>>>>>>>> facet results"); + System.out.println(">>>>>>>>> facet results (raw)"); for (Document result : aggregate.getResults()) { 
System.out.println("result = " + result); } + System.out.println(">>>>>>>>> facet results (restore dots)"); + for (Document result : aggregate.getResults()) { + System.out.println("result = " + GenericDocumentComplexConverter.restoreDots(result)); + } int counter = 0; for (Document result : allResults.getResults()) { +// if (result.getInteger("house.m2") > 10000) { if (result.getInteger("age") > 2) { counter++; } } System.out.println(">>>>>>>>> all results age > 2: " + counter + " of " + allResults.getNumResults()); +// System.out.println(">>>>>>>>> all results house.m2 > 10000: " + counter + " of " + allResults.getNumResults()); for (Document result : allResults.getResults()) { +// if (result.getInteger("house.m2") > 10000) { if (result.getInteger("age") > 2) { System.out.println("result = " + result); } } } + @Test + public void testFacetUsingConverter() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); +// Document match = new Document("house.m2", new BasicDBObject("$gt", 10000)); +// List facets = MongoDBQueryUtils.createFacet(match, ""); + List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name;name,surname"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name"); +// List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + System.out.println("facetField:\n" + facetField); + } + } + } + @Test public void testInsert() throws 
Exception { From b537e6f27223ce447541bf775fbcce29cd435837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 25 Oct 2024 17:27:26 +0200 Subject: [PATCH 04/51] datastore: fix MongoDB document to FacetField converter, #TASK-7151, #TASK-7134 --- .../MongoDBFacetToFacetFieldsConverter.java | 19 ++++++++++--------- .../datastore/mongodb/MongoDBQueryUtils.java | 9 +++++---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java index c1defe766..d10ba2906 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java @@ -30,14 +30,16 @@ public List convertToDataModelType(Document document) { long total = 0; for (Document documentValue : documentValues) { long counter = documentValue.getInteger(count.name()); - String bucketValue; - if (documentValue.get(INTERNAL_ID) instanceof String) { - bucketValue = documentValue.getString(INTERNAL_ID); - } else if (documentValue.get(INTERNAL_ID) instanceof Boolean) { - bucketValue = documentValue.getBoolean(INTERNAL_ID).toString(); - } else { - Document combined = (Document) documentValue.get(INTERNAL_ID); - bucketValue = StringUtils.join(combined.values(), AND_SEPARATOR); + String bucketValue = ""; + Object internalIdValue = documentValue.get(INTERNAL_ID); + if (internalIdValue instanceof String) { + bucketValue = (String) internalIdValue; + } else if (internalIdValue instanceof Boolean + || internalIdValue instanceof Integer + || internalIdValue instanceof Double) { + bucketValue = 
internalIdValue.toString(); + } else if (internalIdValue instanceof Document) { + bucketValue = StringUtils.join(((Document) internalIdValue).values(), AND_SEPARATOR); } buckets.add(new FacetField.Bucket(bucketValue, counter, null)); total += counter; @@ -66,7 +68,6 @@ public List convertToDataModelType(Document document) { } } } - System.out.println("entry = " + entry); key = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); if (other != null) { key += " (out of range = " + other + ")"; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 9937e7563..e44900ac9 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -694,9 +694,9 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(field); } facet = new Facet(facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, - Arrays.asList(Aggregates.group(id, Accumulators.sum("count", 1)))); + Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); } else { - // Facet with accumulators (count, avg, min...) or range + // Facet with accumulators (count, avg, min, max,...) 
or range (bucket) Accumulator accumulator; String field; Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); @@ -742,7 +742,7 @@ private static List createFacet(Bson query, List facetFields) { private static Facet getMongoDBFacet(String field, Accumulator accumulator, List boundaries) { String id = "$" + field; - Facet facet = null; + Facet facet; switch (accumulator) { case count: { facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); @@ -767,7 +767,7 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List } case stdDevSamp: { facet = new Facet(field + STD_DEV_SAMP_SUFFIX, Arrays.asList(Aggregates.group(field, - Accumulators.stdDevSamp("stdDevSamp", id)))); + Accumulators.stdDevSamp(stdDevSamp.name(), id)))); break; } case bucket: { @@ -778,6 +778,7 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List break; } default: { + facet = null; break; } } From 17f83b29d4e9390987f6ba5ad17d581171fca339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Oct 2024 09:02:37 +0100 Subject: [PATCH 05/51] datastore: change long to Long in FacetField, #TASK-7151, #TASK-7134 --- .../java/org/opencb/commons/datastore/core/FacetField.java | 4 ++-- .../datastore/solr/SolrFacetToFacetFieldsConverter.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index 2e4a3876b..f6c105bab 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -24,7 +24,7 @@ public class FacetField { private String name; - private 
long count; + private Long count; private List buckets; private String aggregationName; private List aggregationValues; @@ -32,7 +32,7 @@ public class FacetField { private Number end; private Number step; - public FacetField(String name, long count, List buckets) { + public FacetField(String name, Long count, List buckets) { this.name = name; this.count = count; this.buckets = buckets; diff --git a/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java index 4dcd7973a..ac2a0e7f6 100644 --- a/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java @@ -67,12 +67,12 @@ public static List convert(QueryResponse solrResponse, Map solrFacets, int defaultCount) { + private static long getBucketCount(SimpleOrderedMap solrFacets, long defaultCount) { List> solrBuckets = (List>) solrFacets.get("buckets"); if (solrBuckets == null) { for (int i = 0; i < solrFacets.size(); i++) { if (solrFacets.getName(i).equals("count")) { - return (int) solrFacets.getVal(i); + return (long) solrFacets.getVal(i); } } } From 865b94a1bc460efb0fd83143c61d76c72ab66cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Oct 2024 09:03:45 +0100 Subject: [PATCH 06/51] datastore: set range format to field[start..end]:step, #TASK-7151, #TASK-7134 --- .../MongoDBFacetToFacetFieldsConverter.java | 2 +- .../datastore/mongodb/MongoDBQueryUtils.java | 37 ++++++++++++++----- .../mongodb/MongoDBCollectionTest.java | 27 +++++++++++--- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git 
a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java index d10ba2906..c02757f75 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java @@ -70,7 +70,7 @@ public List convertToDataModelType(Document document) { } key = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); if (other != null) { - key += " (out of range = " + other + ")"; + key += " (counts out of range: " + other + ")"; } FacetField facetField = new FacetField(key, "range", facetFieldValues) .setStart(start) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index e44900ac9..b80be5f08 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -49,7 +49,14 @@ public class MongoDBQueryUtils { private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)"); private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); - private static final Pattern RANGE_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+):([.0-9]+)\\]:([.0-9]+)"); + private static final String RANGE_MARK = ".."; + private static final 
String RANGE_MARK1 = "["; + private static final String RANGE_MARK2 = "]"; + private static final String RANGE_SPLIT_MARK = "\\.\\."; + private static final Pattern RANGE_START_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+)"); + private static final Pattern RANGE_END_PATTERN = Pattern.compile("([.0-9]+)\\]:([.0-9]+)"); + public static final String INVALID_FORMAT_MSG = "Invalid format "; + public static final String RANGE_FORMAT_MSG = " for range aggregation. Valid format is: field[start..end]:step, e.g: size[0..1000]:200"; public static final String INTERNAL_ID = "_id"; public static final String AND_SEPARATOR = "_and_"; @@ -704,15 +711,25 @@ private static List createFacet(Bson query, List facetFields) { accumulator = Accumulator.valueOf(matcher.group(1)); field = matcher.group(2); } else { - matcher = RANGE_PATTERN.matcher(facetField); - if (matcher.matches()) { - accumulator = bucket; - field = matcher.group(1); - double start = Double.parseDouble(matcher.group(2)); - double end = Double.parseDouble(matcher.group(3)); - double step = Double.parseDouble(matcher.group(4)); - for (double i = start; i <= end; i += step) { - boundaries.add(i); + if (facetField.contains(RANGE_MARK) || facetField.contains(RANGE_MARK1) || facetField.contains(RANGE_MARK2)) { + String[] split = facetField.split(RANGE_SPLIT_MARK); + if (split.length == 2) { + Matcher matcher1 = RANGE_START_PATTERN.matcher(split[0]); + Matcher matcher2 = RANGE_END_PATTERN.matcher(split[1]); + if (matcher1.matches() && matcher2.matches()) { + accumulator = bucket; + field = matcher1.group(1); + double start = Double.parseDouble(matcher1.group(2)); + double end = Double.parseDouble(matcher2.group(1)); + double step = Double.parseDouble(matcher2.group(2)); + for (double i = start; i <= end; i += step) { + boundaries.add(i); + } + } else { + throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG); + } + } else { + throw new IllegalArgumentException(INVALID_FORMAT_MSG + 
facetField + RANGE_FORMAT_MSG); } } else { accumulator = count; diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 9f51cee6d..434d84fa0 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -480,13 +480,13 @@ public void testFacet() { Document match = new Document("age", new BasicDBObject("$gt", 2)); // Document match = new Document("house.m2", new BasicDBObject("$gt", 10000)); // List facets = MongoDBQueryUtils.createFacet(match, ""); -// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0..1000000]100000"); // List facets = MongoDBQueryUtils.createFacet(match, "name,surname"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)"); // List facets = MongoDBQueryUtils.createFacet(match, "name,house.color"); - List facetsWithDots = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:20000]:1000"); - // List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + List facetsWithDots = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0..20000]:1000"); + // List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0..20000]:1000"); 
System.out.println("facetsWithDots = " + facetsWithDots); @@ -532,11 +532,11 @@ public void testFacetUsingConverter() { Document match = new Document("age", new BasicDBObject("$gt", 2)); // Document match = new Document("house.m2", new BasicDBObject("$gt", 10000)); // List facets = MongoDBQueryUtils.createFacet(match, ""); - List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0..1000000]:100000"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name;name,surname"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); // List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name"); -// List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0..20000]:1000"); MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); @@ -548,6 +548,23 @@ public void testFacetUsingConverter() { } } + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + MongoDBQueryUtils.createFacet(match, "house.m2[toto0..20000]:1000"); + } + + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat1() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + } + + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat2() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + MongoDBQueryUtils.createFacet(match, "house.m2[toto0..20000]..1000"); + } @Test public void testInsert() throws 
Exception { From 880f2c63df61ac711e2c9234ce84a2aa18e5dc4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Oct 2024 10:59:02 +0100 Subject: [PATCH 07/51] datastore: use JsonInclude.Include.NON_NULL, #TASK-7151, #TASK-7134 --- .../java/org/opencb/commons/datastore/core/FacetField.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index f6c105bab..ebbcd6091 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -16,12 +16,15 @@ package org.opencb.commons.datastore.core; +import com.fasterxml.jackson.annotation.JsonInclude; + import java.util.List; /** * Created by jtarraga on 09/03/17. 
*/ +@JsonInclude(JsonInclude.Include.NON_NULL) public class FacetField { private String name; private Long count; From 75dc002d5d05b87cc724f8a0d52f6a0f3c886f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Oct 2024 11:25:53 +0100 Subject: [PATCH 08/51] datastore: fix pom.xml, #TASK-7151, #TASK-7134 --- commons-datastore/commons-datastore-core/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/commons-datastore/commons-datastore-core/pom.xml b/commons-datastore/commons-datastore-core/pom.xml index f75e11a92..ca4c30bb7 100644 --- a/commons-datastore/commons-datastore-core/pom.xml +++ b/commons-datastore/commons-datastore-core/pom.xml @@ -18,6 +18,11 @@ com.fasterxml.jackson.core jackson-core + + com.fasterxml.jackson.core + jackson-annotations + 2.14.3 + com.fasterxml.jackson.core jackson-databind From 25cbd91652663a47d20b8fc4f1eeb0903b360eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Oct 2024 12:14:47 +0100 Subject: [PATCH 09/51] datastore: restore FacetField to previous change, #TASK-7151, #TASK-7134 --- commons-datastore/commons-datastore-core/pom.xml | 5 ----- .../org/opencb/commons/datastore/core/FacetField.java | 8 ++------ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/commons-datastore/commons-datastore-core/pom.xml b/commons-datastore/commons-datastore-core/pom.xml index ca4c30bb7..f75e11a92 100644 --- a/commons-datastore/commons-datastore-core/pom.xml +++ b/commons-datastore/commons-datastore-core/pom.xml @@ -18,11 +18,6 @@ com.fasterxml.jackson.core jackson-core - - com.fasterxml.jackson.core - jackson-annotations - 2.14.3 - com.fasterxml.jackson.core jackson-databind diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index ebbcd6091..db4fb9923 
100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -16,18 +16,14 @@ package org.opencb.commons.datastore.core; -import com.fasterxml.jackson.annotation.JsonInclude; - import java.util.List; /** * Created by jtarraga on 09/03/17. */ - -@JsonInclude(JsonInclude.Include.NON_NULL) public class FacetField { private String name; - private Long count; + private long count; private List buckets; private String aggregationName; private List aggregationValues; @@ -35,7 +31,7 @@ public class FacetField { private Number end; private Number step; - public FacetField(String name, Long count, List buckets) { + public FacetField(String name, long count, List buckets) { this.name = name; this.count = count; this.buckets = buckets; From 9f0d9b95b21260fbc1ccb0df2b882fedf0038025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 29 Oct 2024 12:42:47 +0100 Subject: [PATCH 10/51] datastore: change count to Number, #TASK-7151, #TASK-7134 --- .../org/opencb/commons/datastore/core/FacetField.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index db4fb9923..f3e247473 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -23,7 +23,7 @@ */ public class FacetField { private String name; - private long count; + private Number count; private List buckets; private String aggregationName; private List aggregationValues; @@ -67,17 +67,20 @@ public FacetField 
setName(String name) { return this; } - public long getCount() { + public Number getCount() { return count; } - public FacetField setCount(long count) { + public FacetField setCount(Number count) { this.count = count; return this; } public FacetField addCount(long delta) { - this.count += delta; + if (this.count == null) { + this.count = 0L; + } + this.count = this.count.longValue() + delta; return this; } From 0f3a24db402a29561e93f0e49e9d08d91fc4488e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 29 Oct 2024 12:43:36 +0100 Subject: [PATCH 11/51] test: add JUnit tests for facets, #TASK-7151, #TASK-7134 --- .../MongoDBFacetToFacetFieldsConverter.java | 15 +- .../datastore/mongodb/MongoDBQueryUtils.java | 13 +- .../mongodb/MongoDBCollectionTest.java | 349 +++++++++++++++--- 3 files changed, 309 insertions(+), 68 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java index c02757f75..e3d6f9fb4 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java @@ -5,13 +5,10 @@ import org.opencb.commons.datastore.core.ComplexTypeConverter; import org.opencb.commons.datastore.core.FacetField; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; -import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.count; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; 
public class MongoDBFacetToFacetFieldsConverter implements ComplexTypeConverter, Document> { @@ -89,6 +86,8 @@ public List convertToDataModelType(Document document) { Double fieldValue; if (documentValue.get(accumulator.name()) instanceof Integer) { fieldValue = 1.0d * documentValue.getInteger(accumulator.name()); + } else if (documentValue.get(accumulator.name()) instanceof Long) { + fieldValue = 1.0d * documentValue.getLong(accumulator.name()); } else { fieldValue = documentValue.getDouble(accumulator.name()); } @@ -97,8 +96,7 @@ public List convertToDataModelType(Document document) { break; } default: { - // Nothing to do - break; + // Do nothing, exception is raised } } } @@ -115,7 +113,8 @@ private MongoDBQueryUtils.Accumulator getAccumulator(Document document) { // Do nothing } } - throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ",")); + throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ",") + + " Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, max, min, avg, stdDevPop, stdDevSamp), ",")); } @Override diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index b80be5f08..060817ebe 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -49,9 +49,9 @@ public class MongoDBQueryUtils { private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)"); private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); - 
private static final String RANGE_MARK = ".."; - private static final String RANGE_MARK1 = "["; - private static final String RANGE_MARK2 = "]"; + public static final String RANGE_MARK = ".."; + public static final String RANGE_MARK1 = "["; + public static final String RANGE_MARK2 = "]"; private static final String RANGE_SPLIT_MARK = "\\.\\."; private static final Pattern RANGE_START_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+)"); private static final Pattern RANGE_END_PATTERN = Pattern.compile("([.0-9]+)\\]:([.0-9]+)"); @@ -708,7 +708,12 @@ private static List createFacet(Bson query, List facetFields) { String field; Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); if (matcher.matches()) { - accumulator = Accumulator.valueOf(matcher.group(1)); + try { + accumulator = Accumulator.valueOf(matcher.group(1)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. Valid accumulator" + + " functions: " + StringUtils.join(Arrays.asList(count, max, min, avg, stdDevPop, stdDevSamp), ", ")); + } field = matcher.group(2); } else { if (facetField.contains(RANGE_MARK) || facetField.contains(RANGE_MARK1) || facetField.contains(RANGE_MARK2)) { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 434d84fa0..14bff9a09 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -20,6 +20,7 @@ import com.mongodb.MongoBulkWriteException; import com.mongodb.MongoWriteException; import com.mongodb.client.model.Filters; +import org.apache.commons.lang3.StringUtils; import 
org.bson.Document; import org.bson.conversions.Bson; import org.hamcrest.CoreMatchers; @@ -33,12 +34,15 @@ import java.util.*; import static org.junit.Assert.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** * Created by imedina on 29/03/14. */ public class MongoDBCollectionTest { + public static final String EMPTY = "***EMPTY***"; private static MongoDataStoreManager mongoDataStoreManager; private static MongoDataStore mongoDataStore; private static MongoDBCollection mongoDBCollection; @@ -86,6 +90,7 @@ public static class User { public String surname; public int age; public int number; + public boolean tall; public House house; public static class House { @@ -112,6 +117,7 @@ public String toString() { sb.append(", surname='").append(surname).append('\''); sb.append(", age=").append(age); sb.append(", number=").append(number); + sb.append(", tall=").append(tall); sb.append(", house=").append(house); sb.append('}'); return sb.toString(); @@ -128,6 +134,7 @@ private static MongoDBCollection createTestCollection(String test, int size) { document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size()))); document.put("age", (int) i % 5); document.put("number", (int) i * i); + document.put("tall", (i % 6 == 0)); Document house = new Document(); house.put("color", COLORS.get(random.nextInt(COLORS.size()))); house.put("numRooms", (int) (i % 7) + 1); @@ -473,77 +480,307 @@ public void testAggregate() { } @Test - public void testFacet() { - DataResult allResults = mongoDBCollection.find(new Document(), null); - System.out.println("allResults.getNumResults() = " + allResults.getNumResults()); + public void testFacetBuckets() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + 
MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + String value; + long totalCount = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + value = result.getString(fieldName); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetBucketsBoolean() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "tall"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + String value; + long totalCount = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + value = "" + result.getBoolean(fieldName); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), 
facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetBucketsDotNotation() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "house.color"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + String value; + long totalCount = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Document house = (Document) result.get("house"); + value = house.getString("color"); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetMax() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = 
mongoDBCollection.aggregate(facets, converter, null); + + double maxValue = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + if (value != null) { + if (value > maxValue) { + maxValue = value; + } + } + } + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(max.name(), facetField.getAggregationName()); + Assert.assertEquals(maxValue, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + @Test + public void testFacetMin() { Document match = new Document("age", new BasicDBObject("$gt", 2)); -// Document match = new Document("house.m2", new BasicDBObject("$gt", 10000)); -// List facets = MongoDBQueryUtils.createFacet(match, ""); -// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0..1000000]100000"); -// List facets = MongoDBQueryUtils.createFacet(match, "name,surname"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)"); -// List facets = MongoDBQueryUtils.createFacet(match, "name,house.color"); - List facetsWithDots = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0..20000]:1000"); - // List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0..20000]:1000"); - - System.out.println("facetsWithDots = " + facetsWithDots); - - List facetsWithoutDots = new ArrayList<>(); - for (Bson facet : facetsWithDots) { - Document facetDocument = GenericDocumentComplexConverter.replaceDots(Document.parse(facet.toBsonDocument().toJson())); - facetsWithoutDots.add(facetDocument); - } - System.out.println("facetsWithoutDots = " + facetsWithoutDots); - - List facets = facetsWithoutDots; - - DataResult aggregate = 
mongoDBCollection.aggregate(facets, null); - System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); - System.out.println(">>>>>>>>> facet results (raw)"); - for (Document result : aggregate.getResults()) { - System.out.println("result = " + result); - } - System.out.println(">>>>>>>>> facet results (restore dots)"); - for (Document result : aggregate.getResults()) { - System.out.println("result = " + GenericDocumentComplexConverter.restoreDots(result)); - } - - int counter = 0; - for (Document result : allResults.getResults()) { -// if (result.getInteger("house.m2") > 10000) { - if (result.getInteger("age") > 2) { - counter++; + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List facets = MongoDBQueryUtils.createFacet(match, "min(" + fieldName + ")"); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + double minValue = Double.MAX_VALUE; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + if (value != null) { + if (value < minValue) { + minValue = value; + } } } - System.out.println(">>>>>>>>> all results age > 2: " + counter + " of " + allResults.getNumResults()); -// System.out.println(">>>>>>>>> all results house.m2 > 10000: " + counter + " of " + allResults.getNumResults()); - for (Document result : allResults.getResults()) { -// if (result.getInteger("house.m2") > 10000) { - if (result.getInteger("age") > 2) { - System.out.println("result = " + result); + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(min.name(), facetField.getAggregationName()); + Assert.assertEquals(minValue, facetField.getAggregationValues().get(0), 0.0001); } } } @Test - public void testFacetUsingConverter() { + 
public void testFacetAvg() { Document match = new Document("age", new BasicDBObject("$gt", 2)); -// Document match = new Document("house.m2", new BasicDBObject("$gt", 10000)); -// List facets = MongoDBQueryUtils.createFacet(match, ""); -// List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0..1000000]:100000"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name;name,surname"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); -// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2);name"); - List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0..20000]:1000"); + DataResult matchedResults = mongoDBCollection.find(match, null); + String fieldName = "number"; + List facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); - System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); + + long totalCount = 0; + double totalSum = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + if (value != null) { + totalSum += value; + totalCount++; + } + } for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - System.out.println("facetField:\n" + facetField); + Assert.assertEquals(avg.name(), facetField.getAggregationName()); + Assert.assertEquals(totalSum / totalCount, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List 
facets = MongoDBQueryUtils.createFacet(match, "toto(" + fieldName + ")"); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + mongoDBCollection.aggregate(facets, converter, null); + } + + @Test + public void testFacetCombine() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name,surname"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + String name; + String surname; + long totalCount = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + name = result.getString("name"); + if (StringUtils.isEmpty(name)) { + name = null; + } + surname = result.getString("surname"); + if (StringUtils.isEmpty(surname)) { + surname = null; + } + String key = ""; + if (name != null) { + key += name; + } + key += AND_SEPARATOR; + if (surname != null) { + key += surname; + } + if (!map.containsKey(key)) { + map.put(key, 0); + } + map.put(key, 1 + map.get(key)); + totalCount++; + } + String value; + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetRange() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + int start = 1000; + int end = 5000; + int step = 1000; + String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + 
List facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + long outOfRange = 0; + List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d)); + + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + int bucketNum; + Long value = result.getLong("number"); + if (value != null) { + if (value < start || value > end) { + outOfRange++; + } else { + bucketNum = (int) (value - start) / step; + rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); + } + } + } + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertTrue(facetField.getName().contains("" + (1.0d * outOfRange))); + for (int i = 0; i < facetField.getAggregationValues().size(); i++) { + Assert.assertEquals(rangeValues.get(i), facetField.getAggregationValues().get(i)); + } } } } From f2b080c7ad7750e79fbfbd359e69e54aaf3231f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Nov 2024 18:53:18 +0100 Subject: [PATCH 12/51] mongodb: rename converter, use Long instead Number, #TASK-7151, #TASL=7134 --- .../commons/datastore/core/FacetField.java | 6 ++-- ...ongoDBDocumentToFacetFieldsConverter.java} | 2 +- .../mongodb/MongoDBCollectionTest.java | 34 ++++++++++++------- 3 files changed, 25 insertions(+), 17 deletions(-) rename commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/{MongoDBFacetToFacetFieldsConverter.java => MongoDBDocumentToFacetFieldsConverter.java} (98%) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index f3e247473..8beecd0d8 100644 
--- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -23,7 +23,7 @@ */ public class FacetField { private String name; - private Number count; + private Long count; private List buckets; private String aggregationName; private List aggregationValues; @@ -67,11 +67,11 @@ public FacetField setName(String name) { return this; } - public Number getCount() { + public Long getCount() { return count; } - public FacetField setCount(Number count) { + public FacetField setCount(Long count) { this.count = count; return this; } diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java similarity index 98% rename from commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java rename to commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index e3d6f9fb4..401da6b1a 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -10,7 +10,7 @@ import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; -public class MongoDBFacetToFacetFieldsConverter implements ComplexTypeConverter, Document> { +public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter, Document> { @Override public 
List convertToDataModelType(Document document) { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 14bff9a09..488f87da6 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -486,7 +486,7 @@ public void testFacetBuckets() { String fieldName = "name"; List facets = MongoDBQueryUtils.createFacet(match, fieldName); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); String value; @@ -505,7 +505,8 @@ public void testFacetBuckets() { } for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertFalse(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -525,7 +526,7 @@ public void testFacetBucketsBoolean() { String fieldName = "tall"; List facets = MongoDBQueryUtils.createFacet(match, fieldName); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); String value; @@ -544,7 +545,8 @@ public void testFacetBucketsBoolean() { } for (List 
result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertFalse(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -564,7 +566,7 @@ public void testFacetBucketsDotNotation() { String fieldName = "house.color"; List facets = MongoDBQueryUtils.createFacet(match, fieldName); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); String value; @@ -584,7 +586,8 @@ public void testFacetBucketsDotNotation() { } for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertFalse(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -604,7 +607,7 @@ public void testFacetMax() { String fieldName = "number"; List facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); double maxValue = 0; @@ -620,6 +623,7 @@ public void testFacetMax() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); 
Assert.assertEquals(max.name(), facetField.getAggregationName()); Assert.assertEquals(maxValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -633,7 +637,7 @@ public void testFacetMin() { String fieldName = "number"; List facets = MongoDBQueryUtils.createFacet(match, "min(" + fieldName + ")"); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); double minValue = Double.MAX_VALUE; @@ -649,6 +653,7 @@ public void testFacetMin() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); Assert.assertEquals(min.name(), facetField.getAggregationName()); Assert.assertEquals(minValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -662,7 +667,7 @@ public void testFacetAvg() { String fieldName = "number"; List facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); long totalCount = 0; @@ -678,6 +683,7 @@ public void testFacetAvg() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); Assert.assertEquals(avg.name(), facetField.getAggregationName()); Assert.assertEquals(totalSum / totalCount, facetField.getAggregationValues().get(0), 0.0001); } @@ -691,7 +697,7 @@ public void testFacetInvalidAccumulator() { String fieldName = "number"; List facets = MongoDBQueryUtils.createFacet(match, "toto(" + fieldName + ")"); - 
MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); mongoDBCollection.aggregate(facets, converter, null); } @@ -702,7 +708,7 @@ public void testFacetCombine() { String fieldName = "name,surname"; List facets = MongoDBQueryUtils.createFacet(match, fieldName); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); String name; @@ -735,7 +741,8 @@ public void testFacetCombine() { String value; for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertFalse(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -755,7 +762,7 @@ public void testFacetRange() { int step = 1000; String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; List facets = MongoDBQueryUtils.createFacet(match, fieldName); - MongoDBFacetToFacetFieldsConverter converter = new MongoDBFacetToFacetFieldsConverter(); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); long outOfRange = 0; @@ -777,6 +784,7 @@ public void testFacetRange() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); Assert.assertTrue(facetField.getName().contains("" + (1.0d * outOfRange))); for (int i = 0; i < 
facetField.getAggregationValues().size(); i++) { Assert.assertEquals(rangeValues.get(i), facetField.getAggregationValues().get(i)); From ea3906cf8a24c61097cf81dba55a06780e1da35e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 12 Nov 2024 09:36:27 +0100 Subject: [PATCH 13/51] mongodb: support lists using accumulators, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 16 +++-- .../mongodb/MongoDBCollectionTest.java | 63 +++++++++++++++++++ 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 401da6b1a..5fc535b43 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -83,16 +83,20 @@ public List convertToDataModelType(Document document) { case avg: case stdDevPop: case stdDevSamp: { - Double fieldValue; + List fieldValues = new ArrayList<>(); if (documentValue.get(accumulator.name()) instanceof Integer) { - fieldValue = 1.0d * documentValue.getInteger(accumulator.name()); + fieldValues.add(1.0d * documentValue.getInteger(accumulator.name())); } else if (documentValue.get(accumulator.name()) instanceof Long) { - fieldValue = 1.0d * documentValue.getLong(accumulator.name()); + fieldValues.add(1.0d * documentValue.getLong(accumulator.name())); + } else if (documentValue.get(accumulator.name()) instanceof List) { + List list = (List) documentValue.get(accumulator.name()); + for (Number number : list) { + fieldValues.add(number.doubleValue()); + } } else { - fieldValue = 
documentValue.getDouble(accumulator.name()); + fieldValues.add(documentValue.getDouble(accumulator.name())); } - facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), - Collections.singletonList(fieldValue))); + facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), fieldValues)); break; } default: { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 488f87da6..565d8928e 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -92,6 +92,7 @@ public static class User { public int number; public boolean tall; public House house; + public List dogs; public static class House { public String color; @@ -109,6 +110,20 @@ public String toString() { } } + public static class Dog { + public int age; + public String color; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Dog{"); + sb.append("age=").append(age); + sb.append("color=").append(color); + sb.append('}'); + return sb.toString(); + } + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("User{"); @@ -119,6 +134,7 @@ public String toString() { sb.append(", number=").append(number); sb.append(", tall=").append(tall); sb.append(", house=").append(house); + sb.append(", dogs=").append(dogs); sb.append('}'); return sb.toString(); } @@ -140,6 +156,15 @@ private static MongoDBCollection createTestCollection(String test, int size) { house.put("numRooms", (int) (i % 7) + 1); house.put("m2", (int) i * 23); document.put("house", house); + int numDogs = random.nextInt(3); + List dogs 
= new ArrayList<>(); + for (int j = 0 ; j < numDogs; j++) { + Document dog = new Document(); + dog.put("age", random.nextInt(20)); + dog.put("color", COLORS.get(random.nextInt(COLORS.size()))); + dogs.add(dog); + } + document.put("dogs", dogs); mongoDBCollection.nativeQuery().insert(document, null); } return mongoDBCollection; @@ -690,6 +715,44 @@ public void testFacetAvg() { } } + @Test + public void testFacetMaxDotNotationAndList() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.age"; + List facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + DataResult aggregate2 = mongoDBCollection.aggregate(facets, null); + + List maxValues = new ArrayList<>(Arrays.asList(0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D)); + for (Document result : matchedResults.getResults()) { + List dogs = (List) result.get("dogs"); + if (result.getInteger("age") > 2 && dogs.size() > 0) { + System.out.println(); + for (int i = 0; i < dogs.size(); i++) { + Number value = (Number) dogs.get(i).get("age"); + System.out.print("age = " + result.getInteger("age") + "; i = " + i + "; value = " + value + "; "); + if (value.doubleValue() > maxValues.get(i)) { + maxValues.set(i, value.doubleValue()); + } + } + } + } + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(max.name(), facetField.getAggregationName()); +// for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { +// Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); +// } + } + } + } + @Test(expected = IllegalArgumentException.class) public 
void testFacetInvalidAccumulator() { Document match = new Document("age", new BasicDBObject("$gt", 2)); From e68c30ebba6ad29fcf88c5985701d00b76ec7fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 13 Nov 2024 13:07:37 +0100 Subject: [PATCH 14/51] mongodb: fix sonnar issues, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBCollection.java | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java index c4a42f362..8b26c9eb2 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java @@ -97,8 +97,7 @@ private DataResult endQuery(List result, long numMatches, double start) { long end = System.currentTimeMillis(); int numResults = (result != null) ? 
result.size() : 0; - DataResult queryResult = new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null); - return queryResult; + return new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null); } private DataResult endWrite(long start) { @@ -333,7 +332,7 @@ public DataResult aggregate(List operations, ComplexTypeC long start = startQuery(); DataResult queryResult; List list = new LinkedList<>(); - if (operations != null && operations.size() > 0) { + if (operations != null && !operations.isEmpty()) { MongoDBIterator iterator = mongoDBNativeQuery.aggregate(operations, converter, options); if (queryResultWriter != null) { try { @@ -347,7 +346,7 @@ public DataResult aggregate(List operations, ComplexTypeC } } else { while (iterator.hasNext()) { - list.add((T) iterator.next()); + list.add(iterator.next()); } } } @@ -429,7 +428,7 @@ public DataResult update(ClientSession clientSession, List queri return endWrite( wr.getMatchedCount(), - wr.getInsertedCount() + wr.getUpserts().size(), + (long) wr.getInsertedCount() + wr.getUpserts().size(), wr.getModifiedCount(), wr.getDeletedCount(), 0, @@ -547,8 +546,7 @@ public DataResult createIndex(Bson keys, ObjectMap options) { } mongoDBNativeQuery.createIndex(keys, i); - DataResult dataResult = endQuery(Collections.emptyList(), start); - return dataResult; + return endQuery(Collections.emptyList(), start); } public void dropIndexes() { @@ -558,15 +556,13 @@ public void dropIndexes() { public DataResult dropIndex(Bson keys) { long start = startQuery(); mongoDBNativeQuery.dropIndex(keys); - DataResult dataResult = endQuery(Collections.emptyList(), start); - return dataResult; + return endQuery(Collections.emptyList(), start); } public DataResult getIndex() { long start = startQuery(); List index = mongoDBNativeQuery.getIndex(); - DataResult queryResult = endQuery(index, start); - return queryResult; + return endQuery(index, start); } From 
84d1f92c222ba68852652c3ae6d846ee4dd6af43 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 12 Dec 2024 01:23:35 +0000 Subject: [PATCH 15/51] mondodb: add 'sum' to aggregation operators enum --- .../opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 060817ebe..9ed88100f 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -105,6 +105,7 @@ public enum ComparisonOperator { public enum Accumulator { count, + sum, avg, min, max, @@ -712,7 +713,7 @@ private static List createFacet(Bson query, List facetFields) { accumulator = Accumulator.valueOf(matcher.group(1)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. 
Valid accumulator" - + " functions: " + StringUtils.join(Arrays.asList(count, max, min, avg, stdDevPop, stdDevSamp), ", ")); + + " functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ", ")); } field = matcher.group(2); } else { From 26c96280394315c0ed4194b595ef4c7137c22e24 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 12 Dec 2024 02:49:10 +0000 Subject: [PATCH 16/51] mondodb: fix 'sum' aggregation operator --- .../opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 9ed88100f..b35a67e90 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -63,6 +63,7 @@ public class MongoDBQueryUtils { public static final String OTHER = "Other"; public static final String COUNTS_SUFFIX = "Counts"; + public static final String SUM_SUFFIX = "Sum"; public static final String AVG_SUFFIX = "Avg"; public static final String MIN_SUFFIX = "Min"; public static final String MAX_SUFFIX = "Max"; @@ -771,6 +772,10 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); break; } + case sum: { + facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(sum.name(), id)))); + break; + } case avg: { facet = new Facet(field + AVG_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); break; From 10a7f0c147fb481be1825ee16367c694f75ee281 Mon Sep 17 00:00:00 
2001 From: imedina Date: Thu, 12 Dec 2024 03:08:59 +0000 Subject: [PATCH 17/51] mondodb: fix 'sum' aggregation operator --- .../mongodb/MongoDBDocumentToFacetFieldsConverter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 5fc535b43..87588d585 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -78,9 +78,10 @@ public List convertToDataModelType(Document document) { Document documentValue = ((List) entry.getValue()).get(0); MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); switch (accumulator) { + case avg: + case sum: case max: case min: - case avg: case stdDevPop: case stdDevSamp: { List fieldValues = new ArrayList<>(); From 7943e1b214e1c1bfaddb1adf963d94137328b7c7 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 12 Dec 2024 03:10:01 +0000 Subject: [PATCH 18/51] mondodb: fix 'sum' aggregation operator --- .../mongodb/MongoDBDocumentToFacetFieldsConverter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 87588d585..7356418a9 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -119,7 +119,7 @@ private MongoDBQueryUtils.Accumulator getAccumulator(Document document) { } } throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ",") - + " Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, max, min, avg, stdDevPop, stdDevSamp), ",")); + + " Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ",")); } @Override From e8159f37f540828b3e6d4caa52232c8415fa5558 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 12 Dec 2024 03:11:48 +0000 Subject: [PATCH 19/51] mondodb: fix check style --- .../mongodb/MongoDBDocumentToFacetFieldsConverter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 7356418a9..2e00e6c48 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -78,8 +78,8 @@ public List convertToDataModelType(Document document) { Document documentValue = ((List) entry.getValue()).get(0); MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); switch (accumulator) { - case avg: case sum: + case avg: case max: case min: case stdDevPop: @@ -118,8 +118,8 @@ private MongoDBQueryUtils.Accumulator getAccumulator(Document document) { // Do nothing } } - throw new IllegalArgumentException("No accumulators found 
in facet document: " + StringUtils.join(document.keySet(), ",") - + " Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ",")); + throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ", ") + + "Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ",")); } @Override From 005c45e5c0226ee9dcce63eb46a03c8966d3ec28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 12 Dec 2024 08:52:53 +0100 Subject: [PATCH 20/51] datastore: fix the accumulator 'sum' in MongoDB facets, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBQueryUtils.java | 2 +- .../mongodb/MongoDBCollectionTest.java | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index b35a67e90..be195417a 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -773,7 +773,7 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List break; } case sum: { - facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(sum.name(), id)))); + facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.sum(sum.name(), id)))); break; } case avg: { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java 
b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 565d8928e..cf0608c7a 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -753,6 +753,46 @@ public void testFacetMaxDotNotationAndList() { } } + @Test + public void testFacetSumAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + int total = 0; + String fieldName = "number"; + for (Document result : matchedResults.getResults()) { + System.out.println("result = " + result); + total += result.getLong(fieldName); + } + double avg = total / matchedResults.getNumResults(); + + List facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(Accumulator.avg.name(), facetField.getAggregationName()); + Assert.assertEquals(avg, facetField.getAggregationValues().get(0), 0.5); +// for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { +// Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); +// } + } + } + + + facets = MongoDBQueryUtils.createFacet(match, "sum(" + fieldName + ")"); + aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + 
Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(Accumulator.sum.name(), facetField.getAggregationName()); + Assert.assertEquals(total, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + @Test(expected = IllegalArgumentException.class) public void testFacetInvalidAccumulator() { Document match = new Document("age", new BasicDBObject("$gt", 2)); From 31424d85d7cf280b8b887bd1c77d5976b591bb65 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 01:27:15 +0000 Subject: [PATCH 21/51] mongodb: aggregation test. To be reverted. --- .../datastore/mongodb/MongoDBQueryUtils.java | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index be195417a..47065bd94 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -688,36 +688,43 @@ public static List createFacet(Bson query, String facetField) { } private static List createFacet(Bson query, List facetFields) { + List facetList = new ArrayList<>(); Set includeFields = new HashSet<>(); - List boundaries = new ArrayList<>(); - List facets = new ArrayList<>(); + // For each facet field passed we will create a MongoDB facet, thre are 4 types of facets: + // 1. Facet combining fields with commas. In this case, only 'count' is supported as accumulator. for (String facetField : facetFields) { Facet facet; + // 1. Check if it is a facet combining fields with commas. In this case, only 'count' is supported as accumulator. 
+ // Example: aggregationFields=format,type if (facetField.contains(",")) { - // Facet combining fields (i.e., AND logical) - Document id = new Document(); + Document fields = new Document(); for (String field : facetField.split(",")) { - id.append(field, "$" + field); + fields.append(field, "$" + field); includeFields.add(field); } - facet = new Facet(facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, - Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + facet = new Facet( + facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, + Aggregates.group(fields, Accumulators.sum(count.name(), 1)) + ); } else { - // Facet with accumulators (count, avg, min, max,...) or range (bucket) Accumulator accumulator; String field; + List boundaries = new ArrayList<>(); + + // 2. Facet with accumulators (count, avg, min, max,...) or range (bucket) Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); if (matcher.matches()) { try { accumulator = Accumulator.valueOf(matcher.group(1)); + field = matcher.group(2); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. Valid accumulator" + " functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ", ")); } - field = matcher.group(2); } else { + // 3. Facet with range aggregation if (facetField.contains(RANGE_MARK) || facetField.contains(RANGE_MARK1) || facetField.contains(RANGE_MARK2)) { String[] split = facetField.split(RANGE_SPLIT_MARK); if (split.length == 2) { @@ -739,17 +746,22 @@ private static List createFacet(Bson query, List facetFields) { throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG); } } else { + // 4. 
Facet with count as default accumulator accumulator = count; +// accumulator = Accumulators.sum(count.name(), 1); field = facetField; } } + includeFields.add(field); // Get MongoDB facet facet = getMongoDBFacet(field, accumulator, boundaries); } + + // Add facet to the list of facets to be executed if (facet != null) { - facets.add(facet); + facetList.add(facet); } } @@ -757,8 +769,8 @@ private static List createFacet(Bson query, List facetFields) { Bson match = Aggregates.match(query); Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); // Dot notation management for facets - Document aggregates = GenericDocumentComplexConverter.replaceDots(Document.parse(Aggregates.facet(facets).toBsonDocument() - .toJson())); + Document aggregates = GenericDocumentComplexConverter + .replaceDots(Document.parse(Aggregates.facet(facetList).toBsonDocument().toJson())); return Arrays.asList(match, project, aggregates); } @@ -769,7 +781,8 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List Facet facet; switch (accumulator) { case count: { - facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); +// facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), "$size")))); break; } case sum: { From 57f2138ed35ed6d5237c239f4457b7040d62bcec Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 02:08:24 +0000 Subject: [PATCH 22/51] mongodb: aggregation test 2. To be reverted. 
--- .../commons/datastore/mongodb/MongoDBQueryUtils.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 47065bd94..fbb529e08 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -782,7 +782,13 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List switch (accumulator) { case count: { // facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); - facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), "$size")))); + facet = new Facet(field + COUNTS_SUFFIX, + Arrays.asList(Aggregates.group( + id, + Accumulators.sum(count.name(), 1), + Accumulators.sum("size", "$size"), + Accumulators.avg("avg", "$size") + ))); break; } case sum: { From 3cca26fe25cd3e00d28baa23ecef3fea4782d001 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 11:29:16 +0000 Subject: [PATCH 23/51] mongodb: aggregation test 3. To be reverted. 
--- .../datastore/mongodb/MongoDBQueryUtils.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index fbb529e08..1d3e50f7f 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -781,15 +781,13 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List Facet facet; switch (accumulator) { case count: { -// facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); - facet = new Facet(field + COUNTS_SUFFIX, - Arrays.asList(Aggregates.group( - id, - Accumulators.sum(count.name(), 1), - Accumulators.sum("size", "$size"), - Accumulators.avg("avg", "$size") - ))); - break; + facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), "$size")))); +// facet = new Facet(field + COUNTS_SUFFIX, +// Arrays.asList(Aggregates.group( +// id, +// Accumulators.sum("size", "$size") +// ))); +// break; } case sum: { facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.sum(sum.name(), id)))); From e177dd7f636d70a155761f69ca82e171f416e8f6 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 11:32:49 +0000 Subject: [PATCH 24/51] mongodb: aggregation test 4. To be reverted. 
--- .../org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 1d3e50f7f..3e952acf5 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -787,7 +787,7 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List // id, // Accumulators.sum("size", "$size") // ))); -// break; + break; } case sum: { facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.sum(sum.name(), id)))); From b15ed9aa9ba971057b28e0522d650af58e03b469 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 11:48:27 +0000 Subject: [PATCH 25/51] mongodb: revert all tests --- .../datastore/mongodb/MongoDBQueryUtils.java | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 3e952acf5..f81572e03 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -710,7 +710,8 @@ private static List createFacet(Bson query, List facetFields) { ); } else { Accumulator accumulator; - String field; + String groupField; + String accumulatorField = null; List boundaries = new 
ArrayList<>(); // 2. Facet with accumulators (count, avg, min, max,...) or range (bucket) @@ -718,7 +719,7 @@ private static List createFacet(Bson query, List facetFields) { if (matcher.matches()) { try { accumulator = Accumulator.valueOf(matcher.group(1)); - field = matcher.group(2); + groupField = matcher.group(2); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. Valid accumulator" + " functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ", ")); @@ -732,7 +733,7 @@ private static List createFacet(Bson query, List facetFields) { Matcher matcher2 = RANGE_END_PATTERN.matcher(split[1]); if (matcher1.matches() && matcher2.matches()) { accumulator = bucket; - field = matcher1.group(1); + groupField = matcher1.group(1); double start = Double.parseDouble(matcher1.group(2)); double end = Double.parseDouble(matcher2.group(1)); double step = Double.parseDouble(matcher2.group(2)); @@ -747,16 +748,23 @@ private static List createFacet(Bson query, List facetFields) { } } else { // 4. 
Facet with count as default accumulator - accumulator = count; -// accumulator = Accumulators.sum(count.name(), 1); - field = facetField; + // TODO extract the right field + if (facetField.contains(":")) { + String[] split = facetField.split("[:\\(\\)]"); + accumulator = Accumulator.valueOf(split[1]); + accumulatorField = split[2]; + } else { + accumulator = count; + } + + groupField = facetField; } } - includeFields.add(field); + includeFields.add(groupField); // Get MongoDB facet - facet = getMongoDBFacet(field, accumulator, boundaries); + facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); } // Add facet to the list of facets to be executed @@ -775,13 +783,14 @@ private static List createFacet(Bson query, List facetFields) { return Arrays.asList(match, project, aggregates); } - private static Facet getMongoDBFacet(String field, Accumulator accumulator, List boundaries) { - String id = "$" + field; + private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { + String accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? 
"$" + accumulatorField : "$" + groupField; Facet facet; switch (accumulator) { case count: { - facet = new Facet(field + COUNTS_SUFFIX, Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), "$size")))); + facet = new Facet(groupField + COUNTS_SUFFIX, + Aggregates.group("$" + groupField, Accumulators.sum(count.name(), accumulatorId))); // facet = new Facet(field + COUNTS_SUFFIX, // Arrays.asList(Aggregates.group( // id, @@ -790,33 +799,33 @@ private static Facet getMongoDBFacet(String field, Accumulator accumulator, List break; } case sum: { - facet = new Facet(field + SUM_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.sum(sum.name(), id)))); + facet = new Facet(groupField + SUM_SUFFIX, Aggregates.group(groupField, Accumulators.sum(sum.name(), accumulatorId))); break; } case avg: { - facet = new Facet(field + AVG_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); + facet = new Facet(groupField + AVG_SUFFIX, Aggregates.group(groupField, Accumulators.avg(avg.name(), accumulatorId))); break; } case min: { - facet = new Facet(field + MIN_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); + facet = new Facet(groupField + MIN_SUFFIX, Aggregates.group(groupField, Accumulators.min(min.name(), accumulatorId))); break; } case max: { - facet = new Facet(field + MAX_SUFFIX, Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); + facet = new Facet(groupField + MAX_SUFFIX, Aggregates.group(groupField, Accumulators.max(max.name(), accumulatorId))); break; } case stdDevPop: { - facet = new Facet(field + STD_DEV_POP_SUFFIX, Arrays.asList(Aggregates.group(field, - Accumulators.stdDevPop(stdDevPop.name(), id)))); + facet = new Facet(groupField + STD_DEV_POP_SUFFIX, Arrays.asList(Aggregates.group(groupField, + Accumulators.stdDevPop(stdDevPop.name(), accumulatorId)))); break; } case stdDevSamp: { - facet = new Facet(field + STD_DEV_SAMP_SUFFIX, 
Arrays.asList(Aggregates.group(field, - Accumulators.stdDevSamp(stdDevSamp.name(), id)))); + facet = new Facet(groupField + STD_DEV_SAMP_SUFFIX, Arrays.asList(Aggregates.group(groupField, + Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId)))); break; } case bucket: { - facet = new Facet(field + RANGES_SUFFIX, Aggregates.bucket(id, boundaries, + facet = new Facet(groupField + RANGES_SUFFIX, Aggregates.bucket(accumulatorId, boundaries, new BucketOptions() .defaultBucket(OTHER) .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); From a7c86e003d182d0bd617896ec9d3f31518542f67 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 11:59:59 +0000 Subject: [PATCH 26/51] mongodb: fix aggregation regex --- .../opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index f81572e03..1ad7b7bcc 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -748,9 +748,8 @@ private static List createFacet(Bson query, List facetFields) { } } else { // 4. 
Facet with count as default accumulator - // TODO extract the right field if (facetField.contains(":")) { - String[] split = facetField.split("[:\\(\\)]"); + String[] split = facetField.split("[:()]"); accumulator = Accumulator.valueOf(split[1]); accumulatorField = split[2]; } else { From eb5b519d697f06117c83272382f35a2ffed39ca0 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 12:02:24 +0000 Subject: [PATCH 27/51] mongodb: aggregation style improvement --- .../commons/datastore/mongodb/MongoDBQueryUtils.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 1ad7b7bcc..a7ad9b80e 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -789,7 +789,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, switch (accumulator) { case count: { facet = new Facet(groupField + COUNTS_SUFFIX, - Aggregates.group("$" + groupField, Accumulators.sum(count.name(), accumulatorId))); + Aggregates.group("$" + groupField, Accumulators.sum(accumulator.name(), accumulatorId))); // facet = new Facet(field + COUNTS_SUFFIX, // Arrays.asList(Aggregates.group( // id, @@ -814,13 +814,13 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, break; } case stdDevPop: { - facet = new Facet(groupField + STD_DEV_POP_SUFFIX, Arrays.asList(Aggregates.group(groupField, - Accumulators.stdDevPop(stdDevPop.name(), accumulatorId)))); + facet = new Facet(groupField + STD_DEV_POP_SUFFIX, + Aggregates.group(groupField, Accumulators.stdDevPop(stdDevPop.name(), 
accumulatorId))); break; } case stdDevSamp: { - facet = new Facet(groupField + STD_DEV_SAMP_SUFFIX, Arrays.asList(Aggregates.group(groupField, - Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId)))); + facet = new Facet(groupField + STD_DEV_SAMP_SUFFIX, + Aggregates.group(groupField, Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId))); break; } case bucket: { From a073e84379de6ccfe547e00bd882ad1087f65889 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 13:09:44 +0000 Subject: [PATCH 28/51] mongodb: fix aggregation regex --- .../commons/datastore/mongodb/MongoDBQueryUtils.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index a7ad9b80e..bd5721081 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -783,7 +783,12 @@ private static List createFacet(Bson query, List facetFields) { } private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { - String accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? "$" + accumulatorField : "$" + groupField; + String accumulatorId; + if (accumulator == count) { + accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? "$" + accumulatorField : "1"; + } else { + accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? 
"$" + accumulatorField : "$" + groupField; + } Facet facet; switch (accumulator) { From 3f9386f3a9faea2ddafdf0541103dd87d384e3b1 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 15:38:25 +0000 Subject: [PATCH 29/51] mongodb: fix aggregation --- .../datastore/mongodb/MongoDBQueryUtils.java | 41 ++++++++----------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index bd5721081..89911f263 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -704,22 +704,21 @@ private static List createFacet(Bson query, List facetFields) { fields.append(field, "$" + field); includeFields.add(field); } - facet = new Facet( - facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, + facet = new Facet(facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, Aggregates.group(fields, Accumulators.sum(count.name(), 1)) ); } else { - Accumulator accumulator; String groupField; - String accumulatorField = null; + Accumulator accumulator; List boundaries = new ArrayList<>(); // 2. Facet with accumulators (count, avg, min, max,...) or range (bucket) Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); if (matcher.matches()) { try { - accumulator = Accumulator.valueOf(matcher.group(1)); groupField = matcher.group(2); + accumulator = Accumulator.valueOf(matcher.group(1)); + facet = MongoDBQueryUtils.getMongoDBFacet(groupField, accumulator, boundaries); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. 
Valid accumulator" + " functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ", ")); @@ -740,6 +739,7 @@ private static List createFacet(Bson query, List facetFields) { for (double i = start; i <= end; i += step) { boundaries.add(i); } + facet = MongoDBQueryUtils.getMongoDBFacet(groupField, accumulator, boundaries); } else { throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG); } @@ -750,20 +750,26 @@ private static List createFacet(Bson query, List facetFields) { // 4. Facet with count as default accumulator if (facetField.contains(":")) { String[] split = facetField.split("[:()]"); + groupField = split[0]; accumulator = Accumulator.valueOf(split[1]); - accumulatorField = split[2]; + String accumulatorField = split[2]; + + facet = new Facet(groupField + COUNTS_SUFFIX, + Aggregates.group("$" + groupField, Accumulators.sum(accumulator.name(), "$" + accumulatorField))); } else { + groupField = facetField; accumulator = count; + facet = new Facet(groupField + COUNTS_SUFFIX, + Aggregates.group("$" + groupField, Accumulators.sum(accumulator.name(), 1))); } - - groupField = facetField; +// facet = MongoDBQueryUtils.getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); } } includeFields.add(groupField); // Get MongoDB facet - facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); +// facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); } // Add facet to the list of facets to be executed @@ -782,24 +788,13 @@ private static List createFacet(Bson query, List facetFields) { return Arrays.asList(match, project, aggregates); } - private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { - String accumulatorId; - if (accumulator == count) { - accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? 
"$" + accumulatorField : "1"; - } else { - accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? "$" + accumulatorField : "$" + groupField; - } + private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, List boundaries) { + String accumulatorId = "$" + groupField; Facet facet; switch (accumulator) { case count: { - facet = new Facet(groupField + COUNTS_SUFFIX, - Aggregates.group("$" + groupField, Accumulators.sum(accumulator.name(), accumulatorId))); -// facet = new Facet(field + COUNTS_SUFFIX, -// Arrays.asList(Aggregates.group( -// id, -// Accumulators.sum("size", "$size") -// ))); + facet = new Facet(groupField + COUNTS_SUFFIX, Aggregates.group("$" + groupField, Accumulators.sum(count.name(), 1))); break; } case sum: { From 13b3e596d8a507d373898712567c1e9fddb7959a Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 13 Dec 2024 16:07:52 +0000 Subject: [PATCH 30/51] mongodb: fix aggregation parse --- ...MongoDBDocumentToFacetFieldsConverter.java | 114 +++++++++--------- .../datastore/mongodb/MongoDBQueryUtils.java | 2 +- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 2e00e6c48..2a80dfa56 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -33,6 +33,7 @@ public List convertToDataModelType(Document document) { bucketValue = (String) internalIdValue; } else if (internalIdValue instanceof Boolean || internalIdValue instanceof Integer + || internalIdValue instanceof Long || internalIdValue instanceof 
Double) { bucketValue = internalIdValue.toString(); } else if (internalIdValue instanceof Document) { @@ -43,65 +44,68 @@ public List convertToDataModelType(Document document) { } key = key.substring(0, key.length() - COUNTS_SUFFIX.length()); facets.add(new FacetField(key, total, buckets)); - } else if (key.endsWith(RANGES_SUFFIX)) { - List facetFieldValues = new ArrayList<>(); - Number start = null; - Number end = null; - Number step = null; - Double other = null; - for (Document value : documentValues) { - if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { - other = 1.0d * value.getInteger(count.name()); - } else { - Double range = value.getDouble(INTERNAL_ID); - Integer counter = value.getInteger(count.name()); - facetFieldValues.add(1.0d * counter); - if (start == null) { - start = range; - } - end = range; - if (step == null && start != end) { - step = end.doubleValue() - start.doubleValue(); - } - } - } - key = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); - if (other != null) { - key += " (counts out of range: " + other + ")"; - } - FacetField facetField = new FacetField(key, "range", facetFieldValues) - .setStart(start) - .setEnd(end) - .setStep(step); - facets.add(facetField); } else { - Document documentValue = ((List) entry.getValue()).get(0); - MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); - switch (accumulator) { - case sum: - case avg: - case max: - case min: - case stdDevPop: - case stdDevSamp: { - List fieldValues = new ArrayList<>(); - if (documentValue.get(accumulator.name()) instanceof Integer) { - fieldValues.add(1.0d * documentValue.getInteger(accumulator.name())); - } else if (documentValue.get(accumulator.name()) instanceof Long) { - fieldValues.add(1.0d * documentValue.getLong(accumulator.name())); - } else if (documentValue.get(accumulator.name()) instanceof List) { - List list = (List) 
documentValue.get(accumulator.name()); - for (Number number : list) { - fieldValues.add(number.doubleValue()); - } + if (key.endsWith(RANGES_SUFFIX)) { + List facetFieldValues = new ArrayList<>(); + Number start = null; + Number end = null; + Number step = null; + Double other = null; + for (Document value : documentValues) { + if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { + other = 1.0d * value.getInteger(count.name()); } else { - fieldValues.add(documentValue.getDouble(accumulator.name())); + Double range = value.getDouble(INTERNAL_ID); + Integer counter = value.getInteger(count.name()); + facetFieldValues.add(1.0d * counter); + if (start == null) { + start = range; + } + end = range; + if (step == null && start != end) { + step = end.doubleValue() - start.doubleValue(); + } } - facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), fieldValues)); - break; } - default: { - // Do nothing, exception is raised + key = key.substring(0, + key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); + if (other != null) { + key += " (counts out of range: " + other + ")"; + } + FacetField facetField = new FacetField(key, "range", facetFieldValues) + .setStart(start) + .setEnd(end) + .setStep(step); + facets.add(facetField); + } else { + Document documentValue = ((List) entry.getValue()).get(0); + MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); + switch (accumulator) { + case sum: + case avg: + case max: + case min: + case stdDevPop: + case stdDevSamp: { + List fieldValues = new ArrayList<>(); + if (documentValue.get(accumulator.name()) instanceof Integer) { + fieldValues.add(1.0d * documentValue.getInteger(accumulator.name())); + } else if (documentValue.get(accumulator.name()) instanceof Long) { + fieldValues.add(1.0d * documentValue.getLong(accumulator.name())); + } else if (documentValue.get(accumulator.name()) instanceof 
List) { + List list = (List) documentValue.get(accumulator.name()); + for (Number number : list) { + fieldValues.add(number.doubleValue()); + } + } else { + fieldValues.add(documentValue.getDouble(accumulator.name())); + } + facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), fieldValues)); + break; + } + default: { + // Do nothing, exception is raised + } } } } diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 89911f263..93f82b2e7 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -755,7 +755,7 @@ private static List createFacet(Bson query, List facetFields) { String accumulatorField = split[2]; facet = new Facet(groupField + COUNTS_SUFFIX, - Aggregates.group("$" + groupField, Accumulators.sum(accumulator.name(), "$" + accumulatorField))); + Aggregates.group("$" + groupField, Accumulators.sum(count.name(), "$" + accumulatorField))); } else { groupField = facetField; accumulator = count; From dd3981272bbc48693b7e0bd763497b38018b5402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 13 Dec 2024 19:49:40 +0100 Subject: [PATCH 31/51] datastore: implement the facet following the example:bioformat:sum(size), #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 29 ++++++- .../datastore/mongodb/MongoDBQueryUtils.java | 75 ++++++++++++++----- .../mongodb/MongoDBCollectionTest.java | 59 +++++++++++++++ 3 files changed, 139 insertions(+), 24 deletions(-) diff --git 
a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 2e00e6c48..355c2bbfc 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -22,7 +22,7 @@ public List convertToDataModelType(Document document) { for (Map.Entry entry : document.entrySet()) { String key = entry.getKey(); List documentValues = (List) entry.getValue(); - if (key.endsWith(COUNTS_SUFFIX)) { + if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX)) { List buckets = new ArrayList<>(documentValues.size()); long total = 0; for (Document documentValue : documentValues) { @@ -38,10 +38,31 @@ public List convertToDataModelType(Document document) { } else if (internalIdValue instanceof Document) { bucketValue = StringUtils.join(((Document) internalIdValue).values(), AND_SEPARATOR); } - buckets.add(new FacetField.Bucket(bucketValue, counter, null)); + + List bucketFacetFields = null; + if (key.endsWith(FACET_ACC_SUFFIX)) { + String[] split = key.split(SEPARATOR); + String name = split[2]; + String aggregationName = split[1]; + Double value; + if (documentValue.get(aggregationName) instanceof Integer) { + value = 1.0d * documentValue.getInteger(aggregationName); + } else if (documentValue.get(aggregationName) instanceof Long) { + value = 1.0d * documentValue.getLong(aggregationName); + } else { + value = documentValue.getDouble(aggregationName); + } + List aggregationValues = Collections.singletonList(value); + FacetField facetField = new FacetField(name, aggregationName, aggregationValues); + // Perhaps it’s redundant, as it is 
also set in the bucket + facetField.setCount(counter); + bucketFacetFields = Collections.singletonList(facetField); + } + + buckets.add(new FacetField.Bucket(bucketValue, counter, bucketFacetFields)); total += counter; } - key = key.substring(0, key.length() - COUNTS_SUFFIX.length()); + key = key.split(SEPARATOR)[0]; facets.add(new FacetField(key, total, buckets)); } else if (key.endsWith(RANGES_SUFFIX)) { List facetFieldValues = new ArrayList<>(); @@ -65,7 +86,7 @@ public List convertToDataModelType(Document document) { } } } - key = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); + key = key.split(SEPARATOR)[0].replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); if (other != null) { key += " (counts out of range: " + other + ")"; } diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index f81572e03..0b204dde5 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -62,6 +62,7 @@ public class MongoDBQueryUtils { public static final String AND_SEPARATOR = "_and_"; public static final String OTHER = "Other"; + public static final String FACET_ACC_SUFFIX = "Acc"; public static final String COUNTS_SUFFIX = "Counts"; public static final String SUM_SUFFIX = "Sum"; public static final String AVG_SUFFIX = "Avg"; @@ -70,6 +71,7 @@ public class MongoDBQueryUtils { public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; public static final String RANGES_SUFFIX = "Ranges"; + public static final String SEPARATOR = "___"; // TODO: Added 
on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated @@ -748,20 +750,22 @@ private static List createFacet(Bson query, List facetFields) { } } else { // 4. Facet with count as default accumulator - // TODO extract the right field if (facetField.contains(":")) { String[] split = facetField.split("[:\\(\\)]"); + groupField = split[0]; accumulator = Accumulator.valueOf(split[1]); accumulatorField = split[2]; } else { + groupField = facetField; accumulator = count; } - - groupField = facetField; } } includeFields.add(groupField); + if (StringUtils.isNotEmpty(accumulatorField)) { + includeFields.add(accumulatorField); + } // Get MongoDB facet facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); @@ -784,48 +788,78 @@ private static List createFacet(Bson query, List facetFields) { } private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { - String accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? "$" + accumulatorField : "$" + groupField; + String groupFieldId = groupField; + String accumulatorId = "$" + groupField; + String facetName = null; + if (StringUtils.isNotEmpty(accumulatorField)) { + groupFieldId = "$" + groupField; + accumulatorId = "$" + accumulatorField; + facetName = groupField + SEPARATOR + accumulator + SEPARATOR + accumulatorField + SEPARATOR + FACET_ACC_SUFFIX; + } +// String accumulatorId = StringUtils.isNotEmpty(accumulatorField) ? 
"$" + accumulatorField : "$" + groupField; Facet facet; switch (accumulator) { case count: { - facet = new Facet(groupField + COUNTS_SUFFIX, - Aggregates.group("$" + groupField, Accumulators.sum(count.name(), accumulatorId))); -// facet = new Facet(field + COUNTS_SUFFIX, -// Arrays.asList(Aggregates.group( -// id, -// Accumulators.sum("size", "$size") -// ))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + COUNTS_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group("$" + groupFieldId, Accumulators.sum(count.name(), 1))); break; } case sum: { - facet = new Facet(groupField + SUM_SUFFIX, Aggregates.group(groupField, Accumulators.sum(sum.name(), accumulatorId))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + SUM_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case avg: { - facet = new Facet(groupField + AVG_SUFFIX, Aggregates.group(groupField, Accumulators.avg(avg.name(), accumulatorId))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + AVG_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case min: { - facet = new Facet(groupField + MIN_SUFFIX, Aggregates.group(groupField, Accumulators.min(min.name(), accumulatorId))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + MIN_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case max: { - facet = new Facet(groupField + MAX_SUFFIX, Aggregates.group(groupField, Accumulators.max(max.name(), accumulatorId))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + MAX_SUFFIX; + } + 
facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case stdDevPop: { - facet = new Facet(groupField + STD_DEV_POP_SUFFIX, Arrays.asList(Aggregates.group(groupField, - Accumulators.stdDevPop(stdDevPop.name(), accumulatorId)))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + STD_DEV_POP_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case stdDevSamp: { - facet = new Facet(groupField + STD_DEV_SAMP_SUFFIX, Arrays.asList(Aggregates.group(groupField, - Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId)))); + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + STD_DEV_SAMP_SUFFIX; + } + facet = new Facet(facetName, Aggregates.group(groupFieldId, + Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case bucket: { - facet = new Facet(groupField + RANGES_SUFFIX, Aggregates.bucket(accumulatorId, boundaries, + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + RANGES_SUFFIX; + } + facet = new Facet(facetName, Aggregates.bucket(accumulatorId, boundaries, new BucketOptions() .defaultBucket(OTHER) .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); @@ -836,6 +870,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, break; } } + return facet; } diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index cf0608c7a..0ce77cd8a 100644 --- 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -793,6 +793,65 @@ public void testFacetSumAccumulator() { } } + @Test + public void testFacetGroupSumAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + int totalCount = 0; + String groupFieldName = "name"; + String accumulatorFieldName = "age"; + Map numberPerNames = new HashMap<>(); + Map countPerNames = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + String name = result.getString(groupFieldName); + if (!numberPerNames.containsKey(name)) { + numberPerNames.put(name, 0); + countPerNames.put(name, 0); + } + numberPerNames.put(name, result.getInteger(accumulatorFieldName) + numberPerNames.get(name)); + countPerNames.put(name, 1 + countPerNames.get(name)); + } + + for (Map.Entry entry : numberPerNames.entrySet()) { + System.out.println(entry.getKey() + " --> " + entry.getValue() + ", count = " + countPerNames.get(entry.getKey())); + totalCount += countPerNames.get(entry.getKey()); + } + System.out.println("totalCount = " + totalCount); + + String acc = "sum"; // "count"; // "avg"; + String facet = groupFieldName + ":" + acc + "(" + accumulatorFieldName + ")"; + List facets = MongoDBQueryUtils.createFacet(match, facet); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + Assert.assertEquals(1, aggregate.getResults().size()); + FacetField facetField = aggregate.getResults().get(0).get(0); + Assert.assertEquals(groupFieldName, facetField.getName()); + Assert.assertEquals(totalCount, facetField.getCount(), 0.001); + 
Assert.assertEquals(numberPerNames.size(), facetField.getBuckets().size(), 0.001); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + Assert.assertTrue(countPerNames.containsKey(bucket.getValue())); + Assert.assertEquals(countPerNames.get(bucket.getValue()), bucket.getCount(), 0.001); + Assert.assertEquals(1, bucket.getFacetFields().size()); + Assert.assertEquals(accumulatorFieldName, bucket.getFacetFields().get(0).getName()); + Assert.assertEquals(acc, bucket.getFacetFields().get(0).getAggregationName()); + Assert.assertEquals(numberPerNames.get(bucket.getValue()), bucket.getFacetFields().get(0).getAggregationValues().get(0), 0.001); + } + +// for (List result : aggregate.getResults()) { +// Assert.assertEquals(1, result.size()); +// for (FacetField facetField : result) { +// Assert.assertTrue(facetField.getCount() == null); +// Assert.assertEquals(Accumulator.avg.name(), facetField.getAggregationName()); +// Assert.assertEquals(avg, facetField.getAggregationValues().get(0), 0.5); +//// for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { +//// Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); +//// } +// } +// } + } + + @Test(expected = IllegalArgumentException.class) public void testFacetInvalidAccumulator() { Document match = new Document("age", new BasicDBObject("$gt", 2)); From ac66d666011b468897f3172ca3d24020ec38d3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Sat, 14 Dec 2024 19:38:51 +0100 Subject: [PATCH 32/51] datastore: fix facet 'format:count(size)' to behaviour as 'count(format)', #TASK-7151, #TASK-7134 --- .../opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 203fc19bf..b8a0003c1 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -800,10 +800,8 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, Facet facet; switch (accumulator) { case count: { - if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + COUNTS_SUFFIX; - } - facet = new Facet(facetName, Aggregates.group("$" + groupFieldId, Accumulators.sum(count.name(), 1))); + facetName = groupField + SEPARATOR + COUNTS_SUFFIX; + facet = new Facet(facetName, Aggregates.group("$" + groupField, Accumulators.sum(count.name(), 1))); break; } case sum: { From 421d5cea5c64e79f8df68d91b9eb2d84908fbccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 16 Jan 2025 16:52:42 +0100 Subject: [PATCH 33/51] datastore: improve MongoDB facets for arrays by using unwind, #TASK-7151, #TASK-7134 --- .../commons/datastore/core/FacetField.java | 7 + ...MongoDBDocumentToFacetFieldsConverter.java | 6 +- .../datastore/mongodb/MongoDBQueryUtils.java | 40 +++- .../mongodb/MongoDBCollectionTest.java | 211 +++++++++++++++++- 4 files changed, 242 insertions(+), 22 deletions(-) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index 8beecd0d8..8f1251364 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -37,6 +37,13 @@ public 
FacetField(String name, long count, List buckets) { this.buckets = buckets; } + public FacetField(String name, long count, String aggregationName, List aggregationValues) { + this.name = name; + this.count = count; + this.aggregationName = aggregationName; + this.aggregationValues = aggregationValues; + } + public FacetField(String name, String aggregationName, List aggregationValues) { this.name = name; this.aggregationName = aggregationName; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 3da563f88..986ee990c 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -152,7 +152,11 @@ public List convertToDataModelType(Document document) { } else { fieldValues.add(documentValue.getDouble(accumulator.name())); } - facets.add(new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), fieldValues)); + long count = 0; + if (documentValue.containsKey("count")) { + count = Long.valueOf(documentValue.getInteger("count")); + } + facets.add(new FacetField(documentValue.getString(INTERNAL_ID), count, accumulator.name(), fieldValues)); break; } default: { diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index b8a0003c1..1b166f4c6 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -692,6 +692,7 @@ public static List createFacet(Bson query, String facetField) { private static List createFacet(Bson query, List facetFields) { List facetList = new ArrayList<>(); Set includeFields = new HashSet<>(); + List unwindList = new ArrayList<>(); // For each facet field passed we will create a MongoDB facet, thre are 4 types of facets: // 1. Facet combining fields with commas. In this case, only 'count' is supported as accumulator. @@ -739,8 +740,11 @@ private static List createFacet(Bson query, List facetFields) { double start = Double.parseDouble(matcher1.group(2)); double end = Double.parseDouble(matcher2.group(1)); double step = Double.parseDouble(matcher2.group(2)); - for (double i = start; i <= end; i += step) { - boundaries.add(i); + int numSections = (int) Math.ceil((end - start + 1) / step); + double boundary = start; + for (int i = 0; i < numSections + 1; i++) { + boundaries.add(boundary); + boundary += step; } } else { throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG); @@ -769,6 +773,17 @@ private static List createFacet(Bson query, List facetFields) { // Get MongoDB facet facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); + + // Unwind in any case + String[] split = groupField.split("\\."); + String acc = ""; + for (String s : split) { + if (!StringUtils.isEmpty(acc)) { + acc += "."; + } + acc += s; + unwindList.add(Aggregates.unwind("$" + acc)); + } } // Add facet to the list of facets to be executed @@ -777,14 +792,19 @@ private static List createFacet(Bson query, List facetFields) { } } - // Build MongoDB pipeline for facets - Bson match = Aggregates.match(query); - Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); - // Dot notation management for facets - Document aggregates = GenericDocumentComplexConverter - 
.replaceDots(Document.parse(Aggregates.facet(facetList).toBsonDocument().toJson())); - - return Arrays.asList(match, project, aggregates); + // Build and return the MongoDB pipeline for facets: match, project, [unwind,] aggregates + List result = new ArrayList<>(); + // 1 - Match + result.add(Aggregates.match(query)); + // 2 - Project + result.add(Aggregates.project(Projections.include(new ArrayList<>(includeFields)))); + // 3 - Unwind + if (!unwindList.isEmpty()) { + result.addAll(unwindList); + } + // 4 - Aggregates (dot notation management for facets) + result.add(GenericDocumentComplexConverter.replaceDots(Document.parse(Aggregates.facet(facetList).toBsonDocument().toJson()))); + return result; } private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 1a5af4be6..d0e70a605 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -156,7 +156,7 @@ private static MongoDBCollection createTestCollection(String test, int size) { house.put("numRooms", (int) (i % 7) + 1); house.put("m2", (int) i * 23); document.put("house", house); - int numDogs = random.nextInt(3); + int numDogs = random.nextInt(5); List dogs = new ArrayList<>(); for (int j = 0 ; j < numDogs; j++) { Document dog = new Document(); @@ -166,6 +166,7 @@ private static MongoDBCollection createTestCollection(String test, int size) { } document.put("dogs", dogs); mongoDBCollection.nativeQuery().insert(document, null); + System.out.println("document.toJson() = " + 
document.toJson()); } return mongoDBCollection; } @@ -625,6 +626,182 @@ public void testFacetBucketsDotNotation() { } } + @Test + public void testFacetCountBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.color"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + String value; + long totalCount = 0; + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + List dogs = (List) result.get("dogs"); + for (Document dog : dogs) { + totalCount++; + String color = dog.getString("color"); + if (StringUtils.isEmpty(color)) { + color = EMPTY; + map.put(color, 0); + } else if (!map.containsKey(color)) { + map.put(color, 0); + } + map.put(color, 1 + map.get(color)); + } + } + + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetAvgBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "avg(dogs.age)"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + 
MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + int counter = 0; + int acc = 0; + for (Document doc : matchedResults.getResults()) { + List dogs = (List) doc.get("dogs"); + for (Document dog : dogs) { + counter++; + acc += (int) dog.get("age"); + } + } + System.out.println("counter = " + counter); + System.out.println("(acc/counter) = " + (1.0d * acc / counter)); + Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); + } + + @Test + public void testFacetFilterAccumulatorBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.color:avg(dogs.age)"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + String value; + long totalCount = 0; + Map counterMap = new HashMap<>(); + Map accMap = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + List dogs = (List) result.get("dogs"); + for (Document dog : dogs) { + totalCount++; + String color = dog.getString("color"); + int age = (int) dog.get("age"); + if (StringUtils.isEmpty(color)) { + color = EMPTY; + counterMap.put(color, 0); + accMap.put(color, 0); + } else if (!counterMap.containsKey(color)) { + counterMap.put(color, 0); + accMap.put(color, 0); + } + counterMap.put(color, 1 + counterMap.get(color)); + 
accMap.put(color, age + accMap.get(color)); + } + } + + for (List result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(counterMap.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(counterMap.get(value).longValue(), bucket.getCount()); + Assert.assertEquals(counterMap.get(value).longValue(), bucket.getFacetFields().get(0).getCount().longValue()); + Assert.assertEquals("avg", bucket.getFacetFields().get(0).getAggregationName()); + Assert.assertEquals(1.0 * accMap.get(value) / counterMap.get(value), bucket.getFacetFields().get(0).getAggregationValues().get(0), 0.0001); + } + } + } + } + + @Test + public void testFacetRangeArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + int start = 1; + int end = 20; + int step = 5; + String fieldName = "dogs.age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + long outOfRange = 0; + List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d)); + + Map map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + int bucketNum; + List dogs = (List) result.get("dogs"); + for (Document dog : dogs) { + int value = (int) dog.get("age"); + if (value < start || value > end) { + outOfRange++; + } else { + bucketNum = (int) 
(value - start) / step; + rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); + } + } + } + for (List result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertTrue(facetField.getCount() == null); + Assert.assertTrue(facetField.getName().contains("" + (1.0d * outOfRange))); + for (int i = 0; i < facetField.getAggregationValues().size(); i++) { + Assert.assertEquals(rangeValues.get(i), facetField.getAggregationValues().get(i)); + } + } + } + } + @Test public void testFacetMax() { Document match = new Document("age", new BasicDBObject("$gt", 2)); @@ -635,10 +812,12 @@ public void testFacetMax() { MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + long totalCount = 0; double maxValue = 0; Map map = new HashMap<>(); for (Document result : matchedResults.getResults()) { Long value = result.getLong(fieldName); + totalCount++; if (value != null) { if (value > maxValue) { maxValue = value; @@ -648,7 +827,7 @@ public void testFacetMax() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(max.name(), facetField.getAggregationName()); Assert.assertEquals(maxValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -665,10 +844,12 @@ public void testFacetMin() { MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + long count = 0; double minValue = Double.MAX_VALUE; Map map = new HashMap<>(); for (Document result : matchedResults.getResults()) { Long value = result.getLong(fieldName); + count++; if (value != null) { if (value < minValue) { 
minValue = value; @@ -678,7 +859,7 @@ public void testFacetMin() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(count, facetField.getCount().longValue()); Assert.assertEquals(min.name(), facetField.getAggregationName()); Assert.assertEquals(minValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -708,7 +889,7 @@ public void testFacetAvg() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(totalCount, facetField.getCount().longValue()); Assert.assertEquals(avg.name(), facetField.getAggregationName()); Assert.assertEquals(totalSum / totalCount, facetField.getAggregationValues().get(0), 0.0001); } @@ -727,6 +908,7 @@ public void testFacetMaxDotNotationAndList() { DataResult aggregate2 = mongoDBCollection.aggregate(facets, null); + int count = 0; List maxValues = new ArrayList<>(Arrays.asList(0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D)); for (Document result : matchedResults.getResults()) { List dogs = (List) result.get("dogs"); @@ -734,6 +916,7 @@ public void testFacetMaxDotNotationAndList() { System.out.println(); for (int i = 0; i < dogs.size(); i++) { Number value = (Number) dogs.get(i).get("age"); + count++; System.out.print("age = " + result.getInteger("age") + "; i = " + i + "; value = " + value + "; "); if (value.doubleValue() > maxValues.get(i)) { maxValues.set(i, value.doubleValue()); @@ -744,7 +927,7 @@ public void testFacetMaxDotNotationAndList() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(count, facetField.getCount().longValue()); Assert.assertEquals(max.name(), facetField.getAggregationName()); // for (int 
i = 0; i < facetField.getAggregationValues().size() ; i++) { // Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); @@ -758,9 +941,11 @@ public void testFacetSumAccumulator() { Document match = new Document("age", new BasicDBObject("$gt", 2)); DataResult matchedResults = mongoDBCollection.find(match, null); int total = 0; + int count = 0; String fieldName = "number"; for (Document result : matchedResults.getResults()) { System.out.println("result = " + result); + count++; total += result.getLong(fieldName); } double avg = total / matchedResults.getNumResults(); @@ -771,7 +956,7 @@ public void testFacetSumAccumulator() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(count, facetField.getCount().longValue()); Assert.assertEquals(Accumulator.avg.name(), facetField.getAggregationName()); Assert.assertEquals(avg, facetField.getAggregationValues().get(0), 0.5); // for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { @@ -786,7 +971,7 @@ public void testFacetSumAccumulator() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); + Assert.assertEquals(count, facetField.getCount().longValue()); Assert.assertEquals(Accumulator.sum.name(), facetField.getAggregationName()); Assert.assertEquals(total, facetField.getAggregationValues().get(0), 0.0001); } @@ -912,25 +1097,29 @@ public void testFacetRange() { int step = 1000; String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = 
mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); long outOfRange = 0; - List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d)); + List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d, 0d)); Map map = new HashMap<>(); for (Document result : matchedResults.getResults()) { int bucketNum; Long value = result.getLong("number"); if (value != null) { - if (value < start || value > end) { + bucketNum = (int) (value - start) / step; + int numSections = (int) Math.ceil((end - start + 1) / step); + if (value < start || bucketNum > numSections) { outOfRange++; } else { - bucketNum = (int) (value - start) / step; - rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); + rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); } } } + System.out.println("rangeValues.toString() = " + rangeValues.toString()); for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { From 1184be36ed2c5646c2ed12f16724aebb8247ba31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 17 Jan 2025 13:55:20 +0100 Subject: [PATCH 34/51] datastore: fix MongoDB facet parser, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBQueryUtils.java | 34 +++++++++++++---- .../mongodb/MongoDBCollectionTest.java | 38 +++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 1b166f4c6..eb8ee37ab 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -775,14 +775,18 @@ private static List createFacet(Bson query, List facetFields) { facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); // Unwind in any case - String[] split = groupField.split("\\."); - String acc = ""; - for (String s : split) { - if (!StringUtils.isEmpty(acc)) { - acc += "."; - } - acc += s; - unwindList.add(Aggregates.unwind("$" + acc)); + Set unwindFields = new HashSet<>(); + if (StringUtils.isNotEmpty(groupField)) { + unwindFields.addAll(getUnwindFields(groupField)); + } + if (StringUtils.isNotEmpty(accumulatorField)) { + unwindFields.addAll(getUnwindFields(accumulatorField)); + } + // We must order the "unwind" fields + List unwindFieldList = new ArrayList<>(unwindFields); + unwindFieldList.sort(Comparator.comparingInt(s -> s.length() - s.replace(".", "").length())); + for (String unwindField : unwindFieldList) { + unwindList.add(Aggregates.unwind("$" + unwindField)); } } @@ -807,6 +811,20 @@ private static List createFacet(Bson query, List facetFields) { return result; } + private static Collection getUnwindFields(String field) { + List unwindFields = new ArrayList<>(); + String[] split = field.split("\\."); + String acc = ""; + for (String s : split) { + if (!StringUtils.isEmpty(acc)) { + acc += "."; + } + acc += s; + unwindFields.add(acc); + } + return unwindFields; + } + private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { String groupFieldId = groupField; String accumulatorId = "$" + groupField; diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index d0e70a605..3e9d03ac0 100644 --- 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -112,12 +112,14 @@ public String toString() { public static class Dog { public int age; + public List years; public String color; @Override public String toString() { final StringBuilder sb = new StringBuilder("Dog{"); sb.append("age=").append(age); + sb.append("years=").append(years); sb.append("color=").append(color); sb.append('}'); return sb.toString(); @@ -161,6 +163,14 @@ private static MongoDBCollection createTestCollection(String test, int size) { for (int j = 0 ; j < numDogs; j++) { Document dog = new Document(); dog.put("age", random.nextInt(20)); + int numYears = random.nextInt(3); + List years = new ArrayList<>(); + for (int k = 0 ; k < numYears; k++) { + years.add(random.nextInt(100) + 1900); + } + if (years.size() > 1) { + dog.put("years", years); + } dog.put("color", COLORS.get(random.nextInt(COLORS.size()))); dogs.add(dog); } @@ -701,6 +711,34 @@ public void testFacetAvgBucketsArray() { Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); } +// @Test +// public void testFacetAccumulatorMaxBucketsArray() { +// Document match = new Document("age", new BasicDBObject("$gt", 2)); +// DataResult matchedResults = mongoDBCollection.find(match, null); +// +// String fieldName = "dogs.color:max(dogs.years)"; +// List facets = MongoDBQueryUtils.createFacet(match, fieldName); +// System.out.println("facets = " + facets); +// MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); +// DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); +// for (List facetFieldList : aggregate.getResults()) { +// System.out.println("facetFieldList = " + facetFieldList); +// } +// +// int counter = 0; 
+// int acc = 0; +// for (Document doc : matchedResults.getResults()) { +// List dogs = (List) doc.get("dogs"); +// for (Document dog : dogs) { +// counter++; +// acc += (int) dog.get("age"); +// } +// } +// System.out.println("counter = " + counter); +// System.out.println("(acc/counter) = " + (1.0d * acc / counter)); +// Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); +// } +// @Test public void testFacetFilterAccumulatorBucketsArray() { Document match = new Document("age", new BasicDBObject("$gt", 2)); From 7255b4238785148c2c699308acce4816365269cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 20 Jan 2025 08:37:41 +0100 Subject: [PATCH 35/51] datastore: fix the converter by replacing TO_REPLACE_DOTS by '.' in the facet field name results, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 986ee990c..aa0f52174 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -7,6 +7,7 @@ import java.util.*; +import static org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter.TO_REPLACE_DOTS; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; @@ -18,6 +19,7 @@ public List convertToDataModelType(Document document) { return
Collections.emptyList(); } + String facetFieldName; List facets = new ArrayList<>(); for (Map.Entry entry : document.entrySet()) { String key = entry.getKey(); @@ -54,7 +56,7 @@ public List convertToDataModelType(Document document) { value = documentValue.getDouble(aggregationName); } List aggregationValues = Collections.singletonList(value); - FacetField facetField = new FacetField(name, aggregationName, aggregationValues); + FacetField facetField = new FacetField(name.replace(TO_REPLACE_DOTS, "."), aggregationName, aggregationValues); // Perhaps it’s redundant, as it is also set in the bucket facetField.setCount(counter); bucketFacetFields = Collections.singletonList(facetField); @@ -63,8 +65,8 @@ public List convertToDataModelType(Document document) { buckets.add(new FacetField.Bucket(bucketValue, counter, bucketFacetFields)); total += counter; } - key = key.split(SEPARATOR)[0]; - facets.add(new FacetField(key, total, buckets)); + facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); + facets.add(new FacetField(facetFieldName, total, buckets)); } else if (key.endsWith(RANGES_SUFFIX)) { List facetFieldValues = new ArrayList<>(); Number start = null; @@ -87,11 +89,11 @@ public List convertToDataModelType(Document document) { } } } - key = key.split(SEPARATOR)[0].replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); + facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); if (other != null) { - key += " (counts out of range: " + other + ")"; + facetFieldName += " (counts out of range: " + other + ")"; } - FacetField facetField = new FacetField(key, "range", facetFieldValues) + FacetField facetField = new FacetField(facetFieldName, "range", facetFieldValues) .setStart(start) .setEnd(end) .setStep(step); @@ -119,12 +121,11 @@ public List convertToDataModelType(Document document) { } } } - key = key.substring(0, - key.length() - RANGES_SUFFIX.length()).replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, "."); + 
facetFieldName = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(TO_REPLACE_DOTS, "."); if (other != null) { - key += " (counts out of range: " + other + ")"; + facetFieldName += " (counts out of range: " + other + ")"; } - FacetField facetField = new FacetField(key, "range", facetFieldValues) + FacetField facetField = new FacetField(facetFieldName, "range", facetFieldValues) .setStart(start) .setEnd(end) .setStep(step); @@ -156,7 +157,8 @@ public List convertToDataModelType(Document document) { if (documentValue.containsKey("count")) { count = Long.valueOf(documentValue.getInteger("count")); } - facets.add(new FacetField(documentValue.getString(INTERNAL_ID), count, accumulator.name(), fieldValues)); + facetFieldName = documentValue.getString(INTERNAL_ID).replace(TO_REPLACE_DOTS, "."); + facets.add(new FacetField(facetFieldName, count, accumulator.name(), fieldValues)); break; } default: { From 0d6f430e84c88fc56f4317765a5c9a96af6a5855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 30 Jan 2025 06:11:44 +0100 Subject: [PATCH 36/51] datastore: support facets for 'dates' in MongoDB, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 9 +- .../datastore/mongodb/MongoDBQueryUtils.java | 106 ++++++++++++++---- .../mongodb/MongoDBCollectionTest.java | 51 +++++++++ 3 files changed, 143 insertions(+), 23 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index aa0f52174..d7cb20a84 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -24,7 +24,8 @@ public List convertToDataModelType(Document document) { for (Map.Entry entry : document.entrySet()) { String key = entry.getKey(); List documentValues = (List) entry.getValue(); - if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX)) { + if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX) || key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) + || key.endsWith(DAY_SUFFIX)) { List buckets = new ArrayList<>(documentValues.size()); long total = 0; for (Document documentValue : documentValues) { @@ -66,7 +67,11 @@ public List convertToDataModelType(Document document) { total += counter; } facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); - facets.add(new FacetField(facetFieldName, total, buckets)); + FacetField facetField = new FacetField(facetFieldName, total, buckets); + if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { + facetField.setAggregationName(key.split(SEPARATOR)[1].toLowerCase(Locale.ROOT)); + } + facets.add(facetField); } else if (key.endsWith(RANGES_SUFFIX)) { List facetFieldValues = new ArrayList<>(); Number start = null; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index eb8ee37ab..70a37e0bd 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -33,6 +33,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static com.mongodb.client.model.Aggregates.*; +import static 
com.mongodb.client.model.Projections.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** @@ -68,6 +70,9 @@ public class MongoDBQueryUtils { public static final String AVG_SUFFIX = "Avg"; public static final String MIN_SUFFIX = "Min"; public static final String MAX_SUFFIX = "Max"; + public static final String YEAR_SUFFIX = "Year"; + public static final String MONTH_SUFFIX = "Month"; + public static final String DAY_SUFFIX = "Day"; public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; public static final String RANGES_SUFFIX = "Ranges"; @@ -114,7 +119,10 @@ public enum Accumulator { max, stdDevPop, stdDevSamp, - bucket + bucket, + year, + month, + day } public static Bson createFilter(String mongoDbField, String queryParam, Query query) { @@ -635,12 +643,12 @@ public static List createGroupBy(Bson query, String groupByField, String i return createGroupBy(query, Arrays.asList(groupByField.split(",")), idField, count); } else { Bson match = Aggregates.match(query); - Bson project = Aggregates.project(Projections.include(groupByField, idField)); + Bson project = project(Projections.include(groupByField, idField)); Bson group; if (count) { - group = Aggregates.group("$" + groupByField, Accumulators.sum("count", 1)); + group = group("$" + groupByField, Accumulators.sum("count", 1)); } else { - group = Aggregates.group("$" + groupByField, Accumulators.addToSet("features", "$" + idField)); + group = group("$" + groupByField, Accumulators.addToSet("features", "$" + idField)); } return Arrays.asList(match, project, group); } @@ -660,7 +668,7 @@ public static List createGroupBy(Bson query, List groupByField, St // add all group-by fields to the projection together with the aggregation field name List groupByFields = new ArrayList<>(groupByField); groupByFields.add(idField); - Bson project = Aggregates.project(Projections.include(groupByFields)); + Bson project = 
project(Projections.include(groupByFields)); // _id document creation to have the multiple id Document id = new Document(); @@ -669,9 +677,9 @@ public static List createGroupBy(Bson query, List groupByField, St } Bson group; if (count) { - group = Aggregates.group(id, Accumulators.sum("count", 1)); + group = group(id, Accumulators.sum("count", 1)); } else { - group = Aggregates.group(id, Accumulators.addToSet("features", "$" + idField)); + group = group(id, Accumulators.addToSet("features", "$" + idField)); } return Arrays.asList(match, project, group); } @@ -693,6 +701,7 @@ private static List createFacet(Bson query, List facetFields) { List facetList = new ArrayList<>(); Set includeFields = new HashSet<>(); List unwindList = new ArrayList<>(); + List dateProjections = new ArrayList<>(); // For each facet field passed we will create a MongoDB facet, thre are 4 types of facets: // 1. Facet combining fields with commas. In this case, only 'count' is supported as accumulator. @@ -709,7 +718,7 @@ private static List createFacet(Bson query, List facetFields) { } facet = new Facet( facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, - Aggregates.group(fields, Accumulators.sum(count.name(), 1)) + group(fields, Accumulators.sum(count.name(), 1)) ); } else { Accumulator accumulator; @@ -771,6 +780,27 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(accumulatorField); } + // Date management in format YYYYMMDDhhmmss: year, month, day + switch (accumulator) { + case year: { + dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$toInt", new Document("$substrCP", + Arrays.asList("$" + groupField, 0, 4))))); + break; + } + case month: { + dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$toInt", new Document("$substrCP", + Arrays.asList("$" + groupField, 4, 2))))); + break; + } + case day: { + dateProjections.add(computed(groupField + SEPARATOR + day.name(), new 
Document("$toInt", new Document("$substrCP", + Arrays.asList("$" + groupField, 6, 2))))); + break; + } + default: + break; + } + // Get MongoDB facet facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); @@ -801,13 +831,26 @@ private static List createFacet(Bson query, List facetFields) { // 1 - Match result.add(Aggregates.match(query)); // 2 - Project - result.add(Aggregates.project(Projections.include(new ArrayList<>(includeFields)))); + + List projections = new ArrayList<>(); + + // 2.1 - Include fields + for (String field : includeFields) { + projections.add(include(field)); + } + + // 2.2 - Compute data fields + projections.addAll(dateProjections); + + result.add(project(fields(projections))); + // 3 - Unwind if (!unwindList.isEmpty()) { - result.addAll(unwindList); + result.addAll(unwindList); } + // 4 - Aggregates (dot notation management for facets) - result.add(GenericDocumentComplexConverter.replaceDots(Document.parse(Aggregates.facet(facetList).toBsonDocument().toJson()))); + result.add(GenericDocumentComplexConverter.replaceDots(Document.parse(facet(facetList).toBsonDocument().toJson()))); return result; } @@ -839,14 +882,35 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, switch (accumulator) { case count: { facetName = groupField + SEPARATOR + COUNTS_SUFFIX; - facet = new Facet(facetName, Aggregates.group("$" + groupField, Accumulators.sum(count.name(), 1))); + facet = new Facet(facetName, group("$" + groupField, Accumulators.sum(count.name(), 1))); + break; + } + case year: { + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + YEAR_SUFFIX; + } + facet = new Facet(facetName, group("$" + groupField + SEPARATOR + year.name(), Accumulators.sum(count.name(), 1))); + break; + } + case month: { + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + MONTH_SUFFIX; + } + facet = new Facet(facetName, group("$" + groupField + SEPARATOR + month.name(), 
Accumulators.sum(count.name(), 1))); + break; + } + case day: { + if (StringUtils.isEmpty(facetName)) { + facetName = groupField + SEPARATOR + DAY_SUFFIX; + } + facet = new Facet(facetName, group("$" + groupField + SEPARATOR + day.name(), Accumulators.sum(count.name(), 1))); break; } case sum: { if (StringUtils.isEmpty(facetName)) { facetName = groupField + SEPARATOR + SUM_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -854,7 +918,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, if (StringUtils.isEmpty(facetName)) { facetName = groupField + SEPARATOR + AVG_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -862,7 +926,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, if (StringUtils.isEmpty(facetName)) { facetName = groupField + SEPARATOR + MIN_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -870,7 +934,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, if (StringUtils.isEmpty(facetName)) { facetName = groupField + SEPARATOR + MAX_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -878,7 +942,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, if (StringUtils.isEmpty(facetName)) { facetName = groupField + 
SEPARATOR + STD_DEV_POP_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -886,7 +950,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, if (StringUtils.isEmpty(facetName)) { facetName = groupField + SEPARATOR + STD_DEV_SAMP_SUFFIX; } - facet = new Facet(facetName, Aggregates.group(groupFieldId, + facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } @@ -963,7 +1027,7 @@ public static Bson getSkip(QueryOptions options) { public static Bson getProjection(QueryOptions options) { Bson projection = getProjection(null, options); - return projection != null ? Aggregates.project(projection) : null; + return projection != null ? project(projection) : null; } protected static Bson getProjection(Bson projection, QueryOptions options) { @@ -1017,7 +1081,7 @@ protected static Bson getProjection(Bson projection, QueryOptions options) { projections.add(include); // MongoDB allows to exclude _id when include is present if (excludeId) { - projections.add(Projections.excludeId()); + projections.add(excludeId()); } } else { if (exclude != null) { @@ -1051,7 +1115,7 @@ protected static Bson getProjection(Bson projection, QueryOptions options) { } if (projections.size() > 0) { - projectionResult = Projections.fields(projections); + projectionResult = fields(projections); } return projectionResult; diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 3e9d03ac0..a915f8c48 100644 --- 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -31,6 +31,9 @@ import java.io.DataOutputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; import java.util.*; import static org.junit.Assert.*; @@ -90,6 +93,7 @@ public static class User { public String surname; public int age; public int number; + public String date; public boolean tall; public House house; public List dogs; @@ -134,6 +138,7 @@ public String toString() { sb.append(", surname='").append(surname).append('\''); sb.append(", age=").append(age); sb.append(", number=").append(number); + sb.append(", date='").append(date).append('\''); sb.append(", tall=").append(tall); sb.append(", house=").append(house); sb.append(", dogs=").append(dogs); @@ -146,12 +151,19 @@ private static MongoDBCollection createTestCollection(String test, int size) { MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(test); Document document; Random random = new Random(); + + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMddHHmmss"); + LocalDateTime now = LocalDateTime.now(); + for (long i = 0; i < size; i++) { document = new Document("id", i); document.put("name", NAMES.get(random.nextInt(NAMES.size()))); document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size()))); document.put("age", (int) i % 5); document.put("number", (int) i * i); + LocalDateTime futureDate = now.plusDays(random.nextInt(1000)); + document.put("date", futureDate.format(formatter)); document.put("tall", (i % 6 == 0)); Document house = new Document(); house.put("color", COLORS.get(random.nextInt(COLORS.size()))); @@ -1188,6 +1200,45 @@ public void testFacetInvalidRangeFormat2() { 
MongoDBQueryUtils.createFacet(match, "house.m2[toto0..20000]..1000"); } + @Test + public void testFacetYear() { + Date date = new Date(); + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); + System.out.println("sdf.format(date) = " + sdf.format(date)); + + Document match = new Document("age", new BasicDBObject("$gt", 2)); + //MongoDBQueryUtils.createFacet(match, "year(date)"); + List facets = createFacet(match, "name"); + System.out.println("counts for 'name'; facets = " + facets); + + String facetField = "year(date)"; + System.out.println("\nfacetField = " + facetField); + facets = createFacet(match, facetField); + System.out.println("year counts for 'date'; facets = " + facets); + + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + + facetField = "month(date)"; + System.out.println("\nfacetField = " + facetField); + facets = createFacet(match, facetField); + System.out.println("year counts for 'date'; facets = " + facets); + + converter = new MongoDBDocumentToFacetFieldsConverter(); + aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + + facetField = "day(date)"; + System.out.println("\nfacetField = " + facetField); + facets = createFacet(match, facetField); + System.out.println("year counts for 'date'; facets = " + facets); + + converter = new MongoDBDocumentToFacetFieldsConverter(); + aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + } + @Test public void testInsert() throws Exception { Long countBefore = mongoDBCollectionInsertTest.count().getNumMatches(); From 0dde11ae814469b7b9bbc2eba9941861ae4c572a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: 
Thu, 30 Jan 2025 09:33:58 +0100 Subject: [PATCH 37/51] datastore: improve MongoDB facet exception message, #TASK-7151, #TASK-7134 --- .../commons/datastore/mongodb/MongoDBQueryUtils.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 70a37e0bd..f1a987333 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -26,12 +26,14 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryParam; +import javax.swing.plaf.ActionMapUIResource; import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static com.mongodb.client.model.Aggregates.*; import static com.mongodb.client.model.Projections.*; @@ -733,8 +735,12 @@ private static List createFacet(Bson query, List facetFields) { accumulator = Accumulator.valueOf(matcher.group(1)); groupField = matcher.group(2); } catch (IllegalArgumentException e) { + List validAccumulators = Arrays.stream(Accumulator.values()) + .filter(acc -> !acc.name().equalsIgnoreCase(bucket.name())) + .collect(Collectors.toList()); throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. Valid accumulator" - + " functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ", ")); + + " functions: " + StringUtils.join(validAccumulators, ", ")); + } } else { // 3. 
Facet with range aggregation From fc5dd926bb51ed276989a25867c538f0295ad088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 30 Jan 2025 09:37:49 +0100 Subject: [PATCH 38/51] datastore: fix checkstyle, #TASK-7151, #TASK-7134 --- .../opencb/commons/datastore/mongodb/MongoDBQueryUtils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index f1a987333..b9644803e 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -26,7 +26,6 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryParam; -import javax.swing.plaf.ActionMapUIResource; import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -37,6 +36,8 @@ import static com.mongodb.client.model.Aggregates.*; import static com.mongodb.client.model.Projections.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.bucket; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.count; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** From 80104033ff192624e0a29dbbbca90a2212fc5ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 30 Jan 2025 19:45:34 +0100 Subject: [PATCH 39/51] datastore: improve facets for dates, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 5 +- .../datastore/mongodb/MongoDBQueryUtils.java | 78 ++++++++++++------- 
.../mongodb/MongoDBCollectionTest.java | 47 +++++++---- 3 files changed, 85 insertions(+), 45 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index d7cb20a84..b33ec05c8 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -69,7 +69,10 @@ public List convertToDataModelType(Document document) { facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); FacetField facetField = new FacetField(facetFieldName, total, buckets); if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { - facetField.setAggregationName(key.split(SEPARATOR)[1].toLowerCase(Locale.ROOT)); + // Remove the data field and keep year, month and day + List labels = new ArrayList<>(Arrays.asList(key.split(SEPARATOR))); + labels.remove(0); + facetField.setAggregationName(StringUtils.join(labels, AND_SEPARATOR).toLowerCase(Locale.ROOT)); } facets.add(facetField); } else if (key.endsWith(RANGES_SUFFIX)) { diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index b9644803e..8f287255a 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -63,6 +63,10 @@ public class 
MongoDBQueryUtils { public static final String INVALID_FORMAT_MSG = "Invalid format "; public static final String RANGE_FORMAT_MSG = " for range aggregation. Valid format is: field[start..end]:step, e.g: size[0..1000]:200"; + public static final String YEAR_FACET_MARK = "[YEAR]"; + public static final String MONTH_FACET_MARK = "[MONTH]"; + public static final String DAY_FACET_MARK = "[DAY]"; + public static final String INTERNAL_ID = "_id"; public static final String AND_SEPARATOR = "_and_"; public static final String OTHER = "Other"; @@ -737,12 +741,42 @@ private static List createFacet(Bson query, List facetFields) { groupField = matcher.group(2); } catch (IllegalArgumentException e) { List validAccumulators = Arrays.stream(Accumulator.values()) - .filter(acc -> !acc.name().equalsIgnoreCase(bucket.name())) + .filter(acc -> !acc.name().equalsIgnoreCase(bucket.name()) + && !acc.name().equalsIgnoreCase(year.name()) + && !acc.name().equalsIgnoreCase(month.name()) + && !acc.name().equalsIgnoreCase(day.name())) .collect(Collectors.toList()); throw new IllegalArgumentException("Invalid accumulator function '" + matcher.group(1) + "'. 
Valid accumulator" + " functions: " + StringUtils.join(validAccumulators, ", ")); } + } else if (facetField.toUpperCase(Locale.ROOT).endsWith(YEAR_FACET_MARK)) { + groupField = facetField.substring(0, facetField.length() - YEAR_FACET_MARK.length()); + accumulator = year; + + // Add projections + dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 0, 4)))); + } else if (facetField.toUpperCase(Locale.ROOT).endsWith(MONTH_FACET_MARK)) { + groupField = facetField.substring(0, facetField.length() - MONTH_FACET_MARK.length()); + accumulator = month; + + // Add projections + dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 0, 4)))); + dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 4, 2)))); + } else if (facetField.toUpperCase(Locale.ROOT).endsWith(DAY_FACET_MARK)) { + groupField = facetField.substring(0, facetField.length() - DAY_FACET_MARK.length()); + accumulator = day; + + // Add projections + dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 0, 4)))); + dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 4, 2)))); + dateProjections.add(computed(groupField + SEPARATOR + day.name(), new Document("$substrCP", + Arrays.asList("$" + groupField, 6, 2)))); } else { // 3. 
Facet with range aggregation if (facetField.contains(RANGE_MARK) || facetField.contains(RANGE_MARK1) || facetField.contains(RANGE_MARK2)) { @@ -787,27 +821,6 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(accumulatorField); } - // Date management in format YYYYMMDDhhmmss: year, month, day - switch (accumulator) { - case year: { - dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$toInt", new Document("$substrCP", - Arrays.asList("$" + groupField, 0, 4))))); - break; - } - case month: { - dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$toInt", new Document("$substrCP", - Arrays.asList("$" + groupField, 4, 2))))); - break; - } - case day: { - dateProjections.add(computed(groupField + SEPARATOR + day.name(), new Document("$toInt", new Document("$substrCP", - Arrays.asList("$" + groupField, 6, 2))))); - break; - } - default: - break; - } - // Get MongoDB facet facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); @@ -853,7 +866,7 @@ private static List createFacet(Bson query, List facetFields) { // 3 - Unwind if (!unwindList.isEmpty()) { - result.addAll(unwindList); + result.addAll(unwindList); } // 4 - Aggregates (dot notation management for facets) @@ -901,16 +914,27 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case month: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + MONTH_SUFFIX; + facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX; } - facet = new Facet(facetName, group("$" + groupField + SEPARATOR + month.name(), Accumulators.sum(count.name(), 1))); + + Document fields = new Document(); + fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name()); + fields.append(groupField + SEPARATOR + month.name(), "$" + groupField + SEPARATOR + month.name()); + + facet = new Facet(facetName, group(fields, 
Accumulators.sum(count.name(), 1))); break; } case day: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + DAY_SUFFIX; + facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX; } - facet = new Facet(facetName, group("$" + groupField + SEPARATOR + day.name(), Accumulators.sum(count.name(), 1))); + + Document fields = new Document(); + fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name()); + fields.append(groupField + SEPARATOR + month.name(), "$" + groupField + SEPARATOR + month.name()); + fields.append(groupField + SEPARATOR + day.name(), "$" + groupField + SEPARATOR + day.name()); + + facet = new Facet(facetName, group(fields, Accumulators.sum(count.name(), 1))); break; } case sum: { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index a915f8c48..18d59f239 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -1086,7 +1086,7 @@ public void testFacetInvalidAccumulator() { mongoDBCollection.aggregate(facets, converter, null); } - @Test + @Test public void testFacetCombine() { Document match = new Document("age", new BasicDBObject("$gt", 2)); DataResult matchedResults = mongoDBCollection.find(match, null); @@ -1207,36 +1207,49 @@ public void testFacetYear() { System.out.println("sdf.format(date) = " + sdf.format(date)); Document match = new Document("age", new BasicDBObject("$gt", 2)); - //MongoDBQueryUtils.createFacet(match, "year(date)"); - List facets = createFacet(match, "name"); - System.out.println("counts for 'name'; facets = " + 
facets); - String facetField = "year(date)"; + String facetField = "date[YEAR]"; System.out.println("\nfacetField = " + facetField); - facets = createFacet(match, facetField); - System.out.println("year counts for 'date'; facets = " + facets); + List facets = createFacet(match, facetField); + System.out.println("\nyear counts for 'date'; facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - facetField = "month(date)"; + Assert.assertEquals(year.name(), aggregate.first().get(0).getAggregationName()); + } + + @Test + public void testFacetMonth() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + + String facetField = "date[MONTH]"; System.out.println("\nfacetField = " + facetField); - facets = createFacet(match, facetField); - System.out.println("year counts for 'date'; facets = " + facets); + List facets = createFacet(match, facetField); + System.out.println("\nmonth counts for 'date'; facets = " + facets); - converter = new MongoDBDocumentToFacetFieldsConverter(); - aggregate = mongoDBCollection.aggregate(facets, converter, null); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - facetField = "day(date)"; + Assert.assertEquals(year.name() + AND_SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); + } + + @Test + public void testFacetDay() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + + String facetField = "date[DAY]"; System.out.println("\nfacetField = " + facetField); - facets = createFacet(match, facetField); - System.out.println("year counts for 'date'; facets = " + facets); + List facets = 
createFacet(match, facetField); + System.out.println("\nmonth counts for 'date'; facets = " + facets); - converter = new MongoDBDocumentToFacetFieldsConverter(); - aggregate = mongoDBCollection.aggregate(facets, converter, null); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); + + Assert.assertEquals(year.name() + AND_SEPARATOR + month.name() + AND_SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); } @Test From 9de45bb86b9615413b92d94322e8726161716f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 31 Jan 2025 09:26:52 +0100 Subject: [PATCH 40/51] datastore: rename the separator '_and_' to '_' in MongoDB facet results, #TASK-7151, #TASK-7134 --- .../mongodb/MongoDBDocumentToFacetFieldsConverter.java | 4 ++-- .../commons/datastore/mongodb/MongoDBQueryUtils.java | 4 ++-- .../commons/datastore/mongodb/MongoDBCollectionTest.java | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index b33ec05c8..b59f2b6bc 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -40,7 +40,7 @@ public List convertToDataModelType(Document document) { || internalIdValue instanceof Double) { bucketValue = internalIdValue.toString(); } else if (internalIdValue instanceof Document) { - bucketValue = 
StringUtils.join(((Document) internalIdValue).values(), AND_SEPARATOR); + bucketValue = StringUtils.join(((Document) internalIdValue).values(), COMBINE_SEPARATOR); } List bucketFacetFields = null; @@ -72,7 +72,7 @@ public List convertToDataModelType(Document document) { // Remove the data field and keep year, month and day List labels = new ArrayList<>(Arrays.asList(key.split(SEPARATOR))); labels.remove(0); - facetField.setAggregationName(StringUtils.join(labels, AND_SEPARATOR).toLowerCase(Locale.ROOT)); + facetField.setAggregationName(StringUtils.join(labels, COMBINE_SEPARATOR).toLowerCase(Locale.ROOT)); } facets.add(facetField); } else if (key.endsWith(RANGES_SUFFIX)) { diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 8f287255a..0e27090fd 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -68,7 +68,7 @@ public class MongoDBQueryUtils { public static final String DAY_FACET_MARK = "[DAY]"; public static final String INTERNAL_ID = "_id"; - public static final String AND_SEPARATOR = "_and_"; + public static final String COMBINE_SEPARATOR = "_"; public static final String OTHER = "Other"; public static final String FACET_ACC_SUFFIX = "Acc"; @@ -724,7 +724,7 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(field); } facet = new Facet( - facetField.replace(",", AND_SEPARATOR) + COUNTS_SUFFIX, + facetField.replace(",", COMBINE_SEPARATOR) + COUNTS_SUFFIX, group(fields, Accumulators.sum(count.name(), 1)) ); } else { diff --git 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 18d59f239..6a4d3ae43 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -1095,8 +1095,9 @@ public void testFacetCombine() { List facets = MongoDBQueryUtils.createFacet(match, fieldName); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate.toString()); - String name; + String name; String surname; long totalCount = 0; Map map = new HashMap<>(); @@ -1113,7 +1114,7 @@ public void testFacetCombine() { if (name != null) { key += name; } - key += AND_SEPARATOR; + key += COMBINE_SEPARATOR; if (surname != null) { key += surname; } @@ -1233,7 +1234,7 @@ public void testFacetMonth() { DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - Assert.assertEquals(year.name() + AND_SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); + Assert.assertEquals(year.name() + COMBINE_SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); } @Test @@ -1249,7 +1250,7 @@ public void testFacetDay() { DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - Assert.assertEquals(year.name() + AND_SEPARATOR + month.name() + AND_SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); + Assert.assertEquals(year.name() + 
COMBINE_SEPARATOR + month.name() + COMBINE_SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); } @Test From 6413bed185a9dc2176b8452e97483ac173fd280c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 31 Jan 2025 17:16:27 +0100 Subject: [PATCH 41/51] datastore: use '__' as separator, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 7 +-- .../datastore/mongodb/MongoDBQueryUtils.java | 46 ++++++++----------- .../mongodb/MongoDBCollectionTest.java | 19 +++++--- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index b59f2b6bc..e4783ebc7 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -8,8 +8,8 @@ import java.util.*; import static org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter.TO_REPLACE_DOTS; -import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter, Document> { @@ -40,7 +40,7 @@ public List convertToDataModelType(Document document) { || internalIdValue instanceof Double) { bucketValue = internalIdValue.toString(); } else if (internalIdValue instanceof Document) { - bucketValue = StringUtils.join(((Document) internalIdValue).values(), COMBINE_SEPARATOR); + bucketValue = 
StringUtils.join(((Document) internalIdValue).values(), SEPARATOR); } List bucketFacetFields = null; @@ -68,11 +68,12 @@ public List convertToDataModelType(Document document) { } facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); FacetField facetField = new FacetField(facetFieldName, total, buckets); + facetField.setAggregationName(count.name()); if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { // Remove the data field and keep year, month and day List labels = new ArrayList<>(Arrays.asList(key.split(SEPARATOR))); labels.remove(0); - facetField.setAggregationName(StringUtils.join(labels, COMBINE_SEPARATOR).toLowerCase(Locale.ROOT)); + facetField.setAggregationName(StringUtils.join(labels, SEPARATOR).toLowerCase(Locale.ROOT)); } facets.add(facetField); } else if (key.endsWith(RANGES_SUFFIX)) { diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 0e27090fd..f1862d23a 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -68,22 +68,22 @@ public class MongoDBQueryUtils { public static final String DAY_FACET_MARK = "[DAY]"; public static final String INTERNAL_ID = "_id"; - public static final String COMBINE_SEPARATOR = "_"; +// public static final String COMBINE_SEPARATOR = "_"; public static final String OTHER = "Other"; public static final String FACET_ACC_SUFFIX = "Acc"; public static final String COUNTS_SUFFIX = "Counts"; - public static final String SUM_SUFFIX = "Sum"; - public static final String AVG_SUFFIX = "Avg"; - public static final String MIN_SUFFIX = "Min"; - public static final String 
MAX_SUFFIX = "Max"; +// public static final String SUM_SUFFIX = "Sum"; +// public static final String AVG_SUFFIX = "Avg"; +// public static final String MIN_SUFFIX = "Min"; +// public static final String MAX_SUFFIX = "Max"; public static final String YEAR_SUFFIX = "Year"; public static final String MONTH_SUFFIX = "Month"; public static final String DAY_SUFFIX = "Day"; - public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; - public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; +// public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; +// public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; public static final String RANGES_SUFFIX = "Ranges"; - public static final String SEPARATOR = "___"; + public static final String SEPARATOR = "__"; // TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated @@ -724,7 +724,7 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(field); } facet = new Facet( - facetField.replace(",", COMBINE_SEPARATOR) + COUNTS_SUFFIX, + facetField.replace(",", SEPARATOR), // + COUNTS_SUFFIX, group(fields, Accumulators.sum(count.name(), 1)) ); } else { @@ -906,16 +906,12 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, break; } case year: { - if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + YEAR_SUFFIX; - } + facetName = groupField + SEPARATOR + YEAR_SUFFIX; facet = new Facet(facetName, group("$" + groupField + SEPARATOR + year.name(), Accumulators.sum(count.name(), 1))); break; } case month: { - if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX; - } + facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX; Document fields = new Document(); fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name()); @@ -925,9 +921,7 @@ private static Facet 
getMongoDBFacet(String groupField, Accumulator accumulator, break; } case day: { - if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX; - } + facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX; Document fields = new Document(); fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name()); @@ -939,7 +933,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case sum: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + SUM_SUFFIX; + facetName = groupField; // + SEPARATOR + SUM_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -947,7 +941,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case avg: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + AVG_SUFFIX; + facetName = groupField; // + SEPARATOR + AVG_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -955,7 +949,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case min: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + MIN_SUFFIX; + facetName = groupField; // + SEPARATOR + MIN_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -963,7 +957,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case max: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + MAX_SUFFIX; + facetName = groupField; // + SEPARATOR + MAX_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, 
Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -971,7 +965,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevPop: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + STD_DEV_POP_SUFFIX; + facetName = groupField; // + SEPARATOR + STD_DEV_POP_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -979,16 +973,14 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevSamp: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + STD_DEV_SAMP_SUFFIX; + facetName = groupField; // + SEPARATOR + STD_DEV_SAMP_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } case bucket: { - if (StringUtils.isEmpty(facetName)) { - facetName = groupField + SEPARATOR + RANGES_SUFFIX; - } + facetName = groupField + SEPARATOR + RANGES_SUFFIX; facet = new Facet(facetName, Aggregates.bucket(accumulatorId, boundaries, new BucketOptions() .defaultBucket(OTHER) diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 6a4d3ae43..2f54143bd 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -658,6 +658,8 @@ public void testFacetCountBucketsArray() { System.out.println("facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new 
MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); + for (List facetFieldList : aggregate.getResults()) { System.out.println("facetFieldList = " + facetFieldList); } @@ -819,9 +821,10 @@ public void testFacetRangeArray() { System.out.println("facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); - for (List facetFieldList : aggregate.getResults()) { - System.out.println("facetFieldList = " + facetFieldList); - } + System.out.println("aggregate = " + aggregate); +// for (List facetFieldList : aggregate.getResults()) { +// System.out.println("facetFieldList = " + facetFieldList); +// } long outOfRange = 0; List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d)); @@ -925,6 +928,7 @@ public void testFacetAvg() { List facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); long totalCount = 0; double totalSum = 0; @@ -955,8 +959,10 @@ public void testFacetMaxDotNotationAndList() { List facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); DataResult aggregate2 = mongoDBCollection.aggregate(facets, null); + System.out.println("aggregate2.toString() = " + aggregate2); int count = 0; List maxValues = new ArrayList<>(Arrays.asList(0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D)); @@ -1003,6 +1009,7 @@ public void 
testFacetSumAccumulator() { List facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { @@ -1114,7 +1121,7 @@ public void testFacetCombine() { if (name != null) { key += name; } - key += COMBINE_SEPARATOR; + key += SEPARATOR; if (surname != null) { key += surname; } @@ -1234,7 +1241,7 @@ public void testFacetMonth() { DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - Assert.assertEquals(year.name() + COMBINE_SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); + Assert.assertEquals(year.name() + SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); } @Test @@ -1250,7 +1257,7 @@ public void testFacetDay() { DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - Assert.assertEquals(year.name() + COMBINE_SEPARATOR + month.name() + COMBINE_SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); + Assert.assertEquals(year.name() + SEPARATOR + month.name() + SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); } @Test From 52ae7ee683f460fc75c2b02c495854307d23f354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 31 Jan 2025 17:23:21 +0100 Subject: [PATCH 42/51] datastore: fix Sonar issues, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBQueryUtils.java | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git
a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index f1862d23a..c4b726a22 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -45,9 +45,6 @@ */ public class MongoDBQueryUtils { - @Deprecated - private static final String REGEX_SEPARATOR = "(\\w+|\\^)"; - // private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~/?|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_NUMERIC_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_BOOLEAN_PATTERN = Pattern.compile("^(!=|!?=?~|==?)([^=<>~!]+.*)$"); @@ -68,20 +65,13 @@ public class MongoDBQueryUtils { public static final String DAY_FACET_MARK = "[DAY]"; public static final String INTERNAL_ID = "_id"; -// public static final String COMBINE_SEPARATOR = "_"; public static final String OTHER = "Other"; public static final String FACET_ACC_SUFFIX = "Acc"; public static final String COUNTS_SUFFIX = "Counts"; -// public static final String SUM_SUFFIX = "Sum"; -// public static final String AVG_SUFFIX = "Avg"; -// public static final String MIN_SUFFIX = "Min"; -// public static final String MAX_SUFFIX = "Max"; public static final String YEAR_SUFFIX = "Year"; public static final String MONTH_SUFFIX = "Month"; public static final String DAY_SUFFIX = "Day"; -// public static final String STD_DEV_POP_SUFFIX = "StdDevPop"; -// public static final String STD_DEV_SAMP_SUFFIX = "stdDevSamp"; public static final String RANGES_SUFFIX = 
"Ranges"; public static final String SEPARATOR = "__"; @@ -933,7 +923,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case sum: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + SUM_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -941,7 +931,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case avg: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + AVG_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -949,7 +939,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case min: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + MIN_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -957,7 +947,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case max: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + MAX_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -965,7 +955,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevPop: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + STD_DEV_POP_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ 
-973,7 +963,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevSamp: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; // + SEPARATOR + STD_DEV_SAMP_SUFFIX; + facetName = groupField; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); From 9243ebb6fc8bf18b244038e365c13482bcc2d3c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 31 Jan 2025 21:41:28 +0100 Subject: [PATCH 43/51] datastore: fix MongoDB facets when combining multiple fields, #TASK-7151, #TASK-7134 --- .../MongoDBDocumentToFacetFieldsConverter.java | 7 ++++++- .../datastore/mongodb/MongoDBQueryUtils.java | 2 +- .../datastore/mongodb/MongoDBCollectionTest.java | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index e4783ebc7..b71e6c138 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -26,9 +26,12 @@ public List convertToDataModelType(Document document) { List documentValues = (List) entry.getValue(); if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX) || key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { + facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); + List buckets = new ArrayList<>(documentValues.size()); long total = 0; for (Document documentValue : 
documentValues) { + long counter = documentValue.getInteger(count.name()); String bucketValue = ""; Object internalIdValue = documentValue.get(INTERNAL_ID); @@ -41,6 +44,9 @@ public List convertToDataModelType(Document document) { bucketValue = internalIdValue.toString(); } else if (internalIdValue instanceof Document) { bucketValue = StringUtils.join(((Document) internalIdValue).values(), SEPARATOR); + if (key.endsWith(COUNTS_SUFFIX)) { + facetFieldName = key.substring(0, key.indexOf(COUNTS_SUFFIX)); + } } List bucketFacetFields = null; @@ -66,7 +72,6 @@ public List convertToDataModelType(Document document) { buckets.add(new FacetField.Bucket(bucketValue, counter, bucketFacetFields)); total += counter; } - facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); FacetField facetField = new FacetField(facetFieldName, total, buckets); facetField.setAggregationName(count.name()); if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index c4b726a22..c60dee0c4 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -714,7 +714,7 @@ private static List createFacet(Bson query, List facetFields) { includeFields.add(field); } facet = new Facet( - facetField.replace(",", SEPARATOR), // + COUNTS_SUFFIX, + facetField.replace(",", SEPARATOR) + COUNTS_SUFFIX, group(fields, Accumulators.sum(count.name(), 1)) ); } else { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java 
b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 2f54143bd..c8d440f4d 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -1093,6 +1093,20 @@ public void testFacetInvalidAccumulator() { mongoDBCollection.aggregate(facets, converter, null); } + @Test + public void testFacetMultiple() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name;surname"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + Assert.assertEquals(2, aggregate.first().size()); + } + @Test public void testFacetCombine() { Document match = new Document("age", new BasicDBObject("$gt", 2)); From d3666fcc849e45e4597aee902a78f95dd91fe866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 31 Jan 2025 23:32:21 +0100 Subject: [PATCH 44/51] datastore: add more JUnit tests for MongoDB facets, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBQueryUtils.java | 18 ++++--- .../mongodb/MongoDBCollectionTest.java | 47 +++++++++++++++++-- 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index c60dee0c4..f76e9557e 100644 --- 
a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -69,6 +69,12 @@ public class MongoDBQueryUtils { public static final String FACET_ACC_SUFFIX = "Acc"; public static final String COUNTS_SUFFIX = "Counts"; + public static final String SUM_SUFFIX = "Sum"; + public static final String MIN_SUFFIX = "Min"; + public static final String MAX_SUFFIX = "Max"; + public static final String AVG_SUFFIX = "Avg"; + public static final String STDDEVPOP_SUFFIX = "StdDevPop"; + public static final String STDDEVSAMP_SUFFIX = "StdDevSamp"; public static final String YEAR_SUFFIX = "Year"; public static final String MONTH_SUFFIX = "Month"; public static final String DAY_SUFFIX = "Day"; @@ -923,7 +929,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case sum: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + SUM_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -931,7 +937,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case avg: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + AVG_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -939,7 +945,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case min: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + MIN_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 
1)))); @@ -947,7 +953,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case max: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + MAX_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -955,7 +961,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevPop: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + STDDEVPOP_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); @@ -963,7 +969,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, } case stdDevSamp: { if (StringUtils.isEmpty(facetName)) { - facetName = groupField; + facetName = groupField + SEPARATOR + STDDEVSAMP_SUFFIX; } facet = new Facet(facetName, group(groupFieldId, Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index c8d440f4d..d801d0e90 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -725,7 +725,7 @@ public void testFacetAvgBucketsArray() { Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); } -// @Test + // @Test // public void testFacetAccumulatorMaxBucketsArray() { 
// Document match = new Document("age", new BasicDBObject("$gt", 2)); // DataResult matchedResults = mongoDBCollection.find(match, null); @@ -1100,6 +1100,7 @@ public void testFacetMultiple() { String fieldName = "name;surname"; List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate = " + aggregate); @@ -1107,7 +1108,47 @@ public void testFacetMultiple() { Assert.assertEquals(2, aggregate.first().size()); } - @Test + @Test + public void testFacetMultipleAccumulators() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult matchedResults = mongoDBCollection.find(match, null); + + double min = Double.MAX_VALUE; + double max = Double.MIN_VALUE; + for (Document result : matchedResults.getResults()) { + double value = 1.0d * result.getLong("number"); + if (value > max) { + max = value; + } + if (value < min) { + min = value; + } + } + + String fieldName = "min(number);max(number)"; + List facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + Assert.assertEquals(2, aggregate.first().size()); + for (FacetField result : aggregate.first()) { + Assert.assertEquals("number", result.getName()); + Assert.assertEquals(Long.valueOf(matchedResults.getNumResults()), result.getCount()); + double value = 0d; + if ("min".equals(result.getAggregationName())) { + value = min; + } else if ("max".equals(result.getAggregationName())) { + value = max; + } else { + fail(); + } + Assert.assertEquals(value, 
result.getAggregationValues().get(0), 0.001d); + } + } + + @Test public void testFacetCombine() { Document match = new Document("age", new BasicDBObject("$gt", 2)); DataResult matchedResults = mongoDBCollection.find(match, null); @@ -1118,7 +1159,7 @@ public void testFacetCombine() { DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); System.out.println("aggregate.toString() = " + aggregate.toString()); - String name; + String name; String surname; long totalCount = 0; Map map = new HashMap<>(); From dc4fc6d2c6db6e040bc810159e4180140cdf0273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 3 Feb 2025 09:00:44 +0100 Subject: [PATCH 45/51] datastore: sort dates facets, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index b71e6c138..8f2534032 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -13,6 +13,23 @@ public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter, Document> { + private static final Map monthMap = new HashMap<>(); + + static { + monthMap.put("01", "Jan"); + monthMap.put("02", "Feb"); + monthMap.put("03", "Mar"); + monthMap.put("04", "Apr"); + monthMap.put("05", "May"); + monthMap.put("06", "Jun"); + monthMap.put("07", "Jul"); + monthMap.put("08", "Aug"); + monthMap.put("09", "Sep"); + monthMap.put("10", "Oct"); + monthMap.put("11", "Nov"); + 
monthMap.put("12", "Dec"); + } + @Override public List convertToDataModelType(Document document) { if (document == null || document.entrySet().size() == 0) { @@ -75,6 +92,18 @@ public List convertToDataModelType(Document document) { FacetField facetField = new FacetField(facetFieldName, total, buckets); facetField.setAggregationName(count.name()); if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) { + Collections.sort(buckets, Comparator.comparing(FacetField.Bucket::getValue)); + if (key.endsWith(MONTH_SUFFIX)) { + for (FacetField.Bucket b : buckets) { + String[] split = b.getValue().split(SEPARATOR); + b.setValue(monthMap.get(split[1]) + ", " + split[0]); + } + } else if (key.endsWith(DAY_SUFFIX)) { + for (FacetField.Bucket b : buckets) { + String[] split = b.getValue().split(SEPARATOR); + b.setValue(split[2] + " " + monthMap.get(split[1]) + ", " + split[0]); + } + } // Remove the data field and keep year, month and day List labels = new ArrayList<>(Arrays.asList(key.split(SEPARATOR))); labels.remove(0); From 35a6e20ab79447a1bce5620c87ced0ce249d6d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 3 Feb 2025 09:05:03 +0100 Subject: [PATCH 46/51] datastore: fix checkstyle, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 8f2534032..c0cab6d7f 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -13,21 +13,21 @@ public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter, Document> { - private static final Map monthMap = new HashMap<>(); + private static final Map MONTH_MAP = new HashMap<>(); static { - monthMap.put("01", "Jan"); - monthMap.put("02", "Feb"); - monthMap.put("03", "Mar"); - monthMap.put("04", "Apr"); - monthMap.put("05", "May"); - monthMap.put("06", "Jun"); - monthMap.put("07", "Jul"); - monthMap.put("08", "Aug"); - monthMap.put("09", "Sep"); - monthMap.put("10", "Oct"); - monthMap.put("11", "Nov"); - monthMap.put("12", "Dec"); + MONTH_MAP.put("01", "Jan"); + MONTH_MAP.put("02", "Feb"); + MONTH_MAP.put("03", "Mar"); + MONTH_MAP.put("04", "Apr"); + MONTH_MAP.put("05", "May"); + MONTH_MAP.put("06", "Jun"); + MONTH_MAP.put("07", "Jul"); + MONTH_MAP.put("08", "Aug"); + MONTH_MAP.put("09", "Sep"); + MONTH_MAP.put("10", "Oct"); + MONTH_MAP.put("11", "Nov"); + MONTH_MAP.put("12", "Dec"); } @Override @@ -96,12 +96,12 @@ public List convertToDataModelType(Document document) { if (key.endsWith(MONTH_SUFFIX)) { for (FacetField.Bucket b : buckets) { String[] split = b.getValue().split(SEPARATOR); - b.setValue(monthMap.get(split[1]) + ", " + split[0]); + b.setValue(MONTH_MAP.get(split[1]) + ", " + split[0]); } } else if (key.endsWith(DAY_SUFFIX)) { for (FacetField.Bucket b : buckets) { String[] split = b.getValue().split(SEPARATOR); - b.setValue(split[2] + " " + monthMap.get(split[1]) + ", " + split[0]); + b.setValue(split[2] + " " + MONTH_MAP.get(split[1]) + ", " + split[0]); } } // Remove the data field and keep year, month and day From 63739357d96937a63a4cec102d22e346a1f23603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 3 Feb 2025 09:16:52 +0100 Subject: [PATCH 47/51] datastore: use date format '01 Jan 2025', #TASK-7151, #TASK-7134 --- 
.../mongodb/MongoDBDocumentToFacetFieldsConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index c0cab6d7f..def3bf4a8 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -96,12 +96,12 @@ public List convertToDataModelType(Document document) { if (key.endsWith(MONTH_SUFFIX)) { for (FacetField.Bucket b : buckets) { String[] split = b.getValue().split(SEPARATOR); - b.setValue(MONTH_MAP.get(split[1]) + ", " + split[0]); + b.setValue(MONTH_MAP.get(split[1]) + " " + split[0]); } } else if (key.endsWith(DAY_SUFFIX)) { for (FacetField.Bucket b : buckets) { String[] split = b.getValue().split(SEPARATOR); - b.setValue(split[2] + " " + MONTH_MAP.get(split[1]) + ", " + split[0]); + b.setValue(split[2] + " " + MONTH_MAP.get(split[1]) + " " + split[0]); } } // Remove the data field and keep year, month and day From 5da23b3c28337f455355ff49cbd0ad10c50e9afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 3 Feb 2025 16:55:41 +0100 Subject: [PATCH 48/51] datastore: sort facets results in descending order (counts), #TASK --- .../datastore/mongodb/MongoDBNativeQuery.java | 1 + .../datastore/mongodb/MongoDBQueryUtils.java | 35 +++++++++++++++---- .../mongodb/MongoDBCollectionTest.java | 7 ++-- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBNativeQuery.java 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBNativeQuery.java index b926cb7a4..ad2a9ad42 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBNativeQuery.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBNativeQuery.java @@ -121,6 +121,7 @@ public MongoDBIterator aggregate(ClientSession clientSession, List bsonOperations = new ArrayList<>(operations); parseQueryOptions(bsonOperations, options); + System.out.println("bsonOperations = " + bsonOperations); MongoDBIterator iterator = null; if (bsonOperations.size() > 0) { long numMatches = -1; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index f76e9557e..11eb50458 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -689,6 +689,10 @@ public static List createGroupBy(Bson query, List groupByField, St } public static List createFacet(Bson query, String facetField) { + return createFacet(query, facetField, QueryOptions.DESCENDING); + } + + public static List createFacet(Bson query, String facetField, String order) { // Sanity check if (facetField == null || StringUtils.isEmpty(facetField.trim())) { return new ArrayList<>(); @@ -697,10 +701,10 @@ public static List createFacet(Bson query, String facetField) { // Multiple facets separated by ; ArrayList facetFields = new ArrayList<>(Arrays.asList(cleanFacetField.split(";"))); - return createFacet(query, facetFields); + return createFacet(query, facetFields, order); } - private static List 
createFacet(Bson query, List facetFields) { + private static List createFacet(Bson query, List facetFields, String order) { List facetList = new ArrayList<>(); Set includeFields = new HashSet<>(); List unwindList = new ArrayList<>(); @@ -719,10 +723,15 @@ private static List createFacet(Bson query, List facetFields) { fields.append(field, "$" + field); includeFields.add(field); } + Bson bsonSort; + if (QueryOptions.ASCENDING.equals(order)) { + bsonSort = sort(Sorts.ascending(count.name())); + } else { + bsonSort = sort(Sorts.descending(count.name())); + } facet = new Facet( facetField.replace(",", SEPARATOR) + COUNTS_SUFFIX, - group(fields, Accumulators.sum(count.name(), 1)) - ); + Arrays.asList(group(fields, Accumulators.sum(Accumulator.count.name(), 1)), bsonSort)); } else { Accumulator accumulator; String groupField; @@ -818,7 +827,7 @@ private static List createFacet(Bson query, List facetFields) { } // Get MongoDB facet - facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries); + facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries, order); // Unwind in any case Set unwindFields = new HashSet<>(); @@ -884,7 +893,8 @@ private static Collection getUnwindFields(String field) { return unwindFields; } - private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries) { + private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List boundaries, + String order) { String groupFieldId = groupField; String accumulatorId = "$" + groupField; String facetName = null; @@ -898,7 +908,18 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, switch (accumulator) { case count: { facetName = groupField + SEPARATOR + COUNTS_SUFFIX; - facet = new Facet(facetName, group("$" + groupField, Accumulators.sum(count.name(), 1))); + + Bson bsonSort; + if (QueryOptions.ASCENDING.equals(order)) { + bsonSort = 
sort(Sorts.ascending(count.name())); + } else { + bsonSort = sort(Sorts.descending(count.name())); + } +// facet = new Facet( +// facetField.replace(",", SEPARATOR) + COUNTS_SUFFIX, +// Arrays.asList(group(fields, Accumulators.sum(Accumulator.count.name(), 1)), bsonSort)); + + facet = new Facet(facetName, Arrays.asList(group("$" + groupField, Accumulators.sum(count.name(), 1)), bsonSort)); break; } case year: { diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index d801d0e90..d7e7b4874 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -37,6 +37,7 @@ import java.util.*; import static org.junit.Assert.*; +import static org.opencb.commons.datastore.core.QueryOptions.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; @@ -240,7 +241,7 @@ public void testDistinct() throws Exception { @Test public void testSortOrder() throws Exception { Document query = new Document(); - QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 10).append(QueryOptions.SORT, "number") + QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 10).append(SORT, "number") .append(QueryOptions.ORDER, "asc"); List result = mongoDBCollection.find(query, queryOptions).getResults(); assertEquals(0L, result.get(0).get("number")); @@ -250,7 +251,7 @@ public void testSortOrder() throws Exception { public void testMultipleSortOrder() throws Exception { Document query = new Document(); QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 500) - 
.append(QueryOptions.SORT, Arrays.asList("age:ASC", "number:DESC")) + .append(SORT, Arrays.asList("age:ASC", "number:DESC")) .append(QueryOptions.ORDER, "asc"); int age = 0; long number = Long.MAX_VALUE; @@ -536,6 +537,7 @@ public void testFacetBuckets() { List facets = MongoDBQueryUtils.createFacet(match, fieldName); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); String value; long totalCount = 0; @@ -576,6 +578,7 @@ public void testFacetBucketsBoolean() { List facets = MongoDBQueryUtils.createFacet(match, fieldName); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); String value; long totalCount = 0; From 627850786c8a49280f5e240d14ee7282f4f36f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Feb 2025 12:59:23 +0100 Subject: [PATCH 49/51] datastore: improve MongoDB facets for range, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 143 ++++++++---------- .../mongodb/MongoDBCollectionTest.java | 38 +++-- 2 files changed, 90 insertions(+), 91 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index def3bf4a8..141b6a7af 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ 
b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -111,102 +111,85 @@ public List convertToDataModelType(Document document) { } facets.add(facetField); } else if (key.endsWith(RANGES_SUFFIX)) { - List facetFieldValues = new ArrayList<>(); - Number start = null; - Number end = null; - Number step = null; - Double other = null; + List buckets = new ArrayList<>(documentValues.size()); + int total = 0; + Double start = null; + Double end = null; + Double step = null; + int other = 0; for (Document value : documentValues) { if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { - other = 1.0d * value.getInteger(count.name()); + other = value.getInteger(count.name()); } else { - Double range = value.getDouble(INTERNAL_ID); - Integer counter = value.getInteger(count.name()); - facetFieldValues.add(1.0d * counter); - if (start == null) { - start = range; + FacetField.Bucket bucket = new FacetField.Bucket(String.valueOf(value.getDouble(INTERNAL_ID)), + value.getInteger(count.name()), null); + buckets.add(bucket); + total += bucket.getCount(); + if (step == null && start != null) { + step = Double.parseDouble(bucket.getValue()) - (Double) start; } - end = range; - if (step == null && start != end) { - step = end.doubleValue() - start.doubleValue(); + if (start == null) { + start = Double.parseDouble(bucket.getValue()); } + end = Double.parseDouble(bucket.getValue()); +// +// Double range = value.getDouble(INTERNAL_ID); +// Integer counter = value.getInteger(count.name()); +//// buckets.add(new FacetField.Bucket()); +// //facetFieldValues.add(1.0d * counter); +// if (start == null) { +// start = range; +// } +// end = range; +// if (step == null && start != end) { +// step = end.doubleValue() - start.doubleValue(); +// } } } facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); - if (other != null) { - facetFieldName += " 
(counts out of range: " + other + ")"; + if (other > 0) { + FacetField.Bucket bucket = new FacetField.Bucket("Other", other, null); + buckets.add(bucket); + total += bucket.getCount(); } - FacetField facetField = new FacetField(facetFieldName, "range", facetFieldValues) + FacetField facetField = new FacetField(facetFieldName, total, buckets) .setStart(start) - .setEnd(end) + .setEnd(end + step) .setStep(step); facets.add(facetField); } else { - if (key.endsWith(RANGES_SUFFIX)) { - List facetFieldValues = new ArrayList<>(); - Number start = null; - Number end = null; - Number step = null; - Double other = null; - for (Document value : documentValues) { - if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { - other = 1.0d * value.getInteger(count.name()); - } else { - Double range = value.getDouble(INTERNAL_ID); - Integer counter = value.getInteger(count.name()); - facetFieldValues.add(1.0d * counter); - if (start == null) { - start = range; - } - end = range; - if (step == null && start != end) { - step = end.doubleValue() - start.doubleValue(); - } - } - } - facetFieldName = key.substring(0, key.length() - RANGES_SUFFIX.length()).replace(TO_REPLACE_DOTS, "."); - if (other != null) { - facetFieldName += " (counts out of range: " + other + ")"; - } - FacetField facetField = new FacetField(facetFieldName, "range", facetFieldValues) - .setStart(start) - .setEnd(end) - .setStep(step); - facets.add(facetField); - } else { - Document documentValue = ((List) entry.getValue()).get(0); - MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); - switch (accumulator) { - case sum: - case avg: - case max: - case min: - case stdDevPop: - case stdDevSamp: { - List fieldValues = new ArrayList<>(); - if (documentValue.get(accumulator.name()) instanceof Integer) { - fieldValues.add(1.0d * documentValue.getInteger(accumulator.name())); - } else if (documentValue.get(accumulator.name()) instanceof Long) { - 
fieldValues.add(1.0d * documentValue.getLong(accumulator.name())); - } else if (documentValue.get(accumulator.name()) instanceof List) { - List list = (List) documentValue.get(accumulator.name()); - for (Number number : list) { - fieldValues.add(number.doubleValue()); - } - } else { - fieldValues.add(documentValue.getDouble(accumulator.name())); + Document documentValue = ((List) entry.getValue()).get(0); + MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue); + switch (accumulator) { + case sum: + case avg: + case max: + case min: + case stdDevPop: + case stdDevSamp: { + List fieldValues = new ArrayList<>(); + if (documentValue.get(accumulator.name()) instanceof Integer) { + fieldValues.add(1.0d * documentValue.getInteger(accumulator.name())); + } else if (documentValue.get(accumulator.name()) instanceof Long) { + fieldValues.add(1.0d * documentValue.getLong(accumulator.name())); + } else if (documentValue.get(accumulator.name()) instanceof List) { + List list = (List) documentValue.get(accumulator.name()); + for (Number number : list) { + fieldValues.add(number.doubleValue()); } - long count = 0; - if (documentValue.containsKey("count")) { - count = Long.valueOf(documentValue.getInteger("count")); - } - facetFieldName = documentValue.getString(INTERNAL_ID).replace(TO_REPLACE_DOTS, "."); - facets.add(new FacetField(facetFieldName, count, accumulator.name(), fieldValues)); - break; + } else { + fieldValues.add(documentValue.getDouble(accumulator.name())); } - default: { - // Do nothing, exception is raised + long count = 0; + if (documentValue.containsKey("count")) { + count = Long.valueOf(documentValue.getInteger("count")); } + facetFieldName = documentValue.getString(INTERNAL_ID).replace(TO_REPLACE_DOTS, "."); + facets.add(new FacetField(facetFieldName, count, accumulator.name(), fieldValues)); + break; + } + default: { + // Do nothing, exception is raised } } } diff --git 
a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index d7e7b4874..2c537839d 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -830,7 +830,7 @@ public void testFacetRangeArray() { // } long outOfRange = 0; - List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d)); + List rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L)); Map map = new HashMap<>(); for (Document result : matchedResults.getResults()) { @@ -846,13 +846,22 @@ public void testFacetRangeArray() { } } } + + System.out.println("rangeValues = " + rangeValues); + System.out.println("outOfRange = " + outOfRange); + for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); - Assert.assertTrue(facetField.getName().contains("" + (1.0d * outOfRange))); - for (int i = 0; i < facetField.getAggregationValues().size(); i++) { - Assert.assertEquals(rangeValues.get(i), facetField.getAggregationValues().get(i)); + Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); + Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); + for (int i = 0; i < facetField.getBuckets().size(); i++) { + FacetField.Bucket bucket = facetField.getBuckets().get(i); + if (bucket.getValue().equals("Other")) { + Assert.assertEquals(outOfRange, bucket.getCount()); + } else { + Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); + } } } } @@ -1219,7 +1228,7 @@ public void testFacetRange() { 
System.out.println("aggregate.first() = " + aggregate.first()); long outOfRange = 0; - List rangeValues = new ArrayList<>(Arrays.asList(0d, 0d, 0d, 0d, 0d)); + List rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L, 0L)); Map map = new HashMap<>(); for (Document result : matchedResults.getResults()) { @@ -1235,14 +1244,21 @@ public void testFacetRange() { } } } - System.out.println("rangeValues.toString() = " + rangeValues.toString()); + System.out.println("rangeValues = " + rangeValues); + System.out.println("outOfRange = " + outOfRange); + for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertTrue(facetField.getCount() == null); - Assert.assertTrue(facetField.getName().contains("" + (1.0d * outOfRange))); - for (int i = 0; i < facetField.getAggregationValues().size(); i++) { - Assert.assertEquals(rangeValues.get(i), facetField.getAggregationValues().get(i)); + Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); + Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); + for (int i = 0; i < facetField.getBuckets().size(); i++) { + FacetField.Bucket bucket = facetField.getBuckets().get(i); + if (bucket.getValue().equals("Other")) { + Assert.assertEquals(outOfRange, bucket.getCount()); + } else { + Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); + } } } } From 7b6962300f34a2196ee757a89b1eb6e1b03d82d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 6 Feb 2025 10:22:28 +0100 Subject: [PATCH 50/51] datastore: improve MongoDB facets for ranges by filling with zeros, #TASK-7151, #TASK-7134 --- ...MongoDBDocumentToFacetFieldsConverter.java | 67 ++++---- .../datastore/mongodb/MongoDBQueryUtils.java | 44 +++-- .../mongodb/MongoDBCollectionTest.java | 155 +++++++++++++----- 3 files changed, 180 insertions(+), 86 
deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java index 141b6a7af..68f9472a3 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java @@ -5,6 +5,8 @@ import org.opencb.commons.datastore.core.ComplexTypeConverter; import org.opencb.commons.datastore.core.FacetField; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.util.*; import static org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter.TO_REPLACE_DOTS; @@ -113,37 +115,23 @@ public List convertToDataModelType(Document document) { } else if (key.endsWith(RANGES_SUFFIX)) { List buckets = new ArrayList<>(documentValues.size()); int total = 0; - Double start = null; - Double end = null; - Double step = null; + + String[] split = key.split(SEPARATOR); + double start = Double.parseDouble(split[1].replace(TO_REPLACE_DOTS, ".")); + double end = Double.parseDouble(split[2].replace(TO_REPLACE_DOTS, ".")); + double step = Double.parseDouble(split[3].replace(TO_REPLACE_DOTS, ".")); + int other = 0; + for (double i = start; i <= end; i += step) { + int bucketCount = getBucketCountFromRanges(i, documentValues); + FacetField.Bucket bucket = new FacetField.Bucket(String.valueOf(roundToTwoSignificantDecimals(i)), bucketCount, null); + buckets.add(bucket); + total += bucketCount; + } + for (Document value : documentValues) { if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) { other = value.getInteger(count.name()); - } else { - FacetField.Bucket bucket = new 
FacetField.Bucket(String.valueOf(value.getDouble(INTERNAL_ID)), - value.getInteger(count.name()), null); - buckets.add(bucket); - total += bucket.getCount(); - if (step == null && start != null) { - step = Double.parseDouble(bucket.getValue()) - (Double) start; - } - if (start == null) { - start = Double.parseDouble(bucket.getValue()); - } - end = Double.parseDouble(bucket.getValue()); -// -// Double range = value.getDouble(INTERNAL_ID); -// Integer counter = value.getInteger(count.name()); -//// buckets.add(new FacetField.Bucket()); -// //facetFieldValues.add(1.0d * counter); -// if (start == null) { -// start = range; -// } -// end = range; -// if (step == null && start != end) { -// step = end.doubleValue() - start.doubleValue(); -// } } } facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, "."); @@ -154,7 +142,7 @@ public List convertToDataModelType(Document document) { } FacetField facetField = new FacetField(facetFieldName, total, buckets) .setStart(start) - .setEnd(end + step) + .setEnd(end) .setStep(step); facets.add(facetField); } else { @@ -214,4 +202,27 @@ private MongoDBQueryUtils.Accumulator getAccumulator(Document document) { public Document convertToStorageType(List facetFields) { throw new RuntimeException("Not yet implemented"); } + + private static double roundToTwoSignificantDecimals(double value) { + if (value == 0) { + return 0; + } + + BigDecimal bd = new BigDecimal(value); + int integerDigits = bd.precision() - bd.scale(); + int scale = Math.max(0, 2 + integerDigits); + return bd.setScale(scale, RoundingMode.HALF_UP).doubleValue(); + } + + + private int getBucketCountFromRanges(double inputRange, List documentValues) { + for (Document document : documentValues) { + if (!OTHER.equals(document.get(INTERNAL_ID))) { + if (inputRange == document.getDouble(INTERNAL_ID)) { + return document.getInteger(count.name()); + } + } + } + return 0; + } } diff --git 
a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index 11eb50458..aafd191f9 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -713,7 +713,7 @@ private static List createFacet(Bson query, List facetFields, Stri // For each facet field passed we will create a MongoDB facet, thre are 4 types of facets: // 1. Facet combining fields with commas. In this case, only 'count' is supported as accumulator. for (String facetField : facetFields) { - Facet facet; + Facet facet = null; // 1. Check if it is a facet combining fields with commas. In this case, only 'count' is supported as accumulator. // Example: aggregationFields=format,type @@ -795,12 +795,20 @@ private static List createFacet(Bson query, List facetFields, Stri double start = Double.parseDouble(matcher1.group(2)); double end = Double.parseDouble(matcher2.group(1)); double step = Double.parseDouble(matcher2.group(2)); - int numSections = (int) Math.ceil((end - start + 1) / step); - double boundary = start; - for (int i = 0; i < numSections + 1; i++) { - boundaries.add(boundary); - boundary += step; + double i; + for (i = start; i <= end; i += step) { + boundaries.add(i); } + if (boundaries.get(boundaries.size() - 1) < end) { + boundaries.add(i); + } + + String facetName = groupField + SEPARATOR + start + SEPARATOR + end + SEPARATOR + step + SEPARATOR + + RANGES_SUFFIX; + facet = new Facet(facetName, Aggregates.bucket("$" + groupField, boundaries, + new BucketOptions() + .defaultBucket(OTHER) + .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); } else { throw new 
IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG); } @@ -827,7 +835,9 @@ private static List createFacet(Bson query, List facetFields, Stri } // Get MongoDB facet - facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries, order); + if (facet == null) { + facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries, order); + } // Unwind in any case Set unwindFields = new HashSet<>(); @@ -938,7 +948,7 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, break; } case day: { - facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX; + facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX; Document fields = new Document(); fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name()); @@ -996,14 +1006,16 @@ private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1)))); break; } - case bucket: { - facetName = groupField + SEPARATOR + RANGES_SUFFIX; - facet = new Facet(facetName, Aggregates.bucket(accumulatorId, boundaries, - new BucketOptions() - .defaultBucket(OTHER) - .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); - break; - } + case bucket: +// { +// // Nothing to do +// facetName = groupField + SEPARATOR + RANGES_SUFFIX; +// facet = new Facet(facetName, Aggregates.bucket(accumulatorId, boundaries, +// new BucketOptions() +// .defaultBucket(OTHER) +// .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1)))))); +// break; +// } default: { facet = null; break; diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java 
b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 2c537839d..a36132dbb 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -37,7 +37,7 @@ import java.util.*; import static org.junit.Assert.*; -import static org.opencb.commons.datastore.core.QueryOptions.*; +import static org.opencb.commons.datastore.core.QueryOptions.SORT; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; @@ -53,6 +53,7 @@ public class MongoDBCollectionTest { private static MongoDBCollection mongoDBCollectionInsertTest; private static MongoDBCollection mongoDBCollectionUpdateTest; private static MongoDBCollection mongoDBCollectionRemoveTest; + private static MongoDBCollection mongoDBCollectionFacetRange; private static int N = 1000; @@ -74,6 +75,7 @@ public static void beforeClass() throws Exception { mongoDBCollectionInsertTest = createTestCollection("insert_test", 50); mongoDBCollectionUpdateTest = createTestCollection("update_test", 50); mongoDBCollectionRemoveTest = createTestCollection("remove_test", 50); + mongoDBCollectionFacetRange = createTestCollection2("facet-range-test"); } @Before @@ -194,6 +196,19 @@ private static MongoDBCollection createTestCollection(String test, int size) { return mongoDBCollection; } + private static MongoDBCollection createTestCollection2(String name) { + MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(name); + Document document; + Random random = new Random(); + + List ages = Arrays.asList(1, 3, 3, 9, 10, 11, 12); + for (Integer age : ages) { + document = new Document("age", age); + mongoDBCollection.nativeQuery().insert(document, null); + } + return 
mongoDBCollection; + } + @Test public void testQueryResultWriter() throws Exception { @@ -1212,56 +1227,112 @@ public void testFacetCombine() { } } +// @Test +// public void testFacetRange() { +// Document match = new Document("age", new BasicDBObject("$gt", 2)); +// DataResult matchedResults = mongoDBCollection.find(match, null); +// +// int start = 1000; +// int end = 5000; +// int step = 100; +// String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; +// List facets = MongoDBQueryUtils.createFacet(match, fieldName); +// System.out.println("facets = " + facets); +// MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); +// DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); +// System.out.println("aggregate.first() = " + aggregate.first()); +// +// long outOfRange = 0; +// List rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L, 0L)); +// +// Map map = new HashMap<>(); +// for (Document result : matchedResults.getResults()) { +// int bucketNum; +// Long value = result.getLong("number"); +// if (value != null) { +// bucketNum = (int) (value - start) / step; +// int numSections = (int) Math.ceil((end - start + 1) / step); +// if (value < start || bucketNum > numSections) { +// outOfRange++; +// } else { +// rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); +// } +// } +// } +// System.out.println("rangeValues = " + rangeValues); +// System.out.println("outOfRange = " + outOfRange); +// +// for (List result : aggregate.getResults()) { +// Assert.assertEquals(1, result.size()); +// for (FacetField facetField : result) { +// Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); +// Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); +// for (int i = 0; i < facetField.getBuckets().size(); i++) { +// FacetField.Bucket bucket = 
facetField.getBuckets().get(i); +// if (bucket.getValue().equals("Other")) { +// Assert.assertEquals(outOfRange, bucket.getCount()); +// } else { +// Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); +// } +// } +// } +// } +// } + @Test public void testFacetRange() { - Document match = new Document("age", new BasicDBObject("$gt", 2)); - DataResult matchedResults = mongoDBCollection.find(match, null); + DataResult matchedResults = mongoDBCollectionFacetRange.find(new Document(), null); + for (Document result : matchedResults.getResults()) { + System.out.println("age = " + result.get("age")); + } - int start = 1000; - int end = 5000; - int step = 1000; - String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; - List facets = MongoDBQueryUtils.createFacet(match, fieldName); + double start = 3; + double end = 10; + double step = 2; + String fieldName = "age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + System.out.println("fieldName = " + fieldName); + List facets = MongoDBQueryUtils.createFacet(new Document(), fieldName); System.out.println("facets = " + facets); MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); - DataResult> aggregate = mongoDBCollection.aggregate(facets, converter, null); + DataResult> aggregate = mongoDBCollectionFacetRange.aggregate(facets, converter, null); System.out.println("aggregate.first() = " + aggregate.first()); - - long outOfRange = 0; - List rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L, 0L)); - Map map = new HashMap<>(); - for (Document result : matchedResults.getResults()) { - int bucketNum; - Long value = result.getLong("number"); - if (value != null) { - bucketNum = (int) (value - start) / step; - int numSections = (int) Math.ceil((end - start + 1) / step); - if (value < start || bucketNum > numSections) { - outOfRange++; - } else { - rangeValues.set(bucketNum, 1 + 
rangeValues.get(bucketNum)); - } - } + map.put("3.0", 2); + map.put("5.0", 0); + map.put("7.0", 0); + map.put("9.0", 2); + map.put("Other", 3); + Assert.assertTrue(aggregate.first().get(0).getBuckets().size() > 0); + for (FacetField.Bucket bucket : aggregate.first().get(0).getBuckets()) { + Assert.assertTrue(map.containsKey(bucket.getValue())); + Assert.assertEquals(map.get(bucket.getValue()), bucket.getCount(), 0.0001); } - System.out.println("rangeValues = " + rangeValues); - System.out.println("outOfRange = " + outOfRange); - - for (List result : aggregate.getResults()) { - Assert.assertEquals(1, result.size()); - for (FacetField facetField : result) { - Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); - Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); - for (int i = 0; i < facetField.getBuckets().size(); i++) { - FacetField.Bucket bucket = facetField.getBuckets().get(i); - if (bucket.getValue().equals("Other")) { - Assert.assertEquals(outOfRange, bucket.getCount()); - } else { - Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); - } - } - } + Assert.assertEquals(start, aggregate.first().get(0).getStart()); + Assert.assertEquals(end, aggregate.first().get(0).getEnd()); + Assert.assertEquals(step, aggregate.first().get(0).getStep()); + + step = 3; + System.out.println(); + fieldName = "age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + System.out.println("fieldName = " + fieldName); + facets = MongoDBQueryUtils.createFacet(new Document(), fieldName); + System.out.println("facets = " + facets); + converter = new MongoDBDocumentToFacetFieldsConverter(); + aggregate = mongoDBCollectionFacetRange.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + map.clear(); + map.put("3.0", 2); + map.put("6.0", 0); + map.put("9.0", 3); + map.put("Other", 2); + 
Assert.assertTrue(aggregate.first().get(0).getBuckets().size() > 0); + for (FacetField.Bucket bucket : aggregate.first().get(0).getBuckets()) { + Assert.assertTrue(map.containsKey(bucket.getValue())); + Assert.assertEquals(map.get(bucket.getValue()), bucket.getCount(), 0.0001); } + Assert.assertEquals(start, aggregate.first().get(0).getStart()); + Assert.assertEquals(end, aggregate.first().get(0).getEnd()); + Assert.assertEquals(step, aggregate.first().get(0).getStep()); } @Test(expected = IllegalArgumentException.class) From e0b5d3fda7be3a9e5faa95bff6e2ef5cb314ee31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 10 Feb 2025 09:26:12 +0100 Subject: [PATCH 51/51] datastore: change Long to long, and fix JUnit tests, #TASK-7151, #TASK-7134 --- .../commons/datastore/core/FacetField.java | 11 +++--- .../mongodb/MongoDBCollectionTest.java | 34 ++++++++----------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index 8f1251364..753938c05 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -23,7 +23,7 @@ */ public class FacetField { private String name; - private Long count; + private long count; private List buckets; private String aggregationName; private List aggregationValues; @@ -74,20 +74,17 @@ public FacetField setName(String name) { return this; } - public Long getCount() { + public long getCount() { return count; } - public FacetField setCount(Long count) { + public FacetField setCount(long count) { this.count = count; return this; } public FacetField addCount(long delta) { - if (this.count == null) { - this.count = 
0L; - } - this.count = this.count.longValue() + delta; + this.count = this.count + delta; return this; } diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index a36132dbb..b89a37b45 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -570,8 +570,7 @@ public void testFacetBuckets() { } for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertFalse(facetField.getCount() == null); - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -611,8 +610,7 @@ public void testFacetBucketsBoolean() { } for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertFalse(facetField.getCount() == null); - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -652,8 +650,7 @@ public void testFacetBucketsDotNotation() { } for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertFalse(facetField.getCount() == null); - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); 
for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -702,7 +699,7 @@ public void testFacetCountBucketsArray() { for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -810,7 +807,7 @@ public void testFacetFilterAccumulatorBucketsArray() { for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(counterMap.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = bucket.getValue(); @@ -818,7 +815,7 @@ public void testFacetFilterAccumulatorBucketsArray() { value = EMPTY; } Assert.assertEquals(counterMap.get(value).longValue(), bucket.getCount()); - Assert.assertEquals(counterMap.get(value).longValue(), bucket.getFacetFields().get(0).getCount().longValue()); + Assert.assertEquals(counterMap.get(value).longValue(), bucket.getFacetFields().get(0).getCount()); Assert.assertEquals("avg", bucket.getFacetFields().get(0).getAggregationName()); Assert.assertEquals(1.0 * accMap.get(value) / counterMap.get(value), bucket.getFacetFields().get(0).getAggregationValues().get(0), 0.0001); } @@ -868,7 +865,7 @@ public void testFacetRangeArray() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); + Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount()); 
Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); for (int i = 0; i < facetField.getBuckets().size(); i++) { FacetField.Bucket bucket = facetField.getBuckets().get(i); @@ -907,7 +904,7 @@ public void testFacetMax() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(max.name(), facetField.getAggregationName()); Assert.assertEquals(maxValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -939,7 +936,7 @@ public void testFacetMin() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(count, facetField.getCount().longValue()); + Assert.assertEquals(count, facetField.getCount()); Assert.assertEquals(min.name(), facetField.getAggregationName()); Assert.assertEquals(minValue, facetField.getAggregationValues().get(0), 0.0001); } @@ -970,7 +967,7 @@ public void testFacetAvg() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(avg.name(), facetField.getAggregationName()); Assert.assertEquals(totalSum / totalCount, facetField.getAggregationValues().get(0), 0.0001); } @@ -1010,7 +1007,7 @@ public void testFacetMaxDotNotationAndList() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(count, facetField.getCount().longValue()); + Assert.assertEquals(count, facetField.getCount()); Assert.assertEquals(max.name(), facetField.getAggregationName()); // for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { // 
Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); @@ -1040,7 +1037,7 @@ public void testFacetSumAccumulator() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(count, facetField.getCount().longValue()); + Assert.assertEquals(count, facetField.getCount()); Assert.assertEquals(Accumulator.avg.name(), facetField.getAggregationName()); Assert.assertEquals(avg, facetField.getAggregationValues().get(0), 0.5); // for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { @@ -1055,7 +1052,7 @@ public void testFacetSumAccumulator() { for (List result : aggregate.getResults()) { Assert.assertEquals(1, result.size()); for (FacetField facetField : result) { - Assert.assertEquals(count, facetField.getCount().longValue()); + Assert.assertEquals(count, facetField.getCount()); Assert.assertEquals(Accumulator.sum.name(), facetField.getAggregationName()); Assert.assertEquals(total, facetField.getAggregationValues().get(0), 0.0001); } @@ -1162,7 +1159,7 @@ public void testFacetMultipleAccumulators() { Assert.assertEquals(2, aggregate.first().size()); for (FacetField result : aggregate.first()) { Assert.assertEquals("number", result.getName()); - Assert.assertEquals(Long.valueOf(matchedResults.getNumResults()), result.getCount()); + Assert.assertEquals(matchedResults.getNumResults(), result.getCount()); double value = 0d; if ("min".equals(result.getAggregationName())) { value = min; @@ -1216,8 +1213,7 @@ public void testFacetCombine() { String value; for (List result : aggregate.getResults()) { for (FacetField facetField : result) { - Assert.assertFalse(facetField.getCount() == null); - Assert.assertEquals(totalCount, facetField.getCount().longValue()); + Assert.assertEquals(totalCount, facetField.getCount()); Assert.assertEquals(map.size(), facetField.getBuckets().size()); for (FacetField.Bucket bucket : facetField.getBuckets()) { value = 
bucket.getValue();