diff --git a/build.sbt b/build.sbt index 4852c189..3bf483e7 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.9.1" +ThisBuild / version := "0.9.2" ThisBuild / scalaVersion := scala213 diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala index cb597e3f..bff99219 100644 --- a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala @@ -888,8 +888,9 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M | p.birthDate, | p.children, | inner_children.name, - | inner_children.birthDate - |FROM + | inner_children.birthDate, + | inner_children.parentId + | FROM | parent as p | JOIN UNNEST(p.children) as inner_children |WHERE diff --git a/documentation/functions_aggregate.md b/documentation/functions_aggregate.md index 86b06d69..9cc876d3 100644 --- a/documentation/functions_aggregate.md +++ b/documentation/functions_aggregate.md @@ -149,7 +149,7 @@ FROM emp; Collect values into an array for each partition. Implemented using `OVER` and pushed to ES as `top_hits`. Post-processing converts hits to an array of scalars. **Inputs:** -- `expr` with optional `OVER (PARTITION BY ... ORDER BY ... LIMIT n)` +- `expr` with optional `OVER (PARTITION BY ... ORDER BY ... )` If `OVER` is not provided, only the expr column name is used for the sorting. **Output:** @@ -161,9 +161,9 @@ SELECT department, ARRAY_AGG(name) OVER ( PARTITION BY department ORDER BY hire_date ASC - LIMIT 100 ) AS employees -FROM emp; +FROM emp +LIMIT 100; -- Result: employees as an array of name values -- per department (sorted and limited) ``` diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index f9a46d9c..991621a5 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -3,11 +3,13 @@ package app.softnetwork.elastic.sql.bridge import app.softnetwork.elastic.sql.query.{ Asc, Bucket, - BucketSelectorScript, + BucketIncludesExcludes, Criteria, Desc, - ElasticBoolQuery, Field, + MetricSelectorScript, + NestedElement, + NestedElements, SortOrder } import app.softnetwork.elastic.sql.function._ @@ -16,7 +18,6 @@ import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, bucketSelectorAggregation, cardinalityAgg, - filterAgg, maxAgg, minAgg, nestedAggregation, @@ -48,9 +49,10 @@ case class ElasticAggregation( filteredAgg: Option[FilterAggregation] = None, aggType: AggregateFunction, agg: Aggregation, - direction: Option[SortOrder] = None + direction: Option[SortOrder] = None, + nestedElement: Option[NestedElement] = None ) { - val nested: Boolean = nestedAgg.nonEmpty + val nested: Boolean = nestedElement.nonEmpty val filtered: Boolean = filteredAgg.nonEmpty } @@ -61,7 +63,7 @@ object ElasticAggregation { bucketsDirection: Map[String, SortOrder] ): ElasticAggregation = { import sqlAgg._ - val sourceField = identifier.name + val sourceField = identifier.path val direction = bucketsDirection.get(identifier.identifierName) @@ -103,7 +105,7 @@ object ElasticAggregation { buildScript: (String, Script) => Aggregation ): Aggregation = { if (transformFuncs.nonEmpty) { - val scriptSrc = identifier.painless + val scriptSrc = identifier.painless() val script = Script(scriptSrc).lang("painless") buildScript(aggName, script) } else { @@ -143,7 +145,7 @@ object ElasticAggregation { .copy( scripts = th.fields .filter(_.isScriptField) - .map(f => f.sourceField -> Script(f.painless).lang("painless")) + .map(f => f.sourceField -> Script(f.painless()).lang("painless")) .toMap ) .size(limit) sortBy th.orderBy.sorts.map(sort => @@ -160,31 +162,13 @@ object ElasticAggregation { } } ) - /*th.fields.filter(_.isScriptField).foldLeft(topHits) { (agg, f) => - agg.script(f.sourceField, Script(f.painless, lang = Some("painless"))) - }*/ topHits } val filteredAggName = "filtered_agg" - val filteredAgg: Option[FilterAggregation] = - having match { - case Some(f) => - val boolQuery = Option(ElasticBoolQuery(group = true)) - Some( - filterAgg( - filteredAggName, - f.asFilter(boolQuery) - .query(Set(identifier.innerHitsName).flatten, boolQuery) - ) - ) - case _ => - None - } - def filtered(): Unit = - filteredAgg match { + having match { case Some(_) => aggPath ++= Seq(filteredAggName) aggPath ++= Seq(aggName) @@ -192,28 +176,55 @@ object ElasticAggregation { aggPath ++= Seq(aggName) } + val nestedElement = identifier.nestedElement + + val nestedElements: Seq[NestedElement] = + nestedElement.map(n => NestedElements.buildNestedTrees(Seq(n))).getOrElse(Nil) + val nestedAgg = - if (identifier.nested) { - val path = sourceField.split("\\.").head - val nestedAgg = s"nested_${identifier.nestedType.getOrElse(aggName)}" - aggPath ++= Seq(nestedAgg) - filtered() - Some(nestedAggregation(nestedAgg, path)) - } else { - filtered() - None + nestedElements match { + case Nil => + None + case nestedElements => + def buildNested(n: NestedElement): NestedAggregation = { + aggPath ++= Seq(n.innerHitsName) + val children = n.children + if (children.nonEmpty) { + val innerAggs = children.map(buildNested) + val combinedAgg = if (innerAggs.size == 1) { + innerAggs.head + } else { + innerAggs.reduceLeft { (agg1, agg2) => + agg1.copy(subaggs = agg1.subaggs ++ Seq(agg2)) + } + } + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs Seq(combinedAgg) + } else { + nestedAggregation( + n.innerHitsName, + n.path + ) + } + } + + Some(buildNested(nestedElements.head)) } + filtered() + ElasticAggregation( aggPath.mkString("."), field, sourceField, distinct = distinct, nestedAgg = nestedAgg, - filteredAgg = filteredAgg, aggType = aggType, agg = _agg, - direction = direction + direction = direction, + nestedElement = nestedElement ) } @@ -224,20 +235,38 @@ object ElasticAggregation { aggregationsDirection: Map[String, SortOrder], having: Option[Criteria] ): Option[TermsAggregation] = { - Console.println(bucketsDirection) buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => - val agg = { + var agg = { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + termsAgg(bucket.name, s"${bucket.identifier.path}.keyword") .order(Seq(direction match { - case Asc => TermsOrder(bucket.name, asc = true) - case _ => TermsOrder(bucket.name, asc = false) + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) })) case None => - termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + termsAgg(bucket.name, s"${bucket.identifier.path}.keyword") } } + bucket.size.foreach(s => agg = agg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = agg.include(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = agg.include(values.toArray) + case _ => + } + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = agg.exclude(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = agg.exclude(values.toArray) + case _ => + } + case _ => + } current match { case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) case None => @@ -254,12 +283,15 @@ object ElasticAggregation { agg val withHaving = having match { case Some(criteria) => - import BucketSelectorScript._ - val script = toPainless(criteria) - val bucketsPath = extractBucketsPath(criteria) + val script = MetricSelectorScript.metricSelector(criteria) + val bucketsPath = criteria.extractMetricsPath val bucketSelector = - bucketSelectorAggregation("having_filter", Script(script), bucketsPath) + bucketSelectorAggregation( + "having_filter", + Script(script.replaceAll("1 == 1 &&", "").replaceAll("&& 1 == 1", "").trim), + bucketsPath + ) withAggregationOrders.copy(subaggs = aggregations :+ bucketSelector) diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala index cfeb311f..c99899f4 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala @@ -1,8 +1,8 @@ package app.softnetwork.elastic.sql.bridge +import app.softnetwork.elastic.sql.operator.AND import app.softnetwork.elastic.sql.query.{ BetweenExpr, - DistanceCriteria, ElasticBoolQuery, ElasticChild, ElasticFilter, @@ -14,10 +14,16 @@ import app.softnetwork.elastic.sql.query.{ IsNotNullCriteria, IsNotNullExpr, IsNullCriteria, - IsNullExpr + IsNullExpr, + NestedElement, + NestedElements, + Predicate } import com.sksamuel.elastic4s.ElasticApi._ -import com.sksamuel.elastic4s.searches.queries.Query +import com.sksamuel.elastic4s.FetchSourceContext +import com.sksamuel.elastic4s.searches.queries.{InnerHit, Query} + +import scala.annotation.tailrec case class ElasticQuery(filter: ElasticFilter) { def query( @@ -38,16 +44,94 @@ case class ElasticQuery(filter: ElasticFilter) { if (innerHitsNames.contains(innerHitsName.getOrElse(""))) { criteria.asFilter(currentQuery).query(innerHitsNames, currentQuery) } else { - val boolQuery = Option(ElasticBoolQuery(group = true)) - nestedQuery( - relationType.getOrElse(""), - criteria - .asFilter(boolQuery) - .query(innerHitsNames + innerHitsName.getOrElse(""), boolQuery) - ) /*.scoreMode(ScoreMode.None)*/ - .inner( - innerHits(innerHitsName.getOrElse("")).from(0).size(limit.map(_.limit).getOrElse(3)) - ) + NestedElements.buildNestedTrees(criteria.nestedElements) match { + case Nil => + matchAllQuery() + case nestedTrees => + def nestedInner(n: NestedElement): InnerHit = { + var inner = innerHits(n.innerHitsName) + n.size match { + case Some(s) => + inner = inner.from(0).size(s) + case _ => + } + if (n.sources.nonEmpty) { + inner = inner.fetchSource( + FetchSourceContext( + fetchSource = true, + includes = n.sources.map { source => + (n.path.split('.').toSeq ++ Seq(source)).mkString(".") + }.toArray + ) + ) + } + inner + } + + def buildNestedQuery(n: NestedElement, q: Query): Query = { + val children = n.children + if (children.nonEmpty) { + val innerQueries = children.map(child => buildNestedQuery(child, q)) + val combinedQuery = if (innerQueries.size == 1) { + innerQueries.head + } else { + must(innerQueries) + } + nestedQuery( + n.path, + combinedQuery + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } else { + nestedQuery( + n.path, + q + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } + } + + criteria match { + case p: Predicate if nestedTrees.size > 1 => + val leftNested = ElasticNested(p.leftCriteria, p.leftCriteria.limit) + val leftBoolQuery = Option(ElasticBoolQuery(group = true)) + val leftQuery = ElasticQuery(leftNested) + .query(innerHitsNames /*++ leftNested.innerHitsName.toSet*/, leftBoolQuery) + + val rightNested = ElasticNested(p.rightCriteria, p.rightCriteria.limit) + val rightBoolQuery = Option(ElasticBoolQuery(group = true)) + val rightQuery = ElasticQuery(rightNested) + .query(innerHitsNames /*++ rightNested.innerHitsName.toSet*/, rightBoolQuery) + + p.operator match { + case AND => + p.not match { + case Some(_) => not(rightQuery).filter(leftQuery) + case _ => must(leftQuery, rightQuery) + } + case _ => + p.not match { + case Some(_) => not(rightQuery).should(leftQuery) + case _ => should(leftQuery, rightQuery) + } + } + case _ => + val boolQuery = Option(ElasticBoolQuery(group = true)) + val q = criteria + .asFilter(boolQuery) + .query(innerHitsNames + innerHitsName.getOrElse(""), boolQuery) + if (nestedTrees.size == 1) { + buildNestedQuery(nestedTrees.head, q) + } else { + val innerQueries = nestedTrees.map(nested => buildNestedQuery(nested, q)) + must(innerQueries) + } + } + } } case child: ElasticChild => import child._ diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 4d3c79df..d0a8c4f6 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -5,20 +5,245 @@ import app.softnetwork.elastic.sql.function.aggregate.COUNT import app.softnetwork.elastic.sql.function.geo.{Distance, Meters} import app.softnetwork.elastic.sql.operator._ import app.softnetwork.elastic.sql.query._ -import com.sksamuel.elastic4s.ElasticApi +import com.sksamuel.elastic4s.{ElasticApi, FetchSourceContext} import com.sksamuel.elastic4s.ElasticApi._ import com.sksamuel.elastic4s.http.ElasticDsl.BuildableTermsNoOp import com.sksamuel.elastic4s.http.search.SearchBodyBuilderFn import com.sksamuel.elastic4s.script.Script import com.sksamuel.elastic4s.script.ScriptType.Source -import com.sksamuel.elastic4s.searches.aggs.Aggregation -import com.sksamuel.elastic4s.searches.queries.Query +import com.sksamuel.elastic4s.searches.aggs.{Aggregation, FilterAggregation, NestedAggregation} +import com.sksamuel.elastic4s.searches.queries.{InnerHit, Query} import com.sksamuel.elastic4s.searches.{MultiSearchRequest, SearchRequest} import com.sksamuel.elastic4s.searches.sort.FieldSort import scala.language.implicitConversions package object bridge { + + implicit def requestToNestedFilterAggregation( + request: SQLSearchRequest, + innerHitsName: String + ): Option[FilterAggregation] = + request.where.flatMap(_.criteria) match { + case Some(f) => + f.nestedCriteria(innerHitsName) match { + case Nil => None + case cs => + val boolQuery = ElasticBoolQuery(group = true) + cs.map(c => boolQuery.filter(c.asFilter(Option(boolQuery)))) + Some( + filterAgg( + s"filtered_$innerHitsName", + boolQuery.query( + request.aggregates.flatMap(_.identifier.innerHitsName).toSet, + Option(boolQuery) + ) + ) + ) + } + case _ => + None + } + + implicit def requestToFilterAggregation( + request: SQLSearchRequest + ): Option[FilterAggregation] = + request.having.flatMap(_.criteria) match { + case Some(f) => + val boolQuery = Option(ElasticBoolQuery(group = true)) + Some( + filterAgg( + "filtered_agg", + f.asFilter(boolQuery) + .query(request.aggregates.flatMap(_.identifier.innerHitsName).toSet, boolQuery) + ) + ) + case _ => + None + } + + implicit def requestToRootAggregations( + request: SQLSearchRequest + ): Seq[Aggregation] = { + val aggregations = request.aggregates.map( + ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ) + + val notNestedAggregations = aggregations.filterNot(_.nested) + + val rootAggregations = notNestedAggregations match { + case Nil => Nil + case aggs => + val directions: Map[String, SortOrder] = aggs + .filter(_.direction.isDefined) + .map(agg => agg.agg.name -> agg.direction.get) + .toMap + val aggregations = aggs.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + request.buckets.filterNot(_.nested), + request.sorts -- directions.keys, + aggregations, + directions, + request.having.flatMap(_.criteria) + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + buckets + } + rootAggregations + } + + implicit def requestToScopedAggregations( + request: SQLSearchRequest + ): Seq[NestedAggregation] = { + val aggregations = request.aggregates.map( + ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ) + + val nestedAggregations: Map[String, Seq[ElasticAggregation]] = aggregations + .filter(_.nested) + .groupBy( + _.nestedElement + .map(_.path) + .getOrElse( + throw new IllegalArgumentException("Nested aggregation must have a nested element") + ) + ) + + val nestedGroupedBuckets = + request.buckets + .filter(_.nested) + .groupBy( + _.nestedBucket.getOrElse( + throw new IllegalArgumentException( + "Nested bucket must have a nested element" + ) + ) + ) + + val havingCriteria = request.having.flatMap(_.criteria) + + val scopedAggregations = NestedElements + .buildNestedTrees( + nestedAggregations.values.flatMap(_.flatMap(_.nestedElement)).toSeq.distinct + ) + .map { tree => + def buildNestedAgg(n: NestedElement): NestedAggregation = { + val elasticAggregations = nestedAggregations.getOrElse(n.path, Seq.empty) + val aggregations = elasticAggregations.map(_.agg) + val directions: Map[String, SortOrder] = + elasticAggregations + .filter(_.direction.isDefined) + .map(elasticAggregation => + elasticAggregation.agg.name -> elasticAggregation.direction.getOrElse(Asc) + ) + .toMap + val buckets: Seq[Aggregation] = + ElasticAggregation.buildBuckets( + nestedGroupedBuckets + .getOrElse(n.innerHitsName, Seq.empty), + request.sorts -- directions.keys, + aggregations, + directions, + havingCriteria + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val nestedFilteredAgg: Option[FilterAggregation] = + requestToNestedFilterAggregation(request, n.innerHitsName) + val children = n.children + if (children.nonEmpty) { + val innerAggs = children.map(buildNestedAgg) + val combinedAgg = if (innerAggs.size == 1) { + innerAggs.head + } else { + innerAggs.reduceLeft { (agg1, agg2) => + agg1.copy(subaggs = agg1.subaggs ++ Seq(agg2)) + } + } + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs (nestedFilteredAgg match { + case Some(filteredAgg) => + Seq(filteredAgg subaggs buckets ++ Seq(combinedAgg)) + case _ => buckets ++ Seq(combinedAgg) + }) + } else { + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs (nestedFilteredAgg match { + case Some(filteredAgg) => + Seq(filteredAgg subaggs buckets) + case _ => buckets + }) + } + } + buildNestedAgg(tree) + } + scopedAggregations + } + + implicit def requestToNestedWithoutCriteriaQuery(request: SQLSearchRequest): Option[Query] = + NestedElements.buildNestedTrees(request.nestedElementsWithoutCriteria) match { + case Nil => None + case nestedTrees => + def nestedInner(n: NestedElement): InnerHit = { + var inner = innerHits(n.innerHitsName) + n.size match { + case Some(s) => + inner = inner.from(0).size(s) + case _ => + } + if (n.sources.nonEmpty) { + inner = inner.fetchSource( + FetchSourceContext( + fetchSource = true, + includes = n.sources.toArray + ) + ) + } + inner + } + + def buildNestedQuery(n: NestedElement): Query = { + val children = n.children + if (children.nonEmpty) { + val innerQueries = children.map(child => buildNestedQuery(child)) + val combinedQuery = if (innerQueries.size == 1) { + innerQueries.head + } else { + must(innerQueries) + } + nestedQuery( + n.path, + combinedQuery + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } else { + nestedQuery( + n.path, + matchAllQuery() + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } + } + + if (nestedTrees.size == 1) { + Some(buildNestedQuery(nestedTrees.head)) + } else { + val innerQueries = nestedTrees.map(nested => buildNestedQuery(nested)) + Some(boolQuery().filter(innerQueries)) + } + } + implicit def requestToElasticSearchRequest(request: SQLSearchRequest): ElasticSearchRequest = ElasticSearchRequest( request.select.fields, @@ -36,80 +261,43 @@ package object bridge { implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ - val notNestedBuckets = buckets.filterNot(_.identifier.nested) - val nestedBuckets = buckets.filter(_.identifier.nested).groupBy(_.nestedBucket.getOrElse("")) - val aggregations = - aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts)) - val notNestedAggregations = aggregations.filterNot(_.nested) - val nestedAggregations = - aggregations.filter(_.nested).groupBy(_.nestedAgg.map(_.name).getOrElse("")) + + val rootAggregations = requestToRootAggregations(request) + + val scopedAggregations = requestToScopedAggregations(request) + + val aggregations = rootAggregations ++ scopedAggregations + + val nestedWithoutCriteriaQuery: Option[Query] = requestToNestedWithoutCriteriaQuery(request) + var _search: SearchRequest = search("") query { - where.flatMap(_.criteria.map(_.asQuery())).getOrElse(matchAllQuery()) + where.flatMap(_.criteria.map(_.asQuery())) match { + case Some(c) => + val baseQuery = c + nestedWithoutCriteriaQuery match { + case Some(nc) => boolQuery().filter(baseQuery, nc) + case _ => baseQuery + } + case _ => + nestedWithoutCriteriaQuery.getOrElse(matchAllQuery()) + } } sourceFiltering (fields, excludes) - _search = if (nestedAggregations.nonEmpty) { + _search = if (aggregations.nonEmpty) { _search aggregations { - nestedAggregations.map { case (nested, aggs) => - val first = aggs.head - val aggregations = aggs.map(_.agg) - val aggregationDirections: Map[String, SortOrder] = - aggs - .filter(_.direction.isDefined) - .map(agg => agg.agg.name -> agg.direction.getOrElse(Asc)) - .toMap - val buckets = - ElasticAggregation.buildBuckets( - nestedBuckets.getOrElse(nested, Seq.empty), - request.sorts -- aggregationDirections.keys, - aggregations, - aggregationDirections, - request.having.flatMap(_.criteria) - ) match { - case Some(b) => Seq(b) - case _ => aggregations - } - val filtered: Option[Aggregation] = - first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) - first.nestedAgg.get.subAggregations(filtered.map(Seq(_)).getOrElse(buckets)) - } + aggregations } } else { _search } - _search = notNestedAggregations match { - case Nil => _search - case _ => - _search aggregations { - val first = notNestedAggregations.head - val aggregationDirections: Map[String, SortOrder] = notNestedAggregations - .filter(_.direction.isDefined) - .map(agg => agg.agg.name -> agg.direction.get) - .toMap - val aggregations = notNestedAggregations.map(_.agg) - val buckets = ElasticAggregation.buildBuckets( - notNestedBuckets, - request.sorts -- aggregationDirections.keys, - aggregations, - aggregationDirections, - request.having.flatMap(_.criteria) - ) match { - case Some(b) => Seq(b) - case _ => aggregations - } - val filtered: Option[Aggregation] = - first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) - filtered.map(Seq(_)).getOrElse(buckets) - } - } - _search = scriptFields.filterNot(_.aggregation) match { case Nil => _search case _ => _search scriptfields scriptFields.map { field => scriptField( field.scriptName, - Script(script = field.painless) + Script(script = field.painless()) .lang("painless") .scriptType("source") .params(field.identifier.functions.headOption match { @@ -164,7 +352,7 @@ package object bridge { case _ => true })) ) { - return scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + return scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } // Geo distance special case identifier.functions.headOption match { @@ -378,10 +566,10 @@ package object bridge { case NE | DIFF => not(rangeQuery(identifier.name) gte script lte script) } case _ => - scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } case _ => - scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } case _ => matchAllQuery() } @@ -535,7 +723,7 @@ package object bridge { case _ => scriptQuery( Script( - script = distanceCriteria.painless, + script = distanceCriteria.painless(), lang = Some("painless"), scriptType = Source, params = distance.params @@ -565,6 +753,7 @@ package object bridge { ElasticQuery(filter) } + @deprecated implicit def sqlQueryToAggregations( query: SQLQuery ): Seq[ElasticAggregation] = { @@ -572,6 +761,7 @@ package object bridge { request .map { case Left(l) => + val filteredAgg: Option[FilterAggregation] = requestToFilterAggregation(l) l.aggregates .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts)) .map(aggregation => { @@ -597,7 +787,7 @@ package object bridge { } aggregations { val filtered = - aggregation.filteredAgg match { + filteredAgg match { case Some(filtered) => filtered.subAggregations(aggregation.agg) case _ => aggregation.agg } diff --git a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala index 864a0011..88221237 100644 --- a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala +++ b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala @@ -413,80 +413,84 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { } it should "filter nested predicate" in { - asQuery(nestedPredicate) shouldBe """{ - - |"query":{ - | "bool":{ - | "filter" : [ - | { - | "term" : { - | "identifier1" : { - | "value" : 1 - | } - | } - | }, - | { - | "nested" : { - | "path" : "nested", - | "query" : { - | "bool" : { - | "should" : [ - | { - | "range" : { - | "nested.identifier2" : { - | "gt" : 2 - | } - | } - | }, - | { - | "term" : { - | "nested.identifier3" : { - | "value" : 3 - | } - | } - | } - | ] - | } - | }, - | "inner_hits":{"name":"nested","from":0,"size":3} - | } - | } - | ] - | } - | } - |}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedPredicate) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "identifier1": { + | "value": 1 + | } + | } + | }, + | { + | "nested": { + | "path": "nested", + | "query": { + | "bool": { + | "should": [ + | { + | "range": { + | "nested.identifier2": { + | "gt": 2 + | } + | } + | }, + | { + | "term": { + | "nested.identifier3": { + | "value": 3 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "nested" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter nested criteria" in { - asQuery(nestedCriteria) shouldBe """{ - - |"query":{ - | "bool":{ - | "filter" : [ - | { - | "term" : { - | "identifier1" : { - | "value" : 1 - | } - | } - | }, - | { - | "nested" : { - | "path" : "nested", - | "query" : { - | "term" : { - | "nested.identifier3" : { - | "value" : 3 - | } - | } - | }, - | "inner_hits":{"name":"nested","from":0,"size":3} - | } - | } - | ] - | } - | } - |}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedCriteria) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "identifier1": { + | "value": 1 + | } + | } + | }, + | { + | "nested": { + | "path": "nested", + | "query": { + | "term": { + | "nested.identifier3": { + | "value": 3 + | } + | } + | }, + | "inner_hits": { + | "name": "nested" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter child predicate" in { @@ -670,36 +674,44 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { } it should "filter nested with between" in { - asQuery(nestedWithBetween) shouldBe """{ - - |"query":{ - | "bool":{"filter":[{"nested" : { - | "path" : "ciblage", - | "query" : { - | "bool" : { - | "filter" : [ - | { - | "range" : { - | "ciblage.Archivage_CreationDate" : { - | "gte" : "NOW-3M/M", - | "lte" : "NOW" - | } - | } - | }, - | { - | "term" : { - | "ciblage.statutComportement" : { - | "value" : 1 - | } - | } - | } - | ] - | } - | }, - | "inner_hits":{"name":"ciblage","from":0,"size":3} - | } - | } - |]}}}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedWithBetween) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "ciblage", + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "ciblage.Archivage_CreationDate": { + | "gte": "now-3M/M", + | "lte": "now" + | } + | } + | }, + | { + | "term": { + | "ciblage.statutComportement": { + | "value": 1 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "ciblage" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter boolean eq" in { diff --git a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index a32ef594..8a5cef5e 100644 --- a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3,7 +3,6 @@ package app.softnetwork.elastic.sql import app.softnetwork.elastic.sql.bridge._ import app.softnetwork.elastic.sql.Queries._ import app.softnetwork.elastic.sql.query.SQLQuery -import com.google.gson.{JsonArray, JsonObject, JsonParser, JsonPrimitive} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers @@ -102,13 +101,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as email from index i join unnest(emails) as inner_emails where i.nom = \"Nom\"" + "select count(inner_emails.value) as email from index i join unnest(i.emails) as inner_emails where i.nom = \"Nom\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.email" + result.aggName shouldBe "inner_emails.email" result.field shouldBe "email" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -128,7 +127,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_emails": { + | "inner_emails": { | "nested": { | "path": "emails" | }, @@ -147,273 +146,279 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with nested criteria" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" + "select count(inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.count_emails" + result.aggName shouldBe "inner_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe + val query = result.query.getOrElse("") + println(query) + query shouldBe """{ - | "query": { - | "bool":{ - | "filter": [ - | { - | "term": { - | "nom": { - | "value": "Nom" - | } - | } - | }, - | { - | "nested": { - | "path": "profiles", - | "query": { - | "terms": { - | "profiles.postalCode": [ - | "75001", - | "75002" - | ] - | } - | }, - | "inner_hits":{"name":"inner_profiles","from":0,"size":3} - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "nested_emails": { - | "nested": { - | "path": "emails" - | }, - | "aggs": { - | "count_emails": { - | "value_count": { - | "field": "emails.value" - | } - | } - | } - | } - | } - |}""".stripMargin.replaceAll("\\s+", "") + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "nom": { + | "value": "Nom" + | } + | } + | }, + | { + | "nested": { + | "path": "profiles", + | "query": { + | "terms": { + | "profiles.postalCode": [ + | "75001", + | "75002" + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "inner_emails": { + | "nested": { + | "path": "emails" + | }, + | "aggs": { + | "count_emails": { + | "value_count": { + | "field": "emails.value" + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested count with filter" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" + "select count(inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.filtered_agg.count_emails" + result.aggName shouldBe "inner_emails.filtered_agg.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe + val query = result.query.getOrElse("") + println(query) + query shouldBe """{ - | "query": { - | "bool":{ - | "filter": [ - | { - | "term": { - | "nom": { - | "value": "Nom" - | } - | } - | }, - | { - | "nested": { - | "path": "profiles", - | "query": { - | "terms": { - | "profiles.postalCode": [ - | "75001", - | "75002" - | ] - | } - | }, - | "inner_hits":{"name":"inner_profiles","from":0,"size":3} - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "nested_emails": { - | "nested": { - | "path": "emails" - | }, - | "aggs": { - | "filtered_agg": { - | "filter": { - | "term": { - | "emails.context": { - | "value": "profile" - | } - | } - | }, - | "aggs": { - | "count_emails": { - | "value_count": { - | "field": "emails.value" - | } - | } - | } - | } - | } - | } - | } - |}""".stripMargin.replaceAll("\\s+", "") + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "nom": { + | "value": "Nom" + | } + | } + | }, + | { + | "nested": { + | "path": "profiles", + | "query": { + | "terms": { + | "profiles.postalCode": [ + | "75001", + | "75002" + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "inner_emails": { + | "nested": { + | "path": "emails" + | }, + | "aggs": { + | "filtered_agg": { + | "filter": { + | "term": { + | "emails.context": { + | "value": "profile" + | } + | } + | }, + | "aggs": { + | "count_emails": { + | "value_count": { + | "field": "emails.value" + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested count with \"and not\" operator" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(distinct inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where ((inner_profiles.postalCode = \"33600\") and (inner_profiles.postalCode <> \"75001\"))" + "select count(distinct inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where ((inner_profiles.postalCode = \"33600\") and (inner_profiles.postalCode <> \"75001\"))" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_emails.count_emails" + result.aggName shouldBe "inner_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe - """ - |{ - | "query": { - | "bool": { - | "filter": [ - | { - | "nested": { - | "path": "profiles", - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "profiles.postalCode": { - | "value": "33600" - | } - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "profiles.postalCode": { - | "value": "75001" - | } - | } - | } - | ] - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "inner_profiles", - | "from": 0, - | "size": 3 - | } - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "nested_emails": { - | "nested": { - | "path": "emails" - | }, - | "aggs": { - | "count_emails": { - | "cardinality": { - | "field": "emails.value" - | } - | } - | } - | } - | } - |} - |""".stripMargin.replaceAll("\\s+", "") + val query = result.query.getOrElse("") + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "profiles", + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "profiles.postalCode": { + | "value": "33600" + | } + | } + | }, + | { + | "bool": { + | "must_not": [ + | { + | "term": { + | "profiles.postalCode": { + | "value": "75001" + | } + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "inner_emails": { + | "nested": { + | "path": "emails" + | }, + | "aggs": { + | "count_emails": { + | "cardinality": { + | "field": "emails.value" + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested count with date filtering" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(distinct inner_emails.value) as count_distinct_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where inner_profiles.postalCode = \"33600\" and inner_profiles.createdDate <= \"now-35M/M\"" + "select count(distinct inner_emails.value) as count_distinct_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where inner_profiles.postalCode = \"33600\" and inner_profiles.createdDate <= \"now-35M/M\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_emails.count_distinct_emails" + result.aggName shouldBe "inner_emails.count_distinct_emails" result.field shouldBe "count_distinct_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe + val query = result.query.getOrElse("") + println(query) + query shouldBe """{ - "query": { - | "bool": { - | "filter": [ - | { - | "nested": { - | "path": "profiles", - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "profiles.postalCode": { - | "value": "33600" - | } - | } - | }, - | { - | "range": { - | "profiles.createdDate": { - | "lte": "now-35M/M" - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "inner_profiles", - | "from": 0, - | "size": 3 - | } + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "profiles", + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "profiles.postalCode": { + | "value": "33600" + | } | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "nested_emails": { - | "nested": { - | "path": "emails" - | }, - | "aggs": { - | "count_distinct_emails": { - | "cardinality": { - | "field": "emails.value" + | }, + | { + | "range": { + | "profiles.createdDate": { + | "lte": "now-35M/M" + | } | } - | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" | } + | } | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "inner_emails": { + | "nested": { + | "path": "emails" + | }, + | "aggs": { + | "count_distinct_emails": { + | "cardinality": { + | "field": "emails.value" + | } + | } + | } | } + | } |}""".stripMargin.replaceAll("\\s+", "") } @@ -428,76 +433,71 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |profile_ccm.lastName as lastName, |profile_ccm.postalCode as postalCode, |profile_ccm.birthYear as birthYear - |FROM index join unnest(profiles) as profile_ccm + |FROM index join unnest(index.profiles) as profile_ccm |WHERE |((profile_ccm.postalCode BETWEEN "10" AND "99999") |AND |(profile_ccm.birthYear <= 2000)) |limit 100""".stripMargin) val query = select.query - val queryWithoutSource = query.substring(0, query.indexOf("_source") - 2) + "}" - queryWithoutSource shouldBe + println(query) + query shouldBe """{ - | "query": { - | "bool": { - | "filter": [ - | { - | "nested": { - | "path": "profiles", - | "query": { - | "bool": { - | "filter": [ - | { - | "range": { - | "profiles.postalCode": { - | "gte": "10", - | "lte": "99999" - | } - | } - | }, - | { - | "range": { - | "profiles.birthYear": { - | "lte": 2000 - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "profile_ccm", - | "from": 0, - | "size": 3 - | } + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "profiles", + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "profiles.postalCode": { + | "gte": "10", + | "lte": "99999" + | } | } - | } - | ] + | }, + | { + | "range": { + | "profiles.birthYear": { + | "lte": 2000 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "profile_ccm", + | "from": 0, + | "_source": { + | "includes": [ + | "profiles.email", + | "profiles.city", + | "profiles.firstName", + | "profiles.lastName", + | "profiles.postalCode", + | "profiles.birthYear" + | ] + | }, + | "size": 100 + | } + | } | } - | }, - | "from": 0, - | "size": 100 + | ] + | } + | }, + | "from": 0, + | "size": 100, + | "_source": { + | "includes": [ + | "profileId" + | ] + | } |}""".stripMargin.replaceAll("\\s+", "") - val includes = new JsonParser() - .parse(query.substring(query.indexOf("_source") + 9, query.length - 1)) - .asInstanceOf[JsonObject] - .get("includes") - .asInstanceOf[JsonArray] - .iterator() - .asScala - val sourceIncludes: Seq[String] = ( - for (i <- includes) yield i.asInstanceOf[JsonPrimitive].getAsString - ).toSeq - val expectedSourceIncludes = Seq( - "profileId", - "profile_ccm.email", - "profile_ccm.city", - "profile_ccm.firstName", - "profile_ccm.lastName", - "profile_ccm.postalCode", - "profile_ccm.birthYear" - ) - sourceIncludes should contain theSameElementsAs expectedSourceIncludes } it should "exclude fields from select" in { @@ -531,73 +531,36 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "Country": { - | "value": "USA" - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "City": { - | "value": "Berlin" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | } - | ] + | "Country": { + | "terms": { + | "field": "Country.keyword", + | "exclude": "USA", + | "order": { + | "_key": "asc" | } | }, | "aggs": { - | "Country": { + | "City": { | "terms": { - | "field": "Country.keyword", + | "field": "City.keyword", + | "exclude": "Berlin", | "order": { - | "Country": "asc" + | "cnt": "desc" | } | }, | "aggs": { - | "City": { - | "terms": { - | "field": "City.keyword", - | "order": { - | "cnt": "desc" - | } - | }, - | "aggs": { - | "cnt": { - | "value_count": { - | "field": "CustomerID" - | } + | "cnt": { + | "value_count": { + | "field": "CustomerID" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "cnt": "cnt" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "cnt": "cnt" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && params.cnt > 1" - | } - | } + | "script": { + | "source": "params.cnt > 1" | } | } | } @@ -622,7 +585,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | max(inner_products.price) as max_price |FROM | stores store - | JOIN UNNEST(store.products LIMIT 10) as inner_products + | JOIN UNNEST(store.products) as inner_products |WHERE | ( | firstName is not null AND @@ -642,231 +605,157 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |HAVING inner_products.deleted=false AND | inner_products.upForSale=true AND | inner_products.stock > 0 AND - | match (inner_products.name) against ("lasagnes") AND - | match (inner_products.description, inner_products.ingredients) against ("lasagnes") AND + | match ( + | inner_products.name, + | inner_products.description, + | inner_products.ingredients + | ) against ("lasagnes") AND | min(inner_products.price) > 5.0 AND | max(inner_products.price) < 50.0 AND - | inner_products.category <> "coffee"""".stripMargin + | inner_products.category <> "coffee" + | LIMIT 10""".stripMargin ).minScore(1.0) val query = select.query println(query) query shouldBe - """ - |{ - | "query": { - | "bool": { - | "filter": [ - | { - | "bool": { - | "filter": [ - | { - | "exists": { - | "field": "firstName" - | } - | }, - | { - | "exists": { - | "field": "lastName" - | } - | }, - | { - | "exists": { - | "field": "description" - | } - | }, - | { - | "range": { - | "preparationTime": { - | "lte": 120 - | } - | } - | }, - | { - | "term": { - | "deliveryPeriods.dayOfWeek": { - | "value": 6 - | } - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "regexp": { - | "blockedCustomers": { - | "value": ".*uuid.*" - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "receiptOfOrdersDisabled": { - | "value": true - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "should": [ - | { - | "geo_distance": { - | "distance": "7000m", - | "pickup.location": [ - | 0.0, - | 0.0 - | ] - | } - | }, - | { - | "geo_distance": { - | "distance": "7000m", - | "withdrawals.location": [ - | 0.0, - | 0.0 - | ] - | } - | } - | ] - | } - | } - | ] - | } - | } - | ] - | } - | }, - | "size": 0, - | "min_score": 1.0, - | "_source": true, - | "aggs": { - | "nested_products": { - | "nested": { - | "path": "products" - | }, - | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "term": { - | "products.deleted": { - | "value": false - | } - | } - | }, - | { - | "term": { - | "products.upForSale": { - | "value": true - | } - | } - | }, - | { - | "range": { - | "products.stock": { - | "gt": 0 - | } - | } - | }, - | { - | "match": { - | "products.name": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "bool": { - | "should": [ - | { - | "match": { - | "products.description": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "match": { - | "products.ingredients": { - | "query": "lasagnes" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | }, - | { - | "match_all": {} - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "products.category": { - | "value": "coffee" - | } - | } - | } - | ] - | } - | } - | ] - | } - | }, - | "aggs": { - | "cat": { - | "terms": { - | "field": "products.category.keyword" - | }, - | "aggs": { - | "min_price": { - | "min": { - | "field": "products.price" - | } - | }, - | "max_price": { - | "max": { - | "field": "products.price" - | } - | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "min_price": "min_price", - | "max_price": "max_price" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && 1 == 1 && 1 == 1 && 1 == 1 && params.min_price > 5.0 && params.max_price < 50.0 && 1 == 1" - | } - | } - | } - | } - | } - | } - | } - | } - | } - | } - |}""".stripMargin + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "bool": { + | "filter": [ + | { + | "exists": { + | "field": "firstName" + | } + | }, + | { + | "exists": { + | "field": "lastName" + | } + | }, + | { + | "exists": { + | "field": "description" + | } + | }, + | { + | "range": { + | "preparationTime": { + | "lte": 120 + | } + | } + | }, + | { + | "term": { + | "deliveryPeriods.dayOfWeek": { + | "value": 6 + | } + | } + | }, + | { + | "bool": { + | "must_not": [ + | { + | "regexp": { + | "blockedCustomers": { + | "value": ".*uuid.*" + | } + | } + | } + | ] + | } + | }, + | { + | "bool": { + | "must_not": [ + | { + | "term": { + | "receiptOfOrdersDisabled": { + | "value": true + | } + | } + | } + | ] + | } + | }, + | { + | "bool": { + | "should": [ + | { + | "geo_distance": { + | "distance": "7000m", + | "pickup.location": [ + | 0.0, + | 0.0 + | ] + | } + | }, + | { + | "geo_distance": { + | "distance": "7000m", + | "withdrawals.location": [ + | 0.0, + | 0.0 + | ] + | } + | } + | ] + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "size": 0, + | "min_score": 1.0, + | "_source": true, + | "aggs": { + | "inner_products": { + | "nested": { + | "path": "products" + | }, + | "aggs": { + | "cat": { + | "terms": { + | "field": "products.category.keyword" + | }, + | "aggs": { + | "min_price": { + | "min": { + | "field": "products.price" + | } + | }, + | "max_price": { + | "max": { + | "field": "products.price" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "min_price": "inner_products>min_price", + | "max_price": "inner_products>max_price" + | }, + | "script": { + | "source": "params.min_price > 5.0 && params.max_price < 50.0" + | } + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin .replaceAll("\\s+", "") .replaceAll("==", " == ") .replaceAll("&&", " && ") - .replaceAll("<", " < ") - .replaceAll(">", " > ") + .replaceAll("<(\\d)", " < $1") + .replaceAll(">(\\d)", " > $1") } @@ -1043,30 +932,23 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "match_all": {} + | "userId": { + | "terms": { + | "field": "userId.keyword" | }, | "aggs": { - | "userId": { - | "terms": { - | "field": "userId.keyword" - | }, - | "aggs": { - | "lastSeen": { - | "max": { - | "field": "createdAt" - | } + | "lastSeen": { + | "max": { + | "field": "createdAt" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "lastSeen": "lastSeen" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "lastSeen": "lastSeen" - | }, - | "script": { - | "source": "params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS)" - | } - | } + | "script": { + | "source": "params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS).toInstant().toEpochMilli()" | } | } | } @@ -1094,82 +976,39 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "Country": { - | "value": "USA" - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "City": { - | "value": "Berlin" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | }, - | { - | "range": { - | "lastSeen": { - | "gt": "now-7d" - | } - | } - | } - | ] + | "Country": { + | "terms": { + | "field": "Country.keyword", + | "exclude": "USA", + | "order": { + | "_key": "asc" | } | }, | "aggs": { - | "Country": { + | "City": { | "terms": { - | "field": "Country.keyword", - | "order": { - | "Country": "asc" - | } + | "field": "City.keyword", + | "exclude": "Berlin" | }, - | "aggs": { - | "City": { - | "terms": { - | "field": "City.keyword" - | }, - | "aggs": { - | "cnt": { - | "value_count": { - | "field": "CustomerID" - | } - | }, - | "lastSeen": { - | "max": { - | "field": "createdAt" - | } + | "aggs": { + | "cnt": { + | "value_count": { + | "field": "CustomerID" + | } + | }, + | "lastSeen": { + | "max": { + | "field": "createdAt" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "cnt": "cnt", + | "lastSeen": "lastSeen" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "cnt": "cnt" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && params.cnt > 1 && 1 == 1" - | } - | } + | "script": { + | "source": "params.cnt > 1 && params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS).toInstant().toEpochMilli()" | } | } | } @@ -2877,20 +2716,335 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("; if", ";if") .replaceAll("==", " == ") .replaceAll("\\+", " + ") - .replaceAll("-", " - ") - .replaceAll("\\*", " * ") - .replaceAll("/", " / ") - .replaceAll(">", " > ") + .replaceAll("-", " - ") + .replaceAll("\\*", " * ") + .replaceAll("/", " / ") + .replaceAll(">", " > ") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + } + + it should "handle geo distance as script fields and criteria" in { + val select: ElasticSearchRequest = + SQLQuery(geoDistance) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "bool": { + | "must": [ + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", + | "params": { + | "lat": -70.0, + | "lon": 40.0 + | } + | } + | } + | }, + | { + | "geo_distance": { + | "distance": "5000km", + | "toLocation": [ + | 40.0, + | -70.0 + | ] + | } + | } + | ] + | } + | }, + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); def arg1 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" + | } + | } + | }, + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "0.0 < 1000000.0" + | } + | } + | } + | ] + | } + | }, + | "script_fields": { + | "d1": { + | "script": { + | "lang": "painless", + | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "params": { + | "lat": -70.0, + | "lon": 40.0 + | } + | } + | }, + | "d2": { + | "script": { + | "lang": "painless", + | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", + | "params": { + | "lat": -70.0, + | "lon": 40.0 + | } + | } + | }, + | "d3": { + | "script": { + | "lang": "painless", + | "source": "8318612.0" + | } + | } + | }, + | "_source": true + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll("\\*", " * ") + .replaceAll("/", " / ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") + .replaceAll(">=", " >=") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + } + + it should "handle between with temporal" in { + val select: ElasticSearchRequest = + SQLQuery(betweenTemporal) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "createdAt": { + | "gte": "now-1M/d", + | "lte": "now/d" + | } + | } + | }, + | { + | "bool": { + | "must": [ + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); left == null ? false : left >= (def e2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" + | } + | } + | }, + | { + | "range": { + | "lastUpdated": { + | "lte": "now/d" + | } + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "_source": { + | "includes": [ + | "*" + | ] + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") + } + + it should "handle nested of nested" in { + val select: ElasticSearchRequest = + SQLQuery(nestedOfNested) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "comments", + | "query": { + | "nested": { + | "path": "comments.replies", + | "query": { + | "bool": { + | "filter": [ + | { + | "match": { + | "comments.content": { + | "query": "Nice" + | } + | } + | }, + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('comments.replies.lastUpdated') || doc['comments.replies.lastUpdated'].empty ? null : doc['comments.replies.lastUpdated'].value); left == null ? false : left < (def e2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.replies.reply_author", + | "comments.replies.reply_text" + | ] + | }, + | "size": 5 + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.author", + | "comments.comments" + | ] + | }, + | "size": 5 + | } + | } + | } + | ] + | } + | }, + | "from": 0, + | "size": 5, + | "_source": true + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") .replaceAll("<", " < ") .replaceAll("!=", " != ") .replaceAll("&&", " && ") .replaceAll("\\|\\|", " || ") .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") } - it should "handle geo distance as script fields and criteria" in { + it should "handle predicate with distinct nested" in { val select: ElasticSearchRequest = - SQLQuery(geoDistance) + SQLQuery(predicateWithDistinctNested) val query = select.query println(query) query shouldBe @@ -2900,81 +3054,69 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "filter": [ | { | "bool": { - | "must": [ + | "must_not": [ | { - | "script": { - | "script": { - | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon)) >= 4000000.0", - | "params": { - | "lat": -70.0, - | "lon": 40.0 + | "nested": { + | "path": "replies", + | "query": { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('replies.lastUpdated') || doc['replies.lastUpdated'].empty ? null : doc['replies.lastUpdated'].value); left == null ? false : left < (def e2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" + | } | } + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "replies.reply_author", + | "replies.reply_text" + | ] + | }, + | "size": 5 | } | } - | }, + | } + | ], + | "filter": [ | { - | "geo_distance": { - | "distance": "5000km", - | "toLocation": [ - | 40.0, - | -70.0 - | ] + | "nested": { + | "path": "comments", + | "query": { + | "match": { + | "comments.content": { + | "query": "Nice" + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.author", + | "comments.comments" + | ] + | }, + | "size": 5 + | } | } | } | ] | } - | }, - | { - | "script": { - | "script": { - | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); def arg1 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null || arg1 == null) ? null : arg0.arcDistance(arg1.lat, arg1.lon)) < 2000000.0" - | } - | } - | }, - | { - | "script": { - | "script": { - | "lang": "painless", - | "source": "0.0 < 1000000.0" - | } - | } | } | ] | } | }, - | "script_fields": { - | "d1": { - | "script": { - | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('toLocation') || doc['toLocation'].empty ? null : doc['toLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", - | "params": { - | "lat": -70.0, - | "lon": 40.0 - | } - | } - | }, - | "d2": { - | "script": { - | "lang": "painless", - | "source": "(def arg0 = (!doc.containsKey('fromLocation') || doc['fromLocation'].empty ? null : doc['fromLocation']); (arg0 == null) ? null : arg0.arcDistance(params.lat, params.lon))", - | "params": { - | "lat": -70.0, - | "lon": 40.0 - | } - | } - | }, - | "d3": { - | "script": { - | "lang": "painless", - | "source": "8318612.0" - | } - | } - | }, + | "from": 0, + | "size": 5, | "_source": true |}""".stripMargin .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") .replaceAll("defv", " def v") .replaceAll("defa", "def a") .replaceAll("defe", "def e") @@ -2984,6 +3126,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(",_", ", _") .replaceAll(",\\(", ", (") .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") .replaceAll("=\\(", " = (") .replaceAll(":\\(", " : (") .replaceAll(",(\\d)", ", $1") @@ -2995,11 +3138,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("; if", ";if") .replaceAll("==", " == ") .replaceAll("\\+", " + ") - .replaceAll("\\*", " * ") - .replaceAll("/", " / ") .replaceAll(">(\\d)", " > $1") .replaceAll("=(\\d)", "= $1") - .replaceAll(">=", " >=") .replaceAll("<", " < ") .replaceAll("!=", " != ") .replaceAll("&&", " && ") @@ -3008,11 +3148,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(",params", ", params") .replaceAll("GeoPoint", " GeoPoint") .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") } - it should "handle between with temporal" in { + it should "handle nested without criteria" in { val select: ElasticSearchRequest = - SQLQuery(betweenTemporal) + SQLQuery(nestedWithoutCriteria) val query = select.query println(query) query shouldBe @@ -3021,45 +3163,64 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "bool": { | "filter": [ | { - | "range": { - | "createdAt": { - | "gte": "now-1M/d", - | "lte": "now/d" - | } - | } - | }, - | { | "bool": { - | "must": [ + | "filter": [ | { | "script": { | "script": { | "lang": "painless", - | "source": "def left = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); left == null ? false : left >= (def e2 = LocalDate.parse(\"2025-09-11\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" - | } - | } - | }, - | { - | "range": { - | "lastUpdated": { - | "lte": "now/d" + | "source": "def left = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); left == null ? false : left < ZonedDateTime.now(ZoneId.of('Z')).toLocalDate()" | } | } | } | ] | } + | }, + | { + | "nested": { + | "path": "comments", + | "query": { + | "nested": { + | "path": "comments.replies", + | "query": { + | "match_all": {} + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "reply_author", + | "reply_text" + | ] + | }, + | "size": 5 + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "author", + | "comments" + | ] + | }, + | "size": 5 + | } + | } | } | ] | } | }, - | "_source": { - | "includes": [ - | "*" - | ] - | } + | "from": 0, + | "size": 5, + | "_source": true |}""".stripMargin .replaceAll("\\s+", "") .replaceAll("\\s+", "") + .replaceAll("\\s+", "") .replaceAll("defv", " def v") .replaceAll("defa", "def a") .replaceAll("defe", "def e") @@ -3094,4 +3255,146 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("false:", "false : ") .replaceAll("DateTimeFormatter", " DateTimeFormatter") } + + it should "determine the aggregation context" in { + val select: ElasticSearchRequest = + SQLQuery(determinationOfTheAggregationContext) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "avg_popularity": { + | "avg": { + | "field": "popularity" + | } + | }, + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "avg_comment_likes": { + | "avg": { + | "field": "comments.likes" + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + + it should "handle aggregation with nested of nested context" in { + val select: ElasticSearchRequest = + SQLQuery(aggregationWithNestedOfNestedContext) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "replies": { + | "nested": { + | "path": "comments.replies" + | }, + | "aggs": { + | "avg_reply_likes": { + | "avg": { + | "field": "comments.replies.likes" + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + + it should "handle where filters according to scope" in { + val select: ElasticSearchRequest = + SQLQuery(whereFiltersAccordingToScope) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "status": { + | "value": "active" + | } + | } + | }, + | { + | "nested": { + | "path": "comments", + | "query": { + | "term": { + | "comments.sentiment": { + | "value": "positive" + | } + | } + | }, + | "inner_hits": { + | "name": "comments" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "filtered_comments": { + | "filter": { + | "bool": { + | "filter": [ + | { + | "term": { + | "comments.sentiment": { + | "value": "positive" + | } + | } + | } + | ] + | } + | }, + | "aggs": { + | "nb_comments": { + | "value_count": { + | "field": "comments.id" + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + } diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 6b799e46..1b8950c3 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -2,12 +2,14 @@ package app.softnetwork.elastic.sql.bridge import app.softnetwork.elastic.sql.query.{ Asc, - BucketSelectorScript, - ElasticBoolQuery, - Field, Bucket, + BucketIncludesExcludes, + MetricSelectorScript, + Field, Criteria, Desc, + NestedElement, + NestedElements, SortOrder } import app.softnetwork.elastic.sql.function._ @@ -16,7 +18,6 @@ import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, bucketSelectorAggregation, cardinalityAgg, - filterAgg, maxAgg, minAgg, nestedAggregation, @@ -48,8 +49,9 @@ case class ElasticAggregation( filteredAgg: Option[FilterAggregation] = None, aggType: AggregateFunction, agg: Aggregation, - direction: Option[SortOrder] = None) { - val nested: Boolean = nestedAgg.nonEmpty + direction: Option[SortOrder] = None, + nestedElement: Option[NestedElement] = None) { + val nested: Boolean = nestedElement.nonEmpty val filtered: Boolean = filteredAgg.nonEmpty } @@ -60,7 +62,7 @@ object ElasticAggregation { bucketsDirection: Map[String, SortOrder] ): ElasticAggregation = { import sqlAgg._ - val sourceField = identifier.name + val sourceField = identifier.path val direction = bucketsDirection.get(identifier.identifierName) @@ -102,7 +104,7 @@ object ElasticAggregation { buildScript: (String, Script) => Aggregation ): Aggregation = { if (transformFuncs.nonEmpty) { - val scriptSrc = identifier.painless + val scriptSrc = identifier.painless() val script = Script(scriptSrc).lang("painless") buildScript(aggName, script) } else { @@ -140,7 +142,7 @@ object ElasticAggregation { Array.empty ).copy( scripts = th.fields.filter(_.isScriptField).map(f => - f.sourceField -> Script(f.painless).lang("painless") + f.sourceField -> Script(f.painless()).lang("painless") ).toMap ) .size(limit) sortBy th.orderBy.sorts.map(sort => @@ -165,23 +167,8 @@ object ElasticAggregation { val filteredAggName = "filtered_agg" - val filteredAgg: Option[FilterAggregation] = - having match { - case Some(f) => - val boolQuery = Option(ElasticBoolQuery(group = true)) - Some( - filterAgg( - filteredAggName, - f.asFilter(boolQuery) - .query(Set(identifier.innerHitsName).flatten, boolQuery) - ) - ) - case _ => - None - } - def filtered(): Unit = - filteredAgg match { + having match { case Some(_) => aggPath ++= Seq(filteredAggName) aggPath ++= Seq(aggName) @@ -189,28 +176,55 @@ object ElasticAggregation { aggPath ++= Seq(aggName) } + val nestedElement = identifier.nestedElement + + val nestedElements: Seq[NestedElement] = + nestedElement.map(n => NestedElements.buildNestedTrees(Seq(n))).getOrElse(Nil) + val nestedAgg = - if (identifier.nested) { - val path = sourceField.split("\\.").head - val nestedAgg = s"nested_${identifier.nestedType.getOrElse(aggName)}" - aggPath ++= Seq(nestedAgg) - filtered() - Some(nestedAggregation(nestedAgg, path)) - } else { - filtered() - None + nestedElements match { + case Nil => + None + case nestedElements => + def buildNested(n: NestedElement): NestedAggregation = { + aggPath ++= Seq(n.innerHitsName) + val children = n.children + if (children.nonEmpty) { + val innerAggs = children.map(buildNested) + val combinedAgg = if (innerAggs.size == 1) { + innerAggs.head + } else { + innerAggs.reduceLeft { (agg1, agg2) => + agg1.copy(subaggs = agg1.subaggs ++ Seq(agg2)) + } + } + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs Seq(combinedAgg) + } else { + nestedAggregation( + n.innerHitsName, + n.path + ) + } + } + + Some(buildNested(nestedElements.head)) } + filtered() + ElasticAggregation( aggPath.mkString("."), field, sourceField, distinct = distinct, nestedAgg = nestedAgg, - filteredAgg = filteredAgg, aggType = aggType, agg = _agg, - direction = direction + direction = direction, + nestedElement = nestedElement ) } @@ -223,18 +237,37 @@ object ElasticAggregation { ): Option[TermsAggregation] = { Console.println(bucketsDirection) buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => - val agg = { + var agg = { bucketsDirection.get(bucket.identifier.identifierName) match { case Some(direction) => - termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + termsAgg(bucket.name, s"${bucket.identifier.path}.keyword") .order(Seq(direction match { - case Asc => TermsOrder(bucket.name, asc = true) - case _ => TermsOrder(bucket.name, asc = false) + case Asc => TermsOrder("_key", asc = true) + case _ => TermsOrder("_key", asc = false) })) case None => - termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + termsAgg(bucket.name, s"${bucket.identifier.path}.keyword") } } + bucket.size.foreach(s => agg = agg.size(s)) + having match { + case Some(criteria) => + criteria.includes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = agg.includeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = agg.includeExactValues(values.toArray) + case _ => + } + criteria.excludes(bucket, not = false, BucketIncludesExcludes()) match { + case BucketIncludesExcludes(_, Some(regex)) if regex.nonEmpty => + agg = agg.excludeRegex(regex) + case BucketIncludesExcludes(values, _) if values.nonEmpty => + agg = agg.excludeExactValues(values.toArray) + case _ => + } + case _ => + } current match { case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) case None => @@ -251,12 +284,15 @@ object ElasticAggregation { agg val withHaving = having match { case Some(criteria) => - import BucketSelectorScript._ - val script = toPainless(criteria) - val bucketsPath = extractBucketsPath(criteria) + val script = MetricSelectorScript.metricSelector(criteria) + val bucketsPath = criteria.extractMetricsPath val bucketSelector = - bucketSelectorAggregation("having_filter", Script(script), bucketsPath) + bucketSelectorAggregation( + "having_filter", + Script(script.replaceAll("1 == 1 &&", "").replaceAll("&& 1 == 1", "").trim), + bucketsPath + ) withAggregationOrders.copy(subaggs = aggregations :+ bucketSelector) diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala index 04c558f2..8cc8150a 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala @@ -1,23 +1,29 @@ package app.softnetwork.elastic.sql.bridge +import app.softnetwork.elastic.sql.operator.AND import app.softnetwork.elastic.sql.query.{ + BetweenExpr, ElasticBoolQuery, ElasticChild, ElasticFilter, - DistanceCriteria, ElasticMatch, ElasticNested, ElasticParent, - BetweenExpr, GenericExpression, InExpr, - IsNotNullExpr, IsNotNullCriteria, + IsNotNullExpr, + IsNullCriteria, IsNullExpr, - IsNullCriteria + NestedElement, + NestedElements, + Predicate } import com.sksamuel.elastic4s.ElasticApi._ -import com.sksamuel.elastic4s.requests.searches.queries.Query +import com.sksamuel.elastic4s.requests.common.FetchSourceContext +import com.sksamuel.elastic4s.requests.searches.queries.{InnerHit, Query} + +import scala.annotation.tailrec case class ElasticQuery(filter: ElasticFilter) { def query( @@ -39,15 +45,98 @@ case class ElasticQuery(filter: ElasticFilter) { criteria.asFilter(currentQuery).query(innerHitsNames, currentQuery) } else { val boolQuery = Option(ElasticBoolQuery(group = true)) - nestedQuery( - relationType.getOrElse(""), - criteria - .asFilter(boolQuery) - .query(innerHitsNames + innerHitsName.getOrElse(""), boolQuery) - ) /*.scoreMode(ScoreMode.None)*/ - .inner( - innerHits(innerHitsName.getOrElse("")).from(0).size(limit.map(_.limit).getOrElse(3)) - ) + val q = criteria + .asFilter(boolQuery) + .query(innerHitsNames + innerHitsName.getOrElse(""), boolQuery) + + NestedElements.buildNestedTrees(criteria.nestedElements) match { + case Nil => + matchAllQuery() + case nestedTrees => + def nestedInner(n: NestedElement): InnerHit = { + var inner = innerHits(n.innerHitsName) + n.size match { + case Some(s) => + inner = inner.from(0).size(s) + case _ => + } + if (n.sources.nonEmpty) { + inner = inner.fetchSource( + FetchSourceContext( + fetchSource = true, + includes = n.sources.map {source => + (n.path.split('.').toSeq ++ Seq(source)).mkString(".") + }.toArray + ) + ) + } + inner + } + + def buildNestedQuery(n: NestedElement, q: Query): Query = { + val children = n.children + if (children.nonEmpty) { + val innerQueries = children.map(child => buildNestedQuery(child, q)) + val combinedQuery = if (innerQueries.size == 1) { + innerQueries.head + } else { + must(innerQueries) + } + nestedQuery( + n.path, + combinedQuery + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } else { + nestedQuery( + n.path, + q + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } + } + + criteria match { + case p: Predicate if nestedTrees.size > 1 => + val leftNested = ElasticNested(p.leftCriteria, p.leftCriteria.limit) + val leftBoolQuery = Option(ElasticBoolQuery(group = true)) + val leftQuery = ElasticQuery(leftNested) + .query(innerHitsNames /*++ leftNested.innerHitsName.toSet*/, leftBoolQuery) + + val rightNested = ElasticNested(p.rightCriteria, p.rightCriteria.limit) + val rightBoolQuery = Option(ElasticBoolQuery(group = true)) + val rightQuery = ElasticQuery(rightNested) + .query(innerHitsNames /*++ rightNested.innerHitsName.toSet*/, rightBoolQuery) + + p.operator match { + case AND => + p.not match { + case Some(_) => not(rightQuery).filter(leftQuery) + case _ => must(leftQuery, rightQuery) + } + case _ => + p.not match { + case Some(_) => not(rightQuery).should(leftQuery) + case _ => should(leftQuery, rightQuery) + } + } + case _ => + val boolQuery = Option(ElasticBoolQuery(group = true)) + val q = criteria + .asFilter(boolQuery) + .query(innerHitsNames + innerHitsName.getOrElse(""), boolQuery) + if (nestedTrees.size == 1) { + buildNestedQuery(nestedTrees.head, q) + } else { + val innerQueries = nestedTrees.map(nested => buildNestedQuery(nested, q)) + must(innerQueries) + } + } + } } case child: ElasticChild => import child._ diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index a93097d8..343da652 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -8,10 +8,11 @@ import app.softnetwork.elastic.sql.query._ import com.sksamuel.elastic4s.ElasticApi import com.sksamuel.elastic4s.ElasticApi._ +import com.sksamuel.elastic4s.requests.common.FetchSourceContext import com.sksamuel.elastic4s.requests.script.Script import com.sksamuel.elastic4s.requests.script.ScriptType.Source -import com.sksamuel.elastic4s.requests.searches.aggs.Aggregation -import com.sksamuel.elastic4s.requests.searches.queries.Query +import com.sksamuel.elastic4s.requests.searches.aggs.{Aggregation, FilterAggregation, NestedAggregation} +import com.sksamuel.elastic4s.requests.searches.queries.{InnerHit, Query} import com.sksamuel.elastic4s.requests.searches.sort.FieldSort import com.sksamuel.elastic4s.requests.searches.{ MultiSearchRequest, @@ -22,6 +23,228 @@ import com.sksamuel.elastic4s.requests.searches.{ import scala.language.implicitConversions package object bridge { + + implicit def requestToNestedFilterAggregation( + request: SQLSearchRequest, + innerHitsName: String + ): Option[FilterAggregation] = + request.where.flatMap(_.criteria) match { + case Some(f) => + f.nestedCriteria(innerHitsName) match { + case Nil => None + case cs => + val boolQuery = ElasticBoolQuery(group = true) + cs.map(c => boolQuery.filter(c.asFilter(Option(boolQuery)))) + Some( + filterAgg( + s"filtered_$innerHitsName", + boolQuery.query(request.aggregates.flatMap(_.identifier.innerHitsName).toSet, Option(boolQuery)) + ) + ) + } + case _ => + None + } + + implicit def requestToFilterAggregation( + request: SQLSearchRequest + ): Option[FilterAggregation] = + request.having.flatMap(_.criteria) match { + case Some(f) => + val boolQuery = Option(ElasticBoolQuery(group = true)) + Some( + filterAgg( + "filtered_agg", + f.asFilter(boolQuery) + .query(request.aggregates.flatMap(_.identifier.innerHitsName).toSet, boolQuery) + ) + ) + case _ => + None + } + + implicit def requestToRootAggregations( + request: SQLSearchRequest + ): Seq[Aggregation] = { + val aggregations = request.aggregates.map( + ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ) + + val notNestedAggregations = aggregations.filterNot(_.nested) + + val rootAggregations = notNestedAggregations match { + case Nil => Nil + case aggs => + val directions: Map[String, SortOrder] = aggs + .filter(_.direction.isDefined) + .map(agg => agg.agg.name -> agg.direction.get) + .toMap + val aggregations = aggs.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + request.buckets.filterNot(_.nested), + request.sorts -- directions.keys, + aggregations, + directions, + request.having.flatMap(_.criteria) + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + buckets + } + rootAggregations + } + + implicit def requestToScopedAggregations( + request: SQLSearchRequest + ): Seq[NestedAggregation] = { + val aggregations = request.aggregates.map( + ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts) + ) + + val nestedAggregations: Map[String, Seq[ElasticAggregation]] = aggregations + .filter(_.nested) + .groupBy( + _.nestedElement + .map(_.path) + .getOrElse( + throw new IllegalArgumentException("Nested aggregation must have a nested element") + ) + ) + + val nestedGroupedBuckets = + request.buckets + .filter(_.nested) + .groupBy( + _.nestedBucket.getOrElse( + throw new IllegalArgumentException( + "Nested bucket must have a nested element" + ) + ) + ) + + val havingCriteria = request.having.flatMap(_.criteria) + + val scopedAggregations = NestedElements + .buildNestedTrees( + nestedAggregations.values.flatMap(_.flatMap(_.nestedElement)).toSeq.distinct + ) + .map { tree => + def buildNestedAgg(n: NestedElement): NestedAggregation = { + val elasticAggregations = nestedAggregations.getOrElse(n.path, Seq.empty) + val aggregations = elasticAggregations.map(_.agg) + val directions: Map[String, SortOrder] = + elasticAggregations + .filter(_.direction.isDefined) + .map(elasticAggregation => + elasticAggregation.agg.name -> elasticAggregation.direction.getOrElse(Asc) + ) + .toMap + val buckets: Seq[Aggregation] = + ElasticAggregation.buildBuckets( + nestedGroupedBuckets + .getOrElse(n.innerHitsName, Seq.empty), + request.sorts -- directions.keys, + aggregations, + directions, + havingCriteria + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val nestedFilteredAgg: Option[FilterAggregation] = + requestToNestedFilterAggregation(request, n.innerHitsName) + val children = n.children + if (children.nonEmpty) { + val innerAggs = children.map(buildNestedAgg) + val combinedAgg = if (innerAggs.size == 1) { + innerAggs.head + } else { + innerAggs.reduceLeft { (agg1, agg2) => + agg1.copy(subaggs = agg1.subaggs ++ Seq(agg2)) + } + } + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs (nestedFilteredAgg match { + case Some(filteredAgg) => + Seq(filteredAgg subaggs buckets ++ Seq(combinedAgg)) + case _ => buckets ++ Seq(combinedAgg) + }) + } else { + nestedAggregation( + n.innerHitsName, + n.path + ) subaggs (nestedFilteredAgg match { + case Some(filteredAgg) => + Seq(filteredAgg subaggs buckets) + case _ => buckets + }) + } + } + buildNestedAgg(tree) + } + scopedAggregations + } + + implicit def requestToNestedWithoutCriteriaQuery(request: SQLSearchRequest): Option[Query] = + NestedElements.buildNestedTrees(request.nestedElementsWithoutCriteria) match { + case Nil => None + case nestedTrees => + def nestedInner(n: NestedElement): InnerHit = { + var inner = innerHits(n.innerHitsName) + n.size match { + case Some(s) => + inner = inner.from(0).size(s) + case _ => + } + if (n.sources.nonEmpty) { + inner = inner.fetchSource( + FetchSourceContext( + fetchSource = true, + includes = n.sources.toArray + ) + ) + } + inner + } + + def buildNestedQuery(n: NestedElement): Query = { + val children = n.children + if (children.nonEmpty) { + val innerQueries = children.map(child => buildNestedQuery(child)) + val combinedQuery = if (innerQueries.size == 1) { + innerQueries.head + } else { + must(innerQueries) + } + nestedQuery( + n.path, + combinedQuery + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } else { + nestedQuery( + n.path, + matchAllQuery() + ) /*.scoreMode(ScoreMode.None)*/ + .inner( + nestedInner(n) + ) + } + } + + if (nestedTrees.size == 1) { + Some(buildNestedQuery(nestedTrees.head)) + } else { + val innerQueries = nestedTrees.map(nested => buildNestedQuery(nested)) + Some(boolQuery().filter(innerQueries)) + } + } + implicit def requestToElasticSearchRequest(request: SQLSearchRequest): ElasticSearchRequest = ElasticSearchRequest( request.select.fields, @@ -39,79 +262,43 @@ package object bridge { implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ - val notNestedBuckets = buckets.filterNot(_.identifier.nested) - val nestedBuckets = buckets.filter(_.identifier.nested).groupBy(_.nestedBucket.getOrElse("")) - val aggregations = - aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria), request.sorts)) - val notNestedAggregations = aggregations.filterNot(_.nested) - val nestedAggregations = - aggregations.filter(_.nested).groupBy(_.nestedAgg.map(_.name).getOrElse("")) + + val rootAggregations = requestToRootAggregations(request) + + val scopedAggregations = requestToScopedAggregations(request) + + val aggregations = rootAggregations ++ scopedAggregations + + val nestedWithoutCriteriaQuery: Option[Query] = requestToNestedWithoutCriteriaQuery(request) + var _search: SearchRequest = search("") query { - where.flatMap(_.criteria.map(_.asQuery())).getOrElse(matchAllQuery()) + where.flatMap(_.criteria.map(_.asQuery())) match { + case Some(c) => + val baseQuery = c + nestedWithoutCriteriaQuery match { + case Some(nc) => boolQuery().filter(baseQuery, nc) + case _ => baseQuery + } + case _ => + nestedWithoutCriteriaQuery.getOrElse(matchAllQuery()) + } } sourceFiltering (fields, excludes) - _search = if (nestedAggregations.nonEmpty) { + _search = if (aggregations.nonEmpty) { _search aggregations { - nestedAggregations.map { case (nested, aggs) => - val first = aggs.head - val aggregations = aggs.map(_.agg) - val aggregationDirections: Map[String, SortOrder] = - aggs - .filter(_.direction.isDefined) - .map(agg => agg.agg.name -> agg.direction.getOrElse(Asc)) - .toMap - val buckets = - ElasticAggregation.buildBuckets( - nestedBuckets.getOrElse(nested, Seq.empty), - request.sorts -- aggregationDirections.keys, - aggregations, - aggregationDirections, - request.having.flatMap(_.criteria) - ) match { - case Some(b) => Seq(b) - case _ => aggregations - } - val filtered: Option[Aggregation] = - first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) - first.nestedAgg.get.subAggregations(filtered.map(Seq(_)).getOrElse(buckets)) - } + aggregations } } else { _search } - _search = notNestedAggregations match { - case Nil => _search - case _ => _search aggregations { - val first = notNestedAggregations.head - val aggregationDirections: Map[String, SortOrder] = notNestedAggregations - .filter(_.direction.isDefined) - .map(agg => agg.agg.name -> agg.direction.get) - .toMap - val aggregations = notNestedAggregations.map(_.agg) - val buckets = ElasticAggregation.buildBuckets( - notNestedBuckets, - request.sorts -- aggregationDirections.keys, - aggregations, - aggregationDirections, - request.having.flatMap(_.criteria) - ) match { - case Some(b) => Seq(b) - case _ => aggregations - } - val filtered: Option[Aggregation] = - first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) - filtered.map(Seq(_)).getOrElse(buckets) - } - } - _search = scriptFields.filterNot(_.aggregation) match { case Nil => _search case _ => _search scriptfields scriptFields.map { field => scriptField( field.scriptName, - Script(script = field.painless) + Script(script = field.painless()) .lang("painless") .scriptType("source") .params(field.identifier.functions.headOption match { @@ -166,7 +353,7 @@ package object bridge { case _ => true })) ) { - return scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + return scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } // Geo distance special case identifier.functions.headOption match { @@ -380,25 +567,25 @@ package object bridge { case NE | DIFF => not(rangeQuery(identifier.name) gte script lte script) } case _ => - scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } case _ => - scriptQuery(Script(script = painless).lang("painless").scriptType("source")) + scriptQuery(Script(script = painless()).lang("painless").scriptType("source")) } case _ => matchAllQuery() } } implicit def isNullToQuery( - isNull: IsNullExpr - ): Query = { + isNull: IsNullExpr + ): Query = { import isNull._ not(existsQuery(identifier.name)) } implicit def isNotNullToQuery( - isNotNull: IsNotNullExpr - ): Query = { + isNotNull: IsNotNullExpr + ): Query = { import isNotNull._ existsQuery(identifier.name) } @@ -537,7 +724,7 @@ package object bridge { case _ => scriptQuery( Script( - script = distanceCriteria.painless, + script = distanceCriteria.painless(), lang = Some("painless"), scriptType = Source, params = distance.params @@ -547,33 +734,34 @@ package object bridge { } implicit def matchToQuery( - matchExpression: ElasticMatch - ): Query = { + matchExpression: ElasticMatch + ): Query = { import matchExpression._ matchQuery(identifier.name, value.value) } implicit def criteriaToElasticCriteria( - criteria: Criteria - ): ElasticCriteria = { + criteria: Criteria + ): ElasticCriteria = { ElasticCriteria( criteria ) } implicit def filterToQuery( - filter: ElasticFilter - ): ElasticQuery = { + filter: ElasticFilter + ): ElasticQuery = { ElasticQuery(filter) } implicit def sqlQueryToAggregations( - query: SQLQuery - ): Seq[ElasticAggregation] = { + query: SQLQuery + ): Seq[ElasticAggregation] = { import query._ request .map { case Left(l) => + val filteredAgg: Option[FilterAggregation] = requestToFilterAggregation(l) l.aggregates .map(ElasticAggregation(_, l.having.flatMap(_.criteria), l.sorts)) .map(aggregation => { @@ -599,7 +787,7 @@ package object bridge { } aggregations { val filtered = - aggregation.filteredAgg match { + filteredAgg match { case Some(filtered) => filtered.subAggregations(aggregation.agg) case _ => aggregation.agg } diff --git a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala index 4f3aa58f..8da795a9 100644 --- a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala +++ b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala @@ -412,80 +412,84 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { } it should "filter nested predicate" in { - asQuery(nestedPredicate) shouldBe """{ - - |"query":{ - | "bool":{ - | "filter" : [ - | { - | "term" : { - | "identifier1" : { - | "value" : 1 - | } - | } - | }, - | { - | "nested" : { - | "path" : "nested", - | "query" : { - | "bool" : { - | "should" : [ - | { - | "range" : { - | "nested.identifier2" : { - | "gt" : 2 - | } - | } - | }, - | { - | "term" : { - | "nested.identifier3" : { - | "value" : 3 - | } - | } - | } - | ] - | } - | }, - | "inner_hits":{"name":"nested","from":0,"size":3} - | } - | } - | ] - | } - | } - |}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedPredicate) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "identifier1": { + | "value": 1 + | } + | } + | }, + | { + | "nested": { + | "path": "nested", + | "query": { + | "bool": { + | "should": [ + | { + | "range": { + | "nested.identifier2": { + | "gt": 2 + | } + | } + | }, + | { + | "term": { + | "nested.identifier3": { + | "value": 3 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "nested" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter nested criteria" in { - asQuery(nestedCriteria) shouldBe """{ - - |"query":{ - | "bool":{ - | "filter" : [ - | { - | "term" : { - | "identifier1" : { - | "value" : 1 - | } - | } - | }, - | { - | "nested" : { - | "path" : "nested", - | "query" : { - | "term" : { - | "nested.identifier3" : { - | "value" : 3 - | } - | } - | }, - | "inner_hits":{"name":"nested","from":0,"size":3} - | } - | } - | ] - | } - | } - |}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedCriteria) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "identifier1": { + | "value": 1 + | } + | } + | }, + | { + | "nested": { + | "path": "nested", + | "query": { + | "term": { + | "nested.identifier3": { + | "value": 3 + | } + | } + | }, + | "inner_hits": { + | "name": "nested" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter child predicate" in { @@ -669,36 +673,44 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { } it should "filter nested with between" in { - asQuery(nestedWithBetween) shouldBe """{ - - |"query":{ - | "bool":{"filter":[{"nested" : { - | "path" : "ciblage", - | "query" : { - | "bool" : { - | "filter" : [ - | { - | "range" : { - | "ciblage.Archivage_CreationDate" : { - | "gte" : "NOW-3M/M", - | "lte" : "NOW" - | } - | } - | }, - | { - | "term" : { - | "ciblage.statutComportement" : { - | "value" : 1 - | } - | } - | } - | ] - | } - | }, - | "inner_hits":{"name":"ciblage","from":0,"size":3} - | } - | } - |]}}}""".stripMargin.replaceAll("\\s", "") + asQuery(nestedWithBetween) shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "ciblage", + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "ciblage.Archivage_CreationDate": { + | "gte": "now-3M/M", + | "lte": "now" + | } + | } + | }, + | { + | "term": { + | "ciblage.statutComportement": { + | "value": 1 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "ciblage" + | } + | } + | } + | ] + | } + | } + |}""".stripMargin.replaceAll("\\s", "") } it should "filter boolean eq" in { diff --git a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index c87911ba..86617c50 100644 --- a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3,7 +3,6 @@ package app.softnetwork.elastic.sql import app.softnetwork.elastic.sql.bridge._ import app.softnetwork.elastic.sql.Queries._ import app.softnetwork.elastic.sql.query._ -import com.google.gson.{JsonArray, JsonObject, JsonParser, JsonPrimitive} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers @@ -38,29 +37,29 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe - """|{ - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "nom": { - | "value": "Nom" - | } - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "c2": { - | "value_count": { - | "field": "id" - | } - | } - | } - |}""".stripMargin.replaceAll("\\s+", "") + """|{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "nom": { + | "value": "Nom" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "c2": { + | "value_count": { + | "field": "id" + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform count distinct" in { @@ -74,45 +73,45 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe - """|{ - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "nom": { - | "value": "Nom" - | } - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "c2": { - | "cardinality": { - | "field": "id" - | } - | } - | } - |}""".stripMargin.replaceAll("\\s+", "") + """|{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "nom": { + | "value": "Nom" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "c2": { + | "cardinality": { + | "field": "id" + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested count" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as email from index i join unnest(emails) as inner_emails where i.nom = \"Nom\"" + "select count(inner_emails.value) as email from index i join unnest(i.emails) as inner_emails where i.nom = \"Nom\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.email" + result.aggName shouldBe "inner_emails.email" result.field shouldBe "email" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe - """{ + """{ | "query": { | "bool": { | "filter": [ @@ -128,7 +127,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_emails": { + | "inner_emails": { | "nested": { | "path": "emails" | }, @@ -147,47 +146,51 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with nested criteria" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" + "select count(inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.count_emails" + result.aggName shouldBe "inner_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe - """{ + val query = result.query.getOrElse("") + println(query) + query shouldBe + """{ | "query": { - | "bool":{ + | "bool": { | "filter": [ - | { - | "term": { - | "nom": { - | "value": "Nom" - | } + | { + | "term": { + | "nom": { + | "value": "Nom" | } - | }, - | { - | "nested": { - | "path": "profiles", - | "query": { - | "terms": { - | "profiles.postalCode": [ - | "75001", - | "75002" - | ] - | } - | }, - | "inner_hits":{"name":"inner_profiles","from":0,"size":3} + | } + | }, + | { + | "nested": { + | "path": "profiles", + | "query": { + | "terms": { + | "profiles.postalCode": [ + | "75001", + | "75002" + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" | } | } + | } | ] | } | }, | "size": 0, | "aggs": { - | "nested_emails": { + | "inner_emails": { | "nested": { | "path": "emails" | }, @@ -206,19 +209,21 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with filter" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" + "select count(inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_emails.filtered_agg.count_emails" + result.aggName shouldBe "inner_emails.filtered_agg.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe - """{ + val query = result.query.getOrElse("") + println(query) + query shouldBe + """{ | "query": { - | "bool":{ + | "bool": { | "filter": [ | { | "term": { @@ -238,7 +243,9 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | ] | } | }, - | "inner_hits":{"name":"inner_profiles","from":0,"size":3} + | "inner_hits": { + | "name": "inner_profiles" + | } | } | } | ] @@ -246,7 +253,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_emails": { + | "inner_emails": { | "nested": { | "path": "emails" | }, @@ -276,18 +283,19 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with \"and not\" operator" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(distinct inner_emails.value) as count_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where ((inner_profiles.postalCode = \"33600\") and (inner_profiles.postalCode <> \"75001\"))" + "select count(distinct inner_emails.value) as count_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where ((inner_profiles.postalCode = \"33600\") and (inner_profiles.postalCode <> \"75001\"))" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_emails.count_emails" + result.aggName shouldBe "inner_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe - """ - |{ + val query = result.query.getOrElse("") + println(query) + query shouldBe + """{ | "query": { | "bool": { | "filter": [ @@ -321,9 +329,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "inner_hits": { - | "name": "inner_profiles", - | "from": 0, - | "size": 3 + | "name": "inner_profiles" | } | } | } @@ -332,7 +338,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_emails": { + | "inner_emails": { | "nested": { | "path": "emails" | }, @@ -345,159 +351,153 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | } - |} - |""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested count with date filtering" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(distinct inner_emails.value) as count_distinct_emails from index join unnest(emails) as inner_emails join unnest(profiles) as inner_profiles where inner_profiles.postalCode = \"33600\" and inner_profiles.createdDate <= \"now-35M/M\"" + "select count(distinct inner_emails.value) as count_distinct_emails from index join unnest(index.emails) as inner_emails join unnest(index.profiles) as inner_profiles where inner_profiles.postalCode = \"33600\" and inner_profiles.createdDate <= \"now-35M/M\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_emails.count_distinct_emails" + result.aggName shouldBe "inner_emails.count_distinct_emails" result.field shouldBe "count_distinct_emails" result.sources shouldBe Seq[String]("index") - result.query.getOrElse("") shouldBe - """{ - "query": { - | "bool": { - | "filter": [ - | { - | "nested": { - | "path": "profiles", - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "profiles.postalCode": { - | "value": "33600" - | } - | } - | }, - | { - | "range": { - | "profiles.createdDate": { - | "lte": "now-35M/M" - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "inner_profiles", - | "from": 0, - | "size": 3 - | } - | } - | } - | ] - | } - | }, - | "size": 0, - | "aggs": { - | "nested_emails": { - | "nested": { - | "path": "emails" - | }, - | "aggs": { - | "count_distinct_emails": { - | "cardinality": { - | "field": "emails.value" - | } - | } - | } - | } - | } - |}""".stripMargin.replaceAll("\\s+", "") + val query = result.query.getOrElse("") + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "profiles", + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "profiles.postalCode": { + | "value": "33600" + | } + | } + | }, + | { + | "range": { + | "profiles.createdDate": { + | "lte": "now-35M/M" + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "inner_profiles" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "aggs": { + | "inner_emails": { + | "nested": { + | "path": "emails" + | }, + | "aggs": { + | "count_distinct_emails": { + | "cardinality": { + | "field": "emails.value" + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "perform nested select" in { val select: ElasticSearchRequest = SQLQuery(""" - |SELECT - |profileId, - |profile_ccm.email as email, - |profile_ccm.city as city, - |profile_ccm.firstName as firstName, - |profile_ccm.lastName as lastName, - |profile_ccm.postalCode as postalCode, - |profile_ccm.birthYear as birthYear - |FROM index join unnest(profiles) as profile_ccm - |WHERE - |((profile_ccm.postalCode BETWEEN "10" AND "99999") - |AND - |(profile_ccm.birthYear <= 2000)) - |limit 100""".stripMargin) + |SELECT + |profileId, + |profile_ccm.email as email, + |profile_ccm.city as city, + |profile_ccm.firstName as firstName, + |profile_ccm.lastName as lastName, + |profile_ccm.postalCode as postalCode, + |profile_ccm.birthYear as birthYear + |FROM index join unnest(index.profiles) as profile_ccm + |WHERE + |((profile_ccm.postalCode BETWEEN "10" AND "99999") + |AND + |(profile_ccm.birthYear <= 2000)) + |limit 100""".stripMargin) val query = select.query - val queryWithoutSource = query.substring(0, query.indexOf("_source") - 2) + "}" - queryWithoutSource shouldBe - """{ - | "query": { - | "bool": { - | "filter": [ - | { - | "nested": { - | "path": "profiles", - | "query": { - | "bool": { - | "filter": [ - | { - | "range": { - | "profiles.postalCode": { - | "gte": "10", - | "lte": "99999" - | } - | } - | }, - | { - | "range": { - | "profiles.birthYear": { - | "lte": 2000 - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "profile_ccm", - | "from": 0, - | "size": 3 - | } - | } - | } - | ] - | } - | }, - | "from": 0, - | "size": 100 - |}""".stripMargin.replaceAll("\\s+", "") - val includes = new JsonParser() - .parse(query.substring(query.indexOf("_source") + 9, query.length - 1)) - .asInstanceOf[JsonObject] - .get("includes") - .asInstanceOf[JsonArray] - .iterator() - .asScala - val sourceIncludes: Seq[String] = ( - for (i <- includes) yield i.asInstanceOf[JsonPrimitive].getAsString - ).toSeq - val expectedSourceIncludes = Seq( - "profileId", - "profile_ccm.email", - "profile_ccm.city", - "profile_ccm.firstName", - "profile_ccm.lastName", - "profile_ccm.postalCode", - "profile_ccm.birthYear" - ) - sourceIncludes should contain theSameElementsAs expectedSourceIncludes + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "profiles", + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "profiles.postalCode": { + | "gte": "10", + | "lte": "99999" + | } + | } + | }, + | { + | "range": { + | "profiles.birthYear": { + | "lte": 2000 + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "profile_ccm", + | "from": 0, + | "_source": { + | "includes": [ + | "profiles.email", + | "profiles.postalCode", + | "profiles.firstName", + | "profiles.lastName", + | "profiles.birthYear", + | "profiles.city" + | ] + | }, + | "size": 100 + | } + | } + | } + | ] + | } + | }, + | "from": 0, + | "size": 100, + | "_source": { + | "includes": [ + | "profileId" + | ] + | } + |}""".stripMargin.replaceAll("\\s+", "") } it should "exclude fields from select" in { @@ -506,7 +506,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { except ) select.query shouldBe - """ + """ |{ | "query":{ | "match_all":{} @@ -531,73 +531,36 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "Country": { - | "value": "USA" - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "City": { - | "value": "Berlin" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | } - | ] + | "Country": { + | "terms": { + | "field": "Country.keyword", + | "exclude": ["USA"], + | "order": { + | "_key": "asc" | } | }, | "aggs": { - | "Country": { + | "City": { | "terms": { - | "field": "Country.keyword", + | "field": "City.keyword", + | "exclude": ["Berlin"], | "order": { - | "Country": "asc" + | "cnt": "desc" | } | }, | "aggs": { - | "City": { - | "terms": { - | "field": "City.keyword", - | "order": { - | "cnt": "desc" - | } - | }, - | "aggs": { - | "cnt": { - | "value_count": { - | "field": "CustomerID" - | } + | "cnt": { + | "value_count": { + | "field": "CustomerID" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "cnt": "cnt" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "cnt": "cnt" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && params.cnt > 1" - | } - | } + | "script": { + | "source": "params.cnt > 1" | } | } | } @@ -622,7 +585,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | max(inner_products.price) as max_price |FROM | stores store - | JOIN UNNEST(store.products LIMIT 10) as inner_products + | JOIN UNNEST(store.products) as inner_products |WHERE | ( | firstName is not null AND @@ -642,17 +605,20 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |HAVING inner_products.deleted=false AND | inner_products.upForSale=true AND | inner_products.stock > 0 AND - | match (inner_products.name) against ("lasagnes") AND - | match (inner_products.description, inner_products.ingredients) against ("lasagnes") AND + | match ( + | inner_products.name, + | inner_products.description, + | inner_products.ingredients + | ) against ("lasagnes") AND | min(inner_products.price) > 5.0 AND | max(inner_products.price) < 50.0 AND - | inner_products.category <> "coffee"""".stripMargin + | inner_products.category <> "coffee" + | LIMIT 10""".stripMargin ).minScore(1.0) val query = select.query println(query) query shouldBe - """ - |{ + """{ | "query": { | "bool": { | "filter": [ @@ -748,111 +714,34 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "min_score": 1.0, | "_source": true, | "aggs": { - | "nested_products": { + | "inner_products": { | "nested": { | "path": "products" | }, | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "term": { - | "products.deleted": { - | "value": false - | } - | } - | }, - | { - | "term": { - | "products.upForSale": { - | "value": true - | } - | } - | }, - | { - | "range": { - | "products.stock": { - | "gt": 0 - | } - | } - | }, - | { - | "match": { - | "products.name": { - | "query": "lasagnes" - | } - | } + | "cat": { + | "terms": { + | "field": "products.category.keyword" + | }, + | "aggs": { + | "min_price": { + | "min": { + | "field": "products.price" + | } + | }, + | "max_price": { + | "max": { + | "field": "products.price" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "min_price": "inner_products>min_price", + | "max_price": "inner_products>max_price" | }, - | { - | "bool": { - | "should": [ - | { - | "match": { - | "products.description": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "match": { - | "products.ingredients": { - | "query": "lasagnes" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | }, - | { - | "match_all": {} - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "products.category": { - | "value": "coffee" - | } - | } - | } - | ] - | } - | } - | ] - | } - | }, - | "aggs": { - | "cat": { - | "terms": { - | "field": "products.category.keyword" - | }, - | "aggs": { - | "min_price": { - | "min": { - | "field": "products.price" - | } - | }, - | "max_price": { - | "max": { - | "field": "products.price" - | } - | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "min_price": "min_price", - | "max_price": "max_price" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && 1 == 1 && 1 == 1 && 1 == 1 && params.min_price > 5.0 && params.max_price < 50.0 && 1 == 1" - | } - | } + | "script": { + | "source": "params.min_price > 5.0 && params.max_price < 50.0" | } | } | } @@ -861,11 +750,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | } - |}""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin + .replaceAll("\\s+", "") .replaceAll("==", " == ") .replaceAll("&&", " && ") - .replaceAll("<", " < ") - .replaceAll(">", " > ") + .replaceAll("<(\\d)", " < $1") + .replaceAll(">(\\d)", " > $1") } @@ -892,7 +782,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s", "") + |}""".stripMargin + .replaceAll("\\s", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("if\\(", "if (") @@ -982,46 +873,47 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { SQLQuery(filterWithTimeAndInterval) val query = select.query println(query) - """{ - | "query": { - | "bool": { - | "filter": [ - | { - | "range": { - | "createdAt": { - | "lt": "now/s" - | } - | } - | }, - | { - | "range": { - | "createdAt": { - | "gte": "now-10m/s" - | } - | } - | } - | ] - | } - | }, - | "_source": { - | "includes": [ - | "*" - | ] - | } - |}""".stripMargin - .replaceAll("\\s", "") - .replaceAll("ChronoUnit", " ChronoUnit") - .replaceAll(">=", " >= ") - .replaceAll("<", " < ") - .replaceAll("\\|\\|", " || ") - .replaceAll("null:", "null : ") - .replaceAll("false:", "false : ") - .replaceAll(":null", " : null ") - .replaceAll("\\?", " ? ") - .replaceAll("==", " == ") - .replaceAll("\\);", "); ") - .replaceAll("=\\(", " = (") - .replaceAll("defl", "def l") + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "range": { + | "createdAt": { + | "lt": "now/s" + | } + | } + | }, + | { + | "range": { + | "createdAt": { + | "gte": "now-10m/s" + | } + | } + | } + | ] + | } + | }, + | "_source": { + | "includes": [ + | "*" + | ] + | } + |}""".stripMargin + .replaceAll("\\s", "") + .replaceAll("ChronoUnit", " ChronoUnit") + .replaceAll(">=", " >= ") + .replaceAll("<", " < ") + .replaceAll("\\|\\|", " || ") + .replaceAll("null:", "null : ") + .replaceAll("false:", "false : ") + .replaceAll(":null", " : null ") + .replaceAll("\\?", " ? ") + .replaceAll("==", " == ") + .replaceAll("\\);", "); ") + .replaceAll("=\\(", " = (") + .replaceAll("defl", "def l") } it should "handle having with date functions" in { @@ -1040,37 +932,31 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "match_all": {} + | "userId": { + | "terms": { + | "field": "userId.keyword" | }, | "aggs": { - | "userId": { - | "terms": { - | "field": "userId.keyword" - | }, - | "aggs": { - | "lastSeen": { - | "max": { - | "field": "createdAt" - | } + | "lastSeen": { + | "max": { + | "field": "createdAt" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "lastSeen": "lastSeen" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "lastSeen": "lastSeen" - | }, - | "script": { - | "source": "params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS)" - | } - | } + | "script": { + | "source": "params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS).toInstant().toEpochMilli()" | } | } | } | } | } | } - |}""".stripMargin.replaceAll("\\s", "") + |}""".stripMargin + .replaceAll("\\s", "") .replaceAll("ChronoUnit", " ChronoUnit") .replaceAll("!=", " != ") .replaceAll("&&", " && ") @@ -1079,7 +965,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "handle group by with having and date time functions" in { val select: ElasticSearchRequest = - SQLQuery(groupByWithHavingAndDateTimeFunctions) + SQLQuery(groupByWithHavingAndDateTimeFunctions.replace("GROUP BY 3, 2", "GROUP BY 3, 2")) val query = select.query println(query) query shouldBe @@ -1090,82 +976,39 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "size": 0, | "_source": true, | "aggs": { - | "filtered_agg": { - | "filter": { - | "bool": { - | "filter": [ - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "Country": { - | "value": "USA" - | } - | } - | } - | ] - | } - | }, - | { - | "bool": { - | "must_not": [ - | { - | "term": { - | "City": { - | "value": "Berlin" - | } - | } - | } - | ] - | } - | }, - | { - | "match_all": {} - | }, - | { - | "range": { - | "lastSeen": { - | "gt": "now-7d" - | } - | } - | } - | ] + | "Country": { + | "terms": { + | "field": "Country.keyword", + | "exclude": ["USA"], + | "order": { + | "_key": "asc" | } | }, | "aggs": { - | "Country": { + | "City": { | "terms": { - | "field": "Country.keyword", - | "order": { - | "Country": "asc" - | } + | "field": "City.keyword", + | "exclude": ["Berlin"] | }, | "aggs": { - | "City": { - | "terms": { - | "field": "City.keyword" - | }, - | "aggs": { - | "cnt": { - | "value_count": { - | "field": "CustomerID" - | } - | }, - | "lastSeen": { - | "max": { - | "field": "createdAt" - | } + | "cnt": { + | "value_count": { + | "field": "CustomerID" + | } + | }, + | "lastSeen": { + | "max": { + | "field": "createdAt" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "cnt": "cnt", + | "lastSeen": "lastSeen" | }, - | "having_filter": { - | "bucket_selector": { - | "buckets_path": { - | "cnt": "cnt" - | }, - | "script": { - | "source": "1 == 1 && 1 == 1 && params.cnt > 1 && 1 == 1" - | } - | } + | "script": { + | "source": "params.cnt > 1 && params.lastSeen > ZonedDateTime.now(ZoneId.of('Z')).minus(7, ChronoUnit.DAYS).toInstant().toEpochMilli()" | } | } | } @@ -1174,7 +1017,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | } - |}""".stripMargin.replaceAll("\\s", "") + |}""".stripMargin + .replaceAll("\\s", "") .replaceAll("ChronoUnit", " ChronoUnit") .replaceAll("==", " == ") .replaceAll("!=", " != ") @@ -1239,7 +1083,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("null:", "null : ") .replaceAll("return", " return ") .replaceAll(";", "; ") - .replaceAll("ChronoUnit", " ChronoUnit") + .replaceAll(",ChronoUnit", ", ChronoUnit") .replaceAll("==", " == ") .replaceAll("!=", " != ") .replaceAll("&&", " && ") @@ -1439,7 +1283,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s", "") + |}""".stripMargin + .replaceAll("\\s", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -1489,7 +1334,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s", "") + |}""".stripMargin + .replaceAll("\\s", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -1539,7 +1385,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin + .replaceAll("\\s+", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -1589,7 +1436,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin + .replaceAll("\\s+", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -1627,7 +1475,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "_source": true - |}""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin + .replaceAll("\\s+", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -1668,7 +1517,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "identifier" | ] | } - |}""".stripMargin.replaceAll("\\s+", "") + |}""".stripMargin + .replaceAll("\\s+", "") .replaceAll("defv", "def v") .replaceAll("defe", "def e") .replaceAll("defs", "def s") @@ -3083,4 +2933,468 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll("false:", "false : ") .replaceAll("DateTimeFormatter", " DateTimeFormatter") } + + it should "handle nested of nested" in { + val select: ElasticSearchRequest = + SQLQuery(nestedOfNested) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "nested": { + | "path": "comments", + | "query": { + | "nested": { + | "path": "comments.replies", + | "query": { + | "bool": { + | "filter": [ + | { + | "match": { + | "comments.content": { + | "query": "Nice" + | } + | } + | }, + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('comments.replies.lastUpdated') || doc['comments.replies.lastUpdated'].empty ? null : doc['comments.replies.lastUpdated'].value); left == null ? false : left < (def e2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" + | } + | } + | } + | ] + | } + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.replies.reply_author", + | "comments.replies.reply_text" + | ] + | }, + | "size": 5 + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.author", + | "comments.comments" + | ] + | }, + | "size": 5 + | } + | } + | } + | ] + | } + | }, + | "from": 0, + | "size": 5, + | "_source": true + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") + } + + it should "handle predicate with distinct nested" in { + val select: ElasticSearchRequest = + SQLQuery(predicateWithDistinctNested) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "bool": { + | "must_not": [ + | { + | "nested": { + | "path": "replies", + | "query": { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('replies.lastUpdated') || doc['replies.lastUpdated'].empty ? null : doc['replies.lastUpdated'].value); left == null ? false : left < (def e2 = LocalDate.parse(\"2025-09-10\", DateTimeFormatter.ofPattern('yyyy-MM-dd')); e2.withDayOfMonth(e2.lengthOfMonth()))" + | } + | } + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "replies.reply_author", + | "replies.reply_text" + | ] + | }, + | "size": 5 + | } + | } + | } + | ], + | "filter": [ + | { + | "nested": { + | "path": "comments", + | "query": { + | "match": { + | "comments.content": { + | "query": "Nice" + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "comments.author", + | "comments.comments" + | ] + | }, + | "size": 5 + | } + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "from": 0, + | "size": 5, + | "_source": true + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") + } + + it should "handle nested without criteria" in { + val select: ElasticSearchRequest = + SQLQuery(nestedWithoutCriteria) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "bool": { + | "filter": [ + | { + | "script": { + | "script": { + | "lang": "painless", + | "source": "def left = (!doc.containsKey('lastUpdated') || doc['lastUpdated'].empty ? null : doc['lastUpdated'].value); left == null ? false : left < ZonedDateTime.now(ZoneId.of('Z')).toLocalDate()" + | } + | } + | } + | ] + | } + | }, + | { + | "nested": { + | "path": "comments", + | "query": { + | "nested": { + | "path": "comments.replies", + | "query": { + | "match_all": {} + | }, + | "inner_hits": { + | "name": "matched_replies", + | "from": 0, + | "_source": { + | "includes": [ + | "reply_author", + | "reply_text" + | ] + | }, + | "size": 5 + | } + | } + | }, + | "inner_hits": { + | "name": "matched_comments", + | "from": 0, + | "_source": { + | "includes": [ + | "author", + | "comments" + | ] + | }, + | "size": 5 + | } + | } + | } + | ] + | } + | }, + | "from": 0, + | "size": 5, + | "_source": true + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("\\s+", "") + .replaceAll("defv", " def v") + .replaceAll("defa", "def a") + .replaceAll("defe", "def e") + .replaceAll("defl", "def l") + .replaceAll("def_", "def _") + .replaceAll("=_", " = _") + .replaceAll(",_", ", _") + .replaceAll(",\\(", ", (") + .replaceAll("if\\(", "if (") + .replaceAll(">=", " >= ") + .replaceAll("=\\(", " = (") + .replaceAll(":\\(", " : (") + .replaceAll(",(\\d)", ", $1") + .replaceAll("\\?", " ? ") + .replaceAll(":null", " : null") + .replaceAll("null:", "null : ") + .replaceAll("return", " return ") + .replaceAll(";", "; ") + .replaceAll("; if", ";if") + .replaceAll("==", " == ") + .replaceAll("\\+", " + ") + .replaceAll(">(\\d)", " > $1") + .replaceAll("=(\\d)", "= $1") + .replaceAll("<", " < ") + .replaceAll("!=", " != ") + .replaceAll("&&", " && ") + .replaceAll("\\|\\|", " || ") + .replaceAll("(\\d)=", "$1 = ") + .replaceAll(",params", ", params") + .replaceAll("GeoPoint", " GeoPoint") + .replaceAll("lat,arg", "lat, arg") + .replaceAll("false:", "false : ") + .replaceAll("DateTimeFormatter", " DateTimeFormatter") + } + + it should "determine the aggregation context" in { + val select: ElasticSearchRequest = + SQLQuery(determinationOfTheAggregationContext) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "avg_popularity": { + | "avg": { + | "field": "popularity" + | } + | }, + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "avg_comment_likes": { + | "avg": { + | "field": "comments.likes" + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + + it should "handle aggregation with nested of nested context" in { + val select: ElasticSearchRequest = + SQLQuery(aggregationWithNestedOfNestedContext) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "replies": { + | "nested": { + | "path": "comments.replies" + | }, + | "aggs": { + | "avg_reply_likes": { + | "avg": { + | "field": "comments.replies.likes" + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + + it should "handle where filters according to scope" in { + val select: ElasticSearchRequest = + SQLQuery(whereFiltersAccordingToScope) + val query = select.query + println(query) + query shouldBe + """{ + | "query": { + | "bool": { + | "filter": [ + | { + | "term": { + | "status": { + | "value": "active" + | } + | } + | }, + | { + | "nested": { + | "path": "comments", + | "query": { + | "term": { + | "comments.sentiment": { + | "value": "positive" + | } + | } + | }, + | "inner_hits": { + | "name": "comments" + | } + | } + | } + | ] + | } + | }, + | "size": 0, + | "_source": true, + | "aggs": { + | "comments": { + | "nested": { + | "path": "comments" + | }, + | "aggs": { + | "filtered_comments": { + | "filter": { + | "bool": { + | "filter": [ + | { + | "term": { + | "comments.sentiment": { + | "value": "positive" + | } + | } + | } + | ] + | } + | }, + | "aggs": { + | "nb_comments": { + | "value_count": { + | "field": "comments.id" + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin.replaceAll("\\s+", "") + } + } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 3b7669ee..babe2357 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -45,7 +45,7 @@ package object aggregate { def topHits: TopHits def limit: Option[Limit] - lazy val buckets: Seq[Bucket] = partitionBy.map(Bucket) + lazy val buckets: Seq[Bucket] = partitionBy.map(identifier => Bucket(identifier, None)) lazy val bucketNames: Map[String, Bucket] = buckets.map { b => b.identifier.identifierName -> b @@ -55,7 +55,7 @@ package object aggregate { val partitionByStr = if (partitionBy.nonEmpty) s"$PARTITION_BY ${partitionBy.mkString(", ")}" else "" - s"$topHits($identifier) $OVER ($partitionByStr$orderBy${asString(limit)})" + s"$topHits($identifier) $OVER ($partitionByStr$orderBy)" } override def toSQL(base: String): String = sql @@ -65,7 +65,8 @@ package object aggregate { def withFields(fields: Seq[Field]): TopHitsAggregation def update(request: SQLSearchRequest): TopHitsAggregation = { - val updated = this.withPartitionBy(partitionBy = partitionBy.map(_.update(request))) + val updated = this + .withPartitionBy(partitionBy = partitionBy.map(_.update(request))) updated.withFields( fields = request.select.fields .filterNot(field => @@ -81,9 +82,9 @@ package object aggregate { identifier: Identifier, partitionBy: Seq[Identifier] = Seq.empty, orderBy: OrderBy, - fields: Seq[Field] = Seq.empty, - limit: Option[Limit] = None + fields: Seq[Field] = Seq.empty ) extends TopHitsAggregation { + override def limit: Option[Limit] = Some(Limit(1, None)) override def topHits: TopHits = FIRST_VALUE override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = this.copy(partitionBy = partitionBy) @@ -94,9 +95,9 @@ package object aggregate { identifier: Identifier, partitionBy: Seq[Identifier] = Seq.empty, orderBy: OrderBy, - fields: Seq[Field] = Seq.empty, - limit: Option[Limit] = None + fields: Seq[Field] = Seq.empty ) extends TopHitsAggregation { + override def limit: Option[Limit] = Some(Limit(1, None)) override def topHits: TopHits = LAST_VALUE override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = this.copy(partitionBy = partitionBy) @@ -114,6 +115,12 @@ package object aggregate { override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = this.copy(partitionBy = partitionBy) override def withFields(fields: Seq[Field]): TopHitsAggregation = this + override def update(request: SQLSearchRequest): TopHitsAggregation = super + .update(request) + .asInstanceOf[ArrayAgg] + .copy( + limit = limit.orElse(request.limit) + ) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala index c6a27d5b..7a870dbc 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/cond/package.scala @@ -7,7 +7,7 @@ import app.softnetwork.elastic.sql.query.Expression package object cond { sealed trait ConditionalOp extends PainlessScript with TokenRegex { - override def painless: String = sql + override def painless(): String = sql } case object Coalesce extends Expr("COALESCE") with ConditionalOp @@ -32,7 +32,7 @@ package object cond { override def outputType: SQLBool = SQLTypes.Boolean - override def toPainless(base: String, idx: Int): String = s"($base$painless)" + override def toPainless(base: String, idx: Int): String = s"($base${painless()})" } case class IsNull(identifier: Identifier) extends ConditionalFunction[SQLAny] { @@ -44,12 +44,12 @@ package object cond { override def toSQL(base: String): String = sql - override def painless: String = s" == null" + override def painless(): String = s" == null" override def toPainless(base: String, idx: Int): String = { if (nullable) - s"(def e$idx = $base; e$idx$painless)" + s"(def e$idx = $base; e$idx${painless()})" else - s"$base$painless" + s"$base${painless()}" } } @@ -62,12 +62,12 @@ package object cond { override def toSQL(base: String): String = sql - override def painless: String = s" != null" + override def painless(): String = s" != null" override def toPainless(base: String, idx: Int): String = { if (nullable) - s"(def e$idx = $base; e$idx$painless)" + s"(def e$idx = $base; e$idx${painless()})" else - s"$base$painless" + s"$base${painless()}" } } @@ -99,9 +99,9 @@ package object cond { else Right(()) } - override def toPainless(base: String, idx: Int): String = s"$base$painless" + override def toPainless(base: String, idx: Int): String = s"$base${painless()}" - override def painless: String = { + override def painless(): String = { require(values.nonEmpty, "COALESCE requires at least one argument") val checks = values @@ -183,7 +183,7 @@ package object cond { else Right(()) } - override def painless: String = { + override def painless(): String = { val base = expression match { case Some(expr) => @@ -208,11 +208,11 @@ package object cond { } else { res match { case i: Identifier if i.name == name && cond.isInstanceOf[Identifier] => - i.nullable = false + i.withNullable(false) if (cond.asInstanceOf[Identifier].functions.isEmpty) s"def val$idx = $c; if (expr == val$idx) return ${SQLTypeUtils.coerce(i.toPainless(s"val$idx"), i.baseType, out, nullable = false)};" else { - cond.asInstanceOf[Identifier].nullable = false + cond.asInstanceOf[Identifier].withNullable(false) s"def e$idx = ${i.checkNotNull}; def val$idx = e$idx != null ? ${SQLTypeUtils .coerce(cond.asInstanceOf[Identifier].toPainless(s"e$idx"), cond.baseType, out, nullable = false)} : null; if (expr == val$idx) return ${SQLTypeUtils .coerce(i.toPainless(s"e$idx"), i.baseType, out, nullable = false)};" @@ -226,7 +226,7 @@ package object cond { val r = res match { case i: Identifier if i.name == name && cond.isInstanceOf[Expression] => - i.nullable = false + i.withNullable(false) SQLTypeUtils.coerce(i.toPainless("left"), i.baseType, out, nullable = false) case _ => SQLTypeUtils.coerce(res, out) } @@ -240,7 +240,7 @@ package object cond { s"{ $base$cases $defaultCase }" } - override def toPainless(base: String, idx: Int): String = s"$base$painless" + override def toPainless(base: String, idx: Int): String = s"$base${painless()}" override def nullable: Boolean = conditions.exists { case (_, res) => res.nullable } || default.forall(_.nullable) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/convert/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/convert/package.scala index a6be2a65..0f74f7b9 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/convert/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/convert/package.scala @@ -19,7 +19,7 @@ package object convert { //override def nullable: Boolean = value.nullable - override def painless: String = SQLTypeUtils.coerce(value, targetType) + override def painless(): String = SQLTypeUtils.coerce(value, targetType) override def toPainless(base: String, idx: Int): String = { val ret = SQLTypeUtils.coerce(base, value.baseType, targetType, value.nullable) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala index 3cff8405..a574ff0c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/geo/package.scala @@ -57,7 +57,7 @@ package object geo { case object Distance extends Expr("ST_DISTANCE") with Function with Operator { override def words: List[String] = List(sql, "DISTANCE") - override def painless: String = ".arcDistance" + override def painless(): String = ".arcDistance" def haversine(lat1: Double, lon1: Double, lat2: Double, lon2: Double): Double = { val R = 6371e3 // Radius of the earth in meters @@ -125,7 +125,7 @@ package object geo { else Map.empty - override def painless: String = { + override def painless(): String = { val nullCheck = identifiers.zipWithIndex .map { case (_, i) => s"arg$i == null" } @@ -141,7 +141,7 @@ package object geo { val ret = if (oneIdentifier) { - s"arg0${fun.map(_.painless).getOrElse("")}(params.lat, params.lon)" + s"arg0${fun.map(_.painless()).getOrElse("")}(params.lat, params.lon)" } else if (identifiers.isEmpty) { s"${Distance.haversine( fromPoint.get.lat.value, @@ -150,7 +150,7 @@ package object geo { toPoint.get.lon.value )}" } else { - s"arg0${fun.map(_.painless).getOrElse("")}(arg1.lat, arg1.lon)" + s"arg0${fun.map(_.painless()).getOrElse("")}(arg1.lat, arg1.lon)" } if (identifiers.nonEmpty) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/math/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/math/package.scala index 11b27be6..0e49d227 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/math/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/math/package.scala @@ -6,7 +6,7 @@ import app.softnetwork.elastic.sql.`type`.{SQLNumeric, SQLType, SQLTypes} package object math { sealed trait MathOp extends PainlessScript with TokenRegex { - override def painless: String = s"Math.${sql.toLowerCase()}" + override def painless(): String = s"Math.${sql.toLowerCase()}" override def toString: String = s" $sql " override def baseType: SQLNumeric = SQLTypes.Numeric @@ -79,7 +79,8 @@ package object math { List(arg) ++ scale.map(IntValue(_)).toList override def toPainlessCall(callArgs: List[String]): String = - s"(def p = ${Pow(IntValue(10), scale.getOrElse(0)).painless}; ${mathOp.painless}((${callArgs.head} * p) / p))" + s"(def p = ${Pow(IntValue(10), scale.getOrElse(0)) + .painless()}; ${mathOp.painless()}((${callArgs.head} * p) / p))" } case class Sign(arg: PainlessScript) extends MathematicalFunction { @@ -87,12 +88,12 @@ package object math { override def args: List[PainlessScript] = List(arg) - override def painless: String = { + override def painless(): String = { val ret = "arg0 > 0 ? 1 : (arg0 < 0 ? -1 : 0)" if (arg.nullable) - s"(def arg0 = ${arg.painless}; arg0 != null ? ($ret) : null)" + s"(def arg0 = ${arg.painless()}; arg0 != null ? ($ret) : null)" else - s"(def arg0 = ${arg.painless}; $ret)" + s"(def arg0 = ${arg.painless()}; $ret)" } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala index 172eb749..ea2d9aee 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/package.scala @@ -124,7 +124,7 @@ package object function { override def toSQL(base: String): String = s"$base$sql" - override def painless: String = { + override def painless(): String = { val nullCheck = args.zipWithIndex .filter(_._1.nullable) @@ -135,7 +135,7 @@ package object function { args.zipWithIndex .filter(_._1.nullable) .map { case (a, i) => - s"def arg$i = ${SQLTypeUtils.coerce(a.painless, a.baseType, argTypes(i), nullable = false)};" + s"def arg$i = ${SQLTypeUtils.coerce(a.painless(), a.baseType, argTypes(i), nullable = false)};" } .mkString(" ") @@ -144,7 +144,7 @@ package object function { if (a.nullable) s"arg$i" else - SQLTypeUtils.coerce(a.painless, a.baseType, argTypes(i), nullable = false) + SQLTypeUtils.coerce(a.painless(), a.baseType, argTypes(i), nullable = false) } if (args.exists(_.nullable)) @@ -155,9 +155,9 @@ package object function { def toPainlessCall(callArgs: List[String]): String = if (callArgs.nonEmpty) - s"${fun.map(_.painless).getOrElse("")}(${callArgs.mkString(argsSeparator)})" + s"${fun.map(_.painless()).getOrElse("")}(${callArgs.mkString(argsSeparator)})" else - fun.map(_.painless).getOrElse("") + fun.map(_.painless()).getOrElse("") } trait BinaryFunction[In1 <: SQLType, In2 <: SQLType, Out <: SQLType] extends FunctionN[In2, Out] { @@ -174,9 +174,9 @@ package object function { trait TransformFunction[In <: SQLType, Out <: SQLType] extends FunctionN[In, Out] { def toPainless(base: String, idx: Int): String = { if (nullable && base.nonEmpty) - s"(def e$idx = $base; e$idx != null ? e$idx$painless : null)" + s"(def e$idx = $base; e$idx != null ? e$idx${painless()} : null)" else - s"$base$painless" + s"$base${painless()}" } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/string/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/string/package.scala index c7d46de3..5e985fe7 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/string/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/string/package.scala @@ -13,14 +13,14 @@ import app.softnetwork.elastic.sql.`type`.{ package object string { sealed trait StringOp extends PainlessScript with TokenRegex { - override def painless: String = s".${sql.toLowerCase()}()" + override def painless(): String = s".${sql.toLowerCase()}()" } case object Concat extends Expr("CONCAT") with StringOp { - override def painless: String = " + " + override def painless(): String = " + " } case object Pipe extends Expr("\\|\\|") with StringOp { - override def painless: String = " + " + override def painless(): String = " + " } case object Lower extends Expr("LOWER") with StringOp { override lazy val words: List[String] = List(sql, "LCASE") @@ -30,13 +30,13 @@ package object string { } case object Trim extends Expr("TRIM") with StringOp case object Ltrim extends Expr("LTRIM") with StringOp { - override def painless: String = ".replaceAll(\"^\\\\s+\",\"\")" + override def painless(): String = ".replaceAll(\"^\\\\s+\",\"\")" } case object Rtrim extends Expr("RTRIM") with StringOp { - override def painless: String = ".replaceAll(\"\\\\s+$\",\"\")" + override def painless(): String = ".replaceAll(\"\\\\s+$\",\"\")" } case object Substring extends Expr("SUBSTRING") with StringOp { - override def painless: String = ".substring" + override def painless(): String = ".substring" override lazy val words: List[String] = List(sql, "SUBSTR") } case object LeftOp extends Expr("LEFT") with StringOp @@ -47,22 +47,22 @@ package object string { } case object Replace extends Expr("REPLACE") with StringOp { override lazy val words: List[String] = List(sql, "STR_REPLACE") - override def painless: String = ".replace" + override def painless(): String = ".replace" } case object Reverse extends Expr("REVERSE") with StringOp case object Position extends Expr("POSITION") with StringOp { override lazy val words: List[String] = List(sql, "STRPOS") - override def painless: String = ".indexOf" + override def painless(): String = ".indexOf" } case object RegexpLike extends Expr("REGEXP_LIKE") with StringOp { override lazy val words: List[String] = List(sql, "REGEXP") - override def painless: String = ".matches" + override def painless(): String = ".matches" } case class MatchFlags(flags: String) extends PainlessScript { override def sql: String = s"'$flags'" - override def painless: String = flags.toCharArray + override def painless(): String = flags.toCharArray .map { case 'i' => "java.util.regex.Pattern.CASE_INSENSITIVE" case 'c' => "0" @@ -158,13 +158,16 @@ package object string { .map { case (arg, idx) => SQLTypeUtils.coerce(arg, values(idx).baseType, SQLTypes.Varchar, nullable = false) } - .mkString(stringOp.painless) + .mkString(stringOp.painless()) } override def validate(): Either[String, Unit] = if (values.isEmpty) Left("CONCAT requires at least one argument") else - values.map(_.validate()).find(_.isLeft).getOrElse(Right(())) + values.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } override def toSQL(base: String): String = sql } @@ -243,7 +246,10 @@ package object string { } override def validate(): Either[String, Unit] = - args.map(_.validate()).find(_.isLeft).getOrElse(Right(())) + args.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } override def toSQL(base: String): String = sql } @@ -321,7 +327,10 @@ package object string { } override def validate(): Either[String, Unit] = - args.map(_.validate()).find(_.isLeft).getOrElse(Right(())) + args.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } override def toSQL(base: String): String = sql } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala index 77e69089..aaf30401 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/time/package.scala @@ -59,9 +59,9 @@ package object time { override def toPainless(base: String, idx: Int): String = if (nullable) - s"(def e$idx = $base; e$idx != null ? ${SQLTypeUtils.coerce(s"e$idx", expr.baseType, out, nullable = false)}$painless : null)" + s"(def e$idx = $base; e$idx != null ? ${SQLTypeUtils.coerce(s"e$idx", expr.baseType, out, nullable = false)}${painless()} : null)" else - s"${SQLTypeUtils.coerce(base, expr.baseType, out, nullable = expr.nullable)}$painless" + s"${SQLTypeUtils.coerce(base, expr.baseType, out, nullable = expr.nullable)}${painless()}" } sealed trait AddInterval[IO <: SQLTemporal] extends IntervalFunction[IO] { @@ -104,17 +104,17 @@ package object time { } sealed trait CurrentDateTimeFunction extends DateTimeFunction with CurrentFunction { - override def painless: String = + override def painless(): String = SQLTypeUtils.coerce(now, this.baseType, this.out, nullable = false) } sealed trait CurrentDateFunction extends DateFunction with CurrentFunction { - override def painless: String = + override def painless(): String = SQLTypeUtils.coerce(s"$now.toLocalDate()", this.baseType, this.out, nullable = false) } sealed trait CurrentTimeFunction extends TimeFunction with CurrentFunction { - override def painless: String = + override def painless(): String = SQLTypeUtils.coerce(s"$now.toLocalTime()", this.baseType, this.out, nullable = false) } @@ -161,7 +161,7 @@ package object time { } case object DateTrunc extends Expr("DATE_TRUNC") with TokenRegex with PainlessScript { - override def painless: String = ".truncatedTo" + override def painless(): String = ".truncatedTo" override lazy val words: List[String] = List(sql, "DATETRUNC") } @@ -188,7 +188,7 @@ package object time { } case object Extract extends Expr("EXTRACT") with TokenRegex with PainlessScript { - override def painless: String = ".get" + override def painless(): String = ".get" } case class Extract(field: TimeField) @@ -248,7 +248,7 @@ package object time { class WeekOfWeekBasedYear extends TimeFieldExtract(WEEK_OF_WEEK_BASED_YEAR) case object LastDayOfMonth extends Expr("LAST_DAY") with TokenRegex with PainlessScript { - override def painless: String = ".withDayOfMonth" + override def painless(): String = ".withDayOfMonth" override lazy val words: List[String] = List(sql, "LASTDAY") } @@ -281,7 +281,7 @@ package object time { override def toPainlessCall(callArgs: List[String]): String = { callArgs match { - case arg :: Nil => s"$arg${LastDayOfMonth.painless}($arg.lengthOfMonth())" + case arg :: Nil => s"$arg${LastDayOfMonth.painless()}($arg.lengthOfMonth())" case _ => throw new IllegalArgumentException("LastDayOfMonth requires exactly one argument") } } @@ -289,7 +289,7 @@ package object time { } case object DateDiff extends Expr("DATE_DIFF") with TokenRegex with PainlessScript { - override def painless: String = ".between" + override def painless(): String = ".between" override lazy val words: List[String] = List(sql, "DATEDIFF") } @@ -310,7 +310,7 @@ package object time { override def toSQL(base: String): String = s"$sql(${end.sql}, ${start.sql}, ${unit.sql})" override def toPainlessCall(callArgs: List[String]): String = - s"${unit.painless}${DateDiff.painless}(${callArgs.mkString(", ")})" + s"${unit.painless()}${DateDiff.painless()}(${callArgs.mkString(", ")})" } case object DateAdd extends Expr("DATE_ADD") with TokenRegex { @@ -395,7 +395,7 @@ package object time { } case object DateParse extends Expr("DATE_PARSE") with TokenRegex with PainlessScript { - override def painless: String = ".parse" + override def painless(): String = ".parse" } case class DateParse(identifier: Identifier, format: String) @@ -416,7 +416,9 @@ package object time { s"$sql($base, '$format')" } - override def painless: String = throw new NotImplementedError("Use toPainless instead") + override def painless(): String = throw new NotImplementedError( + "Use toPainless instead" + ) override def toPainless(base: String, idx: Int): String = if (nullable) s"(def e$idx = $base; e$idx != null ? DateTimeFormatter.ofPattern('${convert()}').parse(e$idx, LocalDate::from) : null)" @@ -440,7 +442,7 @@ package object time { } case object DateFormat extends Expr("DATE_FORMAT") with TokenRegex with PainlessScript { - override def painless: String = ".format" + override def painless(): String = ".format" } case class DateFormat(identifier: Identifier, format: String) @@ -460,7 +462,9 @@ package object time { s"$sql($base, '$format')" } - override def painless: String = throw new NotImplementedError("Use toPainless instead") + override def painless(): String = throw new NotImplementedError( + "Use toPainless instead" + ) override def toPainless(base: String, idx: Int): String = if (nullable) s"(def e$idx = $base; e$idx != null ? DateTimeFormatter.ofPattern('${convert()}').format(e$idx) : null)" @@ -505,7 +509,7 @@ package object time { } case object DateTimeParse extends Expr("DATETIME_PARSE") with TokenRegex with PainlessScript { - override def painless: String = ".parse" + override def painless(): String = ".parse" } case class DateTimeParse(identifier: Identifier, format: String) @@ -526,7 +530,9 @@ package object time { s"$sql($base, '$format')" } - override def painless: String = throw new NotImplementedError("Use toPainless instead") + override def painless(): String = throw new NotImplementedError( + "Use toPainless instead" + ) override def toPainless(base: String, idx: Int): String = if (nullable) s"(def e$idx = $base; e$idx != null ? DateTimeFormatter.ofPattern('${convert(includeTimeZone = true)}').parse(e$idx, ZonedDateTime::from) : null)" @@ -550,7 +556,7 @@ package object time { } case object DateTimeFormat extends Expr("DATETIME_FORMAT") with TokenRegex with PainlessScript { - override def painless: String = ".format" + override def painless(): String = ".format" } case class DateTimeFormat(identifier: Identifier, format: String) @@ -570,7 +576,9 @@ package object time { s"$sql($base, '$format')" } - override def painless: String = throw new NotImplementedError("Use toPainless instead") + override def painless(): String = throw new NotImplementedError( + "Use toPainless instead" + ) override def toPainless(base: String, idx: Int): String = if (nullable) s"(def e$idx = $base; e$idx != null ? DateTimeFormatter.ofPattern('${convert(includeTimeZone = true)}').format(e$idx) : null)" diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala index 15ea7ca3..bdc702ca 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/math/ArithmeticExpression.scala @@ -46,12 +46,12 @@ case class ArithmeticExpression( val l = left match { case t: TransformFunction[_, _] => SQLTypeUtils.coerce(t.toPainless("", idx + 1), left.baseType, out, nullable = false) - case _ => SQLTypeUtils.coerce(left.painless, left.baseType, out, nullable = false) + case _ => SQLTypeUtils.coerce(left.painless(), left.baseType, out, nullable = false) } val r = right match { case t: TransformFunction[_, _] => SQLTypeUtils.coerce(t.toPainless("", idx + 1), right.baseType, out, nullable = false) - case _ => SQLTypeUtils.coerce(right.painless, right.baseType, out, nullable = false) + case _ => SQLTypeUtils.coerce(right.painless(), right.baseType, out, nullable = false) } var expr = "" if (left.nullable) @@ -59,22 +59,22 @@ case class ArithmeticExpression( if (right.nullable) expr += s"def rv$idx = ($r); " if (left.nullable && right.nullable) - expr += s"(lv$idx == null || rv$idx == null) ? null : (lv$idx ${operator.painless} rv$idx)" + expr += s"(lv$idx == null || rv$idx == null) ? null : (lv$idx ${operator.painless()} rv$idx)" else if (left.nullable) - expr += s"(lv$idx == null) ? null : (lv$idx ${operator.painless} $r)" + expr += s"(lv$idx == null) ? null : (lv$idx ${operator.painless()} $r)" else - expr += s"(rv$idx == null) ? null : ($l ${operator.painless} rv$idx)" + expr += s"(rv$idx == null) ? null : ($l ${operator.painless()} rv$idx)" if (group) expr = s"($expr)" return s"$base$expr" } - s"$base$painless" + s"$base${painless()}" } - override def painless: String = { + override def painless(): String = { val l = SQLTypeUtils.coerce(left, out) val r = SQLTypeUtils.coerce(right, out) - val expr = s"$l ${operator.painless} $r" + val expr = s"$l ${operator.painless()} $r" if (group) s"($expr)" else diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/package.scala index 650b8e38..d63c7a97 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/package.scala @@ -3,7 +3,7 @@ package app.softnetwork.elastic.sql package object operator { trait Operator extends Token with PainlessScript with TokenRegex { - override def painless: String = this match { + override def painless(): String = this match { case AND => "&&" case OR => "||" case NOT => "!" diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/time/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/time/package.scala index 8a347d43..828a4191 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/operator/time/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/operator/time/package.scala @@ -7,7 +7,7 @@ package object time { sealed trait IntervalOperator extends Operator with BinaryOperator with DateMathScript { override def script: Option[String] = Some(sql) override def toString: String = s" $sql " - override def painless: String = this match { + override def painless(): String = this match { case PLUS => ".plus" case MINUS => ".minus" case _ => sql @@ -15,11 +15,11 @@ package object time { } case object PLUS extends Expr("+") with IntervalOperator { - override def painless: String = ".plus" + override def painless(): String = ".plus" } case object MINUS extends Expr("-") with IntervalOperator { - override def painless: String = ".minus" + override def painless(): String = ".minus" } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index cd8b06bb..4e4e05af 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -2,11 +2,7 @@ package app.softnetwork.elastic import app.softnetwork.elastic.sql.function.aggregate.{MAX, MIN} import app.softnetwork.elastic.sql.function.geo.DistanceUnit -import app.softnetwork.elastic.sql.function.time.{ - CurrentDateFunction, - CurrentDateTimeFunction, - CurrentFunction -} +import app.softnetwork.elastic.sql.function.time.CurrentFunction import app.softnetwork.elastic.sql.operator._ import app.softnetwork.elastic.sql.parser.{Validation, Validator} import app.softnetwork.elastic.sql.query._ @@ -56,7 +52,7 @@ package object sql { } trait PainlessScript extends Token { - def painless: String + def painless(): String def nullValue: String = "null" } @@ -118,7 +114,7 @@ package object sql { case _ => values.headOption } } - override def painless: String = + override def painless(): String = SQLTypeUtils.coerce( value match { case s: String => s""""$s"""" @@ -136,7 +132,7 @@ package object sql { case object Null extends Value[Null](null) with TokenRegex { override def sql: String = "NULL" - override def painless: String = "null" + override def painless(): String = "null" override def nullable: Boolean = true override def baseType: SQLType = SQLTypes.Null } @@ -240,13 +236,13 @@ package object sql { case object PiValue extends Value[Double](Math.PI) with TokenRegex { override def sql: String = "PI" - override def painless: String = "Math.PI" + override def painless(): String = "Math.PI" override def baseType: SQLNumeric = SQLTypes.Double } case object EValue extends Value[Double](Math.E) with TokenRegex { override def sql: String = "E" - override def painless: String = "Math.E" + override def painless(): String = "Math.E" override def baseType: SQLNumeric = SQLTypes.Double } @@ -256,7 +252,7 @@ package object sql { override def baseType: SQLNumeric = SQLTypes.Double override def sql: String = s"$longValue $unit" def geoDistance: String = s"$longValue$unit" - override def painless: String = s"$value" + override def painless(): String = s"$value" } sealed abstract class FromTo(val from: TokenValue, val to: TokenValue) extends Token { @@ -321,7 +317,8 @@ package object sql { extends Token with PainlessScript { override def sql = s"(${values.map(_.sql).mkString(",")})" - override def painless: String = s"[${values.map(_.painless).mkString(",")}]" + override def painless(): String = + s"[${values.map(_.painless()).mkString(",")}]" lazy val innerValues: Seq[R] = values.map(_.value) override def nullable: Boolean = values.exists(_.nullable) override def baseType: SQLArray = SQLTypes.Array(SQLTypes.Any) @@ -463,9 +460,23 @@ package object sql { def tableAlias: Option[String] def distinct: Boolean def nested: Boolean + def nestedElement: Option[NestedElement] def limit: Option[Limit] def fieldAlias: Option[String] def bucket: Option[Bucket] + def hasBucket: Boolean = bucket.isDefined + def metricsPath: Map[String, String] = { // TODO add bucket context ? + if (aggregation) { + val metricName = aliasOrName + nestedElement match { + case Some(ne) => Map(metricName -> s"${ne.bucketPath}>$metricName") + case _ => Map(metricName -> metricName) + } + } else { + Map.empty + } + } + override def sql: String = { var parts: Seq[String] = name.split("\\.").toSeq tableAlias match { @@ -491,16 +502,24 @@ package object sql { fun.toSQL(expr) }) // FIXME use AliasUtils.normalize? - lazy val nestedType: Option[String] = if (nested) Some(name.split('.').head) else None - lazy val innerHitsName: Option[String] = if (nested) tableAlias else None lazy val aliasOrName: String = fieldAlias.getOrElse(name) + def path: String = + nestedElement match { + case Some(ne) => + name.split("\\.") match { + case Array(_, _*) => s"${ne.path}.${name.split("\\.").tail.mkString(".")}" + case _ => s"${ne.path}.$name" + } + case None => name + } + def paramName: String = if (aggregation && functions.size == 1) s"params.$aliasOrName" - else if (name.nonEmpty) - s"doc['$name'].value" + else if (path.nonEmpty) + s"doc['$path'].value" else "" def toPainless(base: String): String = { @@ -509,7 +528,7 @@ package object sql { orderedFunctions.zipWithIndex.foreach { case (f, idx) => f match { case f: TransformFunction[_, _] => expr = f.toPainless(expr, idx) - case f: PainlessScript => expr = s"$expr${f.painless}" + case f: PainlessScript => expr = s"$expr${f.painless()}" case f => expr = f.toSQL(expr) // fallback } } @@ -567,11 +586,11 @@ package object sql { override def dateMathScript: Boolean = isTemporal def checkNotNull: String = - if (name.isEmpty) "" + if (path.isEmpty) "" else - s"(!doc.containsKey('$name') || doc['$name'].empty ? $nullValue : doc['$name'].value)" + s"(!doc.containsKey('$path') || doc['$path'].empty ? $nullValue : doc['$path'].value)" - override def painless: String = toPainless( + override def painless(): String = toPainless( if (nullable) checkNotNull else @@ -581,17 +600,27 @@ package object sql { private[this] var _nullable = this.name.nonEmpty && (!aggregation || functions.size > 1) - def nullable_=(b: Boolean): Unit = { + protected def nullable_=(b: Boolean): Unit = { _nullable = b } override def nullable: Boolean = _nullable + def withNullable(b: Boolean): Identifier = { + this.nullable = b + this + } + override def value: String = script match { case Some(s) => s - case _ => painless + case _ => painless() } + + def withNested(nested: Boolean): Identifier = this match { + case g: GenericIdentifier => g.copy(nested = nested) + case _ => this + } } object Identifier { @@ -611,21 +640,48 @@ package object sql { limit: Option[Limit] = None, functions: List[Function] = List.empty, fieldAlias: Option[String] = None, - bucket: Option[Bucket] = None + bucket: Option[Bucket] = None, + nestedElement: Option[NestedElement] = None ) extends Identifier { def withFunctions(functions: List[Function]): Identifier = this.copy(functions = functions) + override def withNullable(b: Boolean): Identifier = { + val id = this.copy() + id.nullable = b + id + } + def update(request: SQLSearchRequest): Identifier = { val parts: Seq[String] = name.split("\\.").toSeq if (request.tableAliases.values.toSeq.contains(parts.head)) { - request.unnests.find(_._1 == parts.head) match { - case Some(tuple) => + request.unnestAliases.find(_._1 == parts.head) match { + case Some(tuple) if !nested => this.copy( tableAlias = Some(parts.head), - name = s"${tuple._2}.${parts.tail.mkString(".")}", + name = s"${tuple._2._1}.${parts.tail.mkString(".")}", nested = true, - limit = tuple._3, + limit = tuple._2._2, + fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), + bucket = request.bucketNames.get(identifierName).orElse(bucket), + nestedElement = { + request.unnests.get(parts.head) match { + case Some(unnest) => Some(request.toNestedElement(unnest)) + case None => None + } + } + ) + case Some(tuple) if nested => + this.copy( + tableAlias = Some(parts.head), + name = s"${tuple._2._1}.${parts.tail.mkString(".")}", + limit = tuple._2._2, + fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), + bucket = request.bucketNames.get(identifierName).orElse(bucket) + ) + case None if nested => + this.copy( + tableAlias = Some(parts.head), fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias), bucket = request.bucketNames.get(identifierName).orElse(bucket) ) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/FromParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/FromParser.scala index 8bdfaa15..53800f8d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/FromParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/FromParser.scala @@ -18,8 +18,8 @@ trait FromParser { self: Parser with WhereParser with LimitParser => def unnest: PackratParser[Unnest] = - Unnest.regex ~ start ~ identifier ~ limit.? ~ end ~ alias.? ^^ { case _ ~ i ~ l ~ _ ~ a => - Unnest(i, l, a) + Unnest.regex ~ start ~ identifier ~ end ~ alias.? ^^ { case _ ~ i ~ _ ~ a => + Unnest(i, None, a) } def inner_join: PackratParser[JoinType] = InnerJoin.regex ^^ { _ => InnerJoin } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Validator.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Validator.scala index 947029e5..15e6e32d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Validator.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Validator.scala @@ -11,9 +11,9 @@ object Validator { case Nil => return Right(()) case _ => } - functions.map(_.validate()).find(_.isLeft) match { - case Some(left) => return left - case None => + functions.map(_.validate()).filter(_.isLeft) match { + case Nil => // ok + case errors => return Left(errors.map { case Left(err) => err }.mkString("\n")) } val funcs = functions.collect { case f: FunctionN[_, _] => f } funcs.sliding(2).foreach { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala index 56e76708..0eced705 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala @@ -236,11 +236,11 @@ trait WhereParser { def nestedCriteria: PackratParser[ElasticRelation] = Nested.regex ~ start.? ~ criteria ~ end.? ^^ { case _ ~ _ ~ c ~ _ => - ElasticNested(c, None) + ElasticNested(c, None, fromCriteria = false) } def nestedPredicate: PackratParser[ElasticRelation] = Nested.regex ~ start ~ predicate ~ end ^^ { - case _ ~ _ ~ p ~ _ => ElasticNested(p, None) + case _ ~ _ ~ p ~ _ => ElasticNested(p, None, fromCriteria = false) } def childCriteria: PackratParser[ElasticRelation] = Child.regex ~ start.? ~ criteria ~ end.? ^^ { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala index 2dbe1b06..8da134ce 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/function/aggregate/package.scala @@ -30,33 +30,32 @@ package object aggregate { def partition_by: PackratParser[Seq[Identifier]] = PARTITION_BY.regex ~> rep1sep(identifier, separator) - private[this] def over: Parser[(Seq[Identifier], OrderBy, Option[Limit])] = - OVER.regex ~> start ~ partition_by.? ~ orderBy ~ limit.? <~ end ^^ { case _ ~ pb ~ ob ~ l => - (pb.getOrElse(Seq.empty), ob, l) + private[this] def over: Parser[(Seq[Identifier], OrderBy)] = + OVER.regex ~> start ~ partition_by.? ~ orderBy <~ end ^^ { case _ ~ pb ~ ob => + (pb.getOrElse(Seq.empty), ob) } - private[this] def top_hits - : PackratParser[(Identifier, Seq[Identifier], OrderBy, Option[Limit])] = + private[this] def top_hits: PackratParser[(Identifier, Seq[Identifier], OrderBy)] = start ~ identifier ~ end ~ over.? ^^ { case _ ~ id ~ _ ~ o => o match { - case Some((pb, ob, l)) => (id, pb, ob, l) - case None => (id, Seq.empty, OrderBy(Seq(FieldSort(id.name, order = None))), None) + case Some((pb, ob)) => (id, pb, ob) + case None => (id, Seq.empty, OrderBy(Seq(FieldSort(id.name, order = None)))) } } def first_value: PackratParser[TopHitsAggregation] = FIRST_VALUE.regex ~ top_hits ^^ { case _ ~ top => - FirstValue(top._1, top._2, top._3, limit = top._4) + FirstValue(top._1, top._2, top._3) } def last_value: PackratParser[TopHitsAggregation] = LAST_VALUE.regex ~ top_hits ^^ { case _ ~ top => - LastValue(top._1, top._2, top._3, limit = top._4) + LastValue(top._1, top._2, top._3) } def array_agg: PackratParser[TopHitsAggregation] = ARRAY_AGG.regex ~ top_hits ^^ { case _ ~ top => - ArrayAgg(top._1, top._2, top._3, limit = top._4) + ArrayAgg(top._1, top._2, top._3, limit = None) } def identifierWithTopHits: PackratParser[Identifier] = diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala index 2bbf1f3a..59d9bab6 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala @@ -10,6 +10,8 @@ import app.softnetwork.elastic.sql.{ Updateable } +import scala.annotation.tailrec + case object From extends Expr("FROM") with TokenRegex sealed trait JoinType extends TokenRegex @@ -39,47 +41,126 @@ sealed trait Join extends Updateable { def on: Option[On] def alias: Option[Alias] override def sql: String = - s"${asString(joinType)} $Join $source${asString(on)}" + s" ${asString(joinType)} $Join $source${asString(on)}${asString(alias)}" override def update(request: SQLSearchRequest): Join + + override def validate(): Either[String, Unit] = + for { + _ <- source.validate() + _ <- alias match { + case Some(a) if a.alias.nonEmpty => Right(()) + case _ => Left(s"JOIN $this requires an alias") + } + _ <- this match { + case j if joinType.isDefined && on.isEmpty && joinType.get != CrossJoin => + Left(s"JOIN $j requires an ON clause") + case j if joinType.isEmpty && on.isDefined => + Left(s"JOIN $j requires a JOIN type") + case j if alias.isEmpty => + Left(s"JOIN $j requires an alias") + case _ => Right(()) + } + } yield () } case object Unnest extends Expr("UNNEST") with TokenRegex -case class Unnest(identifier: Identifier, limit: Option[Limit], alias: Option[Alias] = None) - extends Source +case class Unnest( + identifier: Identifier, + limit: Option[Limit], + alias: Option[Alias] = None, + parent: Option[Unnest] = None +) extends Source with Join { - override def sql: String = s"$Join $Unnest($identifier${asString(limit)})" - def update(request: SQLSearchRequest): Unnest = - this.copy(identifier = identifier.update(request)) - override val name: String = identifier.name + override def sql: String = s"$Join $Unnest($identifier)${asString(alias)}" + def update(request: SQLSearchRequest): Unnest = { + val updated = this.copy( + identifier = identifier.withNested(true).update(request), + limit = limit.orElse(request.limit) + ) + updated.identifier.tableAlias match { + case Some(alias) if updated.identifier.nested => + request.unnests.get(alias) match { + case Some(parent) if parent.path != updated.path => + return updated.copy(parent = Some(parent)) + case _ => + } + case _ => + } + updated + } - override def source: Source = this + override val name: String = { + val parts = identifier.name.split('.') + if (parts.length <= 1) identifier.name + else parts.tail.mkString(".") + } + + def innerHitsName: String = alias.map(_.alias).getOrElse(name) + + def path: String = parent match { + case Some(p) => s"${p.path}.$name" + case None => name + } + + override def source: Source = identifier override def joinType: Option[JoinType] = None override def on: Option[On] = None + + override def validate(): Either[String, Unit] = + for { + _ <- super.validate() + _ <- + if (identifier.name.contains('.')) Right(()) + else Left(s"UNNEST identifier $identifier must be a nested field") + } yield () + } case class Table(name: String, tableAlias: Option[Alias] = None, joins: Seq[Join] = Nil) extends Source { - override def sql: String = s"$name${asString(tableAlias)}${joins.map(_.sql).mkString(" ")}" + override def sql: String = s"$name${asString(tableAlias)} ${joins.map(_.sql).mkString(" ")}".trim def update(request: SQLSearchRequest): Table = this.copy(joins = joins.map(_.update(request))) + + override def validate(): Either[String, Unit] = + for { + _ <- tableAlias match { + case Some(a) if a.alias.isEmpty => Left(s"Table $name alias cannot be empty") + case _ => Right(()) + } + _ <- joins.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } + } yield () } case class From(tables: Seq[Table]) extends Updateable { override def sql: String = s" $From ${tables.map(_.sql).mkString(",")}" - lazy val tableAliases: Map[String, String] = tables - .flatMap((table: Table) => table.tableAlias.map(alias => table.name -> alias.alias)) - .toMap ++ unnests.map(unnest => unnest._2 -> unnest._1).toMap - lazy val unnests: Seq[(String, String, Option[Limit])] = tables + lazy val unnests: Seq[Unnest] = tables .map(_.joins) .collect { case j => - j.collect { case u: Unnest => // extract unnest info - (u.alias.map(_.alias).getOrElse(u.identifier.name), u.identifier.name, u.limit) - } + j.collect { case u: Unnest => u } } .flatten + + lazy val tableAliases: Map[String, String] = tables + .flatMap((table: Table) => + table.tableAlias match { + case Some(alias) if alias.alias.nonEmpty => Some(table.name -> alias.alias) + case _ => Some(table.name -> table.name) + } + ) + .toMap ++ unnestAliases.map(unnest => unnest._2._1 -> unnest._1) + + lazy val unnestAliases: Map[String, (String, Option[Limit])] = unnests + .map(u => // extract unnest info + (u.alias.map(_.alias).getOrElse(u.name), (u.name, u.limit)) + ) + .toMap def update(request: SQLSearchRequest): From = this.copy(tables = tables.map(_.update(request))) @@ -87,7 +168,97 @@ case class From(tables: Seq[Table]) extends Updateable { if (tables.isEmpty) { Left("At least one table is required in FROM clause") } else { - Right(()) + for { + _ <- tables.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } + } yield () } } } + +case class NestedElement( + path: String, + innerHitsName: String, + size: Option[Int], + children: Seq[NestedElement] = Nil, // TODO remove and use parent instead + sources: Seq[String] = Nil, + parent: Option[NestedElement] +) { + lazy val root: NestedElement = { + parent match { + case Some(p) => p.root + case None => this + } + } + + lazy val level: Int = { + parent match { + case Some(p) => 1 + p.level + case None => 0 + } + } + + lazy val bucketPath: String = { + parent match { + case Some(p) => s"${p.bucketPath}>$innerHitsName" + case None => innerHitsName + } + } +} + +object NestedElements { + + def buildNestedTrees(nestedElements: Seq[NestedElement]): Seq[NestedElement] = { + if (nestedElements.isEmpty) return Nil + val nestedParentsPath: collection.mutable.Map[String, (NestedElement, Seq[NestedElement])] = + collection.mutable.Map.empty + + val distinctNestedElements = nestedElements.distinctBy(_.path) + + @tailrec + def getNestedParents( + n: NestedElement, + parents: Seq[NestedElement] + ): Seq[NestedElement] = { + n.parent match { + case Some(p) => + if (!nestedParentsPath.contains(p.path)) { + p.copy(children = Nil) + nestedParentsPath += p.path -> (p, Seq(n)) + getNestedParents(p, p +: parents) + } else { + nestedParentsPath += p.path -> (p, nestedParentsPath(p.path)._2 :+ n) + parents + } + case _ => parents + } + } + + val deepestNestedElement = + distinctNestedElements.maxBy(_.level) // FIXME we may have multiple deepest elements + val nestedParents = getNestedParents(deepestNestedElement, Seq.empty) + + def innerBuildNestedTree(n: NestedElement): NestedElement = { + val children = nestedParentsPath.get(n.path).map(_._2).getOrElse(Seq.empty) + if (children.nonEmpty) { + val updatedChildren = children.map(innerBuildNestedTree) + n.copy(children = updatedChildren) + } else { + n + } + } + + if (nestedParents.nonEmpty) { + nestedParents.map(innerBuildNestedTree) + } else { + distinctNestedElements + } + } + + def walkNestedTree(n: NestedElement)(f: NestedElement => Unit): Unit = { + f(n) + n.children.foreach(child => walkNestedTree(child)(f)) + } +} diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 8c073ed2..ba43a67b 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -21,10 +21,14 @@ case class GroupBy(buckets: Seq[Bucket]) extends Updateable { Right(()) } } + + def nestedElements: Seq[NestedElement] = + buckets.flatMap(_.nestedElement).distinct } case class Bucket( - identifier: Identifier + identifier: Identifier, + size: Option[Int] = None ) extends Updateable { override def sql: String = s"$identifier" def update(request: SQLSearchRequest): Bucket = { @@ -38,7 +42,7 @@ case class Bucket( ) } else { val field = request.select.fields(func.value.toInt - 1) - this.copy(identifier = field.identifier) + this.copy(identifier = field.identifier, size = request.limit.map(_.limit)) } case _ => this.copy(identifier = identifier.update(request)) } @@ -52,35 +56,36 @@ case class Bucket( } else { identifier.name } + lazy val nested: Boolean = nestedElement.isDefined + lazy val nestedElement: Option[NestedElement] = identifier.nestedElement lazy val nestedBucket: Option[String] = - identifier.nestedType.map(t => s"nested_$t") + identifier.nestedElement.map(_.innerHitsName) lazy val name: String = identifier.fieldAlias.getOrElse(sourceBucket.replace(".", "_")) } -object BucketSelectorScript { +object MetricSelectorScript { - def extractBucketsPath(criteria: Criteria): Map[String, String] = criteria match { + def extractMetricsPath(criteria: Criteria): Map[String, String] = criteria match { case Predicate(left, _, right, _, _) => - extractBucketsPath(left) ++ extractBucketsPath(right) - case relation: ElasticRelation => extractBucketsPath(relation.criteria) + extractMetricsPath(left) ++ extractMetricsPath(right) + case relation: ElasticRelation => extractMetricsPath(relation.criteria) case _: MatchCriteria => Map.empty //MATCH is not supported in bucket_selector case e: Expression if e.aggregation => import e._ maybeValue match { - case Some(v: Identifier) if v.aggregation => - Map(identifier.aliasOrName -> identifier.aliasOrName, v.aliasOrName -> v.aliasOrName) - case _ => Map(identifier.aliasOrName -> identifier.aliasOrName) + case Some(v: Identifier) => identifier.metricsPath ++ v.metricsPath + case _ => identifier.metricsPath } case _ => Map.empty } - def toPainless(expr: Criteria): String = expr match { + def metricSelector(expr: Criteria): String = expr match { case Predicate(left, op, right, maybeNot, group) => - val leftStr = toPainless(left) - val rightStr = toPainless(right) + val leftStr = metricSelector(left) + val rightStr = metricSelector(right) val opStr = op match { - case AND | OR => op.painless + case AND | OR => op.painless() case _ => throw new IllegalArgumentException(s"Unsupported logical operator: $op") } val not = maybeNot.nonEmpty @@ -89,24 +94,28 @@ object BucketSelectorScript { else s"$leftStr $opStr $rightStr" - case relation: ElasticRelation => toPainless(relation.criteria) + case relation: ElasticRelation => metricSelector(relation.criteria) case _: MatchCriteria => "1 == 1" //MATCH is not supported in bucket_selector case e: Expression if e.aggregation => - val paramName = e.identifier.paramName - e.out match { - case SQLTypes.Date if e.operator.isInstanceOf[ComparisonOperator] => - // protect against null params and compare epoch millis - s"($paramName != null) && (${e.painless}.truncatedTo(ChronoUnit.DAYS).toInstant().toEpochMilli())" - case SQLTypes.Time if e.operator.isInstanceOf[ComparisonOperator] => - s"($paramName != null) && (${e.painless}.truncatedTo(ChronoUnit.SECONDS).toInstant().toEpochMilli())" - case SQLTypes.DateTime if e.operator.isInstanceOf[ComparisonOperator] => - s"($paramName != null) && (${e.painless}.toInstant().toEpochMilli())" - case _ => - e.painless + val painless = e.painless() + e.maybeValue match { + case Some(value) if e.operator.isInstanceOf[ComparisonOperator] => + value.out match { // compare epoch millis + case SQLTypes.Date => + s"$painless.truncatedTo(ChronoUnit.DAYS).toInstant().toEpochMilli()" + case SQLTypes.Time if e.operator.isInstanceOf[ComparisonOperator] => + s"$painless.truncatedTo(ChronoUnit.SECONDS).toInstant().toEpochMilli()" + case SQLTypes.DateTime if e.operator.isInstanceOf[ComparisonOperator] => + s"$painless.toInstant().toEpochMilli()" + case _ => painless + } + case _ => painless } case _ => "1 == 1" //throw new IllegalArgumentException(s"Unsupported SQLCriteria type: $expr") } } + +case class BucketIncludesExcludes(values: Set[String] = Set.empty, regex: Option[String] = None) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Having.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Having.scala index 73b2cd1b..c4510b44 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Having.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Having.scala @@ -13,4 +13,7 @@ case class Having(criteria: Option[Criteria]) extends Updateable { this.copy(criteria = criteria.map(_.update(request))) override def validate(): Either[String, Unit] = criteria.map(_.validate()).getOrElse(Right(())) + + def nestedElements: Seq[NestedElement] = + criteria.map(_.nestedElements).getOrElse(Seq.empty).distinctBy(_.path) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala index 1cf69ef6..c0ba2906 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala @@ -1,7 +1,7 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.function.{Function, FunctionChain} -import app.softnetwork.elastic.sql.{Expr, Token, TokenRegex} +import app.softnetwork.elastic.sql.{Expr, TokenRegex, Updateable} case object OrderBy extends Expr("ORDER BY") with TokenRegex @@ -15,12 +15,28 @@ case class FieldSort( field: String, order: Option[SortOrder], functions: List[Function] = List.empty -) extends FunctionChain { +) extends FunctionChain + with Updateable { lazy val direction: SortOrder = order.getOrElse(Asc) lazy val name: String = toSQL(field) override def sql: String = s"$name $direction" + override def update(request: SQLSearchRequest): FieldSort = this // No update logic for now TODO } -case class OrderBy(sorts: Seq[FieldSort]) extends Token { +case class OrderBy(sorts: Seq[FieldSort]) extends Updateable { override def sql: String = s" $OrderBy ${sorts.mkString(", ")}" + + override def validate(): Either[String, Unit] = + for { + _ <- + if (sorts.isEmpty) + Left("At least one sort field is required") + else + sorts.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } + } yield () + + def update(request: SQLSearchRequest): OrderBy = this.copy(sorts = sorts.map(_.update(request))) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala index 7b4d6ebb..eda9f18d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala @@ -8,6 +8,9 @@ case class SQLMultiSearchRequest(requests: Seq[SQLSearchRequest]) extends Token def update(): SQLMultiSearchRequest = this.copy(requests = requests.map(_.update())) override def validate(): Either[String, Unit] = { - requests.map(_.validate()).find(_.isLeft).getOrElse(Right(())) + requests.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index aa3170f5..59fab788 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -1,7 +1,7 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.function.aggregate.TopHitsAggregation -import app.softnetwork.elastic.sql.{asString, Identifier, Token} +import app.softnetwork.elastic.sql.{asString, Token} case class SQLSearchRequest( select: Select = Select(), @@ -18,21 +18,62 @@ case class SQLSearchRequest( lazy val fieldAliases: Map[String, String] = select.fieldAliases lazy val tableAliases: Map[String, String] = from.tableAliases - lazy val unnests: Seq[(String, String, Option[Limit])] = from.unnests + lazy val unnestAliases: Map[String, (String, Option[Limit])] = from.unnestAliases lazy val bucketNames: Map[String, Bucket] = buckets.map { b => b.identifier.identifierName -> b }.toMap + lazy val unnests: Map[String, Unnest] = + from.unnests.map(u => u.alias.map(_.alias).getOrElse(u.name) -> u).toMap + lazy val nestedFields: Map[String, Seq[Field]] = + select.fields + .filterNot(_.aggregation) + .filter(_.nested) + .groupBy(_.identifier.innerHitsName.getOrElse("")) + lazy val nested: Seq[NestedElement] = from.unnests.map(toNestedElement).distinctBy(_.path) + private[this] lazy val nestedFieldsWithoutCriteria: Map[String, Seq[Field]] = { + // nested fields that are not part of where, having or group by clauses + val innerHitsWithCriteria = (where.map(_.nestedElements).getOrElse(Seq.empty) ++ + having.map(_.nestedElements).getOrElse(Seq.empty) ++ + groupBy.map(_.nestedElements).getOrElse(Seq.empty)).distinctBy(_.path).map(_.innerHitsName) + val ret = nestedFields.filterNot { case (innerHitsName, _) => + innerHitsWithCriteria.contains(innerHitsName) + } + ret + } + lazy val nestedElementsWithoutCriteria: Seq[NestedElement] = + nested.filter(n => nestedFieldsWithoutCriteria.keys.toSeq.contains(n.innerHitsName)) + + def toNestedElement(u: Unnest): NestedElement = { + NestedElement( + path = u.path, + innerHitsName = u.innerHitsName, + size = limit.map(_.limit), + children = Nil, + sources = nestedFields + .get(u.innerHitsName) + .map(_.map(_.identifier.name.split('.').tail.mkString("."))) + .getOrElse(Nil), + parent = u.parent.map(toNestedElement) + ) + } lazy val sorts: Map[String, SortOrder] = orderBy.map { _.sorts.map(s => s.name -> s.direction) }.getOrElse(Map.empty).toMap def update(): SQLSearchRequest = { - val updated = this.copy(from = from.update(this)) - updated.copy( - select = select.update(updated), - where = where.map(_.update(updated)), - groupBy = groupBy.map(_.update(updated)), - having = having.map(_.update(updated)) + (for { + from <- Option(this.copy(from = from.update(this))) + select <- Option( + from.copy( + select = select.update(from), + groupBy = groupBy.map(_.update(from)), + having = having.map(_.update(from)) + ) + ) + where <- Option(select.copy(where = where.map(_.update(select)))) + updated <- Option(where.copy(orderBy = orderBy.map(_.update(where)))) + } yield updated).getOrElse( + throw new IllegalStateException("Failed to update SQLSearchRequest") ) } @@ -42,6 +83,7 @@ case class SQLSearchRequest( if (aggregates.isEmpty && buckets.isEmpty) select.fields .filterNot(_.isScriptField) + .filterNot(_.nested) .map(_.sourceField) .filterNot(f => excludes.contains(f)) else diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 1da93cec..5cd3a74e 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -64,13 +64,17 @@ case class Field( this.copy(identifier = updated.update(request)) } - def painless: String = identifier.painless + def painless(): String = identifier.painless() def script: Option[String] = identifier.script lazy val scriptName: String = fieldAlias.map(_.alias).getOrElse(sourceField) override def validate(): Either[String, Unit] = identifier.validate() + + lazy val nested: Boolean = identifier.nested + + lazy val path: String = identifier.path } case object Except extends Expr("except") with TokenRegex @@ -97,6 +101,9 @@ case class Select( if (fields.isEmpty) { Left("At least one field is required in SELECT clause") } else { - fields.map(_.validate()).find(_.isLeft).getOrElse(Right(())) + fields.map(_.validate()).filter(_.isLeft) match { + case Nil => Right(()) + case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) + } } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala index e6ba336c..171afe92 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala @@ -15,8 +15,72 @@ case object Where extends Expr("WHERE") with TokenRegex sealed trait Criteria extends Updateable with PainlessScript { def operator: Operator + def identifiers: Seq[Identifier] = this match { + case Predicate(left, _, right, _, _) => left.identifiers ++ right.identifiers + case c: Expression => c.identifiers + case relation: ElasticRelation => relation.criteria.identifiers + case m: MatchCriteria => m.identifiers + case _ => Nil + } + def nested: Boolean = false + def nestedElement: Option[NestedElement] + + def nestedElements: Seq[NestedElement] = + this match { + case p: Predicate => p.nestedElements + case r: ElasticRelation => r.criteria.nestedElements + case e: Expression => e.nestedElement.toSeq + case m: MatchCriteria => m.criteria.nestedElements + case _ => Nil + } + + def nestedCriteria(innerHitsName: String): Seq[Criteria] = { + this match { + case e: ElasticNested => e.criteria.nestedCriteria(innerHitsName) + case _ => + nestedElement + .filter(_ => nestedElement.exists(_.innerHitsName == innerHitsName)) + .map(_ => this) + .toSeq + } + } + + def extractMetricsPath: Map[String, String] = this match { // used for bucket_selector + case Predicate(left, _, right, _, _) => + left.extractMetricsPath ++ right.extractMetricsPath + case relation: ElasticRelation => relation.criteria.extractMetricsPath + case _: MatchCriteria => Map.empty //MATCH is not supported in bucket_selector + case e: Expression => e.extractMetricsPath + case _ => Map.empty + } + + def includes( + bucket: Bucket, + not: Boolean, + bucketIncludesExcludes: BucketIncludesExcludes + ): BucketIncludesExcludes = this match { + case Predicate(left, _, right, n, _) => + right.includes( + bucket, + (!not && n.isDefined) || (not && n.isEmpty), + left.includes(bucket, not, bucketIncludesExcludes) + ) + case relation: ElasticRelation => + relation.criteria.includes(bucket, not, bucketIncludesExcludes) + case m: MatchCriteria => m.criteria.includes(bucket, not, bucketIncludesExcludes) + case e: Expression => e.includes(bucket, not, bucketIncludesExcludes) + case _ => bucketIncludesExcludes + } + + def excludes( + bucket: Bucket, + not: Boolean, + bucketIncludesExcludes: BucketIncludesExcludes + ): BucketIncludesExcludes = + includes(bucket, !not, bucketIncludesExcludes) + def limit: Option[Limit] = None def update(request: SQLSearchRequest): Criteria @@ -42,22 +106,22 @@ sealed trait Criteria extends Updateable with PainlessScript { override def out: SQLType = SQLTypes.Boolean - override def painless: String = this match { + override def painless(): String = this match { case Predicate(left, op, right, maybeNot, group) => - val leftStr = left.painless - val rightStr = right.painless + val leftStr = left.painless() + val rightStr = right.painless() val opStr = op match { - case AND | OR => op.painless + case AND | OR => op.painless() case _ => throw new IllegalArgumentException(s"Unsupported logical operator: $op") } val not = maybeNot.nonEmpty if (group || not) - s"${maybeNot.map(_.painless).getOrElse("")}($leftStr $opStr $rightStr)" + s"${maybeNot.map(_.painless()).getOrElse("")}($leftStr $opStr $rightStr)" else s"$leftStr $opStr $rightStr" - case relation: ElasticRelation => asGroup(relation.criteria.painless) - case m: MatchCriteria => asGroup(m.criteria.painless) - case expr: Expression => asGroup(expr.painless) + case relation: ElasticRelation => asGroup(relation.criteria.painless()) + case m: MatchCriteria => asGroup(m.criteria.painless()) + case expr: Expression => asGroup(expr.painless()) case _ => throw new IllegalArgumentException(s"Unsupported criteria: $this") } } @@ -71,7 +135,7 @@ case class Predicate( ) extends Criteria { override def sql = s"${if (group) s"($leftCriteria" else leftCriteria} $operator${not - .map(_ => " not") + .map(_ => " NOT") .getOrElse("")} ${if (group) s"$rightCriteria)" else rightCriteria}" override def update(request: SQLSearchRequest): Criteria = { val updatedPredicate = this.copy( @@ -113,7 +177,14 @@ case class Predicate( } } - override def nested: Boolean = leftCriteria.nested && rightCriteria.nested + def nestedElement: Option[NestedElement] = None + + override def nestedElements: Seq[NestedElement] = { + leftCriteria.nestedElements ++ rightCriteria.nestedElements + } + + override def nested: Boolean = + leftCriteria.nested && rightCriteria.nested override def matchCriteria: Boolean = leftCriteria.matchCriteria || rightCriteria.matchCriteria @@ -122,6 +193,9 @@ case class Predicate( _ <- leftCriteria.validate() _ <- rightCriteria.validate() } yield () + + override def nestedCriteria(innerHitsName: String): Seq[Criteria] = + leftCriteria.nestedCriteria(innerHitsName) ++ rightCriteria.nestedCriteria(innerHitsName) } sealed trait ElasticFilter @@ -182,6 +256,7 @@ case class ElasticBoolQuery( sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { // to fix output type as Boolean def identifier: Identifier + def nestedElement: Option[NestedElement] = identifier.nestedElement override def nested: Boolean = identifier.nested override def group: Boolean = false override lazy val limit: Option[Limit] = identifier.limit @@ -192,24 +267,96 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { def valueAsString: String = maybeValue.map(v => s" $v").getOrElse("") override def sql = s"$identifier $notAsString$operator$valueAsString" + override def identifiers: Seq[Identifier] = + maybeValue match { + case Some(id: Identifier) => Seq(identifier, id) + case _ => Seq(identifier) + } + + override def extractMetricsPath: Map[String, String] = maybeValue match { + case Some(v: Identifier) => identifier.metricsPath ++ v.metricsPath + case _ => identifier.metricsPath + } + + override def includes( + bucket: Bucket, + not: Boolean, + bucketIncludesExcludes: BucketIncludesExcludes + ): BucketIncludesExcludes = { + identifier.bucket.find(_.name == bucket.name) match { + case Some(_) => + operator match { + case EQ => + if ((!not && maybeNot.isEmpty) || (not && maybeNot.isDefined)) + maybeValue match { + case Some(v: Value[_]) if v.sql.nonEmpty => + bucketIncludesExcludes.copy(values = + bucketIncludesExcludes.values ++ Set(v.sql.replaceAll("'", "")) + ) + case _ => bucketIncludesExcludes + } + else bucketIncludesExcludes + case NE | DIFF => + if ((not && maybeNot.isEmpty) || (!not && maybeNot.isDefined)) + maybeValue match { + case Some(v: Value[_]) if v.sql.nonEmpty => + bucketIncludesExcludes.copy(values = + bucketIncludesExcludes.values ++ Set(v.sql.replaceAll("'", "")) + ) + case _ => bucketIncludesExcludes + } + else bucketIncludesExcludes + case LIKE => + if ((!not && maybeNot.isEmpty) || (not && maybeNot.isDefined)) + maybeValue match { + case Some(v: StringValue) if v.sql.nonEmpty => + bucketIncludesExcludes.copy(regex = + bucketIncludesExcludes.regex.orElse(Option(v.sql.replaceAll("%", ".*"))) + ) + case _ => bucketIncludesExcludes + } + else bucketIncludesExcludes + case RLIKE => + if ((!not && maybeNot.isEmpty) || (not && maybeNot.isDefined)) + maybeValue match { + case Some(v: StringValue) if v.sql.nonEmpty => + bucketIncludesExcludes.copy(regex = + bucketIncludesExcludes.regex.orElse(Option(v.sql)) + ) + case _ => bucketIncludesExcludes + } + else bucketIncludesExcludes + case _ => + bucketIncludesExcludes + } + case _ => bucketIncludesExcludes + } + } + override lazy val aggregation: Boolean = maybeValue match { case Some(v: FunctionChain) => identifier.aggregation || v.aggregation case _ => identifier.aggregation } + def hasBucket: Boolean = identifier.hasBucket || maybeValue.exists { + case v: Identifier => v.hasBucket + case v: Expression => v.hasBucket + case _ => false + } + def painlessNot: String = operator match { case _: ComparisonOperator => "" - case _ => maybeNot.map(_.painless).getOrElse("") + case _ => maybeNot.map(_.painless()).getOrElse("") } def painlessOp: String = operator match { - case o: ComparisonOperator if maybeNot.isDefined => o.not.painless - case _ => operator.painless + case o: ComparisonOperator if maybeNot.isDefined => o.not.painless() + case _ => operator.painless() } def painlessValue: String = maybeValue .map { - case v: PainlessScript => v.painless + case v: PainlessScript => v.painless() case v => v.sql } .getOrElse("") /*{ @@ -233,7 +380,7 @@ sealed trait Expression extends FunctionChain with ElasticFilter with Criteria { case _ => s"$painlessOp($painlessValue)" } - override def painless: String = { + override def painless(): String = { if (identifier.nullable) { return s"def left = $left; left == null ? false : ${painlessNot}left$check" } @@ -349,7 +496,7 @@ case class IsNullCriteria(identifier: Identifier) extends CriteriaWithConditiona } else updated } - override def painless: String = { + override def painless(): String = { if (identifier.nullable) { return s"def left = $left; left == null" } @@ -372,7 +519,7 @@ case class IsNotNullCriteria(identifier: Identifier) updated } - override def painless: String = { + override def painless(): String = { if (identifier.nullable) { return s"def left = $left; left != null" } @@ -403,9 +550,27 @@ case class InExpr[R, +T <: Value[R]]( override def maybeValue: Option[Token] = Some(values) + override def includes( + bucket: Bucket, + not: Boolean, + bucketIncludesExcludes: BucketIncludesExcludes + ): BucketIncludesExcludes = { + identifier.bucket.find(_.name == bucket.name) match { + case Some(_) => + if ((!not && maybeNot.isEmpty) || (not && maybeNot.isDefined)) + bucketIncludesExcludes.copy(values = + bucketIncludesExcludes.values ++ values.values.map(_.sql.replaceAll("'", "")).toSet + ) + else bucketIncludesExcludes + case _ => bucketIncludesExcludes + } + } + override def asFilter(currentQuery: Option[ElasticBoolQuery]): ElasticFilter = this - override def painless: String = s"$painlessNot${identifier.painless}$painlessOp($painlessValue)" + override def painless(): String = + s"$painlessNot${identifier.painless()}$painlessOp($painlessValue)" + } case class BetweenExpr( @@ -435,7 +600,7 @@ case class BetweenExpr( } yield () } - override def painless: String = { + override def painless(): String = { if (identifier.nullable) { return s"def left = $left; left == null ? false : $painlessNot(${fromTo.from} <= left <= ${fromTo.to})" } @@ -465,8 +630,9 @@ case class DistanceCriteria( } case class MatchCriteria( - identifiers: Seq[Identifier], - value: StringValue + override val identifiers: Seq[Identifier], + value: StringValue, + nestedElement: Option[NestedElement] = None ) extends Criteria { override def sql: String = s"$operator (${identifiers.mkString(",")}) $AGAINST ($value)" @@ -521,9 +687,27 @@ case class ElasticMatch( override def asFilter(currentQuery: Option[ElasticBoolQuery]): ElasticFilter = this + override def includes( + bucket: Bucket, + not: Boolean, + bucketIncludesExcludes: BucketIncludesExcludes + ): BucketIncludesExcludes = { + identifier.bucket.find(_.name == bucket.name) match { + case Some(_) => + if ((!not && maybeNot.isEmpty) || (not && maybeNot.isDefined)) + bucketIncludesExcludes.copy(regex = + bucketIncludesExcludes.regex.orElse(Option(value.sql)) + ) + else bucketIncludesExcludes + case _ => bucketIncludesExcludes + } + } + override def matchCriteria: Boolean = true - override def painless: String = s"$painlessNot${identifier.painless}$painlessOp($painlessValue)" + override def painless(): String = + s"$painlessNot${identifier.painless()}$painlessOp($painlessValue)" + } sealed abstract class ElasticRelation(val criteria: Criteria, val operator: ElasticOperator) @@ -534,7 +718,9 @@ sealed abstract class ElasticRelation(val criteria: Criteria, val operator: Elas private[this] def rtype(criteria: Criteria): Option[String] = criteria match { case Predicate(left, _, right, _, _) => rtype(left).orElse(rtype(right)) case c: Expression => - c.identifier.nestedType.orElse(c.identifier.name.split('.').headOption) + c.identifier.nestedElement + .map(_.innerHitsName) + .orElse(c.identifier.name.split('.').headOption) case relation: ElasticRelation => relation.relationType case _ => None } @@ -547,8 +733,17 @@ sealed abstract class ElasticRelation(val criteria: Criteria, val operator: Elas } -case class ElasticNested(override val criteria: Criteria, override val limit: Option[Limit]) - extends ElasticRelation(criteria, Nested) { +case class ElasticNested( + override val criteria: Criteria, + override val limit: Option[Limit], + fromCriteria: Boolean = true +) extends ElasticRelation(criteria, Nested) { + override def sql: String = + if (!fromCriteria) s"$operator($criteria)" + else s"$criteria" + + def nestedElement: Option[NestedElement] = None + override def update(request: SQLSearchRequest): ElasticNested = this.copy(criteria = criteria.update(request)) @@ -565,13 +760,18 @@ case class ElasticNested(override val criteria: Criteria, override val limit: Op lazy val innerHitsName: Option[String] = name(criteria) } -case class ElasticChild(override val criteria: Criteria) extends ElasticRelation(criteria, Child) { +case class ElasticChild( + override val criteria: Criteria +) extends ElasticRelation(criteria, Child) { + def nestedElement: Option[NestedElement] = None override def update(request: SQLSearchRequest): ElasticChild = this.copy(criteria = criteria.update(request)) } -case class ElasticParent(override val criteria: Criteria) - extends ElasticRelation(criteria, Parent) { +case class ElasticParent( + override val criteria: Criteria +) extends ElasticRelation(criteria, Parent) { + def nestedElement: Option[NestedElement] = None override def update(request: SQLSearchRequest): ElasticParent = this.copy(criteria = criteria.update(request)) } @@ -589,4 +789,8 @@ case class Where(criteria: Option[Criteria]) extends Updateable { case _ => Right(()) } + def nestedElements: Seq[NestedElement] = criteria match { + case Some(c) => c.nestedElements.distinctBy(_.path) + case _ => Nil + } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/time/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/time/package.scala index e51e3d9e..cbc2cad5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/time/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/time/package.scala @@ -7,7 +7,7 @@ import scala.util.matching.Regex package object time { sealed trait TimeField extends PainlessScript with TokenRegex { - override def painless: String = s"ChronoField.$timeField" + override def painless(): String = s"ChronoField.$timeField" override def nullable: Boolean = false @@ -62,7 +62,7 @@ package object time { sealed trait IsoField extends TimeField { def isoField: String def timeField: String = isoField - override def painless: String = s"java.time.temporal.IsoFields.$isoField" + override def painless(): String = s"java.time.temporal.IsoFields.$isoField" } object IsoField { @@ -82,7 +82,7 @@ package object time { def timeUnit: String = sql.toUpperCase() + "S" - override def painless: String = s"ChronoUnit.$timeUnit" + override def painless(): String = s"ChronoUnit.$timeUnit" override def nullable: Boolean = false @@ -134,7 +134,7 @@ package object time { def unit: TimeUnit override def sql: String = s"$Interval $value ${unit.sql}" - override def painless: String = s"$value, ${unit.painless}" + override def painless(): String = s"$value, ${unit.painless()}" override def script: Option[String] = Some(TimeInterval.script(this)) diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala index 919819f3..c74d48a4 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala @@ -12,18 +12,18 @@ object Queries { val numericalGt = "SELECT * FROM Table WHERE identifier > 1" val numericalGe = "SELECT * FROM Table WHERE identifier >= 1" val numericalNe = "SELECT * FROM Table WHERE identifier <> 1" - val literalEq = """SELECT * FROM Table WHERE identifier = 'un'""" - val literalLt = "SELECT * FROM Table WHERE createdAt < 'NOW-35M/M'" - val literalLe = "SELECT * FROM Table WHERE createdAt <= 'NOW-35M/M'" - val literalGt = "SELECT * FROM Table WHERE createdAt > 'NOW-35M/M'" - val literalGe = "SELECT * FROM Table WHERE createdAt >= 'NOW-35M/M'" - val literalNe = """SELECT * FROM Table WHERE identifier <> 'un'""" - val boolEq = """SELECT * FROM Table WHERE identifier = true""" - val boolNe = """SELECT * FROM Table WHERE identifier <> false""" - val literalLike = """SELECT * FROM Table WHERE identifier LIKE '%u_n%'""" - val literalRlike = """SELECT * FROM Table WHERE identifier RLIKE '.*u.n.*'""" - val literalNotLike = """SELECT * FROM Table WHERE identifier NOT LIKE '%un%'""" - val betweenExpression = """SELECT * FROM Table WHERE identifier BETWEEN '1' AND '2'""" + val literalEq = "SELECT * FROM Table WHERE identifier = 'un'" + val literalLt = "SELECT * FROM Table WHERE createdAt < 'now-35M/M'" + val literalLe = "SELECT * FROM Table WHERE createdAt <= 'now-35M/M'" + val literalGt = "SELECT * FROM Table WHERE createdAt > 'now-35M/M'" + val literalGe = "SELECT * FROM Table WHERE createdAt >= 'now-35M/M'" + val literalNe = "SELECT * FROM Table WHERE identifier <> 'un'" + val boolEq = "SELECT * FROM Table WHERE identifier = true" + val boolNe = "SELECT * FROM Table WHERE identifier <> false" + val literalLike = "SELECT * FROM Table WHERE identifier LIKE '%u_n%'" + val literalRlike = "SELECT * FROM Table WHERE identifier RLIKE '.*u.n.*'" + val literalNotLike = "SELECT * FROM Table WHERE identifier NOT LIKE '%un%'" + val betweenExpression = "SELECT * FROM Table WHERE identifier BETWEEN '1' AND '2'" val andPredicate = "SELECT * FROM Table WHERE identifier1 = 1 AND identifier2 > 2" val orPredicate = "SELECT * FROM Table WHERE identifier1 = 1 OR identifier2 > 2" val leftPredicate = @@ -33,9 +33,9 @@ object Queries { val predicates = "SELECT * FROM Table WHERE (identifier1 = 1 AND identifier2 > 2) OR (identifier3 = 3 AND identifier4 = 4)" val nestedPredicate = - "SELECT * FROM Table WHERE identifier1 = 1 AND nested(nested.identifier2 > 2 OR nested.identifier3 = 3)" + "SELECT * FROM Table JOIN UNNEST(Table.nested) AS nested WHERE identifier1 = 1 AND (nested.identifier2 > 2 OR nested.identifier3 = 3)" val nestedCriteria = - "SELECT * FROM Table WHERE identifier1 = 1 AND nested(nested.identifier3 = 3)" + "SELECT * FROM Table JOIN UNNEST(Table.nested) AS nested WHERE identifier1 = 1 AND nested.identifier3 = 3" val childPredicate = "SELECT * FROM Table WHERE identifier1 = 1 AND child(child.identifier2 > 2 OR child.identifier3 = 3)" val childCriteria = "SELECT * FROM Table WHERE identifier1 = 1 AND child(child.identifier3 = 3)" @@ -53,7 +53,7 @@ object Queries { val notInNumericalExpressionWithDoubleValues = "SELECT * FROM Table WHERE identifier NOT IN (1.0,2.1,3.4)" val nestedWithBetween = - "SELECT * FROM Table WHERE nested(ciblage.Archivage_CreationDate BETWEEN 'NOW-3M/M' AND 'NOW' AND ciblage.statutComportement = 1)" + "SELECT * FROM Table JOIN UNNEST(Table.ciblage) AS ciblage WHERE ciblage.Archivage_CreationDate BETWEEN 'now-3M/M' AND 'now' AND ciblage.statutComportement = 1" val COUNT = "SELECT COUNT(t.id) AS c1 FROM Table AS t WHERE t.nom = 'Nom'" val countDistinct = "SELECT COUNT(distinct t.id) AS c2 FROM Table AS t WHERE t.nom = 'Nom'" val countNested = @@ -96,7 +96,7 @@ object Queries { """SELECT COUNT(CustomerID) AS cnt, City, Country, MAX(createdAt) AS lastSeen |FROM Table |GROUP BY Country, City - |HAVING Country <> 'USA' AND City != 'Berlin' AND COUNT(CustomerID) > 1 AND lastSeen > NOW - INTERVAL 7 DAY + |HAVING Country <> 'USA' AND City != 'Berlin' AND COUNT(CustomerID) > 1 AND MAX(createdAt) > NOW - INTERVAL 7 DAY |ORDER BY Country ASC""".stripMargin .replaceAll("\n", " ") val dateParse = @@ -166,7 +166,7 @@ object Queries { "SELECT identifier, LENGTH(identifier2) AS len, LOWER(identifier2) AS low, UPPER(identifier2) AS upp, SUBSTRING(identifier2, 1, 3) AS sub, TRIM(identifier2) AS tr, LTRIM(identifier2) AS ltr, RTRIM(identifier2) AS rtr, CONCAT(identifier2, '_test', 1) AS con, LEFT(identifier2, 5) AS l, RIGHT(identifier2, 3) AS r, REPLACE(identifier2, 'el', 'le') AS rep, REVERSE(identifier2) AS rev, POSITION('soft', identifier2, 1) AS pos, REGEXP_LIKE(identifier2, 'soft', 'im') AS reg FROM Table WHERE LENGTH(TRIM(identifier2)) > 10" val topHits: String = - "SELECT department AS dept, firstName, CAST(hire_date AS DATE) AS hire_date, COUNT(DISTINCT salary) AS cnt, FIRST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS first_salary, LAST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS last_salary, ARRAY_AGG(name) OVER (PARTITION BY department ORDER BY hire_date ASC, salary DESC LIMIT 1000) AS employees FROM emp" + "SELECT department AS dept, firstName, CAST(hire_date AS DATE) AS hire_date, COUNT(DISTINCT salary) AS cnt, FIRST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS first_salary, LAST_VALUE(salary) OVER (PARTITION BY department ORDER BY hire_date ASC) AS last_salary, ARRAY_AGG(name) OVER (PARTITION BY department ORDER BY hire_date ASC, salary DESC) AS employees FROM emp LIMIT 1000" val lastDay: String = "SELECT LAST_DAY(CAST(createdAt AS DATE)) AS ld, identifier FROM Table WHERE EXTRACT(DAY FROM LAST_DAY(CURRENT_TIMESTAMP)) > 28" @@ -179,6 +179,34 @@ object Queries { val betweenTemporal = "SELECT * FROM Table WHERE createdAt BETWEEN CURRENT_DATE - INTERVAL 1 MONTH AND CURRENT_DATE AND lastUpdated BETWEEN LAST_DAY('2025-09-11'::DATE) AND DATE_TRUNC(CURRENT_TIMESTAMP, DAY)" + + val nestedOfNested = + "SELECT matched_comments.author AS comment_authors, matched_comments.comments AS comments, matched_replies.reply_author, matched_replies.reply_text FROM blogs AS blogs JOIN UNNEST(blogs.comments) AS matched_comments JOIN UNNEST(matched_comments.replies) AS matched_replies WHERE MATCH (matched_comments.content) AGAINST ('Nice') AND matched_replies.lastUpdated < LAST_DAY('2025-09-10'::DATE) LIMIT 5" // GROUP BY 1 + + val predicateWithDistinctNested = + "SELECT matched_comments.author AS comment_authors, matched_comments.comments AS comments, matched_replies.reply_author, matched_replies.reply_text FROM blogs AS blogs JOIN UNNEST(blogs.comments) AS matched_comments JOIN UNNEST(blogs.replies) AS matched_replies WHERE MATCH (matched_comments.content) AGAINST ('Nice') AND NOT matched_replies.lastUpdated < LAST_DAY('2025-09-10'::DATE) LIMIT 5" // GROUP BY 1 + + val nestedWithoutCriteria = + "SELECT matched_comments.author AS comment_authors, matched_comments.comments AS comments, matched_replies.reply_author, matched_replies.reply_text FROM blogs AS blogs JOIN UNNEST(blogs.comments) AS matched_comments JOIN UNNEST(matched_comments.replies) AS matched_replies WHERE blogs.lastUpdated::DATE < CURRENT_DATE LIMIT 5" // GROUP BY 1 + + val determinationOfTheAggregationContext: String = + """SELECT AVG(blogs.popularity) AS avg_popularity, + |AVG(comments.likes) AS avg_comment_likes + |FROM blogs + |JOIN UNNEST(blogs.comments) AS comments""".stripMargin.replaceAll("\n", " ") + + val aggregationWithNestedOfNestedContext: String = + """SELECT AVG(replies.likes) AS avg_reply_likes + |FROM blogs + |JOIN UNNEST(blogs.comments) AS comments + |JOIN UNNEST(comments.replies) AS replies""".stripMargin.replaceAll("\n", " ") + + val whereFiltersAccordingToScope: String = + """SELECT COUNT(comments.id) AS nb_comments + |FROM blogs + |JOIN UNNEST(blogs.comments) AS comments + |WHERE blogs.status = 'active' + |AND comments.sentiment = 'positive'""".stripMargin.replaceAll("\n", " ") } /** Created by smanciot on 15/02/17. @@ -372,8 +400,7 @@ class SQLParserSpec extends AnyFlatSpec with Matchers { val result = Parser(nestedPredicate) result.toOption .flatMap(_.left.toOption.map(_.sql)) - .getOrElse("") - .equalsIgnoreCase(nestedPredicate) shouldBe true + .getOrElse("") shouldBe nestedPredicate } it should "parse nested criteria" in { @@ -845,4 +872,45 @@ class SQLParserSpec extends AnyFlatSpec with Matchers { .getOrElse("") shouldBe betweenTemporal } + it should "parse nested of nested" in { + val result = Parser(nestedOfNested) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe nestedOfNested + } + + it should "parse predicate with distinct nested" in { + val result = Parser(predicateWithDistinctNested) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe predicateWithDistinctNested + } + + it should "parse nested without criteria" in { + val result = Parser(nestedWithoutCriteria) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe nestedWithoutCriteria + } + + it should "determine the aggregation context" in { + val result = Parser(determinationOfTheAggregationContext) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe determinationOfTheAggregationContext + } + + it should "parse aggregation with nested of nested context" in { + val result = Parser(aggregationWithNestedOfNestedContext) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe aggregationWithNestedOfNestedContext + } + + it should "parse where filters according to scope" in { + val result = Parser(whereFiltersAccordingToScope) + result.toOption + .flatMap(_.left.toOption.map(_.sql)) + .getOrElse("") shouldBe whereFiltersAccordingToScope + } }