diff --git a/README.md b/README.md new file mode 100644 index 00000000..00dcbc4b --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# ![SoftClient4ES Logo](https://raw.githubusercontent.com/SOFTNETWORK-APP/SoftClient4ES/main/logo.png) + + +![Build Status](https://github.com/SOFTNETWORK-APP/SoftClient4ES/workflows/Build/badge.svg) +[![codecov](https://codecov.io/gh/SOFTNETWORK-APP/SoftClient4ES/graph/badge.svg?token=XYCWBGVHAC)](https://codecov.io/gh/SOFTNETWORK-APP/SoftClient4ES) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/1c13d6eb7d6c4a1495cd47e457c132dc)](https://app.codacy.com/gh/SOFTNETWORK-APP/elastic/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) +[![License](https://img.shields.io/github/license/SOFTNETWORK-APP/elastic)](https://github.com/SOFTNETWORK-APP/elastic/blob/main/LICENSE) + +**SoftClient4ES** is a modular and version-resilient interface built on top of Elasticsearch clients, providing a unified and stable API that simplifies migration across Elasticsearch versions, accelerates development, and offers advanced features for search, indexing, and data manipulation. + +## Key Features + +**Unified Elasticsearch API** +This project provides a trait-based interface (`ElasticClientApi`) that aggregates the core functionalities of Elasticsearch: indexing, searching, updating, deleting, mapping, aliases, refreshing, and more. This design abstracts the underlying client implementation and ensures compatibility across different Elasticsearch versions. + +- `JestClientApi`: For Elasticsearch 6 using the open-source [Jest client](https://github.com/searchbox-io/Jest). +- `RestHighLevelClientApi`: For Elasticsearch 6 and 7 using the official high-level REST client. +- `ElasticsearchClientApi`: For Elasticsearch 8 and 9 using the official Java client. + +By relying on these concrete implementations, developers can switch between versions with minimal changes to their business logic. + +**SQL to Elasticsearch Query Translation** +Elastic Client includes a parser capable of translating SQL `SELECT` queries into Elasticsearch queries. The parser produces an intermediate representation, which is then converted into [Elastic4s](https://github.com/sksamuel/elastic4s) DSL queries and ultimately into native Elasticsearch queries. This allows data engineers and analysts to express queries in familiar SQL syntax. + +**Dynamic Mapping Migration** +Elastic Client provides tools to analyze and compare existing mappings with new ones. If differences are detected, it can automatically perform safe migrations. This includes creating temporary indices, reindexing, and renaming — all while preserving data integrity. This eliminates the need for manual mapping migrations and reduces downtime. + +**High-Performance Bulk API with Akka Streams** +Bulk operations leverage the power of Akka Streams to efficiently process and index large volumes of data. This stream-based approach improves performance, resilience, and backpressure handling, especially for real-time or high-throughput indexing scenarios. + +**Akka Persistence Integration** +The project offers seamless integration with Akka Persistence. This enables Elasticsearch indices to be updated reactively based on persistent events, offering a robust pattern for event-sourced systems. + +## Roadmap + +Future enhancements include expanding the SQL parser to support additional operations such as `INSERT`, `UPDATE`, and `DELETE`. The long-term vision is to deliver a fully functional, open-source **JDBC connector for Elasticsearch**, empowering users to interact with their data using standard SQL tooling. + +## License + +This project is open source and licensed under the Apache License 2.0. diff --git a/build.sbt b/build.sbt index fb9186f1..9bebae57 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.2.1" +ThisBuild / version := "0.3.0" ThisBuild / scalaVersion := scala213 diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala index 96f5a474..374cf575 100644 --- a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala @@ -191,14 +191,14 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M case other => fail(other.toString) } - pClient.search[Person]("select * from person_mapping where match(name, 'gum')") match { + pClient.search[Person]("select * from person_mapping where match (name) against ('gum')") match { case r if r.size == 1 => r.map(_.uuid) should contain only "A16" case other => fail(other.toString) } pClient.search[Person]( - "select * from person_mapping where uuid <> 'A16' and match(name, 'gum')" + "select * from person_mapping where uuid <> 'A16' and match (name) against ('gum')" ) match { case r if r.isEmpty => case other => fail(other.toString) @@ -239,7 +239,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person_migration" should haveCount(3) - pClient.search[Person]("select * from person_migration where match(name, 'gum')") match { + pClient.search[Person]("select * from person_migration where match (name) against ('gum')") match { case r if r.isEmpty => case other => fail(other.toString) } @@ -288,7 +288,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M pClient.shouldUpdateMapping("person_migration", newMapping) shouldBe true pClient.updateMapping("person_migration", newMapping) shouldBe true - pClient.search[Person]("select * from person_migration where match(name, 'gum')") match { + pClient.search[Person]("select * from person_migration where match (name) against ('gum')") match { case r if r.size == 1 => r.map(_.uuid) should contain only "A16" case other => fail(other.toString) diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 6b7b578a..c7e7f5c3 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -7,21 +7,28 @@ import app.softnetwork.elastic.sql.{ ElasticBoolQuery, Max, Min, - SQLAggregate, + SQLBucket, + SQLCriteria, + SQLField, Sum } import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, cardinalityAgg, filterAgg, - matchAllQuery, maxAgg, minAgg, nestedAggregation, sumAgg, + termsAgg, valueCountAgg } -import com.sksamuel.elastic4s.searches.aggs.Aggregation +import com.sksamuel.elastic4s.searches.aggs.{ + Aggregation, + FilterAggregation, + NestedAggregation, + TermsAggregation +} import scala.language.implicitConversions @@ -32,78 +39,92 @@ case class ElasticAggregation( sources: Seq[String] = Seq.empty, query: Option[String] = None, distinct: Boolean = false, - nested: Boolean = false, - filtered: Boolean = false, + nestedAgg: Option[NestedAggregation] = None, + filteredAgg: Option[FilterAggregation] = None, aggType: AggregateFunction, agg: Aggregation -) +) { + val nested: Boolean = nestedAgg.nonEmpty + val filtered: Boolean = filteredAgg.nonEmpty +} object ElasticAggregation { - def apply(sqlAgg: SQLAggregate): ElasticAggregation = { + def apply(sqlAgg: SQLField, filter: Option[SQLCriteria]): ElasticAggregation = { import sqlAgg._ - val sourceField = identifier.columnName + val sourceField = identifier.name - val field = alias match { + val field = fieldAlias match { case Some(alias) => alias.alias case _ => sourceField } - val distinct = identifier.distinct.isDefined + val distinct = identifier.distinct - val agg = - if (distinct) - s"${function}_distinct_${sourceField.replace(".", "_")}" + val aggType = aggregateFunction.getOrElse( + throw new IllegalArgumentException("Aggregation function is required") + ) + + val aggName = { + if (fieldAlias.isDefined) + field + else if (distinct) + s"${aggType}_distinct_${sourceField.replace(".", "_")}" else - s"${function}_${sourceField.replace(".", "_")}" + s"${aggType}_${sourceField.replace(".", "_")}" + } var aggPath = Seq[String]() val _agg = - function match { + aggType match { case Count => if (distinct) - cardinalityAgg(agg, sourceField) + cardinalityAgg(aggName, sourceField) else { - valueCountAgg(agg, sourceField) + valueCountAgg(aggName, sourceField) } - case Min => minAgg(agg, sourceField) - case Max => maxAgg(agg, sourceField) - case Avg => avgAgg(agg, sourceField) - case Sum => sumAgg(agg, sourceField) + case Min => minAgg(aggName, sourceField) + case Max => maxAgg(aggName, sourceField) + case Avg => avgAgg(aggName, sourceField) + case Sum => sumAgg(aggName, sourceField) } - def _filtered: Aggregation = filter match { - case Some(f) => - val boolQuery = Option(ElasticBoolQuery(group = true)) - val filteredAgg = s"filtered_agg" - aggPath ++= Seq(filteredAgg) - filterAgg( - filteredAgg, - f.criteria - .map( - _.asFilter(boolQuery) + val filteredAggName = "filtered_agg" + + val filteredAgg: Option[FilterAggregation] = + filter match { + case Some(f) => + val boolQuery = Option(ElasticBoolQuery(group = true)) + Some( + filterAgg( + filteredAggName, + f.asFilter(boolQuery) .query(Set(identifier.innerHitsName).flatten, boolQuery) ) - .getOrElse(matchAllQuery()) - ) subaggs { - aggPath ++= Seq(agg) - _agg - } - case _ => - aggPath ++= Seq(agg) - _agg - } + ) + case _ => + None + } + + def filtered(): Unit = + filteredAgg match { + case Some(_) => + aggPath ++= Seq(filteredAggName) + aggPath ++= Seq(aggName) + case _ => + aggPath ++= Seq(aggName) + } - val aggregation = + val nestedAgg = if (identifier.nested) { val path = sourceField.split("\\.").head - val nestedAgg = s"nested_$agg" + val nestedAgg = s"nested_${identifier.nestedType.getOrElse(aggName)}" aggPath ++= Seq(nestedAgg) - nestedAggregation(nestedAgg, path) subaggs { - _filtered - } + filtered() + Some(nestedAggregation(nestedAgg, path)) } else { - _filtered + filtered() + None } ElasticAggregation( @@ -111,10 +132,48 @@ object ElasticAggregation { field, sourceField, distinct = distinct, - nested = identifier.nested, - filtered = filter.nonEmpty, - aggType = function, - agg = aggregation + nestedAgg = nestedAgg, + filteredAgg = filteredAgg, + aggType = aggType, + agg = _agg ) } + + /* + def apply( + buckets: Seq[SQLBucket], + aggregations: Seq[Aggregation], + current: Option[TermsAggregation] + ): Option[TermsAggregation] = { + buckets match { + case Nil => + current.map(_.copy(subaggs = aggregations)) + case bucket +: tail => + val agg = termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + current match { + case Some(a) => + apply(tail, aggregations, Some(agg)) match { + case Some(subAgg) => + Some(a.copy(subaggs = a.subaggs :+ subAgg)) + case _ => Some(a) + } + case None => + apply(tail, aggregations, Some(agg)) + } + } + } + */ + + def buildBuckets( + buckets: Seq[SQLBucket], + aggregations: Seq[Aggregation] + ): Option[TermsAggregation] = { + buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => + val agg = termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + current match { + case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) + case None => Some(agg.copy(subaggs = aggregations)) + } + } + } } diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala index 83310eaa..35950d9c 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala @@ -1,6 +1,6 @@ package app.softnetwork.elastic.sql.bridge -import app.softnetwork.elastic.sql.{SQLCriteria, SQLExcept, SQLField} +import app.softnetwork.elastic.sql.{SQLBucket, SQLCriteria, SQLExcept, SQLField} import com.sksamuel.elastic4s.searches.SearchRequest import com.sksamuel.elastic4s.http.search.SearchBodyBuilderFn @@ -11,6 +11,7 @@ case class ElasticSearchRequest( criteria: Option[SQLCriteria], limit: Option[Int], search: SearchRequest, + buckets: Seq[SQLBucket] = Seq.empty, aggregations: Seq[ElasticAggregation] = Seq.empty ) { def minScore(score: Option[Double]): ElasticSearchRequest = { diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 98ad716f..122cf9c0 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -4,6 +4,7 @@ import com.sksamuel.elastic4s.ElasticApi import com.sksamuel.elastic4s.ElasticApi._ import com.sksamuel.elastic4s.http.ElasticDsl.BuildableTermsNoOp import com.sksamuel.elastic4s.http.search.SearchBodyBuilderFn +import com.sksamuel.elastic4s.searches.aggs.Aggregation import com.sksamuel.elastic4s.searches.queries.Query import com.sksamuel.elastic4s.searches.{MultiSearchRequest, SearchRequest} import com.sksamuel.elastic4s.searches.sort.FieldSort @@ -19,24 +20,60 @@ package object bridge { request.where.flatMap(_.criteria), request.limit.map(_.limit), request, - request.aggregates.map(ElasticAggregation(_)) + request.buckets, + request.aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria))) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ - val aggregations = aggregates.map(ElasticAggregation(_)) + val notNestedBuckets = buckets.filterNot(_.identifier.nested) + val nestedBuckets = buckets.filter(_.identifier.nested).groupBy(_.nestedBucket.getOrElse("")) + val aggregations = aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria))) + val notNestedAggregations = aggregations.filterNot(_.nested) + val nestedAggregations = + aggregations.filter(_.nested).groupBy(_.nestedAgg.map(_.name).getOrElse("")) var _search: SearchRequest = search("") query { where.flatMap(_.criteria.map(_.asQuery())).getOrElse(matchAllQuery()) - } sourceInclude fields + } sourceFiltering (fields, excludes) - _search = excludes match { - case Nil => _search - case excludes => _search sourceExclude excludes + _search = if (nestedAggregations.nonEmpty) { + _search aggregations { + nestedAggregations.map { case (nested, aggs) => + val first = aggs.head + val aggregations = aggs.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + nestedBuckets.getOrElse(nested, Seq.empty), + aggregations + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val filtered: Option[Aggregation] = + first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) + first.nestedAgg.get.subAggregations(filtered.map(Seq(_)).getOrElse(buckets)) + } + } + } else { + _search } - _search = aggregations match { + _search = notNestedAggregations match { case Nil => _search - case _ => _search aggregations { aggregations.map(_.agg) } + case _ => + _search aggregations { + val first = notNestedAggregations.head + val aggregations = notNestedAggregations.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + notNestedBuckets, + aggregations + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val filtered: Option[Aggregation] = + first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) + filtered.map(Seq(_)).getOrElse(buckets) + } } _search = orderBy match { @@ -50,7 +87,7 @@ package object bridge { case _ => _search } - if (aggregations.nonEmpty && fields.isEmpty) { + if (aggregations.nonEmpty || buckets.nonEmpty) { _search size 0 } else { limit match { @@ -76,44 +113,44 @@ package object bridge { case _: Ge.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lt n.sql + rangeQuery(identifier.name) lt n.sql case _ => - rangeQuery(identifier.columnName) gte n.sql + rangeQuery(identifier.name) gte n.sql } case _: Gt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lte n.sql + rangeQuery(identifier.name) lte n.sql case _ => - rangeQuery(identifier.columnName) gt n.sql + rangeQuery(identifier.name) gt n.sql } case _: Le.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gt n.sql + rangeQuery(identifier.name) gt n.sql case _ => - rangeQuery(identifier.columnName) lte n.sql + rangeQuery(identifier.name) lte n.sql } case _: Lt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gte n.sql + rangeQuery(identifier.name) gte n.sql case _ => - rangeQuery(identifier.columnName) lt n.sql + rangeQuery(identifier.name) lt n.sql } case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, n.sql)) + not(termQuery(identifier.name, n.sql)) case _ => - termQuery(identifier.columnName, n.sql) + termQuery(identifier.name, n.sql) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, n.sql) + termQuery(identifier.name, n.sql) case _ => - not(termQuery(identifier.columnName, n.sql)) + not(termQuery(identifier.name, n.sql)) } case _ => matchAllQuery() } @@ -122,51 +159,51 @@ package object bridge { case _: Like.type => maybeNot match { case Some(_) => - not(regexQuery(identifier.columnName, toRegex(l.value))) + not(regexQuery(identifier.name, toRegex(l.value))) case _ => - regexQuery(identifier.columnName, toRegex(l.value)) + regexQuery(identifier.name, toRegex(l.value)) } case _: Ge.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lt l.value + rangeQuery(identifier.name) lt l.value case _ => - rangeQuery(identifier.columnName) gte l.value + rangeQuery(identifier.name) gte l.value } case _: Gt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lte l.value + rangeQuery(identifier.name) lte l.value case _ => - rangeQuery(identifier.columnName) gt l.value + rangeQuery(identifier.name) gt l.value } case _: Le.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gt l.value + rangeQuery(identifier.name) gt l.value case _ => - rangeQuery(identifier.columnName) lte l.value + rangeQuery(identifier.name) lte l.value } case _: Lt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gte l.value + rangeQuery(identifier.name) gte l.value case _ => - rangeQuery(identifier.columnName) lt l.value + rangeQuery(identifier.name) lt l.value } case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, l.value)) + not(termQuery(identifier.name, l.value)) case _ => - termQuery(identifier.columnName, l.value) + termQuery(identifier.name, l.value) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, l.value) + termQuery(identifier.name, l.value) case _ => - not(termQuery(identifier.columnName, l.value)) + not(termQuery(identifier.name, l.value)) } case _ => matchAllQuery() } @@ -175,16 +212,16 @@ package object bridge { case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, b.value)) + not(termQuery(identifier.name, b.value)) case _ => - termQuery(identifier.columnName, b.value) + termQuery(identifier.name, b.value) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, b.value) + termQuery(identifier.name, b.value) case _ => - not(termQuery(identifier.columnName, b.value)) + not(termQuery(identifier.name, b.value)) } case _ => matchAllQuery() } @@ -196,14 +233,14 @@ package object bridge { isNull: SQLIsNull ): Query = { import isNull._ - not(existsQuery(identifier.columnName)) + not(existsQuery(identifier.name)) } implicit def isNotNullToQuery( isNotNull: SQLIsNotNull ): Query = { import isNotNull._ - existsQuery(identifier.columnName) + existsQuery(identifier.name) } implicit def inToQuery[R, T <: SQLValue[R]](in: SQLIn[R, T]): Query = { @@ -212,12 +249,12 @@ package object bridge { val t = _values.headOption match { case Some(_: Double) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Double]]) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Double]]) case Some(_: Integer) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Integer]]) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Integer]]) case Some(_: Long) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Long]]) - case _ => termsQuery(identifier.columnName, _values.map(_.toString)) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Long]]) + case _ => termsQuery(identifier.name, _values.map(_.toString)) } maybeNot match { case Some(_) => not(t) @@ -229,7 +266,7 @@ package object bridge { between: SQLBetween ): Query = { import between._ - val r = rangeQuery(identifier.columnName) gte from.value lte to.value + val r = rangeQuery(identifier.name) gte from.value lte to.value maybeNot match { case Some(_) => not(r) case _ => r @@ -240,14 +277,14 @@ package object bridge { geoDistance: ElasticGeoDistance ): Query = { import geoDistance._ - geoDistanceQuery(identifier.columnName, lat.value, lon.value) distance distance.value + geoDistanceQuery(identifier.name, lat.value, lon.value) distance distance.value } implicit def matchToQuery( matchExpression: ElasticMatch ): Query = { import matchExpression._ - matchQuery(identifier.columnName, value.value) + matchQuery(identifier.name, value.value) } implicit def criteriaToElasticCriteria( @@ -272,7 +309,7 @@ package object bridge { .map { case Left(l) => l.aggregates - .map(ElasticAggregation(_)) + .map(ElasticAggregation(_, l.having.flatMap(_.criteria))) .map(aggregation => { val queryFiltered = l.where @@ -295,7 +332,15 @@ package object bridge { queryFiltered } aggregations { - aggregation.agg + val filtered = + aggregation.filteredAgg match { + case Some(filtered) => filtered.subAggregations(aggregation.agg) + case _ => aggregation.agg + } + aggregation.nestedAgg match { + case Some(nested) => nested.subAggregations(filtered) + case _ => filtered + } } size 0 ) diff --git a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala index 9f938334..93220c1f 100644 --- a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala +++ b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala @@ -147,7 +147,7 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { |"query":{ | "bool":{"filter":[{"regexp" : { | "identifier" : { - | "value" : ".*?un.*?" + | "value" : ".*un.*" | } | } | } @@ -161,7 +161,7 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { | "filter":[{"bool":{"must_not": [{ | "regexp": { | "identifier": { - | "value": ".*?un.*?" + | "value": ".*un.*" | } | } | }] @@ -782,10 +782,24 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { """{ | "query":{ | "bool":{ - | "filter":[ + | "should":[ | { | "match":{ - | "identifier":{ + | "identifier1":{ + | "query":"value" + | } + | } + | }, + | { + | "match":{ + | "identifier2":{ + | "query":"value" + | } + | } + | }, + | { + | "match":{ + | "identifier3":{ | "query":"value" | } | } diff --git a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 37110577..d8f7467b 100644 --- a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -32,7 +32,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe false result.distinct shouldBe false - result.aggName shouldBe "count_id" + result.aggName shouldBe "c2" result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe @@ -52,7 +52,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "count_id": { + | "c2": { | "value_count": { | "field": "id" | } @@ -68,7 +68,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe false result.distinct shouldBe true - result.aggName shouldBe "count_distinct_id" + result.aggName shouldBe "c2" result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe @@ -88,7 +88,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "count_distinct_id": { + | "c2": { | "cardinality": { | "field": "id" | } @@ -106,7 +106,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.count_emails_value" + result.aggName shouldBe "nested_emails.email" result.field shouldBe "email" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -126,12 +126,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_emails_value": { + | "email": { | "value_count": { | "field": "emails.value" | } @@ -151,7 +151,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.count_emails_value" + result.aggName shouldBe "nested_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -185,12 +185,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_emails_value": { + | "count_emails": { | "value_count": { | "field": "emails.value" | } @@ -204,13 +204,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with filter" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails filter[inner_emails.context = \"profile\"] from index, unnest(emails) as inner_emails, unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" + "select count(inner_emails.value) as count_emails from index, unnest(emails) as inner_emails, unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.filtered_agg.count_emails_value" + result.aggName shouldBe "nested_emails.filtered_agg.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -244,7 +244,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, @@ -258,7 +258,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "count_emails_value": { + | "count_emails": { | "value_count": { | "field": "emails.value" | } @@ -280,7 +280,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_count_distinct_emails_value.count_distinct_emails_value" + result.aggName shouldBe "nested_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -330,12 +330,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_distinct_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_distinct_emails_value": { + | "count_emails": { | "cardinality": { | "field": "emails.value" | } @@ -356,7 +356,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_count_distinct_emails_value.count_distinct_emails_value" + result.aggName shouldBe "nested_emails.count_distinct_emails" result.field shouldBe "count_distinct_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -399,12 +399,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_distinct_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_distinct_emails_value": { + | "count_distinct_emails": { | "cardinality": { | "field": "emails.value" | } @@ -510,6 +510,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "match_all":{} | }, | "_source":{ + | "includes":["*"], | "excludes":["col1","col2"] | } |}""".stripMargin.replaceAll("\\s+", "") @@ -519,9 +520,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val select: ElasticSearchRequest = SQLQuery( s"""SELECT - | inner_products.name, - | inner_products.category, - | inner_products.price, + | inner_products.category as category, + | inner_products.name as productName, | min(inner_products.price) as min_price, | max(inner_products.price) as max_price |FROM @@ -539,20 +539,16 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | ( | distance(pickup.location,(0.0,0.0)) <= "7000m" OR | distance(withdrawals.location,(0.0,0.0)) <= "7000m" - | ) AND - | ( - | inner_products.deleted=false AND - | inner_products.upForSale=true AND - | inner_products.stock > 0 - | ) - | ) AND - | ( - | match(products.name, "lasagnes") AND - | ( - | match(products.description, "lasagnes") OR - | match(products.ingredients, "lasagnes") | ) | ) + |GROUP BY + | inner_products.category, + | inner_products.name + |HAVING inner_products.deleted=false AND + | inner_products.upForSale=true AND + | inner_products.stock > 0 AND + | match (inner_products.name) against ("lasagnes") AND + | match (inner_products.description, inner_products.ingredients) against ("lasagnes") |ORDER BY preparationTime ASC, nbOrders DESC |LIMIT 100""".stripMargin ).minScore(1.0) @@ -563,35 +559,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |{ | "query": { | "bool": { - | "must": [ - | { - | "match": { - | "products.name": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "bool": { - | "should": [ - | { - | "match": { - | "products.description": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "match": { - | "products.ingredients": { - | "query": "lasagnes" - | } - | } - | } - | ] - | } - | } - | ], | "filter": [ | { | "bool": { @@ -631,7 +598,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | { | "regexp": { | "blockedCustomers": { - | "value": ".*?uuid.*?" + | "value": ".*uuid.*" | } | } | } @@ -674,43 +641,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | ] | } - | }, - | { - | "nested": { - | "path": "products", - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "products.deleted": { - | "value": false - | } - | } - | }, - | { - | "term": { - | "products.upForSale": { - | "value": true - | } - | } - | }, - | { - | "range": { - | "products.stock": { - | "gt": "0" - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "inner_products", - | "from": 0, - | "size": 10 - | } - | } | } | ] | } @@ -718,8 +648,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | ] | } | }, - | "from": 0, - | "size": 100, + | "size": 0, | "min_score": 1.0, | "sort": [ | { @@ -733,34 +662,93 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | ], - | "_source": { - | "includes": [ - | "inner_products.name", - | "inner_products.category", - | "inner_products.price" - | ] - | }, + | "_source": true, | "aggs": { - | "nested_min_products_price": { - | "nested": { - | "path": "products" - | }, - | "aggs": { - | "min_products_price": { - | "min": { - | "field": "products.price" - | } - | } - | } - | }, - | "nested_max_products_price": { + | "nested_products": { | "nested": { | "path": "products" | }, | "aggs": { - | "max_products_price": { - | "max": { - | "field": "products.price" + | "filtered_agg": { + | "filter": { + | "bool": { + | "filter": [ + | { + | "term": { + | "products.deleted": { + | "value": false + | } + | } + | }, + | { + | "term": { + | "products.upForSale": { + | "value": true + | } + | } + | }, + | { + | "range": { + | "products.stock": { + | "gt": "0" + | } + | } + | }, + | { + | "match": { + | "products.name": { + | "query": "lasagnes" + | } + | } + | }, + | { + | "bool": { + | "should": [ + | { + | "match": { + | "products.description": { + | "query": "lasagnes" + | } + | } + | }, + | { + | "match": { + | "products.ingredients": { + | "query": "lasagnes" + | } + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "aggs": { + | "category": { + | "terms": { + | "field": "products.category.keyword" + | }, + | "aggs": { + | "productName": { + | "terms": { + | "field": "products.name.keyword" + | }, + | "aggs": { + | "min_price": { + | "min": { + | "field": "products.price" + | } + | }, + | "max_price": { + | "max": { + | "field": "products.price" + | } + | } + | } + | } + | } + | } | } | } | } diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 27f05588..5d40d1db 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -7,103 +7,123 @@ import app.softnetwork.elastic.sql.{ ElasticBoolQuery, Max, Min, - SQLAggregate, + SQLBucket, + SQLCriteria, + SQLField, Sum } import com.sksamuel.elastic4s.ElasticApi.{ avgAgg, cardinalityAgg, filterAgg, - matchAllQuery, maxAgg, minAgg, nestedAggregation, sumAgg, + termsAgg, valueCountAgg } -import com.sksamuel.elastic4s.requests.searches.aggs.Aggregation +import com.sksamuel.elastic4s.requests.searches.aggs.{ + Aggregation, + FilterAggregation, + NestedAggregation, + TermsAggregation +} import scala.language.implicitConversions case class ElasticAggregation( - aggName: String, - field: String, - sourceField: String, - sources: Seq[String] = Seq.empty, - query: Option[String] = None, - distinct: Boolean = false, - nested: Boolean = false, - filtered: Boolean = false, - aggType: AggregateFunction, - agg: Aggregation -) + aggName: String, + field: String, + sourceField: String, + sources: Seq[String] = Seq.empty, + query: Option[String] = None, + distinct: Boolean = false, + nestedAgg: Option[NestedAggregation] = None, + filteredAgg: Option[FilterAggregation] = None, + aggType: AggregateFunction, + agg: Aggregation) { + val nested: Boolean = nestedAgg.nonEmpty + val filtered: Boolean = filteredAgg.nonEmpty +} object ElasticAggregation { - def apply(sqlAgg: SQLAggregate): ElasticAggregation = { + def apply(sqlAgg: SQLField, filter: Option[SQLCriteria]): ElasticAggregation = { import sqlAgg._ - val sourceField = identifier.columnName + val sourceField = identifier.name - val field = alias match { + val field = fieldAlias match { case Some(alias) => alias.alias case _ => sourceField } - val distinct = identifier.distinct.isDefined + val distinct = identifier.distinct - val agg = - if (distinct) - s"${function}_distinct_${sourceField.replace(".", "_")}" + val aggType = aggregateFunction.getOrElse( + throw new IllegalArgumentException("Aggregation function is required") + ) + + val aggName = { + if (fieldAlias.isDefined) + field + else if (distinct) + s"${aggType}_distinct_${sourceField.replace(".", "_")}" else - s"${function}_${sourceField.replace(".", "_")}" + s"${aggType}_${sourceField.replace(".", "_")}" + } var aggPath = Seq[String]() val _agg = - function match { + aggType match { case Count => if (distinct) - cardinalityAgg(agg, sourceField) + cardinalityAgg(aggName, sourceField) else { - valueCountAgg(agg, sourceField) + valueCountAgg(aggName, sourceField) } - case Min => minAgg(agg, sourceField) - case Max => maxAgg(agg, sourceField) - case Avg => avgAgg(agg, sourceField) - case Sum => sumAgg(agg, sourceField) + case Min => minAgg(aggName, sourceField) + case Max => maxAgg(aggName, sourceField) + case Avg => avgAgg(aggName, sourceField) + case Sum => sumAgg(aggName, sourceField) } - def _filtered: Aggregation = filter match { - case Some(f) => - val boolQuery = Option(ElasticBoolQuery(group = true)) - val filteredAgg = s"filtered_agg" - aggPath ++= Seq(filteredAgg) - filterAgg( - filteredAgg, - f.criteria - .map( - _.asFilter(boolQuery) + val filteredAggName = "filtered_agg" + + val filteredAgg: Option[FilterAggregation] = + filter match { + case Some(f) => + val boolQuery = Option(ElasticBoolQuery(group = true)) + Some( + filterAgg( + filteredAggName, + f.asFilter(boolQuery) .query(Set(identifier.innerHitsName).flatten, boolQuery) ) - .getOrElse(matchAllQuery()) - ) subaggs { - aggPath ++= Seq(agg) - _agg - } - case _ => - aggPath ++= Seq(agg) - _agg - } + ) + case _ => + None + } + + def filtered(): Unit = + filteredAgg match { + case Some(_) => + aggPath ++= Seq(filteredAggName) + aggPath ++= Seq(aggName) + case _ => + aggPath ++= Seq(aggName) + } - val aggregation = + val nestedAgg = if (identifier.nested) { val path = sourceField.split("\\.").head - val nestedAgg = s"nested_$agg" + val nestedAgg = s"nested_${identifier.nestedType.getOrElse(aggName)}" aggPath ++= Seq(nestedAgg) - nestedAggregation(nestedAgg, path) subaggs { - _filtered - } + filtered() + Some(nestedAggregation(nestedAgg, path)) } else { - _filtered + filtered() + None } ElasticAggregation( @@ -111,10 +131,48 @@ object ElasticAggregation { field, sourceField, distinct = distinct, - nested = identifier.nested, - filtered = filter.nonEmpty, - aggType = function, - agg = aggregation + nestedAgg = nestedAgg, + filteredAgg = filteredAgg, + aggType = aggType, + agg = _agg ) } + + /* + def apply( + buckets: Seq[SQLBucket], + aggregations: Seq[Aggregation], + current: Option[TermsAggregation] + ): Option[TermsAggregation] = { + buckets match { + case Nil => + current.map(_.copy(subaggs = aggregations)) + case bucket +: tail => + val agg = termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + current match { + case Some(a) => + apply(tail, aggregations, Some(agg)) match { + case Some(subAgg) => + Some(a.copy(subaggs = a.subaggs :+ subAgg)) + case _ => Some(a) + } + case None => + apply(tail, aggregations, Some(agg)) + } + } + } + */ + + def buildBuckets( + buckets: Seq[SQLBucket], + aggregations: Seq[Aggregation] + ): Option[TermsAggregation] = { + buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) => + val agg = termsAgg(bucket.name, s"${bucket.identifier.name}.keyword") + current match { + case Some(subAgg) => Some(agg.copy(subaggs = Seq(subAgg))) + case None => Some(agg.copy(subaggs = aggregations)) + } + } + } } diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala index 1d0530ff..bfd1bc7f 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticSearchRequest.scala @@ -1,6 +1,6 @@ package app.softnetwork.elastic.sql.bridge -import app.softnetwork.elastic.sql.{SQLCriteria, SQLExcept, SQLField} +import app.softnetwork.elastic.sql.{SQLBucket, SQLCriteria, SQLExcept, SQLField} import com.sksamuel.elastic4s.requests.searches.{SearchBodyBuilderFn, SearchRequest} case class ElasticSearchRequest( @@ -10,6 +10,7 @@ case class ElasticSearchRequest( criteria: Option[SQLCriteria], limit: Option[Int], search: SearchRequest, + buckets: Seq[SQLBucket] = Seq.empty, aggregations: Seq[ElasticAggregation] = Seq.empty ) { def minScore(score: Option[Double]): ElasticSearchRequest = { diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 8a8456f1..1ae79170 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -2,6 +2,7 @@ package app.softnetwork.elastic.sql import com.sksamuel.elastic4s.ElasticApi import com.sksamuel.elastic4s.ElasticApi._ +import com.sksamuel.elastic4s.requests.searches.aggs.Aggregation import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.requests.searches.sort.FieldSort import com.sksamuel.elastic4s.requests.searches.{ @@ -21,24 +22,59 @@ package object bridge { request.where.flatMap(_.criteria), request.limit.map(_.limit), request, - request.aggregates.map(ElasticAggregation(_)) + request.buckets, + request.aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria))) ).minScore(request.score) implicit def requestToSearchRequest(request: SQLSearchRequest): SearchRequest = { import request._ - val aggregations = aggregates.map(ElasticAggregation(_)) + val notNestedBuckets = buckets.filterNot(_.identifier.nested) + val nestedBuckets = buckets.filter(_.identifier.nested).groupBy(_.nestedBucket.getOrElse("")) + val aggregations = aggregates.map(ElasticAggregation(_, request.having.flatMap(_.criteria))) + val notNestedAggregations = aggregations.filterNot(_.nested) + val nestedAggregations = + aggregations.filter(_.nested).groupBy(_.nestedAgg.map(_.name).getOrElse("")) var _search: SearchRequest = search("") query { where.flatMap(_.criteria.map(_.asQuery())).getOrElse(matchAllQuery()) - } sourceInclude fields + } sourceFiltering (fields, excludes) - _search = excludes match { - case Nil => _search - case excludes => _search sourceExclude excludes + _search = if (nestedAggregations.nonEmpty) { + _search aggregations { + nestedAggregations.map { case (nested, aggs) => + val first = aggs.head + val aggregations = aggs.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + nestedBuckets.getOrElse(nested, Seq.empty), + aggregations + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val filtered: Option[Aggregation] = + first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) + first.nestedAgg.get.subAggregations(filtered.map(Seq(_)).getOrElse(buckets)) + } + } + } else { + _search } - _search = aggregations match { + _search = notNestedAggregations match { case Nil => _search - case _ => _search aggregations { aggregations.map(_.agg) } + case _ => _search aggregations { + val first = notNestedAggregations.head + val aggregations = notNestedAggregations.map(_.agg) + val buckets = ElasticAggregation.buildBuckets( + notNestedBuckets, + aggregations + ) match { + case Some(b) => Seq(b) + case _ => aggregations + } + val filtered: Option[Aggregation] = + first.filteredAgg.map(filtered => filtered.subAggregations(buckets)) + filtered.map(Seq(_)).getOrElse(buckets) + } } _search = orderBy match { @@ -63,8 +99,8 @@ package object bridge { } implicit def requestToMultiSearchRequest( - request: SQLMultiSearchRequest - ): MultiSearchRequest = { + request: SQLMultiSearchRequest + ): MultiSearchRequest = { MultiSearchRequest( request.requests.map(implicitly[SearchRequest](_)) ) @@ -78,44 +114,44 @@ package object bridge { case _: Ge.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lt n.sql + rangeQuery(identifier.name) lt n.sql case _ => - rangeQuery(identifier.columnName) gte n.sql + rangeQuery(identifier.name) gte n.sql } case _: Gt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lte n.sql + rangeQuery(identifier.name) lte n.sql case _ => - rangeQuery(identifier.columnName) gt n.sql + rangeQuery(identifier.name) gt n.sql } case _: Le.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gt n.sql + rangeQuery(identifier.name) gt n.sql case _ => - rangeQuery(identifier.columnName) lte n.sql + rangeQuery(identifier.name) lte n.sql } case _: Lt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gte n.sql + rangeQuery(identifier.name) gte n.sql case _ => - rangeQuery(identifier.columnName) lt n.sql + rangeQuery(identifier.name) lt n.sql } case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, n.sql)) + not(termQuery(identifier.name, n.sql)) case _ => - termQuery(identifier.columnName, n.sql) + termQuery(identifier.name, n.sql) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, n.sql) + termQuery(identifier.name, n.sql) case _ => - not(termQuery(identifier.columnName, n.sql)) + not(termQuery(identifier.name, n.sql)) } case _ => matchAllQuery() } @@ -124,51 +160,51 @@ package object bridge { case _: Like.type => maybeNot match { case Some(_) => - not(regexQuery(identifier.columnName, toRegex(l.value))) + not(regexQuery(identifier.name, toRegex(l.value))) case _ => - regexQuery(identifier.columnName, toRegex(l.value)) + regexQuery(identifier.name, toRegex(l.value)) } case _: Ge.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lt l.value + rangeQuery(identifier.name) lt l.value case _ => - rangeQuery(identifier.columnName) gte l.value + rangeQuery(identifier.name) gte l.value } case _: Gt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) lte l.value + rangeQuery(identifier.name) lte l.value case _ => - rangeQuery(identifier.columnName) gt l.value + rangeQuery(identifier.name) gt l.value } case _: Le.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gt l.value + rangeQuery(identifier.name) gt l.value case _ => - rangeQuery(identifier.columnName) lte l.value + rangeQuery(identifier.name) lte l.value } case _: Lt.type => maybeNot match { case Some(_) => - rangeQuery(identifier.columnName) gte l.value + rangeQuery(identifier.name) gte l.value case _ => - rangeQuery(identifier.columnName) lt l.value + rangeQuery(identifier.name) lt l.value } case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, l.value)) + not(termQuery(identifier.name, l.value)) case _ => - termQuery(identifier.columnName, l.value) + termQuery(identifier.name, l.value) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, l.value) + termQuery(identifier.name, l.value) case _ => - not(termQuery(identifier.columnName, l.value)) + not(termQuery(identifier.name, l.value)) } case _ => matchAllQuery() } @@ -177,16 +213,16 @@ package object bridge { case _: Eq.type => maybeNot match { case Some(_) => - not(termQuery(identifier.columnName, b.value)) + not(termQuery(identifier.name, b.value)) case _ => - termQuery(identifier.columnName, b.value) + termQuery(identifier.name, b.value) } case _: Ne.type => maybeNot match { case Some(_) => - termQuery(identifier.columnName, b.value) + termQuery(identifier.name, b.value) case _ => - not(termQuery(identifier.columnName, b.value)) + not(termQuery(identifier.name, b.value)) } case _ => matchAllQuery() } @@ -195,17 +231,17 @@ package object bridge { } implicit def isNullToQuery( - isNull: SQLIsNull - ): Query = { + isNull: SQLIsNull + ): Query = { import isNull._ - not(existsQuery(identifier.columnName)) + not(existsQuery(identifier.name)) } implicit def isNotNullToQuery( - isNotNull: SQLIsNotNull - ): Query = { + isNotNull: SQLIsNotNull + ): Query = { import isNotNull._ - existsQuery(identifier.columnName) + existsQuery(identifier.name) } implicit def inToQuery[R, T <: SQLValue[R]](in: SQLIn[R, T]): Query = { @@ -214,12 +250,12 @@ package object bridge { val t = _values.headOption match { case Some(_: Double) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Double]]) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Double]]) case Some(_: Integer) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Integer]]) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Integer]]) case Some(_: Long) => - termsQuery(identifier.columnName, _values.asInstanceOf[Seq[Long]]) - case _ => termsQuery(identifier.columnName, _values.map(_.toString)) + termsQuery(identifier.name, _values.asInstanceOf[Seq[Long]]) + case _ => termsQuery(identifier.name, _values.map(_.toString)) } maybeNot match { case Some(_) => not(t) @@ -228,10 +264,10 @@ package object bridge { } implicit def betweenToQuery( - between: SQLBetween - ): Query = { + between: SQLBetween + ): Query = { import between._ - val r = rangeQuery(identifier.columnName) gte from.value lte to.value + val r = rangeQuery(identifier.name) gte from.value lte to.value maybeNot match { case Some(_) => not(r) case _ => r @@ -239,42 +275,42 @@ package object bridge { } implicit def geoDistanceToQuery( - geoDistance: ElasticGeoDistance - ): Query = { + geoDistance: ElasticGeoDistance + ): Query = { import geoDistance._ - geoDistanceQuery(identifier.columnName, lat.value, lon.value) distance distance.value + geoDistanceQuery(identifier.name, lat.value, lon.value) distance distance.value } implicit def matchToQuery( - matchExpression: ElasticMatch - ): Query = { + matchExpression: ElasticMatch + ): Query = { import matchExpression._ - matchQuery(identifier.columnName, value.value) + matchQuery(identifier.name, value.value) } implicit def criteriaToElasticCriteria( - criteria: SQLCriteria - ): ElasticCriteria = { + criteria: SQLCriteria + ): ElasticCriteria = { ElasticCriteria( criteria ) } implicit def filterToQuery( - filter: ElasticFilter - ): ElasticQuery = { + filter: ElasticFilter + ): ElasticQuery = { ElasticQuery(filter) } implicit def sqlQueryToAggregations( - query: SQLQuery - ): Seq[ElasticAggregation] = { + query: SQLQuery + ): Seq[ElasticAggregation] = { import query._ request .map { case Left(l) => l.aggregates - .map(ElasticAggregation(_)) + .map(ElasticAggregation(_, l.having.flatMap(_.criteria))) .map(aggregation => { val queryFiltered = l.where @@ -296,10 +332,18 @@ package object bridge { ElasticApi.search("") query { queryFiltered } - aggregations { - aggregation.agg + aggregations { + val filtered = + aggregation.filteredAgg match { + case Some(filtered) => filtered.subAggregations(aggregation.agg) + case _ => aggregation.agg + } + aggregation.nestedAgg match { + case Some(nested) => nested.subAggregations(filtered) + case _ => filtered + } } - size 0 + size 0 ) }).string.replace("\"version\":true,", "") /*FIXME*/ ) diff --git a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala index 2d1db9ec..c4b3d720 100644 --- a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala +++ b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLCriteriaSpec.scala @@ -146,7 +146,7 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { |"query":{ | "bool":{"filter":[{"regexp" : { | "identifier" : { - | "value" : ".*?un.*?" + | "value" : ".*un.*" | } | } | } @@ -160,7 +160,7 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { | "filter":[{"bool":{"must_not": [{ | "regexp": { | "identifier": { - | "value": ".*?un.*?" + | "value": ".*un.*" | } | } | }] @@ -781,10 +781,24 @@ class SQLCriteriaSpec extends AnyFlatSpec with Matchers { """{ | "query":{ | "bool":{ - | "filter":[ + | "should":[ | { | "match":{ - | "identifier":{ + | "identifier1":{ + | "query":"value" + | } + | } + | }, + | { + | "match":{ + | "identifier2":{ + | "query":"value" + | } + | } + | }, + | { + | "match":{ + | "identifier3":{ | "query":"value" | } | } diff --git a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 30a87937..d8f7467b 100644 --- a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -18,7 +18,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { sqlQuery.request match { case Some(Left(value)) => value.copy(score = sqlQuery.score) - case _ => + case None => throw new IllegalArgumentException( s"SQL query ${sqlQuery.query} does not contain a valid search request" ) @@ -32,7 +32,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe false result.distinct shouldBe false - result.aggName shouldBe "count_id" + result.aggName shouldBe "c2" result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe @@ -52,7 +52,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "count_id": { + | "c2": { | "value_count": { | "field": "id" | } @@ -68,7 +68,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe false result.distinct shouldBe true - result.aggName shouldBe "count_distinct_id" + result.aggName shouldBe "c2" result.field shouldBe "c2" result.sources shouldBe Seq[String]("Table") result.query.getOrElse("") shouldBe @@ -88,7 +88,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "count_distinct_id": { + | "c2": { | "cardinality": { | "field": "id" | } @@ -106,7 +106,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.count_emails_value" + result.aggName shouldBe "nested_emails.email" result.field shouldBe "email" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -126,12 +126,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_emails_value": { + | "email": { | "value_count": { | "field": "emails.value" | } @@ -151,7 +151,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.count_emails_value" + result.aggName shouldBe "nested_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -185,12 +185,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_emails_value": { + | "count_emails": { | "value_count": { | "field": "emails.value" | } @@ -204,13 +204,13 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { it should "perform nested count with filter" in { val results: Seq[ElasticAggregation] = SQLQuery( - "select count(inner_emails.value) as count_emails filter[inner_emails.context = \"profile\"] from index, unnest(emails) as inner_emails, unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\"))" + "select count(inner_emails.value) as count_emails from index, unnest(emails) as inner_emails, unnest(profiles) as inner_profiles where nom = \"Nom\" and (inner_profiles.postalCode in (\"75001\",\"75002\")) having inner_emails.context = \"profile\"" ) results.size shouldBe 1 val result = results.head result.nested shouldBe true result.distinct shouldBe false - result.aggName shouldBe "nested_count_emails_value.filtered_agg.count_emails_value" + result.aggName shouldBe "nested_emails.filtered_agg.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -244,7 +244,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, @@ -258,7 +258,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "aggs": { - | "count_emails_value": { + | "count_emails": { | "value_count": { | "field": "emails.value" | } @@ -280,7 +280,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_count_distinct_emails_value.count_distinct_emails_value" + result.aggName shouldBe "nested_emails.count_emails" result.field shouldBe "count_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -330,12 +330,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_distinct_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_distinct_emails_value": { + | "count_emails": { | "cardinality": { | "field": "emails.value" | } @@ -356,7 +356,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { val result = results.head result.nested shouldBe true result.distinct shouldBe true - result.aggName shouldBe "nested_count_distinct_emails_value.count_distinct_emails_value" + result.aggName shouldBe "nested_emails.count_distinct_emails" result.field shouldBe "count_distinct_emails" result.sources shouldBe Seq[String]("index") result.query.getOrElse("") shouldBe @@ -399,12 +399,12 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | }, | "size": 0, | "aggs": { - | "nested_count_distinct_emails_value": { + | "nested_emails": { | "nested": { | "path": "emails" | }, | "aggs": { - | "count_distinct_emails_value": { + | "count_distinct_emails": { | "cardinality": { | "field": "emails.value" | } @@ -503,8 +503,8 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { SQLQuery( except ) - List( - """ + select.query shouldBe + """ |{ | "query":{ | "match_all":{} @@ -513,26 +513,15 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "includes":["*"], | "excludes":["col1","col2"] | } - |}""".stripMargin.replaceAll("\\s+", ""), - """ - |{ - | "query":{ - | "match_all":{} - | }, - | "_source":{ - | "excludes":["col1","col2"] - | } |}""".stripMargin.replaceAll("\\s+", "") - ) should contain(select.query) } it should "perform complex query" in { val select: ElasticSearchRequest = SQLQuery( s"""SELECT - | inner_products.name, - | inner_products.category, - | inner_products.price, + | inner_products.category as category, + | inner_products.name as productName, | min(inner_products.price) as min_price, | max(inner_products.price) as max_price |FROM @@ -550,20 +539,16 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | ( | distance(pickup.location,(0.0,0.0)) <= "7000m" OR | distance(withdrawals.location,(0.0,0.0)) <= "7000m" - | ) AND - | ( - | inner_products.deleted=false AND - | inner_products.upForSale=true AND - | inner_products.stock > 0 - | ) - | ) AND - | ( - | match(products.name, "lasagnes") AND - | ( - | match(products.description, "lasagnes") OR - | match(products.ingredients, "lasagnes") | ) | ) + |GROUP BY + | inner_products.category, + | inner_products.name + |HAVING inner_products.deleted=false AND + | inner_products.upForSale=true AND + | inner_products.stock > 0 AND + | match (inner_products.name) against ("lasagnes") AND + | match (inner_products.description, inner_products.ingredients) against ("lasagnes") |ORDER BY preparationTime ASC, nbOrders DESC |LIMIT 100""".stripMargin ).minScore(1.0) @@ -574,35 +559,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |{ | "query": { | "bool": { - | "must": [ - | { - | "match": { - | "products.name": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "bool": { - | "should": [ - | { - | "match": { - | "products.description": { - | "query": "lasagnes" - | } - | } - | }, - | { - | "match": { - | "products.ingredients": { - | "query": "lasagnes" - | } - | } - | } - | ] - | } - | } - | ], | "filter": [ | { | "bool": { @@ -642,7 +598,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | { | "regexp": { | "blockedCustomers": { - | "value": ".*?uuid.*?" + | "value": ".*uuid.*" | } | } | } @@ -685,43 +641,6 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | ] | } - | }, - | { - | "nested": { - | "path": "products", - | "query": { - | "bool": { - | "filter": [ - | { - | "term": { - | "products.deleted": { - | "value": false - | } - | } - | }, - | { - | "term": { - | "products.upForSale": { - | "value": true - | } - | } - | }, - | { - | "range": { - | "products.stock": { - | "gt": "0" - | } - | } - | } - | ] - | } - | }, - | "inner_hits": { - | "name": "inner_products", - | "from": 0, - | "size": 10 - | } - | } | } | ] | } @@ -729,8 +648,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | ] | } | }, - | "from": 0, - | "size": 100, + | "size": 0, | "min_score": 1.0, | "sort": [ | { @@ -744,34 +662,93 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | } | ], - | "_source": { - | "includes": [ - | "inner_products.name", - | "inner_products.category", - | "inner_products.price" - | ] - | }, + | "_source": true, | "aggs": { - | "nested_min_products_price": { + | "nested_products": { | "nested": { | "path": "products" | }, | "aggs": { - | "min_products_price": { - | "min": { - | "field": "products.price" - | } - | } - | } - | }, - | "nested_max_products_price": { - | "nested": { - | "path": "products" - | }, - | "aggs": { - | "max_products_price": { - | "max": { - | "field": "products.price" + | "filtered_agg": { + | "filter": { + | "bool": { + | "filter": [ + | { + | "term": { + | "products.deleted": { + | "value": false + | } + | } + | }, + | { + | "term": { + | "products.upForSale": { + | "value": true + | } + | } + | }, + | { + | "range": { + | "products.stock": { + | "gt": "0" + | } + | } + | }, + | { + | "match": { + | "products.name": { + | "query": "lasagnes" + | } + | } + | }, + | { + | "bool": { + | "should": [ + | { + | "match": { + | "products.description": { + | "query": "lasagnes" + | } + | } + | }, + | { + | "match": { + | "products.ingredients": { + | "query": "lasagnes" + | } + | } + | } + | ] + | } + | } + | ] + | } + | }, + | "aggs": { + | "category": { + | "terms": { + | "field": "products.category.keyword" + | }, + | "aggs": { + | "productName": { + | "terms": { + | "field": "products.name.keyword" + | }, + | "aggs": { + | "min_price": { + | "min": { + | "field": "products.price" + | } + | }, + | "max_price": { + | "max": { + | "field": "products.price" + | } + | } + | } + | } + | } + | } | } | } | } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLFrom.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLFrom.scala index 9ede01c9..15f36837 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLFrom.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLFrom.scala @@ -2,86 +2,28 @@ package app.softnetwork.elastic.sql case object From extends SQLExpr("from") with SQLRegex -sealed trait SQLSource extends Updateable { - def update(request: SQLSearchRequest): SQLSource -} - -case class SQLIdentifier( - columnName: String, - alias: Option[String] = None, - distinct: Boolean = false, - nested: Boolean = false, - limit: Option[SQLLimit] = None, - function: Option[SQLFunction] = None -) extends SQLExpr({ - var parts: Seq[String] = columnName.split("\\.").toSeq - alias match { - case Some(a) => parts = a +: parts - case _ => - } - val sql = { - if (distinct) { - s"$Distinct ${parts.mkString(".")}".trim - } else { - parts.mkString(".").trim - } - } - function match { - case Some(f) => s"$f($sql)" - case _ => sql - } - }) - with SQLSource - with SQLTokenWithFunction { - - lazy val aggregationName: Option[String] = if (aggregation) alias else None - - lazy val nestedType: Option[String] = if (nested) Some(columnName.split('.').head) else None - - lazy val innerHitsName: Option[String] = if (nested) alias else None - - def update(request: SQLSearchRequest): SQLIdentifier = { - val parts: Seq[String] = columnName.split("\\.").toSeq - if (request.aliases.contains(parts.head)) { - request.unnests.find(_._1 == parts.head) match { - case Some(tuple) => - this.copy( - alias = Some(parts.head), - columnName = s"${tuple._2}.${parts.tail.mkString(".")}", - nested = true, - limit = tuple._3 - ) - case _ => - this.copy( - alias = Some(parts.head), - columnName = parts.tail.mkString(".") - ) - } - } else { - this - } - } -} - case object Unnest extends SQLExpr("unnest") with SQLRegex case class SQLUnnest(identifier: SQLIdentifier, limit: Option[SQLLimit]) extends SQLSource { override def sql: String = s"$Unnest($identifier${asString(limit)})" def update(request: SQLSearchRequest): SQLUnnest = this.copy(identifier = identifier.update(request)) + override val name: String = identifier.name } -case class SQLTable(source: SQLSource, alias: Option[SQLAlias] = None) extends Updateable { - override def sql: String = s"$source${asString(alias)}" +case class SQLTable(source: SQLSource, tableAlias: Option[SQLAlias] = None) extends Updateable { + override def sql: String = s"$source${asString(tableAlias)}" def update(request: SQLSearchRequest): SQLTable = this.copy(source = source.update(request)) } case class SQLFrom(tables: Seq[SQLTable]) extends Updateable { override def sql: String = s" $From ${tables.map(_.sql).mkString(",")}" - lazy val aliases: Seq[String] = tables.flatMap((table: SQLTable) => table.alias).map(_.alias) + lazy val tableAliases: Map[String, String] = tables + .flatMap((table: SQLTable) => table.tableAlias.map(alias => table.source.name -> alias.alias)) + .toMap lazy val unnests: Seq[(String, String, Option[SQLLimit])] = tables.collect { case SQLTable(u: SQLUnnest, a) => - (a.map(_.alias).getOrElse(u.identifier.columnName), u.identifier.columnName, u.limit) + (a.map(_.alias).getOrElse(u.identifier.name), u.identifier.name, u.limit) } def update(request: SQLSearchRequest): SQLFrom = this.copy(tables = tables.map(_.update(request))) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLGroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLGroupBy.scala index fc67e7e8..6ab8726e 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLGroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLGroupBy.scala @@ -2,22 +2,10 @@ package app.softnetwork.elastic.sql case object GroupBy extends SQLExpr("group by") with SQLRegex -case object Having extends SQLExpr("having") with SQLRegex - -case class SQLHaving(criteria: Option[SQLCriteria]) extends Updateable { - override def sql: String = criteria match { - case Some(c) => s" $Having $c" - case _ => "" - } - def update(request: SQLSearchRequest): SQLHaving = - this.copy(criteria = criteria.map(_.update(request))) -} - -case class SQLGroupBy(buckets: Seq[SQLBucket], having: Option[SQLHaving] = None) - extends Updateable { - override def sql: String = s" $GroupBy ${buckets.mkString(",")}${asString(having)}" +case class SQLGroupBy(buckets: Seq[SQLBucket]) extends Updateable { + override def sql: String = s" $GroupBy ${buckets.mkString(",")}" def update(request: SQLSearchRequest): SQLGroupBy = - this.copy(buckets = buckets.map(_.update(request)), having = having.map(_.update(request))) + this.copy(buckets = buckets.map(_.update(request))) } case class SQLBucket( @@ -28,10 +16,14 @@ case class SQLBucket( this.copy(identifier = identifier.update(request)) lazy val sourceBucket: String = if (identifier.nested) { - identifier.alias + identifier.tableAlias .map(a => s"$a.") - .getOrElse("") + identifier.columnName.split("\\.").tail.mkString(".") + .getOrElse("") + identifier.name.split("\\.").tail.mkString(".") } else { - identifier.columnName + identifier.name } + lazy val nestedBucket: Option[String] = + identifier.nestedType.map(t => s"nested_$t") + + lazy val name: String = identifier.fieldAlias.getOrElse(sourceBucket.replace(".", "_")) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLHaving.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLHaving.scala new file mode 100644 index 00000000..a96351da --- /dev/null +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLHaving.scala @@ -0,0 +1,12 @@ +package app.softnetwork.elastic.sql + +case object Having extends SQLExpr("having") with SQLRegex + +case class SQLHaving(criteria: Option[SQLCriteria]) extends Updateable { + override def sql: String = criteria match { + case Some(c) => s" $Having $c" + case _ => "" + } + def update(request: SQLSearchRequest): SQLHaving = + this.copy(criteria = criteria.map(_.update(request))) +} diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLOperator.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLOperator.scala index 174b16e6..5df94ea8 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLOperator.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLOperator.scala @@ -21,6 +21,9 @@ case object Between extends SQLExpr("between") with SQLLogicalOperator case object IsNull extends SQLExpr("is null") with SQLLogicalOperator case object IsNotNull extends SQLExpr("is not null") with SQLLogicalOperator case object Not extends SQLExpr("not") with SQLLogicalOperator +case object Match extends SQLExpr("match") with SQLLogicalOperator + +case object Against extends SQLExpr("against") with SQLRegex sealed trait SQLPredicateOperator extends SQLLogicalOperator @@ -33,4 +36,3 @@ sealed trait ElasticOperator extends SQLOperator with SQLRegex case object Nested extends SQLExpr("nested") with ElasticOperator case object Child extends SQLExpr("child") with ElasticOperator case object Parent extends SQLExpr("parent") with ElasticOperator -case object Match extends SQLExpr("match") with ElasticOperator diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLParser.scala index 93c5116d..41bcc3b6 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLParser.scala @@ -1,6 +1,7 @@ package app.softnetwork.elastic.sql -import scala.util.parsing.combinator.RegexParsers +import scala.util.parsing.combinator.{PackratParsers, RegexParsers} +import scala.util.parsing.input.CharSequenceReader /** Created by smanciot on 27/06/2018. * @@ -17,25 +18,28 @@ object SQLParser with SQLFromParser with SQLWhereParser with SQLGroupByParser + with SQLHavingParser with SQLOrderByParser - with SQLLimitParser { + with SQLLimitParser + with PackratParsers { - def request: Parser[SQLSearchRequest] = { - phrase(select ~ from ~ where.? ~ groupBy.? ~ orderBy.? ~ limit.?) ^^ { - case s ~ f ~ w ~ g ~ o ~ l => - SQLSearchRequest(s, f, w, g, o, l) + def request: PackratParser[SQLSearchRequest] = { + phrase(select ~ from ~ where.? ~ groupBy.? ~ having.? ~ orderBy.? ~ limit.?) ^^ { + case s ~ f ~ w ~ g ~ h ~ o ~ l => + SQLSearchRequest(s, f, w, g, h, o, l) .update() } } - def union: Parser[Union.type] = Union.regex ^^ (_ => Union) + def union: PackratParser[Union.type] = Union.regex ^^ (_ => Union) - def requests: Parser[List[SQLSearchRequest]] = rep1sep(request, union) ^^ (s => s) + def requests: PackratParser[List[SQLSearchRequest]] = rep1sep(request, union) ^^ (s => s) def apply( query: String ): Either[SQLParserError, Either[SQLSearchRequest, SQLMultiSearchRequest]] = { - parse(requests, query) match { + val reader = new PackratReader(new CharSequenceReader(query)) + parse(requests, reader) match { case NoSuccess(msg, _) => Console.err.println(msg) Left(SQLParserError(msg)) @@ -53,47 +57,48 @@ trait SQLCompilationError case class SQLParserError(msg: String) extends SQLCompilationError -trait SQLParser extends RegexParsers { +trait SQLParser extends RegexParsers with PackratParsers { - def literal: Parser[SQLLiteral] = + def literal: PackratParser[SQLLiteral] = """"[^"]*"|'[^']*'""".r ^^ (str => SQLLiteral(str.substring(1, str.length - 1))) - def int: Parser[SQLInt] = """(-)?(0|[1-9]\d*)""".r ^^ (str => SQLInt(str.toInt)) + def int: PackratParser[SQLInt] = """(-)?(0|[1-9]\d*)""".r ^^ (str => SQLInt(str.toInt)) - def double: Parser[SQLDouble] = """(-)?(\d+\.\d+)""".r ^^ (str => SQLDouble(str.toDouble)) + def double: PackratParser[SQLDouble] = """(-)?(\d+\.\d+)""".r ^^ (str => SQLDouble(str.toDouble)) - def boolean: Parser[SQLBoolean] = """(true|false)""".r ^^ (bool => SQLBoolean(bool.toBoolean)) + def boolean: PackratParser[SQLBoolean] = + """(true|false)""".r ^^ (bool => SQLBoolean(bool.toBoolean)) - def start: Parser[SQLDelimiter] = "(" ^^ (_ => StartPredicate) + def start: PackratParser[SQLDelimiter] = "(" ^^ (_ => StartPredicate) - def end: Parser[SQLDelimiter] = ")" ^^ (_ => EndPredicate) + def end: PackratParser[SQLDelimiter] = ")" ^^ (_ => EndPredicate) - def separator: Parser[SQLDelimiter] = "," ^^ (_ => Separator) + def separator: PackratParser[SQLDelimiter] = "," ^^ (_ => Separator) - def count: Parser[AggregateFunction] = Count.regex ^^ (_ => Count) + def count: PackratParser[AggregateFunction] = Count.regex ^^ (_ => Count) - def min: Parser[AggregateFunction] = Min.regex ^^ (_ => Min) + def min: PackratParser[AggregateFunction] = Min.regex ^^ (_ => Min) - def max: Parser[AggregateFunction] = Max.regex ^^ (_ => Max) + def max: PackratParser[AggregateFunction] = Max.regex ^^ (_ => Max) - def avg: Parser[AggregateFunction] = Avg.regex ^^ (_ => Avg) + def avg: PackratParser[AggregateFunction] = Avg.regex ^^ (_ => Avg) - def sum: Parser[AggregateFunction] = Sum.regex ^^ (_ => Sum) + def sum: PackratParser[AggregateFunction] = Sum.regex ^^ (_ => Sum) - def aggregateFunction: Parser[AggregateFunction] = count | min | max | avg | sum + def aggregateFunction: PackratParser[AggregateFunction] = count | min | max | avg | sum - def distanceFunction: Parser[SQLFunction] = Distance.regex ^^ (_ => Distance) + def distanceFunction: PackratParser[SQLFunction] = Distance.regex ^^ (_ => Distance) - def sqlFunction: Parser[SQLFunction] = aggregateFunction | distanceFunction + def sqlFunction: PackratParser[SQLFunction] = aggregateFunction | distanceFunction private val regexIdentifier = """[\*a-zA-Z_\-][a-zA-Z0-9_\-\.\[\]\*]*""" - def identifierWithFunction: Parser[SQLIdentifier] = sqlFunction ~ start ~ identifier ~ end ^^ { - case f ~ _ ~ i ~ _ => + def identifierWithFunction: PackratParser[SQLIdentifier] = + sqlFunction ~ start ~ identifier ~ end ^^ { case f ~ _ ~ i ~ _ => i.copy(function = Some(f)) - } + } - def identifier: Parser[SQLIdentifier] = + def identifier: PackratParser[SQLIdentifier] = Distinct.regex.? ~ regexIdentifier.r ^^ { case d ~ i => SQLIdentifier( i, @@ -103,12 +108,13 @@ trait SQLParser extends RegexParsers { } private val regexAlias = - """\b(?!(?i)except\b)\b(?!(?i)where\b)\b(?!(?i)filter\b)\b(?!(?i)from\b)\b(?!(?i)group\b)\b(?!(?i)having\b)\b(?!(?i)order\b)\b(?!(?i)limit\b)[a-zA-Z0-9_]*""" + """\b(?!(?i)as\b)\b(?!(?i)except\b)\b(?!(?i)where\b)\b(?!(?i)filter\b)\b(?!(?i)from\b)\b(?!(?i)group\b)\b(?!(?i)having\b)\b(?!(?i)order\b)\b(?!(?i)limit\b)[a-zA-Z0-9_]*""" - def alias: Parser[SQLAlias] = Alias.regex.? ~ regexAlias.r ^^ { case _ ~ b => SQLAlias(b) } + def alias: PackratParser[SQLAlias] = Alias.regex.? ~ regexAlias.r ^^ { case _ ~ b => SQLAlias(b) } - def field: Parser[SQLField] = (identifierWithFunction | identifier) ~ alias.? ^^ { case i ~ a => - SQLField(i, a) + def field: PackratParser[SQLField] = (identifierWithFunction | identifier) ~ alias.? ^^ { + case i ~ a => + SQLField(i, a) } } @@ -116,13 +122,13 @@ trait SQLParser extends RegexParsers { trait SQLSelectParser { self: SQLParser with SQLWhereParser => - def except: Parser[SQLExcept] = Except.regex ~ start ~ rep1sep(field, separator) ~ end ^^ { + def except: PackratParser[SQLExcept] = Except.regex ~ start ~ rep1sep(field, separator) ~ end ^^ { case _ ~ _ ~ e ~ _ => SQLExcept(e) } - def select: Parser[SQLSelect] = - Select.regex ~ rep1sep(aggregate | field, separator) ~ except.? ^^ { case _ ~ fields ~ e => + def select: PackratParser[SQLSelect] = + Select.regex ~ rep1sep(field, separator) ~ except.? ^^ { case _ ~ fields ~ e => SQLSelect(fields, e) } @@ -131,14 +137,14 @@ trait SQLSelectParser { trait SQLFromParser { self: SQLParser with SQLLimitParser => - def unnest: Parser[SQLTable] = Unnest.regex ~ start ~ identifier ~ limit.? ~ end ~ alias ^^ { - case _ ~ _ ~ i ~ l ~ _ ~ a => + def unnest: PackratParser[SQLTable] = + Unnest.regex ~ start ~ identifier ~ limit.? ~ end ~ alias ^^ { case _ ~ _ ~ i ~ l ~ _ ~ a => SQLTable(SQLUnnest(i, l), Some(a)) - } + } - def table: Parser[SQLTable] = identifier ~ alias.? ^^ { case i ~ a => SQLTable(i, a) } + def table: PackratParser[SQLTable] = identifier ~ alias.? ^^ { case i ~ a => SQLTable(i, a) } - def from: Parser[SQLFrom] = From.regex ~ rep1sep(unnest | table, separator) ^^ { + def from: PackratParser[SQLFrom] = From.regex ~ rep1sep(unnest | table, separator) ^^ { case _ ~ tables => SQLFrom(tables) } @@ -148,53 +154,44 @@ trait SQLFromParser { trait SQLWhereParser { self: SQLParser with SQLGroupByParser with SQLOrderByParser => - def isNull: Parser[SQLCriteria] = identifier ~ IsNull.regex ^^ { case i ~ _ => SQLIsNull(i) } + def isNull: PackratParser[SQLCriteria] = identifier ~ IsNull.regex ^^ { case i ~ _ => + SQLIsNull(i) + } - def isNotNull: Parser[SQLCriteria] = identifier ~ IsNotNull.regex ^^ { case i ~ _ => + def isNotNull: PackratParser[SQLCriteria] = identifier ~ IsNotNull.regex ^^ { case i ~ _ => SQLIsNotNull(i) } - private def eq: Parser[SQLExpressionOperator] = Eq.sql ^^ (_ => Eq) + private def eq: PackratParser[SQLExpressionOperator] = Eq.sql ^^ (_ => Eq) - private def ne: Parser[SQLExpressionOperator] = Ne.sql ^^ (_ => Ne) + private def ne: PackratParser[SQLExpressionOperator] = Ne.sql ^^ (_ => Ne) - def filter: Parser[SQLFilter] = Filter.regex ~> "[" ~> whereCriteria <~ "]" ^^ { rawTokens => - SQLFilter( - processTokens(rawTokens) - ) - } - - def aggregate: Parser[SQLAggregate] = - aggregateFunction ~ start ~ identifier ~ end ~ alias.? ~ filter.? ^^ { - case agg ~ _ ~ i ~ _ ~ a ~ f => new SQLAggregate(agg, i, a, f) - } - - private def equality: Parser[SQLExpression] = + private def equality: PackratParser[SQLExpression] = not.? ~ (identifierWithFunction | identifier) ~ (eq | ne) ~ (boolean | literal | double | int) ^^ { case n ~ i ~ o ~ v => SQLExpression(i, o, v, n) } - def like: Parser[SQLExpression] = + def like: PackratParser[SQLExpression] = (identifierWithFunction | identifier) ~ not.? ~ Like.regex ~ literal ^^ { case i ~ n ~ _ ~ v => SQLExpression(i, Like, v, n) } - private def ge: Parser[SQLExpressionOperator] = Ge.sql ^^ (_ => Ge) + private def ge: PackratParser[SQLExpressionOperator] = Ge.sql ^^ (_ => Ge) - def gt: Parser[SQLExpressionOperator] = Gt.sql ^^ (_ => Gt) + def gt: PackratParser[SQLExpressionOperator] = Gt.sql ^^ (_ => Gt) - private def le: Parser[SQLExpressionOperator] = Le.sql ^^ (_ => Le) + private def le: PackratParser[SQLExpressionOperator] = Le.sql ^^ (_ => Le) - def lt: Parser[SQLExpressionOperator] = Lt.sql ^^ (_ => Lt) + def lt: PackratParser[SQLExpressionOperator] = Lt.sql ^^ (_ => Lt) - private def comparison: Parser[SQLExpression] = + private def comparison: PackratParser[SQLExpression] = not.? ~ (identifierWithFunction | identifier) ~ (ge | gt | le | lt) ~ (double | int | literal) ^^ { case n ~ i ~ o ~ v => SQLExpression(i, o, v, n) } - def in: Parser[SQLExpressionOperator] = In.regex ^^ (_ => In) + def in: PackratParser[SQLExpressionOperator] = In.regex ^^ (_ => In) - private def inLiteral: Parser[SQLCriteria] = + private def inLiteral: PackratParser[SQLCriteria] = identifier ~ not.? ~ in ~ start ~ rep1(literal ~ separator.?) ~ end ^^ { case i ~ n ~ _ ~ _ ~ v ~ _ => SQLIn( @@ -206,7 +203,7 @@ trait SQLWhereParser { ) } - private def inNumerical: Parser[SQLCriteria] = + private def inNumerical: PackratParser[SQLCriteria] = (identifierWithFunction | identifier) ~ not.? ~ in ~ start ~ rep1( (double | int) ~ separator.? ) ~ end ^^ { case i ~ n ~ _ ~ _ ~ v ~ _ => @@ -219,71 +216,76 @@ trait SQLWhereParser { ) } - def between: Parser[SQLCriteria] = + def between: PackratParser[SQLCriteria] = (identifierWithFunction | identifier) ~ not.? ~ Between.regex ~ literal ~ and ~ literal ^^ { case i ~ n ~ _ ~ from ~ _ ~ to => SQLBetween(i, from, to, n) } - def distance: Parser[SQLCriteria] = + def distance: PackratParser[SQLCriteria] = distanceFunction ~ start ~ identifier ~ separator ~ start ~ double ~ separator ~ double ~ end ~ end ~ le ~ literal ^^ { case _ ~ _ ~ i ~ _ ~ _ ~ lat ~ _ ~ lon ~ _ ~ _ ~ _ ~ d => ElasticGeoDistance(i, d, lat, lon) } - def matchCriteria: Parser[ElasticMatch] = - Match.regex ~ start ~ identifier ~ separator ~ literal ~ separator.? ~ literal.? ~ end ^^ { - case _ ~ _ ~ i ~ _ ~ l ~ _ ~ o ~ _ => ElasticMatch(i, l, o.map(_.value)) + def matchCriteria: PackratParser[SQLMatch] = + Match.regex ~ start ~ rep1sep( + identifier, + separator + ) ~ end ~ Against.regex ~ start ~ literal ~ end ^^ { case _ ~ _ ~ i ~ _ ~ _ ~ _ ~ l ~ _ => + SQLMatch(i, l) } - def and: Parser[SQLPredicateOperator] = And.regex ^^ (_ => And) + def and: PackratParser[SQLPredicateOperator] = And.regex ^^ (_ => And) - def or: Parser[SQLPredicateOperator] = Or.regex ^^ (_ => Or) + def or: PackratParser[SQLPredicateOperator] = Or.regex ^^ (_ => Or) - def not: Parser[Not.type] = Not.regex ^^ (_ => Not) + def not: PackratParser[Not.type] = Not.regex ^^ (_ => Not) - def criteria: Parser[SQLCriteria] = + def criteria: PackratParser[SQLCriteria] = (equality | like | comparison | inLiteral | inNumerical | between | isNotNull | isNull | distance | matchCriteria) ^^ ( c => c ) - def predicate: Parser[SQLPredicate] = criteria ~ (and | or) ~ not.? ~ criteria ^^ { + def predicate: PackratParser[SQLPredicate] = criteria ~ (and | or) ~ not.? ~ criteria ^^ { case l ~ o ~ n ~ r => SQLPredicate(l, o, r, n) } - def nestedCriteria: Parser[ElasticRelation] = Nested.regex ~ start.? ~ criteria ~ end.? ^^ { - case _ ~ _ ~ c ~ _ => ElasticNested(c, None) - } + def nestedCriteria: PackratParser[ElasticRelation] = + Nested.regex ~ start.? ~ criteria ~ end.? ^^ { case _ ~ _ ~ c ~ _ => + ElasticNested(c, None) + } - def nestedPredicate: Parser[ElasticRelation] = Nested.regex ~ start ~ predicate ~ end ^^ { + def nestedPredicate: PackratParser[ElasticRelation] = Nested.regex ~ start ~ predicate ~ end ^^ { case _ ~ _ ~ p ~ _ => ElasticNested(p, None) } - def childCriteria: Parser[ElasticRelation] = Child.regex ~ start.? ~ criteria ~ end.? ^^ { + def childCriteria: PackratParser[ElasticRelation] = Child.regex ~ start.? ~ criteria ~ end.? ^^ { case _ ~ _ ~ c ~ _ => ElasticChild(c) } - def childPredicate: Parser[ElasticRelation] = Child.regex ~ start ~ predicate ~ end ^^ { + def childPredicate: PackratParser[ElasticRelation] = Child.regex ~ start ~ predicate ~ end ^^ { case _ ~ _ ~ p ~ _ => ElasticChild(p) } - def parentCriteria: Parser[ElasticRelation] = Parent.regex ~ start.? ~ criteria ~ end.? ^^ { - case _ ~ _ ~ c ~ _ => ElasticParent(c) - } + def parentCriteria: PackratParser[ElasticRelation] = + Parent.regex ~ start.? ~ criteria ~ end.? ^^ { case _ ~ _ ~ c ~ _ => + ElasticParent(c) + } - def parentPredicate: Parser[ElasticRelation] = Parent.regex ~ start ~ predicate ~ end ^^ { + def parentPredicate: PackratParser[ElasticRelation] = Parent.regex ~ start ~ predicate ~ end ^^ { case _ ~ _ ~ p ~ _ => ElasticParent(p) } - private def allPredicate: Parser[SQLCriteria] = + private def allPredicate: PackratParser[SQLCriteria] = nestedPredicate | childPredicate | parentPredicate | predicate - private def allCriteria: Parser[SQLToken] = + private def allCriteria: PackratParser[SQLToken] = nestedCriteria | childCriteria | parentCriteria | criteria - def whereCriteria: Parser[List[SQLToken]] = rep1( + def whereCriteria: PackratParser[List[SQLToken]] = rep1( allPredicate | allCriteria | start | or | and | end ) - def where: Parser[SQLWhere] = + def where: PackratParser[SQLWhere] = Where.regex ~ whereCriteria ^^ { case _ ~ rawTokens => SQLWhere(processTokens(rawTokens)) } @@ -437,16 +439,28 @@ trait SQLWhereParser { trait SQLGroupByParser { self: SQLParser with SQLWhereParser => - private def having: Parser[SQLHaving] = Having.regex ~> whereCriteria ^^ { rawTokens => + private def having: PackratParser[SQLHaving] = Having.regex ~> whereCriteria ^^ { rawTokens => SQLHaving( processTokens(rawTokens) ) } - def bucket: Parser[SQLBucket] = identifier ^^ (i => SQLBucket(i)) + def bucket: PackratParser[SQLBucket] = identifier ^^ (i => SQLBucket(i)) + + def groupBy: PackratParser[SQLGroupBy] = + GroupBy.regex ~ rep1sep(bucket, separator) ^^ { case _ ~ buckets => + SQLGroupBy(buckets) + } - def groupBy: Parser[SQLGroupBy] = GroupBy.regex ~ rep1sep(bucket, separator) ~ having.? ^^ { - case _ ~ buckets ~ having => SQLGroupBy(buckets, having) +} + +trait SQLHavingParser { + self: SQLParser with SQLWhereParser => + + def having: PackratParser[SQLHaving] = Having.regex ~> whereCriteria ^^ { rawTokens => + SQLHaving( + processTokens(rawTokens) + ) } } @@ -454,18 +468,19 @@ trait SQLGroupByParser { trait SQLOrderByParser { self: SQLParser => - def asc: Parser[Asc.type] = Asc.regex ^^ (_ => Asc) + def asc: PackratParser[Asc.type] = Asc.regex ^^ (_ => Asc) - def desc: Parser[Desc.type] = Desc.regex ^^ (_ => Desc) + def desc: PackratParser[Desc.type] = Desc.regex ^^ (_ => Desc) - private def fieldName: Parser[String] = + private def fieldName: PackratParser[String] = """\b(?!(?i)limit\b)[a-zA-Z_][a-zA-Z0-9_]*""".r ^^ (f => f) - def fieldWithFunction: Parser[(String, SQLFunction)] = sqlFunction ~ start ~ fieldName ~ end ^^ { - case f ~ _ ~ n ~ _ => (n, f) - } + def fieldWithFunction: PackratParser[(String, SQLFunction)] = + sqlFunction ~ start ~ fieldName ~ end ^^ { case f ~ _ ~ n ~ _ => + (n, f) + } - def sort: Parser[SQLFieldSort] = + def sort: PackratParser[SQLFieldSort] = (fieldWithFunction | fieldName) ~ (asc | desc).? ^^ { case f ~ o => f match { case i: (String, SQLFunction) => SQLFieldSort(i._1, o, Some(i._2)) @@ -473,8 +488,9 @@ trait SQLOrderByParser { } } - def orderBy: Parser[SQLOrderBy] = OrderBy.regex ~ rep1sep(sort, separator) ^^ { case _ ~ s => - SQLOrderBy(s) + def orderBy: PackratParser[SQLOrderBy] = OrderBy.regex ~ rep1sep(sort, separator) ^^ { + case _ ~ s => + SQLOrderBy(s) } } @@ -482,6 +498,6 @@ trait SQLOrderByParser { trait SQLLimitParser { self: SQLParser => - def limit: Parser[SQLLimit] = Limit.regex ~ int ^^ { case _ ~ i => SQLLimit(i.value) } + def limit: PackratParser[SQLLimit] = Limit.regex ~ int ^^ { case _ ~ i => SQLLimit(i.value) } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSearchRequest.scala index 8f6b8993..f4f14e09 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSearchRequest.scala @@ -5,30 +5,36 @@ case class SQLSearchRequest( from: SQLFrom, where: Option[SQLWhere], groupBy: Option[SQLGroupBy] = None, + having: Option[SQLHaving] = None, orderBy: Option[SQLOrderBy] = None, limit: Option[SQLLimit] = None, score: Option[Double] = None ) extends SQLToken { override def sql: String = - s"$select$from${asString(where)}${asString(groupBy)}${asString(orderBy)}${asString(limit)}" + s"$select$from${asString(where)}${asString(groupBy)}${asString(having)}${asString(orderBy)}${asString(limit)}" - lazy val aliases: Seq[String] = from.aliases + lazy val fieldAliases: Map[String, String] = select.fieldAliases + lazy val tableAliases: Map[String, String] = from.tableAliases lazy val unnests: Seq[(String, String, Option[SQLLimit])] = from.unnests def update(): SQLSearchRequest = { val updated = this.copy(from = from.update(this)) - updated.copy(select = select.update(updated), where = where.map(_.update(updated))) + updated.copy( + select = select.update(updated), + where = where.map(_.update(updated)), + groupBy = groupBy.map(_.update(updated)), + having = having.map(_.update(updated)) + ) } - lazy val fields: Seq[String] = - select.fields - .filterNot { - case _: SQLAggregate => true - case _ => false - } - .map(_.sourceField) + lazy val fields: Seq[String] = { + if (aggregates.isEmpty && buckets.isEmpty) + select.fields.map(_.sourceField).filterNot(f => excludes.contains(f)) + else + Seq.empty + } - lazy val aggregates: Seq[SQLAggregate] = select.fields.collect { case a: SQLAggregate => a } + lazy val aggregates: Seq[SQLField] = select.fields.filter(_.aggregation) lazy val excludes: Seq[String] = select.except.map(_.fields.map(_.sourceField)).getOrElse(Nil) @@ -36,5 +42,5 @@ case class SQLSearchRequest( source.sql } - lazy val buckets: Option[Seq[SQLBucket]] = groupBy.map(_.buckets) + lazy val buckets: Seq[SQLBucket] = groupBy.map(_.buckets).getOrElse(Seq.empty) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSelect.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSelect.scala index f0b0df71..18e0da9d 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSelect.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLSelect.scala @@ -4,19 +4,23 @@ case object Select extends SQLExpr("select") with SQLRegex case class SQLField( identifier: SQLIdentifier, - alias: Option[SQLAlias] = None -) extends Updateable { - override def sql: String = s"$identifier${asString(alias)}" + fieldAlias: Option[SQLAlias] = None +) extends Updateable + with SQLTokenWithFunction { + override def sql: String = s"$identifier${asString(fieldAlias)}" def update(request: SQLSearchRequest): SQLField = this.copy(identifier = identifier.update(request)) lazy val sourceField: String = if (identifier.nested) { - identifier.alias + identifier.tableAlias + .orElse(fieldAlias.map(_.alias)) .map(a => s"$a.") - .getOrElse("") + identifier.columnName.split("\\.").tail.mkString(".") + .getOrElse("") + identifier.name.split("\\.").tail.mkString(".") } else { - identifier.columnName + identifier.name } + + override def function: Option[SQLFunction] = identifier.function } case object Except extends SQLExpr("except") with SQLRegex @@ -27,34 +31,15 @@ case class SQLExcept(fields: Seq[SQLField]) extends Updateable { this.copy(fields = fields.map(_.update(request))) } -case object Filter extends SQLExpr("filter") with SQLRegex - -case class SQLFilter(criteria: Option[SQLCriteria]) extends Updateable { - override def sql: String = criteria match { - case Some(c) => s" $Filter($c)" - case _ => "" - } - def update(request: SQLSearchRequest): SQLFilter = - this.copy(criteria = criteria.map(_.update(request))) -} - -class SQLAggregate( - val function: AggregateFunction, - override val identifier: SQLIdentifier, - override val alias: Option[SQLAlias] = None, - val filter: Option[SQLFilter] = None -) extends SQLField(identifier, alias) { - override def sql: String = s"$function($identifier)${asString(alias)}" - override def update(request: SQLSearchRequest): SQLAggregate = - new SQLAggregate(function, identifier.update(request), alias, filter.map(_.update(request))) -} - case class SQLSelect( fields: Seq[SQLField] = Seq(SQLField(identifier = SQLIdentifier("*"))), except: Option[SQLExcept] = None ) extends Updateable { override def sql: String = s"$Select ${fields.mkString(",")}${except.getOrElse("")}" + lazy val fieldAliases: Map[String, String] = fields.flatMap { field => + field.fieldAlias.map(a => field.identifier.identifierName -> a.alias) + }.toMap def update(request: SQLSearchRequest): SQLSelect = this.copy(fields = fields.map(_.update(request)), except = except.map(_.update(request))) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLWhere.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLWhere.scala index 28e71a98..63ad2e77 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/SQLWhere.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/SQLWhere.scala @@ -269,6 +269,40 @@ case class ElasticGeoDistance( override def asFilter(currentQuery: Option[ElasticBoolQuery]): ElasticFilter = this } +case class SQLMatch( + identifiers: Seq[SQLIdentifier], + value: SQLLiteral +) extends SQLCriteria { + override def sql: String = + s"$operator (${identifiers.mkString(",")}) $Against ($value)" + override def operator: SQLOperator = Match + override def update(request: SQLSearchRequest): SQLCriteria = + this.copy(identifiers = identifiers.map(_.update(request))) + + override lazy val nested: Boolean = identifiers.forall(_.nested) + + lazy val criteria: SQLCriteria = { + identifiers.map(id => ElasticMatch(id, value, None)) match { + case Nil => throw new IllegalArgumentException("No identifiers for MATCH") + case single :: Nil => single + case first :: second :: rest => + val initial: SQLCriteria = SQLPredicate(first, Or, second) + rest.foldLeft(initial) { (acc, next) => + SQLPredicate(acc, Or, next) + } + } + } + + override def asFilter(currentQuery: Option[ElasticBoolQuery]): ElasticFilter = criteria match { + case predicate: SQLPredicate => predicate.copy(group = true).asFilter(currentQuery) + case _ => criteria.asFilter(currentQuery) + } + + override def matchCriteria: Boolean = true + + override def group: Boolean = false +} + case class ElasticMatch( identifier: SQLIdentifier, value: SQLLiteral, @@ -294,7 +328,7 @@ sealed abstract class ElasticRelation(val criteria: SQLCriteria, val operator: E private[this] def rtype(criteria: SQLCriteria): Option[String] = criteria match { case SQLPredicate(left, _, right, _, _) => rtype(left).orElse(rtype(right)) case c: SQLCriteriaWithIdentifier => - c.identifier.nestedType.orElse(c.identifier.columnName.split('.').headOption) + c.identifier.nestedType.orElse(c.identifier.name.split('.').headOption) case relation: ElasticRelation => relation.relationType case _ => None } @@ -317,7 +351,7 @@ case class ElasticNested(override val criteria: SQLCriteria, override val limit: private[this] def name(criteria: SQLCriteria): Option[String] = criteria match { case SQLPredicate(left, _, right, _, _) => name(left).orElse(name(right)) case c: SQLCriteriaWithIdentifier => - c.identifier.innerHitsName.orElse(c.identifier.columnName.split('.').headOption) + c.identifier.innerHitsName.orElse(c.identifier.name.split('.').headOption) case n: ElasticNested => name(n.criteria) case _ => None } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index 75e32395..d532a721 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -187,7 +187,7 @@ package object sql { value.substring(0, value.length - 1) else value - s"""${if (startWith) ".*?"}$v${if (endWith) ".*?"}""" + s"""${if (startWith) ".*"}$v${if (endWith) ".*"}""" } case object Alias extends SQLExpr("as") with SQLRegex @@ -197,4 +197,78 @@ package object sql { trait SQLRegex extends SQLToken { lazy val regex: Regex = s"\\b(?i)$sql\\b".r } + + trait SQLSource extends Updateable { + def name: String + def update(request: SQLSearchRequest): SQLSource + } + + case class SQLIdentifier( + name: String, + tableAlias: Option[String] = None, + distinct: Boolean = false, + nested: Boolean = false, + limit: Option[SQLLimit] = None, + function: Option[SQLFunction] = None, + fieldAlias: Option[String] = None + ) extends SQLExpr({ + var parts: Seq[String] = name.split("\\.").toSeq + tableAlias match { + case Some(a) => parts = a +: (if (nested) parts.tail else parts) + case _ => + } + val sql = { + if (distinct) { + s"$Distinct ${parts.mkString(".")}".trim + } else { + parts.mkString(".").trim + } + } + function match { + case Some(f) => s"$f($sql)" + case _ => sql + } + }) + with SQLSource + with SQLTokenWithFunction { + + lazy val aggregationName: Option[String] = + if (aggregation) fieldAlias.orElse(Option(name)) else None + + lazy val identifierName: String = + (function match { + case Some(f) => s"${f.sql}($name)" + case _ => name + }).toLowerCase + + lazy val nestedType: Option[String] = if (nested) Some(name.split('.').head) else None + + lazy val innerHitsName: Option[String] = if (nested) tableAlias else None + + def update(request: SQLSearchRequest): SQLIdentifier = { + val parts: Seq[String] = name.split("\\.").toSeq + if (request.tableAliases.values.toSeq.contains(parts.head)) { + request.unnests.find(_._1 == parts.head) match { + case Some(tuple) => + this.copy( + tableAlias = Some(parts.head), + name = s"${tuple._2}.${parts.tail.mkString(".")}", + nested = true, + limit = tuple._3, + fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias) + ) + case _ => + this.copy( + tableAlias = Some(parts.head), + name = parts.tail.mkString("."), + fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias) + ) + } + } else { + this.copy( + fieldAlias = request.fieldAliases.get(identifierName).orElse(fieldAlias) + ) + } + } + } } diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala index d56c7144..93758fba 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/SQLParserSpec.scala @@ -60,7 +60,8 @@ object Queries { val geoDistanceCriteria = "select * from Table where distance(profile.location,(-70.0,40.0)) <= \"5km\"" val except = "select * except(col1,col2) from Table" - val matchCriteria = "select * from Table where match(identifier,\"value\",\"options\")" + val matchCriteria = + "select * from Table where match (identifier1,identifier2,identifier3) against (\"value\")" val groupBy = "select identifier,count(identifier) from Table where identifier is not null group by identifier" val orderBy = "select * from Table order by identifier desc" @@ -341,4 +342,5 @@ class SQLParserSpec extends AnyFlatSpec with Matchers { val result = SQLParser(groupByWithHaving) result.toOption.flatMap(_.left.toOption.map(_.sql)).getOrElse("") should ===(groupByWithHaving) } + }