diff --git a/README.md b/README.md index 999d1911..d908a011 100644 --- a/README.md +++ b/README.md @@ -172,10 +172,10 @@ Download the self-contained fat JAR for your Elasticsearch version: | Elasticsearch Version | Artifact | |-----------------------|----------------------------------------| -| ES 6.x | `softclient4es6-jdbc-driver-0.1.0.jar` | -| ES 7.x | `softclient4es7-jdbc-driver-0.1.0.jar` | -| ES 8.x | `softclient4es8-jdbc-driver-0.1.0.jar` | -| ES 9.x | `softclient4es9-jdbc-driver-0.1.0.jar` | +| ES 6.x | `softclient4es6-jdbc-driver-0.1.1.jar` | +| ES 7.x | `softclient4es7-jdbc-driver-0.1.1.jar` | +| ES 8.x | `softclient4es8-jdbc-driver-0.1.1.jar` | +| ES 9.x | `softclient4es9-jdbc-driver-0.1.1.jar` | ```text JDBC URL: jdbc:elastic://localhost:9200 @@ -184,30 +184,58 @@ Driver class: app.softnetwork.elastic.jdbc.ElasticDriver ### Maven / Gradle / sbt +**Maven:** + ```xml app.softnetwork.elastic softclient4es8-jdbc-driver - 0.1.0 + 0.1.1 ``` +**Gradle:** + +```groovy +implementation 'app.softnetwork.elastic:softclient4es8-jdbc-driver:0.1.1' +``` + +**sbt:** + +```scala +libraryDependencies += "app.softnetwork.elastic" % "softclient4es8-jdbc-driver" % "0.1.1" +``` + The JDBC driver JARs are Scala-version-independent (no `_2.12` or `_2.13` suffix) and include all required dependencies. --- ## 🛠️ Scala Library Integration -For programmatic access, add SoftClient4ES to your project: +For programmatic access, add SoftClient4ES to your project. 
+ +### Client Library Matrix + +| Elasticsearch | Artifact | Scala | JDK | +|----------------|------------------------------|------------|------| +| 6.x | `softclient4es6-jest-client` | 2.12, 2.13 | 8+ | +| 6.x | `softclient4es6-rest-client` | 2.12, 2.13 | 8+ | +| 7.x | `softclient4es7-rest-client` | 2.12, 2.13 | 8+ | +| 8.x | `softclient4es8-java-client` | 2.12, 2.13 | 8+ | +| 9.x | `softclient4es9-java-client` | 2.13 only | 17+ | + +### sbt Setup ```scala // build.sbt resolvers += "Softnetwork" at "https://softnetwork.jfrog.io/artifactory/releases/" // Choose your Elasticsearch version -libraryDependencies += "app.softnetwork.elastic" %% "softclient4es8-java-client" % "0.17.4" +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es8-java-client" % "0.18.0" // Add the community extensions for materialized views (optional) libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-community-extensions" % "0.1.1" +// Add the JDBC driver if you want to use it from Scala (optional) +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-jdbc-driver" % "0.1.1" ``` ```scala @@ -286,34 +314,25 @@ Seamlessly sync event-sourced systems with Elasticsearch. ## 📦 Editions and Licensing -SoftClient4ES is available in two editions: - -### Community Edition (Open Source) - -Licensed under the **Apache License 2.0**. 
Includes the core SQL engine, REPL client, Scala library, and the community extensions library with limited materialized views support: - -| Feature | Community | -|--------------------------------------------------------------------|-------------| -| Full SQL DDL (CREATE, ALTER, DROP TABLE) | Yes | -| Full SQL DML (INSERT, UPDATE, DELETE, COPY INTO) | Yes | -| Full SQL DQL (SELECT, JOIN UNNEST, aggregations, window functions) | Yes | -| Pipelines, Watchers, Enrich Policies | Yes | -| Interactive REPL client | Yes | -| Scala library (Akka Streams) | Yes | -| Community extensions library (Scala) | Yes | -| Materialized Views (CREATE, REFRESH, DESCRIBE) | Yes (max 3) | -| Elasticsearch 6, 7, 8, 9 support | Yes | +SoftClient4ES uses a dual-license model: -### Pro / Enterprise Edition (Commercial) +- **Core** (SQL engine, REPL client, Scala library) — **Apache License 2.0** (open source) +- **JDBC Driver** and **Materialized Views** — **Elastic License 2.0** (free to use, not open source) -Adds the **JDBC driver** (which includes the community extensions) and raises materialized view limits: +### Feature Matrix -| Feature | Community | Pro | Enterprise | -|--------------------------------------|-----------|---------|------------| -| Everything in Community | Yes | Yes | Yes | -| JDBC driver (DBeaver, Tableau, etc.) 
| - | Yes | Yes | -| Maximum materialized views | 3 | Limited | Unlimited | -| Priority support | - | - | Yes | +| Feature | Community | Pro | Enterprise | +|--------------------------------------------------------------------|-----------|---------|------------| +| Full SQL DDL (CREATE, ALTER, DROP TABLE) | Yes | Yes | Yes | +| Full SQL DML (INSERT, UPDATE, DELETE, COPY INTO) | Yes | Yes | Yes | +| Full SQL DQL (SELECT, JOIN UNNEST, aggregations, window functions) | Yes | Yes | Yes | +| Pipelines, Watchers, Enrich Policies | Yes | Yes | Yes | +| Interactive REPL client | Yes | Yes | Yes | +| Scala library (Akka Streams) | Yes | Yes | Yes | +| Elasticsearch 6, 7, 8, 9 support | Yes | Yes | Yes | +| JDBC driver (DBeaver, Tableau, etc.) | Yes | Yes | Yes | +| Materialized Views (CREATE, REFRESH, DESCRIBE) | Max 3 | Limited | Unlimited | +| Priority support | - | - | Yes | ### Elasticsearch License Requirements @@ -349,7 +368,7 @@ Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines The core SQL engine and REPL client are licensed under the **Apache License 2.0** — see [LICENSE](LICENSE) for details. -The JDBC driver and Materialized Views extension are available under a commercial license. Contact us for pricing information. +The JDBC driver and Materialized Views extension are licensed under the **Elastic License 2.0** — free to use, not open source. --- diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 51d90c74..ea6999c1 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -2486,7 +2486,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : (param1 % 2)" | } | }, - | "identifier_mul_identifier2_minus_10": { + | "__c7": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" @@ -2546,109 +2546,109 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "script_fields": { - | "abs_identifier_plus_1_0_mul_2": { + | "__c2": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); ((param1 == null) ? null : Double.valueOf(Math.abs(param1)) + 1.0) * ((double) 2)" | } | }, - | "ceil_identifier": { + | "__c3": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.ceil(param1))" | } | }, - | "floor_identifier": { + | "__c4": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.floor(param1))" | } | }, - | "sqrt_identifier": { + | "__c5": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.sqrt(param1))" | } | }, - | "exp_identifier": { + | "__c6": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.exp(param1))" | } | }, - | "log_identifier": { + | "__c7": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : Double.valueOf(Math.log(param1))" | } | }, - | "log10_identifier": { + | "__c8": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.log10(param1))" | } | }, - | "pow_identifier_3": { + | "__c9": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.pow(param1, 3))" | } | }, - | "round_identifier": { + | "__c10": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Long.valueOf(Math.round((param1 * param2) / param2))" | } | }, - | "round_identifier_2": { + | "__c11": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? null : Long.valueOf(Math.round((param1 * param2) / param2))" | } | }, - | "sign_identifier": { + | "__c12": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" | } | }, - | "cos_identifier": { + | "__c13": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.cos(param1))" | } | }, - | "acos_identifier": { + | "__c14": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.acos(param1))" | } | }, - | "sin_identifier": { + | "__c15": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? 
null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.sin(param1))" | } | }, - | "asin_identifier": { + | "__c16": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.asin(param1))" | } | }, - | "tan_identifier": { + | "__c17": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.tan(param1))" | } | }, - | "atan_identifier": { + | "__c18": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.atan(param1))" | } | }, - | "atan2_identifier_3_0": { + | "__c19": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : Double.valueOf(Math.atan2(param1, 3.0))" diff --git a/build.sbt b/build.sbt index 015662dc..a546141d 100644 --- a/build.sbt +++ b/build.sbt @@ -20,7 +20,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.17.4" +ThisBuild / version := "0.18.0" ThisBuild / scalaVersion := scala213 diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index 6a51f881..5225b01e 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -449,16 +449,7 @@ trait ElasticConversion { wrapperAggs.flatMap { entry => val aggName = normalizeAggregationKey(entry.getKey) val aggValue = entry.getValue - val docCount = Option(aggValue.get("doc_count")) - .map(_.asLong()) - .getOrElse(0L) - - // Add the doc_count to the context if necessary - val currentContext = if (docCount > 0) { - parentContext + (s"${aggName}_doc_count" -> docCount) - } else { - parentContext - } + val currentContext = parentContext // Extract subaggregations (excluding doc_count) val subAggsNode = mapper.createObjectNode() @@ -502,13 +493,9 @@ trait ElasticConversion { val allTopHits = extractAllTopHits(bucket, fieldAliases, aggregations) val bucketKey = extractBucketKey(bucket) - val docCount = Option(bucket.get("doc_count")) - .map(_.asLong()) - .getOrElse(0L) val currentContext = parentContext ++ ListMap( - aggName -> bucketKey, - s"${aggName}_doc_count" -> docCount + aggName -> bucketKey ) ++ metrics ++ allTopHits // Check for sub-aggregations diff --git a/core/src/main/scala/app/softnetwork/elastic/client/GatewayApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/GatewayApi.scala index aa2c5998..29210b93 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/GatewayApi.scala +++ 
b/core/src/main/scala/app/softnetwork/elastic/client/GatewayApi.scala @@ -1675,6 +1675,7 @@ trait GatewayApi extends IndicesApi with ElasticClientHelpers { // ======================================================================== def run(sql: String)(implicit system: ActorSystem): Future[ElasticResult[QueryResult]] = { + logger.info(s"📥 SQL: $sql") val normalizedQuery = sql .split("\n") diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala index 8dbfc6e5..34fb18c4 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -61,7 +61,7 @@ trait SearchApi extends ElasticConversion with ElasticClientHelpers { * alias if present, otherwise the source field name. Returns empty Seq for SELECT * queries. */ protected def extractOutputFieldNames(single: SingleSearch): Seq[String] = { - val fields = single.select.fields + val fields = single.select.fieldsWithComputedAliases if (fields.size == 1 && fields.head.identifier.identifierName == "*") Seq.empty else fields.map(f => f.fieldAlias.map(_.alias).getOrElse(f.sourceField)) } diff --git a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index fa2c65cb..192c3263 100644 --- a/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -2486,7 +2486,7 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 % 2)" | } | }, - | "identifier_mul_identifier2_minus_10": { + | "__c7": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? 
null : doc['identifier'].value); def param2 = (doc['identifier2'].size() == 0 ? null : doc['identifier2'].value); def lv0 = ((param1 == null || param2 == null) ? null : (param1 * param2)); (lv0 == null) ? null : (lv0 - 10)" @@ -2546,109 +2546,109 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { | } | }, | "script_fields": { - | "abs_identifier_plus_1_0_mul_2": { + | "__c2": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); ((param1 == null) ? null : Double.valueOf(Math.abs(param1)) + 1.0) * ((double) 2)" | } | }, - | "ceil_identifier": { + | "__c3": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.ceil(param1))" | } | }, - | "floor_identifier": { + | "__c4": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.floor(param1))" | } | }, - | "sqrt_identifier": { + | "__c5": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.sqrt(param1))" | } | }, - | "exp_identifier": { + | "__c6": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.exp(param1))" | } | }, - | "log_identifier": { + | "__c7": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.log(param1))" | } | }, - | "log10_identifier": { + | "__c8": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? 
null : Double.valueOf(Math.log10(param1))" | } | }, - | "pow_identifier_3": { + | "__c9": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.pow(param1, 3))" | } | }, - | "round_identifier": { + | "__c10": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 0); (param1 == null || param2 == null) ? null : Long.valueOf(Math.round((param1 * param2) / param2))" | } | }, - | "round_identifier_2": { + | "__c11": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); def param2 = Math.pow(10, 2); (param1 == null || param2 == null) ? null : Long.valueOf(Math.round((param1 * param2) / param2))" | } | }, - | "sign_identifier": { + | "__c12": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : (param1 > 0 ? 1 : (param1 < 0 ? -1 : 0))" | } | }, - | "cos_identifier": { + | "__c13": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.cos(param1))" | } | }, - | "acos_identifier": { + | "__c14": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.acos(param1))" | } | }, - | "sin_identifier": { + | "__c15": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.sin(param1))" | } | }, - | "asin_identifier": { + | "__c16": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? 
null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.asin(param1))" | } | }, - | "tan_identifier": { + | "__c17": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.tan(param1))" | } | }, - | "atan_identifier": { + | "__c18": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.atan(param1))" | } | }, - | "atan2_identifier_3_0": { + | "__c19": { | "script": { | "lang": "painless", | "source": "def param1 = (doc['identifier'].size() == 0 ? null : doc['identifier'].value); (param1 == null) ? null : Double.valueOf(Math.atan2(param1, 3.0))" diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala index dcc68e6d..383d79a5 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/GroupByParser.scala @@ -23,6 +23,7 @@ trait GroupByParser { self: Parser with WhereParser => def bucketWithFunction: PackratParser[Identifier] = + quotedIdentifier | identifierWithArithmeticExpression | identifierWithTransformation | identifierWithWindowFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala index 6be69619..49e3efeb 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/OrderByParser.scala @@ -31,6 +31,7 @@ trait OrderByParser { """\b(?!(?i)limit\b)[a-zA-Z_][a-zA-Z0-9_]*""".r ^^ (f => f) def fieldWithFunction: PackratParser[Identifier] = + quotedIdentifier | identifierWithArithmeticExpression | 
identifierWithTransformation | identifierWithWindowFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index 15719298..7340b561 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -1108,6 +1108,7 @@ trait Parser identifierWithTransformation | // transformations applied to an identifier identifierWithIntervalFunction | identifierWithFunction | // fonctions applied to an identifier + quotedIdentifier | // double-quoted identifiers (ANSI SQL-92 delimited identifiers) identifierWithValue | identifier } @@ -1278,6 +1279,15 @@ trait Parser val identifierRegex: Regex = identifierRegexStr.r // scala.util.matching.Regex + def quotedIdentifier: PackratParser[Identifier] = + ("\"" ~> """([^"\\]|\\.)*""".r <~ "\"") ^^ { str => + GenericIdentifier( + str.replace("\\\"", "\"").replace("\\\\", "\\"), + None, + distinct = false + ) + } + def identifier: PackratParser[Identifier] = (Distinct.regex.? ~ identifierRegex ^^ { case d ~ i => GenericIdentifier( @@ -1318,4 +1328,9 @@ trait Parser def alias: PackratParser[Alias] = Alias.regex.? ~ regexAlias.r ^^ { case _ ~ b => Alias(b) } + def quotedAlias: PackratParser[Alias] = + Alias.regex.? 
~ ("\"" ~> """([^"\\]|\\.)*""".r <~ "\"") ^^ { case _ ~ b => + Alias(b.replace("\\\"", "\"").replace("\\\\", "\\")) + } + } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala index 89b6aff9..ef9bde15 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/SelectParser.scala @@ -22,13 +22,14 @@ trait SelectParser { self: Parser with WhereParser => def field: PackratParser[Field] = - (identifierWithArithmeticExpression | + (quotedIdentifier | + identifierWithArithmeticExpression | identifierWithTransformation | identifierWithWindowFunction | identifierWithAggregation | identifierWithIntervalFunction | identifierWithFunction | - identifier) ~ alias.? ^^ { case i ~ a => + identifier) ~ (quotedAlias | alias).? ^^ { case i ~ a => Field(i, a) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala index be23a45a..57f391af 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/WhereParser.scala @@ -78,13 +78,15 @@ import app.softnetwork.elastic.sql.query.{ trait WhereParser { self: Parser with GroupByParser with OrderByParser => - def isNull: PackratParser[Criteria] = identifier ~ IS_NULL.regex ^^ { case i ~ _ => - IsNullExpr(i) + def isNull: PackratParser[Criteria] = (quotedIdentifier | identifier) ~ IS_NULL.regex ^^ { + case i ~ _ => + IsNullExpr(i) } - def isNotNull: PackratParser[Criteria] = identifier ~ IS_NOT_NULL.regex ^^ { case i ~ _ => - IsNotNullExpr(i) - } + def isNotNull: PackratParser[Criteria] = + (quotedIdentifier | identifier) ~ IS_NOT_NULL.regex ^^ { case i ~ _ => + IsNotNullExpr(i) + } def eq: PackratParser[ComparisonOperator] = EQ.sql ^^ (_ => EQ) @@ -93,6 +95,7 @@ 
trait WhereParser { def diff: PackratParser[ComparisonOperator] = DIFF.sql ^^ (_ => DIFF) private def any_identifier: PackratParser[Identifier] = + quotedIdentifier | identifierWithArithmeticExpression | identifierWithTransformation | identifierWithWindowFunction | diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala index 16ded040..1be9643c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/GroupBy.scala @@ -91,7 +91,7 @@ case class Bucket( lazy val name: String = identifier.fieldAlias.getOrElse(path) - lazy val path: String = sourceBucket.replace(".", "_") + lazy val path: String = sourceBucket lazy val nestedPath: String = { identifier.nestedElement match { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index bfe89908..b8db2adf 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -129,14 +129,16 @@ case class Select( ) extends Updateable { override def sql: String = s"$Select ${fields.mkString(", ")}${except.getOrElse("")}" - lazy val fieldAliases: ListMap[String, String] = ListMap(fields.flatMap { field => - field.fieldAlias - .map(a => field.identifier.identifierName -> a.alias) - /*.orElse(field.identifier.name match { - case name if name.nonEmpty => Some(name -> name) - case _ => None - })*/ - }: _*) + lazy val fieldsWithComputedAliases: Seq[Field] = fields.zipWithIndex.map { + case (f, i) if f.fieldAlias.isEmpty && f.identifier.functions.nonEmpty => + f.copy(fieldAlias = Some(Alias(s"__c${i + 1}"))) + case (f, _) => f + } + lazy val fieldAliases: ListMap[String, String] = ListMap( + fieldsWithComputedAliases.flatMap { field => + field.fieldAlias.map(a => 
field.identifier.identifierName -> a.alias) + }: _* + ) lazy val aliasesToMap: ListMap[String, String] = fieldAliases.map(_.swap) def update(request: SingleSearch): Select = this.copy(fields = fields.map(_.update(request)), except = except.map(_.update(request))) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala index 060b26b3..1597fa48 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala @@ -163,14 +163,21 @@ package object query { /** Mapping from inner hit name to (subFieldName, outputFieldName) pairs. Used to flatten inner * hits into individual rows (UNNEST row explosion). */ - lazy val nestedHitsMappings: Map[String, Seq[(String, String)]] = + lazy val nestedHitsMappings: Map[String, Seq[(String, String)]] = { + val computedAliasMap = select.fieldsWithComputedAliases + .flatMap(f => f.fieldAlias.map(a => f.identifier.identifierName -> a.alias)) + .toMap nestedFields.map { case (innerHitsName, fields) => innerHitsName -> fields.map { f => val subField = f.sourceField.stripPrefix(innerHitsName + ".") - val outputName = f.fieldAlias.map(_.alias).getOrElse(f.sourceField) + val outputName = f.fieldAlias + .map(_.alias) + .orElse(computedAliasMap.get(f.identifier.identifierName)) + .getOrElse(f.sourceField) (subField, outputName) } } + } def toNestedElement(u: Unnest): NestedElement = { val updated = unnests.getOrElse(u.alias.map(_.alias).getOrElse(u.name), u) @@ -216,7 +223,7 @@ package object query { if (aggregates.nonEmpty) Seq.empty else - select.fields.filter(_.isScriptField) + select.fieldsWithComputedAliases.filter(_.isScriptField) } lazy val fields: Seq[String] = { @@ -232,12 +239,13 @@ package object query { Seq.empty } - lazy val windowFields: Seq[Field] = select.fields.filter(_.identifier.hasWindow) + lazy val windowFields: Seq[Field] = + 
select.fieldsWithComputedAliases.filter(_.identifier.hasWindow) lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.identifier.windows) lazy val aggregates: Seq[Field] = - select.fields + select.fieldsWithComputedAliases .filter(f => f.isAggregation || f.isBucketScript) .filterNot(_.identifier.hasWindow) ++ windowFields @@ -281,9 +289,7 @@ package object query { val nonAggregatedFields = select.fields.filterNot(f => f.hasAggregation) val invalidFields = nonAggregatedFields.filterNot(f => - buckets.exists(b => - b.name == f.fieldAlias.map(_.alias).getOrElse(f.sourceField.replace(".", "_")) - ) + buckets.exists(b => b.name == f.fieldAlias.map(_.alias).getOrElse(f.sourceField)) ) if (invalidFields.nonEmpty) { Left( diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala index 9fcfdfe0..d876caa4 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala @@ -1591,6 +1591,17 @@ class ParserSpec extends AnyFlatSpec with Matchers { } } + it should "parse DESC TABLE statement" in { + val sql = "DESC TABLE users" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case DescribeTable("users") => + case _ => fail("Expected DescTable") + } + } + behavior of "Parser DDL with Pipeline Statements" it should "parse CREATE OR REPLACE PIPELINE" in { @@ -2815,4 +2826,146 @@ class ParserSpec extends AnyFlatSpec with Matchers { } } + // ── Double-quoted identifiers (ANSI SQL-92) ────────────────────────────── + + it should "parse Superset query with double-quoted alias and ORDER BY" in { + val sql = + """SELECT country AS country, sum(total_price) AS "Revenue" FROM ecommerce GROUP BY country ORDER BY "Revenue" DESC LIMIT 10""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = 
result.toOption.get + stmt match { + case ss: SingleSearch => + val fields = ss.select.fields + fields should have size 2 + fields(1).fieldAlias.map(_.alias) shouldBe Some("Revenue") + ss.orderBy.get.sorts.head.field.aliasOrName shouldBe "Revenue" + case _ => fail("Expected SingleSearch") + } + } + + it should "parse Superset query with double-quoted alias containing spaces" in { + val sql = + """SELECT customer_name AS customer_name, sum(total_price) AS "Total Spend", COUNT(*) AS "Orders" FROM ecommerce GROUP BY customer_name ORDER BY "Total Spend" DESC LIMIT 10""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + val fields = ss.select.fields + fields(1).fieldAlias.map(_.alias) shouldBe Some("Total Spend") + fields(2).fieldAlias.map(_.alias) shouldBe Some("Orders") + ss.orderBy.get.sorts.head.field.aliasOrName shouldBe "Total Spend" + case _ => fail("Expected SingleSearch") + } + } + + it should "parse double-quoted identifier in SELECT" in { + val sql = """SELECT "col" FROM t""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + ss.select.fields.head.identifier.name shouldBe "col" + case _ => fail("Expected SingleSearch") + } + } + + it should "parse double-quoted identifier in WHERE" in { + val sql = """SELECT * FROM t WHERE "col" > 10""" + val result = Parser(sql) + result.isRight shouldBe true + } + + it should "parse double-quoted identifier in GROUP BY" in { + val sql = """SELECT "col", count(*) AS cnt FROM t GROUP BY "col"""" + val result = Parser(sql) + result.isRight shouldBe true + } + + it should "parse reserved word as double-quoted identifier" in { + val sql = """SELECT "select" FROM t""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + ss.select.fields.head.identifier.name shouldBe "select" + case _ 
=> fail("Expected SingleSearch") + } + } + + // ── Computed aliases for unnamed expression columns (Issue #001) ─────────── + + it should "generate computed aliases for aggregate functions without explicit alias" in { + val sql = """SELECT COUNT(*), SUM(quantity) FROM t""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + // SQL round-trip: no AS __cN injected + stmt.sql shouldBe sql + // computed aliases via fieldsWithComputedAliases + val computed = ss.select.fieldsWithComputedAliases + computed(0).fieldAlias.map(_.alias) shouldBe Some("__c1") + computed(1).fieldAlias.map(_.alias) shouldBe Some("__c2") + // original fields unchanged + ss.select.fields(0).fieldAlias shouldBe None + ss.select.fields(1).fieldAlias shouldBe None + case _ => fail("Expected SingleSearch") + } + } + + it should "preserve explicit aliases and only compute for unnamed expressions" in { + val sql = """SELECT name, COUNT(*) AS total FROM t GROUP BY name""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + stmt.sql shouldBe sql + val computed = ss.select.fieldsWithComputedAliases + // name: simple column, no computed alias + computed(0).fieldAlias shouldBe None + // COUNT(*) AS total: explicit alias preserved + computed(1).fieldAlias.map(_.alias) shouldBe Some("total") + case _ => fail("Expected SingleSearch") + } + } + + it should "generate computed alias for unnamed expression but not for named columns" in { + val sql = """SELECT name, COUNT(*) FROM t GROUP BY name""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + stmt.sql shouldBe sql + val computed = ss.select.fieldsWithComputedAliases + // name: simple column, no alias + computed(0).fieldAlias shouldBe None + // COUNT(*): expression, gets __c2 + computed(1).fieldAlias.map(_.alias) 
shouldBe Some("__c2") + case _ => fail("Expected SingleSearch") + } + } + + it should "generate computed alias for arithmetic expression" in { + val sql = """SELECT unit_price * quantity FROM t""" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case ss: SingleSearch => + stmt.sql shouldBe sql + val computed = ss.select.fieldsWithComputedAliases + computed(0).fieldAlias.map(_.alias) shouldBe Some("__c1") + // original unchanged + ss.select.fields(0).fieldAlias shouldBe None + case _ => fail("Expected SingleSearch") + } + } + } diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala index 63a42cbb..6e914c6b 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala @@ -16,7 +16,7 @@ package app.softnetwork.elastic.client.repl -import app.softnetwork.elastic.client.result.{DmlResult, OutputFormat, QueryRows} +import app.softnetwork.elastic.client.result.{DmlResult, OutputFormat, QueryRows, QueryStructured} import app.softnetwork.elastic.scalatest.ElasticTestKit import java.time.LocalDate @@ -704,6 +704,140 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { assertSelectResult(System.nanoTime(), executeSync(sql)) } + it should "not leak internal _doc_count columns in GROUP BY results" in { + val sql = + """SELECT profile.city AS city, + | COUNT(*) AS cnt, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(*) >= 1""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case 
other => fail(s"Unexpected result type: $other") + } + rows should not be empty + rows.foreach { row => + row.keys.filter(_.endsWith("_doc_count")) shouldBe empty + } + } + + it should "support double-quoted identifiers (ANSI SQL-92, Superset compatibility)" in { + val sql = + """SELECT profile.city AS "City", + | COUNT(*) AS "Total", + | AVG(age) AS "Avg Age" + |FROM dql_users + |GROUP BY profile.city + |ORDER BY "Total" DESC""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + } + + it should "generate computed aliases for unnamed expression columns" in { + val sql = + """SELECT profile.city, + | COUNT(*), + | AVG(age) + |FROM dql_users + |GROUP BY profile.city""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Verify computed aliases are used as column names + val firstRow = rows.head + firstRow.keys should contain("profile.city") + firstRow.keys should contain("__c2") // COUNT(*) + firstRow.keys should contain("__c3") // AVG(age) + } + + // Issue #006 — Result column order must match SELECT clause order + it should "preserve SELECT column order: aggregation first, bucket second" in { + val sql = + """SELECT COUNT(*), + | profile.city + |FROM dql_users + |GROUP BY profile.city""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: 
QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Column order must match SELECT: __c1 (COUNT(*)), then profile.city + val columnOrder = rows.head.keys.toSeq + columnOrder.indexOf("__c1") should be < columnOrder.indexOf("profile.city") + } + + it should "preserve SELECT column order: bucket first, aliased aggregation second" in { + val sql = + """SELECT profile.city, + | COUNT(*) AS cnt + |FROM dql_users + |GROUP BY profile.city""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Column order must match SELECT: profile.city, then cnt + val columnOrder = rows.head.keys.toSeq + columnOrder.indexOf("profile.city") should be < columnOrder.indexOf("cnt") + } + + it should "preserve SELECT column order: multiple buckets and mixed aggregations" in { + // Single bucket column (profile.city) combined with mixed aggregations: one aliased (cnt), one unnamed (AVG(age) -> __c3) + val sql = + """SELECT profile.city, + | COUNT(*) AS cnt, + | AVG(age) + |FROM dql_users + |GROUP BY profile.city""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Column order must match SELECT: profile.city, cnt, __c3 + val columnOrder = rows.head.keys.toSeq + columnOrder.indexOf("profile.city") should be < columnOrder.indexOf("cnt") + columnOrder.indexOf("cnt") should be < columnOrder.indexOf("__c3") + } + it should "support arithmetic, 
IN, BETWEEN, IS NULL, LIKE, RLIKE" in { val sql = """SELECT id,