From b4ac4624f9266efd6f0559e7fffbad26757a7387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 15 Mar 2026 10:16:18 +0100 Subject: [PATCH 01/10] feat: add demo charts and dashboard creation for Elasticsearch with Superset + duckdb demo --- README.md | 18 +- demo/.env | 5 + demo/README.md | 136 ++++++++ demo/docker-compose.yml | 193 +++++++++++ demo/duckdb/Dockerfile | 14 + demo/duckdb/demo.py | 185 ++++++++++ demo/init/load-ecommerce-data.sh | 96 ++++++ demo/superset/Dockerfile | 16 + demo/superset/dashboards/create_dashboards.py | 319 ++++++++++++++++++ demo/superset/dashboards/datasources.yaml | 4 + demo/superset/elastic_dialect.py | 14 + demo/superset/init-superset.sh | 88 +++++ demo/superset/setup_dialect.py | 13 + demo/superset/superset_config.py | 19 ++ 14 files changed, 1111 insertions(+), 9 deletions(-) create mode 100644 demo/.env create mode 100644 demo/README.md create mode 100644 demo/docker-compose.yml create mode 100644 demo/duckdb/Dockerfile create mode 100644 demo/duckdb/demo.py create mode 100755 demo/init/load-ecommerce-data.sh create mode 100644 demo/superset/Dockerfile create mode 100644 demo/superset/dashboards/create_dashboards.py create mode 100644 demo/superset/dashboards/datasources.yaml create mode 100644 demo/superset/elastic_dialect.py create mode 100755 demo/superset/init-superset.sh create mode 100644 demo/superset/setup_dialect.py create mode 100644 demo/superset/superset_config.py diff --git a/README.md b/README.md index d908a011..c29e4699 100644 --- a/README.md +++ b/README.md @@ -172,10 +172,10 @@ Download the self-contained fat JAR for your Elasticsearch version: | Elasticsearch Version | Artifact | |-----------------------|----------------------------------------| -| ES 6.x | `softclient4es6-jdbc-driver-0.1.1.jar` | -| ES 7.x | `softclient4es7-jdbc-driver-0.1.1.jar` | -| ES 8.x | `softclient4es8-jdbc-driver-0.1.1.jar` | -| ES 9.x | `softclient4es9-jdbc-driver-0.1.1.jar` | +| ES 6.x | `softclient4es6-jdbc-driver-0.1.2.jar` | +| ES 7.x | `softclient4es7-jdbc-driver-0.1.2.jar` | +| ES 8.x | `softclient4es8-jdbc-driver-0.1.2.jar` | +| ES 9.x | `softclient4es9-jdbc-driver-0.1.2.jar` | ```text JDBC URL: jdbc:elastic://localhost:9200 @@ -190,20 +190,20 @@ Driver class: app.softnetwork.elastic.jdbc.ElasticDriver app.softnetwork.elastic softclient4es8-jdbc-driver - 0.1.1 + 0.1.2 ``` **Gradle:** ```groovy -implementation 'app.softnetwork.elastic:softclient4es8-jdbc-driver:0.1.1' +implementation 'app.softnetwork.elastic:softclient4es8-jdbc-driver:0.1.2' ``` **sbt:** ```scala -libraryDependencies += "app.softnetwork.elastic" % "softclient4es8-jdbc-driver" % "0.1.1" +libraryDependencies += "app.softnetwork.elastic" % "softclient4es8-jdbc-driver" % "0.1.2" ``` The JDBC driver JARs are Scala-version-independent (no `_2.12` or `_2.13` suffix) and include all required dependencies. @@ -233,9 +233,9 @@ resolvers += "Softnetwork" at "https://softnetwork.jfrog.io/artifactory/releases // Choose your Elasticsearch version libraryDependencies += "app.softnetwork.elastic" %% "softclient4es8-java-client" % "0.18.0" // Add the community extensions for materialized views (optional) -libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-community-extensions" % "0.1.1" +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-community-extensions" % "0.1.3" // Add the JDBC driver if you want to use it from Scala (optional) -libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-jdbc-driver" % "0.1.1" +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-jdbc-driver" % "0.1.2" ``` ```scala diff --git a/demo/.env b/demo/.env new file mode 100644 index 00000000..b41957cf --- /dev/null +++ b/demo/.env @@ -0,0 +1,5 @@ +# Elasticsearch version (full version) +ES_VERSION=8.18.3 + +# Elasticsearch major version (for Flight SQL server image tag) +ES_MAJOR_VERSION=8 diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 00000000..d4963356 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,136 @@ +# SoftClient4ES Integration Demos + +Docker Compose demos showcasing Elasticsearch SQL access via [Arrow Flight SQL](https://arrow.apache.org/docs/format/FlightSql.html). + +## Prerequisites + +- Docker and Docker Compose v2+ +- ~4 GB free RAM (Elasticsearch + demo services) + +## Architecture + +``` + ┌─────────────────┐ + │ Elasticsearch │ + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ Arrow Flight │ + │ SQL Server │ + └──────┬───┬──────┘ + │ │ + ┌────▼┐ ┌▼─────────┐ + │Duck │ │ Superset │ + │ DB │ │ │ + └─────┘ └──────────┘ +``` + +All profiles share a common Elasticsearch instance with preloaded e-commerce sample data (20 orders). + +## Profiles + +### Superset — BI Dashboarding via Flight SQL + +```bash +docker compose --profile superset-flight up +``` + +| Service | URL | +|---------------|-----------------------| +| Superset UI | http://localhost:8088 | +| Elasticsearch | http://localhost:9200 | + +**Login:** `admin` / `admin123` + +The init script automatically: +- Bootstraps the Superset database +- Creates the admin user +- Configures an Arrow Flight SQL datasource +- Registers the `ecommerce` table as a dataset +- Creates 6 demo charts and an **E-Commerce Analytics** dashboard + +Navigate to **Dashboards** to see the pre-built analytics, **SQL Lab** to run ad-hoc queries, or **Datasets** to explore the data model. + +### DuckDB — In-Process Analytics + +```bash +docker compose --profile duckdb up +``` + +Runs a Python demo script that: +1. Connects to the Flight SQL server via `adbc_driver_flightsql` +2. Fetches Arrow tables from Elasticsearch +3. Registers them in DuckDB for local analytical queries +4. Demonstrates zero-copy Arrow data flow: ES → Flight SQL → DuckDB + +Output is printed directly to the console. + +## Configuration + +Edit `.env` to change versions: + +```env +# Elasticsearch version (full) +ES_VERSION=8.18.3 + +# Major version (selects the Flight SQL server image) +ES_MAJOR_VERSION=8 +``` + +Available Flight SQL server images: +- `softnetwork/softclient4es6-arrow-flight-server:latest` +- `softnetwork/softclient4es7-arrow-flight-server:latest` +- `softnetwork/softclient4es8-arrow-flight-server:latest` +- `softnetwork/softclient4es9-arrow-flight-server:latest` + +## Cleanup + +```bash +# Stop and remove containers +docker compose --profile superset-flight down +docker compose --profile duckdb down + +# Remove volumes (data) +docker compose --profile superset-flight down -v +docker compose --profile duckdb down -v +``` + +## Sample Data + +The `ecommerce` index contains 20 orders with the following fields: + +| Field | Type | Example | +|------------------|---------|----------------------| +| `order_id` | keyword | ORD-001 | +| `order_date` | date | 2025-01-15T10:30:00Z | +| `customer_name` | keyword | Alice Martin | +| `country` | keyword | France | +| `city` | keyword | Paris | +| `category` | keyword | Electronics | +| `product_name` | text | Wireless Headphones | +| `quantity` | integer | 2 | +| `unit_price` | double | 79.99 | +| `total_price` | double | 159.98 | +| `payment_method` | keyword | Credit Card | +| `status` | keyword | delivered | + +### Example Queries + +```sql +-- Revenue by country +SELECT country, COUNT(*) as orders, SUM(total_price) as revenue +FROM ecommerce +GROUP BY country +ORDER BY revenue DESC + +-- Top categories +SELECT category, SUM(total_price) as revenue +FROM ecommerce +GROUP BY category +ORDER BY revenue DESC + +-- Order status breakdown +SELECT status, COUNT(*) as ct +FROM ecommerce +GROUP BY status +``` diff --git a/demo/docker-compose.yml b/demo/docker-compose.yml new file mode 100644 index 00000000..e5dccc0c --- /dev/null +++ b/demo/docker-compose.yml @@ -0,0 +1,193 @@ +networks: + elastic-demo: + name: elastic-demo + +volumes: + elasticsearch-data: + superset-db: + superset-home: + +x-common-env: &common-env + ELASTIC_HOST: elasticsearch + ELASTIC_PORT: "9200" + ELASTIC_AUTH_METHOD: noauth + +services: + + # ────────────────────────────────────────────── + # Base services (always started) + # ────────────────────────────────────────────── + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION:-8.18.3} + container_name: es-demo + environment: + - cluster.name=demo + - node.name=es01 + - discovery.type=single-node + - xpack.security.enabled=false + - xpack.security.http.ssl.enabled=false + - xpack.security.transport.ssl.enabled=false + - xpack.license.self_generated.type=trial + - bootstrap.memory_lock=true + - ES_JAVA_OPTS=-Xms512m -Xmx512m + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - elasticsearch-data:/usr/share/elasticsearch/data + ports: + - "9200:9200" + networks: + - elastic-demo + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=green&timeout=1s"] + interval: 5s + timeout: 3s + start_period: 30s + + es-init: + image: curlimages/curl:latest + container_name: es-init + depends_on: + elasticsearch: + condition: service_healthy + volumes: + - ./init:/init:ro + entrypoint: ["/bin/sh", "/init/load-ecommerce-data.sh"] + environment: + ES_HOST: elasticsearch + ES_PORT: "9200" + networks: + - elastic-demo + + # ────────────────────────────────────────────── + # Profile: superset-flight + # ────────────────────────────────────────────── + + flight-sql-superset: + image: softnetwork/softclient4es${ES_MAJOR_VERSION:-8}-arrow-flight-sql:latest + pull_policy: always + container_name: flight-sql-superset + profiles: ["superset-flight"] + depends_on: + es-init: + condition: service_completed_successfully + environment: + <<: *common-env + ports: + - "32010:32010" + networks: + - elastic-demo + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/32010'"] + interval: 5s + timeout: 3s + start_period: 15s + + superset-db: + image: postgres:16-alpine + container_name: superset-db + profiles: ["superset-flight"] + environment: + POSTGRES_USER: superset + POSTGRES_PASSWORD: superset + POSTGRES_DB: superset + volumes: + - superset-db:/var/lib/postgresql/data + networks: + - elastic-demo + healthcheck: + test: ["CMD-SHELL", "pg_isready -U superset"] + interval: 5s + timeout: 3s + + superset: + build: + context: ./superset + dockerfile: Dockerfile + container_name: superset + profiles: ["superset-flight"] + depends_on: + superset-db: + condition: service_healthy + flight-sql-superset: + condition: service_healthy + environment: + SUPERSET_SECRET_KEY: "demo-secret-key-change-in-production" + SQLALCHEMY_DATABASE_URI: "postgresql+psycopg2://superset:superset@superset-db:5432/superset" + FLIGHT_SQL_HOST: flight-sql-superset + FLIGHT_SQL_PORT: "32010" + ports: + - "8088:8088" + volumes: + - superset-home:/app/superset_home + networks: + - elastic-demo + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:8088/health"] + interval: 10s + timeout: 5s + start_period: 60s + + superset-init: + build: + context: ./superset + dockerfile: Dockerfile + container_name: superset-init + profiles: ["superset-flight"] + depends_on: + superset: + condition: service_healthy + environment: + SUPERSET_SECRET_KEY: "demo-secret-key-change-in-production" + SQLALCHEMY_DATABASE_URI: "postgresql+psycopg2://superset:superset@superset-db:5432/superset" + FLIGHT_SQL_HOST: flight-sql-superset + FLIGHT_SQL_PORT: "32010" + volumes: + - ./superset/init-superset.sh:/init-superset.sh:ro + - ./superset/dashboards:/dashboards:ro + - superset-home:/app/superset_home + entrypoint: ["/bin/sh", "/init-superset.sh"] + networks: + - elastic-demo + + # ────────────────────────────────────────────── + # Profile: duckdb + # ────────────────────────────────────────────── + + flight-sql-duckdb: + image: softnetwork/softclient4es${ES_MAJOR_VERSION:-8}-arrow-flight-sql:latest + pull_policy: always + container_name: flight-sql-duckdb + profiles: ["duckdb"] + depends_on: + es-init: + condition: service_completed_successfully + environment: + <<: *common-env + networks: + - elastic-demo + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/32010'"] + interval: 5s + timeout: 3s + start_period: 15s + + duckdb: + build: + context: ./duckdb + dockerfile: Dockerfile + container_name: duckdb-demo + profiles: ["duckdb"] + depends_on: + flight-sql-duckdb: + condition: service_healthy + volumes: + - ./duckdb/demo.py:/demo/demo.py:ro + environment: + FLIGHT_SQL_HOST: flight-sql-duckdb + FLIGHT_SQL_PORT: "32010" + networks: + - elastic-demo diff --git a/demo/duckdb/Dockerfile b/demo/duckdb/Dockerfile new file mode 100644 index 00000000..2c082bad --- /dev/null +++ b/demo/duckdb/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.12-slim + +RUN pip install --no-cache-dir \ + duckdb \ + adbc-driver-flightsql \ + adbc-driver-manager \ + pyarrow \ + tabulate + +WORKDIR /demo + +COPY demo.py /demo/demo.py + +ENTRYPOINT ["python", "/demo/demo.py"] diff --git a/demo/duckdb/demo.py b/demo/duckdb/demo.py new file mode 100644 index 00000000..b73221fc --- /dev/null +++ b/demo/duckdb/demo.py @@ -0,0 +1,185 @@ +""" +DuckDB + ADBC Flight SQL Demo +============================== +Demonstrates DuckDB querying Elasticsearch via Arrow Flight SQL, +using zero-copy Arrow columnar transport. +""" + +import os +import sys +import time + +import adbc_driver_flightsql.dbapi as flight_sql +import duckdb +from tabulate import tabulate + + +def wait_for_flight_sql(host: str, port: int, max_retries: int = 30) -> None: + """Wait for the Arrow Flight SQL server to be ready.""" + uri = f"grpc://{host}:{port}" + print(f"Waiting for Flight SQL server at {uri}...") + for attempt in range(max_retries): + try: + conn = flight_sql.connect(uri) + conn.close() + print("Flight SQL server is ready.\n") + return + except Exception: + time.sleep(2) + print("ERROR: Flight SQL server not available after retries.", file=sys.stderr) + sys.exit(1) + + +def run_demo(host: str, port: int) -> None: + """Run the DuckDB + Flight SQL demo queries.""" + uri = f"grpc://{host}:{port}" + + # Connect to Flight SQL via ADBC + flight_conn = flight_sql.connect(uri) + + # Connect DuckDB (in-memory) + db = duckdb.connect() + + print("=" * 60) + print(" DuckDB + Arrow Flight SQL + Elasticsearch Demo") + print("=" * 60) + + queries = [ + ( + "1. Browse all orders (SELECT *)", + "SELECT * FROM ecommerce ORDER BY order_date LIMIT 10", + ), + ( + "2. Revenue by country", + """ + SELECT country, COUNT(*) as order_count, SUM(total_price) as revenue + FROM ecommerce + GROUP BY country + ORDER BY revenue DESC + """, + ), + ( + "3. Revenue by category", + """ + SELECT category, COUNT(*) as order_count, SUM(total_price) as revenue + FROM ecommerce + GROUP BY category + ORDER BY revenue DESC + """, + ), + ( + "4. Average order value by payment method", + """ + SELECT payment_method, COUNT(*) as orders, AVG(total_price) as avg_order_value + FROM ecommerce + GROUP BY payment_method + ORDER BY avg_order_value DESC + """, + ), + ( + "5. Order status distribution", + """ + SELECT status, COUNT(*) as ct + FROM ecommerce + GROUP BY status + ORDER BY ct DESC + """, + ), + ( + "6. Top 5 customers by spend", + """ + SELECT customer_name, country, SUM(total_price) as total_spend + FROM ecommerce + GROUP BY customer_name, country + ORDER BY total_spend DESC + LIMIT 5 + """, + ), + ] + + for title, sql in queries: + print(f"\n{'─' * 60}") + print(f" {title}") + print(f"{'─' * 60}") + print(f" SQL: {' '.join(sql.split())}\n") + + try: + # Execute via Flight SQL, get Arrow table + cursor = flight_conn.cursor() + cursor.execute(sql.strip()) + arrow_table = cursor.fetch_arrow_table() + cursor.close() + + # Register Arrow table in DuckDB and query it + db.register("result", arrow_table) + result = db.execute("SELECT * FROM result").fetchall() + columns = [desc[0] for desc in db.description] + + print(tabulate(result, headers=columns, tablefmt="rounded_grid")) + print(f" ({len(result)} rows)") + + db.unregister("result") + + except Exception as e: + print(f" ERROR: {e}") + + # DuckDB-specific analytics on top of Flight SQL data + print(f"\n{'─' * 60}") + print(" 7. DuckDB cross-query analytics (join two Flight SQL results)") + print(f"{'─' * 60}") + + try: + # Fetch full dataset once + cursor = flight_conn.cursor() + cursor.execute("SELECT * FROM ecommerce") + full_data = cursor.fetch_arrow_table() + cursor.close() + + db.register("ecommerce", full_data) + + # Run a DuckDB-native analytical query + analytics_sql = """ + WITH country_stats AS ( + SELECT + country, + COUNT(*) as orders, + SUM(total_price) as revenue, + AVG(total_price) as avg_order + FROM ecommerce + GROUP BY country + ) + SELECT + country, + orders, + ROUND(revenue, 2) as revenue, + ROUND(avg_order, 2) as avg_order, + ROUND(100.0 * revenue / SUM(revenue) OVER (), 1) as pct_revenue + FROM country_stats + ORDER BY revenue DESC + """ + print(f" SQL (DuckDB-native on Arrow data):\n {' '.join(analytics_sql.split())}\n") + + result = db.execute(analytics_sql).fetchall() + columns = [desc[0] for desc in db.description] + print(tabulate(result, headers=columns, tablefmt="rounded_grid")) + print(f" ({len(result)} rows)") + + db.unregister("ecommerce") + + except Exception as e: + print(f" ERROR: {e}") + + flight_conn.close() + db.close() + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + host = os.environ.get("FLIGHT_SQL_HOST", os.environ.get("ES_HOST", "flight-sql-duckdb")) + port = int(os.environ.get("FLIGHT_SQL_PORT", "32010")) + + wait_for_flight_sql(host, port) + run_demo(host, port) diff --git a/demo/init/load-ecommerce-data.sh b/demo/init/load-ecommerce-data.sh new file mode 100755 index 00000000..9f3982aa --- /dev/null +++ b/demo/init/load-ecommerce-data.sh @@ -0,0 +1,96 @@ +#!/bin/sh +set -e + +ES_URL="http://${ES_HOST}:${ES_PORT}" + +echo "==> Waiting for Elasticsearch at ${ES_URL}..." +until curl -sf "${ES_URL}/_cluster/health?wait_for_status=green&timeout=1s" > /dev/null 2>&1; do + sleep 2 +done +echo "==> Elasticsearch is ready." + +# Check if index already exists +if curl -sf "${ES_URL}/ecommerce" > /dev/null 2>&1; then + echo "==> Index 'ecommerce' already exists, skipping data load." + exit 0 +fi + +echo "==> Creating 'ecommerce' index with mappings..." +curl -sf -X PUT "${ES_URL}/ecommerce" -H 'Content-Type: application/json' -d '{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "order_id": { "type": "keyword" }, + "order_date": { "type": "date" }, + "customer_name": { "type": "keyword" }, + "customer_email": { "type": "keyword" }, + "country": { "type": "keyword" }, + "city": { "type": "keyword" }, + "category": { "type": "keyword" }, + "product_name": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, + "quantity": { "type": "integer" }, + "unit_price": { "type": "double" }, + "total_price": { "type": "double" }, + "payment_method": { "type": "keyword" }, + "status": { "type": "keyword" } + } + } +}' +echo "" + +echo "==> Loading e-commerce sample data..." +curl -sf -X POST "${ES_URL}/_bulk" -H 'Content-Type: application/x-ndjson' -d ' +{"index":{"_index":"ecommerce","_id":"1"}} +{"order_id":"ORD-001","order_date":"2025-01-15T10:30:00Z","customer_name":"Alice Martin","customer_email":"alice@example.com","country":"France","city":"Paris","category":"Electronics","product_name":"Wireless Headphones","quantity":2,"unit_price":79.99,"total_price":159.98,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"2"}} +{"order_id":"ORD-002","order_date":"2025-01-15T14:20:00Z","customer_name":"Bob Smith","customer_email":"bob@example.com","country":"United States","city":"New York","category":"Books","product_name":"The Art of SQL","quantity":1,"unit_price":45.00,"total_price":45.00,"payment_method":"PayPal","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"3"}} +{"order_id":"ORD-003","order_date":"2025-01-16T09:15:00Z","customer_name":"Claire Dubois","customer_email":"claire@example.com","country":"France","city":"Lyon","category":"Clothing","product_name":"Winter Jacket","quantity":1,"unit_price":129.50,"total_price":129.50,"payment_method":"Credit Card","status":"shipped"} +{"index":{"_index":"ecommerce","_id":"4"}} +{"order_id":"ORD-004","order_date":"2025-01-16T11:45:00Z","customer_name":"David Chen","customer_email":"david@example.com","country":"Germany","city":"Berlin","category":"Electronics","product_name":"USB-C Hub","quantity":3,"unit_price":35.99,"total_price":107.97,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"5"}} +{"order_id":"ORD-005","order_date":"2025-01-17T08:00:00Z","customer_name":"Eva Rossi","customer_email":"eva@example.com","country":"Italy","city":"Rome","category":"Home","product_name":"Ceramic Vase","quantity":2,"unit_price":55.00,"total_price":110.00,"payment_method":"Bank Transfer","status":"processing"} +{"index":{"_index":"ecommerce","_id":"6"}} +{"order_id":"ORD-006","order_date":"2025-01-17T16:30:00Z","customer_name":"Frank Mueller","customer_email":"frank@example.com","country":"Germany","city":"Munich","category":"Electronics","product_name":"Mechanical Keyboard","quantity":1,"unit_price":149.99,"total_price":149.99,"payment_method":"PayPal","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"7"}} +{"order_id":"ORD-007","order_date":"2025-01-18T12:00:00Z","customer_name":"Grace Kim","customer_email":"grace@example.com","country":"South Korea","city":"Seoul","category":"Books","product_name":"Elasticsearch in Action","quantity":2,"unit_price":52.00,"total_price":104.00,"payment_method":"Credit Card","status":"shipped"} +{"index":{"_index":"ecommerce","_id":"8"}} +{"order_id":"ORD-008","order_date":"2025-01-18T15:45:00Z","customer_name":"Henri Dupont","customer_email":"henri@example.com","country":"France","city":"Marseille","category":"Clothing","product_name":"Running Shoes","quantity":1,"unit_price":89.99,"total_price":89.99,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"9"}} +{"order_id":"ORD-009","order_date":"2025-01-19T10:20:00Z","customer_name":"Isabella Garcia","customer_email":"isabella@example.com","country":"Spain","city":"Madrid","category":"Home","product_name":"LED Desk Lamp","quantity":2,"unit_price":42.50,"total_price":85.00,"payment_method":"PayPal","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"10"}} +{"order_id":"ORD-010","order_date":"2025-01-19T13:10:00Z","customer_name":"James Wilson","customer_email":"james@example.com","country":"United Kingdom","city":"London","category":"Electronics","product_name":"Portable SSD 1TB","quantity":1,"unit_price":99.99,"total_price":99.99,"payment_method":"Credit Card","status":"processing"} +{"index":{"_index":"ecommerce","_id":"11"}} +{"order_id":"ORD-011","order_date":"2025-01-20T09:00:00Z","customer_name":"Keiko Tanaka","customer_email":"keiko@example.com","country":"Japan","city":"Tokyo","category":"Books","product_name":"Data Engineering with Apache Arrow","quantity":1,"unit_price":59.99,"total_price":59.99,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"12"}} +{"order_id":"ORD-012","order_date":"2025-01-20T14:30:00Z","customer_name":"Luca Bianchi","customer_email":"luca@example.com","country":"Italy","city":"Milan","category":"Clothing","product_name":"Cashmere Scarf","quantity":3,"unit_price":65.00,"total_price":195.00,"payment_method":"Bank Transfer","status":"shipped"} +{"index":{"_index":"ecommerce","_id":"13"}} +{"order_id":"ORD-013","order_date":"2025-01-21T11:15:00Z","customer_name":"Maria Santos","customer_email":"maria@example.com","country":"Brazil","city":"Sao Paulo","category":"Electronics","product_name":"Wireless Mouse","quantity":4,"unit_price":29.99,"total_price":119.96,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"14"}} +{"order_id":"ORD-014","order_date":"2025-01-21T17:00:00Z","customer_name":"Nils Johansson","customer_email":"nils@example.com","country":"Sweden","city":"Stockholm","category":"Home","product_name":"Minimalist Wall Clock","quantity":1,"unit_price":75.00,"total_price":75.00,"payment_method":"PayPal","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"15"}} +{"order_id":"ORD-015","order_date":"2025-01-22T08:45:00Z","customer_name":"Olivia Brown","customer_email":"olivia@example.com","country":"United States","city":"San Francisco","category":"Electronics","product_name":"4K Monitor","quantity":1,"unit_price":349.99,"total_price":349.99,"payment_method":"Credit Card","status":"processing"} +{"index":{"_index":"ecommerce","_id":"16"}} +{"order_id":"ORD-016","order_date":"2025-01-22T12:30:00Z","customer_name":"Pierre Moreau","customer_email":"pierre@example.com","country":"France","city":"Toulouse","category":"Books","product_name":"Distributed Systems Design","quantity":1,"unit_price":48.50,"total_price":48.50,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"17"}} +{"order_id":"ORD-017","order_date":"2025-01-23T10:00:00Z","customer_name":"Qin Wei","customer_email":"qin@example.com","country":"China","city":"Shanghai","category":"Clothing","product_name":"Silk Tie","quantity":2,"unit_price":38.00,"total_price":76.00,"payment_method":"Bank Transfer","status":"shipped"} +{"index":{"_index":"ecommerce","_id":"18"}} +{"order_id":"ORD-018","order_date":"2025-01-23T15:20:00Z","customer_name":"Rachel Green","customer_email":"rachel@example.com","country":"United States","city":"Chicago","category":"Home","product_name":"French Press Coffee Maker","quantity":1,"unit_price":34.99,"total_price":34.99,"payment_method":"PayPal","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"19"}} +{"order_id":"ORD-019","order_date":"2025-01-24T09:30:00Z","customer_name":"Stefan Braun","customer_email":"stefan@example.com","country":"Germany","city":"Hamburg","category":"Electronics","product_name":"Noise Cancelling Earbuds","quantity":1,"unit_price":199.99,"total_price":199.99,"payment_method":"Credit Card","status":"delivered"} +{"index":{"_index":"ecommerce","_id":"20"}} +{"order_id":"ORD-020","order_date":"2025-01-24T14:00:00Z","customer_name":"Tanya Petrova","customer_email":"tanya@example.com","country":"Russia","city":"Moscow","category":"Books","product_name":"Apache Arrow Cookbook","quantity":2,"unit_price":41.00,"total_price":82.00,"payment_method":"Credit Card","status":"processing"} +' +echo "" + +echo "==> Refreshing index..." +curl -sf -X POST "${ES_URL}/ecommerce/_refresh" +echo "" + +echo "==> Verifying data load..." +COUNT=$(curl -sf "${ES_URL}/ecommerce/_count" | sed 's/.*"count":\([0-9]*\).*/\1/') +echo "==> Loaded ${COUNT} documents into 'ecommerce' index." +echo "==> E-commerce sample data ready." diff --git a/demo/superset/Dockerfile b/demo/superset/Dockerfile new file mode 100644 index 00000000..7e4079df --- /dev/null +++ b/demo/superset/Dockerfile @@ -0,0 +1,16 @@ +FROM apache/superset:latest + +USER root + +COPY elastic_dialect.py /tmp/dialect/elastic_dialect.py +COPY setup_dialect.py /tmp/dialect/setup.py + +RUN /app/.venv/bin/python -m ensurepip && \ + /app/.venv/bin/python -m pip install --no-cache-dir \ + flightsql-dbapi \ + psycopg2-binary && \ + /app/.venv/bin/python -m pip install --no-cache-dir /tmp/dialect/ + +USER superset + +COPY superset_config.py /app/pythonpath/superset_config.py diff --git a/demo/superset/dashboards/create_dashboards.py b/demo/superset/dashboards/create_dashboards.py new file mode 100644 index 00000000..9375e7b6 --- /dev/null +++ b/demo/superset/dashboards/create_dashboards.py @@ -0,0 +1,319 @@ +""" +Create demo charts and dashboards in Superset via REST API. +Runs inside the Superset container after init. +""" + +import json +import sys +import time + +import requests + +SUPERSET_URL = "http://superset:8088" +USERNAME = "admin" +PASSWORD = "admin123" + + +def authenticate(session): + """Authenticate and return access token + CSRF token.""" + # Get CSRF token + resp = session.get(f"{SUPERSET_URL}/api/v1/security/csrf_token/") + + # Login + resp = session.post( + f"{SUPERSET_URL}/api/v1/security/login", + json={"username": USERNAME, "password": PASSWORD, "provider": "db", "refresh": True}, + ) + resp.raise_for_status() + access_token = resp.json()["access_token"] + session.headers.update({"Authorization": f"Bearer {access_token}"}) + + # Get CSRF token (again, now authenticated) + resp = session.get(f"{SUPERSET_URL}/api/v1/security/csrf_token/") + csrf_token = resp.json()["result"] + session.headers.update({"X-CSRFToken": csrf_token}) + + return session + + +def get_dataset_id(session, table_name="ecommerce"): + """Find the dataset ID for the given table name.""" + resp = session.get(f"{SUPERSET_URL}/api/v1/dataset/", params={"q": json.dumps({"filters": [{"col": "table_name", "opr": "eq", "value": table_name}]})}) + resp.raise_for_status() + results = resp.json().get("result", []) + if not results: + return None + return results[0]["id"] + + +def create_chart(session, dataset_id, name, viz_type, params): + """Create a chart and return its ID.""" + params["viz_type"] = viz_type + params["datasource"] = f"{dataset_id}__table" + + payload = { + "datasource_id": dataset_id, + "datasource_type": "table", + "slice_name": name, + "viz_type": viz_type, + "params": json.dumps(params), + } + + resp = session.post(f"{SUPERSET_URL}/api/v1/chart/", json=payload) + if resp.status_code == 422: + print(f" (chart '{name}' may already exist)") + return None + resp.raise_for_status() + chart_id = resp.json()["id"] + print(f" Created chart '{name}' (id={chart_id})") + return chart_id + + +def simple_metric(column, aggregate, label): + """Build a SIMPLE metric definition.""" + return { + "expressionType": "SIMPLE", + "column": {"column_name": column}, + "aggregate": aggregate, + "label": label, + } + + +def sql_metric(expression, label): + """Build a SQL metric definition.""" + return { + "expressionType": "SQL", + "sqlExpression": expression, + "label": label, + } + + +def count_star_metric(label="Orders"): + """COUNT(*) metric.""" + return { + "expressionType": "SQL", + "sqlExpression": "COUNT(*)", + "label": label, + } + + +def create_all_charts(session, dataset_id): + """Create all demo charts, return list of (chart_id, name) tuples.""" + charts = [] + + # 1. Revenue by Country — horizontal bar + cid = create_chart(session, dataset_id, "Revenue by Country", "echarts_bar", { + "x_axis": "country", + "metrics": [simple_metric("total_price", "SUM", "Revenue")], + "groupby": [], + "order_desc": True, + "row_limit": 10, + "show_legend": False, + "x_axis_sort": "Revenue", + "x_axis_sort_asc": False, + }) + if cid: + charts.append(cid) + + # 2. Orders by Category — pie chart + cid = create_chart(session, dataset_id, "Orders by Category", "pie", { + "metric": count_star_metric(), + "groupby": ["category"], + "row_limit": 10, + "show_labels": True, + "label_type": "key_percent", + "number_format": "SMART_NUMBER", + "donut": False, + "show_legend": True, + }) + if cid: + charts.append(cid) + + # 3. Order Status Distribution — pie chart (donut) + cid = create_chart(session, dataset_id, "Order Status", "pie", { + "metric": count_star_metric(), + "groupby": ["status"], + "row_limit": 10, + "show_labels": True, + "label_type": "key_value", + "number_format": "SMART_NUMBER", + "donut": True, + "show_legend": True, + }) + if cid: + charts.append(cid) + + # 4. Avg Order Value by Payment Method — bar + cid = create_chart(session, dataset_id, "Avg Order Value by Payment", "echarts_bar", { + "x_axis": "payment_method", + "metrics": [simple_metric("total_price", "AVG", "Avg Order Value")], + "groupby": [], + "order_desc": True, + "row_limit": 10, + "show_legend": False, + "y_axis_format": ",.2f", + }) + if cid: + charts.append(cid) + + # 5. Revenue by Category — bar + cid = create_chart(session, dataset_id, "Revenue by Category", "echarts_bar", { + "x_axis": "category", + "metrics": [simple_metric("total_price", "SUM", "Revenue")], + "groupby": [], + "order_desc": True, + "row_limit": 10, + "show_legend": False, + "x_axis_sort": "Revenue", + "x_axis_sort_asc": False, + "y_axis_format": ",.2f", + }) + if cid: + charts.append(cid) + + # 6. Top Customers — table + cid = create_chart(session, dataset_id, "Top Customers by Spend", "table", { + "metrics": [simple_metric("total_price", "SUM", "Total Spend"), count_star_metric()], + "groupby": ["customer_name", "country"], + "order_desc": True, + "row_limit": 10, + "all_columns": [], + "include_time": False, + "order_by_cols": [], + "table_timestamp_format": "smart_date", + }) + if cid: + charts.append(cid) + + return charts + + +def build_dashboard_layout(chart_ids): + """Build a simple grid layout for the dashboard.""" + # Superset dashboard layout uses a component tree with rows/columns + # Each chart is placed in a CHART component inside a ROW + components = { + "DASHBOARD_VERSION_KEY": "v2", + "ROOT_ID": {"type": "ROOT", "id": "ROOT_ID", "children": ["GRID_ID"]}, + "GRID_ID": {"type": "GRID", "id": "GRID_ID", "children": []}, + "HEADER_ID": { + "type": "HEADER", + "id": "HEADER_ID", + "meta": {"text": "E-Commerce Analytics (Elasticsearch)"}, + }, + } + + # Place charts in rows of 2 + row_idx = 0 + for i in range(0, len(chart_ids), 2): + row_id = f"ROW-row{row_idx}" + row_children = [] + + for j, cid in enumerate(chart_ids[i : i + 2]): + chart_comp_id = f"CHART-chart{i + j}" + col_id = f"COLUMN-col{i + j}" + row_children.append(col_id) + components[col_id] = { + "type": "COLUMN", + "id": col_id, + "children": [chart_comp_id], + "meta": {"width": 6, "background": "BACKGROUND_TRANSPARENT"}, + } + components[chart_comp_id] = { + "type": "CHART", + "id": chart_comp_id, + "children": [], + "meta": { + "width": 6, + "height": 50, + "chartId": cid, + "sliceName": "", + }, + } + + components[row_id] = { + "type": "ROW", + "id": row_id, + "children": row_children, + "meta": {"background": "BACKGROUND_TRANSPARENT"}, + } + components["GRID_ID"]["children"].append(row_id) + row_idx += 1 + + return components + + +def create_dashboard(session, chart_ids): + """Create the demo dashboard.""" + position_json = build_dashboard_layout(chart_ids) + + payload = { + "dashboard_title": "E-Commerce Analytics", + "slug": "ecommerce-demo", + "position_json": json.dumps(position_json), + "published": True, + } + + resp = session.post(f"{SUPERSET_URL}/api/v1/dashboard/", json=payload) + if resp.status_code == 422: + print(" (dashboard may already exist)") + return None + resp.raise_for_status() + dashboard_id = resp.json()["id"] + print(f" Created dashboard 'E-Commerce Analytics' (id={dashboard_id})") + return dashboard_id + + +def associate_charts_with_dashboard(session, chart_ids, dashboard_id): + """Link each chart to the dashboard (required for Superset to display them).""" + for cid in chart_ids: + resp = session.put( + f"{SUPERSET_URL}/api/v1/chart/{cid}", + json={"dashboards": [dashboard_id]}, + ) + if not resp.ok: + print(f" WARNING: Could not associate chart {cid} with dashboard {dashboard_id}") + + +def main(): + print("==> Creating demo charts and dashboard...") + + session = requests.Session() + session.headers.update({"Content-Type": "application/json", "Accept": "application/json"}) + + # Wait for Superset to be ready + for _ in range(30): + try: + resp = session.get(f"{SUPERSET_URL}/health") + if resp.status_code == 200: + break + except requests.ConnectionError: + pass + time.sleep(2) + + try: + authenticate(session) + except Exception as e: + print(f" ERROR: Authentication failed: {e}", file=sys.stderr) + sys.exit(1) + + dataset_id = get_dataset_id(session) + if not dataset_id: + print(" ERROR: 'ecommerce' dataset not found. Skipping dashboard creation.", file=sys.stderr) + sys.exit(1) + + print(f" Found dataset 'ecommerce' (id={dataset_id})") + + chart_ids = create_all_charts(session, dataset_id) + + if chart_ids: + dashboard_id = create_dashboard(session, chart_ids) + if dashboard_id: + associate_charts_with_dashboard(session, chart_ids, dashboard_id) + print(f"==> Done! {len(chart_ids)} charts created.") + else: + print(" No charts created (may already exist).") + + +if __name__ == "__main__": + main() diff --git a/demo/superset/dashboards/datasources.yaml b/demo/superset/dashboards/datasources.yaml new file mode 100644 index 00000000..69f3020d --- /dev/null +++ b/demo/superset/dashboards/datasources.yaml @@ -0,0 +1,4 @@ +databases: + - database_name: "Elasticsearch (Flight SQL)" + sqlalchemy_uri: "elastiq://flight-sql-superset:32010?insecure=true" + extra: '{"allows_virtual_table_explore": true}' diff --git a/demo/superset/elastic_dialect.py b/demo/superset/elastic_dialect.py new file mode 100644 index 00000000..7610c953 --- /dev/null +++ b/demo/superset/elastic_dialect.py @@ -0,0 +1,14 @@ +""" +SQLAlchemy dialect that registers the ``elastiq://`` URI scheme. + +Delegates entirely to flightsql-dbapi's FlightSQLDialect — the Elasticsearch +Arrow Flight SQL server returns all required SqlInfo metadata and schema +information natively. +""" + +from flightsql.sqlalchemy import FlightSQLDialect + + +class ElastiqDialect(FlightSQLDialect): + name = "elastiq" + supports_statement_cache = False diff --git a/demo/superset/init-superset.sh b/demo/superset/init-superset.sh new file mode 100755 index 00000000..ed2ffa17 --- /dev/null +++ b/demo/superset/init-superset.sh @@ -0,0 +1,88 @@ +#!/bin/sh +set -e + +echo "==> Initializing Superset database..." +superset db upgrade + +echo "==> Creating admin user..." +superset fab create-admin \ + --username admin \ + --firstname Admin \ + --lastname User \ + --email admin@demo.local \ + --password admin123 || echo "(admin may already exist)" + +echo "==> Initializing Superset..." +superset init + +echo "==> Creating Arrow Flight SQL datasource..." +superset import-datasources -p /dashboards/datasources.yaml || echo "(datasource import failed, will create via API)" + +SUPERSET_URL="http://superset:8088" + +echo "==> Waiting for Superset API..." +until curl -sf "${SUPERSET_URL}/health" > /dev/null 2>&1; do + sleep 3 +done + +# Get CSRF token and session cookie +echo "==> Authenticating with Superset API..." +CSRF_TOKEN=$(curl -sf -c /tmp/superset-cookies.txt \ + "${SUPERSET_URL}/api/v1/security/csrf_token/" | sed 's/.*"result":"\([^"]*\)".*/\1/') + +LOGIN_RESP=$(curl -sf -b /tmp/superset-cookies.txt -c /tmp/superset-cookies.txt \ + -X POST "${SUPERSET_URL}/api/v1/security/login" \ + -H 'Content-Type: application/json' \ + -d '{"username":"admin","password":"admin123","provider":"db","refresh":true}') + +ACCESS_TOKEN=$(echo "${LOGIN_RESP}" | sed 's/.*"access_token":"\([^"]*\)".*/\1/') + +if [ -z "${ACCESS_TOKEN}" ]; then + echo "==> WARNING: Could not get access token. Manual datasource setup required." +else + FLIGHT_HOST="${FLIGHT_SQL_HOST:-flight-sql-superset}" + FLIGHT_PORT="${FLIGHT_SQL_PORT:-32010}" + + echo "==> Creating Flight SQL database connection..." + curl -sf -X POST "${SUPERSET_URL}/api/v1/database/" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "{ + \"database_name\": \"Elasticsearch (Flight SQL)\", + \"engine\": \"elastiq\", + \"sqlalchemy_uri\": \"elastiq://${FLIGHT_HOST}:${FLIGHT_PORT}?insecure=true\", + \"extra\": \"{\\\"allows_virtual_table_explore\\\": true}\" + }" || echo "(database may already exist)" + + echo "" + echo "==> Creating sample dataset for 'ecommerce' table..." + # Get database ID + DB_ID=$(curl -sf "${SUPERSET_URL}/api/v1/database/" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" | \ + sed 's/.*"id":\([0-9]*\).*/\1/' | head -1) + + if [ -n "${DB_ID}" ]; then + # Create dataset + curl -sf -X POST "${SUPERSET_URL}/api/v1/dataset/" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "{ + \"database\": ${DB_ID}, + \"table_name\": \"ecommerce\", + \"schema\": \"\" + }" || echo "(dataset may already exist)" + + echo "" + echo "==> Creating demo charts and dashboard..." + /app/.venv/bin/python /dashboards/create_dashboards.py || echo "(dashboard creation skipped)" + fi +fi + +echo "" +echo "============================================" +echo " Superset is ready!" +echo " Web UI: http://localhost:8088" +echo " Login: admin / admin123" +echo " Source: Elasticsearch (Flight SQL)" +echo " Dashboard: E-Commerce Analytics" +echo "============================================" diff --git a/demo/superset/setup_dialect.py b/demo/superset/setup_dialect.py new file mode 100644 index 00000000..0e0136fd --- /dev/null +++ b/demo/superset/setup_dialect.py @@ -0,0 +1,13 @@ +from setuptools import setup + +setup( + name="elastiq-dialect", + version="0.1.0", + py_modules=["elastic_dialect"], + entry_points={ + "sqlalchemy.dialects": [ + "elastiq = elastic_dialect:ElastiqDialect", + ], + }, + install_requires=["flightsql-dbapi"], +) diff --git a/demo/superset/superset_config.py b/demo/superset/superset_config.py new file mode 100644 index 00000000..b70ea896 --- /dev/null +++ b/demo/superset/superset_config.py @@ -0,0 +1,19 @@ +import os + +SECRET_KEY = os.environ.get("SUPERSET_SECRET_KEY", "demo-secret-key-change-in-production") + +SQLALCHEMY_DATABASE_URI = os.environ.get( + "SQLALCHEMY_DATABASE_URI", + "postgresql+psycopg2://superset:superset@superset-db:5432/superset", +) + +# Disable CSRF for demo simplicity +WTF_CSRF_ENABLED = False + +# Allow embedding dashboards +SESSION_COOKIE_SAMESITE = "Lax" + +# Enable feature flags for Flight SQL +FEATURE_FLAGS = { + "ENABLE_TEMPLATE_PROCESSING": True, +} From 8347e15ca36bb063d21d521e4f65a61b6833c2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Sun, 15 Mar 2026 11:12:26 +0100 Subject: [PATCH 02/10] feat: update chart types to use timeseries bar for revenue and average order value visualizations --- demo/superset/dashboards/create_dashboards.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/superset/dashboards/create_dashboards.py b/demo/superset/dashboards/create_dashboards.py index 9375e7b6..dc9bee4b 100644 --- a/demo/superset/dashboards/create_dashboards.py +++ b/demo/superset/dashboards/create_dashboards.py @@ -102,7 +102,7 @@ def create_all_charts(session, dataset_id): charts = [] # 1. Revenue by Country — horizontal bar - cid = create_chart(session, dataset_id, "Revenue by Country", "echarts_bar", { + cid = create_chart(session, dataset_id, "Revenue by Country", "echarts_timeseries_bar", { "x_axis": "country", "metrics": [simple_metric("total_price", "SUM", "Revenue")], "groupby": [], @@ -144,7 +144,7 @@ def create_all_charts(session, dataset_id): charts.append(cid) # 4. Avg Order Value by Payment Method — bar - cid = create_chart(session, dataset_id, "Avg Order Value by Payment", "echarts_bar", { + cid = create_chart(session, dataset_id, "Avg Order Value by Payment", "echarts_timeseries_bar", { "x_axis": "payment_method", "metrics": [simple_metric("total_price", "AVG", "Avg Order Value")], "groupby": [], @@ -157,7 +157,7 @@ def create_all_charts(session, dataset_id): charts.append(cid) # 5. Revenue by Category — bar - cid = create_chart(session, dataset_id, "Revenue by Category", "echarts_bar", { + cid = create_chart(session, dataset_id, "Revenue by Category", "echarts_timeseries_bar", { "x_axis": "category", "metrics": [simple_metric("total_price", "SUM", "Revenue")], "groupby": [], From 562b4497069437825bf00bc5d29daaa745752487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 07:25:24 +0100 Subject: [PATCH 03/10] feat: add timeseries limit metric for total spend in dashboard creation --- demo/superset/dashboards/create_dashboards.py | 1 + 1 file changed, 1 insertion(+) diff --git a/demo/superset/dashboards/create_dashboards.py b/demo/superset/dashboards/create_dashboards.py index dc9bee4b..97acbcc0 100644 --- a/demo/superset/dashboards/create_dashboards.py +++ b/demo/superset/dashboards/create_dashboards.py @@ -176,6 +176,7 @@ def create_all_charts(session, dataset_id): "metrics": [simple_metric("total_price", "SUM", "Total Spend"), count_star_metric()], "groupby": ["customer_name", "country"], "order_desc": True, + "timeseries_limit_metric": simple_metric("total_price", "SUM", "Total Spend"), "row_limit": 10, "all_columns": [], "include_time": False, From 41370356ffc8bc4c39aa271232c04865875bc17f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 10:46:21 +0100 Subject: [PATCH 04/10] feat: enhance aggregation handling for HAVING clause and COUNT functions Closed Issues #50, #53 --- .../sql/bridge/ElasticAggregation.scala | 7 +- .../elastic/sql/SQLQuerySpec.scala | 271 ++++++++++++++++++ .../app/softnetwork/elastic/sql/package.scala | 9 +- .../softnetwork/elastic/sql/query/Where.scala | 21 ++ .../elastic/sql/query/package.scala | 23 +- .../repl/ReplGatewayIntegrationSpec.scala | 37 +++ 6 files changed, 361 insertions(+), 7 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 0c303fa1..8707c25e 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -131,15 +131,16 @@ object ElasticAggregation { val aggName = { if (fieldAlias.isDefined) field - else if (distinct) - s"${aggType}_distinct_${sourceField.replace(".", "_")}" else { aggType match { + case COUNT if sourceField == "*" => + if (distinct) "count_distinct_all" else "count_all" + case _ if distinct => + s"${aggType}_distinct_${sourceField.replace(".", "_")}" case th: WindowFunction => s"${th.window.sql.toLowerCase}_${sourceField.replace(".", "_")}" case _ => s"${aggType}_${sourceField.replace(".", "_")}" - } } } diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index ea6999c1..58bbf61b 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3834,4 +3834,275 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |}""".stripMargin.replaceAll("\\s+", "") } + // === Issue #008: HAVING COUNT(*) without alias === + + it should "handle HAVING COUNT(*) without alias (issue #008)" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, City, COUNT(*) + |FROM Customers + |GROUP BY Country, City + |HAVING COUNT(*) > 1 + |ORDER BY COUNT(*) DESC""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1 + | }, + | "aggs": { + | "City": { + | "terms": { + | "field": "City", + | "min_doc_count": 1, + | "order": { + | "__c3": "desc" + | } + | }, + | "aggs": { + | "__c3": { + | "value_count": { + | "field": "_index" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "__c3": "__c3" + | }, + | "script": { + | "source": "params.__c3 > 1" + | } + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("==", " == ") + .replaceAll("&&", " && ") + .replaceAll(">", " > ") + } + + it should "handle HAVING COUNT(*) without alias combined with aliased aggregation" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, COUNT(*), AVG(age) AS avg_age + |FROM Customers + |GROUP BY Country + |HAVING COUNT(*) >= 1 AND AVG(age) > 25""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1 + | }, + | "aggs": { + | "__c2": { + | "value_count": { + | "field": "_index" + | } + | }, + | "avg_age": { + | "avg": { + | "field": "age" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "__c2": "__c2", + | "avg_age": "avg_age" + | }, + | "script": { + | "source": "params.__c2 >= 1 && params.avg_age > 25" + | } + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("==", " == ") + .replaceAll("&&", " && ") + .replaceAll(">=", " >= ") + .replaceAll("(?)>(?!=)", " > ") + } + + it should "handle HAVING COUNT(*) only in HAVING clause not in SELECT" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, SUM(age) AS total + |FROM Customers + |GROUP BY Country + |HAVING COUNT(*) > 1""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1 + | }, + | "aggs": { + | "total": { + | "sum": { + | "field": "age" + | } + | }, + | "count_all": { + | "value_count": { + | "field": "_index" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "count_all": "count_all" + | }, + | "script": { + | "source": "params.count_all > 1" + | } + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("==", " == ") + .replaceAll("&&", " && ") + .replaceAll(">", " > ") + } + + it should "handle HAVING COUNT(DISTINCT *) without alias" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, COUNT(DISTINCT *) + |FROM Customers + |GROUP BY Country + |HAVING COUNT(DISTINCT *) > 1""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1 + | }, + | "aggs": { + | "__c2": { + | "cardinality": { + | "field": "_index" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "__c2": "__c2" + | }, + | "script": { + | "source": "params.__c2 > 1" + | } + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("==", " == ") + .replaceAll("&&", " && ") + .replaceAll(">", " > ") + } + + it should "handle HAVING COUNT(DISTINCT *) only in HAVING clause not in SELECT" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, SUM(age) AS total + |FROM Customers + |GROUP BY Country + |HAVING COUNT(DISTINCT *) > 1""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1 + | }, + | "aggs": { + | "total": { + | "sum": { + | "field": "age" + | } + | }, + | "count_distinct_all": { + | "cardinality": { + | "field": "_index" + | } + | }, + | "having_filter": { + | "bucket_selector": { + | "buckets_path": { + | "count_distinct_all": "count_distinct_all" + | }, + | "script": { + | "source": "params.count_distinct_all > 1" + | } + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + .replaceAll("==", " == ") + .replaceAll("&&", " && ") + .replaceAll(">", " > ") + } + } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index db713684..0a3490fb 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -17,7 +17,12 @@ package app.softnetwork.elastic import app.softnetwork.elastic.schema.NamingUtils -import app.softnetwork.elastic.sql.function.aggregate.{AggregateFunction, COUNT, WindowFunction} +import app.softnetwork.elastic.sql.function.aggregate.{ + AggregateFunction, + COUNT, + CountAgg, + WindowFunction +} import app.softnetwork.elastic.sql.function.geo.DistanceUnit import app.softnetwork.elastic.sql.function.time.CurrentFunction import app.softnetwork.elastic.sql.parser.{Validation, Validator} @@ -925,7 +930,7 @@ package object sql { aggregateFunction match { case Some(af) => af match { - case COUNT => + case COUNT | _: CountAgg => aliasOrName match { case "*" => if (distinct) { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala index a1884277..c353faec 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Where.scala @@ -73,6 +73,27 @@ sealed trait Criteria extends Updateable with PainlessScript { case _ => Map.empty } + /** Extracts aggregation fields from criteria expressions (e.g. HAVING COUNT(*) > 1). Used to + * ensure aggregations referenced only in HAVING/WHERE are included in the query. Note: returned + * Fields are not updated against SingleSearch — they carry only the identifier and a computed + * alias (metricName), which is sufficient for metric aggregation creation via + * SQLAggregation.fromField. + */ + def extractAggregationFields: Seq[Field] = + this match { + case Predicate(left, _, right, _, _) => + left.extractAggregationFields ++ right.extractAggregationFields + case relation: ElasticRelation => relation.criteria.extractAggregationFields + case e: Expression => + val identifiers = Seq(e.identifier) ++ e.maybeValue.collect { case id: Identifier => id } + identifiers + .filter(_.aggregations.nonEmpty) + .flatMap { id => + id.metricName.map(name => Field(id, Some(Alias(name)))) + } + case _ => Seq.empty + } + def includes( bucket: Bucket, not: Boolean, diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala index 1597fa48..4e8d202b 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala @@ -244,10 +244,29 @@ package object query { lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.identifier.windows) - lazy val aggregates: Seq[Field] = - select.fieldsWithComputedAliases + lazy val aggregates: Seq[Field] = { + val selectAggs = select.fieldsWithComputedAliases .filter(f => f.isAggregation || f.isBucketScript) .filterNot(_.identifier.hasWindow) ++ windowFields + // Include aggregations referenced only in HAVING or WHERE clauses + val selectAggNames = selectAggs.flatMap(_.fieldAlias.map(_.alias)).toSet ++ + selectAggs.map(_.identifier.identifierName).toSet + val havingAggs = having + .flatMap(_.criteria) + .map(_.extractAggregationFields) + .getOrElse(Seq.empty) + val whereAggs = where + .flatMap(_.criteria) + .map(_.extractAggregationFields) + .getOrElse(Seq.empty) + val extraAggs = (havingAggs ++ whereAggs) + .filterNot(f => + f.fieldAlias.exists(a => selectAggNames.contains(a.alias)) || + selectAggNames.contains(f.identifier.identifierName) + ) + .distinctBy(_.fieldAlias.map(_.alias)) + selectAggs ++ extraAggs + } lazy val sqlAggregations: ListMap[String, SQLAggregation] = ListMap( diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala index 6e914c6b..682f5b01 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala @@ -727,6 +727,43 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { } } + // === Issue #008: HAVING COUNT(*) without alias === + + it should "support HAVING COUNT(*) without alias (issue #008)" in { + val sql = + """SELECT profile.city, + | COUNT(*), + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(*) >= 1 + |ORDER BY COUNT(*) DESC""".stripMargin + + assertSelectResult(System.nanoTime(), executeSync(sql)) + } + + it should "support HAVING COUNT(*) only in HAVING clause not in SELECT" in { + val sql = + """SELECT profile.city AS city, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(*) >= 1""".stripMargin + + assertSelectResult(System.nanoTime(), executeSync(sql)) + } + + it should "support HAVING COUNT(DISTINCT *) only in HAVING clause not in SELECT" in { + val sql = + """SELECT profile.city AS city, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(DISTINCT *) >= 1""".stripMargin + + assertSelectResult(System.nanoTime(), executeSync(sql)) + } + it should "support double-quoted identifiers (ANSI SQL-92, Superset compatibility)" in { val sql = """SELECT profile.city AS "City", From f1a57a879ac237e1e3facab123223610740e1c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 17:44:12 +0100 Subject: [PATCH 05/10] feat: support ORDER BY on aggregation aliases and improve aggregation field extraction Closed Issue #52 --- .../sql/bridge/ElasticAggregation.scala | 1 + .../elastic/sql/SQLQuerySpec.scala | 92 ++++++++++++++++++- .../client/ElasticConversionSpec.scala | 4 +- .../elastic/sql/query/OrderBy.scala | 8 +- .../elastic/sql/query/package.scala | 5 +- .../elastic/sql/parser/ParserSpec.scala | 2 +- .../repl/ReplGatewayIntegrationSpec.scala | 55 ++++++++++- 7 files changed, 157 insertions(+), 10 deletions(-) diff --git a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala index 8707c25e..56dc7701 100644 --- a/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala +++ b/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala @@ -111,6 +111,7 @@ object ElasticAggregation { bucketsDirection .get(identifier.identifierName) .orElse(bucketsDirection.get(identifier.aliasOrName)) + .orElse(fieldAlias.flatMap(a => bucketsDirection.get(a.alias))) val field = fieldAlias match { case Some(alias) => alias.alias diff --git a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 58bbf61b..cedde523 100644 --- a/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -3834,9 +3834,9 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { |}""".stripMargin.replaceAll("\\s+", "") } - // === Issue #008: HAVING COUNT(*) without alias === + // === Issue #50: HAVING COUNT(*) without alias === - it should "handle HAVING COUNT(*) without alias (issue #008)" in { + it should "handle HAVING COUNT(*) without alias (issue #50)" in { val select: ElasticSearchRequest = SelectStatement( """SELECT Country, City, COUNT(*) @@ -4004,6 +4004,94 @@ class SQLQuerySpec extends AnyFlatSpec with Matchers { .replaceAll(">", " > ") } + // === Issue #52: ORDER BY on aggregation alias === + + it should "handle ORDER BY on aggregation alias (issue #52)" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country, SUM(age) AS "Total Age", COUNT(*) AS "Orders" + |FROM Customers + |GROUP BY Country + |ORDER BY "Total Age" DESC + |LIMIT 10""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "size": 10, + | "min_doc_count": 1, + | "order": { + | "Total Age": "desc" + | } + | }, + | "aggs": { + | "Total Age": { + | "sum": { + | "field": "age" + | } + | }, + | "Orders": { + | "value_count": { + | "field": "_index" + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("(?m)^\\s+", "") + .replaceAll("\\n", "") + .replaceAll(":\\s+", ":") + .replaceAll(",\\s+", ",") + } + + it should "handle ORDER BY on aggregation not in SELECT (issue #52)" in { + val select: ElasticSearchRequest = + SelectStatement( + """SELECT Country + |FROM Customers + |GROUP BY Country + |ORDER BY COUNT(*) DESC""".stripMargin + ) + val query = select.query + query shouldBe + """{ + | "query": { + | "match_all": {} + | }, + | "size": 0, + | "_source": false, + | "aggs": { + | "Country": { + | "terms": { + | "field": "Country", + | "min_doc_count": 1, + | "order": { + | "count_all": "desc" + | } + | }, + | "aggs": { + | "count_all": { + | "value_count": { + | "field": "_index" + | } + | } + | } + | } + | } + |}""".stripMargin + .replaceAll("\\s+", "") + } + it should "handle HAVING COUNT(DISTINCT *) without alias" in { val select: ElasticSearchRequest = SelectStatement( diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala index 823c1240..016302bc 100644 --- a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -690,9 +690,9 @@ class ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConver rows.foreach(println) rows.size shouldBe 1 val row = rows.head - // Issue #002: bucket_root must not leak into the output + // Issue #42: bucket_root must not leak into the output row.keys should not contain "bucket_root" - // Issue #003: "*" must not appear as a column + // Issue #43: "*" must not appear as a column row.keys should not contain "*" // Only the aggregation column should be present row.keys should contain("COUNT(*)") diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala index 3f3212dd..b0a4f062 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala @@ -17,7 +17,7 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.function.{Function, FunctionChain} -import app.softnetwork.elastic.sql.{Expr, Identifier, TokenRegex, Updateable} +import app.softnetwork.elastic.sql.{Alias, Expr, Identifier, TokenRegex, Updateable} case object OrderBy extends Expr("ORDER BY") with TokenRegex @@ -42,6 +42,12 @@ case class FieldSort( def isScriptSort: Boolean = functions.nonEmpty && !hasAggregation && field.fieldAlias.isEmpty def isBucketScript: Boolean = functions.nonEmpty && !isAggregation && hasAggregation + + def extractAggregationFields: Seq[Field] = + if (field.aggregations.nonEmpty) + field.metricName.map(name => Field(field, Some(Alias(name)))).toSeq + else + Seq.empty } case class OrderBy(sorts: Seq[FieldSort]) extends Updateable { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala index 4e8d202b..c97ac812 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala @@ -259,7 +259,10 @@ package object query { .flatMap(_.criteria) .map(_.extractAggregationFields) .getOrElse(Seq.empty) - val extraAggs = (havingAggs ++ whereAggs) + val orderByAggs = orderBy + .map(_.sorts.flatMap(_.extractAggregationFields)) + .getOrElse(Seq.empty) + val extraAggs = (havingAggs ++ whereAggs ++ orderByAggs) .filterNot(f => f.fieldAlias.exists(a => selectAggNames.contains(a.alias)) || selectAggNames.contains(f.identifier.identifierName) diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala index d876caa4..f4048414 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala @@ -2896,7 +2896,7 @@ class ParserSpec extends AnyFlatSpec with Matchers { } } - // ── Computed aliases for unnamed expression columns (Issue #001) ─────────── + // ── Computed aliases for unnamed expression columns (Issue #41) ──────────── it should "generate computed aliases for aggregate functions without explicit alias" in { val sql = """SELECT COUNT(*), SUM(quantity) FROM t""" diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala index 682f5b01..3c77dea7 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala @@ -727,9 +727,9 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { } } - // === Issue #008: HAVING COUNT(*) without alias === + // === Issue #50: HAVING COUNT(*) without alias === - it should "support HAVING COUNT(*) without alias (issue #008)" in { + it should "support HAVING COUNT(*) without alias (issue #50)" in { val sql = """SELECT profile.city, | COUNT(*), @@ -764,6 +764,55 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { assertSelectResult(System.nanoTime(), executeSync(sql)) } + // === Issue #52: ORDER BY on aggregation alias === + + it should "support ORDER BY on aggregation alias (issue #52)" in { + val sql = + """SELECT profile.city AS city, + | COUNT(*) AS "Total", + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |ORDER BY "Total" DESC""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Verify ordering: rows should be sorted by Total descending + val totals = rows.map(r => r.getOrElse("Total", "0").toString.toDouble) + totals shouldBe totals.sortWith(_ > _) + } + + it should "support ORDER BY on aggregation not in SELECT (issue #52)" in { + val sql = + """SELECT profile.city AS city, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |ORDER BY COUNT(*) DESC""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // NOTE: count_all leaks into results even though it's not in SELECT (see issue #55) + rows.foreach { row => + row.keys should contain("city") + row.keys should contain("avg_age") + } + } + it should "support double-quoted identifiers (ANSI SQL-92, Superset compatibility)" in { val sql = """SELECT profile.city AS "City", @@ -808,7 +857,7 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { firstRow.keys should contain("__c3") // AVG(age) } - // Issue #006 — Result column order must match SELECT clause order + // Issue #47 — Result column order must match SELECT clause order it should "preserve SELECT column order: aggregation first, bucket second" in { val sql = """SELECT COUNT(*), From 739779fa09d53ac8084ee3e28a0ab4fa47d62602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 18:25:07 +0100 Subject: [PATCH 06/10] feat: add support for auxiliary aggregations in HAVING, WHERE, and ORDER BY clauses Close Issue #55 --- .../elastic/client/ElasticConversion.scala | 4 +- .../softnetwork/elastic/client/package.scala | 6 ++- .../elastic/sql/query/Select.scala | 1 + .../elastic/sql/query/package.scala | 28 ++++++++++---- .../repl/ReplGatewayIntegrationSpec.scala | 37 +++++++++++++++---- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala index 5225b01e..c32fa8ae 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala @@ -644,7 +644,9 @@ trait ElasticConversion { } } match { case Some(m) => - metrics ++= Seq(m._1 -> m._2) + // Skip auxiliary aggregations (from HAVING/WHERE/ORDER BY only, not in SELECT) + val isAuxiliary = aggregations.get(m._1).exists(_.auxiliary) + if (!isAuxiliary) metrics ++= Seq(m._1 -> m._2) case _ => } } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index fe6d06f8..b1e1ff6b 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -364,7 +364,8 @@ package object client extends SerializationApi { sourceField: String, windowing: Boolean, bucketPath: String, - bucketRoot: String + bucketRoot: String, + auxiliary: Boolean = false ) { def multivalued: Boolean = aggType == AggregationType.ArrayAgg def singleValued: Boolean = !multivalued @@ -394,7 +395,8 @@ package object client extends SerializationApi { agg.sourceField, agg.aggType.isWindowing, agg.bucketPath, - agg.bucketRoot + agg.bucketRoot, + agg.auxiliary ) } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index b8db2adf..c0bbb37a 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -159,6 +159,7 @@ case class SQLAggregation( field: String, sourceField: String, distinct: Boolean = false, + auxiliary: Boolean = false, aggType: AggregateFunction, direction: Option[SortOrder] = None, nestedElement: Option[NestedElement] = None, diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala index c97ac812..b3d9084e 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/package.scala @@ -244,11 +244,13 @@ package object query { lazy val windowFunctions: Seq[WindowFunction] = windowFields.flatMap(_.identifier.windows) - lazy val aggregates: Seq[Field] = { - val selectAggs = select.fieldsWithComputedAliases + private lazy val selectAggs: Seq[Field] = + select.fieldsWithComputedAliases .filter(f => f.isAggregation || f.isBucketScript) .filterNot(_.identifier.hasWindow) ++ windowFields - // Include aggregations referenced only in HAVING or WHERE clauses + + // Aggregations referenced only in HAVING, WHERE, or ORDER BY clauses (not in SELECT) + private lazy val auxiliaryAggs: Seq[Field] = { val selectAggNames = selectAggs.flatMap(_.fieldAlias.map(_.alias)).toSet ++ selectAggs.map(_.identifier.identifierName).toSet val havingAggs = having @@ -262,19 +264,29 @@ package object query { val orderByAggs = orderBy .map(_.sorts.flatMap(_.extractAggregationFields)) .getOrElse(Seq.empty) - val extraAggs = (havingAggs ++ whereAggs ++ orderByAggs) + (havingAggs ++ whereAggs ++ orderByAggs) .filterNot(f => f.fieldAlias.exists(a => selectAggNames.contains(a.alias)) || selectAggNames.contains(f.identifier.identifierName) ) - .distinctBy(_.fieldAlias.map(_.alias)) - selectAggs ++ extraAggs + .groupBy(_.fieldAlias.map(_.alias)) + .map(_._2.head) + .toSeq } - lazy val sqlAggregations: ListMap[String, SQLAggregation] = + lazy val aggregates: Seq[Field] = selectAggs ++ auxiliaryAggs + + lazy val sqlAggregations: ListMap[String, SQLAggregation] = { + val auxiliaryAggNames = auxiliaryAggs + .flatMap(f => SQLAggregation.fromField(f, this)) + .map(_.aggName) + .toSet ListMap( - aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a): _* + aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map { a => + a.aggName -> (if (auxiliaryAggNames.contains(a.aggName)) a.copy(auxiliary = true) else a) + }: _* ) + } lazy val excludes: Seq[String] = select.except.map(_.fields.map(_.sourceField)).getOrElse(Nil) diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala index 3c77dea7..4caafce0 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala @@ -742,7 +742,7 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { assertSelectResult(System.nanoTime(), executeSync(sql)) } - it should "support HAVING COUNT(*) only in HAVING clause not in SELECT" in { + it should "support HAVING COUNT(*) only in HAVING clause not in SELECT (issue #55)" in { val sql = """SELECT profile.city AS city, | AVG(age) AS avg_age @@ -750,10 +750,22 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { |GROUP BY profile.city |HAVING COUNT(*) >= 1""".stripMargin - assertSelectResult(System.nanoTime(), executeSync(sql)) + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Auxiliary aggregation COUNT(*) should not leak into result columns + rows.foreach { row => + row.keys.toSet shouldBe Set("city", "avg_age") + } } - it should "support HAVING COUNT(DISTINCT *) only in HAVING clause not in SELECT" in { + it should "support HAVING COUNT(DISTINCT *) only in HAVING clause not in SELECT (issue #55)" in { val sql = """SELECT profile.city AS city, | AVG(age) AS avg_age @@ -761,7 +773,19 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { |GROUP BY profile.city |HAVING COUNT(DISTINCT *) >= 1""".stripMargin - assertSelectResult(System.nanoTime(), executeSync(sql)) + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Auxiliary aggregation COUNT(DISTINCT *) should not leak into result columns + rows.foreach { row => + row.keys.toSet shouldBe Set("city", "avg_age") + } } // === Issue #52: ORDER BY on aggregation alias === @@ -806,10 +830,9 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { case other => fail(s"Unexpected result type: $other") } rows should not be empty - // NOTE: count_all leaks into results even though it's not in SELECT (see issue #55) + // Auxiliary aggregation COUNT(*) should not leak into result columns (issue #55) rows.foreach { row => - row.keys should contain("city") - row.keys should contain("avg_age") + row.keys.toSet shouldBe Set("city", "avg_age") } } From 00a4e2f301a3f62b24b8f66a8b215b2387abe625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 18:28:20 +0100 Subject: [PATCH 07/10] chore: update version numbers for JDBC driver and Java client to 0.1.3 and 0.19.0 --- README.md | 18 +++++++++--------- build.sbt | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c29e4699..0ef96cde 100644 --- a/README.md +++ b/README.md @@ -172,10 +172,10 @@ Download the self-contained fat JAR for your Elasticsearch version: | Elasticsearch Version | Artifact | |-----------------------|----------------------------------------| -| ES 6.x | `softclient4es6-jdbc-driver-0.1.2.jar` | -| ES 7.x | `softclient4es7-jdbc-driver-0.1.2.jar` | -| ES 8.x | `softclient4es8-jdbc-driver-0.1.2.jar` | -| ES 9.x | `softclient4es9-jdbc-driver-0.1.2.jar` | +| ES 6.x | `softclient4es6-jdbc-driver-0.1.3.jar` | +| ES 7.x | `softclient4es7-jdbc-driver-0.1.3.jar` | +| ES 8.x | `softclient4es8-jdbc-driver-0.1.3.jar` | +| ES 9.x | `softclient4es9-jdbc-driver-0.1.3.jar` | ```text JDBC URL: jdbc:elastic://localhost:9200 @@ -190,20 +190,20 @@ Driver class: app.softnetwork.elastic.jdbc.ElasticDriver app.softnetwork.elastic softclient4es8-jdbc-driver - 0.1.2 + 0.1.3 ``` **Gradle:** ```groovy -implementation 'app.softnetwork.elastic:softclient4es8-jdbc-driver:0.1.2' +implementation 'app.softnetwork.elastic:softclient4es8-jdbc-driver:0.1.3' ``` **sbt:** ```scala -libraryDependencies += "app.softnetwork.elastic" % "softclient4es8-jdbc-driver" % "0.1.2" +libraryDependencies += "app.softnetwork.elastic" % "softclient4es8-jdbc-driver" % "0.1.3" ``` The JDBC driver JARs are Scala-version-independent (no `_2.12` or `_2.13` suffix) and include all required dependencies. @@ -231,11 +231,11 @@ For programmatic access, add SoftClient4ES to your project. resolvers += "Softnetwork" at "https://softnetwork.jfrog.io/artifactory/releases/" // Choose your Elasticsearch version -libraryDependencies += "app.softnetwork.elastic" %% "softclient4es8-java-client" % "0.18.0" +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es8-java-client" % "0.19.0" // Add the community extensions for materialized views (optional) libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-community-extensions" % "0.1.3" // Add the JDBC driver if you want to use it from Scala (optional) -libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-jdbc-driver" % "0.1.2" +libraryDependencies += "app.softnetwork.elastic" %% "softclient4es-jdbc-driver" % "0.1.3" ``` ```scala diff --git a/build.sbt b/build.sbt index a546141d..ee25df3b 100644 --- a/build.sbt +++ b/build.sbt @@ -20,7 +20,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.18.0" +ThisBuild / version := "0.19.0" ThisBuild / scalaVersion := scala213 From 242269956a5a6b1b2f81fb72b6c6633dbca2d6bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 19:01:36 +0100 Subject: [PATCH 08/10] feat: allow DESC and DESCRIBE commands without TABLE keyword in parser Close Issue #55 --- .../elastic/sql/parser/Parser.scala | 4 +- .../elastic/sql/parser/ParserSpec.scala | 33 ++++++++++ .../repl/ReplGatewayIntegrationSpec.scala | 62 ++++++++++++++++++- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala index 7340b561..b3ed0f56 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/parser/Parser.scala @@ -351,7 +351,7 @@ object Parser } def describeTable: PackratParser[DescribeTable] = - (("DESCRIBE" | "DESC") ~ "TABLE") ~ ident ^^ { case _ ~ table => + (("DESCRIBE" | "DESC") ~ opt("TABLE")) ~ ident ^^ { case _ ~ table => DescribeTable(table) } @@ -907,7 +907,6 @@ object Parser showTables | showTable | showCreateTable | - describeTable | showPipelines | showPipeline | showCreatePipeline | @@ -917,6 +916,7 @@ object Parser showMaterializedView | showCreateMaterializedView | describeMaterializedView | + describeTable | showWatchers | showWatcherStatus | showEnrichPolicy | diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala index f4048414..fdd81391 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala @@ -1602,6 +1602,39 @@ class ParserSpec extends AnyFlatSpec with Matchers { } } + it should "parse DESC without TABLE keyword" in { + val sql = "DESC ecommerce" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case DescribeTable("ecommerce") => + case _ => fail("Expected DescribeTable") + } + } + + it should "parse DESCRIBE without TABLE keyword" in { + val sql = "DESCRIBE ecommerce" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case DescribeTable("ecommerce") => + case _ => fail("Expected DescribeTable") + } + } + + it should "still parse DESC PIPELINE correctly" in { + val sql = "DESC PIPELINE mypipe" + val result = Parser(sql) + result.isRight shouldBe true + val stmt = result.toOption.get + stmt match { + case DescribePipeline("mypipe") => + case _ => fail("Expected DescribePipeline") + } + } + behavior of "Parser DDL with Pipeline Statements" it should "parse CREATE OR REPLACE PIPELINE" in { diff --git a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala index 4caafce0..087289ef 100644 --- a/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala +++ b/testkit/src/main/scala/app/softnetwork/elastic/client/repl/ReplGatewayIntegrationSpec.scala @@ -93,6 +93,18 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { rows.exists(_("Field") == "profile.city") shouldBe true } + it should "describe a table using DESC without TABLE keyword" in { + val rows = assertQueryRows(System.nanoTime(), executeSync("DESC desc_users")) + rows.size shouldBe 6 + rows.exists(row => + row("Field") == "id" && + row("Null") == "no" && + row("Key") == "PRI" + ) shouldBe true + rows.exists(_("Field") == "name") shouldBe true + rows.exists(_("Field") == "profile.city") shouldBe true + } + // ========================================================================= // 2. DDL — CREATE TABLE, ALTER TABLE, DROP TABLE, TRUNCATE TABLE // ========================================================================= @@ -836,6 +848,54 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { } } + it should "not leak different auxiliary aggs from combined HAVING + ORDER BY (issue #55)" in { + val sql = + """SELECT profile.city AS city, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(*) >= 1 + |ORDER BY MIN(age) ASC""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // Both COUNT(*) and MIN(age) are auxiliary — neither should appear in results + rows.foreach { row => + row.keys.toSet shouldBe Set("city", "avg_age") + } + } + + it should "keep aggregation in results when it appears in both SELECT and HAVING (issue #55)" in { + val sql = + """SELECT profile.city AS city, + | COUNT(*) AS cnt, + | AVG(age) AS avg_age + |FROM dql_users + |GROUP BY profile.city + |HAVING COUNT(*) >= 1""".stripMargin + + val res = executeSync(sql) + renderResults(System.nanoTime(), res) + res shouldBe a[ExecutionSuccess] + val rows = res.asInstanceOf[ExecutionSuccess].result match { + case q: QueryRows => q.rows + case q: QueryStructured => q.response.results + case other => fail(s"Unexpected result type: $other") + } + rows should not be empty + // COUNT(*) is in SELECT as cnt — it must NOT be marked auxiliary + rows.foreach { row => + row.keys.toSet shouldBe Set("city", "cnt", "avg_age") + } + } + it should "support double-quoted identifiers (ANSI SQL-92, Superset compatibility)" in { val sql = """SELECT profile.city AS "City", @@ -1265,7 +1325,7 @@ trait ReplGatewayIntegrationSpec extends ReplIntegrationTestKit { assertDml(System.nanoTime(), executeSync(insert)) // Wait for indexing - Thread.sleep(1000) + // Thread.sleep(1000) val createPolicy = """CREATE OR REPLACE ENRICH POLICY my_policy From e397fbaf3bd4dc373208cb6c5b68592ded1121e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 19:55:55 +0100 Subject: [PATCH 09/10] Each SQL operation using Jest created a new HTTP connection pool instead of reusing the existing one. Fix by using lazy val --- .../elastic/client/JestClientInsertByQuerySpec.scala | 2 +- .../softnetwork/elastic/client/JestClientPipelineApiSpec.scala | 2 +- .../softnetwork/elastic/client/JestClientTemplateApiSpec.scala | 2 +- .../app/softnetwork/elastic/client/JestGatewayApiSpec.scala | 2 +- .../elastic/client/repl/JestReplGatewayIntegrationSpec.scala | 2 +- .../elastic/client/repl/Rhl6ReplGatewayIntegrationSpec.scala | 2 +- .../elastic/client/repl/Rhl7ReplGatewayIntegrationSpec.scala | 2 +- .../client/repl/JavaClient8ReplGatewayIntegrationSpec.scala | 2 +- .../client/repl/JavaClient9ReplGatewayIntegrationSpec.scala | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientInsertByQuerySpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientInsertByQuerySpec.scala index 631ca626..d22df129 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientInsertByQuerySpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientInsertByQuerySpec.scala @@ -4,7 +4,7 @@ import app.softnetwork.elastic.client.spi.JestClientSpi import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JestClientInsertByQuerySpec extends InsertByQuerySpec with ElasticDockerTestKit { - override def client: ElasticClientApi = new JestClientSpi().client(elasticConfig) + override lazy val client: ElasticClientApi = new JestClientSpi().client(elasticConfig) override def elasticVersion: String = "6.7.2" } diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientPipelineApiSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientPipelineApiSpec.scala index aa59f134..6112be48 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientPipelineApiSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientPipelineApiSpec.scala @@ -4,5 +4,5 @@ import app.softnetwork.elastic.client.spi.JestClientSpi import app.softnetwork.elastic.scalatest.EmbeddedElasticTestKit class JestClientPipelineApiSpec extends PipelineApiSpec with EmbeddedElasticTestKit { - override def client: PipelineApi = new JestClientSpi().client(elasticConfig) + override lazy val client: PipelineApi = new JestClientSpi().client(elasticConfig) } diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientTemplateApiSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientTemplateApiSpec.scala index 3882b343..e6160b4a 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientTemplateApiSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientTemplateApiSpec.scala @@ -4,7 +4,7 @@ import app.softnetwork.elastic.client.spi.JestClientSpi import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JestClientTemplateApiSpec extends TemplateApiSpec with ElasticDockerTestKit { - override def client: TemplateApi with VersionApi = new JestClientSpi().client(elasticConfig) + override lazy val client: TemplateApi with VersionApi = new JestClientSpi().client(elasticConfig) override def elasticVersion: String = "6.7.2" } diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestGatewayApiSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestGatewayApiSpec.scala index a79e53e9..ded66b82 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestGatewayApiSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestGatewayApiSpec.scala @@ -4,7 +4,7 @@ import app.softnetwork.elastic.client.spi.JestClientSpi import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JestGatewayApiSpec extends GatewayApiIntegrationSpec with ElasticDockerTestKit { - override def client: GatewayApi = new JestClientSpi().client(elasticConfig) + override lazy val client: GatewayApi = new JestClientSpi().client(elasticConfig) override def elasticVersion: String = "6.7.2" } diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/repl/JestReplGatewayIntegrationSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/repl/JestReplGatewayIntegrationSpec.scala index d570df47..c8fcd125 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/repl/JestReplGatewayIntegrationSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/repl/JestReplGatewayIntegrationSpec.scala @@ -6,7 +6,7 @@ import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JestReplGatewayIntegrationSpec extends ReplGatewayIntegrationSpec with ElasticDockerTestKit { - override def gateway: GatewayApi = new JestClientSpi().client(elasticConfig) + override lazy val gateway: GatewayApi = new JestClientSpi().client(elasticConfig) override def elasticVersion: String = "6.7.2" } diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl6ReplGatewayIntegrationSpec.scala b/es6/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl6ReplGatewayIntegrationSpec.scala index 359c2fdb..d7ce7c52 100644 --- a/es6/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl6ReplGatewayIntegrationSpec.scala +++ b/es6/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl6ReplGatewayIntegrationSpec.scala @@ -5,5 +5,5 @@ import app.softnetwork.elastic.client.spi.RestHighLevelClientSpi import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class Rhl6ReplGatewayIntegrationSpec extends ReplGatewayIntegrationSpec with ElasticDockerTestKit { - override def gateway: GatewayApi = new RestHighLevelClientSpi().client(elasticConfig) + override lazy val gateway: GatewayApi = new RestHighLevelClientSpi().client(elasticConfig) } diff --git a/es7/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl7ReplGatewayIntegrationSpec.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl7ReplGatewayIntegrationSpec.scala index 72907729..27cd992c 100644 --- a/es7/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl7ReplGatewayIntegrationSpec.scala +++ b/es7/rest/src/test/scala/app/softnetwork/elastic/client/repl/Rhl7ReplGatewayIntegrationSpec.scala @@ -5,5 +5,5 @@ import app.softnetwork.elastic.client.spi.RestHighLevelClientSpi import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class Rhl7ReplGatewayIntegrationSpec extends ReplGatewayIntegrationSpec with ElasticDockerTestKit { - override def gateway: GatewayApi = new RestHighLevelClientSpi().client(elasticConfig) + override lazy val gateway: GatewayApi = new RestHighLevelClientSpi().client(elasticConfig) } diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient8ReplGatewayIntegrationSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient8ReplGatewayIntegrationSpec.scala index 101d86e5..9823f85e 100644 --- a/es8/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient8ReplGatewayIntegrationSpec.scala +++ b/es8/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient8ReplGatewayIntegrationSpec.scala @@ -7,5 +7,5 @@ import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JavaClient8ReplGatewayIntegrationSpec extends ReplGatewayIntegrationSpec with ElasticDockerTestKit { - override def gateway: GatewayApi = new JavaClientSpi().client(elasticConfig) + override lazy val gateway: GatewayApi = new JavaClientSpi().client(elasticConfig) } diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient9ReplGatewayIntegrationSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient9ReplGatewayIntegrationSpec.scala index 94ba1ba5..cb35b793 100644 --- a/es9/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient9ReplGatewayIntegrationSpec.scala +++ b/es9/java/src/test/scala/app/softnetwork/elastic/client/repl/JavaClient9ReplGatewayIntegrationSpec.scala @@ -7,5 +7,5 @@ import app.softnetwork.elastic.scalatest.ElasticDockerTestKit class JavaClient9ReplGatewayIntegrationSpec extends ReplGatewayIntegrationSpec with ElasticDockerTestKit { - override def gateway: GatewayApi = new JavaClientSpi().client(elasticConfig) + override lazy val gateway: GatewayApi = new JavaClientSpi().client(elasticConfig) } From 824dc2cf25a731428e7b9c4d29defd622c0dc3e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Manciot?= Date: Mon, 16 Mar 2026 20:45:57 +0100 Subject: [PATCH 10/10] fix: add diagnostic logging for CI watcher creation failures --- .../softnetwork/elastic/client/jest/JestWatcherApi.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestWatcherApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestWatcherApi.scala index 857cf06b..17daa1f2 100644 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestWatcherApi.scala +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestWatcherApi.scala @@ -63,6 +63,13 @@ trait JestWatcherApi extends WatcherApi with JestClientHelpers { result.ElasticSuccess(true) case jestResult: JestResult => val errorMessage = jestResult.getErrorMessage + // FIXME: diagnostic logging for CI watcher creation failures — remove once root cause is identified + val responseBody = Option(jestResult.getJsonString).getOrElse("") + val statusCode = jestResult.getResponseCode + logger.error( + s"Failed to create watcher '${watcher.id}': $errorMessage (status: $statusCode). Response: $responseBody" + ) + // end FIXME result.ElasticFailure( result.ElasticError( s"Failed to create watcher '${watcher.id}': $errorMessage"