sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ama...@apache.org
Subject [sis] 16/45: feat(SQLStore): add partial support for distinct operator on SQL query
Date Tue, 12 Nov 2019 16:44:43 GMT
This is an automated email from the ASF dual-hosted git repository.

amanin pushed a commit to branch refactor/sql-store
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 36b597e5f0f7b023a29b22a14e10f0ed17795ca6
Author: Alexis Manin <amanin@apache.org>
AuthorDate: Thu Sep 26 14:31:37 2019 +0200

    feat(SQLStore): add partial support for distinct operator on SQL query
---
 .../sis/internal/sql/feature/QueryBuilder.java     |   3 +
 .../sis/internal/sql/feature/QueryFeatureSet.java  | 130 +++++++++++++++------
 .../apache/sis/internal/sql/feature/StreamSQL.java |  21 ++--
 .../org/apache/sis/storage/sql/SQLStoreTest.java   |  54 ++++++---
 4 files changed, 145 insertions(+), 63 deletions(-)

diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
index 24ac4f3..68d798e 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
@@ -1,5 +1,8 @@
 package org.apache.sis.internal.sql.feature;
 
+/**
+ * API to allow an overridden SQL Stream to delegate a set of intermediate operations to the native
driver.
+ */
 interface QueryBuilder {
 
     QueryBuilder limit(long limit);
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
index b590c4b..9d027a4 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
@@ -20,6 +20,15 @@ import org.apache.sis.internal.storage.AbstractFeatureSet;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.util.collection.BackingStoreException;
 
+/**
+ * Stores the SQL query given at build time, and executes it when calling {@link #features(boolean)
data stream}. Note that
+ * the {@link #getType() data type} is defined by analyzing the SQL statement metadata. Note that
the user query can be modified
+ * before execution to adapt various parameters overridable at fetch time, such as offset and limit,
through
+ * {@link Stream#skip(long)} and {@link Stream#limit(long)}.
+ *
+ * Note that this component models query result as close as possible, so built data type
will be simple feature type (no
+ * association).
+ */
 public class QueryFeatureSet extends AbstractFeatureSet {
 
     /**
@@ -35,34 +44,111 @@ public class QueryFeatureSet extends AbstractFeatureSet {
     private static final Pattern OFFSET_PATTERN = Pattern.compile("OFFSET\\s+([^\\s]+)(?:\\s+ROWS?)?",
Pattern.CASE_INSENSITIVE);
 
     /**
+     * Check for a selection of distinct rows.
+     */
+    private static final Pattern DISTINCT_PATTERN = Pattern.compile("^\\s*SELECT\\s+DISTINCT",
Pattern.CASE_INSENSITIVE);
+
+    /**
      * Keep builder to allow native limit and offset through stream operation.
      */
     private final SQLBuilder queryBuilder;
 
+    /**
+     * SQL database handler. Used to open new connections at query time.
+     */
     private final DataSource source;
 
+    /**
+     * Component in charge of conversion from SQL entry to Feature entity. Also provides
output data type.
+     */
     private final FeatureAdapter adapter;
 
+    /**
+     * Offset and limit defined in the user query, if any. If none is found, or we cannot safely
determine their value (not
+     * specified as a literal but as a variable name), values will be set to -1.
+     *
+     * @implNote  BE CAREFUL ! We use these fields for optimisations. We remove values from
user query if safely
+     * identified, and add them only at query time, with additional skip/offset defined through
stream or java query.
+     */
+    private final long originOffset, originLimit;
+
+    /**
+     * A flag indicating that we've safely identified a {@literal DISTINCT} keyword in the
user query.
+     */
+    private final boolean distinct;
+
+    /**
+     * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, except query
is provided by a fixed text
+     * instead of a builder.
+     */
     public QueryFeatureSet(String query, DataSource source, Connection conn) throws SQLException
{
         this(fromQuery(query, conn), source, conn);
     }
 
+    /**
+     * Create a new feature set whose data is provided by given user query. Note that query
will be compiled now, but
+     * only executed when {@link #features(boolean) acquiring a data stream}.
+     *
+     * @param queryBuilder Contains the user-defined SQL query. It must contain a valid and
complete SQL statement, because
+     *                     it will be compiled at built time to define feature type. Note
that we will make a copy of
+     *                     it, so any modification done after this feature set is created
won't have any effect on it.
+     * @param source A database pointer we'll keep, to create new connections when {@link
#features(boolean) fetching data}.
+     * @param conn Serves for compiling user query, thus creating output data type. The connection
is not kept nor
+     *             re-used after constructor, so you can close it immediately after. We require
it, so we do not force
+     *             opening a new connection if user already has one ready on its end.
+     * @throws SQLException If input query compiling or analysis of its metadata fails.
+     */
     public QueryFeatureSet(SQLBuilder queryBuilder, DataSource source, Connection conn) throws
SQLException {
         this(queryBuilder, new Analyzer(source, conn.getMetaData(), null, null), source,
conn);
     }
 
-    public QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource source,
Connection conn) throws SQLException {
+
+    /**
+     * See {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)} for details.
+     *
+     * @param analyzer SIS sql analyzer, used for query metadata analysis. Not nullable.
If you do not have any, you
+     *                 can use {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)
another constructor}.
+     */
+    QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource source, Connection
conn) throws SQLException {
         super(analyzer.listeners);
-        // Defensive copy
-        this.queryBuilder = new SQLBuilder(queryBuilder);
-        this.queryBuilder.append(queryBuilder.toString());
         this.source = source;
 
-        final String sql = queryBuilder.toString();
+        String sql = queryBuilder.toString();
         try (PreparedStatement statement = conn.prepareStatement(sql)) {
             final SQLTypeSpecification spec = analyzer.create(statement, sql, null);
             adapter = analyzer.buildAdapter(spec);
         }
+
+        /* We will now try to parse offset and limit from input query. If we encounter unsupported/ambiguous
case,
+         * we will fallback to pure java management of additional limit and offset.
+         * If we successfully retrieve offset and limit, we'll modify user query to take
account of additional
+         * parameters given later.
+         */
+        long tmpOffset = 0, tmpLimit = 0;
+        try {
+            Matcher matcher = OFFSET_PATTERN.matcher(sql);
+            if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1));
+            if (matcher.find()) throw new UnsupportedOperationException("More than one offset
in the query.");
+            sql = matcher.replaceFirst("");
+
+            matcher = LIMIT_PATTERN.matcher(sql);
+            if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1));
+            if (matcher.find()) throw new UnsupportedOperationException("More than one limit
in the query.");
+            sql = matcher.replaceFirst("");
+        } catch (RuntimeException e) {
+            sql = source.toString();
+            tmpOffset = -1;
+            tmpLimit = -1;
+        }
+
+        distinct = DISTINCT_PATTERN.matcher(sql).find();
+
+        originOffset = tmpOffset;
+        originLimit = tmpLimit;
+
+        // Defensive copy
+        this.queryBuilder = new SQLBuilder(queryBuilder);
+        this.queryBuilder.append(sql);
     }
 
     /**
@@ -99,7 +185,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
     }
 
     @Override
-    public Stream<Feature> features(boolean parallel) throws DataStoreException {
+    public Stream<Feature> features(boolean parallel) {
         return new StreamSQL(new QueryAdapter(queryBuilder), source);
     }
 
@@ -107,39 +193,12 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         private final SQLBuilder source;
 
-        private final long originOffset, originLimit;
         private long additionalOffset, additionalLimit;
 
         QueryAdapter(SQLBuilder source) {
-            /* We will now try to parse offset and limit from input query. If we encounter
unsupported/ambiguous case,
-             * we will fallback to pure java management of additional limit and offset.
-             * If we successfully retrieve offset and limit, we'll modify user query to take
account of additional
-             * parameters given later.
-             */
-            long tmpOffset = 0, tmpLimit = 0;
-            String sql = source.toString();
-            try {
-                Matcher matcher = OFFSET_PATTERN.matcher(sql);
-                if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1));
-                if (matcher.find()) throw new UnsupportedOperationException("More than one
offset in the query.");
-                sql = matcher.replaceFirst("");
-
-                matcher = LIMIT_PATTERN.matcher(sql);
-                if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1));
-                if (matcher.find()) throw new UnsupportedOperationException("More than one
limit in the query.");
-                sql = matcher.replaceFirst("");
-            } catch (RuntimeException e) {
-                sql = source.toString();
-                tmpOffset = -1;
-                tmpLimit = -1;
-            }
-
-            originOffset = tmpOffset;
-            originLimit = tmpLimit;
-
             // defensive copy
             this.source = new SQLBuilder(source);
-            this.source.append(sql);
+            this.source.append(source.toString());
         }
 
         @Override
@@ -156,6 +215,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         @Override
         public QueryBuilder distinct(boolean activate) {
+            if (distinct == activate) return this;
             throw new UnsupportedOperationException("Not supported yet: modifying user query");
// "Alexis Manin (Geomatys)" on 24/09/2019
         }
 
@@ -250,7 +310,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         public long estimateSize() {
             // TODO: economic size estimation? A count query seems overkill for the aim
of this API. However, we could
             // analyze user query in search for a limit value.
-            return Long.MAX_VALUE;
+            return originLimit > 0? originLimit : Long.MAX_VALUE;
         }
 
         @Override
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
index e1b1de6..925da4e 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
@@ -125,8 +125,13 @@ class StreamSQL extends StreamDecoration<Feature> {
 
     @Override
     public Stream<Feature> distinct() {
-        queryAdapter.distinct();
-        return this;
+        try {
+            queryAdapter.distinct();
+            return this;
+        } catch (UnsupportedOperationException e) {
+            // TODO: emit warning
+            return super.distinct();
+        }
     }
 
     @Override
@@ -239,12 +244,6 @@ class StreamSQL extends StreamDecoration<Feature> {
         }
 
         @Override
-        public Stream<O> distinct() {
-            source = source.distinct();
-            return this;
-        }
-
-        @Override
         public Stream<O> limit(long maxSize) {
             source = source.limit(maxSize);
             return this;
@@ -335,12 +334,6 @@ class StreamSQL extends StreamDecoration<Feature> {
         }
 
         @Override
-        public DoubleStream distinct() {
-            source = source.distinct();
-            return this;
-        }
-
-        @Override
         public DoubleStream limit(long maxSize) {
             source = source.limit(maxSize);
             return this;
diff --git a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
index 9eb41aa..2929194 100644
--- a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
+++ b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
@@ -17,6 +17,7 @@
 package org.apache.sis.storage.sql;
 
 import java.sql.Connection;
+import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
@@ -175,8 +176,8 @@ public final strictfp class SQLStoreTest extends TestCase {
 
     private void verifyQueries(DataSource source) throws Exception {
         verifyFetchCityTableAsQuery(source);
-
         verifyLimitOffsetAndColumnSelectionFromQuery(source);
+        verifyDistinctQuery(source);
     }
 
     private void verifyFetchCityTableAsQuery(DataSource source) throws Exception {
@@ -263,7 +264,11 @@ public final strictfp class SQLStoreTest extends TestCase {
      * @param source Database connection provider.
      */
     private void verifyLimitOffsetAndColumnSelectionFromQuery(final DataSource source) throws
Exception {
-        final String query = "SELECT \"english_name\" as \"title\" FROM features.\"Parks\"
ORDER BY \"english_name\" ASC OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY";
+        // Ensure multiline text is accepted
+        final String query = "SELECT \"english_name\" as \"title\" \n\r" +
+                "FROM features.\"Parks\" \n" +
+                "ORDER BY \"english_name\" ASC \n" +
+                "OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY";
         final QueryFeatureSet qfs;
         try (Connection conn = source.getConnection()) {
             qfs = new QueryFeatureSet(query, source, conn);
@@ -307,18 +312,39 @@ public final strictfp class SQLStoreTest extends TestCase {
         assertArrayEquals("Only second third name should be returned", new String[]{"Shinjuku
Gyoen"}, parkNames);
     }
 
-/**
-* Checks that operations stacked on feature stream are well executed. This test focus on
mapping and peeking
-* actions overloaded by sql streams. We'd like to test skip and limit operations too, but
ignore it for now,
-* because ordering of results matters for such a test.
-*
-* @implNote Most of stream operations used here are meaningless. We just want to ensure that
the pipeline does not
-* skip any operation.
-*
-* @param cities The feature set to read from. We expect a feature set containing all cities
defined for the test
-*               class.
-* @throws DataStoreException Let's propagate any error raised by input feature set.
-*/
+    /**
+     * Check that a {@link Stream#distinct()} gives coherent results. For now, no optimisation
is done to delegate it to
+     * database, but this test allows for non-regression test, so when an optimisation is
done, we'll immediately test
+     * its validity.
+     */
+    private void verifyDistinctQuery(DataSource source) throws SQLException {
+        // Ensure multiline text is accepted
+        final String query = "SELECT \"country\" FROM features.\"Parks\" ORDER BY \"country\"";
+        final QueryFeatureSet qfs;
+        try (Connection conn = source.getConnection()) {
+            qfs = new QueryFeatureSet(query, source, conn);
+        }
+
+        final Object[] expected = qfs.features(false)
+                .distinct() 
+                .map(f -> f.getPropertyValue("country"))
+                .toArray();
+
+        assertArrayEquals("Distinct country names, sorted in ascending order", new String[]{"CAN",
"FRA", "JPN"}, expected);
+    }
+
+    /**
+    * Checks that operations stacked on the feature stream are well executed. This test focuses
on mapping and peeking
+    * actions overloaded by sql streams. We'd like to test skip and limit operations too,
but ignore it for now,
+    * because ordering of results matters for such a test.
+    *
+    * @implNote Most of stream operations used here are meaningless. We just want to ensure
that the pipeline does not
+    * skip any operation.
+    *
+    * @param cities The feature set to read from. We expect a feature set containing all
cities defined for the test
+    *               class.
+    * @throws DataStoreException Let's propagate any error raised by input feature set.
+    */
     private static void verifyStreamOperations(final FeatureSet cities) throws DataStoreException
{
         try (Stream<Feature> features = cities.features(false)) {
             final AtomicInteger peekCount = new AtomicInteger();


Mime
View raw message