sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ama...@apache.org
Subject [sis] 19/45: WIP(SQLStore): improve benchmark and add javadoc
Date Tue, 12 Nov 2019 16:44:46 GMT
This is an automated email from the ASF dual-hosted git repository.

amanin pushed a commit to branch refactor/sql-store
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 65aeda1248ce5eb666c9480bab3dac5339a2d395
Author: Alexis Manin <amanin@apache.org>
AuthorDate: Fri Sep 27 11:07:43 2019 +0200

    WIP(SQLStore): improve benchmark and add javadoc
---
 .../apache/sis/internal/sql/feature/Analyzer.java  | 26 ++++-----
 .../sis/internal/sql/feature/PrimaryKey.java       | 12 +++--
 .../sis/internal/sql/feature/QueryFeatureSet.java  | 44 ++++++++++++++-
 .../sql/feature/QuerySpliteratorsBench.java        | 12 ++++-
 .../internal/sql/feature/SQLTypeSpecification.java | 62 ++++++++++++++++++++--
 .../sis/internal/sql/feature/package-info.java     |  5 ++
 6 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
index 6ac8fd7..9348d01 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
@@ -22,6 +22,7 @@ import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.util.*;
+import java.util.function.Supplier;
 import java.util.logging.Level;
 import java.util.logging.LogRecord;
 import javax.sql.DataSource;
@@ -140,6 +141,7 @@ final class Analyzer {
      * The namespace created with {@link #catalog} and {@link #schema}.
      */
     private transient NameSpace namespace;
+    public static final Supplier<GenericName> RANDOME_NAME = () -> Names.createGenericName("sis",
":", UUID.randomUUID().toString());
 
     /**
      * Creates a new analyzer for the database described by given metadata.
@@ -334,9 +336,9 @@ final class Analyzer {
 
     public FeatureAdapter buildAdapter(final SQLTypeSpecification spec) throws SQLException
{
         final FeatureTypeBuilder builder = new FeatureTypeBuilder(nameFactory, functions.library,
locale);
-        builder.setName(spec.getName() == null ? Names.createGenericName("sis", ":", UUID.randomUUID().toString())
: spec.getName());
-        builder.setDefinition(spec.getDefinition());
-        final String geomCol = spec.getPrimaryGeometryColumn().orElse("");
+        builder.setName(spec.getName().orElseGet(RANDOME_NAME));
+        spec.getDefinition().ifPresent(builder::setDefinition);
+        final String geomCol = spec.getPrimaryGeometryColumn().map(ColumnRef::getAttributeName).orElse("");
         final List pkCols = spec.getPK().map(PrimaryKey::getColumns).orElse(Collections.EMPTY_LIST);
         List<PropertyMapper> attributes = new ArrayList<>();
         // JDBC column indices are 1 based.
@@ -470,15 +472,15 @@ final class Analyzer {
         }
 
         @Override
-        public GenericName getName() {
-            return id.getName(Analyzer.this);
+        public Optional<GenericName> getName() {
+            return Optional.of(id.getName(Analyzer.this));
         }
 
         /**
          * The remarks are opportunistically stored in id.freeText if known by the caller.
          */
         @Override
-        public String getDefinition() throws SQLException {
+        public Optional<String> getDefinition() throws SQLException {
             String remarks = id.freeText;
             if (id instanceof Relation) {
                 try (ResultSet reflect = metadata.getTables(id.catalog, schemaEsc, tableEsc,
null)) {
@@ -493,7 +495,7 @@ final class Analyzer {
                     }
                 }
             }
-            return remarks;
+            return Optional.ofNullable(remarks);
         }
 
         @Override
@@ -551,7 +553,7 @@ final class Analyzer {
         }
 
         @Override
-        public Optional<String> getPrimaryGeometryColumn() {
+        public Optional<ColumnRef> getPrimaryGeometryColumn() {
             return Optional.empty();
             //throw new UnsupportedOperationException("Not supported yet"); // "Alexis Manin
(Geomatys)" on 20/09/2019
         }
@@ -589,13 +591,13 @@ final class Analyzer {
         }
 
         @Override
-        public GenericName getName() throws SQLException {
-            return name;
+        public Optional<GenericName> getName() throws SQLException {
+            return Optional.of(name);
         }
 
         @Override
-        public String getDefinition() throws SQLException {
-            return query;
+        public Optional<String> getDefinition() throws SQLException {
+            return Optional.of(query);
         }
 
         @Override
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
index 1e68fd3..e220abc 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
@@ -10,6 +10,9 @@ import org.apache.sis.util.ArgumentChecks;
 /**
  * Represents SQL primary key constraint. Main information is columns composing the key.
  *
+ * @implNote For now, only list of columns composing the key are returned. However, in the
future it would be possible
+ * to add other information, as a value type to describe how to expose primary key value.
+ *
  * @author "Alexis Manin (Geomatys)"
  */
 interface PrimaryKey {
@@ -20,13 +23,16 @@ interface PrimaryKey {
         return Optional.of(new Composite(cols));
     }
 
-    //Class<T> getViewType();
+    /**
+     *
+     * @return List of column names composing the key. Should neither be null nor empty.
+     */
     List<String> getColumns();
 
     class Simple implements PrimaryKey {
         final String column;
 
-        public Simple(String column) {
+        Simple(String column) {
             this.column = column;
         }
 
@@ -40,7 +46,7 @@ interface PrimaryKey {
          */
         private final List<String> columns;
 
-        public Composite(List<String> columns) {
+        Composite(List<String> columns) {
             ArgumentChecks.ensureNonEmpty("Primary key column names", columns);
             this.columns = Collections.unmodifiableList(new ArrayList<>(columns));
         }
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
index fdf518d..ffbdc8b 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
@@ -83,7 +83,15 @@ public class QueryFeatureSet extends AbstractFeatureSet {
      * batch loading of results.
      */
     boolean allowBatchLoading = true;
+    /**
+     * Profiling variable. Define the fraction (0 none, 1 all) of a single fetch (as defined
by {@link ResultSet#getFetchSize()}
+     * that {@link PrefetchSpliterator} will load in one go.
+     */
     float fetchRatio = 0.5f;
+    /**
+     * Profiling variable, serves to define {{@link PreparedStatement#setFetchSize(int)}
SQL result fetch size}.
+     */
+    int fetchSize = 100;
 
     /**
      * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, except query
is provided by a fixed text
@@ -268,7 +276,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         @Override
         public Stream<Feature> connect(Connection connection) throws SQLException,
DataStoreException {
             final PreparedStatement statement = connection.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
-            statement.setFetchSize(100);
+            statement.setFetchSize(fetchSize);
             final ResultSet result = statement.executeQuery();
             final int fetchSize = result.getFetchSize();
             final boolean withPrefetch = !allowBatchLoading || (fetchSize < 1 || fetchSize
>= Integer.MAX_VALUE);
@@ -297,6 +305,17 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         }
     }
 
+    /**
+     * Base class for loading SQL query result loading through {@link Spliterator} API. Concrete
implementations comes
+     * in two experimental flavors :
+     * <ul>
+     *     <li>Sequential streaming: {@link ResultSpliterator}</li>
+     *     <li>Parallelizable batch  streaming: {@link PrefetchSpliterator}</li>
+     * </ul>
+     *
+     * {@link QuerySpliteratorsBench A benchmark is available} to compare both implementations,
which could be useful in
+     * the future to determine which implementation to priorize. For now results does not
show much difference.
+     */
     private abstract class QuerySpliterator  implements java.util.Spliterator<Feature>
{
 
         final ResultSet result;
@@ -347,12 +366,26 @@ public class QueryFeatureSet extends AbstractFeatureSet {
                 .append(query);
     }
 
+    /**
+     * An attempt to optimize feature loading through batching and potential parallelization.
For now, it looks like
+     * there's not much improvement regarding to naive streaming approach. IMHO, two improvements
would really impact
+     * performance positively if done:
+     * <ul>
+     *     <li>Optimisation of batch loading through {@link FeatureAdapter#prefetch(int,
ResultSet)}</li>
+     *     <li>Better splitting balance, as stated by {@link Spliterator#trySplit()}</li>
+     * </ul>
+     */
     private final class PrefetchSpliterator extends QuerySpliterator {
 
         final int fetchSize;
 
         int idx;
         List<Feature> chunk;
+        /**
+         * According to {@link Spliterator#trySplit()} documentation, the original size estimation
must be reduced after
+         * split to remain consistent.
+         */
+        long splittedAmount = 0;
 
         private PrefetchSpliterator(ResultSet result) throws SQLException {
             this(result, 0.5f);
@@ -374,14 +407,21 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         public Spliterator<Feature> trySplit() {
             if (!ensureChunkAvailable()) return null;
+            final List<Feature> remainingChunk = chunk.subList(idx, chunk.size());
+            splittedAmount += remainingChunk.size();
             final Spliterator<Feature> chunkSpliterator = idx == 0 ?
-                    chunk.spliterator() : chunk.subList(idx, chunk.size()).spliterator();
+                    chunk.spliterator() : remainingChunk.spliterator();
             chunk = null;
             idx = 0;
             return chunkSpliterator;
         }
 
         @Override
+        public long estimateSize() {
+            return super.estimateSize() - splittedAmount;
+        }
+
+        @Override
         public int characteristics() {
             return super.characteristics() | Spliterator.CONCURRENT;
         }
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
index f2c62ef..bb519d9 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
@@ -25,8 +25,8 @@ import org.openjdk.jmh.annotations.Warmup;
 import static org.apache.sis.util.ArgumentChecks.ensureStrictlyPositive;
 
 @Fork(value = 2, jvmArgs = {"-server", "-Xmx2g"} )
-@Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
-@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
+@Warmup(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS)
 public class QuerySpliteratorsBench {
 
     @State(Scope.Benchmark)
@@ -41,6 +41,12 @@ public class QuerySpliteratorsBench {
         @Param({"true", "false"})
         boolean prefetch;
 
+        @Param({"10", "100", "1000"})
+        int fetchSize;
+
+        @Param({"0.5", "1", "2"})
+        float fetchRatio;
+
         EmbeddedDataSource db;
         QueryFeatureSet fs;
 
@@ -78,6 +84,8 @@ public class QuerySpliteratorsBench {
 
                 fs = new QueryFeatureSet("SELECT * FROM TEST", db, c);
                 fs.allowBatchLoading = prefetch;
+                fs.fetchSize = fetchSize;
+                fs.fetchRatio = fetchRatio;
             }
         }
 
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
index 0e35edf..e54823d 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
@@ -1,35 +1,87 @@
 package org.apache.sis.internal.sql.feature;
 
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.List;
 import java.util.Optional;
 
 import org.opengis.util.GenericName;
 
+import org.apache.sis.feature.builder.FeatureTypeBuilder;
+import org.apache.sis.internal.feature.AttributeConvention;
 import org.apache.sis.storage.DataStoreContentException;
 
+/**
+ * Defines an application schema inferred from an SQL database (query, table, etc.). Implementations
will be used by
+ * {@link Analyzer} to create an {@link FeatureAdapter adaptation layer to the feature model}.
Default implementations
+ * can be retrieved for tables and queries respectively through {@link Analyzer#create(TableReference,
TableReference)}
+ * and {@link Analyzer#create(PreparedStatement, String, GenericName)} methods.
+ */
 interface SQLTypeSpecification {
     /**
+     * Identifying name for the application schema. It is strongly recommended to be present,
for SIS engine to be
+     * capable to create insightful models. However, in corner cases where no proper names
could be provided, an empty
+     * value is allowed.
      *
-     * @return Name for the feature type to build. Nullable.
+     * @implNote SIS {@link FeatureTypeBuilder feature type builder} <em>requires</em>
a name, and current
+     * {@link Analyzer#buildAdapter(SQLTypeSpecification) analysis implementation} will create
a random UUID if
+     * necessary.
+     *
+     * @return Name for the feature type to build.
      * @throws SQLException If an error occurs while retrieving information from database.
      */
-    GenericName getName() throws SQLException;
+    Optional<GenericName> getName() throws SQLException;
 
     /**
+     * Gives an optional description of the application schema.This information is not necessary
for any kind of
+     * computation, but allows to give end-user global information about the schema (s)he's
manipulating.
      *
-     * @return A succint description of the data source. Nullable.
+     * @return A brief description of the data source.
      * @throws SQLException If an error occurs while retrieving information from database.
      */
-    String getDefinition() throws SQLException;
+    Optional<String> getDefinition() throws SQLException;
 
+    /**
+     * Primary key definition of source schema. Can be empty if no primary key is defined
(Example: query definition).
+     *
+     * @return Primary key definition if any, otherwise an empty shell.
+     * @throws SQLException If an error occurs while exchanging information with underlying
database.
+     */
     Optional<PrimaryKey> getPK() throws SQLException;
 
+    /**
+     *
+     * @return Ordered list of columns in application schema. Order is important, and will
be relied upon to retrieve
+     *  {@link ResultSet#getObject(int) result values by index}.
+     */
     List<SQLColumn> getColumns();
 
+    /**
+     *
+     * @return All identified relations based on a foreign key in <em>current</em>
application schema (1..1 or n..1).
+     * Corresponds to {@link Relation.Direction#IMPORT}. Can be empty but not null.
+     *
+     * @throws SQLException If an error occurs while exchanging information with underlying
database.
+     */
     List<Relation> getImports() throws SQLException;
 
+    /**
+     *
+     * @return All identified relations based on foreign key located in <em>another</em>
application schema (1..n).
+     * Corresponds to {@link Relation.Direction#EXPORT}. Can be empty but not null.
+     * @throws SQLException If an error occurs while exchanging information with underlying
database.
+     * @throws DataStoreContentException If a schema problem is encountered.
+     */
     List<Relation> getExports() throws SQLException, DataStoreContentException;
 
-    default Optional<String> getPrimaryGeometryColumn() {return Optional.empty();}
+    /**
+     * In case target schema contains geographic information, this serves to identify without
ambiguity which column
+     * contains what could be considered main geolocation (as stated by {@link AttributeConvention#GEOMETRY_PROPERTY}).
+     * This is a very important information in case application schema contains multiple
geometric fields.
+     *
+     * @return The name of the column/attribute to be considered as main geometry information,
or an empty shell if
+     * unknown.
+     */
+    default Optional<ColumnRef> getPrimaryGeometryColumn() {return Optional.empty();}
 }
diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
index 2c198a8..19c3151 100644
--- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
+++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
@@ -25,8 +25,13 @@
  * This package is for internal use by SIS only. Classes in this package
  * may change in incompatible ways in any future version without notice.
  *
+ * @implNote Feature type analysis is done through {@link org.apache.sis.internal.sql.feature.Analyzer}
class.
+ * It relies on internal {@link org.apache.sis.internal.sql.feature.SQLTypeSpecification}
API to fetch SQL schema
+ * information, and build {@link org.apache.sis.internal.sql.feature.FeatureAdapter an adapter
to feature model from it}.
+ *
  * @author  Johann Sorel (Geomatys)
  * @author  Martin Desruisseaux (Geomatys)
+ * @author  Alexis Manin (Geomatys)
  * @version 1.0
  * @since   1.0
  * @module


Mime
View raw message