sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject [sis] 02/04: Add support for reading variables of character type. Implies some refactoring for sharing more code between the two reader implementations.
Date Mon, 19 Oct 2020 14:47:25 GMT
This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 9e986b89802d647b1270b9fa04f988454390224e
Author: Martin Desruisseaux <martin.desruisseaux@geomatys.com>
AuthorDate: Mon Oct 19 13:26:43 2020 +0200

    Add support for reading variables of character type.
    Implies some refactoring for sharing more code between the two reader implementations.
---
 .../org/apache/sis/internal/netcdf/AxisType.java   |   1 +
 .../org/apache/sis/internal/netcdf/Decoder.java    |  21 +++
 .../org/apache/sis/internal/netcdf/FeatureSet.java |  30 ++-
 .../sis/internal/netcdf/{impl => }/HYCOM.java      |  15 +-
 .../org/apache/sis/internal/netcdf/Variable.java   | 203 +++++++++++++++++++--
 .../sis/internal/netcdf/impl/ChannelDecoder.java   |  24 +--
 .../sis/internal/netcdf/impl/VariableInfo.java     | 106 ++++-------
 .../sis/internal/netcdf/ucar/DecoderWrapper.java   |  13 ++
 .../sis/internal/netcdf/ucar/VariableWrapper.java  |  40 ++--
 .../sis/storage/netcdf/NetcdfStoreProvider.java    |   5 +-
 .../internal/storage/io/HyperRectangleReader.java  |   2 +-
 11 files changed, 311 insertions(+), 149 deletions(-)

diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/AxisType.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/AxisType.java
index 2333557..6fcf3f1 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/AxisType.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/AxisType.java
@@ -29,6 +29,7 @@ import ucar.nc2.constants.CF;
 /**
  * Type of coordinate system axis, in the order they should appear for a "normalized" coordinate
reference system.
  * The enumeration name matches the name of the {@code "axis"} attribute in CF-convention.
+ * Enumeration order is the desired order of coordinate values.
  *
  * @author  Martin Desruisseaux (Geomatys)
  * @version 1.1
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Decoder.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Decoder.java
index dd5bf0f..e0a072f 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Decoder.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Decoder.java
@@ -29,6 +29,7 @@ import java.util.logging.LogRecord;
 import java.io.Closeable;
 import java.io.IOException;
 import java.nio.file.Path;
+import java.nio.charset.Charset;
 import org.opengis.util.NameSpace;
 import org.opengis.util.NameFactory;
 import org.opengis.referencing.datum.Datum;
@@ -170,6 +171,18 @@ public abstract class Decoder extends ReferencingFactoryContainer implements
Clo
     }
 
     /**
+     * Checks and potentially modifies the content of this dataset for conventions other
than CF-conventions.
+     * This method should be invoked after construction for handling the particularities
of some datasets
+     * (HYCOM, …).
+     *
+     * @throws IOException if an error occurred while reading the channel.
+     * @throws DataStoreException if an error occurred while interpreting the netCDF file
content.
+     */
+    public final void applyOtherConventions() throws IOException, DataStoreException {
+        HYCOM.convert(this, getVariables());
+    }
+
+    /**
      * Returns information about modifications to apply to netCDF conventions in order to
handle this netCDF file.
      * Customized conventions are necessary when the variables and attributes in a netCDF
file do not follow CF-conventions.
      *
@@ -355,6 +368,14 @@ public abstract class Decoder extends ReferencingFactoryContainer implements
Clo
     }
 
     /**
+     * Returns the encoding for attribute or variable data.
+     * This is <strong>not</strong> the encoding of netCDF names.
+     *
+     * @return encoding of data (not the encoding of netCDF names).
+     */
+    public abstract Charset getEncoding();
+
+    /**
      * Returns all variables found in the netCDF file.
      * This method may return a direct reference to an internal array - do not modify.
      *
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/FeatureSet.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/FeatureSet.java
index c076d2c..5bc3a98 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/FeatureSet.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/FeatureSet.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.LinkedHashMap;
+import java.util.EnumMap;
 import java.util.Spliterator;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
@@ -121,13 +121,13 @@ final class FeatureSet extends DiscreteSampling {
      * @param  name         name to give to the feature type.
      * @param  counts       the count of instances per feature, or {@code null} if none.
      * @param  identifiers  the feature identifiers, possibly with other singleton properties.
-     * @param  hasTime      whether the {@code coordinates} array contains a temporal variable.
      * @param  coordinates  <var>x</var>, <var>y</var> and potentially
<var>z</var> or <var>t</var> coordinate values.
+     * @param  hasTime      whether the {@code coordinates} array contains a temporal variable.
      * @param  properties   the variables that contain custom time-varying properties.
      * @throws IllegalArgumentException if the given library is non-null but not available.
      */
     private FeatureSet(final Decoder decoder, String name, final Vector counts, final Variable[]
identifiers,
-                       final boolean hasTime, final Variable[] coordinates, final Variable[]
properties)
+                       final Variable[] coordinates, final boolean hasTime, final Variable[]
properties)
     {
         super(decoder.geomlib, decoder.listeners);
         this.counts      = counts;
@@ -145,7 +145,7 @@ final class FeatureSet extends DiscreteSampling {
         final FeatureTypeBuilder builder = new FeatureTypeBuilder(decoder.nameFactory, decoder.geomlib,
decoder.listeners.getLocale());
         for (final Variable v : identifiers) {
             final Class<?> type = v.getDataType().getClass(v.getNumDimensions() >
1);
-            describe(v, builder.addAttribute(Long.class), false);   // TODO: use type.
+            describe(v, builder.addAttribute(type), false);
         }
         if (coordinates.length > (hasTime ? 1 : 0)) {
             final AttributeTypeBuilder<?> geometry = builder.addAttribute(
@@ -272,7 +272,7 @@ search: for (final Variable counts : decoder.getVariables()) {
         final boolean                isPointSet  = sampleDimension.equals(featureDimension);
         final List<Variable>         singletons  = isPointSet ? Collections.emptyList()
: new ArrayList<>();
         final List<Variable>         properties  = new ArrayList<>();
-        final Map<AxisType,Variable> coordinates = new LinkedHashMap<>();
+        final Map<AxisType,Variable> coordinates = new EnumMap<>(AxisType.class);
         for (final Variable data : decoder.getVariables()) {
             if (data.equals(counts)) {
                 continue;
@@ -301,23 +301,19 @@ search: for (final Variable counts : decoder.getVariables()) {
                 if (axisType != null) {
                     final Variable previous = coordinates.putIfAbsent(axisType, data);
                     if (previous != null) {
+                        // Duplicated axis type. Keep the first axis in declaration order.
                         decoder.listeners.warning(decoder.resources().getString(Resources.Keys.DuplicatedAxisType_4,
                                                   decoder.getFilename(), axisType, previous.getName(),
data.getName()));
-                        // TODO: give precedence to which axis?
                     }
                 } else {
                     properties.add(data);
                 }
             }
         }
-        final Variable time = coordinates.remove(AxisType.T);
-        if (time != null) {
-            coordinates.put(AxisType.T, time);      // Make sure that time is last.
-        }
         return features.add(new FeatureSet(decoder, featureName,
                             (counts != null) ? counts.read() : null,
-                            toArray(singletons), time != null,
-                            toArray(coordinates.values()),
+                            toArray(singletons),
+                            toArray(coordinates.values()), coordinates.containsKey(AxisType.T),
                             toArray(properties)));
 
     }
@@ -450,17 +446,17 @@ search: for (final Variable counts : decoder.getVariables()) {
          *
          * @see FeatureSet#identifiers
          */
-        private final Vector[] idValues;
+        private final List<?>[] idValues;
 
         /**
          * Creates a new iterator.
          */
         Iter() throws IOException, DataStoreException {
             count = (int) Math.min(getFeatureCount().orElse(0), Integer.MAX_VALUE);
-            idValues = new Vector[identifiers.length];
+            idValues = new List<?>[identifiers.length];
             for (int i=0; i < idValues.length; i++) {
-                // Efficiency should be okay because those vectors are cached.
-                idValues[i] = identifiers[i].read();
+                // Efficiency should be okay because those lists are cached.
+                idValues[i] = identifiers[i].readAnyType();
             }
         }
 
@@ -475,7 +471,7 @@ search: for (final Variable counts : decoder.getVariables()) {
         @Override
         public boolean tryAdvance(final Consumer<? super Feature> action) {
             final Vector[] coordinateValues  = new Vector[coordinates.length];
-            final Object[] singleProperties  = new Number[identifiers.length];
+            final Object[] singleProperties  = new Object[identifiers.length];
             final Object[] varyingProperties = new Object[properties .length];
             for (int i=0; i < singleProperties.length; i++) {
                 singleProperties[i] = idValues[i].get(index);
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/HYCOM.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/HYCOM.java
similarity index 91%
rename from storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/HYCOM.java
rename to storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/HYCOM.java
index a356aa2..6feb96b 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/HYCOM.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/HYCOM.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sis.internal.netcdf.impl;
+package org.apache.sis.internal.netcdf;
 
 import java.io.IOException;
 import java.time.Instant;
@@ -24,8 +24,8 @@ import java.util.regex.Pattern;
 import java.util.GregorianCalendar;
 import org.apache.sis.math.Vector;
 import org.apache.sis.measure.Units;
+import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.internal.util.StandardDateFormat;
-import org.apache.sis.storage.DataStoreContentException;
 
 
 /**
@@ -33,7 +33,7 @@ import org.apache.sis.storage.DataStoreContentException;
  * We handle them in a separated class for now and may refactor later in a more general mechanism
for providing extensions.
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
  *
  * @see <a href="https://issues.apache.org/jira/browse/SIS-315">SIS-315</a>
  *
@@ -84,10 +84,10 @@ final class HYCOM {
      * In this example, the real units of {@code Date(MT)} will be taken from {@code MT(MT)},
which is
      * "days since 1900-12-31 00:00:00".
      */
-    static void convert(final ChannelDecoder decoder, final VariableInfo[] variables) throws
IOException, DataStoreContentException {
+    static void convert(final Decoder decoder, final Variable[] variables) throws IOException,
DataStoreException {
         Matcher matcher = null;
-        for (final VariableInfo variable : variables) {
-            if (variable.dimensions.length == 1) {
+        for (final Variable variable : variables) {
+            if (variable.getNumDimensions() == 1) {
                 final String units = variable.getUnitsString();
                 if (units != null) {
                     if (matcher == null) {
@@ -96,7 +96,8 @@ final class HYCOM {
                         matcher.reset(units);
                     }
                     if (matcher.matches()) {
-                        Instant epoch = variable.setUnit(decoder.findVariable(variable.dimensions[0].name),
Units.DAY);
+                        final Dimension dimension = variable.getGridDimensions().get(0);
+                        Instant epoch = variable.setUnit(decoder.findVariable(dimension.getName()),
Units.DAY);
                         if (epoch == null) {
                             epoch = Instant.EPOCH;
                         }
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
index 8f19cae..a777b8c 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.Locale;
 import java.util.regex.Pattern;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.time.Instant;
 import javax.measure.Unit;
 import javax.measure.format.ParserException;
@@ -41,8 +42,10 @@ import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.CharSequences;
 import org.apache.sis.util.collection.Containers;
 import org.apache.sis.util.collection.WeakHashSet;
+import org.apache.sis.internal.jdk9.JDK9;
 import org.apache.sis.internal.util.Numerics;
 import org.apache.sis.internal.util.CollectionsExt;
+import org.apache.sis.internal.util.UnmodifiableArrayList;
 import org.apache.sis.storage.netcdf.AttributeNames;
 import org.apache.sis.util.resources.Errors;
 import ucar.nc2.constants.CDM;                      // We use only String constants.
@@ -65,8 +68,11 @@ public abstract class Variable extends Node {
      * those vectors can be large, sharing common instances may save a lot of memory.
      *
      * <p>All shared vectors shall be considered read-only.</p>
+     *
+     * @see #read()
+     * @see #setValues(Object)
      */
-    protected static final WeakHashSet<Vector> SHARED_VECTORS = new WeakHashSet<>(Vector.class);
+    private static final WeakHashSet<Vector> SHARED_VECTORS = new WeakHashSet<>(Vector.class);
 
     /**
      * The pattern to use for parsing temporal units of the form "days since 1970-01-01 00:00:00".
@@ -146,6 +152,31 @@ public abstract class Variable extends Node {
     int bandDimension;
 
     /**
+     * The values of the whole variable, or {@code null} if not yet read. This vector should
be assigned only
+     * for relatively small variables, or for variables that are critical to the use of other
variables
+     * (for example the values in coordinate system axes).
+     *
+     * @see #read()
+     * @see #setValues(Object)
+     */
+    private transient Vector values;
+
+    /**
+     * The {@linkplain #values} vector as a list of elements of any type (not restricted to
{@link Number} instances).
+     * This is usually the same instance as {@link #values} because {@link Vector} implements
{@code List<Number>}.
+     * This is a different instance if this variable is a two-dimensional character array,
in which case this field
+     * is an instance of {@code List<String>}.
+     *
+     * The difference between {@code values} and {@code valuesAnyType} is that {@code values.get(i)}
may throw
+     * {@link NumberFormatException} because it always tries to return its elements as {@link
Number} instances,
+     * while {@code valuesAnyType.get(i)} can return {@link String} instances.
+     *
+     * @see #readAnyType()
+     * @see #setValues(Object)
+     */
+    private transient List<?> valuesAnyType;
+
+    /**
      * Creates a new variable.
      *
      * @param decoder  the netCDF file where this variable is stored.
@@ -157,6 +188,7 @@ public abstract class Variable extends Node {
     /**
      * If {@code flags} is non-null, declares this variable as an enumeration.
      * This method stores the information needed for {@link #meaning(int)} default implementation.
+     * This method is invoked by subclass constructors for completing {@code Variable} creation.
      *
      * @param  flags   the flag meanings as a space-separated string, or {@code null} if
none.
      * @param  values  the flag values as a vector of integer values, or {@code null} if
none.
@@ -305,7 +337,7 @@ public abstract class Variable extends Node {
     /**
      * Sets the unit of measurement and the epoch to the same value as the given variable.
      * This method is not used in CF-compliant files; it is reserved for the handling of
some
-     * particular conventions, for example HYCOM.
+     * particular conventions, for example {@link HYCOM}.
      *
      * @param  other      the variable from which to copy unit and epoch, or {@code null}
if none.
      * @param  overwrite  if non-null, set to the given unit instead of the unit of {@code
other}.
@@ -313,7 +345,7 @@ public abstract class Variable extends Node {
      *
      * @see #getUnit()
      */
-    public final Instant setUnit(final Variable other, Unit<?> overwrite) {
+    final Instant setUnit(final Variable other, Unit<?> overwrite) {
         if (other != null) {
             unit  = other.getUnit();        // May compute the epoch as a side effect.
             epoch = other.epoch;
@@ -856,7 +888,17 @@ public abstract class Variable extends Node {
     }
 
     /**
-     * Reads all the data for this variable and returns them as an array of a Java primitive
type.
+     * Returns whether values in this variable are cached by a system other than Apache SIS.
+     * For example if data are read using UCAR library, that library provides its own cache.
+     *
+     * @return whether values are cached by a library other than Apache SIS.
+     */
+    protected boolean isExternallyCached() {
+        return false;
+    }
+
+    /**
+     * Reads all the data for this variable and returns them as a vector of numerical values.
      * Multi-dimensional variables are flattened as a one-dimensional array (wrapped in a
vector).
      * Example:
      *
@@ -880,16 +922,41 @@ public abstract class Variable extends Node {
      *
      * If {@link #hasRealValues()} returns {@code true}, then this method shall
      * {@linkplain #replaceNaN(Object) replace fill values and missing values by NaN values}.
-     * This method should cache the returned vector since this method may be invoked often.
+     * This method caches the returned vector since this method may be invoked often.
      * Because of caching, this method should not be invoked for large data array.
      * Callers shall not modify the returned vector.
      *
-     * @return the data as an array of a Java primitive type.
+     * @return the data as a vector wrapping a Java array.
+     * @throws IOException if an error occurred while reading the data.
+     * @throws DataStoreException if a logical error occurred.
+     * @throws ArithmeticException if the size of the variable exceeds {@link Integer#MAX_VALUE},
or other overflow occurs.
+     */
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    public final Vector read() throws IOException, DataStoreException {
+        if (values == null) {
+            setValues(readFully());
+        }
+        return values;
+    }
+
+    /**
+     * Reads all the data for this variable and returns them as a list of any object.
+     * The difference between {@code read()} and {@code readAnyType()} is that {@code vector.get(i)}
may throw
+     * {@link NumberFormatException} because it always tries to return its elements as {@link
Number} instances,
+     * while {@code list.get(i)} can return {@link String} instances.
+     *
+     * @return the data as a list of numbers or strings.
      * @throws IOException if an error occurred while reading the data.
      * @throws DataStoreException if a logical error occurred.
      * @throws ArithmeticException if the size of the variable exceeds {@link Integer#MAX_VALUE},
or other overflow occurs.
      */
-    public abstract Vector read() throws IOException, DataStoreException;
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    public final List<?> readAnyType() throws IOException, DataStoreException {
+        if (valuesAnyType == null) {
+            setValues(readFully());
+        }
+        return valuesAnyType;
+    }
 
     /**
      * Reads a subsampled sub-area of the variable.
@@ -916,6 +983,116 @@ public abstract class Variable extends Node {
     public abstract Vector read(GridExtent area, int[] subsampling) throws IOException, DataStoreException;
 
     /**
+     * Reads all the data for this variable and returns them as an array of a Java primitive
type.
+     * This is the implementation of {@link #read()} method, invoked when the value is not
cached.
+     *
+     * @return the data as an array of a Java primitive type.
+     * @throws IOException if an error occurred while reading the data.
+     * @throws DataStoreException if a logical error occurred.
+     */
+    protected abstract Object readFully() throws IOException, DataStoreException;
+
+    /**
+     * Sets the values in this variable. The values are normally read from the netCDF file
by the {@link #read()} method,
+     * but this {@code setValues(Object)} method may also be invoked if the caller wants
to overwrite those values.
+     *
+     * @param  array  the values as an array of primitive type (for example {@code float[]}).
+     * @throws ArithmeticException if the dimensions of this variable are too large.
+     */
+    final void setValues(final Object array) {
+        final DataType dataType = getDataType();
+        if (dataType == DataType.CHAR) {
+            int n = getNumDimensions();
+            if (n >= 2) {
+                final List<Dimension> dimensions = getGridDimensions();
+                final int length = Math.toIntExact(dimensions.get(--n).length());
+                int count = Math.toIntExact(dimensions.get(--n).length());
+                while (n > 0) {
+                    count = Math.multiplyExact(count, Math.toIntExact(dimensions.get(--n).length()));
+                }
+                final String[] strings = createStringArray((byte[]) array, count, length,
decoder.getEncoding());
+                /*
+                 * Following method calls take the array reference without cloning it.
+                 * Consequently creating those two objects now (even if we may not use them)
is reasonably cheap.
+                 */
+                values        = Vector.create(strings, false);
+                valuesAnyType = UnmodifiableArrayList.wrap(strings);
+                return;
+            }
+        }
+        Vector data = createDecimalVector(array, dataType.isUnsigned);
+        /*
+         * Do not invoke Vector.compress(…) if data are externally cached. Compressing
vectors is useful only when
+         * original array is discarded. But the UCAR library has its own cache mechanism
which may keep references
+         * to the original arrays. Consequently compressing vectors may result in data being
duplicated.
+         */
+        if (!isExternallyCached()) {
+            /*
+             * This method is usually invoked with vector of increasing or decreasing values.
Set a tolerance threshold to
+             * the precision of greatest (in magnitude) number, provided that this precision
is not larger than increment.
+             * If values are not sorted in increasing or decreasing order, then the tolerance
computed below may be smaller
+             * than optimal value. This is okay because it will cause more conservative compression
+             * (i.e. it does not increase the risk of data loss).
+             */
+            double tolerance = 0;
+            if (Numbers.isFloat(data.getElementType())) {
+                final int n = data.size() - 1;
+                if (n >= 0) {
+                    double first = data.doubleValue(0);
+                    double last  = data.doubleValue(n);
+                    double inc   = Math.abs((last - first) / n);
+                    if (!Double.isNaN(inc)) {
+                        double ulp = Math.ulp(Math.max(Math.abs(first), Math.abs(last)));
+                        tolerance = Math.min(inc, ulp);
+                    }
+                }
+            }
+            data = data.compress(tolerance);
+        }
+        values = SHARED_VECTORS.unique(data);
+        valuesAnyType = values;
+    }
+
+    /**
+     * Creates an array of character strings from a "two-dimensional" array of characters
stored in a flat array.
+     * For each element, leading and trailing spaces and control codes are trimmed.
+     * The array does not contain null element but may contain empty strings.
+     *
+     * @param  chars     the "two-dimensional" array of characters stored in a flat array.
+     * @param  count     number of string elements (size of first dimension).
+     * @param  length    number of characters in each element (size of second dimension).
+     * @param  encoding  conversion from bytes to characters.
+     * @return array of character strings.
+     */
+    private static String[] createStringArray(final byte[] chars, final int count, final
int length, final Charset encoding) {
+        final String[] strings = new String[count];
+        String previous = "";                       // For sharing same `String` instances
when same value is repeated.
+        int plo = 0, phi = 0;                       // Index range of bytes used for building
the previous string.
+        int lower = 0;
+        for (int i=0; i<count; i++) {
+            String element = "";
+            final int upper = lower + length;
+            for (int j=upper; --j >= lower;) {
+                if (Byte.toUnsignedInt(chars[j]) > ' ') {
+                    while (Byte.toUnsignedInt(chars[lower]) <= ' ') lower++;
+                    if (JDK9.equals(chars, lower, ++j, chars, plo, phi)) {
+                        element = previous;
+                    } else {
+                        element  = new String(chars, lower, j - lower, encoding);
+                        previous = element;
+                        plo      = lower;
+                        phi      = j;
+                    }
+                    break;
+                }
+            }
+            strings[i] = element;
+            lower = upper;
+        }
+        return strings;
+    }
+
+    /**
      * Wraps the given data in a {@link Vector} with the assumption that accuracy in base
10 matters.
      * This method is suitable for coordinate axis variables, but should not be used for
the main data.
      *
@@ -978,20 +1155,20 @@ public abstract class Variable extends Node {
      * @param  gridToCRS  the matrix in which to set scale and offset coefficient.
      * @param  srcDim     the source dimension, which is a dimension of the grid. Identifies
the matrix column of scale factor.
      * @param  tgtDim     the target dimension, which is a dimension of the CRS.  Identifies
the matrix row of scale factor.
-     * @param  values     the vector to use for computing scale and offset.
+     * @param  data       the vector to use for computing scale and offset.
      * @return whether this method has successfully set the scale and offset coefficients.
      * @throws IOException if an error occurred while reading the data.
      * @throws DataStoreException if a logical error occurred.
      */
-    protected boolean trySetTransform(final Matrix gridToCRS, final int srcDim, final int
tgtDim, final Vector values)
+    protected boolean trySetTransform(final Matrix gridToCRS, final int srcDim, final int
tgtDim, final Vector data)
             throws IOException, DataStoreException
     {
-        final int n = values.size() - 1;
+        final int n = data.size() - 1;
         if (n >= 0) {
-            final double first = values.doubleValue(0);
+            final double first = data.doubleValue(0);
             Number increment;
             if (n >= 1) {
-                final double last = values.doubleValue(n);
+                final double last = data.doubleValue(n);
                 double error;
                 if (getDataType() == DataType.FLOAT) {
                     error = Math.max(Math.ulp((float) first), Math.ulp((float) last));
@@ -999,7 +1176,7 @@ public abstract class Variable extends Node {
                     error = Math.max(Math.ulp(first), Math.ulp(last));
                 }
                 error = Math.max(Math.ulp(last - first), error) / n;
-                increment = values.increment(error);                        // May return
null.
+                increment = data.increment(error);                          // May return
null.
             } else {
                 increment = Double.NaN;
             }
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
index 4fb9046..2f4ce79 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
@@ -166,6 +166,7 @@ public final class ChannelDecoder extends Decoder {
      * its effect is local to that variable.
      *
      * @see #NAME_ENCODING
+     * @see #getEncoding()
      * @see #readValues(DataType, int)
      */
     private Charset encoding;
@@ -631,18 +632,6 @@ public final class ChannelDecoder extends Decoder {
         return variables;
     }
 
-    /**
-     * Checks and potentially modifies the content of this dataset for conventions other
than CF-conventions.
-     * This method should be invoked after construction for handling the particularities
of some datasets
-     * (HYCOM, …).
-     *
-     * @throws IOException if an error occurred while reading the channel.
-     * @throws DataStoreContentException if an error occurred while interpreting the netCDF
file content.
-     */
-    public final void applyOtherConventions() throws IOException, DataStoreContentException
{
-        HYCOM.convert(this, variables);
-    }
-
 
 
     // --------------------------------------------------------------------------------------------
@@ -883,6 +872,17 @@ public final class ChannelDecoder extends Decoder {
     }
 
     /**
+     * Returns the encoding for attribute or variable data.
+     * This is <strong>not</strong> the encoding of netCDF names.
+     *
+     * @return encoding of data (not the encoding of netCDF names).
+     */
+    @Override
+    public Charset getEncoding() {
+        return encoding;
+    }
+
+    /**
      * Returns all variables found in the netCDF file.
      * This method returns a direct reference to an internal array - do not modify.
      *
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index aab54bd..ab58f0f 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -50,7 +50,6 @@ import org.apache.sis.util.collection.TreeTable;
 import org.apache.sis.util.CharSequences;
 import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.Classes;
-import org.apache.sis.util.Numbers;
 import org.apache.sis.measure.Units;
 import org.apache.sis.math.Vector;
 
@@ -138,6 +137,8 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo> {
 
     /**
      * The netCDF type of data, or {@code null} if unknown.
+     *
+     * @see #getDataType()
      */
     private final DataType dataType;
 
@@ -159,13 +160,6 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo> {
     boolean isCoordinateSystemAxis;
 
     /**
-     * The values of the whole variable, or {@code null} if not yet read. This vector should be assigned only
-     * for relatively small variables, or for variables that are critical to the use of other variables
-     * (for example the values in coordinate system axes).
-     */
-    private transient Vector values;
-
-    /**
      * Creates a new variable.
      *
      * @param  decoder     the netCDF file where this variable is stored.
@@ -572,78 +566,45 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo> {
     }
 
     /**
-     * Sets the values in this variable. The values are normally read from the netCDF file by the {@link #read()} method,
-     * but this {@code setValues(Object)} method may also be invoked if we want to overwrite those values.
-     *
-     * @param  array  the values as an array of primitive type (for example {@code float[]}.
-     */
-    final void setValues(final Object array) {
-        Vector data = createDecimalVector(array, dataType.isUnsigned);
-        /*
-         * This method is usually invoked with vector of increasing or decreasing values. Set a tolerance threshold to the
-         * precision of greatest (in magnitude) number, provided that this precision is not larger than increment. If values
-         * are not sorted in increasing or decreasing order, the tolerance computed below will be smaller than it could be.
-         * This is okay since it will cause more conservative compression (i.e. it does not increase the risk of data loss).
-         */
-        double tolerance = 0;
-        if (Numbers.isFloat(data.getElementType())) {
-            final int n = data.size() - 1;
-            if (n >= 0) {
-                double first = data.doubleValue(0);
-                double last  = data.doubleValue(n);
-                double inc   = Math.abs((last - first) / n);
-                if (!Double.isNaN(inc)) {
-                    double ulp = Math.ulp(Math.max(Math.abs(first), Math.abs(last)));
-                    tolerance = Math.min(inc, ulp);
-                }
-            }
-        }
-        values = data.compress(tolerance);
-        values = SHARED_VECTORS.unique(values);
-    }
-
-    /**
      * Reads all the data for this variable and returns them as an array of a Java primitive type.
      * Multi-dimensional variables are flattened as a one-dimensional array (wrapped in a vector).
      * Fill values/missing values are replaced by NaN if {@link #hasRealValues()} is {@code true}.
      * The vector is cached and returned as-is in all future invocation of this method.
      *
      * @throws ArithmeticException if the size of the variable exceeds {@link Integer#MAX_VALUE}, or other overflow occurs.
+     *
+     * @see #read()
      */
     @Override
-    @SuppressWarnings("ReturnOfCollectionOrArrayField")
-    public Vector read() throws IOException, DataStoreContentException {
-        if (values == null) {
-            if (reader == null) {
-                throw new DataStoreContentException(unknownType());
-            }
-            final int    dimension   = dimensions.length;
-            final long[] lower       = new long[dimension];
-            final long[] upper       = new long[dimension];
-            final int [] subsampling = new int [dimension];
-            for (int i=0; i<dimension; i++) {
-                upper[i] = dimensions[(dimension - 1) - i].length();
-                subsampling[i] = 1;
-            }
-            final Region region = new Region(upper, lower, upper, subsampling);
-            applyUnlimitedDimensionStride(region);
-            Object array = reader.read(region);
-            replaceNaN(array);
-            /*
-             * If we can convert a double[] array to a float[] array, we should do that before
-             * to invoke 'setValues(array)' - we can not rely on data.compress(tolerance). The
-             * reason is because we assume that float[] arrays are accurate in base 10 even if
-             * the data were originally stored as doubles. The Vector class does not make such
-             * assumption since it is specific to what we observe with netCDF files. To enable
-             */
-            if (array instanceof double[]) {
-                final float[] copy = ArraysExt.copyAsFloatsIfLossless((double[]) array);
-                if (copy != null) array = copy;
-            }
-            setValues(array);
+    protected Object readFully() throws IOException, DataStoreContentException {
+        if (reader == null) {
+            throw new DataStoreContentException(unknownType());
         }
-        return values;
+        final int    dimension   = dimensions.length;
+        final long[] lower       = new long[dimension];
+        final long[] upper       = new long[dimension];
+        final int [] subsampling = new int [dimension];
+        for (int i=0; i<dimension; i++) {
+            upper[i] = dimensions[(dimension - 1) - i].length();
+            subsampling[i] = 1;
+        }
+        final Region region = new Region(upper, lower, upper, subsampling);
+        applyUnlimitedDimensionStride(region);
+        Object array = reader.read(region);
+        replaceNaN(array);
+        /*
+         * If we can convert a double[] array to a float[] array, we should do that before
+         * to invoke 'setValues(array)' - we can not rely on data.compress(tolerance). The
+         * reason is because we assume that float[] arrays are accurate in base 10 even if
+         * the data were originally stored as doubles. The Vector class does not make such
+         * assumption since it is specific to what we observe with netCDF files. To enable
+         * this assumption, we need to convert to float[] before createDecimalVector(…).
+         */
+        if (array instanceof double[]) {
+            final float[] copy = ArraysExt.copyAsFloatsIfLossless((double[]) array);
+            if (copy != null) array = copy;
+        }
+        return array;
     }
 
     /**
@@ -677,9 +638,6 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo> {
         if (reader == null) {
             throw new DataStoreContentException(unknownType());
         }
-        if (values != null) {
-            throw new DataStoreException();     // TODO: create a view.
-        }
         /*
          * NetCDF sorts datas in reverse dimension order. Example:
          *
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/DecoderWrapper.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/DecoderWrapper.java
index 36a6bda..64496d5 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/DecoderWrapper.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/DecoderWrapper.java
@@ -24,6 +24,8 @@ import java.util.Formatter;
 import java.util.Collection;
 import java.util.Collections;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import ucar.nc2.Group;
 import ucar.nc2.Attribute;
 import ucar.nc2.VariableIF;
@@ -383,6 +385,17 @@ public final class DecoderWrapper extends Decoder implements CancelTask {
     }
 
     /**
+     * Returns the encoding for attribute or variable data.
+     * This is <strong>not</strong> the encoding of netCDF names.
+     *
+     * @return encoding of data (not the encoding of netCDF names).
+     */
+    @Override
+    public Charset getEncoding() {
+        return StandardCharsets.ISO_8859_1;
+    }
+
+    /**
      * Returns all variables found in the netCDF file.
      * This method returns a direct reference to an internal array - do not modify.
      *
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/VariableWrapper.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/VariableWrapper.java
index 95ee819..96b0100 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/VariableWrapper.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/ucar/VariableWrapper.java
@@ -80,13 +80,6 @@ final class VariableWrapper extends Variable {
     private final VariableIF raw;
 
     /**
-     * The values of the whole variable, or {@code null} if not yet read. This vector should be assigned only
-     * for relatively small variables, or for variables that are critical to the use of other variables
-     * (for example the values in coordinate system axes).
-     */
-    private transient Vector values;
-
-    /**
      * {@code true} if this variable is an enumeration.
      */
     private final boolean isEnumeration;
@@ -466,24 +459,27 @@ final class VariableWrapper extends Variable {
     }
 
     /**
+     * Notifies the parent class that UCAR library may cache the values provided by this variable.
+     * This is an indication that the parent class should not invoke {@link Vector#compress(double)}.
+     * Compressing vectors is useful only if the original array is discarded.
+     * But the UCAR library has its own cache mechanism which may keep references to the original arrays.
+     * Consequently compressing vectors may result in data being duplicated.
+     */
+    @Override
+    protected boolean isExternallyCached() {
+        return true;
+    }
+
+    /**
      * Reads all the data for this variable and returns them as an array of a Java primitive type.
      * Multi-dimensional variables are flattened as a one-dimensional array (wrapped in a vector).
      * This method may replace fill/missing values by NaN values and caches the returned vector.
+     *
+     * @see #read()
      */
     @Override
-    @SuppressWarnings("ReturnOfCollectionOrArrayField")
-    public Vector read() throws IOException {
-        if (values == null) {
-            final Array array = variable.read();                // May be already cached by the UCAR library.
-            values = createDecimalVector(get1DJavaArray(array), variable.isUnsigned());
-            values = SHARED_VECTORS.unique(values);
-            /*
-             * Do not invoke Vector.compress(…). Compressing vectors is useful only if the original array
-             * is discarded. But the UCAR library has its own cache mechanism which may keep references to
-             * the original arrays. Consequently compressing vectors may result in data being duplicated.
-             */
-        }
-        return values;
+    protected Object readFully() throws IOException {
+        return get1DJavaArray(variable.read());             // May be already cached by the UCAR library.
     }
 
     /**
@@ -540,7 +536,7 @@ final class VariableWrapper extends Variable {
      * This method is invoked only for variables that represent a coordinate system axis.
      */
     @Override
-    protected boolean trySetTransform(final Matrix gridToCRS, final int srcDim, final int tgtDim, final Vector values)
+    protected boolean trySetTransform(final Matrix gridToCRS, final int srcDim, final int tgtDim, final Vector data)
             throws IOException, DataStoreException
     {
         if (variable instanceof CoordinateAxis1D) {
@@ -559,7 +555,7 @@ final class VariableWrapper extends Variable {
                  */
             }
         }
-        return super.trySetTransform(gridToCRS, srcDim, tgtDim, values);
+        return super.trySetTransform(gridToCRS, srcDim, tgtDim, data);
     }
 
     /**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
index 4eb4f78..4a7d090 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
@@ -277,9 +277,7 @@ public class NetcdfStoreProvider extends DataStoreProvider {
         Object keepOpen;
         final ChannelDataInput input = connector.getStorageAs(ChannelDataInput.class);
         if (input != null) try {
-            final ChannelDecoder cd = new ChannelDecoder(input, connector.getOption(OptionKey.ENCODING), geomlib, listeners);
-            cd.applyOtherConventions();
-            decoder = cd;
+            decoder = new ChannelDecoder(input, connector.getOption(OptionKey.ENCODING), geomlib, listeners);
             keepOpen = input;
         } catch (DataStoreException | ArithmeticException e) {
             final String path = connector.getStorageAs(String.class);
@@ -297,6 +295,7 @@ public class NetcdfStoreProvider extends DataStoreProvider {
             decoder = createByReflection(keepOpen, true, geomlib, listeners);
         }
         connector.closeAllExcept(keepOpen);
+        decoder.applyOtherConventions();
         return decoder;
     }
 
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HyperRectangleReader.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HyperRectangleReader.java
index 021c0f9..8f1aeb5 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HyperRectangleReader.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HyperRectangleReader.java
@@ -146,7 +146,7 @@ loop:       do {
                      * After we have read as much contiguous data as we can (may be a row, or a plane, or
                      * a cube, etc. depending if we have to skip values or not between rows/planes/cubes),
                      * search the highest dimension which is going to change (i.e. are we going to start a
-                     * new row, or a new plane, or a new cube?). This determine how many bytes we have to
+                     * new row, or a new plane, or a new cube?). This determines how many bytes we have to
                      * skip.
                      */
                     if (++cursor[i] < region.targetSize[contiguousDataDimension + i]) {


Mime
View raw message