sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject [sis] 01/03: More effort in estimating a tolerance threshold for vector compression.
Date Thu, 20 Dec 2018 15:26:56 GMT
This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 0bc82eac53ab17f20cc5f022d96c058b6737c5af
Author: Martin Desruisseaux <martin.desruisseaux@geomatys.com>
AuthorDate: Thu Dec 20 10:35:34 2018 +0100

    More effort in estimating a tolerance threshold for vector compression.
---
 .../org/apache/sis/internal/util/Numerics.java     | 42 ++++++++++++++++++++++
 .../src/main/java/org/apache/sis/math/Vector.java  |  9 +++--
 .../org/apache/sis/internal/util/NumericsTest.java | 13 +++++++
 .../sis/internal/netcdf/impl/VariableInfo.java     | 39 ++++++++++++++++++--
 4 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/Numerics.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/Numerics.java
index 519d83b..757e519 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/Numerics.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/Numerics.java
@@ -20,6 +20,7 @@ import java.util.Map;
 import java.util.HashMap;
 import org.apache.sis.util.Debug;
 import org.apache.sis.util.Static;
+import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.ComparisonMode;
 import org.apache.sis.math.DecimalFunctions;
 import org.opengis.referencing.operation.Matrix;    // For javadoc
@@ -223,6 +224,47 @@ public final class Numerics extends Static {
     }
 
     /**
+     * Returns a copy of the given array where each value has been casted to the {@code float} type,
+     * but only if this cast is lossless. If any cast causes data loss, then this method returns {@code null}.
+     *
+     * @param  data  the array to copy.
+     * @return a copy of the given array with values casted to the {@code float} type,
+     *         or {@code null} if the cast would cause data loss.
+     */
+    public static float[] copyAsFloatsIfLossless(final double[] data) {
+        /*
+         * Before allocating a new array, perform a quick sampling of a few values.
+         * Basically the first value, the last value, a value in the middle and a few others.
+         */
+        int i = data.length - 1;
+        if (i < 0) {
+            return ArraysExt.EMPTY_FLOAT;
+        }
+        for (;;) {
+            final double d = data[i];
+            if (Double.doubleToRawLongBits(d) != Double.doubleToRawLongBits((float) d)) {
+                return null;
+            }
+            if (i == 0) break;
+            i >>>= 1;
+        }
+        /*
+         * At this point the quick sampling found no data loss. We can now allocate the array,
+         * but we will still need to check for each value, which may interrupt the copy at any time.
+         */
+        final float[] result = new float[data.length];
+        for (i = data.length; --i >= 0;) {
+            final double d = data[i];
+            final float  f = (float) d;
+            if (Double.doubleToRawLongBits(d) != Double.doubleToRawLongBits(f)) {
+                return null;
+            }
+            result[i] = f;
+        }
+        return result;
+    }
+
+    /**
      * Returns a copy of the given array where each value has been casted to the {@code float}
type.
      *
      * @param  data  the array to copy, or {@code null}.
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
index defac99..dcf9fac 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
@@ -709,7 +709,11 @@ search:     for (;;) {
              */
             if (type >= Numbers.FLOAT && type <= Numbers.DOUBLE) {
                 final double first = doubleValue(0);
-                final double inc = (doubleValue(--i) - first) / i;
+                double inc = (doubleValue(--i) - first) / i;                            
 // First estimation of increment.
+                final int pz = Math.max(0, Math.min(i, (int) Math.rint(-first / inc))); 
 // Presumed index of value zero.
+                if (doubleValue(pz) == 0) {
+                    inc = (pz == i) ? -doubleValue(pz-1) : doubleValue(pz+1);   // Presumed
less subject to rounding errors.
+                }
                 if (type == Numbers.FLOAT) {
                     while (i >= 1) {
                         final float  value = floatValue(i);
@@ -723,7 +727,8 @@ search:     for (;;) {
                     if (f == inc) return f;                            // Use the java.lang.Float
wrapper class if possible.
                 } else {
                     while (i >= 1) {
-                        if (!(Math.abs(first + inc*i - doubleValue(i--)) <= tolerance))
{       // Use '!' for catching NaN.
+                        final double delta = Math.abs(first + inc*i - doubleValue(i--));
+                        if (!(delta <= tolerance)) {                   // Use '!' for
catching NaN.
                             return null;
                         }
                     }
diff --git a/core/sis-utility/src/test/java/org/apache/sis/internal/util/NumericsTest.java b/core/sis-utility/src/test/java/org/apache/sis/internal/util/NumericsTest.java
index dcaa21e..6e880b8 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/internal/util/NumericsTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/internal/util/NumericsTest.java
@@ -84,6 +84,19 @@ public final strictfp class NumericsTest extends TestCase {
     }
 
     /**
+     * Tests {@link Numerics#copyAsFloatsIfLossless(double[])}.
+     */
+    @Test
+    public void testCopyAsFloatsIfLossless() {
+        double[] array = {2, 0.5, 0.25, Double.NaN, Double.POSITIVE_INFINITY};
+        float[] result = Numerics.copyAsFloatsIfLossless(array);
+        assertNotNull(result);
+        assertArrayEquals(new float[] {2f, 0.5f, 0.25f, Float.NaN, Float.POSITIVE_INFINITY},
result, 0f);
+        array[3] = 0.3333333333333;
+        assertNull(Numerics.copyAsFloatsIfLossless(array));
+    }
+
+    /**
      * Tests {@link Numerics#isSimplePrecision(double[])}.
      */
     @Test
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index 3fbf5ac..99c9e82 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -39,11 +39,13 @@ import org.apache.sis.internal.storage.io.ChannelDataInput;
 import org.apache.sis.internal.storage.io.HyperRectangleReader;
 import org.apache.sis.internal.storage.io.Region;
 import org.apache.sis.internal.util.StandardDateFormat;
+import org.apache.sis.internal.util.Numerics;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.storage.DataStoreContentException;
 import org.apache.sis.storage.netcdf.AttributeNames;
 import org.apache.sis.util.logging.WarningListeners;
 import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Numbers;
 import org.apache.sis.measure.Units;
 import org.apache.sis.math.Vector;
 
@@ -669,7 +671,27 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
      * @param  array  the values as an array of primitive type (for example {@code float[]}.
      */
     final void setValues(final Object array) {
-        values = createDecimalVector(array, dataType.isUnsigned).compress(0);
+        Vector data = createDecimalVector(array, dataType.isUnsigned);
+        /*
+         * This method is usually invoked with a vector of increasing or decreasing values. Set a tolerance threshold to the
+         * precision of the greatest (in magnitude) number, provided that this precision is not larger than the increment. If values
+         * are not sorted in increasing or decreasing order, the tolerance computed below will be smaller than it could be.
+         * This is okay since it will only cause more conservative compression (i.e. it does not increase the risk of data loss).
+         */
+        double tolerance = 0;
+        if (Numbers.isFloat(data.getElementType())) {
+            final int n = data.size() - 1;
+            if (n >= 0) {
+                double first = data.doubleValue(0);
+                double last  = data.doubleValue(n);
+                double inc   = Math.abs((last - first) / n);
+                if (!Double.isNaN(inc)) {
+                    double ulp = Math.ulp(Math.max(Math.abs(first), Math.abs(last)));
+                    tolerance = Math.min(inc, ulp);
+                }
+            }
+        }
+        values = data.compress(tolerance);
     }
 
     /**
@@ -696,7 +718,20 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
             }
             final Region region = new Region(upper, lower, upper, subsampling);
             applyUnlimitedDimensionStride(region);
-            setValues(reader.read(region));
+            Object array = reader.read(region);
+            /*
+             * If we can convert a double[] array to a float[] array, we should do that before
+             * invoking 'setValues(array)' - we can not rely on data.compress(tolerance). The
+             * reason is that we assume that float[] arrays are accurate in base 10 even if
+             * the data were originally stored as doubles. The Vector class does not make such
+             * assumption since it is specific to what we observe with netCDF files. To enable
+             * this assumption, we need to convert to float[] before createDecimalVector(…).
+             */
+            if (array instanceof double[]) {
+                final float[] copy = Numerics.copyAsFloatsIfLossless((double[]) array);
+                if (copy != null) array = copy;
+            }
+            setValues(array);
         }
         return values;
     }


Mime
View raw message