sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject [sis] branch geoapi-4.0 updated: Implement a mechanism for detecting repetitions in a vector. This is needed for simplifying grid geometries found in some netCDF files, for example HYCOM data.
Date Wed, 21 Nov 2018 18:57:26 GMT
This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
     new bff02e8  Implement a mechanism for detecting repetitions in a vector. This is needed for simplifying grid geometries found in some netCDF files, for example HYCOM data.
bff02e8 is described below

commit bff02e8de92425654ac0ca09f5b17910a079f1af
Author: Martin Desruisseaux <martin.desruisseaux@geomatys.com>
AuthorDate: Wed Nov 21 19:56:13 2018 +0100

    Implement a mechanism for detecting repetitions in a vector. This is needed for simplifying grid geometries found in some netCDF files, for example HYCOM data.
---
 .../org/apache/sis/internal/metadata/Merger.java   |   2 +-
 .../main/java/org/apache/sis/math/ArrayVector.java |  87 ++++++-
 .../java/org/apache/sis/math/RepeatedVector.java   | 268 ++++++++++++++++++++
 .../src/main/java/org/apache/sis/math/Vector.java  | 274 ++++++++++++++++++---
 .../org/apache/sis/math/RepeatedVectorTest.java    | 126 ++++++++++
 .../test/java/org/apache/sis/math/VectorTest.java  |  32 ++-
 .../apache/sis/test/suite/UtilityTestSuite.java    |   1 +
 .../org/apache/sis/storage/StorageConnector.java   |   4 +-
 8 files changed, 758 insertions(+), 36 deletions(-)

diff --git a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/Merger.java b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/Merger.java
index b1438f2..f8bf6ec 100644
--- a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/Merger.java
+++ b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/Merger.java
@@ -144,7 +144,7 @@ public class Merger {
         /*
          * Only after we verified that the merge operation is theoretically allowed, remember that
          * we are going to merge those two metadata and verify that we are not in an infinite loop.
-         * We will also verify that the target metadata does not contain a source, or vis-versa.
+         * We will also verify that the target metadata does not contain a source, or vice-versa.
          */
         {   // For keeping 'sourceDone' and 'targetDone' more local.
             final Boolean sourceDone = done.put(source, Boolean.FALSE);
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
index d945a1a..4e4e032 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/ArrayVector.java
@@ -17,6 +17,7 @@
 package org.apache.sis.math;
 
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.function.IntSupplier;
 import org.apache.sis.util.Numbers;
 import org.apache.sis.util.resources.Errors;
@@ -27,10 +28,10 @@ import org.apache.sis.measure.NumberRange;
 
 /**
  * A vector backed by an array of a primitive type. This class does not copy the array,
- * so changes in the underlying array is reflected in this vector and vis-versa.
+ * so changes in the underlying array are reflected in this vector and vice-versa.
  *
  * @author  Martin Desruisseaux (MPO, Geomatys)
- * @version 0.8
+ * @version 1.0
  * @since   0.8
  * @module
  */
@@ -243,6 +244,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override int indexOf(final int toSearch, int index, final boolean equality) {
+            final long first = Double.doubleToLongBits(array[toSearch]);
+            while (index < array.length && (first == Double.doubleToLongBits(array[index])) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<Double> range(final IntSupplier indices, int n) {
             double min = Double.POSITIVE_INFINITY;
@@ -264,6 +272,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
         @Override public float[] floatValues() {
             return Numerics.copyAsFloats(array);
         }
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
@@ -317,6 +330,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override int indexOf(final int toSearch, int index, final boolean equality) {
+            final int first = Float.floatToIntBits(array[toSearch]);
+            while (index < array.length && (first == Float.floatToIntBits(array[index])) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override final NumberRange<?> range(final IntSupplier indices, int n) {
             float min = Float.POSITIVE_INFINITY;
@@ -342,6 +362,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
         @Override public final float[] floatValues() {
             return array.clone();
         }
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
@@ -373,6 +398,16 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return NumberRange.create(DecimalFunctions.floatToDouble(min), true,
                                       DecimalFunctions.floatToDouble(max), true);
         }
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public int hashCode() {
+            int hash = 0;
+            final int size = size();
+            for (int i=0; i<size; i++) {
+                hash = PRIME * hash + Double.hashCode(doubleValue(i));
+            }
+            return hash;
+        }
     }
 
     /**
@@ -419,6 +454,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override final int indexOf(final int toSearch, int index, final boolean equality) {
+            final long first = array[toSearch];
+            while (index < array.length && (first == array[index]) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             long min = Long.MAX_VALUE;
@@ -459,6 +501,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             }
             return null;
         }
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public final int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
@@ -506,6 +553,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override final int indexOf(final int toSearch, int index, final boolean equality) {
+            final int first = array[toSearch];
+            while (index < array.length && (first == array[index]) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             int min = Integer.MAX_VALUE;
@@ -549,6 +603,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             }
             return null;
         }
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public final int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
@@ -597,6 +656,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override final int indexOf(final int toSearch, int index, final boolean equality) {
+            final short first = array[toSearch];
+            while (index < array.length && (first == array[index]) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             short min = Short.MAX_VALUE;
@@ -614,6 +680,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
          * (except if the increment is zero) and the implicit conversion of 'short' to 'int'
          * performed by Java would make the implementation a little bit more tricky.
          */
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public final int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
@@ -663,6 +734,13 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
             return old;
         }
 
+        /** Finds index of a match or mismatch (depending on {@code equality}). */
+        @Override final int indexOf(final int toSearch, int index, final boolean equality) {
+            final byte first = array[toSearch];
+            while (index < array.length && (first == array[index]) != equality) index++;
+            return index;
+        }
+
         /** Finds the minimum and maximum values in the array or in a subset of the array. */
         @Override NumberRange<?> range(final IntSupplier indices, int n) {
             byte min = Byte.MAX_VALUE;
@@ -680,6 +758,11 @@ abstract class ArrayVector<E extends Number> extends Vector implements CheckedCo
          * (except if the increment is zero) and the implicit conversion of 'byte' to 'int'
          * performed by Java would make the implementation a little bit more tricky.
          */
+
+        /** Applies hash code contract specified by {@link Vector#hashCode()}. */
+        @Override public final int hashCode() {
+            return Arrays.hashCode(array);
+        }
     }
 
     /**
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
new file mode 100644
index 0000000..c50fa0e
--- /dev/null
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/RepeatedVector.java
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.math;
+
+import java.io.Serializable;
+import java.util.function.IntSupplier;
+import org.apache.sis.measure.NumberRange;
+import org.apache.sis.util.ArgumentChecks;
+import org.apache.sis.util.resources.Errors;
+
+
+/**
+ * A vector whose values are the repetitions of the values given in a base vector.
+ * This vector can be created as a result of {@link Vector#compress(double)}.
+ * The intent is to handle the cases found in netCDF files where localization grids
+ * (e.g. the {@code "longitude"} variable storing longitude values of all points in a grid)
+ * contain a lot of repetitions.
+ *
+ * <p>{@link #cycleLength} is usually the length of the {@linkplain #base} vector, but not necessarily.
+ * If {@link #occurrences} = 1 and {@code cycleLength} = 4 for example, then this class handles repetitions like below:</p>
+ *
+ * {@preformat text
+ *    10 12 15 20
+ *    10 12 15 20    ← new cycle
+ *    10 12 15 20    ← new cycle
+ *    10 12 15 20    ← new cycle
+ *    …etc…
+ * }
+ *
+ * If {@link #occurrences} &gt; 1, then this class handles repetitions in a different way
+ * (in this example, {@link #cycleLength} is still 4):
+ *
+ * {@preformat text
+ *    10 10 10 10
+ *    12 12 12 12
+ *    15 15 15 15
+ *    20 20 20 20
+ *    10 10 10 10    ← new cycle
+ *    12 12 12 12
+ *    …etc…
+ * }
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ * @version 1.0
+ * @since   1.0
+ * @module
+ */
+final class RepeatedVector extends Vector implements Serializable {
+    /**
+     * For cross-version compatibility.
+     */
+    private static final long serialVersionUID = 3607036775685492552L;
+
+    /**
+     * The vector from which this vector is derived.
+     */
+    private final Vector base;
+
+    /**
+     * Number of times that each {@linkplain #base} element appears in a row before moving
+     * to the next {@code base} element. See class javadoc for more information.
+     */
+    private final int occurrences;
+
+    /**
+     * Length of the sequence of values to repeat, after conversion to base vector indices.
+     * Usually equal to the length of the {@linkplain #base} vector but can also be smaller.
+     * Shall not be greater than {@code base.size()}. See class javadoc for more information.
+     */
+    private final int cycleLength;
+
+    /**
+     * The size of this vector.
+     * This is often {@link #cycleLength} × {@link #occurrences}, but not necessarily.
+     */
+    private final int size;
+
+    /**
+     * Creates a new vector of repeated data.
+     *
+     * @param base         the vector from which this vector is derived.
+     * @param occurrences  number of times that each element is repeated.
+     * @param cycleLength  length of the sequence of values to repeat.
+     * @param size         this vector size, usually {@code base.size() * occurrences}.
+     */
+    private RepeatedVector(final Vector base, final int occurrences, final int cycleLength, final int size) {
+        this.base        = base;
+        this.occurrences = occurrences;
+        this.cycleLength = cycleLength;
+        this.size        = size;
+        assert cycleLength <= base.size() : cycleLength;
+    }
+
+    /**
+     * Creates a vector of repeated data from the result of a call to {@link Vector#repetitions()}.
+     *
+     * @param base         the vector from which this vector is derived.
+     * @param repetitions  results of {@link Vector#repetitions()}. Must be non-empty.
+     * @param tolerance    tolerance factor for compression of the base vector.
+     */
+    RepeatedVector(final Vector base, final int[] repetitions, final double tolerance) {
+        size        = base.size();
+        occurrences = repetitions[0];
+        cycleLength = (repetitions.length >= 2) ? repetitions[1] : size / occurrences;
+        this.base   = base.subSampling(0, occurrences, cycleLength).compress(tolerance);
+    }
+
+    /**
+     * Converts the given index from this vector domain to an index in the {@linkplain #base} vector.
+     */
+    private int toBase(final int index) {
+        ArgumentChecks.ensureValidIndex(size, index);
+        return (index / occurrences) % cycleLength;
+    }
+
+    /**
+     * Returns the type of values, which is inherited from the {@linkplain #base} vector.
+     */
+    @Override
+    public final Class<? extends Number> getElementType() {
+        return base.getElementType();
+    }
+
+    /** Forwards to the base vector. */
+    @Override public final boolean isInteger()        {return base.isInteger();}
+    @Override public final boolean isUnsigned()       {return base.isUnsigned();}
+    @Override public final int     size()             {return size;}
+    @Override public final boolean isNaN      (int i) {return base.isNaN      (toBase(i));}
+    @Override public final double  doubleValue(int i) {return base.doubleValue(toBase(i));}
+    @Override public final float   floatValue (int i) {return base.floatValue (toBase(i));}
+    @Override public final long    longValue  (int i) {return base.longValue  (toBase(i));}
+    @Override public final int     intValue   (int i) {return base.intValue   (toBase(i));}
+    @Override public final short   shortValue (int i) {return base.shortValue (toBase(i));}
+    @Override public final byte    byteValue  (int i) {return base.byteValue  (toBase(i));}
+    @Override public final String  stringValue(int i) {return base.stringValue(toBase(i));}
+    @Override public final Number  get        (int i) {return base.get        (toBase(i));}
+
+    /**
+     * The range of values in this vector is the range of values in the {@linkplain #base} vector
+     * if we use all its data.
+     */
+    @Override
+    public final NumberRange<?> range() {
+        return (cycleLength == base.size()) ? base.range() : super.range();
+    }
+
+    /**
+     * Overridden for efficiency in case {@link #base} itself overrides that method.
+     * Overriding that method is optional; the default implementation would have worked.
+     */
+    @Override
+    final NumberRange<?> range(final IntSupplier indices, final int count) {
+        return base.range(() -> toBase(indices.getAsInt()), count);
+    }
+
+    /**
+     * Do not allow setting values.
+     */
+    @Override
+    public final Number set(int index, Number value) {
+        throw new UnsupportedOperationException(Errors.format(Errors.Keys.CanNotStoreInVector_1, value));
+    }
+
+    /**
+     * Returns the parameters used by this {@code RepeatedVector} instance on the assumption
+     * that they are the result of a previous invocation to {@link Vector#repetitions()}.
+     */
+    @Override
+    public int[] repetitions() {
+        if (cycleLength * occurrences >= size) {
+            return new int[] {occurrences};
+        } else {
+            return new int[] {occurrences, cycleLength};
+        }
+    }
+
+    /**
+     * Returns {@code null} since the repetition of a sequence of numbers implies that there is no regular increment.
+     * An exception to this rule would be if the {@linkplain #base} vector contains a constant value or if the repetition
+     * is exactly 1, but we should not have created a {@code RepeatedVector} in such cases.
+     */
+    @Override
+    public final Number increment(final double tolerance) {
+        return null;
+    }
+
+    /**
+     * Returns {@code this} since this vector is considered already compressed.
+     */
+    @Override
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    public Vector compress(final double tolerance) {
+        return this;
+    }
+
+    /**
+     * Informs {@link #pick(int...)} that this vector is backed by another vector.
+     */
+    @Override
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    final Vector backingVector() {
+        return base;
+    }
+
+    /**
+     * Converts an array of indexes used by this vector to the indexes used by the backing vector.
+     * This method must also check index validity.
+     */
+    @Override
+    final int[] toBacking(int[] indices) {
+        indices = indices.clone();
+        for (int i=0; i<indices.length; i++) {
+            indices[i] = toBase(indices[i]);
+        }
+        return indices;
+    }
+
+    /**
+     * Implementation of {@link #subSampling(int,int,int)}.
+     * Arguments validity has been verified by the caller.
+     *
+     * @param  first   index of the first value to be included in the returned view.
+     * @param  step    the index increment in this vector between two consecutive values
+     *                 in the returned vector. Can be positive, zero or negative.
+     * @param  length  the length of the vector to be returned. Can not be greater than
+     *                 the length of this vector, except if the {@code step} is zero.
+     */
+    @Override
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    Vector createSubSampling(final int first, final int step, final int length) {
+        /*
+         * If the sub-range is such that there is no more repetition,
+         * return the base vector (or a sub-range of it) directly.
+         */
+        if ((step % occurrences) == 0) {
+            final int bs    = step  / occurrences;                      // Step in the base vector.
+            final int lower = first / occurrences;                      // First index in the base vector (inclusive).
+            final int upper = lower + (length-1) * bs;                  // Last index in the base vector (inclusive).
+            if (lower >= 0 && lower <= upper && upper < base.size()
+                    && (lower / cycleLength) == (upper / cycleLength))  // Lower and upper must be members of the same cycle.
+            {
+                return base.subSampling(lower, bs, length);
+            }
+        }
+        /*
+         * We still have repetitions. Return another RepeatedVector if possible.
+         * Fallback on SubSampling wrapper only in last resort.
+         */
+        if (first < occurrences && (occurrences % step) == 0) {
+            return new RepeatedVector(base, occurrences / step, cycleLength, length);
+        }
+        return super.createSubSampling(first, step, length);
+    }
+}
diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
index a1e649d..355318f 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/math/Vector.java
@@ -23,6 +23,7 @@ import java.util.RandomAccess;
 import java.util.function.IntSupplier;
 import org.apache.sis.measure.NumberRange;
 import org.apache.sis.util.Numbers;
+import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.ArgumentChecks;
 import org.apache.sis.util.logging.Logging;
 import org.apache.sis.util.resources.Errors;
@@ -45,7 +46,7 @@ import static org.apache.sis.util.ArgumentChecks.ensureValidIndex;
  *
  * <div class="section">Instantiation</div>
  * Instances of {@code Vector} are usually created by calls to the {@link #create(Object, boolean)} static method.
- * The supplied array is not cloned – changes to the primitive array are reflected in the vector, and vis-versa.
+ * The supplied array is not cloned – changes to the primitive array are reflected in the vector, and vice-versa.
  * Vectors can be a view over a subsection of the given array, or can provide a view of the elements in reverse order,
  * <i>etc</i>. The example below creates a view over a subsection:
  *
@@ -84,7 +85,7 @@ import static org.apache.sis.util.ArgumentChecks.ensureValidIndex;
  * without concern about whether the data were really stored as {@code double} or as {@code float} values.</div>
  *
  * @author  Martin Desruisseaux (MPO, Geomatys)
- * @version 0.8
+ * @version 1.0
  *
  * @see org.apache.sis.util.collection.IntegerList
  *
@@ -104,7 +105,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
      * </ul>
      *
      * The given argument is not cloned.
-     * Consequently changes in the underlying array are reflected in this vector, and vis-versa.
+     * Consequently changes in the underlying array are reflected in this vector, and vice-versa.
      *
      * <div class="section">Unsigned integers</div>
      * Java has no primitive support for unsigned integers. But some file formats use unsigned integers,
@@ -227,6 +228,18 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
     public abstract Class<? extends Number> getElementType();
 
     /**
+     * Returns an estimation of the number of bits used by each value in this vector.
+     * This is an estimation only and should be used only as a hint.
+     */
+    private int getBitCount() {
+        try {
+            return Numbers.primitiveBitCount(getElementType());
+        } catch (IllegalArgumentException e) {
+            return Integer.SIZE;                    // Assume references compressed on 32 bits.
+        }
+    }
+
+    /**
      * Returns {@code true} if this vector contains only integer values.
      * This method may iterate over all values for performing this verification.
      *
@@ -454,6 +467,127 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
     public abstract Number set(int index, Number value);
 
     /**
+     * Returns the index of the first value which is equal (if {@code equality} is true)
+     * or different (if {@code equality} is false) to the value at the {@code toSearch} index.
+     * Subclasses should override if they can provide a more efficient implementation.
+     *
+     * @param  toSearch   index of the value to search.
+     * @param  index      index of the first value where to start the search.
+     * @param  equality   whether we search the first equal value, or the first different value.
+     * @return index of the value found, or the vector size if the value has not been found.
+     */
+    int indexOf(final int toSearch, int index, final boolean equality) {
+        final Number first = get(toSearch);
+        final int size = size();
+        while (index < size && first.equals(get(index)) != equality) index++;
+        return index;
+    }
+
+    /**
+     * Detects repetition patterns in the values contained in this vector. The repetitions detected by this method are
+     * patterns that are repeated at a regular interval over the whole vector; this method does not search for repetitions
+     * occurring at irregular intervals. This method returns an array of typically 0, 1 or 2 elements where zero elements
+     * means that no repetition has been found, one element describes a repetition (see the example below), and two elements
+     * describe a repetition of the repetitions (examples below). More elements (deeper recursion) are theoretically
+     * possible but not yet implemented.
+     *
+     * <p>If the values in this vector are of the form (<var>x</var>, <var>x</var>, …, <var>x</var>, <var>y</var>, <var>y</var>,
+     * …, <var>y</var>, <var>z</var>, <var>z</var>, …, <var>z</var>, …), then the first integer in the returned array is the
+     * number of consecutive <var>x</var> values before the <var>y</var> values. That number of occurrences must be the same
+     * as the number of consecutive <var>y</var> values before the <var>z</var> values, the number of consecutive <var>z</var>
+     * values before the next values, and so on until the end of the vector.</p>
+     *
+     * <div class="note"><b>Examples:</b>
+     * in the following vector, each value appears 4 times in a row. So the array returned by this method would be {@code {4}},
+     * meaning that the first number appears 4 times, followed by a new number appearing 4 times, followed by a new number
+     * appearing 4 times, and so on until the end of the vector.
+     *
+     * {@preformat text
+     *    10, 10, 10, 10,
+     *    12, 12, 12, 12,
+     *    15, 15, 15, 15
+     * }</div>
+     *
+     * For the next level (the second integer in the returned array), this method represents above repetitions by single entities
+     * then reapplies the same repetition detection. This method proceeds as if the (<var>x</var>, <var>x</var>, …, <var>x</var>,
+     * <var>y</var>, <var>y</var>, …, <var>y</var>, <var>z</var>, <var>z</var>, …, <var>z</var>, …) vector was replaced by a new
+     * (<b>x</b>, <b>y</b>, <b>z</b>, …) vector, then the same detection algorithm was applied recursively.
+     *
+     * <div class="note"><b>Examples:</b>
+     * in the following vector, each value appears 3 times in a row, then the sequence of 12 values itself appears 3 times.
+     * So the array returned by this method would be {@code {3,4}}, meaning that the first number appears 3 times, followed
+     * by a new number appearing 3 times, <i>etc.</i> until we counted 4 groups of 3 numbers. Then the whole sequence is
+     * repeated until the end of the vector.
+     *
+     * {@preformat text
+     *    10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+     *    10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+     *    10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18
+     * }</div>
+     *
+     * <p>This method is useful for analyzing the localization grid provided by some files (for example in netCDF format).
+     * Those grids sometimes have constant longitude for the same column index, or constant latitude for the same row index.
+     * This method can detect such regularity, which allows more efficient handling of the <cite>grid to CRS</cite> transform.</p>
+     *
+     * @return the number of times that entities (numbers, or groups of numbers) appear consecutively with identical values.
+     *         If no such repetition is found, an empty array.
+     *
+     * @since 1.0
+     */
+    public int[] repetitions() {
+        final int size = size();
+        if (size >= 2) {
+            /*
+             * For the first level of repetitions, we rely on a method to be overridden by subclasses
+             * for detecting the length of consecutive identical numbers. We could have used the more
+             * generic algorithm based on 'equals(int, int, Vector, int)' instead, but this approach
+             * is faster.
+             */
+            int r0 = 0;
+            for (int i=0; i < size; i += r0) {
+                final int p = r0;
+                r0 = indexOf(i, i+1, false) - i;
+                if (r0 <= 1 || (p % r0) != 0) {
+                    r0 = 1;
+                    break;
+                }
+            }
+            /*
+             * At this point r0 is the number of identical consecutive numbers in vectors like (x,x,x, y,y,y, z,z,z)
+             * and shall not be modified anymore for the rest of this method. This is the first integer value in the
+             * array to be returned. Following algorithm applies to deeper levels.
+             */
+            final int skip = (r0 == 1) ? 1 : 0;     // Optimization (code below would work with skip = 0 all the times).
+            int r = 0;
+nextMatch:  for (;;) {
+                r += r0;
+                if (skip != 0) {
+                    r = indexOf(0, r, true);        // Optimization for reducing the number of method calls when r0 = 1.
+                }
+                if (r >= size) break;
+                if (equals(skip, Math.min(r0, size - r), this, r + skip)) {
+                    /*
+                     * Found a possible repetition of length r. Verify if this repetition pattern is observed until
+                     * the end of the vector. If not, we will search for the next possible repetition.
+                     */
+                    for (int i=r; i<size; i += r) {
+                        if (!equals(0, Math.min(r, size - i), this, i)) {
+                            continue nextMatch;
+                        }
+                    }
+                    break;      // At this point we verified that the repetition is observed until the vector end.
+                }
+            }
+            if (r < size) {
+                return new int[] {r0, r / r0};
+            } else if (r0 != 1) {
+                return new int[] {r0};
+            }
+        }
+        return ArraysExt.EMPTY_INT;
+    }
+
+    /**
      * Returns {@code a-b} as a signed value, throwing an exception if the result overflows a {@code long}.
      * The given values will be interpreted as unsigned values if this vector {@linkplain #isUnsigned() is unsigned}.
      *
@@ -600,7 +734,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
      * the element at index <code>(first + step*<var>i</var>)</code> in this vector.
      *
      * <p>This method does not copy the values. Consequently any modification to the
-     * values of this vector will be reflected in the returned view and vis-versa.</p>
+     * values of this vector will be reflected in the returned view and vice-versa.</p>
      *
      * @param  first   index of the first value to be included in the returned view.
      * @param  step    the index increment in this vector between two consecutive values
@@ -613,9 +747,25 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
      */
     @SuppressWarnings("ReturnOfCollectionOrArrayField")
     public Vector subSampling(final int first, final int step, final int length) {
-        if (step == 1 && first == 0 && length == size()) {
+        final int size = size();
+        if (step == 1 && first == 0 && length == size) {
             return this;
         }
+        final long last = first + step * (length - 1L);
+        if (first < 0 || first >= size || last < 0 || last >= size || length < 0) {
+            final short key;
+            final Object arg1, arg2;
+            if (step == 1) {
+                key  = Errors.Keys.IllegalRange_2;
+                arg1 = first;
+                arg2 = last;
+            } else {
+                key  = Errors.Keys.IllegalArgumentValue_2;
+                arg1 = "range";
+                arg2 = "[" + first + ':' + step + ':' + last + ']';
+            }
+            throw new IndexOutOfBoundsException(Errors.format(key, arg1, arg2));
+        }
         return createSubSampling(first, step, length);
     }
 
@@ -644,8 +794,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
         final int length;
 
         /** Creates a new view over the given range. */
-        protected SubSampling(final int first, final int step, final int length) {
-            ensureValid(first, step, length);
+        SubSampling(final int first, final int step, final int length) {
             this.first  = first;
             this.step   = step;
             this.length = length;
@@ -771,7 +920,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
     /**
      * Returns a view which contains the values of this vector at the given indexes.
      * This method does not copy the values, consequently any modification to the
-     * values of this vector will be reflected in the returned view and vis-versa.
+     * values of this vector will be reflected in the returned view and vice-versa.
      *
      * <p>The indexes do not need to be in any particular order. The same index can be repeated
      * more than once. Thus it is possible to create a vector larger than the original vector.</p>
@@ -871,7 +1020,6 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
 
         /** Delegates to the enclosing vector. */
         @Override Vector createSubSampling(int first, final int step, final int length) {
-            ensureValid(first, step, length);
             final int[] ni = new int[length];
             if (step == 1) {
                 System.arraycopy(indices, first, ni, 0, length);
@@ -909,26 +1057,6 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
     }
 
     /**
-     * Ensures that the range created from the given parameters is valid.
-     */
-    static void ensureValid(final int first, final int step, final int length) {
-        if (length < 0) {
-            final short key;
-            final Object arg1, arg2;
-            if (step == 1) {
-                key  = Errors.Keys.IllegalRange_2;
-                arg1 = first;
-                arg2 = first + length;
-            } else {
-                key  = Errors.Keys.IllegalArgumentValue_2;
-                arg1 = "range";
-                arg2 = "[" + first + ':' + step + ':' + (first + step*length) + ']';
-            }
-            throw new IllegalArgumentException(Errors.format(key, arg1, arg2));
-        }
-    }
-
-    /**
      * Returns the concatenation of this vector with the given one. Indexes in the [0 … {@link #size() size} - 1]
      * range will map to this vector, while indexes in the [{@code size} … {@code size} + {@code toAppend.size}]
      * range while map to the given vector.
@@ -991,7 +1119,7 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
      * @param  tolerance  maximal difference allowed between original and compressed vectors (can be zero).
      * @return a more compact vector with the same data than this vector, or {@code this}.
      */
-    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    @SuppressWarnings({"fallthrough", "ReturnOfCollectionOrArrayField"})
     public Vector compress(final double tolerance) {
         final int length = size();
         final Number inc = increment(tolerance);
@@ -1009,6 +1137,23 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
             return new SequenceVector.Doubles(getElementType(), NaN, NaN, length);
         } while (isNaN(i++));
         /*
+         * Verify if the vector contains repetitions. If yes, then we can keep only a subregion of this vector.
+         * The thresholds below are arbitrary; they are used for deciding if it is worth removing repetitions,
+         * keeping in mind that RepeatedVector consumes about 8 words in memory in addition to the base vector.
+         * Assuming that RepeatedVector divides the vector length by 2, we need at least 16 integers before this
+         * overhead is compensated. Another threshold verifies that we do not see a repetition only because some
+         * values from the vector beginning appear at the vector end. As an arbitrary threshold, the repetition at vector
+         * end must be at least 1/4 of the vector size.
+         */
+        if (length > 20*Integer.SIZE / getBitCount()) {
+            final int[] repetitions = repetitions();
+            switch (repetitions.length) {
+                default: if (length - repetitions[1] < length/4) break;               // Otherwise fallthrough.
+                case 1:  return new RepeatedVector(this, repetitions, tolerance);
+                case 0:  break;
+            }
+        }
+        /*
          * Try to copy the values in a more compact format.
          * We will use a vector backed by IntegerList in order to use only the amount of bits needed,
          * unless that amount is exactly the number of bits of a primitive type (8, 16, 32 or 64) in
@@ -1103,4 +1248,73 @@ public abstract class Vector extends AbstractList<Number> implements RandomAcces
         }
         return buffer.append(']').toString();
     }
+
+    /**
+     * The prime number used in hash code computation. Must be the same as the prime number used
+     * in {@link Arrays#hashCode(Object[])} computation. More generally, our {@link #hashCode()}
+     * implementations must be the same as {@code hashCode(…)} implementations in {@link Arrays}.
+     */
+    static final int PRIME = 31;
+
+    /**
+     * Returns a hash code for the values in this vector. The hash code is computed as if this vector was converted
+     * to an array of {@link Number}s, then the {@link Arrays#hashCode(Object[])} method invoked for that array.
+     *
+     * @return a hash code value for the values in this vector.
+     *
+     * @since 1.0
+     */
+    @Override
+    public int hashCode() {
+        int hash = 0;
+        final int size = size();
+        for (int i=0; i<size; i++) {
+            hash = PRIME * hash + get(i).hashCode();
+        }
+        return hash;
+    }
+
+    /**
+     * Returns {@code true} if the given object is a vector containing the same values as this vector.
+     * This method performs the comparison as if the two vectors were converted to arrays of {@link Number}s,
+     * then the {@link Arrays#equals(Object[], Object[])} method invoked for those arrays.
+     *
+     * @param  object  the other object to compare with this vector.
+     * @return {@code true} if the given object is a vector containing the same values as this vector.
+     *
+     * @since 1.0
+     */
+    @Override
+    public boolean equals(final Object object) {
+        if (object == this) return true;
+        if (object instanceof Vector) {
+            final Vector other = (Vector) object;
+            final int size = size();
+            if (size == other.size()) {
+                return equals(0, size, other, 0);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns {@code true} if this vector in the given range is equal to the specified vector.
+     * NaN values are considered equal to all other NaN values, and -0.0 is different than +0.0.
+     *
+     * @param  lower        index of the first value to compare in this vector, inclusive.
+     * @param  upper        index after the last value to compare in this vector.
+     * @param  other        the other vector to compare values with this vector. May be {@code this}.
+     * @param  otherOffset  index of the first element to compare in the other vector.
+     * @return whether values over the specified range of the two vectors are equal.
+     *
+     * @todo Override in {@link ArrayVector} on JDK9.
+     */
+    private boolean equals(int lower, final int upper, final Vector other, int otherOffset) {
+        while (lower < upper) {
+            if (!get(lower++).equals(other.get(otherOffset++))) {
+                return false;
+            }
+        }
+        return true;
+    }
 }
diff --git a/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
new file mode 100644
index 0000000..9f0a5e8
--- /dev/null
+++ b/core/sis-utility/src/test/java/org/apache/sis/math/RepeatedVectorTest.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.math;
+
+import org.apache.sis.test.TestCase;
+import org.junit.Test;
+
+import static org.opengis.test.Assert.*;
+
+
+/**
+ * Tests the {@link RepeatedVector} class.
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ * @version 1.0
+ * @since   1.0
+ * @module
+ */
+public final strictfp class RepeatedVectorTest extends TestCase {
+    /**
+     * Tests the case where values in a grid are repeated horizontally.
+     */
+    @Test
+    public void testHorizontal() {
+        Vector vec = Vector.create(new int[] {
+                10, 10, 10, 10,
+                12, 12, 12, 12,
+                15, 15, 15, 15}, false);
+
+        vec = new RepeatedVector(vec, vec.repetitions(), 0);
+        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
+        assertArrayEquals(new int[] {4}, vec.repetitions());
+
+        assertEquals(10, vec.intValue  ( 0));
+        assertEquals(10, vec.shortValue( 1));
+        assertEquals(10, vec.longValue ( 2));
+        assertEquals(10, vec.intValue  ( 3));
+        assertEquals(12, vec.intValue  ( 4));
+        assertEquals(12, vec.shortValue( 7));
+        assertEquals(15, vec.longValue ( 8));
+        assertEquals(15, vec.intValue  (11));
+
+        Vector sub = vec.subSampling(0, 4, 3);
+        assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
+        assertArrayEquals(new float[] {10, 12, 15}, sub.floatValues(), (float) STRICT);
+    }
+
+    /**
+     * Tests the case where values in a grid are repeated vertically.
+     */
+    @Test
+    public void testVertical() {
+        Vector vec = Vector.create(new int[] {
+                10, 12, 15, 18,
+                10, 12, 15, 18,
+                10, 12, 15, 18}, false);
+
+        vec = new RepeatedVector(vec, vec.repetitions(), 0);
+        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
+        assertArrayEquals(new int[] {1,4}, vec.repetitions());
+
+        assertEquals(10, vec.intValue  ( 0));
+        assertEquals(12, vec.shortValue( 1));
+        assertEquals(15, vec.longValue ( 2));
+        assertEquals(18, vec.intValue  ( 3));
+        assertEquals(10, vec.intValue  ( 4));
+        assertEquals(18, vec.shortValue( 7));
+        assertEquals(10, vec.longValue ( 8));
+        assertEquals(15, vec.intValue  (10));
+
+        Vector sub = vec.subList(0, 4);
+        assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
+        assertArrayEquals(new float[] {10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+    }
+
+    /**
+     * Tests the case mixing both kinds of repetitions.
+     */
+    @Test
+    public void testMixed() {
+        Vector vec = Vector.create(new int[] {
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18}, false);
+
+        vec = new RepeatedVector(vec, vec.repetitions(), 0);
+        assertInstanceOf("Should have been compressed.", RepeatedVector.class, vec);
+        assertArrayEquals(new int[] {3,4}, vec.repetitions());
+
+        assertEquals(10, vec.intValue  ( 0));
+        assertEquals(10, vec.shortValue( 1));
+        assertEquals(10, vec.longValue ( 2));
+        assertEquals(12, vec.intValue  ( 3));
+        assertEquals(12, vec.intValue  ( 4));
+        assertEquals(15, vec.shortValue( 7));
+        assertEquals(15, vec.longValue ( 8));
+        assertEquals(18, vec.intValue  (11));
+
+        assertEquals(10, vec.intValue  (13));
+        assertEquals(12, vec.shortValue(17));
+        assertEquals(15, vec.longValue (18));
+        assertEquals(18, vec.intValue  (22));
+        assertEquals(10, vec.intValue  (24));
+        assertEquals(15, vec.shortValue(31));
+        assertEquals(18, vec.longValue (23));
+        assertEquals(12, vec.intValue  (28));
+
+        Vector sub = vec.subSampling(0, 3, 4);
+        assertFalse("Expected the backing array.", sub instanceof RepeatedVector);
+        assertArrayEquals(new float[] {10, 12, 15, 18}, sub.floatValues(), (float) STRICT);
+    }
+}
diff --git a/core/sis-utility/src/test/java/org/apache/sis/math/VectorTest.java b/core/sis-utility/src/test/java/org/apache/sis/math/VectorTest.java
index a7312ce..ab022fd 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/math/VectorTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/math/VectorTest.java
@@ -28,7 +28,7 @@ import static org.opengis.test.Assert.*;
  * Tests the {@link Vector} class.
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 0.8
+ * @version 1.0
  * @since   0.8
  * @module
  */
@@ -252,6 +252,36 @@ public final strictfp class VectorTest extends TestCase {
     }
 
     /**
+     * Tests {@link Vector#repetitions()}.
+     */
+    @Test
+    public void testRepetitions() {
+        Vector vec = Vector.create(new int[] {
+                10, 10, 10, 10,
+                12, 12, 13, 12,             // Different value (13) break the regularity.
+                15, 15, 15, 15}, false);
+        assertArrayEquals(new int[] {}, vec.repetitions());
+
+        vec = Vector.create(new int[] {
+                10, 10, 10, 10,
+                12, 12, 12, 12,
+                15, 15, 15, 15}, false);
+        assertArrayEquals(new int[] {4}, vec.repetitions());
+
+        vec = Vector.create(new int[] {
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18,
+                10, 10, 10,  12, 12, 12,  15, 15, 15,  18, 18, 18}, false);
+        assertArrayEquals(new int[] {3,4}, vec.repetitions());
+
+        vec = Vector.create(new int[] {
+                10, 12, 15, 18,
+                10, 12, 15, 18,
+                10, 12, 15, 18}, false);
+        assertArrayEquals(new int[] {1,4}, vec.repetitions());
+    }
+
+    /**
      * Tests {@link Vector#increment(double)}.
      */
     @Test
diff --git a/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java b/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java
index f2086a9..f48c77a 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java
@@ -57,6 +57,7 @@ import org.junit.BeforeClass;
     org.apache.sis.util.logging.LoggerAdapterTest.class,
     org.apache.sis.math.FractionTest.class,
     org.apache.sis.math.VectorTest.class,
+    org.apache.sis.math.RepeatedVectorTest.class,
     org.apache.sis.math.MathFunctionsTest.class,
     org.apache.sis.math.DecimalFunctionsTest.class,
     org.apache.sis.math.StatisticsTest.class,
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/storage/StorageConnector.java b/storage/sis-storage/src/main/java/org/apache/sis/storage/StorageConnector.java
index 543ec67..f80143b 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/storage/StorageConnector.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/storage/StorageConnector.java
@@ -127,7 +127,7 @@ public class StorageConnector implements Serializable {
      * A flag for <code>{@linkplain #addView(Class, Object, Class, byte) addView}(…, view, source, flags)</code>
      * telling that before reseting the {@code view}, we need to reset the {@code source} first. This flag should
      * can be unset if any change in the position of {@code view} is immediately reflected in the position of
-     * {@code source}, and vis-versa.
+     * {@code source}, and vice-versa.
      *
      * @see Coupled#cascadeOnReset()
      */
@@ -362,7 +362,7 @@ public class StorageConnector implements Serializable {
         /**
          * {@code true} if calls to {@link #reset()} should cascade to {@link #wrapperFor}.
          * This is {@code false} if any change in the position of {@link #view} is immediately
-         * reflected in the position of {@link #wrapperFor}, and vis-versa.
+         * reflected in the position of {@link #wrapperFor}, and vice-versa.
          */
         final boolean cascadeOnReset() {
             return (cascade & CASCADE_ON_RESET) != 0;


Mime
View raw message