sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1808070 - in /sis/branches/JDK8/storage: sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/ sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ sis-storage/src/ma...
Date Mon, 11 Sep 2017 23:02:03 GMT
Author: desruisseaux
Date: Mon Sep 11 23:02:02 2017
New Revision: 1808070

URL: http://svn.apache.org/viewvc?rev=1808070&view=rev
Log:
Support enumeration in netCDF file. As a side effect, set default attribute encoding to UTF-8.

Modified:
    sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
    sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/FeaturesInfo.java
    sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
    sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
    sis/branches/JDK8/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ChannelDecoderTest.java
    sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
    sis/branches/JDK8/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/ChannelDataInputTest.java

Modified: sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -32,6 +32,8 @@ import java.util.Locale;
 import java.util.regex.Pattern;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.nio.channels.ReadableByteChannel;
 import javax.measure.UnitConverter;
 import javax.measure.IncommensurableException;
@@ -95,13 +97,13 @@ public final class ChannelDecoder extend
     public static final int MAX_VERSION = 2;
 
     /**
-     * The encoding of dimension, variable and attribute names. This is fixed to {@value}
by the
-     * netCDF specification. Note however that the encoding of attribute values may be different.
+     * The encoding of dimension, variable and attribute names. This is fixed to UTF-8 by
the netCDF specification.
+     * Note however that the encoding of attribute values may be different.
      *
      * @see #encoding
      * @see #readName()
      */
-    private static final String NAME_ENCODING = "UTF-8";
+    private static final Charset NAME_ENCODING = StandardCharsets.UTF_8;
 
     /**
      * The locale of dimension, variable and attribute names. This is used for the conversion
to
@@ -166,12 +168,12 @@ public final class ChannelDecoder extend
      * character data may use other encodings. The variable attribute “_Encoding” is
reserved for this
      * purpose in future implementations."
      *
-     * @todo Fixed to ISO-LATIN-1 for now, needs to be determined in a better way.
+     * @todo "_Encoding" attribute not yet parsed.
      *
      * @see #NAME_ENCODING
      * @see #readValues(DataType, int)
      */
-    private final String encoding = "ISO-8859-1";
+    private final Charset encoding;
 
     /**
      * The variables found in the netCDF file.
@@ -213,7 +215,7 @@ public final class ChannelDecoder extend
      * This constructor parses immediately the header, which shall have the following structure:
      *
      * <ul>
-     *   <li>Magic number:   'C','D','F'</li>
+     *   <li>Magic number: 'C','D','F'</li>
      *   <li>Version number: 1 or 2</li>
      *   <li>Number of records</li>
      *   <li>List of netCDF dimensions  (see {@link #readDimensions(int)})</li>
@@ -221,17 +223,19 @@ public final class ChannelDecoder extend
      *   <li>List of variables          (see {@link #readVariables(int, Dimension[])})</li>
      * </ul>
      *
-     * @param  geomlib    the library for geometric objects, or {@code null} for the default.
      * @param  input      the channel and the buffer from where data are read.
+     * @param  encoding   the encoding of attribute value, or {@code null} for the default
value.
+     * @param  geomlib    the library for geometric objects, or {@code null} for the default.
      * @param  listeners  where to send the warnings.
      * @throws IOException if an error occurred while reading the channel.
      * @throws DataStoreException if the content of the given channel is not a netCDF file.
      */
-    public ChannelDecoder(final ChannelDataInput input, final GeometryLibrary geomlib, final WarningListeners<DataStore> listeners)
-            throws IOException, DataStoreException
+    public ChannelDecoder(final ChannelDataInput input, final Charset encoding, final GeometryLibrary geomlib,
+            final WarningListeners<DataStore> listeners) throws IOException, DataStoreException
     {
         super(geomlib, listeners);
         this.input = input;
+        this.encoding = (encoding != null) ? encoding : StandardCharsets.UTF_8;
         /*
          * Check the magic number, which is expected to be exactly 3 bytes forming the "CDF"
string.
          * The 4th byte is the version number, which we opportunistically use after the magic
number check.
@@ -362,7 +366,7 @@ public final class ChannelDecoder extend
     }
 
     /**
-     * Reads a string from the channel in the {@value #NAME_ENCODING}. This is suitable for
the dimension,
+     * Reads a string from the channel in the {@link #NAME_ENCODING}. This is suitable for
the dimension,
      * variable and attribute names in the header. Note that attribute value may have a different
encoding.
      */
     private String readName() throws IOException, DataStoreException {

Modified: sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/FeaturesInfo.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/FeaturesInfo.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/FeaturesInfo.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/FeaturesInfo.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -20,6 +20,7 @@ import java.util.Map;
 import java.util.List;
 import java.util.Collection;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.io.IOException;
@@ -134,8 +135,9 @@ final class FeaturesInfo extends Discret
                     break;
                 }
                 default: {
+                    // TODO: use more accurate Number subtype for value class.
                     variable        = this.properties[i-2];
-                    valueClass      = Number.class;           // TODO: use more accurate value class.
+                    valueClass      = (variable.meaning(0) != null) ? String.class : Number.class;
                     minOccurs       = 0;
                     maxOccurs       = Integer.MAX_VALUE;
                     break;
@@ -334,7 +336,7 @@ search: for (final VariableInfo counts :
             final int[] step   = {1};
             final Vector   id, t;
             final Vector[] coords = new Vector[coordinates.length];
-            final Vector[] props  = new Vector[properties.length];
+            final Object[] props  = new Object[properties.length];
             try {
                 id = identifiers.read();                    // Efficiency should be okay because of cached value.
                 t = time.read(lower, upper, step);
@@ -342,7 +344,18 @@ search: for (final VariableInfo counts :
                     coords[i] = coordinates[i].read(lower, upper, step);
                 }
                 for (int i=0; i<properties.length; i++) {
-                    props[i] = properties[i].read(lower, upper, step);
+                    final VariableInfo p = properties[i];
+                    final Vector data = p.read(lower, upper, step);
+                    if (p.isEnumeration()) {
+                        final String[] meanings = new String[data.size()];
+                        for (int j=0; j<meanings.length; j++) {
+                            String m = p.meaning(data.intValue(j));
+                            meanings[j] = (m != null) ? m : "";
+                        }
+                        props[i] = Arrays.asList(meanings);
+                    } else {
+                        props[i] = data;
+                    }
                 }
             } catch (IOException | DataStoreException e) {
                 throw new BackingStoreException(canNotReadFile(), e);

Modified: sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -31,6 +31,7 @@ import org.apache.sis.internal.storage.i
 import org.apache.sis.internal.storage.io.Region;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.storage.DataStoreContentException;
+import org.apache.sis.storage.netcdf.AttributeNames;
 import org.apache.sis.util.resources.Errors;
 import org.apache.sis.util.CharSequences;
 import org.apache.sis.util.Numbers;
@@ -130,6 +131,15 @@ final class VariableInfo extends Variabl
     private transient Vector values;
 
     /**
+     * The {@code flag_meanings} values (used for enumeration values), or {@code null} if
this variable is not
+     * an enumeration.
+     *
+     * @see #isEnumeration()
+     * @see #meaning(int)
+     */
+    private final String[] meanings;
+
+    /**
      * Creates a new variable.
      *
      * @param  input       the channel together with a buffer for reading the variable data.
@@ -182,6 +192,16 @@ final class VariableInfo extends Variabl
         } else {
             isCoordinateSystemAxis = false;
         }
+        /*
+         * Verify if this variable is an enumeration. If yes, we remove the attributes that
define the
+         * enumeration since those attributes may be verbose and "pollute" the variable definition.
+         */
+        String[] meanings = stringValues(attributes.remove(AttributeNames.FLAG_MEANINGS));
+        switch (meanings.length) {
+            case 0: meanings = null; break;
+            case 1: meanings = (String[]) CharSequences.split(meanings[0], ' '); break;
+        }
+        this.meanings = meanings;
     }
 
     /**
@@ -228,6 +248,13 @@ final class VariableInfo extends Variabl
     }
 
     /**
+     * Returns {@code true} if this variable is an enumeration.
+     */
+    public boolean isEnumeration() {
+        return meanings != null;
+    }
+
+    /**
      * Returns {@code true} if this variable seems to be a coordinate system axis,
      * determined by comparing its name with the name of all dimensions in the netCDF file.
      */
@@ -457,6 +484,18 @@ final class VariableInfo extends Variabl
     }
 
     /**
+     * Returns the meaning of the given ordinal value, or {@code null} if none.
+     * Callers must have verified that {@link #isEnumeration()} returned {@code true}
+     * before to invoke this method
+     *
+     * @param  ordinal  the ordinal of the enumeration for which to get the value.
+     * @return the value associated to the given ordinal, or {@code null} if none.
+     */
+    public String meaning(final int ordinal) {
+        return (ordinal >= 0 && ordinal < meanings.length) ? meanings[ordinal] : null;
+    }
+
+    /**
      * Returns the error message for an unknown data type.
      */
     private String unknownType() {

Modified: sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/NetcdfStoreProvider.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -271,7 +271,7 @@ public class NetcdfStoreProvider extends
         Object keepOpen;
         final ChannelDataInput input = connector.getStorageAs(ChannelDataInput.class);
         if (input != null) try {
-            decoder = new ChannelDecoder(input, geomlib, listeners);
+            decoder = new ChannelDecoder(input, connector.getOption(OptionKey.ENCODING), geomlib, listeners);
             keepOpen = input;
         } catch (DataStoreException e) {
             final String path = connector.getStorageAs(String.class);

Modified: sis/branches/JDK8/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ChannelDecoderTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ChannelDecoderTest.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ChannelDecoderTest.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/impl/ChannelDecoderTest.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -76,7 +76,7 @@ public final strictfp class ChannelDecod
         assertNotNull(name, in);
         final ChannelDataInput input = new ChannelDataInput(name,
                 Channels.newChannel(in), ByteBuffer.allocate(4096), false);
-        return new ChannelDecoder(input, GeometryLibrary.JAVA2D, LISTENERS);
+        return new ChannelDecoder(input, null, GeometryLibrary.JAVA2D, LISTENERS);
     }
 
     /**

Modified: sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -26,6 +26,7 @@ import java.nio.IntBuffer;
 import java.nio.LongBuffer;
 import java.nio.FloatBuffer;
 import java.nio.DoubleBuffer;
+import java.nio.charset.Charset;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.channels.SeekableByteChannel;
 import org.apache.sis.internal.storage.Resources;
@@ -822,7 +823,7 @@ public class ChannelDataInput extends Ch
      * @return the string decoded from the {@code length} next bytes.
      * @throws IOException if an error occurred while reading the bytes, or if the given
encoding is invalid.
      */
-    public final String readString(final int length, final String encoding) throws IOException {
+    public final String readString(final int length, final Charset encoding) throws IOException {
         if (buffer.hasArray() && length <= buffer.capacity()) {
             ensureBufferContains(length);
             final int position = buffer.position(); // Must be after 'ensureBufferContains(int)'.

Modified: sis/branches/JDK8/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/ChannelDataInputTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/ChannelDataInputTest.java?rev=1808070&r1=1808069&r2=1808070&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/ChannelDataInputTest.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/ChannelDataInputTest.java
[UTF-8] Mon Sep 11 23:02:02 2017
@@ -21,6 +21,7 @@ import java.io.DataInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
@@ -146,7 +147,7 @@ public final strictfp class ChannelDataI
         final ChannelDataInput input = new ChannelDataInput("testReadString",
                 new DripByteChannel(array, random, 1, 32),
                 ByteBuffer.allocate(array.length + 4), false);
-        assertEquals(expected, input.readString(array.length, "UTF-8"));
+        assertEquals(expected, input.readString(array.length, StandardCharsets.UTF_8));
         assertFalse(input.buffer.hasRemaining());
     }
 



Mime
View raw message