sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject [sis] 01/03: Avoid duplication in netCDF variables and attributes when a lower-case name has been generated in addition to the original name.
Date Mon, 30 Nov 2020 17:17:16 GMT
This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 84e6e5626a5e7340eb05e85e040c293c1cd71959
Author: Martin Desruisseaux <martin.desruisseaux@geomatys.com>
AuthorDate: Mon Nov 30 13:35:17 2020 +0100

    Avoid duplication in netCDF variables and attributes when a lower-case name has been generated in addition to the original name.
---
 .../apache/sis/internal/util/CollectionsExt.java   |  7 ++-
 .../sis/internal/netcdf/impl/ChannelDecoder.java   | 65 +++++++++++++++++-----
 .../sis/internal/netcdf/impl/VariableInfo.java     | 37 ++++++++----
 3 files changed, 81 insertions(+), 28 deletions(-)

diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/CollectionsExt.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/CollectionsExt.java
index 7a8a982..3d4810c 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/CollectionsExt.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/CollectionsExt.java
@@ -896,6 +896,9 @@ public final class CollectionsExt extends Static {
      * <p>Code searching in the returned map shall ask for the original (non lower-case)
name
      * <strong>before</strong> to ask for the lower-cases version of that name.</p>
      *
+     * <p>Iteration order in map entries is the same as iteration order in the given collection.
+     * If lower-case names have been generated, they appear immediately after the original names.</p>
+     *
      * @param  <E>           the type of elements.
      * @param  entries       the entries to store in the map, or {@code null} if none.
      * @param  namesLocale   the locale to use for creating the "all lower cases" names.
@@ -905,10 +908,10 @@ public final class CollectionsExt extends Static {
     public static <E> Map<String,E> toCaseInsensitiveNameMap(
             final Collection<Map.Entry<String,E>> entries, final Locale namesLocale)
     {
-        if (entries == null) {
+        if (entries == null || entries.isEmpty()) {
             return Collections.emptyMap();
         }
-        final Map<String,E> map = new HashMap<>(hashMapCapacity(entries.size()));
+        final Map<String,E> map = new LinkedHashMap<>(hashMapCapacity(entries.size()));
         final Set<String> generated = new HashSet<>();
         for (final Map.Entry<String, ? extends E> entry : entries) {
             final String name = entry.getKey();
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
index 9f7d529..5570ebf 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/ChannelDecoder.java
@@ -59,6 +59,7 @@ import org.apache.sis.storage.event.StoreListeners;
 import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.resources.Errors;
 import org.apache.sis.util.resources.Vocabulary;
+import org.apache.sis.util.collection.Containers;
 import org.apache.sis.util.collection.TreeTable;
 import org.apache.sis.util.collection.TableColumn;
 import org.apache.sis.setup.GeometryLibrary;
@@ -73,7 +74,7 @@ import org.apache.sis.math.Vector;
  *
  * @author  Johann Sorel (Geomatys)
  * @author  Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
  *
  * @see <a href="http://portal.opengeospatial.org/files/?artifact_id=43734">NetCDF
Classic and 64-bit Offset Format (1.0)</a>
  *
@@ -179,7 +180,11 @@ public final class ChannelDecoder extends Decoder {
     final VariableInfo[] variables;
 
     /**
-     * Same as {@link #variables}, but as a map for faster search.
+     * Contains all {@link #variables}, but as a map for faster lookup by name. The same {@link VariableInfo}
+     * instance may be repeated in two entries if the original variable name contains upper case letters.
+     * In such case, the value is repeated and associated to a key in all lower case letters.
+     *
+     * @see #findVariable(String)
      */
     private final Map<String,VariableInfo> variableMap;
 
@@ -193,6 +198,15 @@ public final class ChannelDecoder extends Decoder {
     private final Map<String,Object> attributeMap;
 
     /**
+     * Names of attributes. This is {@code attributeMap.keySet()} unless some attributes have a name
+     * containing upper case letters. In such case a separate set is created for avoiding duplicated
+     * names (the name with upper case letters + the name in all lower case letters).
+     *
+     * @see #getAttributeNames()
+     */
+    private final Set<String> attributeNames;
+
+    /**
      * All dimensions in the netCDF files.
      *
      * @see #readDimensions(int)
@@ -257,9 +271,9 @@ public final class ChannelDecoder extends Decoder {
          * Read the dimension, attribute and variable declarations. We expect exactly 3 lists,
          * where any of them can be flagged as absent by a long (64 bits) 0.
          */
-        DimensionInfo[]    dimensions = null;
-        VariableInfo[]     variables  = null;
-        Map<String,Object> attributes = Collections.emptyMap();
+        DimensionInfo[] dimensions = null;
+        VariableInfo[]  variables  = null;
+        List<Map.Entry<String,Object>> attributes = Collections.emptyList();
         for (int i=0; i<3; i++) {
             final long tn = input.readLong();                   // Combination of tag and
nelems
             if (tn != 0) {
@@ -278,7 +292,8 @@ public final class ChannelDecoder extends Decoder {
                 }
             }
         }
-        this.attributeMap = attributes;
+        attributeMap = CollectionsExt.toCaseInsensitiveNameMap(attributes, NAME_LOCALE);
+        attributeNames = attributeNames(attributes, attributeMap);
         if (variables != null) {
             this.variables   = variables;
             this.variableMap = toCaseInsensitiveNameMap(variables);
@@ -541,7 +556,7 @@ public final class ChannelDecoder extends Decoder {
      *
      * @param  nelems  the number of attributes to read.
      */
-    private Map<String,Object> readAttributes(int nelems) throws IOException, DataStoreException
{
+    private List<Map.Entry<String,Object>> readAttributes(int nelems) throws
IOException, DataStoreException {
         final List<Map.Entry<String,Object>> attributes = new ArrayList<>(nelems);
         while (--nelems >= 0) {
             final String name = readName();
@@ -555,7 +570,7 @@ public final class ChannelDecoder extends Decoder {
                 }
             }
         }
-        return CollectionsExt.toCaseInsensitiveNameMap(attributes, NAME_LOCALE);
+        return attributes;
     }
 
     /**
@@ -602,7 +617,7 @@ public final class ChannelDecoder extends Decoder {
              * Following block is almost a copy-and-paste of similar block in the contructor,
              * but with less cases in the "switch" statements.
              */
-            Map<String,Object> attributes = Collections.emptyMap();
+            List<Map.Entry<String,Object>> attributes = Collections.emptyList();
             final long tn = input.readLong();
             if (tn != 0) {
                 final int tag = (int) (tn >>> Integer.SIZE);
@@ -619,7 +634,8 @@ public final class ChannelDecoder extends Decoder {
                     default: throw malformedHeader();
                 }
             }
-            variables[j] = new VariableInfo(this, input, name, varDims, attributes,
+            final Map<String,Object> map = CollectionsExt.toCaseInsensitiveNameMap(attributes,
NAME_LOCALE);
+            variables[j] = new VariableInfo(this, input, name, varDims, map, attributeNames(attributes,
map),
                     DataType.valueOf(input.readInt()), input.readInt(), readOffset());
         }
         /*
@@ -632,6 +648,24 @@ public final class ChannelDecoder extends Decoder {
         return variables;
     }
 
+    /**
+     * Returns the keys of {@code attributeMap} without the duplicated values caused by the change of name case.
+     * For example if an attribute {@code "Foo"} exists and a {@code "foo"} key has been generated for enabling
+     * case-insensitive search, only the {@code "Foo"} name is added in the returned set.
+     *
+     * @param  attributes    the attributes returned by {@link #readAttributes(int)}.
+     * @param  attributeMap  the map created by {@link CollectionsExt#toCaseInsensitiveNameMap(Collection, Locale)}.
+     * @return {@code attributeMap.keySet()} without duplicated keys.
+     */
+    private static Set<String> attributeNames(final List<Map.Entry<String,Object>>
attributes, final Map<String,?> attributeMap) {
+        if (attributes.size() >= attributeMap.size()) {
+            return Collections.unmodifiableSet(attributeMap.keySet());
+        }
+        final Set<String> attributeNames = new LinkedHashSet<>(Containers.hashMapCapacity(attributes.size()));
+        attributes.forEach((e) -> attributeNames.add(e.getKey()));
+        return attributeNames;
+    }
+
 
 
     // --------------------------------------------------------------------------------------------
@@ -788,12 +822,13 @@ public final class ChannelDecoder extends Decoder {
 
     /**
      * Returns the names of all global attributes found in the file.
+     * The returned set is unmodifiable.
      *
      * @return names of all global attributes in the file.
      */
     @Override
     public Collection<String> getAttributeNames() {
-        return Collections.unmodifiableSet(attributeMap.keySet());
+        return Collections.unmodifiableSet(attributeNames);
     }
 
     /**
@@ -1027,12 +1062,12 @@ nextVar:    for (final VariableInfo variable : variables) {
      */
     @Override
     public void addAttributesTo(final TreeTable.Node root) {
-        for (final Map.Entry<String,VariableInfo> entry : variableMap.entrySet()) {
+        for (final VariableInfo variable : variables) {
             final TreeTable.Node node = root.newChild();
-            node.setValue(TableColumn.NAME, entry.getKey());
-            entry.getValue().addAttributesTo(node);
+            node.setValue(TableColumn.NAME, variable.getName());
+            variable.addAttributesTo(node);
         }
-        VariableInfo.addAttributesTo(root, attributeMap);
+        VariableInfo.addAttributesTo(root, attributeNames, attributeMap);
     }
 
     /**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index fc30c71..bd06b4c 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -141,6 +141,15 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
     private final Map<String,Object> attributes;
 
     /**
+     * Names of attributes. This is {@code attributes.keySet()} unless some attributes have a name
+     * containing upper case letters. In such case a separate set is used for avoiding duplicated
+     * names (the name with upper case letters + the name in all lower case letters).
+     *
+     * @see #getAttributeNames()
+     */
+    private final Set<String> attributeNames;
+
+    /**
      * The netCDF type of data, or {@code null} if unknown.
      *
      * @see #getDataType()
@@ -183,14 +192,16 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
                  final String             name,
                  final DimensionInfo[]    dimensions,
                  final Map<String,Object> attributes,
+                 final Set<String>        attributeNames,
                        DataType           dataType,
                  final int                size,
                  final long               offset) throws DataStoreContentException
     {
         super(decoder);
-        this.name       = name;
-        this.dimensions = dimensions;
-        this.attributes = attributes;
+        this.name           = name;
+        this.dimensions     = dimensions;
+        this.attributes     = attributes;
+        this.attributeNames = attributeNames;
         final Object isUnsigned = getAttributeValue(CDM.UNSIGNED, "_unsigned");
         if (isUnsigned instanceof String) {
             dataType = dataType.unsigned(Boolean.valueOf((String) isUnsigned));
@@ -501,12 +512,13 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
 
     /**
      * Returns the names of all attributes associated to this variable.
+     * The returned set is unmodifiable.
      *
      * @return names of all attributes associated to this variable.
      */
     @Override
     public Collection<String> getAttributeNames() {
-        return Collections.unmodifiableSet(attributes.keySet());
+        return Collections.unmodifiableSet(attributeNames);
     }
 
     /**
@@ -560,7 +572,7 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
      * @param  branch  where to add new nodes for the attributes of this variable.
      */
     final void addAttributesTo(final TreeTable.Node branch) {
-        addAttributesTo(branch, attributes);
+        addAttributesTo(branch, attributeNames, attributes);
     }
 
     /**
@@ -568,14 +580,17 @@ final class VariableInfo extends Variable implements Comparable<VariableInfo>
{
      * returned by {@link org.apache.sis.storage.netcdf.NetcdfStore#getNativeMetadata()}.
      * This tree is for information purpose only.
      *
-     * @param  branch      where to add new nodes for the given attributes.
-     * @param  attributes  the attributes to add to the specified branch.
+     * @param  branch          where to add new nodes for the given attributes.
+     * @param  attributeNames  name of attribute to add to the specified branch.
+     * @param  attributes      the attributes to add to the specified branch.
      */
-    static void addAttributesTo(final TreeTable.Node branch, final Map<String,Object>
attributes) {
-        for (final Map.Entry<String,Object> entry : attributes.entrySet()) {
+    static void addAttributesTo(final TreeTable.Node branch,
+            final Set<String> attributeNames, final Map<String,Object> attributes)
+    {
+        for (final String name : attributeNames) {
             final TreeTable.Node node = branch.newChild();
-            node.setValue(TableColumn.NAME, entry.getKey());
-            Object value = entry.getValue();
+            node.setValue(TableColumn.NAME, name);
+            Object value = attributes.get(name);
             if (value != null) {
                 if (value instanceof Vector) {
                     value = ((Vector) value).toArray();


Mime
View raw message