sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject [sis] branch geoapi-4.0 updated: Try to produce better identifiers and title for netCDF variables. The identifier excludes the "long_name" attribute, which is sometime more like a sentence than an identifier (the standard name or variable name provides more stable identifiers). Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice, formatted in a way more like English sentence (with '_' and CamelCase replaced by space). Variable names are declared as citation identifiers and shown in the [...]
Date Mon, 16 Mar 2020 16:38:25 GMT
This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
     new b37fde6  Try to produce better identifiers and title for netCDF variables. The identifier excludes the "long_name" attribute, which is sometime more like a sentence than an identifier (the standard name or variable name provides more stable identifiers). Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice, formatted in a way more like English sentence (with '_' and CamelCase replaced by space). Variable names are declared as citation identifier [...]
b37fde6 is described below

commit b37fde605db9f8f72c2a1e18f9b16afafc4d4c8d
Author: Martin Desruisseaux <martin.desruisseaux@geomatys.com>
AuthorDate: Mon Mar 16 17:30:11 2020 +0100

    Try to produce better identifiers and title for netCDF variables.
    The identifier excludes the "long_name" attribute, which is sometime more like a sentence
    than an identifier (the standard name or variable name provides more stable identifiers).
    Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice,
    formatted in a way more like English sentence (with '_' and CamelCase replaced by space).
    Variable names are declared as citation identifiers and shown in the summary panel.
---
 .../sis/gui/metadata/IdentificationInfo.java       |  30 ++++
 .../org/apache/sis/internal/gui/Resources.java     |   5 +
 .../apache/sis/internal/gui/Resources.properties   |   1 +
 .../sis/internal/gui/Resources_fr.properties       |   1 +
 .../java/org/apache/sis/util/CharSequences.java    | 181 +++++++++++++++++++--
 .../org/apache/sis/util/CharSequencesTest.java     |  26 ++-
 .../apache/sis/internal/netcdf/RasterResource.java |  96 ++++++++---
 .../org/apache/sis/internal/netcdf/Variable.java   |  25 +--
 .../apache/sis/internal/netcdf/impl/GridInfo.java  |   2 +-
 .../sis/internal/netcdf/impl/VariableInfo.java     |   3 +-
 .../apache/sis/storage/netcdf/MetadataReader.java  |   4 +-
 .../sis/internal/storage/AbstractGridResource.java |   4 +-
 .../sis/internal/storage/AbstractResource.java     |  40 ++++-
 .../internal/storage/ConcatenatedFeatureSet.java   |   2 +-
 .../sis/internal/storage/MetadataBuilder.java      |  18 +-
 .../storage/ConcatenatedFeatureSetTest.java        |   5 +-
 16 files changed, 378 insertions(+), 65 deletions(-)

diff --git a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
index eecfca6..bebc66f 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
+++ b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
@@ -17,6 +17,7 @@
 package org.apache.sis.gui.metadata;
 
 import java.util.Date;
+import java.util.StringJoiner;
 import javafx.geometry.HPos;
 import javafx.scene.canvas.Canvas;
 import javafx.scene.canvas.GraphicsContext;
@@ -53,6 +54,18 @@ import static org.apache.sis.internal.util.CollectionsExt.nonNull;
  * The pane where to show the values of {@link Identification} objects.
  * The same pane can be used for an arbitrary amount of identifications.
  * Each instance is identified by its title.
+ * The content is:
+ *
+ * <ol>
+ *   <li>The title in bold font.</li>
+ *   <li>Identifiers.</li>
+ *   <li>Abstract, or purpose, or credit (in this preference order).</li>
+ *   <li>Topic category.</li>
+ *   <li>Release date, or publication date, or creation date, or any date (in this preference order).</li>
+ *   <li>Type of resource.</li>
+ *   <li>Spatiotemporal extent as a textual description.</li>
+ *   <li>Extent shown as a rectangle on a world map.</li>
+ * </ol>
  *
  * @author  Smaniotto Enzo (GSoC)
  * @author  Martin Desruisseaux (Geomatys)
@@ -147,6 +160,7 @@ final class IdentificationInfo extends Section<Identification> {
     /**
      * Invoked when new identification information should be shown.
      * This method updates all fields in this section with the content of given identification information.
+     * The content is summarized in {@linkplain IdentificationInfo class javadoc}.
      */
     @Override
     void buildContent(final Identification info) {
@@ -166,6 +180,19 @@ final class IdentificationInfo extends Section<Identification> {
         }
         title.setText(text);
         /*
+         * Identifiers as a comma-separated list on a single line. Each identifier
+         * is formatted as "codespace:code" or only "code" if there is no codespace.
+         */
+        if (citation != null) {
+            final StringJoiner buffer = new StringJoiner(", ");
+            for (final Identifier id : citation.getIdentifiers()) {
+                buffer.add(IdentifiedObjects.toString(id));
+            }
+            if (buffer.length() != 0) {
+                addLine(Resources.Keys.Identifiers, buffer.toString());
+            }
+        }
+        /*
          * The abstract, or if there is no abstract the purpose, or if no purpose the credit as a fallback.
          * We use those fallback because they can provide some hints about the product.
          * The topic category (climatology, health, etc.) follows.
@@ -186,6 +213,9 @@ final class IdentificationInfo extends Section<Identification> {
             }
         }
         addLine(label, text);
+        /*
+         * Topic category.
+         */
         addLine(Resources.Keys.TopicCategory, owner.string(nonNull(info.getTopicCategories())));
         /*
          * Select a single, arbitrary date. We take the release or publication date if available.
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
index 4060265..ff0a748 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
@@ -211,6 +211,11 @@ public final class Resources extends IndexedResourceBundle {
         public static final short GeospatialFiles = 4;
 
         /**
+         * Identifiers:
+         */
+        public static final short Identifiers = 54;
+
+        /**
          * Loading…
          */
         public static final short Loading = 7;
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
index 7af6c69..d9f07a1 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
@@ -51,6 +51,7 @@ Format                 = Format:
 FromMetadata           = From metadata
 FullScreen             = Full screen
 GeospatialFiles        = Geospatial data files
+Identifiers            = Identifiers:
 Loading                = Loading\u2026
 MainWindow             = Main window
 Metadata               = Metadata
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
index 48bb9b3..4b7c231 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
@@ -56,6 +56,7 @@ Format                 = Format\u00a0:
 FromMetadata           = Des m\u00e9ta-donn\u00e9es
 FullScreen             = Plein \u00e9cran
 GeospatialFiles        = Fichiers de donn\u00e9es g\u00e9ospatiales
+Identifiers            = Identifiants\u00a0:
 Loading                = Chargement\u2026
 MainWindow             = Fen\u00eatre principale
 Metadata               = Metadonn\u00e9es
diff --git a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
index 7448785..8802403 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
@@ -72,7 +72,7 @@ import static java.lang.Character.*;
  * {@code 0} or {@code false} primitive type calculated as if the input was an empty string.
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
  *
  * @see StringBuilders
  *
@@ -546,9 +546,9 @@ search:     for (; fromIndex <= toIndex; fromIndex++) {
      */
     public static int skipLeadingWhitespaces(final CharSequence text, int fromIndex, final int toIndex) {
         while (fromIndex < toIndex) {
-            final int c = Character.codePointAt(text, fromIndex);
-            if (!Character.isWhitespace(c)) break;
-            fromIndex += Character.charCount(c);
+            final int c = codePointAt(text, fromIndex);
+            if (!isWhitespace(c)) break;
+            fromIndex += charCount(c);
         }
         return fromIndex;
     }
@@ -584,9 +584,9 @@ search:     for (; fromIndex <= toIndex; fromIndex++) {
      */
     public static int skipTrailingWhitespaces(final CharSequence text, final int fromIndex, int toIndex) {
         while (toIndex > fromIndex) {
-            final int c = Character.codePointBefore(text, toIndex);
-            if (!Character.isWhitespace(c)) break;
-            toIndex -= Character.charCount(c);
+            final int c = codePointBefore(text, toIndex);
+            if (!isWhitespace(c)) break;
+            toIndex -= charCount(c);
         }
         return toIndex;
     }
@@ -1187,16 +1187,16 @@ searchWordBreak:    while (true) {
         final StringBuilder buffer = new StringBuilder(identifier.length());
         final int length = identifier.length();
         for (int i=0; i<length;) {
-            int c = Character.codePointAt(identifier, i);
+            int c = codePointAt(identifier, i);
             if (i != 0) {
                 if (c == '_') {
                     c = ' ';
                 } else {
-                    c = Character.toLowerCase(c);
+                    c = toLowerCase(c);
                 }
             }
             buffer.appendCodePoint(c);
-            i += Character.charCount(c);
+            i += charCount(c);
         }
         return buffer;
     }
@@ -1359,7 +1359,7 @@ searchWordBreak:    while (true) {
                 } else if (Character.isUpperCase(c)) {
                     // Test for mixed-case (e.g. "northEast").
                     // Note that i is guaranteed to be greater than 0 here.
-                    if (!Character.isUpperCase(Character.codePointBefore(text, i))) {
+                    if (!Character.isUpperCase(codePointBefore(text, i))) {
                         buffer.appendCodePoint(c);
                     }
                 }
@@ -1892,12 +1892,13 @@ cmp:    while (ia < lga) {
     }
 
     /**
-     * Returns the longest sequence of characters which is found at the beginning of the two
-     * given texts. If one of those texts is {@code null}, then the other text is returned.
+     * Returns the longest sequence of characters which is found at the beginning of the two given texts.
+     * If one of those texts is {@code null}, then the other text is returned.
+     * If there is no common prefix, then this method returns an empty string.
      *
      * @param  s1  the first text,  or {@code null}.
      * @param  s2  the second text, or {@code null}.
-     * @return the common prefix of both texts, or {@code null} if both texts are null.
+     * @return the common prefix of both texts (may be empty), or {@code null} if both texts are null.
      */
     public static CharSequence commonPrefix(final CharSequence s1, final CharSequence s2) {
         if (s1 == null) return s2;
@@ -1927,10 +1928,11 @@ cmp:    while (ia < lga) {
     /**
      * Returns the longest sequence of characters which is found at the end of the two given texts.
      * If one of those texts is {@code null}, then the other text is returned.
+     * If there is no common suffix, then this method returns an empty string.
      *
      * @param  s1  the first text,  or {@code null}.
      * @param  s2  the second text, or {@code null}.
-     * @return the common suffix of both texts, or {@code null} if both texts are null.
+     * @return the common suffix of both texts (may be empty), or {@code null} if both texts are null.
      */
     public static CharSequence commonSuffix(final CharSequence s1, final CharSequence s2) {
         if (s1 == null) return s2;
@@ -1958,6 +1960,149 @@ cmp:    while (ia < lga) {
     }
 
     /**
+     * Returns the words found at the beginning and end of both texts.
+     * The returned string is the concatenation of the {@linkplain #commonPrefix common prefix}
+     * with the {@linkplain #commonSuffix common suffix}, with prefix and suffix eventually made
+     * shorter for avoiding to cut in the middle of a word.
+     *
+     * <p>The purpose of this method is to create a global identifier from a list of component identifiers.
+     * The later are often eastward and northward components of a vector, in which case this method provides
+     * an identifier for the vector as a whole.</p>
+     *
+     * <div class="note"><b>Example:</b>
+     * given the following inputs:
+     * <ul>
+     *   <li>{@code "baroclinic_eastward_velocity"}</li>
+     *   <li>{@code "baroclinic_northward_velocity"}</li>
+     * </ul>
+     * This method returns {@code "baroclinic_velocity"}. Note that the {@code "ward"} characters
+     * are a common suffix of both texts but nevertheless omitted because they cut a word.</div>
+     *
+     * <p>If one of those texts is {@code null}, then the other text is returned.
+     * If there is no common words, then this method returns an empty string.</p>
+     *
+     * <h4>Possible future evolution</h4>
+     * Current implementation searches only for a common prefix and a common suffix, ignoring any common words
+     * that may appear in the middle of the strings. A character is considered the beginning of a word if it is
+     * {@linkplain Character#isLetterOrDigit(int) a letter or digit} which is not preceded by another letter or
+     * digit (as leading "s" and "c" in "snake_case"), or if it is an {@linkplain Character#isUpperCase(int)
+     * upper case} letter preceded by a {@linkplain Character#isLowerCase(int) lower case} letter or no letter
+     * (as both "C" in "CamelCase").
+     *
+     * @param  s1  the first text,  or {@code null}.
+     * @param  s2  the second text, or {@code null}.
+     * @return the common suffix of both texts (may be empty), or {@code null} if both texts are null.
+     *
+     * @since 1.1
+     */
+    public static CharSequence commonWords(final CharSequence s1, final CharSequence s2) {
+        final int lg1 = length(s1);
+        final int lg2 = length(s2);
+        final int shortestLength  = Math.min(lg1, lg2);   // 0 if s1 or s2 is null, in which case prefix and suffix will have the other value.
+        final CharSequence prefix = commonPrefix(s1, s2); int prefixLength = length(prefix); if (prefixLength >= shortestLength) return prefix;
+        final CharSequence suffix = commonSuffix(s1, s2); int suffixLength = length(suffix); if (suffixLength >= shortestLength) return suffix;
+        final int length = prefixLength + suffixLength;
+        if (length >= lg1) return s1;                     // Check if one of the strings is already equal to prefix + suffix.
+        if (length >= lg2) return s2;
+        /*
+         * At this point `s1` and `s2` contain at least one character between the prefix and the suffix.
+         * If the prefix or the suffix seems to stop in the middle of a word, skip the remaining of that word.
+         * For example if `s1` and `s2` are "eastward_velocity" and "northward_velocity", the common suffix is
+         * "ward_velocity" but we want to retain only "velocity".
+         *
+         * The first condition below (before the loop) checks the character after the common prefix (for example "e"
+         * in "baroclinic_eastward_velocity" if the prefix is "baroclinic_"). The intent is to handle the case where
+         * the word separator is not the same (e.g. "baroclinic_eastward_velocity" and "baroclinic northward velocity",
+         * in which case the '_' or ' ' character would not appear in the prefix).
+         */
+        if (!isWordBoundary(s1, prefixLength, codePointAt(s1, prefixLength)) &&
+            !isWordBoundary(s2, prefixLength, codePointAt(s2, prefixLength)))
+        {
+            while (prefixLength > 0) {
+                final int c = codePointBefore(prefix, prefixLength);
+                final int n = charCount(c);
+                prefixLength -= n;
+                if (isWordBoundary(prefix, prefixLength, c)) {
+                    if (!isLetterOrDigit(c)) prefixLength += n;                     // Keep separator character.
+                    break;
+                }
+            }
+        }
+        /*
+         * Same process than for the prefix above. The condition before the loop checks the character before suffix
+         * for the same reason than above, but using only `isLetterOrDigit` ignoring camel-case. The reason is that
+         * if the character before was a word separator according camel-case convention (i.e. an upper-case letter),
+         * we would need to include it in the common suffix.
+         */
+        int suffixStart  = 0;
+        if (isLetterOrDigit(codePointBefore(s1, lg1 - suffixLength)) &&
+            isLetterOrDigit(codePointBefore(s2, lg2 - suffixLength)))
+        {
+            while (suffixStart < suffixLength) {
+                final int c = codePointAt(suffix, suffixStart);
+                if (isWordBoundary(suffix, suffixStart, c)) break;
+                suffixStart += charCount(c);
+            }
+        }
+        /*
+         * At this point we got the final prefix and suffix to use. If the prefix or suffix is empty,
+         * trim whitespaces or '_' character. For example if the suffix is "_velocity" and no prefix,
+         * return "velocity" without leading "_" character.
+         */
+        if (prefixLength == 0) {
+            while (suffixStart < suffixLength) {
+                final int c = codePointAt(suffix, suffixStart);
+                if (isLetterOrDigit(c)) {
+                    return suffix.subSequence(suffixStart, suffixLength);   // Skip leading ignorable characters in suffix.
+                }
+                suffixStart += charCount(c);
+            }
+            return "";
+        }
+        if (suffixStart >= suffixLength) {
+            while (prefixLength > 0) {
+                final int c = codePointBefore(prefix, prefixLength);
+                if (isLetterOrDigit(c)) {
+                    return prefix.subSequence(0, prefixLength);             // Skip trailing ignorable characters in prefix.
+                }
+                prefixLength -= charCount(c);
+            }
+            return "";
+        }
+        /*
+         * All special cases have been examined. Return the concatenation of (possibly shortened)
+         * common prefix and suffix.
+         */
+        final StringBuilder buffer = new StringBuilder(prefixLength + suffixLength).append(prefix);
+        final int c1 = codePointBefore(prefix, prefixLength);
+        final int c2 = codePointAt(suffix, suffixStart);
+        if (isLetterOrDigit(c1) && isLetterOrDigit(c2)) {
+            if (!Character.isUpperCase(c2) || !isLowerCase(c1)) {
+                buffer.append(' ');             // Keep a separator between two words (except if CamelCase is used).
+            }
+        } else if (c1 == c2) {
+            suffixStart += charCount(c2);       // Avoid repeating '_' in e.g. "baroclinic_<removed>_velocity".
+        }
+        return buffer.append(suffix, suffixStart, suffixLength).toString();
+    }
+
+    /**
+     * Returns {@code true} if the character {@code c} is the beginning of a word or a non-word character.
+     * For example this method returns {@code true} if {@code c} is {@code '_'} in {@code "snake_case"} or
+     * {@code "C"} in {@code "CamelCase"}.
+     *
+     * @param  s  the character sequence from which the {@code c} character has been obtained.
+     * @param  i  the index in {@code s} where the {@code c} character has been obtained.
+     * @param  c  the code point in {@code s} as index {@code i}.
+     * @return whether the given character is the beginning of a word or a non-word character.
+     */
+    private static boolean isWordBoundary(final CharSequence s, final int i, final int c) {
+        if (!isLetterOrDigit(c)) return true;
+        if (!Character.isUpperCase(c)) return false;
+        return (i <= 0 || isLowerCase(codePointBefore(s, i)));
+    }
+
+    /**
      * Returns the token starting at the given offset in the given text. For the purpose of this
      * method, a "token" is any sequence of consecutive characters of the same type, as defined
      * below.
@@ -2085,8 +2230,10 @@ cmp:    while (ia < lga) {
         } else if (src instanceof CharBuffer) {
             ((CharBuffer) src).subSequence(srcOffset, srcOffset + length).get(dst, dstOffset, length);
         } else {
-            // An other candidate could be javax.swing.text.Segment, but it
-            // is probably not worth to introduce a Swing dependency for it.
+            /*
+             * Another candidate could be `javax.swing.text.Segment`, but it
+             * is probably not worth to introduce a Swing dependency for it.
+             */
             while (length != 0) {
                 dst[dstOffset++] = src.charAt(srcOffset++);
                 length--;
diff --git a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
index 34b164d..5686cea 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
@@ -33,7 +33,7 @@ import static org.apache.sis.util.CharSequences.*;
  *
  * @author  Martin Desruisseaux (Geomatys)
  * @author  Johann Sorel (Geomatys)
- * @version 1.0
+ * @version 1.1
  * @since   0.3
  * @module
  */
@@ -507,6 +507,10 @@ public final strictfp class CharSequencesTest extends TestCase {
     @Test
     public void testCommonPrefix() {
         assertEquals("testCommon", commonPrefix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+        assertEquals("",           commonPrefix(                  "testCommonPrefix()",  "unrelated"));
+        assertEquals("",           commonPrefix(                  "",                    "unrelated"));
+        assertEquals("",           commonPrefix(                  "",                    ""));
+        assertEquals("equal",      commonPrefix(                  "equal",               "equal"));
     }
 
     /**
@@ -515,6 +519,26 @@ public final strictfp class CharSequencesTest extends TestCase {
     @Test
     public void testCommonSuffix() {
         assertEquals("fix()", commonSuffix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+        assertEquals("",      commonSuffix(                  "testCommonPrefix()",  "unrelated"));
+        assertEquals("",      commonSuffix(                  "",                    "unrelated"));
+        assertEquals("",      commonSuffix(                  "",                    ""));
+        assertEquals("equal", commonSuffix(                  "equal",               "equal"));
+    }
+
+    /**
+     * Tests the {@link CharSequences#commonWords(CharSequence, CharSequence)} method.
+     */
+    @Test
+    public void testCommonWords() {
+        assertSame  ("baroclinic_eastward_velocity", commonWords("baroclinic_eastward_velocity", null));
+        assertSame  ("baroclinic_velocity",          commonWords("baroclinic_eastward_velocity", "baroclinic_velocity"));
+        assertEquals("baroclinic_velocity",          commonWords("baroclinic_eastward_velocity", "baroclinic_northward_velocity"));
+        assertEquals("baroclinic velocity",          commonWords("baroclinic_eastward_velocity", "baroclinic northward velocity"));
+        assertEquals("baroclinic",                   commonWords("baroclinic_eastward",          "baroclinic_northward"));
+        assertEquals("velocity",                     commonWords("eastward_velocity",            "northward_velocity"));
+        assertEquals("BaroclinicVelocity",           commonWords("BaroclinicEastwardVelocity",   "BaroclinicNorthwardVelocity"));
+        assertEquals("Baroclinic",                   commonWords("BaroclinicEastward",           "BaroclinicNorthward"));
+        assertEquals("Velocity",                     commonWords("EastwardVelocity",             "NorthwardVelocity"));
     }
 
     /**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
index e1c4adb..19f0460 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
@@ -20,6 +20,7 @@ import java.util.Map;
 import java.util.List;
 import java.util.HashMap;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.Optional;
 import java.io.IOException;
 import java.nio.file.Path;
@@ -53,9 +54,11 @@ import org.apache.sis.measure.MeasurementRange;
 import org.apache.sis.measure.NumberRange;
 import org.apache.sis.math.Vector;
 import org.apache.sis.util.Numbers;
+import org.apache.sis.util.CharSequences;
 import org.apache.sis.util.resources.Errors;
 import org.apache.sis.util.resources.Vocabulary;
 import org.apache.sis.internal.jdk9.JDK9;
+import org.apache.sis.internal.storage.MetadataBuilder;
 
 
 /**
@@ -100,7 +103,7 @@ public final class RasterResource extends AbstractGridResource implements Resour
      * than variable name because the former is controlled vocabulary. The use of controlled vocabulary for identifiers increases
      * the chances of stability or consistency between similar products.
      *
-     * <p>The value set by constructor may be updated by {@link #resolveNameCollision(RasterResource, Decoder)},
+     * <p>The value set by constructor may be updated by {@link #resolveNameCollision(Decoder)},
      * but should not be modified after that point.</p>
      *
      * @see #getIdentifier()
@@ -203,7 +206,7 @@ public final class RasterResource extends AbstractGridResource implements Resour
         final Variable[]     variables = decoder.getVariables().clone();        // Needs a clone because may be modified.
         final List<Variable> siblings  = new ArrayList<>(4);                    // Usually has only 1 element, sometime 2.
         final List<Resource> resources = new ArrayList<>(variables.length);     // The raster resources to be returned.
-        final Map<GenericName,RasterResource> firstOfName = new HashMap<>();    // For detecting name collisions.
+        final Map<GenericName,List<RasterResource>> byName = new HashMap<>();   // For detecting name collisions.
         for (int i=0; i<variables.length; i++) {
             final Variable variable = variables[i];
             if (variable == null || variable.getRole() != VariableRole.COVERAGE) {
@@ -306,40 +309,95 @@ public final class RasterResource extends AbstractGridResource implements Resour
                 numBands = siblings.size();
             }
             final RasterResource r = new RasterResource(decoder, name.trim(), grid, siblings, numBands, bandDimension, lock);
-            r.resolveNameCollision(firstOfName.putIfAbsent(r.identifier, r), decoder);
+            r.addToNameMap(byName);
             resources.add(r);
             siblings.clear();
         }
+        /*
+         * At this point all resources have been prepared. If the same standard name is used by more than one resource,
+         * replace the standard name by the identifier for all resources in collision (an "all or nothing" replacement).
+         * The identifiers should be unique, which should resolve the name collision. We nevertheless check again after
+         * renaming until there is nothing to change.
+         */
+        boolean changed;
+        do {
+            changed = false;
+            List<RasterResource> collisions = null;
+            for (final Iterator<List<RasterResource>> it = byName.values().iterator(); it.hasNext();) {
+                final List<RasterResource> rs = it.next();
+                if (rs.size() >= 2) {
+                    it.remove();                            // Remove resources before to re-insert them in next loop below.
+                    if (collisions == null) {
+                        collisions = rs;
+                    } else {
+                        collisions.addAll(rs);
+                    }
+                }
+            }
+            if (collisions != null) {                       // After above loop because may change the byName` map.
+                for (final RasterResource r : collisions) {
+                    changed |= r.resolveNameCollision(decoder);
+                    r.addToNameMap(byName);                 // For checking if new names cause new collisions.
+                }
+            }
+        }
+        while (changed);
         return resources;
     }
 
     /**
-     * If the given resource is non-null, modifies the name of this resource for avoiding name collision.
-     * The {@code other} resource shall be non-null when the caller detected that there is a name collision
-     * with that resource.
+     * Adds the name of the given resource to the given map. This is used for resolving name collision: any entry
+     * associated to two values or more will have the resources renamed by {@link #resolveNameCollision(Decoder)}.
+     */
+    private void addToNameMap(final Map<GenericName,List<RasterResource>> byName) {
+        byName.computeIfAbsent(identifier, (key) -> new ArrayList<>()).add(this);
+    }
+
+    /**
+     * Invoked when the name of this resource needs to be changed because it collides with the name of another resource.
+     * This method uses the variable name, which should be unique in each netCDF file. If this resource wraps more than
+     * one variable (for example "eastward_velocity" and "northward_velocity"), then this method takes the common part
+     * of all variable names ("velocity" in above example).
      *
-     * @param  other  the other resource for which there is a name collision, or {@code null} if no collision.
+     * @return whether this resource has been renamed as an effect of this method call.
      */
-    private void resolveNameCollision(final RasterResource other, final Decoder decoder) {
-        if (other != null) {
-            if (identifier.equals(other.identifier)) {
-                other.resolveNameCollision(decoder);
-            }
-            resolveNameCollision(decoder);
+    private boolean resolveNameCollision(final Decoder decoder) {
+        String name = null;
+        for (final Variable v : data) {
+            name = (String) CharSequences.commonWords(name, v.getName());
         }
+        if (name == null || name.isEmpty()) {
+            name = data[0].getName();           // If unable to get a common name, fallback on the first one.
+        }
+        final GenericName newValue = decoder.nameFactory.createLocalName(decoder.namespace, name);
+        if (newValue.equals(identifier)) return false;
+        identifier = newValue;
+        return true;
     }
 
     /**
-     * Invoked when the name of this resource needs to be changed because it collides with the name of another resource.
-     * This method appends the variable name, which should be unique in each netCDF file.
+     * Invoked the first time that {@link #getMetadata()} is invoked. Computes metadata based on information
+     * provided by {@link #getIdentifier()}, {@link #getGridGeometry()}, {@link #getSampleDimensions()} and
+     * variable names.
+     *
+     * @param  metadata  the builder where to set metadata properties.
+     * @throws DataStoreException if an error occurred while reading metadata from the data store.
      */
-    private void resolveNameCollision(final Decoder decoder) {
-        String name = identifier + " (" + data[0].getName() + ')';
-        identifier = decoder.nameFactory.createLocalName(decoder.namespace, name);
+    @Override
+    protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException {
+        String title = null;
+        for (final Variable v : data) {
+            title = (String) CharSequences.commonWords(title, v.getDescription());
+            metadata.addIdentifier(v.getGroupName(), v.getName(), MetadataBuilder.Scope.RESOURCE);
+        }
+        if (title != null && !title.isEmpty()) {
+            metadata.addTitle(CharSequences.camelCaseToSentence(title).toString());
+        }
+        super.createMetadata(metadata);
     }
 
     /**
-     * Returns the variable name as an identifier of this resource.
+     * Returns the standard name (if non ambiguous) or the variable name as an identifier of this resource.
      */
     @Override
     public Optional<GenericName> getIdentifier() {
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
index 6ebaad6..bcee136 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
@@ -155,6 +155,8 @@ public abstract class Variable extends Node {
 
     /**
      * Returns the name of this variable. May be used as sample dimension name in a raster.
+     * The variable name should be unique in each netCDF file
+     * (by contrast, {@link #getStandardName()} is not always unique).
      *
      * @return the name of this variable.
      */
@@ -162,24 +164,23 @@ public abstract class Variable extends Node {
     public abstract String getName();
 
     /**
-     * Returns the standard name if available, or the long name other, or the ordinary name otherwise.
-     * May be used as the {@link RasterResource} label, or the label of a {@link Raster} as a whole
-     * (including all bands). Standard name is preferred to variable name when controlled vocabulary
-     * is desired, for example for more stable identifier or more consistency between similar data.
+     * Returns the standard name if available, or the unique variable name otherwise.
+     * May be used for {@link RasterResource#getIdentifier()} and {@link Raster#label}.
+     * Standard name is preferred to variable name when controlled vocabulary is desired,
+     * for example for more stable identifier or more consistency between similar data.
+     *
+     * <p>This method does not check the {@code "long_name"} attribute because the long
+     * name is more like a sentence (e.g. <cite>"model wind direction at 10 m"</cite>)
+     * while standard name and variable name are more like identifiers.
+     * For the long name, use {@link #getDescription()} instead.</p>
      *
      * @return the standard name, or a fallback if there is no standard name.
      *
      * @see RasterResource#identifier
      */
     public final String getStandardName() {
-        String name = getAttributeAsString(CF.STANDARD_NAME);
-        if (name == null) {
-            name = getAttributeAsString(CDM.LONG_NAME);
-            if (name == null) {
-                name = getName();
-            }
-        }
-        return name;
+        final String name = getAttributeAsString(CF.STANDARD_NAME);
+        return (name != null) ? name : getName();
     }
 
     /**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
index db9460a..e62ddbf 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
@@ -262,7 +262,7 @@ next:       for (final String name : axisNames) {
              */
             char abbreviation = getAxisType(axis.getAxisType());
             if (abbreviation == 0) {
-                abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME));
+                abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME));    // No fallback on variable name.
                 /*
                  * If the abbreviation is still unknown, look at the "long_name", "description" or "title" attribute. Those
                  * attributes are not standardized, so they are less reliable than "standard_name". But they are still more
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index 9f6a095..c0e424e 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -68,7 +68,8 @@ import org.apache.sis.math.Vector;
 final class VariableInfo extends Variable implements Comparable<VariableInfo> {
     /**
      * The names of attributes where to look for the description to be returned by {@link #getDescription()}.
-     * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()} javadoc.
+     * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()}
+     * javadoc, in same order.
      */
     private static final String[] DESCRIPTION_ATTRIBUTES = {
         CDM.LONG_NAME,
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
index dc0504f..d4cd862 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
@@ -938,8 +938,8 @@ split:  while ((start = CharSequences.skipLeadingWhitespaces(value, start, lengt
             variable.writeDataTypeName(buffer);
             setBandIdentifier(f.createMemberName(null, name, f.createTypeName(null, buffer.toString())));
         }
-        final String id = variable.getAttributeAsString(CF.STANDARD_NAME);
-        if (id != null && !id.equals(name)) {
+        final String id = Strings.trimOrNull(variable.getStandardName());
+        if (!id.equals(name)) {
             addBandName(variable.getAttributeAsString(ACDD.standard_name_vocabulary), id);
         }
         final String description = Strings.trimOrNull(variable.getDescription());
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
index a803548..0df65f5 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
@@ -71,8 +71,8 @@ public abstract class AbstractGridResource extends AbstractResource implements G
 
     /**
      * Invoked the first time that {@link #getMetadata()} is invoked. The default implementation populates
-     * metadata based on information provided by {@link #getIdentifier()} and {@link #getGridGeometry()}.
-     * Subclasses should override if they can provide more information.
+     * metadata based on information provided by {@link #getIdentifier()}, {@link #getGridGeometry()} and
+     * {@link #getSampleDimensions()}. Subclasses should override if they can provide more information.
      *
      * @param  metadata  the builder where to set metadata properties.
      * @throws DataStoreException if an error occurred while reading metadata from the data store.
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
index aa57fe1..a1c8eba 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
@@ -16,8 +16,10 @@
  */
 package org.apache.sis.internal.storage;
 
+import java.util.Locale;
 import java.util.Optional;
 import org.opengis.util.GenericName;
+import org.opengis.util.InternationalString;
 import org.opengis.metadata.Metadata;
 import org.opengis.geometry.Envelope;
 import org.opengis.referencing.operation.TransformException;
@@ -27,6 +29,8 @@ import org.apache.sis.storage.event.StoreEvent;
 import org.apache.sis.storage.event.StoreListener;
 import org.apache.sis.storage.event.StoreListeners;
 import org.apache.sis.storage.event.WarningEvent;
+import org.apache.sis.util.iso.AbstractInternationalString;
+import org.apache.sis.util.CharSequences;
 
 
 /**
@@ -47,7 +51,7 @@ import org.apache.sis.storage.event.WarningEvent;
  * Synchronization, when needed, uses {@code this} lock.
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
  * @since   0.8
  * @module
  */
@@ -92,7 +96,7 @@ public class AbstractResource extends StoreListeners implements Resource {
 
     /**
      * Returns a description of this resource. This method invokes {@link #createMetadata(MetadataBuilder)}
-     * the first time it is invoked, then cache the result.
+     * the first time it is invoked, then caches the result.
      *
      * @return information about this resource (never {@code null} in this implementation).
      * @throws DataStoreException if an error occurred while reading or computing the envelope.
@@ -116,7 +120,7 @@ public class AbstractResource extends StoreListeners implements Resource {
      * @throws DataStoreException if an error occurred while reading metadata from the data store.
      */
     protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException {
-        getIdentifier().ifPresent((name) -> metadata.addTitle(name.toInternationalString()));
+        getIdentifier().ifPresent((name) -> metadata.addTitle(new Sentence(name)));
         getEnvelope().ifPresent((envelope) -> {
             try {
                 metadata.addExtent(envelope);
@@ -127,6 +131,36 @@ public class AbstractResource extends StoreListeners implements Resource {
     }
 
     /**
+     * An international string where localized identifiers are formatted more like an English sentence.
+     * This is used for wrapping {@link GenericName#toInternationalString()} representation for use as
+     * a citation title.
+     */
+    private static final class Sentence extends AbstractInternationalString {
+        /** The generic name localized representation. */
+        private final InternationalString name;
+
+        /** Returns a new wrapper for the given generic name. */
+        Sentence(final GenericName name) {
+            this.name = name.toInternationalString();
+        }
+
+        /** Returns the generic name as an English-like sentence. */
+        @Override public String toString(final Locale locale) {
+            return CharSequences.camelCaseToSentence(name.toString(locale)).toString();
+        }
+
+        /** Returns a hash code value for this sentence. */
+        @Override public int hashCode() {
+            return ~name.hashCode();
+        }
+
+        /** Compares the given object with this sentence for equality. */
+        @Override public boolean equals(final Object other) {
+            return (other instanceof Sentence) && name.equals(((Sentence) other).name);
+        }
+    }
+
+    /**
      * Clears any cache in this resource, forcing the data to be recomputed when needed again.
      * This method should be invoked if the data in underlying data store changed.
      */
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
index db05e2b..ce758ad 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
@@ -68,7 +68,7 @@ public class ConcatenatedFeatureSet extends AggregatedFeatureSet {
 
     /**
      * Creates a new concatenated feature set with the same types than the given feature set,
-     * but different sources.
+     * but different sources. This is used for creating {@linkplain #subset(Query) subsets}.
      */
     private ConcatenatedFeatureSet(final FeatureSet[] sources, final ConcatenatedFeatureSet original) {
         super(original);
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
index 6185215..2a33f3b 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
@@ -130,6 +130,7 @@ import org.apache.sis.internal.util.Numerics;
 import org.apache.sis.internal.util.Strings;
 import org.apache.sis.util.ArgumentChecks;
 import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Characters;
 import org.apache.sis.util.iso.Names;
 import org.apache.sis.util.iso.Types;
 import org.apache.sis.measure.Units;
@@ -150,7 +151,7 @@ import org.opengis.metadata.citation.Responsibility;
  * @author  Martin Desruisseaux (Geomatys)
  * @author  Rémi Maréchal (Geomatys)
  * @author  Thi Phuong Hao Nguyen (VNSC)
- * @version 1.0
+ * @version 1.1
  * @since   0.8
  * @module
  */
@@ -913,8 +914,9 @@ public class MetadataBuilder {
     /**
      * Adds a resource (data) identifier, a metadata identifier, or both as they are often the same.
      * The identifier is added only if {@code code} is non-null, regardless other argument values.
-     * Empty strings (ignoring spaces) are ignored.
-     * Storages locations are:
+     * Empty strings (ignoring spaces) are considered as null.
+     * The identifier is not added if already presents.
+     * Storage locations are:
      *
      * <ul>
      *   <li><b>Metadata:</b> {@code metadata/metadataIdentifier}</li>
@@ -1139,7 +1141,10 @@ public class MetadataBuilder {
     }
 
     /**
-     * Adds a title or alternate title of the resource.
+     * Adds a title or alternate title of the resource, if not already present.
+     * This operation does nothing if the title is already defined and the given
+     * title is already used as an identifier (this policy is a complement of the
+     * {@link #addTitleOrIdentifier(String, Scope)} behavior).
      * Storage location is:
      *
      * <ul>
@@ -1159,6 +1164,11 @@ public class MetadataBuilder {
             if (current == null) {
                 citation.setTitle(i18n);
             } else if (!equals(current, i18n)) {
+                for (final Identifier id : citation.getIdentifiers()) {
+                    if (CharSequences.equalsFiltered(title, id.getCode(), Characters.Filter.LETTERS_AND_DIGITS, true)) {
+                        return;
+                    }
+                }
                 addIfNotPresent(citation.getAlternateTitles(), i18n);
             }
         }
diff --git a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
index a6006ab..7841638 100644
--- a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
+++ b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
@@ -19,6 +19,7 @@ package org.apache.sis.internal.storage;
 import java.util.Arrays;
 import java.util.Collections;
 import org.opengis.metadata.Metadata;
+import org.opengis.metadata.lineage.Lineage;
 import org.opengis.metadata.content.FeatureCatalogueDescription;
 import org.apache.sis.feature.builder.FeatureTypeBuilder;
 import org.apache.sis.storage.DataStoreContentException;
@@ -68,8 +69,8 @@ public final strictfp class ConcatenatedFeatureSetTest extends TestCase {
         final Metadata md = cfs.getMetadata();
         assertNotNull("getMetadata()", md);
         assertContentInfoEquals("City", 3, (FeatureCatalogueDescription) getSingleton(md.getContentInfo()));
-        assertFeatureSourceEquals("City", new String[] {"City"},
-                getSingleton(getSingleton(md.getResourceLineages()).getSources()));
+        final Lineage lineage = getSingleton(md.getResourceLineages());
+        assertFeatureSourceEquals("City", new String[] {"City"}, getSingleton(lineage.getSources()));
     }
 
     /**


Mime
View raw message