This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new b37fde6 Try to produce better identifiers and title for netCDF variables. The identifier excludes the "long_name" attribute, which is sometime more like a sentence than an identifier (the standard name or variable name provides more stable identifiers). Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice, formatted in a way more like English sentence (with '_' and CamelCase replaced by space). Variable names are declared as citation identifier [...] b37fde6 is described below commit b37fde605db9f8f72c2a1e18f9b16afafc4d4c8d Author: Martin Desruisseaux AuthorDate: Mon Mar 16 17:30:11 2020 +0100 Try to produce better identifiers and title for netCDF variables. The identifier excludes the "long_name" attribute, which is sometime more like a sentence than an identifier (the standard name or variable name provides more stable identifiers). Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice, formatted in a way more like English sentence (with '_' and CamelCase replaced by space). Variable names are declared as citation identifiers and shown in the summary panel. --- .../sis/gui/metadata/IdentificationInfo.java | 30 ++++ .../org/apache/sis/internal/gui/Resources.java | 5 + .../apache/sis/internal/gui/Resources.properties | 1 + .../sis/internal/gui/Resources_fr.properties | 1 + .../java/org/apache/sis/util/CharSequences.java | 181 +++++++++++++++++++-- .../org/apache/sis/util/CharSequencesTest.java | 26 ++- .../apache/sis/internal/netcdf/RasterResource.java | 96 ++++++++--- .../org/apache/sis/internal/netcdf/Variable.java | 25 +-- .../apache/sis/internal/netcdf/impl/GridInfo.java | 2 +- .../sis/internal/netcdf/impl/VariableInfo.java | 3 +- .../apache/sis/storage/netcdf/MetadataReader.java | 4 +- .../sis/internal/storage/AbstractGridResource.java | 4 +- .../sis/internal/storage/AbstractResource.java | 40 ++++- .../internal/storage/ConcatenatedFeatureSet.java | 2 +- .../sis/internal/storage/MetadataBuilder.java | 18 +- .../storage/ConcatenatedFeatureSetTest.java | 5 +- 16 files changed, 378 insertions(+), 65 deletions(-) diff --git a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java index eecfca6..bebc66f 100644 --- a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java +++ b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java @@ -17,6 +17,7 @@ package org.apache.sis.gui.metadata; import java.util.Date; +import java.util.StringJoiner; import javafx.geometry.HPos; import javafx.scene.canvas.Canvas; import javafx.scene.canvas.GraphicsContext; @@ -53,6 +54,18 @@ import static org.apache.sis.internal.util.CollectionsExt.nonNull; * The pane where to show the values of {@link Identification} objects. * The same pane can be used for an arbitrary amount of identifications. * Each instance is identified by its title. + * The content is: + * + *
    + *
  1. The title in bold font.
  2. + *
  3. Identifiers.
  4. + *
  5. Abstract, or purpose, or credit (in this preference order).
  6. + *
  7. Topic category.
  8. + *
  9. Release date, or publication date, or creation date, or any date (in this preference order).
  10. + *
  11. Type of resource.
  12. + *
  13. Spatiotemporal extent as a textual description.
  14. + *
  15. Extent shown as a rectangle on a world map.
  16. + *
* * @author Smaniotto Enzo (GSoC) * @author Martin Desruisseaux (Geomatys) @@ -147,6 +160,7 @@ final class IdentificationInfo extends Section { /** * Invoked when new identification information should be shown. * This method updates all fields in this section with the content of given identification information. + * The content is summarized in {@linkplain IdentificationInfo class javadoc}. */ @Override void buildContent(final Identification info) { @@ -166,6 +180,19 @@ final class IdentificationInfo extends Section { } title.setText(text); /* + * Identifiers as a comma-separated list on a single line. Each identifier + * is formatted as "codespace:code" or only "code" if there is no codespace. + */ + if (citation != null) { + final StringJoiner buffer = new StringJoiner(", "); + for (final Identifier id : citation.getIdentifiers()) { + buffer.add(IdentifiedObjects.toString(id)); + } + if (buffer.length() != 0) { + addLine(Resources.Keys.Identifiers, buffer.toString()); + } + } + /* * The abstract, or if there is no abstract the purpose, or if no purpose the credit as a fallback. * We use those fallback because they can provide some hints about the product. * The topic category (climatology, health, etc.) follows. @@ -186,6 +213,9 @@ final class IdentificationInfo extends Section { } } addLine(label, text); + /* + * Topic category. + */ addLine(Resources.Keys.TopicCategory, owner.string(nonNull(info.getTopicCategories()))); /* * Select a single, arbitrary date. We take the release or publication date if available. diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java index 4060265..ff0a748 100644 --- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java +++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java @@ -211,6 +211,11 @@ public final class Resources extends IndexedResourceBundle { public static final short GeospatialFiles = 4; /** + * Identifiers: + */ + public static final short Identifiers = 54; + + /** * Loading… */ public static final short Loading = 7; diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties index 7af6c69..d9f07a1 100644 --- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties +++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties @@ -51,6 +51,7 @@ Format = Format: FromMetadata = From metadata FullScreen = Full screen GeospatialFiles = Geospatial data files +Identifiers = Identifiers: Loading = Loading\u2026 MainWindow = Main window Metadata = Metadata diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties index 48bb9b3..4b7c231 100644 --- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties +++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties @@ -56,6 +56,7 @@ Format = Format\u00a0: FromMetadata = Des m\u00e9ta-donn\u00e9es FullScreen = Plein \u00e9cran GeospatialFiles = Fichiers de donn\u00e9es g\u00e9ospatiales +Identifiers = Identifiants\u00a0: Loading = Chargement\u2026 MainWindow = Fen\u00eatre principale Metadata = Metadonn\u00e9es diff --git a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java index 7448785..8802403 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java +++ b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java @@ -72,7 +72,7 @@ import static java.lang.Character.*; * {@code 0} or {@code false} primitive type calculated as if the input was an empty string. * * @author Martin Desruisseaux (Geomatys) - * @version 1.0 + * @version 1.1 * * @see StringBuilders * @@ -546,9 +546,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) { */ public static int skipLeadingWhitespaces(final CharSequence text, int fromIndex, final int toIndex) { while (fromIndex < toIndex) { - final int c = Character.codePointAt(text, fromIndex); - if (!Character.isWhitespace(c)) break; - fromIndex += Character.charCount(c); + final int c = codePointAt(text, fromIndex); + if (!isWhitespace(c)) break; + fromIndex += charCount(c); } return fromIndex; } @@ -584,9 +584,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) { */ public static int skipTrailingWhitespaces(final CharSequence text, final int fromIndex, int toIndex) { while (toIndex > fromIndex) { - final int c = Character.codePointBefore(text, toIndex); - if (!Character.isWhitespace(c)) break; - toIndex -= Character.charCount(c); + final int c = codePointBefore(text, toIndex); + if (!isWhitespace(c)) break; + toIndex -= charCount(c); } return toIndex; } @@ -1187,16 +1187,16 @@ searchWordBreak: while (true) { final StringBuilder buffer = new StringBuilder(identifier.length()); final int length = identifier.length(); for (int i=0; iThe purpose of this method is to create a global identifier from a list of component identifiers. + * The later are often eastward and northward components of a vector, in which case this method provides + * an identifier for the vector as a whole.

+ * + *
Example: + * given the following inputs: + *
    + *
  • {@code "baroclinic_eastward_velocity"}
  • + *
  • {@code "baroclinic_northward_velocity"}
  • + *
+ * This method returns {@code "baroclinic_velocity"}. Note that the {@code "ward"} characters + * are a common suffix of both texts but nevertheless omitted because they cut a word.
+ * + *

If one of those texts is {@code null}, then the other text is returned. + * If there is no common words, then this method returns an empty string.

+ * + *

Possible future evolution

+ * Current implementation searches only for a common prefix and a common suffix, ignoring any common words + * that may appear in the middle of the strings. A character is considered the beginning of a word if it is + * {@linkplain Character#isLetterOrDigit(int) a letter or digit} which is not preceded by another letter or + * digit (as leading "s" and "c" in "snake_case"), or if it is an {@linkplain Character#isUpperCase(int) + * upper case} letter preceded by a {@linkplain Character#isLowerCase(int) lower case} letter or no letter + * (as both "C" in "CamelCase"). + * + * @param s1 the first text, or {@code null}. + * @param s2 the second text, or {@code null}. + * @return the common suffix of both texts (may be empty), or {@code null} if both texts are null. + * + * @since 1.1 + */ + public static CharSequence commonWords(final CharSequence s1, final CharSequence s2) { + final int lg1 = length(s1); + final int lg2 = length(s2); + final int shortestLength = Math.min(lg1, lg2); // 0 if s1 or s2 is null, in which case prefix and suffix will have the other value. + final CharSequence prefix = commonPrefix(s1, s2); int prefixLength = length(prefix); if (prefixLength >= shortestLength) return prefix; + final CharSequence suffix = commonSuffix(s1, s2); int suffixLength = length(suffix); if (suffixLength >= shortestLength) return suffix; + final int length = prefixLength + suffixLength; + if (length >= lg1) return s1; // Check if one of the strings is already equal to prefix + suffix. + if (length >= lg2) return s2; + /* + * At this point `s1` and `s2` contain at least one character between the prefix and the suffix. + * If the prefix or the suffix seems to stop in the middle of a word, skip the remaining of that word. + * For example if `s1` and `s2` are "eastward_velocity" and "northward_velocity", the common suffix is + * "ward_velocity" but we want to retain only "velocity". + * + * The first condition below (before the loop) checks the character after the common prefix (for example "e" + * in "baroclinic_eastward_velocity" if the prefix is "baroclinic_"). The intent is to handle the case where + * the word separator is not the same (e.g. "baroclinic_eastward_velocity" and "baroclinic northward velocity", + * in which case the '_' or ' ' character would not appear in the prefix). + */ + if (!isWordBoundary(s1, prefixLength, codePointAt(s1, prefixLength)) && + !isWordBoundary(s2, prefixLength, codePointAt(s2, prefixLength))) + { + while (prefixLength > 0) { + final int c = codePointBefore(prefix, prefixLength); + final int n = charCount(c); + prefixLength -= n; + if (isWordBoundary(prefix, prefixLength, c)) { + if (!isLetterOrDigit(c)) prefixLength += n; // Keep separator character. + break; + } + } + } + /* + * Same process than for the prefix above. The condition before the loop checks the character before suffix + * for the same reason than above, but using only `isLetterOrDigit` ignoring camel-case. The reason is that + * if the character before was a word separator according camel-case convention (i.e. an upper-case letter), + * we would need to include it in the common suffix. + */ + int suffixStart = 0; + if (isLetterOrDigit(codePointBefore(s1, lg1 - suffixLength)) && + isLetterOrDigit(codePointBefore(s2, lg2 - suffixLength))) + { + while (suffixStart < suffixLength) { + final int c = codePointAt(suffix, suffixStart); + if (isWordBoundary(suffix, suffixStart, c)) break; + suffixStart += charCount(c); + } + } + /* + * At this point we got the final prefix and suffix to use. If the prefix or suffix is empty, + * trim whitespaces or '_' character. For example if the suffix is "_velocity" and no prefix, + * return "velocity" without leading "_" character. + */ + if (prefixLength == 0) { + while (suffixStart < suffixLength) { + final int c = codePointAt(suffix, suffixStart); + if (isLetterOrDigit(c)) { + return suffix.subSequence(suffixStart, suffixLength); // Skip leading ignorable characters in suffix. + } + suffixStart += charCount(c); + } + return ""; + } + if (suffixStart >= suffixLength) { + while (prefixLength > 0) { + final int c = codePointBefore(prefix, prefixLength); + if (isLetterOrDigit(c)) { + return prefix.subSequence(0, prefixLength); // Skip trailing ignorable characters in prefix. + } + prefixLength -= charCount(c); + } + return ""; + } + /* + * All special cases have been examined. Return the concatenation of (possibly shortened) + * common prefix and suffix. + */ + final StringBuilder buffer = new StringBuilder(prefixLength + suffixLength).append(prefix); + final int c1 = codePointBefore(prefix, prefixLength); + final int c2 = codePointAt(suffix, suffixStart); + if (isLetterOrDigit(c1) && isLetterOrDigit(c2)) { + if (!Character.isUpperCase(c2) || !isLowerCase(c1)) { + buffer.append(' '); // Keep a separator between two words (except if CamelCase is used). + } + } else if (c1 == c2) { + suffixStart += charCount(c2); // Avoid repeating '_' in e.g. "baroclinic__velocity". + } + return buffer.append(suffix, suffixStart, suffixLength).toString(); + } + + /** + * Returns {@code true} if the character {@code c} is the beginning of a word or a non-word character. + * For example this method returns {@code true} if {@code c} is {@code '_'} in {@code "snake_case"} or + * {@code "C"} in {@code "CamelCase"}. + * + * @param s the character sequence from which the {@code c} character has been obtained. + * @param i the index in {@code s} where the {@code c} character has been obtained. + * @param c the code point in {@code s} as index {@code i}. + * @return whether the given character is the beginning of a word or a non-word character. + */ + private static boolean isWordBoundary(final CharSequence s, final int i, final int c) { + if (!isLetterOrDigit(c)) return true; + if (!Character.isUpperCase(c)) return false; + return (i <= 0 || isLowerCase(codePointBefore(s, i))); + } + + /** * Returns the token starting at the given offset in the given text. For the purpose of this * method, a "token" is any sequence of consecutive characters of the same type, as defined * below. @@ -2085,8 +2230,10 @@ cmp: while (ia < lga) { } else if (src instanceof CharBuffer) { ((CharBuffer) src).subSequence(srcOffset, srcOffset + length).get(dst, dstOffset, length); } else { - // An other candidate could be javax.swing.text.Segment, but it - // is probably not worth to introduce a Swing dependency for it. + /* + * Another candidate could be `javax.swing.text.Segment`, but it + * is probably not worth to introduce a Swing dependency for it. + */ while (length != 0) { dst[dstOffset++] = src.charAt(srcOffset++); length--; diff --git a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java index 34b164d..5686cea 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java @@ -33,7 +33,7 @@ import static org.apache.sis.util.CharSequences.*; * * @author Martin Desruisseaux (Geomatys) * @author Johann Sorel (Geomatys) - * @version 1.0 + * @version 1.1 * @since 0.3 * @module */ @@ -507,6 +507,10 @@ public final strictfp class CharSequencesTest extends TestCase { @Test public void testCommonPrefix() { assertEquals("testCommon", commonPrefix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()")); + assertEquals("", commonPrefix( "testCommonPrefix()", "unrelated")); + assertEquals("", commonPrefix( "", "unrelated")); + assertEquals("", commonPrefix( "", "")); + assertEquals("equal", commonPrefix( "equal", "equal")); } /** @@ -515,6 +519,26 @@ public final strictfp class CharSequencesTest extends TestCase { @Test public void testCommonSuffix() { assertEquals("fix()", commonSuffix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()")); + assertEquals("", commonSuffix( "testCommonPrefix()", "unrelated")); + assertEquals("", commonSuffix( "", "unrelated")); + assertEquals("", commonSuffix( "", "")); + assertEquals("equal", commonSuffix( "equal", "equal")); + } + + /** + * Tests the {@link CharSequences#commonWords(CharSequence, CharSequence)} method. + */ + @Test + public void testCommonWords() { + assertSame ("baroclinic_eastward_velocity", commonWords("baroclinic_eastward_velocity", null)); + assertSame ("baroclinic_velocity", commonWords("baroclinic_eastward_velocity", "baroclinic_velocity")); + assertEquals("baroclinic_velocity", commonWords("baroclinic_eastward_velocity", "baroclinic_northward_velocity")); + assertEquals("baroclinic velocity", commonWords("baroclinic_eastward_velocity", "baroclinic northward velocity")); + assertEquals("baroclinic", commonWords("baroclinic_eastward", "baroclinic_northward")); + assertEquals("velocity", commonWords("eastward_velocity", "northward_velocity")); + assertEquals("BaroclinicVelocity", commonWords("BaroclinicEastwardVelocity", "BaroclinicNorthwardVelocity")); + assertEquals("Baroclinic", commonWords("BaroclinicEastward", "BaroclinicNorthward")); + assertEquals("Velocity", commonWords("EastwardVelocity", "NorthwardVelocity")); } /** diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java index e1c4adb..19f0460 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java @@ -20,6 +20,7 @@ import java.util.Map; import java.util.List; import java.util.HashMap; import java.util.ArrayList; +import java.util.Iterator; import java.util.Optional; import java.io.IOException; import java.nio.file.Path; @@ -53,9 +54,11 @@ import org.apache.sis.measure.MeasurementRange; import org.apache.sis.measure.NumberRange; import org.apache.sis.math.Vector; import org.apache.sis.util.Numbers; +import org.apache.sis.util.CharSequences; import org.apache.sis.util.resources.Errors; import org.apache.sis.util.resources.Vocabulary; import org.apache.sis.internal.jdk9.JDK9; +import org.apache.sis.internal.storage.MetadataBuilder; /** @@ -100,7 +103,7 @@ public final class RasterResource extends AbstractGridResource implements Resour * than variable name because the former is controlled vocabulary. The use of controlled vocabulary for identifiers increases * the chances of stability or consistency between similar products. * - *

The value set by constructor may be updated by {@link #resolveNameCollision(RasterResource, Decoder)}, + *

The value set by constructor may be updated by {@link #resolveNameCollision(Decoder)}, * but should not be modified after that point.

* * @see #getIdentifier() @@ -203,7 +206,7 @@ public final class RasterResource extends AbstractGridResource implements Resour final Variable[] variables = decoder.getVariables().clone(); // Needs a clone because may be modified. final List siblings = new ArrayList<>(4); // Usually has only 1 element, sometime 2. final List resources = new ArrayList<>(variables.length); // The raster resources to be returned. - final Map firstOfName = new HashMap<>(); // For detecting name collisions. + final Map> byName = new HashMap<>(); // For detecting name collisions. for (int i=0; i collisions = null; + for (final Iterator> it = byName.values().iterator(); it.hasNext();) { + final List rs = it.next(); + if (rs.size() >= 2) { + it.remove(); // Remove resources before to re-insert them in next loop below. + if (collisions == null) { + collisions = rs; + } else { + collisions.addAll(rs); + } + } + } + if (collisions != null) { // After above loop because may change the byName` map. + for (final RasterResource r : collisions) { + changed |= r.resolveNameCollision(decoder); + r.addToNameMap(byName); // For checking if new names cause new collisions. + } + } + } + while (changed); return resources; } /** - * If the given resource is non-null, modifies the name of this resource for avoiding name collision. - * The {@code other} resource shall be non-null when the caller detected that there is a name collision - * with that resource. + * Adds the name of the given resource to the given map. This is used for resolving name collision: any entry + * associated to two values or more will have the resources renamed by {@link #resolveNameCollision(Decoder)}. + */ + private void addToNameMap(final Map> byName) { + byName.computeIfAbsent(identifier, (key) -> new ArrayList<>()).add(this); + } + + /** + * Invoked when the name of this resource needs to be changed because it collides with the name of another resource. + * This method uses the variable name, which should be unique in each netCDF file. If this resource wraps more than + * one variable (for example "eastward_velocity" and "northward_velocity"), then this method takes the common part + * of all variable names ("velocity" in above example). * - * @param other the other resource for which there is a name collision, or {@code null} if no collision. + * @return whether this resource has been renamed as an effect of this method call. */ - private void resolveNameCollision(final RasterResource other, final Decoder decoder) { - if (other != null) { - if (identifier.equals(other.identifier)) { - other.resolveNameCollision(decoder); - } - resolveNameCollision(decoder); + private boolean resolveNameCollision(final Decoder decoder) { + String name = null; + for (final Variable v : data) { + name = (String) CharSequences.commonWords(name, v.getName()); } + if (name == null || name.isEmpty()) { + name = data[0].getName(); // If unable to get a common name, fallback on the first one. + } + final GenericName newValue = decoder.nameFactory.createLocalName(decoder.namespace, name); + if (newValue.equals(identifier)) return false; + identifier = newValue; + return true; } /** - * Invoked when the name of this resource needs to be changed because it collides with the name of another resource. - * This method appends the variable name, which should be unique in each netCDF file. + * Invoked the first time that {@link #getMetadata()} is invoked. Computes metadata based on information + * provided by {@link #getIdentifier()}, {@link #getGridGeometry()}, {@link #getSampleDimensions()} and + * variable names. + * + * @param metadata the builder where to set metadata properties. + * @throws DataStoreException if an error occurred while reading metadata from the data store. */ - private void resolveNameCollision(final Decoder decoder) { - String name = identifier + " (" + data[0].getName() + ')'; - identifier = decoder.nameFactory.createLocalName(decoder.namespace, name); + @Override + protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException { + String title = null; + for (final Variable v : data) { + title = (String) CharSequences.commonWords(title, v.getDescription()); + metadata.addIdentifier(v.getGroupName(), v.getName(), MetadataBuilder.Scope.RESOURCE); + } + if (title != null && !title.isEmpty()) { + metadata.addTitle(CharSequences.camelCaseToSentence(title).toString()); + } + super.createMetadata(metadata); } /** - * Returns the variable name as an identifier of this resource. + * Returns the standard name (if non ambiguous) or the variable name as an identifier of this resource. */ @Override public Optional getIdentifier() { diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java index 6ebaad6..bcee136 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java @@ -155,6 +155,8 @@ public abstract class Variable extends Node { /** * Returns the name of this variable. May be used as sample dimension name in a raster. + * The variable name should be unique in each netCDF file + * (by contrast, {@link #getStandardName()} is not always unique). * * @return the name of this variable. */ @@ -162,24 +164,23 @@ public abstract class Variable extends Node { public abstract String getName(); /** - * Returns the standard name if available, or the long name other, or the ordinary name otherwise. - * May be used as the {@link RasterResource} label, or the label of a {@link Raster} as a whole - * (including all bands). Standard name is preferred to variable name when controlled vocabulary - * is desired, for example for more stable identifier or more consistency between similar data. + * Returns the standard name if available, or the unique variable name otherwise. + * May be used for {@link RasterResource#getIdentifier()} and {@link Raster#label}. + * Standard name is preferred to variable name when controlled vocabulary is desired, + * for example for more stable identifier or more consistency between similar data. + * + *

This method does not check the {@code "long_name"} attribute because the long + * name is more like a sentence (e.g. "model wind direction at 10 m") + * while standard name and variable name are more like identifiers. + * For the long name, use {@link #getDescription()} instead.

* * @return the standard name, or a fallback if there is no standard name. * * @see RasterResource#identifier */ public final String getStandardName() { - String name = getAttributeAsString(CF.STANDARD_NAME); - if (name == null) { - name = getAttributeAsString(CDM.LONG_NAME); - if (name == null) { - name = getName(); - } - } - return name; + final String name = getAttributeAsString(CF.STANDARD_NAME); + return (name != null) ? name : getName(); } /** diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java index db9460a..e62ddbf 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java @@ -262,7 +262,7 @@ next: for (final String name : axisNames) { */ char abbreviation = getAxisType(axis.getAxisType()); if (abbreviation == 0) { - abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME)); + abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME)); // No fallback on variable name. /* * If the abbreviation is still unknown, look at the "long_name", "description" or "title" attribute. Those * attributes are not standardized, so they are less reliable than "standard_name". But they are still more diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java index 9f6a095..c0e424e 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java @@ -68,7 +68,8 @@ import org.apache.sis.math.Vector; final class VariableInfo extends Variable implements Comparable { /** * The names of attributes where to look for the description to be returned by {@link #getDescription()}. - * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()} javadoc. + * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()} + * javadoc, in same order. */ private static final String[] DESCRIPTION_ATTRIBUTES = { CDM.LONG_NAME, diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java index dc0504f..d4cd862 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java @@ -938,8 +938,8 @@ split: while ((start = CharSequences.skipLeadingWhitespaces(value, start, lengt variable.writeDataTypeName(buffer); setBandIdentifier(f.createMemberName(null, name, f.createTypeName(null, buffer.toString()))); } - final String id = variable.getAttributeAsString(CF.STANDARD_NAME); - if (id != null && !id.equals(name)) { + final String id = Strings.trimOrNull(variable.getStandardName()); + if (!id.equals(name)) { addBandName(variable.getAttributeAsString(ACDD.standard_name_vocabulary), id); } final String description = Strings.trimOrNull(variable.getDescription()); diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java index a803548..0df65f5 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java @@ -71,8 +71,8 @@ public abstract class AbstractGridResource extends AbstractResource implements G /** * Invoked the first time that {@link #getMetadata()} is invoked. The default implementation populates - * metadata based on information provided by {@link #getIdentifier()} and {@link #getGridGeometry()}. - * Subclasses should override if they can provide more information. + * metadata based on information provided by {@link #getIdentifier()}, {@link #getGridGeometry()} and + * {@link #getSampleDimensions()}. Subclasses should override if they can provide more information. * * @param metadata the builder where to set metadata properties. * @throws DataStoreException if an error occurred while reading metadata from the data store. diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java index aa57fe1..a1c8eba 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java @@ -16,8 +16,10 @@ */ package org.apache.sis.internal.storage; +import java.util.Locale; import java.util.Optional; import org.opengis.util.GenericName; +import org.opengis.util.InternationalString; import org.opengis.metadata.Metadata; import org.opengis.geometry.Envelope; import org.opengis.referencing.operation.TransformException; @@ -27,6 +29,8 @@ import org.apache.sis.storage.event.StoreEvent; import org.apache.sis.storage.event.StoreListener; import org.apache.sis.storage.event.StoreListeners; import org.apache.sis.storage.event.WarningEvent; +import org.apache.sis.util.iso.AbstractInternationalString; +import org.apache.sis.util.CharSequences; /** @@ -47,7 +51,7 @@ import org.apache.sis.storage.event.WarningEvent; * Synchronization, when needed, uses {@code this} lock. * * @author Martin Desruisseaux (Geomatys) - * @version 1.0 + * @version 1.1 * @since 0.8 * @module */ @@ -92,7 +96,7 @@ public class AbstractResource extends StoreListeners implements Resource { /** * Returns a description of this resource. This method invokes {@link #createMetadata(MetadataBuilder)} - * the first time it is invoked, then cache the result. + * the first time it is invoked, then caches the result. * * @return information about this resource (never {@code null} in this implementation). * @throws DataStoreException if an error occurred while reading or computing the envelope. @@ -116,7 +120,7 @@ public class AbstractResource extends StoreListeners implements Resource { * @throws DataStoreException if an error occurred while reading metadata from the data store. */ protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException { - getIdentifier().ifPresent((name) -> metadata.addTitle(name.toInternationalString())); + getIdentifier().ifPresent((name) -> metadata.addTitle(new Sentence(name))); getEnvelope().ifPresent((envelope) -> { try { metadata.addExtent(envelope); @@ -127,6 +131,36 @@ public class AbstractResource extends StoreListeners implements Resource { } /** + * An international string where localized identifiers are formatted more like an English sentence. + * This is used for wrapping {@link GenericName#toInternationalString()} representation for use as + * a citation title. + */ + private static final class Sentence extends AbstractInternationalString { + /** The generic name localized representation. */ + private final InternationalString name; + + /** Returns a new wrapper for the given generic name. */ + Sentence(final GenericName name) { + this.name = name.toInternationalString(); + } + + /** Returns the generic name as an English-like sentence. */ + @Override public String toString(final Locale locale) { + return CharSequences.camelCaseToSentence(name.toString(locale)).toString(); + } + + /** Returns a hash code value for this sentence. */ + @Override public int hashCode() { + return ~name.hashCode(); + } + + /** Compares the given object with this sentence for equality. */ + @Override public boolean equals(final Object other) { + return (other instanceof Sentence) && name.equals(((Sentence) other).name); + } + } + + /** * Clears any cache in this resource, forcing the data to be recomputed when needed again. * This method should be invoked if the data in underlying data store changed. */ diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java index db05e2b..ce758ad 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java @@ -68,7 +68,7 @@ public class ConcatenatedFeatureSet extends AggregatedFeatureSet { /** * Creates a new concatenated feature set with the same types than the given feature set, - * but different sources. + * but different sources. This is used for creating {@linkplain #subset(Query) subsets}. */ private ConcatenatedFeatureSet(final FeatureSet[] sources, final ConcatenatedFeatureSet original) { super(original); diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java index 6185215..2a33f3b 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java @@ -130,6 +130,7 @@ import org.apache.sis.internal.util.Numerics; import org.apache.sis.internal.util.Strings; import org.apache.sis.util.ArgumentChecks; import org.apache.sis.util.CharSequences; +import org.apache.sis.util.Characters; import org.apache.sis.util.iso.Names; import org.apache.sis.util.iso.Types; import org.apache.sis.measure.Units; @@ -150,7 +151,7 @@ import org.opengis.metadata.citation.Responsibility; * @author Martin Desruisseaux (Geomatys) * @author Rémi Maréchal (Geomatys) * @author Thi Phuong Hao Nguyen (VNSC) - * @version 1.0 + * @version 1.1 * @since 0.8 * @module */ @@ -913,8 +914,9 @@ public class MetadataBuilder { /** * Adds a resource (data) identifier, a metadata identifier, or both as they are often the same. * The identifier is added only if {@code code} is non-null, regardless other argument values. - * Empty strings (ignoring spaces) are ignored. - * Storages locations are: + * Empty strings (ignoring spaces) are considered as null. + * The identifier is not added if already presents. + * Storage locations are: * *
    *
  • Metadata: {@code metadata/metadataIdentifier}
  • @@ -1139,7 +1141,10 @@ public class MetadataBuilder { } /** - * Adds a title or alternate title of the resource. + * Adds a title or alternate title of the resource, if not already present. + * This operation does nothing if the title is already defined and the given + * title is already used as an identifier (this policy is a complement of the + * {@link #addTitleOrIdentifier(String, Scope)} behavior). * Storage location is: * *
      @@ -1159,6 +1164,11 @@ public class MetadataBuilder { if (current == null) { citation.setTitle(i18n); } else if (!equals(current, i18n)) { + for (final Identifier id : citation.getIdentifiers()) { + if (CharSequences.equalsFiltered(title, id.getCode(), Characters.Filter.LETTERS_AND_DIGITS, true)) { + return; + } + } addIfNotPresent(citation.getAlternateTitles(), i18n); } } diff --git a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java index a6006ab..7841638 100644 --- a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java +++ b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java @@ -19,6 +19,7 @@ package org.apache.sis.internal.storage; import java.util.Arrays; import java.util.Collections; import org.opengis.metadata.Metadata; +import org.opengis.metadata.lineage.Lineage; import org.opengis.metadata.content.FeatureCatalogueDescription; import org.apache.sis.feature.builder.FeatureTypeBuilder; import org.apache.sis.storage.DataStoreContentException; @@ -68,8 +69,8 @@ public final strictfp class ConcatenatedFeatureSetTest extends TestCase { final Metadata md = cfs.getMetadata(); assertNotNull("getMetadata()", md); assertContentInfoEquals("City", 3, (FeatureCatalogueDescription) getSingleton(md.getContentInfo())); - assertFeatureSourceEquals("City", new String[] {"City"}, - getSingleton(getSingleton(md.getResourceLineages()).getSources())); + final Lineage lineage = getSingleton(md.getResourceLineages()); + assertFeatureSourceEquals("City", new String[] {"City"}, getSingleton(lineage.getSources())); } /**