This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
new b37fde6 Try to produce better identifiers and title for netCDF variables. The identifier excludes the "long_name" attribute, which is sometime more like a sentence than an identifier (the standard name or variable name provides more stable identifiers). Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice, formatted in a way more like English sentence (with '_' and CamelCase replaced by space). Variable names are declared as citation identifier [...]
b37fde6 is described below
commit b37fde605db9f8f72c2a1e18f9b16afafc4d4c8d
Author: Martin Desruisseaux
AuthorDate: Mon Mar 16 17:30:11 2020 +0100
Try to produce better identifiers and title for netCDF variables.
The identifier excludes the "long_name" attribute, which is sometime more like a sentence
than an identifier (the standard name or variable name provides more stable identifiers).
Conversely the `metadata.citation.title` uses the "long_name" attribute as first choice,
formatted in a way more like English sentence (with '_' and CamelCase replaced by space).
Variable names are declared as citation identifiers and shown in the summary panel.
---
.../sis/gui/metadata/IdentificationInfo.java | 30 ++++
.../org/apache/sis/internal/gui/Resources.java | 5 +
.../apache/sis/internal/gui/Resources.properties | 1 +
.../sis/internal/gui/Resources_fr.properties | 1 +
.../java/org/apache/sis/util/CharSequences.java | 181 +++++++++++++++++++--
.../org/apache/sis/util/CharSequencesTest.java | 26 ++-
.../apache/sis/internal/netcdf/RasterResource.java | 96 ++++++++---
.../org/apache/sis/internal/netcdf/Variable.java | 25 +--
.../apache/sis/internal/netcdf/impl/GridInfo.java | 2 +-
.../sis/internal/netcdf/impl/VariableInfo.java | 3 +-
.../apache/sis/storage/netcdf/MetadataReader.java | 4 +-
.../sis/internal/storage/AbstractGridResource.java | 4 +-
.../sis/internal/storage/AbstractResource.java | 40 ++++-
.../internal/storage/ConcatenatedFeatureSet.java | 2 +-
.../sis/internal/storage/MetadataBuilder.java | 18 +-
.../storage/ConcatenatedFeatureSetTest.java | 5 +-
16 files changed, 378 insertions(+), 65 deletions(-)
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
index eecfca6..bebc66f 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
+++ b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
@@ -17,6 +17,7 @@
package org.apache.sis.gui.metadata;
import java.util.Date;
+import java.util.StringJoiner;
import javafx.geometry.HPos;
import javafx.scene.canvas.Canvas;
import javafx.scene.canvas.GraphicsContext;
@@ -53,6 +54,18 @@ import static org.apache.sis.internal.util.CollectionsExt.nonNull;
* The pane where to show the values of {@link Identification} objects.
* The same pane can be used for an arbitrary amount of identifications.
* Each instance is identified by its title.
+ * The content is:
+ *
+ *
+ * - The title in bold font.
+ * - Identifiers.
+ * - Abstract, or purpose, or credit (in this preference order).
+ * - Topic category.
+ * - Release date, or publication date, or creation date, or any date (in this preference order).
+ * - Type of resource.
+ * - Spatiotemporal extent as a textual description.
+ * - Extent shown as a rectangle on a world map.
+ *
*
* @author Smaniotto Enzo (GSoC)
* @author Martin Desruisseaux (Geomatys)
@@ -147,6 +160,7 @@ final class IdentificationInfo extends Section {
/**
* Invoked when new identification information should be shown.
* This method updates all fields in this section with the content of given identification information.
+ * The content is summarized in {@linkplain IdentificationInfo class javadoc}.
*/
@Override
void buildContent(final Identification info) {
@@ -166,6 +180,19 @@ final class IdentificationInfo extends Section {
}
title.setText(text);
/*
+ * Identifiers as a comma-separated list on a single line. Each identifier
+ * is formatted as "codespace:code" or only "code" if there is no codespace.
+ */
+ if (citation != null) {
+ final StringJoiner buffer = new StringJoiner(", ");
+ for (final Identifier id : citation.getIdentifiers()) {
+ buffer.add(IdentifiedObjects.toString(id));
+ }
+ if (buffer.length() != 0) {
+ addLine(Resources.Keys.Identifiers, buffer.toString());
+ }
+ }
+ /*
* The abstract, or if there is no abstract the purpose, or if no purpose the credit as a fallback.
* We use those fallback because they can provide some hints about the product.
* The topic category (climatology, health, etc.) follows.
@@ -186,6 +213,9 @@ final class IdentificationInfo extends Section {
}
}
addLine(label, text);
+ /*
+ * Topic category.
+ */
addLine(Resources.Keys.TopicCategory, owner.string(nonNull(info.getTopicCategories())));
/*
* Select a single, arbitrary date. We take the release or publication date if available.
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
index 4060265..ff0a748 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
@@ -211,6 +211,11 @@ public final class Resources extends IndexedResourceBundle {
public static final short GeospatialFiles = 4;
/**
+ * Identifiers:
+ */
+ public static final short Identifiers = 54;
+
+ /**
* Loading…
*/
public static final short Loading = 7;
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
index 7af6c69..d9f07a1 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
@@ -51,6 +51,7 @@ Format = Format:
FromMetadata = From metadata
FullScreen = Full screen
GeospatialFiles = Geospatial data files
+Identifiers = Identifiers:
Loading = Loading\u2026
MainWindow = Main window
Metadata = Metadata
diff --git a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
index 48bb9b3..4b7c231 100644
--- a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
+++ b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
@@ -56,6 +56,7 @@ Format = Format\u00a0:
FromMetadata = Des m\u00e9ta-donn\u00e9es
FullScreen = Plein \u00e9cran
GeospatialFiles = Fichiers de donn\u00e9es g\u00e9ospatiales
+Identifiers = Identifiants\u00a0:
Loading = Chargement\u2026
MainWindow = Fen\u00eatre principale
Metadata = Metadonn\u00e9es
diff --git a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
index 7448785..8802403 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
@@ -72,7 +72,7 @@ import static java.lang.Character.*;
* {@code 0} or {@code false} primitive type calculated as if the input was an empty string.
*
* @author Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
*
* @see StringBuilders
*
@@ -546,9 +546,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) {
*/
public static int skipLeadingWhitespaces(final CharSequence text, int fromIndex, final int toIndex) {
while (fromIndex < toIndex) {
- final int c = Character.codePointAt(text, fromIndex);
- if (!Character.isWhitespace(c)) break;
- fromIndex += Character.charCount(c);
+ final int c = codePointAt(text, fromIndex);
+ if (!isWhitespace(c)) break;
+ fromIndex += charCount(c);
}
return fromIndex;
}
@@ -584,9 +584,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) {
*/
public static int skipTrailingWhitespaces(final CharSequence text, final int fromIndex, int toIndex) {
while (toIndex > fromIndex) {
- final int c = Character.codePointBefore(text, toIndex);
- if (!Character.isWhitespace(c)) break;
- toIndex -= Character.charCount(c);
+ final int c = codePointBefore(text, toIndex);
+ if (!isWhitespace(c)) break;
+ toIndex -= charCount(c);
}
return toIndex;
}
@@ -1187,16 +1187,16 @@ searchWordBreak: while (true) {
final StringBuilder buffer = new StringBuilder(identifier.length());
final int length = identifier.length();
for (int i=0; iThe purpose of this method is to create a global identifier from a list of component identifiers.
+ * The later are often eastward and northward components of a vector, in which case this method provides
+ * an identifier for the vector as a whole.
+ *
+ * Example:
+ * given the following inputs:
+ *
+ * - {@code "baroclinic_eastward_velocity"}
+ * - {@code "baroclinic_northward_velocity"}
+ *
+ * This method returns {@code "baroclinic_velocity"}. Note that the {@code "ward"} characters
+ * are a common suffix of both texts but nevertheless omitted because they cut a word.
+ *
+ * If one of those texts is {@code null}, then the other text is returned.
+ * If there is no common words, then this method returns an empty string.
+ *
+ * Possible future evolution
+ * Current implementation searches only for a common prefix and a common suffix, ignoring any common words
+ * that may appear in the middle of the strings. A character is considered the beginning of a word if it is
+ * {@linkplain Character#isLetterOrDigit(int) a letter or digit} which is not preceded by another letter or
+ * digit (as leading "s" and "c" in "snake_case"), or if it is an {@linkplain Character#isUpperCase(int)
+ * upper case} letter preceded by a {@linkplain Character#isLowerCase(int) lower case} letter or no letter
+ * (as both "C" in "CamelCase").
+ *
+ * @param s1 the first text, or {@code null}.
+ * @param s2 the second text, or {@code null}.
+ * @return the common suffix of both texts (may be empty), or {@code null} if both texts are null.
+ *
+ * @since 1.1
+ */
+ public static CharSequence commonWords(final CharSequence s1, final CharSequence s2) {
+ final int lg1 = length(s1);
+ final int lg2 = length(s2);
+ final int shortestLength = Math.min(lg1, lg2); // 0 if s1 or s2 is null, in which case prefix and suffix will have the other value.
+ final CharSequence prefix = commonPrefix(s1, s2); int prefixLength = length(prefix); if (prefixLength >= shortestLength) return prefix;
+ final CharSequence suffix = commonSuffix(s1, s2); int suffixLength = length(suffix); if (suffixLength >= shortestLength) return suffix;
+ final int length = prefixLength + suffixLength;
+ if (length >= lg1) return s1; // Check if one of the strings is already equal to prefix + suffix.
+ if (length >= lg2) return s2;
+ /*
+ * At this point `s1` and `s2` contain at least one character between the prefix and the suffix.
+ * If the prefix or the suffix seems to stop in the middle of a word, skip the remaining of that word.
+ * For example if `s1` and `s2` are "eastward_velocity" and "northward_velocity", the common suffix is
+ * "ward_velocity" but we want to retain only "velocity".
+ *
+ * The first condition below (before the loop) checks the character after the common prefix (for example "e"
+ * in "baroclinic_eastward_velocity" if the prefix is "baroclinic_"). The intent is to handle the case where
+ * the word separator is not the same (e.g. "baroclinic_eastward_velocity" and "baroclinic northward velocity",
+ * in which case the '_' or ' ' character would not appear in the prefix).
+ */
+ if (!isWordBoundary(s1, prefixLength, codePointAt(s1, prefixLength)) &&
+ !isWordBoundary(s2, prefixLength, codePointAt(s2, prefixLength)))
+ {
+ while (prefixLength > 0) {
+ final int c = codePointBefore(prefix, prefixLength);
+ final int n = charCount(c);
+ prefixLength -= n;
+ if (isWordBoundary(prefix, prefixLength, c)) {
+ if (!isLetterOrDigit(c)) prefixLength += n; // Keep separator character.
+ break;
+ }
+ }
+ }
+ /*
+ * Same process than for the prefix above. The condition before the loop checks the character before suffix
+ * for the same reason than above, but using only `isLetterOrDigit` ignoring camel-case. The reason is that
+ * if the character before was a word separator according camel-case convention (i.e. an upper-case letter),
+ * we would need to include it in the common suffix.
+ */
+ int suffixStart = 0;
+ if (isLetterOrDigit(codePointBefore(s1, lg1 - suffixLength)) &&
+ isLetterOrDigit(codePointBefore(s2, lg2 - suffixLength)))
+ {
+ while (suffixStart < suffixLength) {
+ final int c = codePointAt(suffix, suffixStart);
+ if (isWordBoundary(suffix, suffixStart, c)) break;
+ suffixStart += charCount(c);
+ }
+ }
+ /*
+ * At this point we got the final prefix and suffix to use. If the prefix or suffix is empty,
+ * trim whitespaces or '_' character. For example if the suffix is "_velocity" and no prefix,
+ * return "velocity" without leading "_" character.
+ */
+ if (prefixLength == 0) {
+ while (suffixStart < suffixLength) {
+ final int c = codePointAt(suffix, suffixStart);
+ if (isLetterOrDigit(c)) {
+ return suffix.subSequence(suffixStart, suffixLength); // Skip leading ignorable characters in suffix.
+ }
+ suffixStart += charCount(c);
+ }
+ return "";
+ }
+ if (suffixStart >= suffixLength) {
+ while (prefixLength > 0) {
+ final int c = codePointBefore(prefix, prefixLength);
+ if (isLetterOrDigit(c)) {
+ return prefix.subSequence(0, prefixLength); // Skip trailing ignorable characters in prefix.
+ }
+ prefixLength -= charCount(c);
+ }
+ return "";
+ }
+ /*
+ * All special cases have been examined. Return the concatenation of (possibly shortened)
+ * common prefix and suffix.
+ */
+ final StringBuilder buffer = new StringBuilder(prefixLength + suffixLength).append(prefix);
+ final int c1 = codePointBefore(prefix, prefixLength);
+ final int c2 = codePointAt(suffix, suffixStart);
+ if (isLetterOrDigit(c1) && isLetterOrDigit(c2)) {
+ if (!Character.isUpperCase(c2) || !isLowerCase(c1)) {
+ buffer.append(' '); // Keep a separator between two words (except if CamelCase is used).
+ }
+ } else if (c1 == c2) {
+ suffixStart += charCount(c2); // Avoid repeating '_' in e.g. "baroclinic__velocity".
+ }
+ return buffer.append(suffix, suffixStart, suffixLength).toString();
+ }
+
+ /**
+ * Returns {@code true} if the character {@code c} is the beginning of a word or a non-word character.
+ * For example this method returns {@code true} if {@code c} is {@code '_'} in {@code "snake_case"} or
+ * {@code "C"} in {@code "CamelCase"}.
+ *
+ * @param s the character sequence from which the {@code c} character has been obtained.
+ * @param i the index in {@code s} where the {@code c} character has been obtained.
+ * @param c the code point in {@code s} as index {@code i}.
+ * @return whether the given character is the beginning of a word or a non-word character.
+ */
+ private static boolean isWordBoundary(final CharSequence s, final int i, final int c) {
+ if (!isLetterOrDigit(c)) return true;
+ if (!Character.isUpperCase(c)) return false;
+ return (i <= 0 || isLowerCase(codePointBefore(s, i)));
+ }
+
+ /**
* Returns the token starting at the given offset in the given text. For the purpose of this
* method, a "token" is any sequence of consecutive characters of the same type, as defined
* below.
@@ -2085,8 +2230,10 @@ cmp: while (ia < lga) {
} else if (src instanceof CharBuffer) {
((CharBuffer) src).subSequence(srcOffset, srcOffset + length).get(dst, dstOffset, length);
} else {
- // An other candidate could be javax.swing.text.Segment, but it
- // is probably not worth to introduce a Swing dependency for it.
+ /*
+ * Another candidate could be `javax.swing.text.Segment`, but it
+ * is probably not worth to introduce a Swing dependency for it.
+ */
while (length != 0) {
dst[dstOffset++] = src.charAt(srcOffset++);
length--;
diff --git a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
index 34b164d..5686cea 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
@@ -33,7 +33,7 @@ import static org.apache.sis.util.CharSequences.*;
*
* @author Martin Desruisseaux (Geomatys)
* @author Johann Sorel (Geomatys)
- * @version 1.0
+ * @version 1.1
* @since 0.3
* @module
*/
@@ -507,6 +507,10 @@ public final strictfp class CharSequencesTest extends TestCase {
@Test
public void testCommonPrefix() {
assertEquals("testCommon", commonPrefix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+ assertEquals("", commonPrefix( "testCommonPrefix()", "unrelated"));
+ assertEquals("", commonPrefix( "", "unrelated"));
+ assertEquals("", commonPrefix( "", ""));
+ assertEquals("equal", commonPrefix( "equal", "equal"));
}
/**
@@ -515,6 +519,26 @@ public final strictfp class CharSequencesTest extends TestCase {
@Test
public void testCommonSuffix() {
assertEquals("fix()", commonSuffix(new StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+ assertEquals("", commonSuffix( "testCommonPrefix()", "unrelated"));
+ assertEquals("", commonSuffix( "", "unrelated"));
+ assertEquals("", commonSuffix( "", ""));
+ assertEquals("equal", commonSuffix( "equal", "equal"));
+ }
+
+ /**
+ * Tests the {@link CharSequences#commonWords(CharSequence, CharSequence)} method.
+ */
+ @Test
+ public void testCommonWords() {
+ assertSame ("baroclinic_eastward_velocity", commonWords("baroclinic_eastward_velocity", null));
+ assertSame ("baroclinic_velocity", commonWords("baroclinic_eastward_velocity", "baroclinic_velocity"));
+ assertEquals("baroclinic_velocity", commonWords("baroclinic_eastward_velocity", "baroclinic_northward_velocity"));
+ assertEquals("baroclinic velocity", commonWords("baroclinic_eastward_velocity", "baroclinic northward velocity"));
+ assertEquals("baroclinic", commonWords("baroclinic_eastward", "baroclinic_northward"));
+ assertEquals("velocity", commonWords("eastward_velocity", "northward_velocity"));
+ assertEquals("BaroclinicVelocity", commonWords("BaroclinicEastwardVelocity", "BaroclinicNorthwardVelocity"));
+ assertEquals("Baroclinic", commonWords("BaroclinicEastward", "BaroclinicNorthward"));
+ assertEquals("Velocity", commonWords("EastwardVelocity", "NorthwardVelocity"));
}
/**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
index e1c4adb..19f0460 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
@@ -20,6 +20,7 @@ import java.util.Map;
import java.util.List;
import java.util.HashMap;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.Optional;
import java.io.IOException;
import java.nio.file.Path;
@@ -53,9 +54,11 @@ import org.apache.sis.measure.MeasurementRange;
import org.apache.sis.measure.NumberRange;
import org.apache.sis.math.Vector;
import org.apache.sis.util.Numbers;
+import org.apache.sis.util.CharSequences;
import org.apache.sis.util.resources.Errors;
import org.apache.sis.util.resources.Vocabulary;
import org.apache.sis.internal.jdk9.JDK9;
+import org.apache.sis.internal.storage.MetadataBuilder;
/**
@@ -100,7 +103,7 @@ public final class RasterResource extends AbstractGridResource implements Resour
* than variable name because the former is controlled vocabulary. The use of controlled vocabulary for identifiers increases
* the chances of stability or consistency between similar products.
*
- * The value set by constructor may be updated by {@link #resolveNameCollision(RasterResource, Decoder)},
+ *
The value set by constructor may be updated by {@link #resolveNameCollision(Decoder)},
* but should not be modified after that point.
*
* @see #getIdentifier()
@@ -203,7 +206,7 @@ public final class RasterResource extends AbstractGridResource implements Resour
final Variable[] variables = decoder.getVariables().clone(); // Needs a clone because may be modified.
final List siblings = new ArrayList<>(4); // Usually has only 1 element, sometime 2.
final List resources = new ArrayList<>(variables.length); // The raster resources to be returned.
- final Map firstOfName = new HashMap<>(); // For detecting name collisions.
+ final Map> byName = new HashMap<>(); // For detecting name collisions.
for (int i=0; i collisions = null;
+ for (final Iterator> it = byName.values().iterator(); it.hasNext();) {
+ final List rs = it.next();
+ if (rs.size() >= 2) {
+ it.remove(); // Remove resources before to re-insert them in next loop below.
+ if (collisions == null) {
+ collisions = rs;
+ } else {
+ collisions.addAll(rs);
+ }
+ }
+ }
+ if (collisions != null) { // After above loop because may change the byName` map.
+ for (final RasterResource r : collisions) {
+ changed |= r.resolveNameCollision(decoder);
+ r.addToNameMap(byName); // For checking if new names cause new collisions.
+ }
+ }
+ }
+ while (changed);
return resources;
}
/**
- * If the given resource is non-null, modifies the name of this resource for avoiding name collision.
- * The {@code other} resource shall be non-null when the caller detected that there is a name collision
- * with that resource.
+ * Adds the name of the given resource to the given map. This is used for resolving name collision: any entry
+ * associated to two values or more will have the resources renamed by {@link #resolveNameCollision(Decoder)}.
+ */
+ private void addToNameMap(final Map> byName) {
+ byName.computeIfAbsent(identifier, (key) -> new ArrayList<>()).add(this);
+ }
+
+ /**
+ * Invoked when the name of this resource needs to be changed because it collides with the name of another resource.
+ * This method uses the variable name, which should be unique in each netCDF file. If this resource wraps more than
+ * one variable (for example "eastward_velocity" and "northward_velocity"), then this method takes the common part
+ * of all variable names ("velocity" in above example).
*
- * @param other the other resource for which there is a name collision, or {@code null} if no collision.
+ * @return whether this resource has been renamed as an effect of this method call.
*/
- private void resolveNameCollision(final RasterResource other, final Decoder decoder) {
- if (other != null) {
- if (identifier.equals(other.identifier)) {
- other.resolveNameCollision(decoder);
- }
- resolveNameCollision(decoder);
+ private boolean resolveNameCollision(final Decoder decoder) {
+ String name = null;
+ for (final Variable v : data) {
+ name = (String) CharSequences.commonWords(name, v.getName());
}
+ if (name == null || name.isEmpty()) {
+ name = data[0].getName(); // If unable to get a common name, fallback on the first one.
+ }
+ final GenericName newValue = decoder.nameFactory.createLocalName(decoder.namespace, name);
+ if (newValue.equals(identifier)) return false;
+ identifier = newValue;
+ return true;
}
/**
- * Invoked when the name of this resource needs to be changed because it collides with the name of another resource.
- * This method appends the variable name, which should be unique in each netCDF file.
+ * Invoked the first time that {@link #getMetadata()} is invoked. Computes metadata based on information
+ * provided by {@link #getIdentifier()}, {@link #getGridGeometry()}, {@link #getSampleDimensions()} and
+ * variable names.
+ *
+ * @param metadata the builder where to set metadata properties.
+ * @throws DataStoreException if an error occurred while reading metadata from the data store.
*/
- private void resolveNameCollision(final Decoder decoder) {
- String name = identifier + " (" + data[0].getName() + ')';
- identifier = decoder.nameFactory.createLocalName(decoder.namespace, name);
+ @Override
+ protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException {
+ String title = null;
+ for (final Variable v : data) {
+ title = (String) CharSequences.commonWords(title, v.getDescription());
+ metadata.addIdentifier(v.getGroupName(), v.getName(), MetadataBuilder.Scope.RESOURCE);
+ }
+ if (title != null && !title.isEmpty()) {
+ metadata.addTitle(CharSequences.camelCaseToSentence(title).toString());
+ }
+ super.createMetadata(metadata);
}
/**
- * Returns the variable name as an identifier of this resource.
+ * Returns the standard name (if non ambiguous) or the variable name as an identifier of this resource.
*/
@Override
public Optional getIdentifier() {
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
index 6ebaad6..bcee136 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
@@ -155,6 +155,8 @@ public abstract class Variable extends Node {
/**
* Returns the name of this variable. May be used as sample dimension name in a raster.
+ * The variable name should be unique in each netCDF file
+ * (by contrast, {@link #getStandardName()} is not always unique).
*
* @return the name of this variable.
*/
@@ -162,24 +164,23 @@ public abstract class Variable extends Node {
public abstract String getName();
/**
- * Returns the standard name if available, or the long name other, or the ordinary name otherwise.
- * May be used as the {@link RasterResource} label, or the label of a {@link Raster} as a whole
- * (including all bands). Standard name is preferred to variable name when controlled vocabulary
- * is desired, for example for more stable identifier or more consistency between similar data.
+ * Returns the standard name if available, or the unique variable name otherwise.
+ * May be used for {@link RasterResource#getIdentifier()} and {@link Raster#label}.
+ * Standard name is preferred to variable name when controlled vocabulary is desired,
+ * for example for more stable identifier or more consistency between similar data.
+ *
+ * This method does not check the {@code "long_name"} attribute because the long
+ * name is more like a sentence (e.g. "model wind direction at 10 m")
+ * while standard name and variable name are more like identifiers.
+ * For the long name, use {@link #getDescription()} instead.
*
* @return the standard name, or a fallback if there is no standard name.
*
* @see RasterResource#identifier
*/
public final String getStandardName() {
- String name = getAttributeAsString(CF.STANDARD_NAME);
- if (name == null) {
- name = getAttributeAsString(CDM.LONG_NAME);
- if (name == null) {
- name = getName();
- }
- }
- return name;
+ final String name = getAttributeAsString(CF.STANDARD_NAME);
+ return (name != null) ? name : getName();
}
/**
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
index db9460a..e62ddbf 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
@@ -262,7 +262,7 @@ next: for (final String name : axisNames) {
*/
char abbreviation = getAxisType(axis.getAxisType());
if (abbreviation == 0) {
- abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME));
+ abbreviation = getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME)); // No fallback on variable name.
/*
* If the abbreviation is still unknown, look at the "long_name", "description" or "title" attribute. Those
* attributes are not standardized, so they are less reliable than "standard_name". But they are still more
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index 9f6a095..c0e424e 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -68,7 +68,8 @@ import org.apache.sis.math.Vector;
final class VariableInfo extends Variable implements Comparable {
/**
* The names of attributes where to look for the description to be returned by {@link #getDescription()}.
- * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()} javadoc.
+ * We use the same attributes than the one documented in the {@link ucar.nc2.Variable#getDescription()}
+ * javadoc, in same order.
*/
private static final String[] DESCRIPTION_ATTRIBUTES = {
CDM.LONG_NAME,
diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
index dc0504f..d4cd862 100644
--- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
+++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
@@ -938,8 +938,8 @@ split: while ((start = CharSequences.skipLeadingWhitespaces(value, start, lengt
variable.writeDataTypeName(buffer);
setBandIdentifier(f.createMemberName(null, name, f.createTypeName(null, buffer.toString())));
}
- final String id = variable.getAttributeAsString(CF.STANDARD_NAME);
- if (id != null && !id.equals(name)) {
+ final String id = Strings.trimOrNull(variable.getStandardName());
+ if (!id.equals(name)) {
addBandName(variable.getAttributeAsString(ACDD.standard_name_vocabulary), id);
}
final String description = Strings.trimOrNull(variable.getDescription());
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
index a803548..0df65f5 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
@@ -71,8 +71,8 @@ public abstract class AbstractGridResource extends AbstractResource implements G
/**
* Invoked the first time that {@link #getMetadata()} is invoked. The default implementation populates
- * metadata based on information provided by {@link #getIdentifier()} and {@link #getGridGeometry()}.
- * Subclasses should override if they can provide more information.
+ * metadata based on information provided by {@link #getIdentifier()}, {@link #getGridGeometry()} and
+ * {@link #getSampleDimensions()}. Subclasses should override if they can provide more information.
*
* @param metadata the builder where to set metadata properties.
* @throws DataStoreException if an error occurred while reading metadata from the data store.
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
index aa57fe1..a1c8eba 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
@@ -16,8 +16,10 @@
*/
package org.apache.sis.internal.storage;
+import java.util.Locale;
import java.util.Optional;
import org.opengis.util.GenericName;
+import org.opengis.util.InternationalString;
import org.opengis.metadata.Metadata;
import org.opengis.geometry.Envelope;
import org.opengis.referencing.operation.TransformException;
@@ -27,6 +29,8 @@ import org.apache.sis.storage.event.StoreEvent;
import org.apache.sis.storage.event.StoreListener;
import org.apache.sis.storage.event.StoreListeners;
import org.apache.sis.storage.event.WarningEvent;
+import org.apache.sis.util.iso.AbstractInternationalString;
+import org.apache.sis.util.CharSequences;
/**
@@ -47,7 +51,7 @@ import org.apache.sis.storage.event.WarningEvent;
* Synchronization, when needed, uses {@code this} lock.
*
* @author Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
* @since 0.8
* @module
*/
@@ -92,7 +96,7 @@ public class AbstractResource extends StoreListeners implements Resource {
/**
* Returns a description of this resource. This method invokes {@link #createMetadata(MetadataBuilder)}
- * the first time it is invoked, then cache the result.
+ * the first time it is invoked, then caches the result.
*
* @return information about this resource (never {@code null} in this implementation).
* @throws DataStoreException if an error occurred while reading or computing the envelope.
@@ -116,7 +120,7 @@ public class AbstractResource extends StoreListeners implements Resource {
* @throws DataStoreException if an error occurred while reading metadata from the data store.
*/
protected void createMetadata(final MetadataBuilder metadata) throws DataStoreException {
- getIdentifier().ifPresent((name) -> metadata.addTitle(name.toInternationalString()));
+ getIdentifier().ifPresent((name) -> metadata.addTitle(new Sentence(name)));
getEnvelope().ifPresent((envelope) -> {
try {
metadata.addExtent(envelope);
@@ -127,6 +131,36 @@ public class AbstractResource extends StoreListeners implements Resource {
}
/**
+ * An international string where localized identifiers are formatted more like an English sentence.
+ * This is used for wrapping {@link GenericName#toInternationalString()} representation for use as
+ * a citation title.
+ */
+ private static final class Sentence extends AbstractInternationalString {
+ /** The generic name localized representation. */
+ private final InternationalString name;
+
+ /** Returns a new wrapper for the given generic name. */
+ Sentence(final GenericName name) {
+ this.name = name.toInternationalString();
+ }
+
+ /** Returns the generic name as an English-like sentence. */
+ @Override public String toString(final Locale locale) {
+ return CharSequences.camelCaseToSentence(name.toString(locale)).toString();
+ }
+
+ /** Returns a hash code value for this sentence. */
+ @Override public int hashCode() {
+ return ~name.hashCode();
+ }
+
+ /** Compares the given object with this sentence for equality. */
+ @Override public boolean equals(final Object other) {
+ return (other instanceof Sentence) && name.equals(((Sentence) other).name);
+ }
+ }
+
+ /**
* Clears any cache in this resource, forcing the data to be recomputed when needed again.
* This method should be invoked if the data in underlying data store changed.
*/
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
index db05e2b..ce758ad 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
@@ -68,7 +68,7 @@ public class ConcatenatedFeatureSet extends AggregatedFeatureSet {
/**
* Creates a new concatenated feature set with the same types than the given feature set,
- * but different sources.
+ * but different sources. This is used for creating {@linkplain #subset(Query) subsets}.
*/
private ConcatenatedFeatureSet(final FeatureSet[] sources, final ConcatenatedFeatureSet original) {
super(original);
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
index 6185215..2a33f3b 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
@@ -130,6 +130,7 @@ import org.apache.sis.internal.util.Numerics;
import org.apache.sis.internal.util.Strings;
import org.apache.sis.util.ArgumentChecks;
import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Characters;
import org.apache.sis.util.iso.Names;
import org.apache.sis.util.iso.Types;
import org.apache.sis.measure.Units;
@@ -150,7 +151,7 @@ import org.opengis.metadata.citation.Responsibility;
* @author Martin Desruisseaux (Geomatys)
* @author Rémi Maréchal (Geomatys)
* @author Thi Phuong Hao Nguyen (VNSC)
- * @version 1.0
+ * @version 1.1
* @since 0.8
* @module
*/
@@ -913,8 +914,9 @@ public class MetadataBuilder {
/**
* Adds a resource (data) identifier, a metadata identifier, or both as they are often the same.
* The identifier is added only if {@code code} is non-null, regardless other argument values.
- * Empty strings (ignoring spaces) are ignored.
- * Storages locations are:
+ * Empty strings (ignoring spaces) are considered as null.
+ * The identifier is not added if already presents.
+ * Storage locations are:
*
*
* - Metadata: {@code metadata/metadataIdentifier}
@@ -1139,7 +1141,10 @@ public class MetadataBuilder {
}
/**
- * Adds a title or alternate title of the resource.
+ * Adds a title or alternate title of the resource, if not already present.
+ * This operation does nothing if the title is already defined and the given
+ * title is already used as an identifier (this policy is a complement of the
+ * {@link #addTitleOrIdentifier(String, Scope)} behavior).
* Storage location is:
*
*
@@ -1159,6 +1164,11 @@ public class MetadataBuilder {
if (current == null) {
citation.setTitle(i18n);
} else if (!equals(current, i18n)) {
+ for (final Identifier id : citation.getIdentifiers()) {
+ if (CharSequences.equalsFiltered(title, id.getCode(), Characters.Filter.LETTERS_AND_DIGITS, true)) {
+ return;
+ }
+ }
addIfNotPresent(citation.getAlternateTitles(), i18n);
}
}
diff --git a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
index a6006ab..7841638 100644
--- a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
+++ b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
@@ -19,6 +19,7 @@ package org.apache.sis.internal.storage;
import java.util.Arrays;
import java.util.Collections;
import org.opengis.metadata.Metadata;
+import org.opengis.metadata.lineage.Lineage;
import org.opengis.metadata.content.FeatureCatalogueDescription;
import org.apache.sis.feature.builder.FeatureTypeBuilder;
import org.apache.sis.storage.DataStoreContentException;
@@ -68,8 +69,8 @@ public final strictfp class ConcatenatedFeatureSetTest extends TestCase {
final Metadata md = cfs.getMetadata();
assertNotNull("getMetadata()", md);
assertContentInfoEquals("City", 3, (FeatureCatalogueDescription) getSingleton(md.getContentInfo()));
- assertFeatureSourceEquals("City", new String[] {"City"},
- getSingleton(getSingleton(md.getResourceLineages()).getSources()));
+ final Lineage lineage = getSingleton(md.getResourceLineages());
+ assertFeatureSourceEquals("City", new String[] {"City"}, getSingleton(lineage.getSources()));
}
/**