sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1652463 - in /sis/branches/JDK8/core/sis-metadata/src: main/java/org/apache/sis/io/wkt/ test/java/org/apache/sis/io/wkt/
Date Fri, 16 Jan 2015 17:23:44 GMT
Author: desruisseaux
Date: Fri Jan 16 17:23:44 2015
New Revision: 1652463

URL: http://svn.apache.org/r1652463
Log:
ISO 19162 requires that we replace non-ASCII characters by ASCII ones for all WKT elements
except REMARKS["..."].
Note that we make an exception to this rule when formatting a WKT using the SIS-specific Convention.INTERNAL
mode.

Modified:
    sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Convention.java
    sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/ElementKind.java
    sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Formatter.java
    sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/WKTFormat.java
    sis/branches/JDK8/core/sis-metadata/src/test/java/org/apache/sis/io/wkt/FormatterTest.java

Modified: sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Convention.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Convention.java?rev=1652463&r1=1652462&r2=1652463&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Convention.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Convention.java
[UTF-8] Fri Jan 16 17:23:44 2015
@@ -55,7 +55,7 @@ import org.apache.sis.metadata.iso.citat
 public enum Convention {
     /**
      * The ISO 19162 format, also known as “WKT 2”.
-     * This convention follows the ISO recommendations except the following ones:
+     * This convention follows the ISO recommendations with the following exceptions:
      *
      * <ul>
      *   <li>{@code Axis} element omits the {@code Order} sub-element.</li>
@@ -144,6 +144,7 @@ public enum Convention {
      * with the following differences:
      *
      * <ul>
+     *   <li>All quoted texts (not only the remarks) preserve non-ASCII characters.</li>
      *   <li>Map projections are shown as SIS stores them internally, i.e. with the separation between
      *       linear and non-linear steps, rather than as a single operation.</li>
      *   <li>{@code CompoundCRS} shows nested compound CRS if any (the structure is not flattened).</li>
@@ -155,8 +156,8 @@ public enum Convention {
      *       not only CRS or coordinate operations.</li>
      *   <li>Additional attributes not defined by ISO 19162 may be formatted:
      *     <ul>
-     *       <li>{@code ImageDatum} includes the {@link org.apache.sis.referencing.datum.DefaultImageDatum#getPixelInCell() Pixel in Cell} code.</li>
-     *       <li>{@code TemporalDatum} includes the {@link org.apache.sis.referencing.datum.DefaultTemporalDatum#getOrigin() Origin} date.</li>
+     *       <li>{@code ImageDatum} includes the {@linkplain org.apache.sis.referencing.datum.DefaultImageDatum#getPixelInCell() Pixel in Cell} code.</li>
+     *       <li>{@code TemporalDatum} includes the {@linkplain org.apache.sis.referencing.datum.DefaultTemporalDatum#getOrigin() Origin} date.</li>
      *     </ul>
      *   </li>
      * </ul>

Modified: sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/ElementKind.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/ElementKind.java?rev=1652463&r1=1652462&r2=1652463&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/ElementKind.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/ElementKind.java
[UTF-8] Fri Jan 16 17:23:44 2015
@@ -114,6 +114,10 @@ public enum ElementKind {
     /**
      * {@linkplain org.apache.sis.referencing.AbstractIdentifiedObject#getRemarks() Remarks},
      * often represented by {@code REMARKS[…]} elements.
+     *
+     * <p>When formatting an ISO 19162 Well Known Text, texts quoted as remarks preserve non-ASCII characters.
+     * By contrast, quoted texts in any other {@code ElementKind} will have some non-ASCII characters replaced
+     * by ASCII ones (e.g. "é" → "e").</p>
      */
     REMARKS,
 

Modified: sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Formatter.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Formatter.java?rev=1652463&r1=1652462&r2=1652463&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Formatter.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/Formatter.java
[UTF-8] Fri Jan 16 17:23:44 2015
@@ -168,6 +168,12 @@ public class Formatter implements Locali
     private Citation authority;
 
     /**
+     * {@code true} for preserving non-ASCII characters. The default value is {@code false},
+     * which causes replacements like "é" → "e" in all elements except {@code REMARKS["…"]}.
+     */
+    boolean isNonAsciiAllowed;
+
+    /**
      * The enclosing WKT element being formatted.
      *
      * @see #getEnclosingElement(int)
@@ -372,11 +378,12 @@ public class Formatter implements Locali
     final void configure(Convention convention, final Citation authority, final Colors colors,
             final boolean toUpperCase, final byte indentation)
     {
-        this.convention  = convention;
-        this.authority   = (authority != null) ? authority : convention.getNameAuthority();
-        this.colors      = colors;
-        this.toUpperCase = toUpperCase;
-        this.indentation = indentation;
+        this.convention   = convention;
+        this.authority    = (authority != null) ? authority : convention.getNameAuthority();
+        this.colors       = colors;
+        this.toUpperCase  = toUpperCase;
+        this.indentation  = indentation;
+        isNonAsciiAllowed = (convention == Convention.INTERNAL);
     }
 
     /**
@@ -859,7 +866,8 @@ public class Formatter implements Locali
                 final Matrix matrix = ReferencingServices.getInstance().getMatrix(transform);
                 if (matrix != null) {
                     openElement(true, "Param_MT");
-                    quote("Affine");
+                    buffer.appendCodePoint(symbols.getOpeningQuote(0)).append("Affine")
+                          .appendCodePoint(symbols.getClosingQuote(0));
                     indent(+1);
                     append(matrix);
                     indent(-1);
@@ -890,7 +898,7 @@ public class Formatter implements Locali
         boolean columns = false;
         do {
             openElement(true, "Parameter");
-            quote(columns ? "num_col" : "num_row");
+            buffer.appendCodePoint(openQuote).append(columns ? "num_col" : "num_row").appendCodePoint(closeQuote);
             append(columns ? numCol : numRow);
             closeElement(false);
         } while ((columns = !columns) == true);
@@ -932,9 +940,7 @@ public class Formatter implements Locali
             final String localized = CharSequences.trimWhitespaces(text.toString(locale));
             if (localized != null && !localized.isEmpty()) {
                 openElement(true, keyword);
-                setColor(type);
-                quote(localized);
-                resetColor();
+                quote(localized, type);
                 closeElement(true);
             }
         }
@@ -950,9 +956,7 @@ public class Formatter implements Locali
     public void append(final String text, final ElementKind type) {
         if (text != null) {
             appendSeparator();
-            setColor(type);
-            quote(text);
-            resetColor();
+            quote(text, type);
         }
     }
 
@@ -961,10 +965,16 @@ public class Formatter implements Locali
      * that character will be doubled (WKT 2) or deleted (WKT 1). We check for the closing quote only because
      * it is the character that the parser will look for determining the text end.
      */
-    private void quote(final String text) {
+    private void quote(final String text, final ElementKind type) {
+        setColor(type);
         final int base = buffer.appendCodePoint(symbols.getOpeningQuote(0)).length();
-        buffer.append(text);
+        if (isNonAsciiAllowed || (type == ElementKind.REMARKS)) {
+            buffer.append(text);
+        } else {
+            buffer.append(CharSequences.toASCII(text));
+        }
         closeQuote(base);
+        resetColor();
     }
 
     /**

Modified: sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/WKTFormat.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/WKTFormat.java?rev=1652463&r1=1652462&r2=1652463&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/WKTFormat.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-metadata/src/main/java/org/apache/sis/io/wkt/WKTFormat.java
[UTF-8] Fri Jan 16 17:23:44 2015
@@ -78,7 +78,7 @@ import org.apache.sis.util.resources.Err
  * @author  Martin Desruisseaux (Geomatys)
  * @author  Rémi Eve (IRD)
  * @since   0.4 (derived from geotk-3.20)
- * @version 0.4
+ * @version 0.5
  * @module
  */
 public class WKTFormat extends CompoundFormat<Object> {
@@ -146,6 +146,12 @@ public class WKTFormat extends CompoundF
     private KeywordCase keywordCase;
 
     /**
+     * {@code true} for preserving non-ASCII characters. The default value is {@code false},
+     * which causes replacements like "é" → "e" in all elements except {@code REMARKS["…"]}.
+     */
+    private boolean isNonAsciiAllowed;
+
+    /**
      * The amount of spaces to use in indentation, or {@value #SINGLE_LINE} if indentation is disabled.
      * The same value is also stored in the {@linkplain #formatter}.
      * It appears here for serialization purpose.
@@ -218,6 +224,34 @@ public class WKTFormat extends CompoundF
     }
 
     /**
+     * Returns whether non-ASCII characters are preserved. The default value is {@code false},
+     * which causes replacements like "é" → "e" in all elements except {@link ElementKind#REMARKS}.
+     *
+     * <p>This value is always {@code true} when the WKT {@linkplain #getConvention() convention}
+     * is set to {@link Convention#INTERNAL}.</p>
+     *
+     * @return Whether non-ASCII characters are preserved.
+     *
+     * @since 0.5
+     */
+    public boolean isNonAsciiAllowed() {
+        return isNonAsciiAllowed || (convention == Convention.INTERNAL);
+    }
+
+    /**
+     * Sets whether non-ASCII characters shall be preserved. The default value is {@code false},
+     * which causes replacements like "é" → "e" in all elements except {@link ElementKind#REMARKS}.
+     * Setting this property to {@code true} will disable such replacements.
+     *
+     * @param allowed Whether non-ASCII characters shall be preserved.
+     *
+     * @since 0.5
+     */
+    public void setNonAsciiAllowed(final boolean allowed) {
+        isNonAsciiAllowed = allowed;
+    }
+
+    /**
      * Returns whether WKT keywords should be written with upper cases or camel cases.
      *
      * @return The case to use for formatting keywords.
@@ -347,6 +381,7 @@ public class WKTFormat extends CompoundF
                 default: toUpperCase = (convention.majorVersion() == 1); break;
             }
             formatter.configure(convention, authority, colors, toUpperCase, indentation);
+            formatter.isNonAsciiAllowed |= isNonAsciiAllowed;
         }
     }
 

Modified: sis/branches/JDK8/core/sis-metadata/src/test/java/org/apache/sis/io/wkt/FormatterTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-metadata/src/test/java/org/apache/sis/io/wkt/FormatterTest.java?rev=1652463&r1=1652462&r2=1652463&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-metadata/src/test/java/org/apache/sis/io/wkt/FormatterTest.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-metadata/src/test/java/org/apache/sis/io/wkt/FormatterTest.java
[UTF-8] Fri Jan 16 17:23:44 2015
@@ -55,6 +55,22 @@ public final strictfp class FormatterTes
     }
 
     /**
+     * Tests (indirectly) {@link Formatter#quote(String, ElementKind)}.
+     */
+    @Test
+    public void testQuote() {
+        assertWktEquals(Convention.WKT2,
+                "“A “quote”” to double”",               // Expect doubling quotes.
+                 "A “quote” to double");
+        assertWktEquals(Convention.WKT2,
+                "“Nouvelle Triangulation Francaise”",   // Expect replacement of non-ASCII characters.
+                 "Nouvelle Triangulation Française");
+        assertWktEquals(Convention.INTERNAL,
+                "“Nouvelle Triangulation Française”",   // Non-ASCII characters shall be preserved in internal mode.
+                 "Nouvelle Triangulation Française");
+    }
+
+    /**
      * Tests (indirectly) {@link Formatter#append(GeographicBoundingBox, int)}.
      */
     @Test



Mime
View raw message