sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1805949 - in /sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage: io/IOUtilities.java wkt/FirstKeywordPeek.java xml/AbstractProvider.java xml/MimeTypeDetector.java
Date Wed, 23 Aug 2017 19:20:04 GMT
Author: desruisseaux
Date: Wed Aug 23 19:20:04 2017
New Revision: 1805949

URL: http://svn.apache.org/viewvc?rev=1805949&view=rev
Log:
Take Unicode supplementary characters into account when reading from java.io.Reader.

Modified:
    sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java
    sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/wkt/FirstKeywordPeek.java
    sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/AbstractProvider.java
    sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/MimeTypeDetector.java

Modified: sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java?rev=1805949&r1=1805948&r2=1805949&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java
[UTF-8] Wed Aug 23 19:20:04 2017
@@ -20,6 +20,7 @@ import java.util.Locale;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.LineNumberReader;
+import java.io.Reader;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.IOException;
@@ -528,6 +529,30 @@ public final class IOUtilities extends S
     }
 
     /**
+     * Reads the next character as a Unicode code point. Unless end-of-file has been reached,
the returned value is
+     * between {@value java.lang.Character#MIN_CODE_POINT} and {@value java.lang.Character#MAX_CODE_POINT}
inclusive.
+     *
+     * @param  in  the reader from which to read code point.
+     * @return the next code point, or -1 on end of file.
+     * @throws IOException if an error occurred while reading characters.
+     *
+     * @since 0.8
+     */
+    public static int readCodePoint(final Reader in) throws IOException {
+        int c = in.read();
+        while (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE)
{
+            final int low = in.read();
+            if (low >= Character.MIN_LOW_SURROGATE && low <= Character.MAX_LOW_SURROGATE)
{
+                c = Character.toCodePoint((char) c, (char) low);
+                break;
+            } else {
+                c = low;        // Discard orphan high surrogate and take the next character.
+            }
+        }
+        return c;
+    }
+
+    /**
      * Returns the error message for a file that can not be parsed.
      * The error message will contain the line number if available.
      *

Modified: sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/wkt/FirstKeywordPeek.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/wkt/FirstKeywordPeek.java?rev=1805949&r1=1805948&r2=1805949&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/wkt/FirstKeywordPeek.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/wkt/FirstKeywordPeek.java
[UTF-8] Wed Aug 23 19:20:04 2017
@@ -19,6 +19,7 @@ package org.apache.sis.internal.storage.
 import java.io.Reader;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import org.apache.sis.internal.storage.io.IOUtilities;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.storage.StorageConnector;
 import org.apache.sis.storage.ProbeResult;
@@ -78,7 +79,7 @@ public abstract class FirstKeywordPeek {
             return -1;
         }
         int c;
-        while ((c = reader.read()) >= 0) {
+        while ((c = IOUtilities.readCodePoint(reader)) >= 0) {
             if (!Character.isWhitespace(c)) break;
         }
         return c;
@@ -94,7 +95,7 @@ public abstract class FirstKeywordPeek {
                 if (!buffer.hasRemaining()) break;
                 c = (char) buffer.get();
             } else {
-                c = reader.read();
+                c = IOUtilities.readCodePoint(reader);
                 if (c < 0) break;
             }
         } while (!Characters.isLineOrParagraphSeparator(c));
@@ -159,7 +160,7 @@ public abstract class FirstKeywordPeek {
                         }
                         keyword[pos++] = (char) c;
                     }
-                    c = (buffer == null) ? reader.read() : buffer.hasRemaining() ? (char)
buffer.get() : -1;
+                    c = (buffer == null) ? IOUtilities.readCodePoint(reader) : buffer.hasRemaining()
? (char) buffer.get() : -1;
                 } while ((s = isKeywordChar(c)) >= ACCEPT);
                 /*
                  * At this point we finished to read and store the keyword.

Modified: sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/AbstractProvider.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/AbstractProvider.java?rev=1805949&r1=1805948&r2=1805949&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/AbstractProvider.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/AbstractProvider.java
[UTF-8] Wed Aug 23 19:20:04 2017
@@ -25,6 +25,7 @@ import org.apache.sis.storage.DataStore;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.storage.StorageConnector;
 import org.apache.sis.storage.ProbeResult;
+import org.apache.sis.internal.storage.io.IOUtilities;
 import org.apache.sis.internal.storage.DocumentedStoreProvider;
 
 
@@ -138,7 +139,7 @@ public abstract class AbstractProvider e
             final ProbeResult result = new MimeTypeDetector(types) {
                 private int remaining = READ_AHEAD_LIMIT;
                 @Override int read() throws IOException {
-                    return (--remaining >= 0) ? reader.read() : -1;
+                    return (--remaining >= 0) ? IOUtilities.readCodePoint(reader) : -1;
                 }
             }.probeContent();
             reader.reset();

Modified: sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/MimeTypeDetector.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/MimeTypeDetector.java?rev=1805949&r1=1805948&r2=1805949&view=diff
==============================================================================
--- sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/MimeTypeDetector.java
[UTF-8] (original)
+++ sis/branches/JDK8/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/xml/MimeTypeDetector.java
[UTF-8] Wed Aug 23 19:20:04 2017
@@ -98,7 +98,7 @@ abstract class MimeTypeDetector {
     /**
      * Reads a single byte or character, or -1 if we reached the end of the stream portion
that we are allowed
      * to read. We are typically not allowed to read the full stream because only a limited
amount of bytes is
-     * cached.
+     * cached. This method may return a Unicode code point (i.e. the returned value may not
fit in {@code char}).
      *
      * @return the character, or -1 on EOF.
      * @throws IOException if an error occurred while reading the byte or character.



Mime
View raw message