sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1402545 - in /sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis: io/X364.java util/CharSequences.java
Date Fri, 26 Oct 2012 15:21:31 GMT
Author: desruisseaux
Date: Fri Oct 26 15:21:31 2012
New Revision: 1402545

URL: http://svn.apache.org/viewvc?rev=1402545&view=rev
Log:
Needs to measure the string length in terms of code points rather than 'char'.

Modified:
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/io/X364.java
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/io/X364.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/io/X364.java?rev=1402545&r1=1402544&r2=1402545&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/io/X364.java (original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/io/X364.java Fri Oct 26 15:21:31 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.sis.io;
 
+import org.apache.sis.util.CharSequences;
 import org.apache.sis.util.StringBuilders;
 
 
@@ -244,9 +245,10 @@ search:     do {
     }
 
     /**
-     * Returns the length of the given string without the ANSI escape codes.
-     * This is equivalent to <code>{@linkplain #plain plain}(text).length()</code>
-     * without the cost of creating a temporary string.
+     * Returns the number of Unicode code points in the given string without the ANSI escape codes.
+     * This is equivalent to <code>{@linkplain CharSequences#codePointCount(CharSequence)
+     * CharSequences.codePointCount}({@linkplain #plain plain}(text))</code> without the
+     * cost of creating a temporary string.
      *
      * @param  text The string which may contains escape codes.
      * @return The length of the given string without escape codes.
@@ -254,7 +256,7 @@ search:     do {
     public static int lengthOfPlain(final String text) {
         int i = text.indexOf(START);
         if (i < 0) {
-            return text.length();
+            return text.codePointCount(0, text.length());
         }
         int last   = 0;
         int length = 0;
@@ -268,14 +270,14 @@ search: do {
             while (i < end) {
                 final char c = text.charAt(i++);
                 if (c < '0' || c > '9') {
-                    continue search;
+                    continue search; // Not an X.364 sequence.
                 }
             }
-            length += start - last;
+            length += text.codePointCount(last, start);
             last = ++i; // The ++ is for skipping the END character.
         } while ((i = text.indexOf(START, i)) >= 0);
-        length += text.length() - last;
-        assert plain(text).length() == length : text;
+        length += text.codePointCount(last, text.length());
+        assert CharSequences.codePointCount(plain(text)) == length : text;
         return length;
     }
 

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1402545&r1=1402544&r2=1402545&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java (original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java Fri Oct 26 15:21:31 2012
@@ -137,6 +137,30 @@ public final class CharSequences extends
     }
 
     /**
+     * Returns the number of Unicode code points in the given characters sequence,
+     * or 0 if {@code null}. Unpaired surrogates within the text count as one code
+     * point each.
+     *
+     * @param  text The character sequence from which to get the count, or {@code null}.
+     * @return The number of Unicode code points, or 0 if the argument is {@code null}.
+     *
+     * @see Character#codePointCount(CharSequence, int, int)
+     */
+    public static int codePointCount(final CharSequence text) {
+        if (text == null)                  return 0;
+        if (text instanceof String)        return ((String)        text).codePointCount(0,
text.length());
+        if (text instanceof StringBuilder) return ((StringBuilder) text).codePointCount(0,
text.length());
+        if (text instanceof StringBuffer)  return ((StringBuffer)  text).codePointCount(0,
text.length());
+        if (text instanceof CharBuffer) {
+            final CharBuffer buffer = (CharBuffer) text;
+            if (buffer.hasArray() && !buffer.isReadOnly()) {
+                return Character.codePointCount(buffer.array(), buffer.position(), buffer.limit());
+            }
+        }
+        return Character.codePointCount(text, 0, text.length());
+    }
+
+    /**
      * Returns the number of occurrences of the {@code toSearch} string in the given {@code text}.
      * The search is case-sensitive.
      *



Mime
View raw message