Author: desruisseaux Date: Mon Sep 24 10:48:37 2012 New Revision: 1389286 URL: http://svn.apache.org/viewvc?rev=1389286&view=rev Log: Improve consistency in CharSequences API: - all CharSequences methods shall work on arbitrary CharSequence instances as claimed by the javadoc; - move methods working on StringBuilder to a separated StringBuilders class; - do not allow CharSequences to modify the provided argument (at the opposite of StringBuilders); - more systematic argument checks. Added: sis/trunk/sis-utility/src/main/java/org/apache/sis/util/StringBuilders.java - copied, changed from r1388393, sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java sis/trunk/sis-utility/src/test/java/org/apache/sis/util/StringBuildersTest.java - copied, changed from r1388393, sis/trunk/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java sis/trunk/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java URL: http://svn.apache.org/viewvc/sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java?rev=1389286&r1=1389285&r2=1389286&view=diff ============================================================================== --- sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java (original) +++ sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java Mon Sep 24 10:48:37 2012 @@ -367,7 +367,7 @@ public class IndexedResourceBundle exten * @return A sentence not longer than {@code maxLength}. 
*/ private static String summarize(String text, int maxLength) { - text = CharSequences.trim(text); + text = CharSequences.trimWhitespaces(text).toString(); final int length = text.length(); if (length <= maxLength) { return text; @@ -401,8 +401,8 @@ public class IndexedResourceBundle exten break; } } - return CharSequences.trim(new StringBuilder(break1 + (length-break2) + 6) - .append(text, 0, break1+1).append(" (â¦) ").append(text, break2, length).toString()); + return CharSequences.trimWhitespaces(new StringBuilder(break1 + (length-break2) + 6) + .append(text, 0, break1+1).append(" (â¦) ").append(text, break2, length)).toString(); } /** Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java URL: http://svn.apache.org/viewvc/sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1389286&r1=1389285&r2=1389286&view=diff ============================================================================== --- sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java (original) +++ sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java Mon Sep 24 10:48:37 2012 @@ -16,19 +16,17 @@ */ package org.apache.sis.util; -import org.apache.sis.resources.Errors; - import static java.lang.Character.*; import static java.util.Arrays.fill; import static java.util.Arrays.copyOf; import static org.apache.sis.util.Arrays.resize; +import static org.apache.sis.util.StringBuilders.replace; /** - * Utility methods working on {@link CharSequence} or {@link String} instances. Some methods - * defined in this class duplicate the functionalities already provided in the {@code String} - * class, but works on a generic {@code CharSequence} instance instead than {@code String}. - * Other methods perform their work directly on a provided {@link StringBuilder} instance. + * Utility methods working on {@link CharSequence} instances. 
Some methods defined in this + * class duplicate the functionalities already provided in the standard {@link String} class, + * but works on a generic {@code CharSequence} instance instead than {@code String}. * * {@section Unicode support} * Every methods defined in this class work on code points instead than characters @@ -36,18 +34,16 @@ import static org.apache.sis.util.Arrays * the Basic Multilingual Plane (BMP). * * {@section Handling of null values} - * Some methods accept a {@code null} argument, in particular the methods converting the - * given {@code String} to another {@code String} which may be the same. For example the - * {@link #camelCaseToAcronym(String)} method returns {@code null} if the string to convert is - * {@code null}. Some other methods like {@link #count(String, char)} handles {@code null} - * argument as synonymous to an empty string. The methods that do not accept a {@code null} - * argument are explicitly documented as throwing a {@link NullPointerException}. + * Most methods in this class accept a {@code null} {@code CharSequence} argument. In such cases + * the method return value is either a {@code null} {@code CharSequence}, an empty array, or a + * {@code int} primitive type calculated as if the input was an empty string. * * @author Martin Desruisseaux (Geomatys) * @since 0.3 (derived from geotk-3.00) * @version 0.3 * @module * + * @see StringBuilders * @see java.util.Arrays#toString(Object[]) */ public final class CharSequences extends Static { @@ -55,7 +51,7 @@ public final class CharSequences extends * An array of zero-length. This constant play a role equivalents to * {@link java.util.Collections#EMPTY_LIST}. */ - public static final String[] EMPTY = new String[0]; + public static final String[] EMPTY_ARRAY = new String[0]; /** * An array of strings containing only white spaces. 
String lengths are equal to their @@ -64,6 +60,10 @@ public final class CharSequences extends */ private static final String[] SPACES = new String[21]; static { + // Our 'spaces(int)' method will invoke 'substring' on the longuest string in an attempt + // to share the same char[] array. Note however that array sharing has been removed from + // JDK8, which copy every char[] arrays anyway. Consequently the JDK8 branch will abandon + // this strategy and build the char[] array on the fly. final int last = SPACES.length - 1; final char[] spaces = new char[last]; fill(spaces, ' '); @@ -71,13 +71,6 @@ public final class CharSequences extends } /** - * Letters in the range 00C0 (192) to 00FF (255) inclusive with their accent removed, - * when possible. - */ - private static final String ASCII = "AAAAAAÃCEEEEIIIIDNOOOOO*OUUUUYÃsaaaaaaæceeeeiiiionooooo/ouuuuyþy"; - // Original letters (with accent) = "ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃà áâãäåæçèéêëìÃîïðñòóôõö÷øùúûüýþÿ"; - - /** * Do not allow instantiation of this class. */ private CharSequences() { @@ -85,9 +78,9 @@ public final class CharSequences extends /** * Returns the code point after the given index. This method completes - * {@link String#codePointBefore(int)} but is rarely used because slightly inefficient - * (in most cases, the code point at {@code index} and its the {@code charCount(int)} - * value are already known, so the method calls performed here would be unnecessary). + * {@link Character#codePointBefore(CharSequence, int)} but is rarely used because slightly + * inefficient (in most cases, the code point at {@code index} is known together with the + * corresponding {@code charCount(int)} value, so the method calls should be unnecessary). 
*/ private static int codePointAfter(final CharSequence text, final int index) { return codePointAt(text, index + charCount(codePointAt(text, index))); @@ -96,6 +89,12 @@ public final class CharSequences extends /** * Returns a string of the specified length filled with white spaces. * This method tries to return a pre-allocated string if possible. + *
+ * This method is typically used for performing right-alignment of text on the + * {@linkplain java.io.Console console} or other device using monospaced font. + * The {@code length} argument is then calculated by (desired width - + * used width). Since the used width may be greater than expected, + * this method accepts negative {@code length} values as if they were zero. * * @param length The string length. Negative values are clamped to 0. * @return A string of length {@code length} filled with white spaces. @@ -139,44 +138,27 @@ public final class CharSequences extends * Returns the number of occurrences of the {@code toSearch} string in the given {@code text}. * The search is case-sensitive. * - * @param text String to search in, or {@code null}. + * @param text The character sequence to count occurrences, or {@code null}. * @param toSearch The string to search in the given {@code text}. - * Must contain at least one character. - * @return The number of occurrence of {@code toSearch} in {@code text}, + * It shall contain at least one character. + * @return The number of occurrences of {@code toSearch} in {@code text}, * or 0 if {@code text} was null or empty. - * @throws IllegalArgumentException If the {@code toSearch} array is null or empty. + * @throws NullArgumentException If the {@code toSearch} argument is null. + * @throws IllegalArgumentException If the {@code toSearch} argument is empty. */ - public static int count(final String text, final String toSearch) { - final int length; - if (toSearch == null || (length = toSearch.length()) == 0) { - throw new IllegalArgumentException(Errors.format( - Errors.Keys.EmptyArgument_1, "toSearch")); - } + public static int count(final CharSequence text, final String toSearch) { + ArgumentChecks.ensureNonEmpty("toSearch", toSearch); + final int length = toSearch.length(); if (length == 1) { + // Implementation working on a single character is faster. 
return count(text, toSearch.charAt(0)); } int n = 0; if (text != null) { - for (int i=text.indexOf(toSearch); i>=0; i=text.indexOf(toSearch, i+length)) { - n++; - } - } - return n; - } - - /** - * Counts the number of occurrence of the given character in the given string. This - * method performs the same work than {@link #count(CharSequence, char)}, but is faster. - * - * @param text The text in which to count the number of occurrence. - * @param c The character to count, or 0 if {@code text}Â was null. - * @return The number of occurrences of the given character. - */ - public static int count(final String text, final char c) { - int n = 0; - if (text != null) { - for (int i=text.indexOf(c); ++i!=0; i=text.indexOf(c, i)) { + int i = 0; + while ((i = indexOf(text, toSearch, i)) >= 0) { n++; + i += length; } } return n; @@ -184,196 +166,215 @@ public final class CharSequences extends /** * Counts the number of occurrence of the given character in the given character sequence. - * This method performs the same work than {@link #count(String, char)}, but on a more - * generic interface. * - * @param text The text in which to count the number of occurrence. - * @param c The character to count, or 0 if {@code text}Â was null. - * @return The number of occurrences of the given character. - */ - public static int count(final CharSequence text, final char c) { - if (text instanceof String) { - return count((String) text, c); - } + * @param text The character sequence to count occurrences, or {@code null}. + * @param toSearch The character to count. + * @return The number of occurrences of the given character, or 0 if the {@code text}Â is null. + */ + public static int count(final CharSequence text, final char toSearch) { int n = 0; if (text != null) { - // No need to use the code point API here, since we are looking for exact matches. 
- for (int i=text.length(); --i>=0;) { - if (text.charAt(i) == c) { + if (text instanceof String) { + final String s = (String) text; + for (int i=s.indexOf(toSearch); ++i != 0; i=s.indexOf(toSearch, i)) { n++; } + } else { + // No need to use the code point API here, since we are looking for exact matches. + for (int i=text.length(); --i>=0;) { + if (text.charAt(i) == toSearch) { + n++; + } + } } } return n; } /** - * Splits a string around the given character. The array returned by this method contains each - * substring of the given string that is terminated by the given character or is terminated by - * the end of the string. The substrings in the array are in the order in which they occur in - * the given string. If the character is not found in the input, then the resulting array has - * just one element, namely the given string. + * Splits a text around the given character. The array returned by this method contains all + * subsequences of the given text that is terminated by the given character or is terminated + * by the end of the text. The subsequences in the array are in the order in which they occur + * in the given text. If the character is not found in the input, then the resulting array has + * just one element, which is the whole given text. *
* This method is similar to the standard {@link String#split(String)} method except for the * following: *
*
- * This method is the converse of {@link #getLinesFromMultilines(String)}
- * when the separator is {@link System#lineSeparator()}.
+ * This method is the converse of {@link #getLinesFromMultilines(CharSequence)}
+ * when the separator is the system line separator.
*
* @param collection The elements to format in a (typically) comma-separated list, or {@code null}.
* @param separator The element separator, which is usually {@code ", "}.
@@ -425,109 +426,36 @@ public final class CharSequences extends
}
/**
- * Replaces every occurrences of the given string in the given buffer.
- * This method invokes {@link StringBuilder#replace(int, int, String)}
- * for each occurrence of {@code search} found in the buffer.
- *
- * @param buffer The string in which to perform the replacements.
- * @param search The string to replace.
- * @param replacement The replacement for the target string.
- * @throws NullPointerException if any of the arguments is null.
- *
- * @see String#replace(char, char)
- * @see String#replace(CharSequence, CharSequence)
- * @see StringBuilder#replace(int, int, String)
- */
- public static void replace(final StringBuilder buffer, final String search, final String replacement) {
- if (!search.equals(replacement)) {
- final int length = search.length();
- int i = buffer.length();
- while ((i = buffer.lastIndexOf(search, i)) >= 0) {
- buffer.replace(i, i+length, replacement);
- i -= length;
- }
- }
- }
-
- /**
- * Replaces the characters in a substring of the buffer with characters in the specified array.
- * The substring to be replaced begins at the specified {@code start} and extends to the
- * character at index {@code end - 1}.
- *
- * @param buffer The buffer in which to perform the replacement.
- * @param start The beginning index in the {@code buffer}, inclusive.
- * @param end The ending index in the {@code buffer}, exclusive.
- * @param chars The array that will replace previous contents.
- * @throws NullPointerException if the {@code buffer} or {@code chars} argument is null.
- *
- * @see StringBuilder#replace(int, int, String)
- */
- public static void replace(final StringBuilder buffer, int start, final int end, final char[] chars) {
- int length = end - start;
- if (start < 0 || length < 0) {
- throw new StringIndexOutOfBoundsException(Errors.format(Errors.Keys.IllegalRange_2, start, end));
- }
- final int remaining = chars.length - length;
- if (remaining < 0) {
- buffer.delete(end + remaining, end);
- length = chars.length;
- }
- for (int i=0; i
* This method is similar in purpose to {@link String#trim()}, except that the later considers
- * every ASCII control codes below 32 to be a whitespace. This have the effect of removing
- * {@linkplain org.apache.sis.io.X364 X3.64} escape sequences as well. Users should invoke
- * this {@code CharSequences.trim} method instead if they need to preserve X3.64 escape sequences.
+ * every ASCII control codes below 32 to be a whitespace. This have the side effect of removing
+ * {@linkplain org.apache.sis.io.X364 X3.64} escape sequences as well. Users should invoke this
+ * {@code CharSequences.trimWhitespaces} method instead if they need to preserve X3.64 escape
+ * sequences.
*
- * @param text The string from which to remove leading and trailing white spaces, or {@code null}.
+ * @param text The text from which to remove leading and trailing white spaces, or {@code null}.
* @return A string with leading and trailing white spaces removed, or {@code null} is the given
* string was null.
*
* @see String#trim()
*/
- public static String trim(String text) {
+ public static CharSequence trimWhitespaces(CharSequence text) {
if (text != null) {
int upper = text.length();
while (upper != 0) {
- final int c = text.codePointBefore(upper);
+ final int c = codePointBefore(text, upper);
if (!isWhitespace(c)) break;
upper -= charCount(c);
}
int lower = 0;
while (lower < upper) {
- final int c = text.codePointAt(lower);
+ final int c = codePointAt(text, lower);
if (!isWhitespace(c)) break;
lower += charCount(c);
}
- text = text.substring(lower, upper);
+ text = text.subSequence(lower, upper);
}
return text;
}
@@ -537,7 +465,7 @@ public final class CharSequences extends
* the value. This method assumes that the number is formatted in the US locale, typically
* by the {@link Double#toString(double)} method.
*
- * More specifically if the given string ends with a {@code '.'} character followed by a
+ * More specifically if the given value ends with a {@code '.'} character followed by a
* sequence of {@code '0'} characters, then those characters are omitted. Otherwise this
* method returns the string unchanged. This is a "all or nothing" method:
* either the fractional part is completely removed, or either it is left unchanged.
@@ -555,15 +483,17 @@ public final class CharSequences extends
* @param value The value to trim if possible, or {@code null}.
* @return The value without the trailing {@code ".0"} part (if any),
* or {@code null} if the given string was null.
+ *
+ * @see StringBuilders#trimFractionalPart(StringBuilder)
*/
- public static String trimFractionalPart(final String value) {
+ public static CharSequence trimFractionalPart(final CharSequence value) {
if (value != null) {
for (int i=value.length(); i>0;) {
- final int c = value.codePointBefore(i);
+ final int c = codePointBefore(value, i);
i -= charCount(c);
switch (c) {
case '0': continue;
- case '.': return value.substring(0, i);
+ case '.': return value.subSequence(0, i);
default : return value;
}
}
@@ -572,70 +502,21 @@ public final class CharSequences extends
}
/**
- * Trims the fractional part of the given formatted number, provided that it doesn't change
- * the value. This method performs the same work than {@link #trimFractionalPart(String)}
- * except that it modifies the given buffer in-place.
- *
- * {@section Use case}
- * This method is useful after a {@linkplain StringBuilder#append(double) double value has
- * been appended to the buffer}, in order to make it appears like an integer when possible.
- *
- * @param buffer The buffer to trim if possible.
- * @throws NullPointerException if the argument is null.
- */
- @SuppressWarnings("fallthrough")
- public static void trimFractionalPart(final StringBuilder buffer) {
- for (int i=buffer.length(); i > 0;) {
- final int c = buffer.codePointBefore(i);
- i -= charCount(c);
- switch (c) {
- case '0': continue;
- case '.': buffer.setLength(i); // Fall through
- default : return;
- }
- }
- }
-
- /**
* Replaces some Unicode characters by ASCII characters on a "best effort basis".
* For example the {@code 'é'} character is replaced by {@code 'e'} (without accent).
*
* The current implementation replaces only the characters in the range {@code 00C0}
* to {@code 00FF}, inclusive. Other characters are left unchanged.
- *
- * Note that if the given character sequence is an instance of {@link StringBuilder},
- * then the replacement will be performed in-place.
*
* @param text The text to scan for Unicode characters to replace by ASCII characters,
* or {@code null}.
* @return The given text with substitution applied, or {@code text} if no replacement
* has been applied.
+ *
+ * @see StringBuilders#toASCII(StringBuilder)
*/
- public static CharSequence toASCII(CharSequence text) {
- if (text != null) {
- StringBuilder buffer = null;
- final int length = text.length();
- for (int i=0; iString.substring(int,int)
- * was cheap, because it shared the same internal <code>char[]</code> array than the original
- * array. However as of JDK8, the <code>String</code> implementation changed and now copies
- * the data. The pertinence of this method may need to be re-evaluated.}
+ * {@note Prior JDK8 this method was relatively cheap because all string instances created by
+ * <code>String.substring(int,int)</code> shared the same <code>char[]</code> internal array.
+ * However since JDK8, the new <code>String</code> implementation copies the data in new arrays.
+ * Consequently it is better to use index rather than this method for splitting large
+ * <code>String</code>s. However this method still useful for other <code>CharSequence</code>
+ * implementations providing an efficient <code>subSequence(int,int)</code> method.}
*
* @param text The multi-line text from which to get the individual lines.
* @return The lines in the text, or {@code null} if the given text was null.
*/
- public static String[] getLinesFromMultilines(final String text) {
+ public static CharSequence[] getLinesFromMultilines(final CharSequence text) {
if (text == null) {
return null;
}
@@ -1375,15 +1342,15 @@ search: for (; fromIndex <= stopAt; from
* This method is implemented on top of String.indexOf(int,int), which is the
* fatest method available while taking care of the complexity of code points.
*/
- int lf = text.indexOf('\n');
- int cr = text.indexOf('\r');
+ int lf = indexOf(text, '\n', 0);
+ int cr = indexOf(text, '\r', 0);
if (lf < 0 && cr < 0) {
- return new String[] {
+ return new CharSequence[] {
text
};
}
int count = 0;
- String[] splitted = new String[8];
+ CharSequence[] splitted = new CharSequence[8];
int last = 0;
boolean hasMore;
do {
@@ -1392,34 +1359,34 @@ search: for (; fromIndex <= stopAt; from
if (cr < 0) {
// There is no "\r" character in the whole text, only "\n".
splitAt = lf;
- hasMore = (lf = text.indexOf('\n', lf+1)) >= 0;
+ hasMore = (lf = indexOf(text, '\n', lf+1)) >= 0;
} else if (lf < 0) {
// There is no "\n" character in the whole text, only "\r".
splitAt = cr;
- hasMore = (cr = text.indexOf('\r', cr+1)) >= 0;
+ hasMore = (cr = indexOf(text, '\r', cr+1)) >= 0;
} else if (lf < cr) {
// There is both "\n" and "\r" characters with "\n" first.
splitAt = lf;
hasMore = true;
- lf = text.indexOf('\n', lf+1);
+ lf = indexOf(text, '\n', lf+1);
} else {
// There is both "\r" and "\n" characters with "\r" first.
// We need special care for the "\r\n" sequence.
splitAt = cr;
if (lf == ++cr) {
- cr = text.indexOf('\r', cr+1);
- lf = text.indexOf('\n', lf+1);
+ cr = indexOf(text, '\r', cr+1);
+ lf = indexOf(text, '\n', lf+1);
hasMore = (cr >= 0 || lf >= 0);
skip = 2;
} else {
- cr = text.indexOf('\r', cr+1);
+ cr = indexOf(text, '\r', cr+1);
hasMore = true; // Because there is lf.
}
}
if (count >= splitted.length) {
splitted = copyOf(splitted, count*2);
}
- splitted[count++] = text.substring(last, splitAt);
+ splitted[count++] = text.subSequence(last, splitAt);
last = splitAt + skip;
} while (hasMore);
/*
@@ -1428,7 +1395,7 @@ search: for (; fromIndex <= stopAt; from
if (count >= splitted.length) {
splitted = copyOf(splitted, count+1);
}
- splitted[count++] = text.substring(last);
+ splitted[count++] = text.subSequence(last, text.length());
return resize(splitted, count);
}
}