From commits-return-526-apmail-sis-commits-archive=sis.apache.org@sis.apache.org Mon Sep 24 10:49:31 2012 Return-Path: X-Original-To: apmail-sis-commits-archive@www.apache.org Delivered-To: apmail-sis-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 13959D7F7 for ; Mon, 24 Sep 2012 10:49:31 +0000 (UTC) Received: (qmail 98545 invoked by uid 500); 24 Sep 2012 10:49:30 -0000 Delivered-To: apmail-sis-commits-archive@sis.apache.org Received: (qmail 98316 invoked by uid 500); 24 Sep 2012 10:49:25 -0000 Mailing-List: contact commits-help@sis.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: sis-dev@sis.apache.org Delivered-To: mailing list commits@sis.apache.org Received: (qmail 98262 invoked by uid 99); 24 Sep 2012 10:49:24 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 Sep 2012 10:49:24 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 Sep 2012 10:49:21 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6F71123888E3 for ; Mon, 24 Sep 2012 10:48:38 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1389286 [1/2] - in /sis/trunk/sis-utility/src: main/java/org/apache/sis/resources/ main/java/org/apache/sis/util/ test/java/org/apache/sis/util/ Date: Mon, 24 Sep 2012 10:48:38 -0000 To: commits@sis.apache.org From: desruisseaux@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120924104838.6F71123888E3@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: desruisseaux Date: Mon Sep 24 10:48:37 2012 New Revision: 1389286 URL: 
http://svn.apache.org/viewvc?rev=1389286&view=rev Log: Improve consistency in CharSequences API: - all CharSequences methods shall work on arbitrary CharSequence instances as claimed by the javadoc; - move methods working on StringBuilder to a separated StringBuilders class; - do not allow CharSequences to modify the provided argument (at the opposite of StringBuilders); - more systematic argument checks. Added: sis/trunk/sis-utility/src/main/java/org/apache/sis/util/StringBuilders.java - copied, changed from r1388393, sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java sis/trunk/sis-utility/src/test/java/org/apache/sis/util/StringBuildersTest.java - copied, changed from r1388393, sis/trunk/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java sis/trunk/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java URL: http://svn.apache.org/viewvc/sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java?rev=1389286&r1=1389285&r2=1389286&view=diff ============================================================================== --- sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java (original) +++ sis/trunk/sis-utility/src/main/java/org/apache/sis/resources/IndexedResourceBundle.java Mon Sep 24 10:48:37 2012 @@ -367,7 +367,7 @@ public class IndexedResourceBundle exten * @return A sentence not longer than {@code maxLength}. 
*/ private static String summarize(String text, int maxLength) { - text = CharSequences.trim(text); + text = CharSequences.trimWhitespaces(text).toString(); final int length = text.length(); if (length <= maxLength) { return text; @@ -401,8 +401,8 @@ public class IndexedResourceBundle exten break; } } - return CharSequences.trim(new StringBuilder(break1 + (length-break2) + 6) - .append(text, 0, break1+1).append(" (…) ").append(text, break2, length).toString()); + return CharSequences.trimWhitespaces(new StringBuilder(break1 + (length-break2) + 6) + .append(text, 0, break1+1).append(" (…) ").append(text, break2, length)).toString(); } /** Modified: sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java URL: http://svn.apache.org/viewvc/sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1389286&r1=1389285&r2=1389286&view=diff ============================================================================== --- sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java (original) +++ sis/trunk/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java Mon Sep 24 10:48:37 2012 @@ -16,19 +16,17 @@ */ package org.apache.sis.util; -import org.apache.sis.resources.Errors; - import static java.lang.Character.*; import static java.util.Arrays.fill; import static java.util.Arrays.copyOf; import static org.apache.sis.util.Arrays.resize; +import static org.apache.sis.util.StringBuilders.replace; /** - * Utility methods working on {@link CharSequence} or {@link String} instances. Some methods - * defined in this class duplicate the functionalities already provided in the {@code String} - * class, but works on a generic {@code CharSequence} instance instead than {@code String}. - * Other methods perform their work directly on a provided {@link StringBuilder} instance. + * Utility methods working on {@link CharSequence} instances. 
Some methods defined in this + * class duplicate the functionalities already provided in the standard {@link String} class, + * but works on a generic {@code CharSequence} instance instead than {@code String}. * * {@section Unicode support} * Every methods defined in this class work on code points instead than characters @@ -36,18 +34,16 @@ import static org.apache.sis.util.Arrays * the Basic Multilingual Plane (BMP). * * {@section Handling of null values} - * Some methods accept a {@code null} argument, in particular the methods converting the - * given {@code String} to another {@code String} which may be the same. For example the - * {@link #camelCaseToAcronym(String)} method returns {@code null} if the string to convert is - * {@code null}. Some other methods like {@link #count(String, char)} handles {@code null} - * argument as synonymous to an empty string. The methods that do not accept a {@code null} - * argument are explicitly documented as throwing a {@link NullPointerException}. + * Most methods in this class accept a {@code null} {@code CharSequence} argument. In such cases + * the method return value is either a {@code null} {@code CharSequence}, an empty array, or a + * {@code int} primitive type calculated as if the input was an empty string. * * @author Martin Desruisseaux (Geomatys) * @since 0.3 (derived from geotk-3.00) * @version 0.3 * @module * + * @see StringBuilders * @see java.util.Arrays#toString(Object[]) */ public final class CharSequences extends Static { @@ -55,7 +51,7 @@ public final class CharSequences extends * An array of zero-length. This constant play a role equivalents to * {@link java.util.Collections#EMPTY_LIST}. */ - public static final String[] EMPTY = new String[0]; + public static final String[] EMPTY_ARRAY = new String[0]; /** * An array of strings containing only white spaces. 
String lengths are equal to their @@ -64,6 +60,10 @@ public final class CharSequences extends */ private static final String[] SPACES = new String[21]; static { + // Our 'spaces(int)' method will invoke 'substring' on the longuest string in an attempt + // to share the same char[] array. Note however that array sharing has been removed from + // JDK8, which copy every char[] arrays anyway. Consequently the JDK8 branch will abandon + // this strategy and build the char[] array on the fly. final int last = SPACES.length - 1; final char[] spaces = new char[last]; fill(spaces, ' '); @@ -71,13 +71,6 @@ public final class CharSequences extends } /** - * Letters in the range 00C0 (192) to 00FF (255) inclusive with their accent removed, - * when possible. - */ - private static final String ASCII = "AAAAAAÆCEEEEIIIIDNOOOOO*OUUUUYÞsaaaaaaæceeeeiiiionooooo/ouuuuyþy"; - // Original letters (with accent) = "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"; - - /** * Do not allow instantiation of this class. */ private CharSequences() { @@ -85,9 +78,9 @@ public final class CharSequences extends /** * Returns the code point after the given index. This method completes - * {@link String#codePointBefore(int)} but is rarely used because slightly inefficient - * (in most cases, the code point at {@code index} and its the {@code charCount(int)} - * value are already known, so the method calls performed here would be unnecessary). + * {@link Character#codePointBefore(CharSequence, int)} but is rarely used because slightly + * inefficient (in most cases, the code point at {@code index} is known together with the + * corresponding {@code charCount(int)} value, so the method calls should be unnecessary). 
*/ private static int codePointAfter(final CharSequence text, final int index) { return codePointAt(text, index + charCount(codePointAt(text, index))); @@ -96,6 +89,12 @@ public final class CharSequences extends /** * Returns a string of the specified length filled with white spaces. * This method tries to return a pre-allocated string if possible. + *

+ * This method is typically used for performing right-alignment of text on the + * {@linkplain java.io.Console console} or other device using monospaced font. + * The {@code length} argument is then calculated by (desired width - + * used width). Since the used width may be greater than expected, + * this method accepts negative {@code length} values as if they were zero. * * @param length The string length. Negative values are clamped to 0. * @return A string of length {@code length} filled with white spaces. @@ -139,44 +138,27 @@ public final class CharSequences extends * Returns the number of occurrences of the {@code toSearch} string in the given {@code text}. * The search is case-sensitive. * - * @param text String to search in, or {@code null}. + * @param text The character sequence to count occurrences, or {@code null}. * @param toSearch The string to search in the given {@code text}. - * Must contain at least one character. - * @return The number of occurrence of {@code toSearch} in {@code text}, + * It shall contain at least one character. + * @return The number of occurrences of {@code toSearch} in {@code text}, * or 0 if {@code text} was null or empty. - * @throws IllegalArgumentException If the {@code toSearch} array is null or empty. + * @throws NullArgumentException If the {@code toSearch} argument is null. + * @throws IllegalArgumentException If the {@code toSearch} argument is empty. */ - public static int count(final String text, final String toSearch) { - final int length; - if (toSearch == null || (length = toSearch.length()) == 0) { - throw new IllegalArgumentException(Errors.format( - Errors.Keys.EmptyArgument_1, "toSearch")); - } + public static int count(final CharSequence text, final String toSearch) { + ArgumentChecks.ensureNonEmpty("toSearch", toSearch); + final int length = toSearch.length(); if (length == 1) { + // Implementation working on a single character is faster. 
return count(text, toSearch.charAt(0)); } int n = 0; if (text != null) { - for (int i=text.indexOf(toSearch); i>=0; i=text.indexOf(toSearch, i+length)) { - n++; - } - } - return n; - } - - /** - * Counts the number of occurrence of the given character in the given string. This - * method performs the same work than {@link #count(CharSequence, char)}, but is faster. - * - * @param text The text in which to count the number of occurrence. - * @param c The character to count, or 0 if {@code text} was null. - * @return The number of occurrences of the given character. - */ - public static int count(final String text, final char c) { - int n = 0; - if (text != null) { - for (int i=text.indexOf(c); ++i!=0; i=text.indexOf(c, i)) { + int i = 0; + while ((i = indexOf(text, toSearch, i)) >= 0) { n++; + i += length; } } return n; @@ -184,196 +166,215 @@ public final class CharSequences extends /** * Counts the number of occurrence of the given character in the given character sequence. - * This method performs the same work than {@link #count(String, char)}, but on a more - * generic interface. * - * @param text The text in which to count the number of occurrence. - * @param c The character to count, or 0 if {@code text} was null. - * @return The number of occurrences of the given character. - */ - public static int count(final CharSequence text, final char c) { - if (text instanceof String) { - return count((String) text, c); - } + * @param text The character sequence to count occurrences, or {@code null}. + * @param toSearch The character to count. + * @return The number of occurrences of the given character, or 0 if the {@code text} is null. + */ + public static int count(final CharSequence text, final char toSearch) { int n = 0; if (text != null) { - // No need to use the code point API here, since we are looking for exact matches. 
- for (int i=text.length(); --i>=0;) { - if (text.charAt(i) == c) { + if (text instanceof String) { + final String s = (String) text; + for (int i=s.indexOf(toSearch); ++i != 0; i=s.indexOf(toSearch, i)) { n++; } + } else { + // No need to use the code point API here, since we are looking for exact matches. + for (int i=text.length(); --i>=0;) { + if (text.charAt(i) == toSearch) { + n++; + } + } } } return n; } /** - * Splits a string around the given character. The array returned by this method contains each - * substring of the given string that is terminated by the given character or is terminated by - * the end of the string. The substrings in the array are in the order in which they occur in - * the given string. If the character is not found in the input, then the resulting array has - * just one element, namely the given string. + * Splits a text around the given character. The array returned by this method contains all + * subsequences of the given text that is terminated by the given character or is terminated + * by the end of the text. The subsequences in the array are in the order in which they occur + * in the given text. If the character is not found in the input, then the resulting array has + * just one element, which is the whole given text. *

* This method is similar to the standard {@link String#split(String)} method except for the * following: *

*

    - *
  • It accepts a {@code null} input string, in which case an empty array is returned.
  • + *
  • It accepts generic character sequences.
  • + *
  • It accepts a {@code null} argument, in which case an empty array is returned.
  • *
  • The separator is a simple character instead of a regular expression.
  • - *
  • The leading and trailing spaces of each substring are {@linkplain String#trim trimmed}.
  • + *
  • The leading and trailing spaces of each subsequence are {@linkplain #trimWhitespaces trimmed}.
  • *
* - * @param toSplit The string to split, or {@code null}. + * @param toSplit The text to split, or {@code null}. * @param separator The delimiting character (typically the coma). - * @return The array of strings computed by splitting the given string around the given + * @return The array of subsequences computed by splitting the given text around the given * character, or an empty array if {@code toSplit} was null. * * @see String#split(String) */ - public static String[] split(final String toSplit, final char separator) { - final boolean excludeEmpty = (separator <= ' '); // Use the same criterion than String.trim(). - String[] strings = new String[4]; - int count = 0; - if (toSplit != null) { - int last = 0; - for (int i=toSplit.indexOf(separator); i>=0; i=toSplit.indexOf(separator, i)) { - // Note: parseDoubles(...) needs the call to trim(). - final String item = toSplit.substring(last, i).trim(); - if (!excludeEmpty || !item.isEmpty()) { - if (count == strings.length) { - strings = copyOf(strings, count << 1); - } - strings[count++] = item; - } - last = ++i; - } - final String item = toSplit.substring(last).trim(); - if (!excludeEmpty || !item.isEmpty()) { + public static CharSequence[] split(final CharSequence toSplit, final char separator) { + if (toSplit == null) { + return EMPTY_ARRAY; + } + // 'excludeEmpty' must use the same criterion than trimWhitespaces(...). + final boolean excludeEmpty = isWhitespace(separator); + CharSequence[] strings = new CharSequence[4]; + int count = 0, last = 0, i = 0; + while ((i = indexOf(toSplit, separator, i)) >= 0) { + // Note: parseDoubles(...) needs the call to trimWhitespaces(...). + final CharSequence item = trimWhitespaces(toSplit.subSequence(last, i)); + if (!excludeEmpty || item.length() != 0) { if (count == strings.length) { - strings = copyOf(strings, count + 1); + strings = copyOf(strings, count << 1); } strings[count++] = item; } + last = ++i; + } + // Add the last element. 
+ final CharSequence item = trimWhitespaces(toSplit.subSequence(last, toSplit.length())); + if (!excludeEmpty || item.length() != 0) { + if (count == strings.length) { + strings = copyOf(strings, count + 1); + } + strings[count++] = item; } return resize(strings, count); } /** - * {@linkplain #split(String, char) Splits} the given string around the given character, + * {@linkplain #split(CharSequence, char) Splits} the given text around the given character, * then {@linkplain Double#parseDouble(String) parses} each item as a {@code double}. + * Empty sub-sequences are parsed as {@link Double#NaN}. * - * @param values The strings containing the values to parse, or {@code null}. + * @param values The text containing the values to parse, or {@code null}. * @param separator The delimiting character (typically the coma). * @return The array of numbers parsed from the given string, * or an empty array if {@code values} was null. * @throws NumberFormatException If at least one number can not be parsed. */ - public static double[] parseDoubles(final String values, final char separator) throws NumberFormatException { - final String[] tokens = split(values, separator); + public static double[] parseDoubles(final CharSequence values, final char separator) + throws NumberFormatException + { + final CharSequence[] tokens = split(values, separator); final double[] parsed = new double[tokens.length]; for (int i=0; i *
    - *
  • There is no leading {@code '['} and trailing {@code ']'} characters.
  • + *
  • There are no leading {@code '['} or trailing {@code ']'} characters.
  • *
  • Null elements are ignored instead of being formatted as {@code "null"}.
  • *
  • If the {@code collection} argument is null or contains only null elements, * then this method returns {@code null}.
  • @@ -392,8 +393,8 @@ public final class CharSequences extends * that string is returned directly (no object duplication). *
*

- * This method is the converse of {@link #getLinesFromMultilines(String)} - * when the separator is {@link System#lineSeparator()}. + * This method is the converse of {@link #getLinesFromMultilines(CharSequence)} + * when the separator is the system line separator. * * @param collection The elements to format in a (typically) comma-separated list, or {@code null}. * @param separator The element separator, which is usually {@code ", "}. @@ -425,109 +426,36 @@ public final class CharSequences extends } /** - * Replaces every occurrences of the given string in the given buffer. - * This method invokes {@link StringBuilder#replace(int, int, String)} - * for each occurrence of {@code search} found in the buffer. - * - * @param buffer The string in which to perform the replacements. - * @param search The string to replace. - * @param replacement The replacement for the target string. - * @throws NullPointerException if any of the arguments is null. - * - * @see String#replace(char, char) - * @see String#replace(CharSequence, CharSequence) - * @see StringBuilder#replace(int, int, String) - */ - public static void replace(final StringBuilder buffer, final String search, final String replacement) { - if (!search.equals(replacement)) { - final int length = search.length(); - int i = buffer.length(); - while ((i = buffer.lastIndexOf(search, i)) >= 0) { - buffer.replace(i, i+length, replacement); - i -= length; - } - } - } - - /** - * Replaces the characters in a substring of the buffer with characters in the specified array. - * The substring to be replaced begins at the specified {@code start} and extends to the - * character at index {@code end - 1}. - * - * @param buffer The buffer in which to perform the replacement. - * @param start The beginning index in the {@code buffer}, inclusive. - * @param end The ending index in the {@code buffer}, exclusive. - * @param chars The array that will replace previous contents. 
- * @throws NullPointerException if the {@code buffer} or {@code chars} argument is null. - * - * @see StringBuilder#replace(int, int, String) - */ - public static void replace(final StringBuilder buffer, int start, final int end, final char[] chars) { - int length = end - start; - if (start < 0 || length < 0) { - throw new StringIndexOutOfBoundsException(Errors.format(Errors.Keys.IllegalRange_2, start, end)); - } - final int remaining = chars.length - length; - if (remaining < 0) { - buffer.delete(end + remaining, end); - length = chars.length; - } - for (int i=0; i 0) { - buffer.insert(start, chars, length, remaining); - } - } - - /** - * Removes every occurrences of the given string in the given buffer. This method invokes - * {@link StringBuilder#delete(int, int)} for each occurrence of {@code search} found in - * the buffer. - * - * @param buffer The string in which to perform the removals. - * @param search The string to remove. - * @throws NullPointerException if any of the arguments is null. - * - * @see StringBuilder#delete(int, int) - */ - public static void remove(final StringBuilder buffer, final String search) { - final int length = search.length(); - for (int i=buffer.lastIndexOf(search); i>=0; i=buffer.lastIndexOf(search, i)) { - buffer.delete(i, i + length); - } - } - - /** - * Returns a string with leading and trailing white spaces omitted. White spaces are identified + * Returns a text with leading and trailing white spaces omitted. White spaces are identified * by the {@link Character#isWhitespace(int)} method. *

* This method is similar in purpose to {@link String#trim()}, except that the later considers - * every ASCII control codes below 32 to be a whitespace. This have the effect of removing - * {@linkplain org.apache.sis.io.X364 X3.64} escape sequences as well. Users should invoke - * this {@code CharSequences.trim} method instead if they need to preserve X3.64 escape sequences. + * every ASCII control codes below 32 to be a whitespace. This have the side effect of removing + * {@linkplain org.apache.sis.io.X364 X3.64} escape sequences as well. Users should invoke this + * {@code CharSequences.trimWhitespaces} method instead if they need to preserve X3.64 escape + * sequences. * - * @param text The string from which to remove leading and trailing white spaces, or {@code null}. + * @param text The text from which to remove leading and trailing white spaces, or {@code null}. * @return A string with leading and trailing white spaces removed, or {@code null} is the given * string was null. * * @see String#trim() */ - public static String trim(String text) { + public static CharSequence trimWhitespaces(CharSequence text) { if (text != null) { int upper = text.length(); while (upper != 0) { - final int c = text.codePointBefore(upper); + final int c = codePointBefore(text, upper); if (!isWhitespace(c)) break; upper -= charCount(c); } int lower = 0; while (lower < upper) { - final int c = text.codePointAt(lower); + final int c = codePointAt(text, lower); if (!isWhitespace(c)) break; lower += charCount(c); } - text = text.substring(lower, upper); + text = text.subSequence(lower, upper); } return text; } @@ -537,7 +465,7 @@ public final class CharSequences extends * the value. This method assumes that the number is formatted in the US locale, typically * by the {@link Double#toString(double)} method. *

- * More specifically if the given string ends with a {@code '.'} character followed by a + * More specifically if the given value ends with a {@code '.'} character followed by a * sequence of {@code '0'} characters, then those characters are omitted. Otherwise this * method returns the string unchanged. This is a "all or nothing" method: * either the fractional part is completely removed, or either it is left unchanged. @@ -555,15 +483,17 @@ public final class CharSequences extends * @param value The value to trim if possible, or {@code null}. * @return The value without the trailing {@code ".0"} part (if any), * or {@code null} if the given string was null. + * + * @see StringBuilders#trimFractionalPart(StringBuilder) */ - public static String trimFractionalPart(final String value) { + public static CharSequence trimFractionalPart(final CharSequence value) { if (value != null) { for (int i=value.length(); i>0;) { - final int c = value.codePointBefore(i); + final int c = codePointBefore(value, i); i -= charCount(c); switch (c) { case '0': continue; - case '.': return value.substring(0, i); + case '.': return value.subSequence(0, i); default : return value; } } @@ -572,70 +502,21 @@ public final class CharSequences extends } /** - * Trims the fractional part of the given formatted number, provided that it doesn't change - * the value. This method performs the same work than {@link #trimFractionalPart(String)} - * except that it modifies the given buffer in-place. - * - * {@section Use case} - * This method is useful after a {@linkplain StringBuilder#append(double) double value has - * been appended to the buffer}, in order to make it appears like an integer when possible. - * - * @param buffer The buffer to trim if possible. - * @throws NullPointerException if the argument is null. 
- */ - @SuppressWarnings("fallthrough") - public static void trimFractionalPart(final StringBuilder buffer) { - for (int i=buffer.length(); i > 0;) { - final int c = buffer.codePointBefore(i); - i -= charCount(c); - switch (c) { - case '0': continue; - case '.': buffer.setLength(i); // Fall through - default : return; - } - } - } - - /** * Replaces some Unicode characters by ASCII characters on a "best effort basis". * For example the {@code 'é'} character is replaced by {@code 'e'} (without accent). *

* The current implementation replaces only the characters in the range {@code 00C0} * to {@code 00FF}, inclusive. Other characters are left unchanged. - *

- * Note that if the given character sequence is an instance of {@link StringBuilder}, - * then the replacement will be performed in-place. * * @param text The text to scan for Unicode characters to replace by ASCII characters, * or {@code null}. * @return The given text with substitution applied, or {@code text} if no replacement * has been applied. + * + * @see StringBuilders#toASCII(StringBuilder) */ - public static CharSequence toASCII(CharSequence text) { - if (text != null) { - StringBuilder buffer = null; - final int length = text.length(); - for (int i=0; i= 0 && r * The given string is usually a programmatic identifier like a class name or a method name. * * @param identifier An identifier with no space, words begin with an upper-case character. * @param toLowerCase {@code true} for changing the first character of words to lower case, * except for the first word and acronyms. - * @return The identifier with spaces inserted after what looks like words, returned - * as a {@link StringBuilder} in order to allow modifications by the caller. - * @throws NullPointerException if the {@code identifier} argument is null. + * @return The identifier with spaces inserted after what looks like words, or {@code null} + * if the given {@code identifier} argument was null. */ - public static StringBuilder camelCaseToWords(final CharSequence identifier, final boolean toLowerCase) { + public static CharSequence camelCaseToWords(final CharSequence identifier, final boolean toLowerCase) { + if (identifier == null) { + return null; + } + /* + * Implementation note: the 'camelCaseToSentence' method needs + * this method to unconditionally returns a new StringBuilder. + */ final int length = identifier.length(); final StringBuilder buffer = new StringBuilder(length + 8); final int lastIndex = (length != 0) ? 
length - charCount(codePointBefore(identifier, length)) : 0; @@ -738,7 +626,7 @@ public final class CharSequences extends final int c = buffer.codePointAt(pos); final int low = toLowerCase(c); if (c != low) { - replace(buffer, pos, pos + charCount(c), Character.toChars(low)); + replace(buffer, pos, pos + charCount(c), toChars(low)); } } last = i; @@ -772,13 +660,13 @@ public final class CharSequences extends * @param text The text for which to create an acronym, or {@code null}. * @return The acronym, or {@code null} if the given text was null. */ - public static String camelCaseToAcronym(String text) { - if (text != null && !isUpperCase(text = text.trim())) { + public static CharSequence camelCaseToAcronym(CharSequence text) { + if (text != null && !isUpperCase(text = trimWhitespaces(text))) { final int length = text.length(); final StringBuilder buffer = new StringBuilder(8); // Acronyms are usually short. boolean wantChar = true; for (int i=0; icode points * instead than characters. * - * @param s1 The first string to compare. - * @param s2 The second string to compare. - * @return {@code true} if the two given strings are equal, ignoring case. - * @throws NullPointerException if any of the arguments is null. + * @param s1 The first string to compare, or {@code null}. + * @param s2 The second string to compare, or {@code null}. + * @return {@code true} if the two given texts are equal, ignoring case. * * @see String#equalsIgnoreCase(String) */ public static boolean equalsIgnoreCase(final CharSequence s1, final CharSequence s2) { + if (s1 == s2) { + return true; + } + if (s1 == null || s2 == null) { + return false; + } final int lg1 = s1.length(); final int lg2 = s2.length(); int i1 = 0, i2 = 0; @@ -1032,6 +924,37 @@ cmp: while (ia < lga) { } /** + * Returns {@code true} if the two given texts are equal. This method delegates to + * {@link String#contentEquals(CharSequence)} if possible. 
This method never invoke + * {@link CharSequence#toString()} in order to avoid a potentially large copy of data. + * + * @param s1 The first string to compare, or {@code null}. + * @param s2 The second string to compare, or {@code null}. + * @return {@code true} if the two given texts are equal. + * + * @see String#contentEquals(CharSequence) + */ + public static boolean equals(final CharSequence s1, final CharSequence s2) { + if (s1 == s2) { + return true; + } + if (s1 != null && s2 != null) { + if (s1 instanceof String) return ((String) s1).contentEquals(s2); + if (s2 instanceof String) return ((String) s2).contentEquals(s1); + final int length = s1.length(); + if (s2.length() == length) { + for (int i=0; i + * There is no restriction on the value of {@code fromIndex}. If negative or greater + * than the length of the text, then the behavior of this method is the same than the + * one documented in {@link String#indexOf(int, int)}. + * + * @param text The character sequence in which to perform the search, or {@code null}. + * @param toSearch The Unicode code point of the character to search. + * @param fromIndex The index to start the search from. + * @return The index of the first occurrence of the given character in the text, or -1 + * if no occurrence has been found or if the {@code text} argument is null. + * + * @see String#indexOf(int, int) + */ + public static int indexOf(final CharSequence text, final int toSearch, int fromIndex) { + if (text != null) { + if (text instanceof String) { + // String provides a faster implementation. 
+ return ((String) text).indexOf(toSearch, fromIndex); + } + if (fromIndex < 0) { + fromIndex = 0; + } + final int length = text.length(); + while (fromIndex < length) { + final int c = codePointAt(text, fromIndex); + if (c == toSearch) { + return fromIndex; + } + fromIndex += charCount(c); + } + } + return -1; + } + + /** * Returns the index within the given strings of the first occurrence of the specified part, * starting at the specified index. This method is equivalent to the following code: * @@ -1090,27 +1051,32 @@ cmp: while (ia < lga) { * @see StringBuffer#indexOf(String, int) */ public static int indexOf(final CharSequence string, final CharSequence part, int fromIndex) { - if (part instanceof String) { - if (string instanceof String) { - return ((String) string).indexOf((String) part, fromIndex); - } - if (string instanceof StringBuilder) { - return ((StringBuilder) string).indexOf((String) part, fromIndex); + if (string != null) { + if (part instanceof String) { + if (string instanceof String) { + return ((String) string).indexOf((String) part, fromIndex); + } + if (string instanceof StringBuilder) { + return ((StringBuilder) string).indexOf((String) part, fromIndex); + } + if (string instanceof StringBuffer) { + return ((StringBuffer) string).indexOf((String) part, fromIndex); + } } - if (string instanceof StringBuffer) { - return ((StringBuffer) string).indexOf((String) part, fromIndex); + if (fromIndex < 0) { + fromIndex = 0; } - } - final int length = part.length(); - final int stopAt = string.length() - length; -search: for (; fromIndex <= stopAt; fromIndex++) { - for (int i=0; i * The converse of this method is {@link #formatList(Iterable, String)}. * - * {@note This method has been designed in a time when String.substring(int,int) - * was cheap, because it shared the same internal char[] array than the original - * array. However as of JDK8, the String implementation changed and now copies - * the data. 
The pertinence of this method may need to be re-evaluated.} + * {@note Prior JDK8 this method was relatively cheap because all string instances created by + * String.substring(int,int) shared the same char[] internal array. + * However since JDK8, the new String implementation copies the data in new arrays. + * Consequently it is better to use index rather than this method for splitting large + * Strings. However this method still useful for other CharSequence + * implementations providing an efficient subSequence(int,int) method.} * * @param text The multi-line text from which to get the individual lines. * @return The lines in the text, or {@code null} if the given text was null. */ - public static String[] getLinesFromMultilines(final String text) { + public static CharSequence[] getLinesFromMultilines(final CharSequence text) { if (text == null) { return null; } @@ -1375,15 +1342,15 @@ search: for (; fromIndex <= stopAt; from * This method is implemented on top of String.indexOf(int,int), which is the * fatest method available while taking care of the complexity of code points. */ - int lf = text.indexOf('\n'); - int cr = text.indexOf('\r'); + int lf = indexOf(text, '\n', 0); + int cr = indexOf(text, '\r', 0); if (lf < 0 && cr < 0) { - return new String[] { + return new CharSequence[] { text }; } int count = 0; - String[] splitted = new String[8]; + CharSequence[] splitted = new CharSequence[8]; int last = 0; boolean hasMore; do { @@ -1392,34 +1359,34 @@ search: for (; fromIndex <= stopAt; from if (cr < 0) { // There is no "\r" character in the whole text, only "\n". splitAt = lf; - hasMore = (lf = text.indexOf('\n', lf+1)) >= 0; + hasMore = (lf = indexOf(text, '\n', lf+1)) >= 0; } else if (lf < 0) { // There is no "\n" character in the whole text, only "\r". splitAt = cr; - hasMore = (cr = text.indexOf('\r', cr+1)) >= 0; + hasMore = (cr = indexOf(text, '\r', cr+1)) >= 0; } else if (lf < cr) { // There is both "\n" and "\r" characters with "\n" first. 
splitAt = lf; hasMore = true; - lf = text.indexOf('\n', lf+1); + lf = indexOf(text, '\n', lf+1); } else { // There is both "\r" and "\n" characters with "\r" first. // We need special care for the "\r\n" sequence. splitAt = cr; if (lf == ++cr) { - cr = text.indexOf('\r', cr+1); - lf = text.indexOf('\n', lf+1); + cr = indexOf(text, '\r', cr+1); + lf = indexOf(text, '\n', lf+1); hasMore = (cr >= 0 || lf >= 0); skip = 2; } else { - cr = text.indexOf('\r', cr+1); + cr = indexOf(text, '\r', cr+1); hasMore = true; // Because there is lf. } } if (count >= splitted.length) { splitted = copyOf(splitted, count*2); } - splitted[count++] = text.substring(last, splitAt); + splitted[count++] = text.subSequence(last, splitAt); last = splitAt + skip; } while (hasMore); /* @@ -1428,7 +1395,7 @@ search: for (; fromIndex <= stopAt; from if (count >= splitted.length) { splitted = copyOf(splitted, count+1); } - splitted[count++] = text.substring(last); + splitted[count++] = text.subSequence(last, text.length()); return resize(splitted, count); } }