sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1418210 - in /sis/branches/JDK7/sis-utility/src: main/java/org/apache/sis/internal/util/ main/java/org/apache/sis/util/ main/java/org/apache/sis/util/type/ test/java/org/apache/sis/util/
Date Fri, 07 Dec 2012 08:12:01 GMT
Author: desruisseaux
Date: Fri Dec  7 08:11:59 2012
New Revision: 1418210

URL: http://svn.apache.org/viewvc?rev=1418210&view=rev
Log:
Refactored the CharSequences.equalsLettersAndDigits(...) as an equalsFiltered(..., Characters.Filter,
boolean) method,
and modified the Citations static methods to use it.

Modified:
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
    sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
    sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
    sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
(original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
Fri Dec  7 08:11:59 2012
@@ -23,7 +23,9 @@ import org.opengis.metadata.citation.Cit
 import org.opengis.util.InternationalString;
 import org.apache.sis.util.Static;
 
+import static org.apache.sis.util.CharSequences.equalsFiltered;
 import static org.apache.sis.util.CharSequences.trimWhitespaces;
+import static org.apache.sis.util.Characters.Filter.LETTERS_AND_DIGITS;
 
 // Related to JDK7
 import java.util.Objects;
@@ -59,15 +61,16 @@ public final class Citations extends Sta
      * @param  collection The collection from which to get the iterator, or {@code null}.
      * @return The iterator over the given collection elements, or {@code null}.
      */
-    public static <E> Iterator<E> iterator(final Collection<E> collection)
{
+    private static <E> Iterator<E> iterator(final Collection<E> collection)
{
         return (collection != null && !collection.isEmpty()) ? collection.iterator()
: null;
     }
 
     /**
      * Returns {@code true} if at least one {@linkplain Citation#getTitle() title} or
-     * {@linkplain Citation#getAlternateTitles alternate title} in {@code c1} is equal
-     * to a title or alternate title in {@code c2}. The comparison is case-insensitive
-     * and ignores leading and trailing spaces. The titles ordering is not significant.
+     * {@linkplain Citation#getAlternateTitles() alternate title} in {@code c1} is leniently
+     * equal to a title or alternate title in {@code c2}. The comparison is case-insensitive
+     * and ignores every character which is not a {@linkplain Character#isLetterOrDigit(int)
+     * letter or a digit}. The titles ordering is not significant.
      *
      * @param  c1 The first citation to compare, or {@code null}.
      * @param  c2 the second citation to compare, or {@code null}.
@@ -108,30 +111,29 @@ public final class Citations extends Sta
 
     /**
      * Returns {@code true} if the {@linkplain Citation#getTitle() title} or any
-     * {@linkplain Citation#getAlternateTitles alternate title} in the given citation
-     * matches the given string. The comparison is case-insensitive and ignores leading
-     * and trailing spaces.
+     * {@linkplain Citation#getAlternateTitles() alternate title} in the given citation
+     * matches the given string. The comparison is case-insensitive and ignores every character
+     * which is not a {@linkplain Character#isLetterOrDigit(int) letter or a digit}.
      *
      * @param  citation The citation to check for, or {@code null}.
      * @param  title The title or alternate title to compare, or {@code null}.
      * @return {@code true} if both arguments are non-null, and the title or alternate
      *         title matches the given string.
      */
-    public static boolean titleMatches(final Citation citation, String title) {
+    public static boolean titleMatches(final Citation citation, final CharSequence title)
{
         if (citation != null && title != null) {
-            title = trimWhitespaces(title);
             InternationalString candidate = citation.getTitle();
             Iterator<? extends InternationalString> iterator = null;
             do {
                 if (candidate != null) {
                     // The "null" locale argument is required for getting the unlocalized
version.
-                    final String unlocalized = trimWhitespaces(candidate.toString(null));
-                    if (unlocalized != null && unlocalized.equalsIgnoreCase(title))
{
+                    final String unlocalized = candidate.toString(null);
+                    if (equalsFiltered(unlocalized, title, LETTERS_AND_DIGITS, true)) {
                         return true;
                     }
-                    final String localized = trimWhitespaces(candidate).toString();
-                    if (localized != unlocalized // Slight optimization for a common case.
-                            && (localized != null) && localized.equalsIgnoreCase(title))
+                    final String localized = candidate.toString();
+                    if (!Objects.equals(localized, unlocalized) // Slight optimization for
a common case.
+                            && equalsFiltered(localized, title, LETTERS_AND_DIGITS,
true))
                     {
                         return true;
                     }
@@ -150,7 +152,8 @@ public final class Citations extends Sta
     /**
      * Returns {@code true} if at least one {@linkplain Citation#getIdentifiers() identifier}
in
      * {@code c1} is equal to an identifier in {@code c2}. The comparison is case-insensitive
-     * and ignores leading and trailing spaces. The identifier ordering is not significant.
+     * and ignores every character which is not a {@linkplain Character#isLetterOrDigit(int)
+     * letter or a digit}. The identifier ordering is not significant.
      *
      * <p>If (and <em>only</em> if) the citations do not contains any identifier,
then this method
      * fallback on titles comparison using the {@link #titleMatches(Citation,Citation) titleMatches}
@@ -193,8 +196,8 @@ public final class Citations extends Sta
 
     /**
      * Returns {@code true} if any {@linkplain Citation#getIdentifiers() identifiers} in
the given
-     * citation matches the given string. The comparison is case-insensitive and ignores
leading
-     * and trailing spaces.
+     * citation matches the given string. The comparison is case-insensitive and ignores
every
+     * character which is not a {@linkplain Character#isLetterOrDigit(int) letter or a digit}.
      *
      * <p>If (and <em>only</em> if) the citation does not contain any identifier,
then this method
      * fallback on titles comparison using the {@link #titleMatches(Citation,String) titleMatches}
@@ -206,20 +209,16 @@ public final class Citations extends Sta
      * @return {@code true} if both arguments are non-null, and the title or alternate title
      *         matches the given string.
      */
-    public static boolean identifierMatches(final Citation citation, String identifier) {
+    public static boolean identifierMatches(final Citation citation, final CharSequence identifier)
{
         if (citation != null && identifier != null) {
-            identifier = trimWhitespaces(identifier);
             final Iterator<? extends Identifier> identifiers = iterator(citation.getIdentifiers());
             if (identifiers == null) {
                 return titleMatches(citation, identifier);
             }
             while (identifiers.hasNext()) {
                 final Identifier id = identifiers.next();
-                if (id != null) {
-                    final String code = id.getCode();
-                    if (code != null && identifier.equalsIgnoreCase(trimWhitespaces(code)))
{
-                        return true;
-                    }
+                if (id != null && equalsFiltered(identifier, id.getCode(), LETTERS_AND_DIGITS,
true)) {
+                    return true;
                 }
             }
         }

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java (original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java Fri
Dec  7 08:11:59 2012
@@ -1126,7 +1126,7 @@ searchWordBreak:    while (true) {
     }
 
     /**
-     * Given a string in camel cases (typically a Java identifier), returns a string formatted
+     * Given a string in camel cases (typically an identifier), returns a string formatted
      * like an English sentence. This heuristic method performs the following steps:
      *
      * <ol>
@@ -1257,8 +1257,8 @@ searchWordBreak:    while (true) {
      * case, then the text is returned unchanged on the assumption that it is already an
acronym.
      * Otherwise this method returns a string containing the first character of each word,
where
      * the words are separated by the camel case convention, the {@code '_'} character, or
any
-     * character which is not a {@linkplain Character#isJavaIdentifierPart(int) java identifier
-     * part} (including spaces).
+     * character which is not a {@linkplain Character#isUnicodeIdentifierPart(int) Unicode
+     * identifier part} (including spaces).
      *
      * <p><b>Examples:</b> given {@code "northEast"}, this method returns
{@code "NE"}.
      * Given {@code "Open Geospatial Consortium"}, this method returns {@code "OGC"}.</p>
@@ -1275,11 +1275,11 @@ searchWordBreak:    while (true) {
             for (int i=0; i<length;) {
                 final int c = codePointAt(text, i);
                 if (wantChar) {
-                    if (isJavaIdentifierStart(c)) {
+                    if (isUnicodeIdentifierStart(c)) {
                         buffer.appendCodePoint(c);
                         wantChar = false;
                     }
-                } else if (!isJavaIdentifierPart(c) || c == '_') {
+                } else if (!isUnicodeIdentifierPart(c) || c == '_') {
                     wantChar = true;
                 } else if (Character.isUpperCase(c)) {
                     // Test for mixed-case (e.g. "northEast").
@@ -1408,7 +1408,7 @@ cmp:    while (ia < lga) {
      * <p>This method is used for identifying character strings that are likely to
be code
      * like {@code "UTF-8"} or {@code "ISO-LATIN-1"}.</p>
      *
-     * @see #isJavaIdentifier(CharSequence)
+     * @see #isUnicodeIdentifier(CharSequence)
      */
     private static boolean isCode(final CharSequence identifier) {
         for (int i=identifier.length(); --i>=0;) {
@@ -1423,28 +1423,28 @@ cmp:    while (ia < lga) {
     }
 
     /**
-     * Returns {@code true} if the given identifier is a legal Java identifier.
+     * Returns {@code true} if the given identifier is a legal Unicode identifier.
      * This method returns {@code true} if the identifier length is greater than zero,
-     * the first character is a {@linkplain Character#isJavaIdentifierStart(int) Java
-     * identifier start} and all remaining characters (if any) are
-     * {@linkplain Character#isJavaIdentifierPart(int) Java identifier parts}.
+     * the first character is a {@linkplain Character#isUnicodeIdentifierStart(int)
+     * Unicode identifier start} and all remaining characters (if any) are
+     * {@linkplain Character#isUnicodeIdentifierPart(int) Unicode identifier parts}.
      *
      * @param identifier The character sequence to test.
-     * @return {@code true} if the given character sequence is a legal Java identifier.
+     * @return {@code true} if the given character sequence is a legal Unicode identifier.
      * @throws NullPointerException if the argument is null.
      */
-    public static boolean isJavaIdentifier(final CharSequence identifier) {
+    public static boolean isUnicodeIdentifier(final CharSequence identifier) {
         final int length = identifier.length();
         if (length == 0) {
             return false;
         }
         int c = codePointAt(identifier, 0);
-        if (!isJavaIdentifierStart(c)) {
+        if (!isUnicodeIdentifierStart(c)) {
             return false;
         }
         for (int i=0; (i += charCount(c)) < length;) {
             c = codePointAt(identifier, i);
-            if (!isJavaIdentifierPart(c)) {
+            if (!isUnicodeIdentifierPart(c)) {
                 return false;
             }
         }
@@ -1476,6 +1476,71 @@ cmp:    while (ia < lga) {
     }
 
     /**
+     * Returns {@code true} if the given texts are equal, optionally ignoring case and filtered-out
+     * characters. This method is sometime used for comparing identifiers in a lenient way.
+     *
+     * <p><b>Example:</b> the following call compares the two strings ignoring
case and any
+     * characters which are not {@linkplain Character#isLetterOrDigit(int) letter or digit}.
+     * In particular, spaces and punctuation characters like {@code '_'} and {@code '-'}
are
+     * ignored:</p>
+     *
+     * {@preformat java
+     *     assert equalsFiltered("WGS84", "WGS_84", Characters.Filter.LETTERS_AND_DIGITS, true) ==
true;
+     * }
+     *
+     * @param  s1 The first string to compare, or {@code null}.
+     * @param  s2 The second string to compare, or {@code null}.
+     * @param  filter The subset of characters to compare, or {@code null} for comparing
all characters.
+     * @param  ignoreCase {@code true} for ignoring case, or {@code false} for requiring
exact match.
+     * @return {@code true} if both arguments are {@code null} or if the two given
texts are equal,
+     *         optionally ignoring case and filtered-out characters.
+     */
+    public static boolean equalsFiltered(final CharSequence s1, final CharSequence s2,
+            final Characters.Filter filter, final boolean ignoreCase)
+    {
+        if (s1 == s2) {
+            return true;
+        }
+        if (s1 == null || s2 == null) {
+            return false;
+        }
+        if (filter == null) {
+            return ignoreCase ? equalsIgnoreCase(s1, s2) : equals(s1, s2);
+        }
+        final int lg1 = s1.length();
+        final int lg2 = s2.length();
+        int i2 = 0, n;
+        for (int i1=0; i1<lg1; i1+=n) {
+            int c1 = codePointAt(s1, i1);
+            n = charCount(c1);
+            if (filter.contains(c1)) {
+                // Fetch the next significant character from the second string.
+                int c2;
+                do {
+                    if (i2 >= lg2) {
+                        return false; // The first string has more significant characters
than expected.
+                    }
+                    c2 = codePointAt(s2, i2);
+                    i2 += charCount(c2);
+                } while (!filter.contains(c2));
+
+                // Compare the characters in the same way than String.equalsIgnoreCase(String).
+                if (c1 != c2 && !(ignoreCase && equalsIgnoreCase(c1, c2)))
{
+                    return false;
+                }
+            }
+        }
+        while (i2 < lg2) {
+            final int s = codePointAt(s2, i2);
+            if (filter.contains(s)) {
+                return false; // The first string has less significant characters than expected.
+            }
+            i2 += charCount(s);
+        }
+        return true;
+    }
+
+    /**
      * Returns {@code true} if the given code points are equal, ignoring case.
      * This method implements the same comparison algorithm than String#equalsIgnoreCase(String).
      *
@@ -1530,57 +1595,6 @@ cmp:    while (ia < lga) {
     }
 
     /**
-     * Returns {@code true} if the given texts are equal, ignoring case and any character
which
-     * is not a {@linkplain Character#isLetterOrDigit(int) letter or digit}. In particular,
-     * spaces and punctuation characters like {@code '_'} and {@code '-'} are ignored.
-     * This method is sometime used for comparing identifiers in a lenient way.
-     *
-     * @param  s1 The first string to compare, or {@code null}.
-     * @param  s2 The second string to compare, or {@code null}.
-     * @return {@code true} if the two given texts are equal, comparing only letters and
digits
-     *         in a case-insensitive way, or if both arguments are {@code null}.
-     */
-    public static boolean equalsLettersAndDigits(final CharSequence s1, final CharSequence
s2) {
-        if (s1 == s2) {
-            return true;
-        }
-        if (s1 == null || s2 == null) {
-            return false;
-        }
-        final int lg1 = s1.length();
-        final int lg2 = s2.length();
-        int i2 = 0, n;
-        for (int i1=0; i1<lg1; i1+=n) {
-            int c1 = codePointAt(s1, i1);
-            n = charCount(c1);
-            if (isLetterOrDigit(c1)) {
-                // Fetch the next significant character from the second string.
-                int c2;
-                do {
-                    if (i2 >= lg2) {
-                        return false; // The first string has more significant characters
than expected.
-                    }
-                    c2 = codePointAt(s2, i2);
-                    i2 += charCount(c2);
-                } while (!isLetterOrDigit(c2));
-
-                // Compare the characters in the same way than String.equalsIgnoreCase(String).
-                if (c1 != c2 && !equalsIgnoreCase(c1, c2)) {
-                    return false;
-                }
-            }
-        }
-        while (i2 < lg2) {
-            final int s = codePointAt(s2, i2);
-            if (isLetterOrDigit(s)) {
-                return false; // The first string has less significant characters than expected.
-            }
-            i2 += charCount(s);
-        }
-        return true;
-    }
-
-    /**
      * Returns {@code true} if the two given texts are equal. This method delegates to
      * {@link String#contentEquals(CharSequence)} if possible. This method never invoke
      * {@link CharSequence#toString()} in order to avoid a potentially large copy of data.
@@ -1784,9 +1798,9 @@ cmp:    while (ia < lga) {
      *
      * <ul>
      *   <li>If <var>c</var> is a
-     *       {@linkplain Character#isJavaIdentifierStart(int) Java identifier start},
+     *       {@linkplain Character#isUnicodeIdentifierStart(int) Unicode identifier start},
      *       then any following characters that are
-     *       {@linkplain Character#isJavaIdentifierPart(int) Java identifier part}.</li>
+     *       {@linkplain Character#isUnicodeIdentifierPart(int) Unicode identifier part}.</li>
      *   <li>Otherwise any character for which {@link Character#getType(int)} returns
      *       the same value than for <var>c</var>.</li>
      * </ul>
@@ -1815,8 +1829,8 @@ cmp:    while (ia < lga) {
         /*
          * Advance over all characters "of the same type".
          */
-        if (isJavaIdentifierStart(c)) {
-            while (upper<length && isJavaIdentifierPart(c = codePointAt(text,
upper))) {
+        if (isUnicodeIdentifierStart(c)) {
+            while (upper<length && isUnicodeIdentifierPart(c = codePointAt(text,
upper))) {
                 upper += charCount(c);
             }
         } else {

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java (original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java Fri Dec
 7 08:11:59 2012
@@ -16,6 +16,8 @@
  */
 package org.apache.sis.util;
 
+import org.apache.sis.util.resources.Errors;
+
 
 /**
  * Static methods working on {@code char} values, and some character constants.
@@ -225,4 +227,195 @@ public final class Characters extends St
         }
         return c;
     }
+
+
+
+
+    /**
+     * Subsets of Unicode characters identified by their general category.
+     * The categories are identified by constants defined in the {@link Character} class,
like
+     * {@link Character#LOWERCASE_LETTER     LOWERCASE_LETTER},
+     * {@link Character#UPPERCASE_LETTER     UPPERCASE_LETTER},
+     * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER} and
+     * {@link Character#SPACE_SEPARATOR      SPACE_SEPARATOR}.
+     *
+     * <p>An instance of this class can be obtained from an enumeration of character
types
+     * using the {@link #forTypes(byte[])} method, or using one of the constants predefined
+     * in this class. Then, Unicode characters can be tested for inclusion in the subset
by
+     * calling the {@link #contains(int)} method.</p>
+     *
+     * @author  Martin Desruisseaux (Geomatys)
+     * @since   0.3
+     * @version 0.3
+     * @module
+     *
+     * @see java.lang.Character.Subset
+     * @see Character#getType(int)
+     */
+    public static class Filter extends Character.Subset {
+        /*
+         * This class can not easily be Serializable, because the parent class is not Serializable
+         * and does not define a no-argument constructor.  We could workaround with a writeReplace
+         * method - waiting to see if there is a real need for that.
+         */
+
+        /**
+         * The subset of all characters for which {@link Character#isLetterOrDigit(int)}
+         * returns {@code true}. This subset includes the following general categories:
+         * {@link Character#LOWERCASE_LETTER},
+         * {@link Character#UPPERCASE_LETTER     UPPERCASE_LETTER},
+         * {@link Character#TITLECASE_LETTER     TITLECASE_LETTER},
+         * {@link Character#MODIFIER_LETTER      MODIFIER_LETTER},
+         * {@link Character#OTHER_LETTER         OTHER_LETTER} and
+         * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER}.
+         */
+        public static final Filter LETTERS_AND_DIGITS = new LettersAndDigits();
+
+        /**
+         * The subset of all characters for which {@link Character#isUnicodeIdentifierPart(int)}
+         * returns {@code true}, excluding {@linkplain Character#isIdentifierIgnorable(int)
+         * ignorable} characters. This subset includes all the {@link #LETTERS_AND_DIGITS}
+         * categories with the addition of the following ones:
+         * {@link Character#LETTER_NUMBER},
+         * {@link Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION},
+         * {@link Character#NON_SPACING_MARK NON_SPACING_MARK} and
+         * {@link Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK}.
+         */
+        public static final Filter UNICODE_IDENTIFIER = new UnicodeIdentifier();
+
+        /**
+         * A bitmask of character types in this subset.
+         */
+        private final long types;
+
+        /**
+         * Creates a new subset of the given name.
+         *
+         * @param name  The subset name.
+         * @param types A bitmask of character types.
+         */
+        Filter(final String name, final long types) {
+            super(name);
+            this.types = types;
+        }
+
+        /**
+         * Returns {@code true} if this subset contains the given Unicode character.
+         *
+         * @param  codePoint The Unicode character, as a code point value.
+         * @return {@code true} if this subset contains the given character.
+         */
+        public boolean contains(final int codePoint) {
+            return containsType(Character.getType(codePoint));
+        }
+
+        /**
+         * Returns {@code true} if this subset contains the characters of the given type.
+         * The given type shall be one of the {@link Character} constants like
+         * {@link Character#LOWERCASE_LETTER     LOWERCASE_LETTER},
+         * {@link Character#UPPERCASE_LETTER     UPPERCASE_LETTER},
+         * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER} or
+         * {@link Character#SPACE_SEPARATOR      SPACE_SEPARATOR}.
+         *
+         * @param  type One of the {@link Character} constants.
+         * @return {@code true} if this subset contains the characters of the given type.
+         *
+         * @see Character#getType(int)
+         */
+        public final boolean containsType(final int type) {
+            return (type >= 0) && (type < Long.SIZE) && (types &
(1L << type)) != 0;
+        }
+
+        /**
+         * Returns a subset representing the union of all Unicode characters of the given
types.
+         *
+         * @param  types The character types, as {@link Character} constants.
+         * @return The subset of Unicode characters of the given type.
+         *
+         * @see Character#LOWERCASE_LETTER
+         * @see Character#UPPERCASE_LETTER
+         * @see Character#DECIMAL_DIGIT_NUMBER
+         * @see Character#SPACE_SEPARATOR
+         */
+        public static Filter forTypes(final byte... types) {
+            long mask = 0;
+            for (int i=0; i<types.length; i++) {
+                final int type = types[i];
+                if (type < 0 || type >= Long.SIZE) {
+                    throw new IllegalArgumentException(Errors.format(
+                            Errors.Keys.IllegalArgumentValue_2, "types[" + i + ']', type));
+                }
+                mask |= (1L << type);
+            }
+predefined: for (int i=0; ; i++) {
+                final Filter candidate;
+                switch (i) {
+                    case 0:  candidate = LETTERS_AND_DIGITS; break;
+                    case 1:  candidate = UNICODE_IDENTIFIER; break;
+                    default: break predefined;
+                }
+                if (mask == candidate.types) {
+                    return candidate;
+                }
+            }
+            return new Filter("Filter", mask);
+        }
+    }
+
+    /**
+     * Implementation of the {@link Filter#LETTERS_AND_DIGITS} constant.
+     */
+    private static final class LettersAndDigits extends Filter {
+        /**
+         * Creates the {@link Filter#LETTERS_AND_DIGITS} singleton instance.
+         */
+        LettersAndDigits() {
+            super("LETTERS_AND_DIGITS",
+                      (1L << Character.LOWERCASE_LETTER)
+                    | (1L << Character.UPPERCASE_LETTER)
+                    | (1L << Character.TITLECASE_LETTER)
+                    | (1L << Character.MODIFIER_LETTER)
+                    | (1L << Character.OTHER_LETTER)
+                    | (1L << Character.DECIMAL_DIGIT_NUMBER));
+        }
+
+        /**
+         * Returns {@code true} if this subset contains the given Unicode character.
+         */
+        @Override
+        public boolean contains(final int codePoint) {
+            return Character.isLetterOrDigit(codePoint);
+        }
+    }
+
+    /**
+     * Implementation of the {@link Filter#UNICODE_IDENTIFIER} constant.
+     */
+    private static final class UnicodeIdentifier extends Filter {
+        /**
+         * Creates the {@link Filter#UNICODE_IDENTIFIER} singleton instance.
+         */
+        UnicodeIdentifier() {
+            super("UNICODE_IDENTIFIER",
+                      (1L << Character.LOWERCASE_LETTER)
+                    | (1L << Character.UPPERCASE_LETTER)
+                    | (1L << Character.TITLECASE_LETTER)
+                    | (1L << Character.MODIFIER_LETTER)
+                    | (1L << Character.OTHER_LETTER)
+                    | (1L << Character.DECIMAL_DIGIT_NUMBER)
+                    | (1L << Character.LETTER_NUMBER)
+                    | (1L << Character.CONNECTOR_PUNCTUATION)
+                    | (1L << Character.NON_SPACING_MARK)
+                    | (1L << Character.COMBINING_SPACING_MARK));
+        }
+
+        /**
+         * Returns {@code true} if this subset contains the given Unicode character.
+         */
+        @Override
+        public boolean contains(final int codePoint) {
+            return Character.isUnicodeIdentifierPart(codePoint) &&
+                  !Character.isIdentifierIgnorable(codePoint);
+        }
+    }
 }

Modified: sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
(original)
+++ sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
Fri Dec  7 08:11:59 2012
@@ -18,6 +18,7 @@ package org.apache.sis.util.type;
 
 import org.opengis.util.CodeList;
 import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Characters.Filter;
 
 
 /**
@@ -56,12 +57,12 @@ final class CodeListFilter implements Co
     }
 
     /**
-     * Returns {@code true} if the given code match the the name we are looking for.
+     * Returns {@code true} if the given code matches the name we are looking for.
      */
     @Override
     public boolean accept(final CodeList<?> code) {
         for (final String name : code.names()) {
-            if (CharSequences.equalsLettersAndDigits(name, codename)) {
+            if (CharSequences.equalsFiltered(name, codename, Filter.LETTERS_AND_DIGITS, true))
{
                 return true;
             }
         }

Modified: sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
(original)
+++ sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
Fri Dec  7 08:11:59 2012
@@ -37,7 +37,10 @@ import static org.apache.sis.util.CharSe
  * @version 0.3
  * @module
  */
-@DependsOn(ArraysTest.class)
+@DependsOn({
+    ArraysTest.class,
+    CharactersTest.class
+})
 public final strictfp class CharSequencesTest extends TestCase {
     /**
      * Tests {@link CharSequences#spaces(int)}.
@@ -312,12 +315,12 @@ public final strictfp class CharSequence
     }
 
     /**
-     * Tests the {@link CharSequences#isJavaIdentifier(CharSequence)} method.
+     * Tests the {@link CharSequences#isUnicodeIdentifier(CharSequence)} method.
      */
     @Test
-    public void testIsJavaIdentifier() {
-        assertTrue ("A123", isJavaIdentifier("A123"));
-        assertFalse("123A", isJavaIdentifier("123A"));
+    public void testIsUnicodeIdentifier() {
+        assertTrue ("A123", isUnicodeIdentifier("A123"));
+        assertFalse("123A", isUnicodeIdentifier("123A"));
     }
 
     /**
@@ -341,13 +344,16 @@ public final strictfp class CharSequence
     }
 
     /**
-     * Tests the {@link CharSequences#equalsLettersAndDigits(CharSequence, CharSequence)}
method.
+     * Tests the {@link CharSequences#equalsFiltered(CharSequence, CharSequence, Characters.Filter,
boolean)} method.
      */
     @Test
-    public void testEqualsLettersAndDigits() {
-        assertTrue (equalsLettersAndDigits(" UTF-8 ", "utf8"));
-        assertTrue (equalsLettersAndDigits("UTF-8", " utf 8"));
-        assertFalse(equalsLettersAndDigits("UTF-8", " utf 16"));
+    public void testEqualsFiltered() {
+        assertTrue (equalsFiltered(" UTF-8 ", "utf8",  Characters.Filter.LETTERS_AND_DIGITS,
true));
+        assertFalse(equalsFiltered(" UTF-8 ", "utf8",  Characters.Filter.LETTERS_AND_DIGITS,
false));
+        assertTrue (equalsFiltered("UTF-8", " utf 8",  Characters.Filter.LETTERS_AND_DIGITS,
true));
+        assertFalse(equalsFiltered("UTF-8", " utf 16", Characters.Filter.LETTERS_AND_DIGITS,
true));
+        assertTrue (equalsFiltered("WGS84", "WGS_84",  Characters.Filter.LETTERS_AND_DIGITS,
true));
+        assertFalse(equalsFiltered("WGS84", "WGS_84",  Characters.Filter.UNICODE_IDENTIFIER,
true));
     }
 
     /**

Modified: sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
--- sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java (original)
+++ sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java Fri
Dec  7 08:11:59 2012
@@ -92,4 +92,52 @@ public final strictfp class CharactersTe
         assertEquals(c, toNormalScript(c));
         assertFalse(isSubScript(c));
     }
+
+    /**
+     * Tests the pre-defined {@link Characters.Filter} constants.
+     */
+    @Test
+    public void testPredefinedFilters() {
+        assertTrue (Filter.UNICODE_IDENTIFIER.contains('a'));
+        assertTrue (Filter.LETTERS_AND_DIGITS.contains('a'));
+        assertTrue (Filter.UNICODE_IDENTIFIER.contains('_'));
+        assertFalse(Filter.LETTERS_AND_DIGITS.contains('_'));
+        assertFalse(Filter.UNICODE_IDENTIFIER.contains(' '));
+        assertFalse(Filter.LETTERS_AND_DIGITS.contains(' '));
+    }
+
+    /**
+     * Tests the {@link Characters.Filter#forTypes(byte[])} method.
+     */
+    @Test
+    public void testFilterForTypes() {
+        final Filter filter = Filter.forTypes(Character.SPACE_SEPARATOR, Character.DECIMAL_DIGIT_NUMBER);
+        assertTrue (filter.contains('0'));
+        assertTrue (filter.contains(' '));
+        assertFalse(filter.contains('A'));
+    }
+
+    /**
+     * Scans the full {@code char} range in order to check for {@link Characters.Filter} consistency.
+     */
+    @Test
+    public void scanCharacterRange() {
+        for (int c=Character.MIN_VALUE; c<=Character.MAX_VALUE; c++) {
+            final int type = Character.getType(c);
+predefined: for (int i=0; ; i++) {
+                final Characters.Filter filter;
+                switch (i) {
+                    case 0:  filter = Filter.UNICODE_IDENTIFIER; break;
+                    case 1:  filter = Filter.LETTERS_AND_DIGITS; break;
+                    default: break predefined;
+                }
+                final boolean cc = filter.contains(c);
+                final boolean ct = filter.containsType(type);
+                if (cc != ct) {
+                    fail(filter + ".contains('" + (char) c + "') == " + cc + " but "
+                            + filter + ".containsType(" + type + ") == " + ct);
+                }
+            }
+        }
+    }
 }



Mime
View raw message