sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1551738 - in /sis/branches/JDK7/core/sis-utility/src: main/java/org/apache/sis/util/Locales.java test/java/org/apache/sis/util/LocalesTest.java
Date Tue, 17 Dec 2013 22:28:25 GMT
Author: desruisseaux
Date: Tue Dec 17 22:28:25 2013
New Revision: 1551738

URL: http://svn.apache.org/r1551738
Log:
More efficient replacement of ISO3 codes by ISO2 ones (avoid linear scan on all Locales).

Modified:
    sis/branches/JDK7/core/sis-utility/src/main/java/org/apache/sis/util/Locales.java
    sis/branches/JDK7/core/sis-utility/src/test/java/org/apache/sis/util/LocalesTest.java

Modified: sis/branches/JDK7/core/sis-utility/src/main/java/org/apache/sis/util/Locales.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/core/sis-utility/src/main/java/org/apache/sis/util/Locales.java?rev=1551738&r1=1551737&r2=1551738&view=diff
==============================================================================
--- sis/branches/JDK7/core/sis-utility/src/main/java/org/apache/sis/util/Locales.java [UTF-8]
(original)
+++ sis/branches/JDK7/core/sis-utility/src/main/java/org/apache/sis/util/Locales.java [UTF-8]
Tue Dec 17 22:28:25 2013
@@ -20,8 +20,10 @@ import java.util.Map;
 import java.util.Set;
 import java.util.Arrays;
 import java.util.Locale;
+import java.util.TreeMap;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
+import java.util.MissingResourceException;
 import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
 
@@ -84,6 +86,57 @@ public final class Locales extends Stati
     }
 
     /**
+     * Bit mask for differentiating language codes from country codes in the {@link #ISO2}
and {@link #ISO3} arrays.
+     */
+    private static final short LANGUAGE = 0, COUNTRY = Short.MIN_VALUE;
+
+    /**
+     * Mapping from 3-letters codes to 2-letters codes. We use {@code short} type instead
of {@link String}
+     * for compactness (conversions is done by {@link #toNumber(String, int)}) and for avoiding
references
+     * to {@code String} instances.
+     *
+     * {@note Oracle JDK8 implementation computes the 3-letters codes on-the-fly instead
of holding references
+     *        to pre-existing strings. If we were holding string references here, we would
prevent the garbage
+     *        collector to collect the strings for all languages and countries. This would
probably be a waste
+     *        of resources.}
+     */
+    private static final short[] ISO3, ISO2;
+    static {
+        final Short CONFLICT = (short) 0; // Sentinal value for conflicts (paranoiac safety).
+        final Map<Short,Short> map = new TreeMap<>();
+        for (final Locale locale : POOL.values()) {
+            short type = LANGUAGE; // 0 for language, or leftmost bit set for country.
+            do { // Executed exactly twice: once for language, than once for country.
+                final short alpha2 = toNumber((type == LANGUAGE) ? locale.getLanguage() :
locale.getCountry(), type);
+                if (alpha2 != 0) {
+                    final short alpha3;
+                    try {
+                        alpha3 = toNumber((type == LANGUAGE) ? locale.getISO3Language() :
locale.getISO3Country(), type);
+                    } catch (MissingResourceException e) {
+                        continue; // No 3-letters code to map for this locale.
+                    }
+                    if (alpha3 != 0 && alpha3 != alpha2) {
+                        final Short p = map.put(alpha3, alpha2);
+                        if (p != null && p.shortValue() != alpha2) {
+                            // We do not expect any conflict. But if it happen anyway, conservatively
+                            // remember that we should not perform any substitution for that
code.
+                            map.put(alpha3, CONFLICT);
+                        }
+                    }
+                }
+            } while ((type ^= COUNTRY) != LANGUAGE);
+        }
+        while (map.values().remove(CONFLICT)); // Remove all conflicts that we may have found.
+        ISO3 = new short[map.size()];
+        ISO2 = new short[map.size()];
+        int i = 0;
+        for (final Map.Entry<Short,Short> entry : map.entrySet()) {
+            ISO3[i]   = entry.getKey();
+            ISO2[i++] = entry.getValue();
+        }
+    }
+
+    /**
      * All locales available on the JavaVM.
      */
     public static final Locales ALL = new Locales();
@@ -129,12 +182,20 @@ public final class Locales extends Stati
         if (this == ALL) {
             return Locale.getAvailableLocales();
         }
-        final Locale[] languages = getAvailableLanguages();
-        Locale[] locales = Locale.getAvailableLocales();
+        Locale[] locales = getAvailableLanguages();
+        final String[] languages = new String[locales.length];
+        for (int i=0; i<languages.length; i++) {
+            languages[i] = locales[i].getLanguage();
+        }
         int count = 0;
-        for (final Locale locale : locales) {
-            if (containsLanguage(languages, locale)) {
-                locales[count++] = unique(locale);
+        locales = Locale.getAvailableLocales();
+filter: for (final Locale locale : locales) {
+            final String code = locale.getLanguage();
+            for (int i=0; i<languages.length; i++) {
+                if (code.equals(languages[i])) {
+                    locales[count++] = unique(locale);
+                    continue filter;
+                }
             }
         }
         locales = ArraysExt.resize(locales, count);
@@ -142,20 +203,6 @@ public final class Locales extends Stati
     }
 
     /**
-     * Returns {@code true} if the specified array of locales contains at least
-     * one element with the specified language.
-     */
-    private static boolean containsLanguage(final Locale[] locales, final Locale language)
{
-        final String code = language.getLanguage();
-        for (int i=0; i<locales.length; i++) {
-            if (code.equals(locales[i].getLanguage())) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
      * Returns the list of {@linkplain #getAvailableLocales() available locales} formatted
      * as strings in the specified locale.
      *
@@ -240,7 +287,8 @@ public final class Locales extends Stati
     public static Locale parse(final String code, final int fromIndex) throws IllegalArgumentException
{
         ArgumentChecks.ensureNonNull("code", code);
         ArgumentChecks.ensurePositive("fromIndex", fromIndex);
-        final String language, country, variant;
+        boolean hasMore = false;
+        String language, country, variant;
         int ci = code.indexOf('_', fromIndex);
         if (ci < 0) {
             language = (String) trimWhitespaces(code, fromIndex, code.length());
@@ -255,44 +303,15 @@ public final class Locales extends Stati
             } else {
                 country = (String) trimWhitespaces(code, ci, vi);
                 variant = (String) trimWhitespaces(code, ++vi, code.length());
-                if (code.indexOf('_', vi) >= 0) {
-                    throw new IllegalArgumentException(Errors.format(Errors.Keys.IllegalLanguageCode_1,
-                            code.substring(fromIndex)));
-                }
+                hasMore = code.indexOf('_', vi) >= 0;
             }
         }
-        final boolean language3 = isThreeLetters(language);
-        final boolean country3  = isThreeLetters(country);
-        /*
-         * Perform a linear scan only if we need to compare some 3-letters ISO code.
-         * Otherwise (if every code are 2 letters), it will be faster to create a new
-         * locale and check for an existing instance in the hash map.
-         */
-        if (language3 || country3) {
-            String language2 = language;
-            String country2  = country;
-            for (Locale locale : Locale.getAvailableLocales()) {
-                String c = (language3) ? locale.getISO3Language() : locale.getLanguage();
-                if (language.equals(c)) {
-                    if (country2 == country) { // Really identity comparison.
-                        // Remember the 2-letters ISO code in an opportunist way.
-                        // If the 2-letters ISO code has been set for the country
-                        // as well, we will not change the language code because
-                        // it has already been set with the code associated with
-                        // the right country.
-                        language2 = locale.getLanguage();
-                    }
-                    c = (country3) ? locale.getISO3Country() : locale.getCountry();
-                    if (country.equals(c)) {
-                        country2 = locale.getCountry();
-                        if (variant.equals(locale.getVariant())) {
-                            return unique(locale);
-                        }
-                    }
-                }
-            }
-            return unique(new Locale(language2, country2, variant));
+        if (hasMore || language.length() > 3 || country.length() > 3) {
+            throw new IllegalArgumentException(Errors.format(Errors.Keys.IllegalLanguageCode_1,
+                    code.substring(fromIndex)));
         }
+        language = toISO2(language, LANGUAGE);
+        country  = toISO2(country,  COUNTRY);
         return unique(new Locale(language, country, variant));
     }
 
@@ -341,17 +360,57 @@ public final class Locales extends Stati
     }
 
     /**
-     * Returns {@code true} if the following code is 3 letters, or {@code false} if 2 letters.
+     * Converts a 3-letters ISO code to a 2-letters one. If the given code is not recognized,
+     * then this method returns {@code code} unmodified.
+     *
+     * @param  code The 3-letters code.
+     * @param  type Either {@link #LANGUAGE} or {@link #COUNTRY}.
+     * @return The 2-letters code, or {@code null} if none.
+     */
+    private static String toISO2(final String code, final short type) {
+        final short alpha3 = toNumber(code, type);
+        if (alpha3 != 0) {
+            int alpha2 = Arrays.binarySearch(ISO3, alpha3);
+            if (alpha2 >= 0) {
+                alpha2 = ISO2[alpha2];
+                final int base = (alpha2 & COUNTRY) != 0 ? ('A' - 1) : ('a' - 1);
+                alpha2 &= ~COUNTRY;
+                int i = 0;
+                final char[] c = new char[3]; // 2 should be enough, but our impl. actually
allows 3-letters codes too.
+                do c[i++] = (char) ((alpha2 & 0x1F) + base);
+                while ((alpha2 >>>= 5) != 0);
+                return String.valueOf(c, 0, i);
+            }
+        }
+        return code;
+    }
+
+    /**
+     * Converts the given 1-, 2- or 3- letters alpha code to a 15 bits numbers. Each letter
uses 5 bits.
+     * If an invalid character is found, then this method returns 0.
+     *
+     * <p>This method does not use the sign bit. Callers can use it for differentiating
language codes
+     * from country codes, using the {@link #LANGUAGE} or {@link #COUNTRY} bit masks.</p>
+     *
+     * @param  code The 1-, 2- or 3- letters alpha code to convert.
+     * @param  n Initial bit pattern, either {@link #LANGUAGE} or {@link #COUNTRY}.
+     * @return A number for the given code, or 0 if a non alpha characters were found.
      */
-    private static boolean isThreeLetters(final String code) {
-        switch (code.length()) {
-            case 0: // fall through
-            case 2: return false;
-            case 3: return true;
-            default: {
-                throw new IllegalArgumentException(Errors.format(Errors.Keys.IllegalLanguageCode_1,
code));
+    private static short toNumber(final String code, short n) {
+        final int length = code.length();
+        if (length >= 1 && length <= 3) {
+            int shift = 0;
+            for (int i=0; i<length; i++) {
+                int c = code.charAt(i);
+                if (c < 'A' || (c -= (c >= 'a') ? ('a' - 1) : ('A' - 1)) > 26) {
+                    return 0;
+                }
+                n |= c << shift;
+                shift += 5;
             }
+            return n;
         }
+        return 0;
     }
 
     /**

Modified: sis/branches/JDK7/core/sis-utility/src/test/java/org/apache/sis/util/LocalesTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK7/core/sis-utility/src/test/java/org/apache/sis/util/LocalesTest.java?rev=1551738&r1=1551737&r2=1551738&view=diff
==============================================================================
--- sis/branches/JDK7/core/sis-utility/src/test/java/org/apache/sis/util/LocalesTest.java
[UTF-8] (original)
+++ sis/branches/JDK7/core/sis-utility/src/test/java/org/apache/sis/util/LocalesTest.java
[UTF-8] Tue Dec 17 22:28:25 2013
@@ -72,7 +72,7 @@ public final strictfp class LocalesTest 
      */
     @Test
     @DependsOnMethod("testUnique")
-    public void testParseLanguage() {
+    public void testParse() {
         assertSame(Locale.FRENCH,        Locales.parse("fr"));
         assertSame(Locale.FRENCH,        Locales.parse("fra"));
         assertSame(Locale.CANADA_FRENCH, Locales.parse("fr_CA"));
@@ -89,11 +89,24 @@ public final strictfp class LocalesTest 
     }
 
     /**
+     * Tests that {@link Locales#parse(String)} throw an exception if given an invalid argument.
+     */
+    @Test
+    public void testParseInvalid() {
+        try {
+            Locales.parse("orange_APPLE");
+            fail("Shall not parse invalid locale.");
+        } catch (IllegalArgumentException e) {
+            assertTrue(e.getMessage().contains("orange_APPLE"));
+        }
+    }
+
+    /**
      * Tests the {@link Locales#parseSuffix(String, String)} method.
      */
     @Test
     @Deprecated
-    @DependsOnMethod("testParseLanguage")
+    @DependsOnMethod("testParse")
     public void testParseSuffix() {
         assertSame(null,           Locales.parseSuffix("remarks", "remark"));
         assertSame(Locale.ROOT,    Locales.parseSuffix("remarks", "remarks"));



Mime
View raw message