sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1797075 - in /sis/branches/JDK8/core/sis-utility/src: main/java/org/apache/sis/measure/UnitFormat.java main/java/org/apache/sis/util/CharSequences.java test/java/org/apache/sis/measure/UnitFormatTest.java
Date Wed, 31 May 2017 17:16:42 GMT
Author: desruisseaux
Date: Wed May 31 17:16:42 2017
New Revision: 1797075

URL: http://svn.apache.org/viewvc?rev=1797075&view=rev
Log:
Allow parsing of unit symbol containing exponentiation operator, as in "m*s^-1".
UnitFormat already supported implicit exponentiation as in "m*s-1", but some formats add an
explicit ^ or ** operator.

Modified:
    sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
    sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
    sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java

Modified: sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java?rev=1797075&r1=1797074&r2=1797075&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
[UTF-8] Wed May 31 17:16:42 2017
@@ -462,7 +462,6 @@ public class UnitFormat extends Format i
      *
      * @param  uom  the unit symbol, without leading or trailing spaces.
      */
-    @SuppressWarnings("fallthrough")
     private Unit<?> fromName(String uom) {
         /*
          * Before to search in resource bundles, check for degrees units. The "deg" unit
can be both angular
@@ -808,14 +807,32 @@ public class UnitFormat extends Format i
     }
 
     /**
-     * Returns {@code true} if the {@code '*'} character at the given index is surrounded
by digits
-     * or a sign on its right side. For example this method returns {@code true} for "10*-6",
which
-     * means 1E-6 in UCUM syntax. This check is used for heuristic rules at parsing time.
-     */
-    private static boolean isExponentOperator(final CharSequence symbols, int i, final int
length) {
-        char c;
-        return (i != 0) && isDigit(symbols.charAt(i-1)) &&
-               (++i < length) && (isDigit(c = symbols.charAt(i)) || isSign(c));
+     * Returns {@code 0} or {@code 1} if the {@code '*'} character at the given index stands
for exponentiation
+     * instead of multiplication, or a negative value if the character stands for multiplication.
This check
+     * is used for heuristic rules at parsing time. Current implementation applies the following
rules:
+     *
+     * <ul>
+     *   <li>The operation is presumed an exponentiation if the '*' symbol is doubled,
as in {@code "m**s-1"}.</li>
+     *   <li>The operation is presumed an exponentiation if it is surrounded by digits
or a sign on its right side.
+     *       Example: {@code "10*-6"}, which means 1E-6 in UCUM syntax.</li>
+     *   <li>All other cases are currently presumed multiplication.
+     *       Example: {@code "m*s"}.</li>
+     * </ul>
+     *
+     * @return -1 for parsing as a multiplication, or a positive value for exponentiation.
+     *         If positive, this is the number of characters in the exponent symbol minus
1.
+     */
+    private static int exponentOperator(final CharSequence symbols, int i, final int length)
{
+        if (i >= 0 && ++i < length) {
+            final char c = symbols.charAt(i);
+            if (c == Style.EXPONENT_OR_MULTIPLY) {
+                return 1;                               // "**" operator: need to skip one
character after '*'.
+            }
+            if ((isDigit(c) || isSign(c)) && isDigit(symbols.charAt(i-2))) {
+                return 0;                               // "*" operator surrounded by digits:
no character to skip.
+            }
+        }
+        return -1;
     }
 
     /**
@@ -1012,10 +1029,12 @@ scan:   for (int n; i < end; i += n) {
                  * a unit symbol.
                  */
                 case Style.EXPONENT_OR_MULTIPLY: {
-                    if (!isExponentOperator(symbols, i, end)) {
+                    final int w = exponentOperator(symbols, i, end);
+                    if (w < 0) {
                         next = MULTIPLY;
                         break;
                     }
+                    i += w;
                     // else fall through.
                 }
                 case Style.EXPONENT: {
@@ -1065,7 +1084,7 @@ scan:   for (int n; i < end; i += n) {
              * the above 'switch' statement all cases that end with 'break', not 'break scan'
or 'continue').
              */
             if (operation != IMPLICIT) {
-                unit = apply(operation, unit, parseSymbol(symbols, start, i));
+                unit = apply(operation, unit, parseTerm(symbols, start, i));
             }
             hasSpaces = false;
             operation = next;
@@ -1111,7 +1130,7 @@ search:     while ((i = CharSequences.sk
             }
         }
         if (component == null) {
-            component = parseSymbol(symbols, start, i);
+            component = parseTerm(symbols, start, i);
         }
         unit = apply(operation, unit, component);
         position.setIndex(endOfURI >= 0 ? endOfURI : i);
@@ -1151,7 +1170,8 @@ search:     while ((i = CharSequences.sk
      * @return the parsed unit symbol (never {@code null}).
      * @throws ParserException if a problem occurred while parsing the given symbols.
      */
-    private Unit<?> parseSymbol(final CharSequence symbols, final int lower, final
int upper) throws ParserException {
+    @SuppressWarnings("fallthrough")
+    private Unit<?> parseTerm(final CharSequence symbols, final int lower, final int
upper) throws ParserException {
         final String uom = CharSequences.trimWhitespaces(symbols, lower, upper).toString();
         /*
          * Check for labels explicitly given by users. Those labels have precedence over
the Apache SIS hard-coded
@@ -1183,11 +1203,11 @@ search:     while ((i = CharSequences.sk
                                 final int next = CharSequences.skipLeadingWhitespaces(uom,
s, length);
                                 if (next < length && AbstractUnit.isSymbolChar(uom.codePointAt(next)))
{
                                     multiplier = Double.parseDouble(uom.substring(0, s));
-                                    return parseSymbol(uom, s, length).multiply(multiplier);
+                                    return parseTerm(uom, s, length).multiply(multiplier);
                                 }
                             }
-                            s = uom.lastIndexOf(Style.EXPONENT_OR_MULTIPLY);
-                            if (s >= 0) {
+                            s = uom.lastIndexOf(Style.EXPONENT_OR_MULTIPLY);      // Check
standard UCUM symbol first.
+                            if (s >= 0 || (s = uom.lastIndexOf(Style.EXPONENT)) >=
0) {
                                 final int base = Integer.parseInt(uom.substring(0, s));
                                 final int exp  = Integer.parseInt(uom.substring(s+1));
                                 multiplier = Math.pow(base, exp);
@@ -1235,7 +1255,24 @@ search:     while ((i = CharSequences.sk
                         } while (i != 0);
                     }
                     if (canApply) {
-                        unit = getPrefixed(CharSequences.trimWhitespaces(uom, 0, i).toString());
+                        /*
+                         * At this point we have parsed the exponent. Before parsing the
raw unit symbol,
+                         * skip the exponent symbol (^, * or **) if any.
+                         */
+                        i = CharSequences.skipTrailingWhitespaces(uom, 0, i);
+                        if (i != 0) {
+                            switch (uom.charAt(i-1)) {
+                                case Style.EXPONENT_OR_MULTIPLY: {
+                                    if (i != 1 && uom.charAt(i-2) == Style.EXPONENT_OR_MULTIPLY)
i--;
+                                    // Fallthrough for skipping the next character and whitespaces.
+                                }
+                                case Style.EXPONENT: {
+                                    i = CharSequences.skipTrailingWhitespaces(uom, 0, i -
1);
+                                    break;
+                                }
+                            }
+                        }
+                        unit = getPrefixed(uom.substring(CharSequences.skipLeadingWhitespaces(uom,
0, i), i));
                         if (unit != null) {
                             return unit.pow(power);
                         }

Modified: sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1797075&r1=1797074&r2=1797075&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
[UTF-8] Wed May 31 17:16:42 2017
@@ -1006,7 +1006,7 @@ search:     for (; fromIndex <= toIndex;
         if (text != null) {
             lower = skipLeadingWhitespaces (text, lower, upper);
             upper = skipTrailingWhitespaces(text, lower, upper);
-            if (lower != 0 || upper != length) { // Safety in case subSequence doesn't make
the check.
+            if (lower != 0 || upper != length) {                  // Safety in case subSequence
doesn't make the check.
                 text = text.subSequence(lower, upper);
             }
         }

Modified: sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java?rev=1797075&r1=1797074&r2=1797075&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
[UTF-8] Wed May 31 17:16:42 2017
@@ -415,10 +415,26 @@ public final strictfp class UnitFormatTe
         assertSame(Units.KILOMETRE,  f.parse( "1000.0*m"));
         ConventionalUnitTest.verify(Units.METRE, f.parse("10*-6⋅m"),   "µm", 1E-6);
         ConventionalUnitTest.verify(Units.METRE, f.parse("10*-6.m"),   "µm", 1E-6);
+        ConventionalUnitTest.verify(Units.METRE, f.parse("10^-3.m"),   "mm", 1E-3);
         ConventionalUnitTest.verify(Units.METRE, f.parse( "100 feet"), null, 30.48);
     }
 
     /**
+     * Tests parsing of symbols containing an explicit exponentiation operation.
+     * Usually the exponentiation is implicit, as in {@code "m*s-1"}.
+     * However some formats write it explicitly, as in {@code "m*s^-1"}.
+     */
+    @Test
+    @DependsOnMethod("testParseMultiplier")
+    public void testParseExponentiation() {
+        final UnitFormat f = new UnitFormat(Locale.UK);
+        assertSame(Units.HERTZ,             f.parse("s^-1"));
+        assertSame(Units.HERTZ,             f.parse("s**-1"));
+        assertSame(Units.METRES_PER_SECOND, f.parse("m*s^-1"));
+        assertSame(Units.METRES_PER_SECOND, f.parse("m*s**-1"));
+    }
+
+    /**
      * Tests parsing expressions containing parenthesis.
      */
     @Test



Mime
View raw message