sis-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From desruisse...@apache.org
Subject svn commit: r1728885 - in /sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis: referencing/factory/sql/EPSGDataFormatter.java referencing/factory/sql/EPSGDataFormatterTest.java test/suite/ReferencingTestSuite.java
Date Sat, 06 Feb 2016 21:22:04 GMT
Author: desruisseaux
Date: Sat Feb  6 21:22:04 2016
New Revision: 1728885

URL: http://svn.apache.org/viewvc?rev=1728885&view=rev
Log:
Initial port of the code reformating the EPSG "Data.sql" table in a more compact and efficient
way.

Added:
    sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
  (with props)
    sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
  (with props)
Modified:
    sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/test/suite/ReferencingTestSuite.java

Added: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java?rev=1728885&view=auto
==============================================================================
--- sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
(added)
+++ sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
[UTF-8] Sat Feb  6 21:22:04 2016
@@ -0,0 +1,409 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.referencing.factory.sql;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.Writer;
+import java.io.BufferedWriter;
+import java.io.OutputStreamWriter;
+import java.io.InputStream;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+import javax.sql.DataSource;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.sis.util.CharSequences;
+import org.apache.sis.internal.metadata.sql.ScriptRunner;
+import org.apache.sis.internal.metadata.sql.TestDatabase;
+
+
+/**
+ * Rewrites the {@code INSERT TO ...} statements in a SQL script in a more compact form.
+ * This class is used only for updating the SQL scripts used by Apache SIS for the EPSG
+ * dataset when a newer release of the EPSG dataset is available. Steps to follow:
+ *
+ * <ol class="verbose">
+ *   <li><p>Download the latest SQL scripts for PostgreSQL from <a href="http://www.epsg.org">http://www.epsg.org</a>.</p></li>
+ *   <li><p>Unzip in the directory of your choice (replace "8.8" by the appropriate
version number in the ZIP filename),
+ *          and remember the path to that directory:</p>
+ *
+ *          {@preformat text
+ *            unzip epsg-v8_8sql-PostgreSQL.zip
+ *            cd epsg-v8_8sql-PostgreSQL
+ *            export EPSG_SCRIPTS=$PWD
+ *          }
+ *   </li>
+ *
+ *   <li><p>Move to the directory which contains the Apache SIS scripts:</p>
+ *
+ *         {@preformat text
+ *           cd <SIS_HOME>/core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/
+ *         }
+ *   </li>
+ *
+ *   <li><p>Overwrite {@code Tables.sql} and {@code FKeys.sql} with the new SQL
scripts
+ *          (replace "8.8" by the appropriate version number in the commands show below).
+ *          Do not overwrite {@code Data.sql} and {@code Indexes.sql}:</p>
+ *
+ *          {@preformat text
+ *            cp $EPSG_SCRIPTS/EPSG_v8_8.mdb_Tables_PostgreSQL.sql Tables.sql
+ *            cp $EPSG_SCRIPTS/EPSG_v8_8.mdb_FKeys_PostgreSQL.sql  FKeys.sql
+ *          }
+ *   </li>
+ *
+ *   <li><p>Open the {@code Tables.sql} file for edition:</p>
+ *     <ul>
+ *       <li>In the statement creating the {@code epsg_datum} table,
+ *           change the type of the {@code realization_epoch} column to {@code SMALLINT}.</li>
+ *       <li>In the statement creating the {@code coordinateaxis} table,
+ *           add the {@code NOT NULL} constraint to the {@code coord_axis_code} column.</li>
+ *       <li>In the statement creating the {@code change} table,
+ *           remove the {@code UNIQUE} constraint on the {@code change_id} column
+ *           and add a {@code CONSTRAINT pk_change PRIMARY KEY (change_id)} line instead.</li>
+ *       <li>Suppress trailing spaces, format the statement as in the previous version
+ *           for reducing the amount of difference and save.</li>
+ *     </ul>
+ *     <p>Usually this results in no change at all compared to the previous script
(ignoring white spaces),
+ *        in which case the maintainer can just revert the changes in order to preserve the
formatting.</p>
+ *   </li>
+ *
+ *   <li><p>Open the {@code FKeys.sql} file for edition:</p>
+ *     <ul>
+ *       <li>At the end of all {@code ALTER TABLE} statement,
+ *           append {@code ON UPDATE RESTRICT ON DELETE RESTRICT}.</li>
+ *       <li>suppress trailing spaces and save.</li>
+ *     </ul>
+ *     <p>In most cases this results in unmodified {@code FKeys.sql} file compared
to the previous version.</p>
+ *   </li>
+ *
+ *   <li><p>Run the {@code main} method of this class.</p></li>
+ *
+ *   <li><p>Upgrade the {@code FACTORY.VERSION} value defined in the
+ *          {@code org.apache.sis.referencing.report.CoordinateReferenceSystems} class.</p></li>
+ * </ol>
+ *
+ * @author  Martin Desruisseaux (IRD, Geomatys)
+ * @since   0.7
+ * @version 0.7
+ * @module
+ */
+public final class EPSGDataFormatter extends ScriptRunner {
+    /**
+     * Compacts the {@code Data.sql} file provided by EPSG. This method expects two arguments.
+     * The first argument is the file of the SQL script to read, which must exist.
+     * The second argument is the file where to write the compacted SQL script,
+     * which will be overwritten without warning if it exists.
+     * The values of those arguments are typically:
+     *
+     * <ol>
+     *   <li>{@code EPSG_vX.mdb_Data_MySQL.sql}</li>
+     *   <li>{@code core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/Data.sql}</li>
+     * </ol>
+     *
+     * @param  arguments The source files and the destination file.
+     * @throws Exception if an error occurred while reading of writing the file.
+     */
+    @SuppressWarnings("UseOfSystemOutOrSystemErr")
+    public static void main(String[] arguments) throws Exception {
+        if (arguments.length != 2) {
+            System.err.println("Expected two arguments: source SQL file and target SQL file.");
+            return;
+        }
+        final DataSource ds = TestDatabase.create("dummy");
+        try (Connection c = ds.getConnection()) {
+            final EPSGDataFormatter f = new EPSGDataFormatter(c);
+            f.run(new File(arguments[0]), new File(arguments[1]));
+        } finally {
+            TestDatabase.drop(ds);
+        }
+    }
+
+    /**
+     * The output stream, or {@code null} if closed or not yet created.
+     */
+    private Writer out;
+
+    /**
+     * The {@code INSERT INTO "Table"} statement currently executing.
+     * Used in order to detect when the script start inserting values in a different table.
+     */
+    private String insertStatement;
+
+    /**
+     * {@code true} if insertions are currently done in the datum table.
+     */
+    private boolean insertDatum;
+
+    /**
+     * Creates a new instance.
+     *
+     * @param  c A dummy connection. Will be used for fetching metadata.
+     * @throws SQLException if an error occurred while fetching metadata.
+     */
+    private EPSGDataFormatter(final Connection c) throws SQLException {
+        super(c, EPSGInstaller.ENCODING, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Returns {@code true} if the given line should be omitted from the script.
+     *
+     * @param  line The line, without trailing {@code ';'}.
+     * @return {@code true} if the line should be omitted.
+     */
+    private static boolean omit(final String line) {
+        // We omit the following line because we changed the type from VARCHAR to SMALLINT.
+        return line.startsWith("UPDATE epsg_datum SET realization_epoch = replace(realization_epoch,
CHR(182), CHR(10))");
+    }
+
+    /**
+     * Compacts the given file.
+     *
+     * @param  inputFile    The input file where to read the SQL statements to compact.
+     * @param  outputFile   The output file where to write the compacted SQL statements.
+     * @param  encoding     The character encoding for both input and output files.
+     * @throws IOException  if an I/O operation failed.
+     * @throws SQLException should never happen.
+     */
+    private void run(final File inputFile, final File outputFile) throws SQLException, IOException
{
+        if (inputFile.equals(outputFile)) {
+            throw new IllegalArgumentException("Input and output files are the same.");
+        }
+        out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile),
EPSGInstaller.ENCODING));
+        try (final InputStream in = new FileInputStream(inputFile)) {
+            run(inputFile.getName(), in);
+        } finally {
+            out.close();
+            out = null;
+        }
+    }
+
+    /**
+     * "Executes" the given SQL statement. In the context of this {@code EPSGDataWriter}
class,
+     * executing a SQL statement means compacting it and writing it to the output file.
+     *
+     * @param  sql The SQL statement to compact.
+     * @return The number of rows added.
+     * @throws IOException if an I/O operation failed.
+     */
+    @Override
+    protected int execute(final StringBuilder sql) throws IOException {
+        removeLF(sql);
+        String line = CharSequences.trimWhitespaces(sql).toString();
+        if (insertStatement != null) {
+            if (line.startsWith(insertStatement)) {
+                // The previous instruction was already an INSERT INTO the same table.
+                line = CharSequences.trimWhitespaces(line, insertStatement.length(), line.length()).toString();
+                line = removeUselessExponents(line);
+                if (insertDatum) {
+                    line = removeRealizationEpochQuotes(line);
+                }
+                out.append(",\n"); // Really want Unix EOL, not the platform-specific one.
+                writeValues(line);
+                return 1;
+            }
+            // Previous instruction was the last INSERT INTO for a given table.
+            // We now have a new instruction. Append the pending cariage return.
+            out.append(";\n");
+        }
+        if (line.startsWith("INSERT INTO")) {
+            insertDatum = line.startsWith("INSERT INTO EPSG_DATUM VALUES");
+            int valuesStart = line.indexOf("VALUES", 11);
+            if (valuesStart >= 0) {
+                // We are beginning insertions in a new table.
+                valuesStart += 6; // Move to the end of "VALUES".
+                insertStatement = CharSequences.trimWhitespaces(line, 0, valuesStart).toString();
+                line = CharSequences.trimWhitespaces(line, insertStatement.length(), line.length()).toString();
+                line = removeUselessExponents(line);
+                if (insertDatum) {
+                    line = removeRealizationEpochQuotes(line);
+                }
+                out.append(insertStatement);
+                out.append('\n');
+                writeValues(line);
+                return 1;
+            }
+        }
+        insertStatement = null;
+        if (!omit(line)) {
+            out.append(line);
+            out.append(";\n");
+        }
+        return 0;
+    }
+
+    /**
+     * Writes the values after an {@code INSERT INTO "Table" VALUES} expression.
+     * This method tries to remove extra spaces before ( and after ) for producing a more
compact file.
+     */
+    private void writeValues(final String values) throws IOException {
+        if (values.startsWith("(") && values.endsWith(")")) {
+            out.append('(');
+            out.append(CharSequences.trimWhitespaces(values, 1, values.length() - 1));
+            out.append(')');
+        } else {
+            out.append(values);
+        }
+    }
+
+    /**
+     * For private usage by the following method only.
+     */
+    private static final Pattern uselessExponentPattern =
+            Pattern.compile("([\\(\\,]\\-?\\d+\\.\\d+)E[\\+\\-]?0+([\\,\\)])");
+
+    /**
+     * Removes the useless "E0" exponents after floating point numbers.
+     */
+    private static String removeUselessExponents(String line) {
+        StringBuilder cleaned = null;
+        final Matcher matcher = uselessExponentPattern.matcher(line);
+        while (true) {
+            int lastIndex = 0;
+            while (matcher.find()) {
+                // Make sure this is not a quoted text.
+                boolean quoted = false;
+                for (int i=matcher.start(); (i=line.lastIndexOf('\'', i-1)) >= 0;) {
+                    if (i == 0 || line.charAt(i-1) != '\\') {
+                        quoted = !quoted;
+                    }
+                }
+                if (!quoted) {
+                    // Found a number outside quotes. Replace.
+                    if (cleaned == null) {
+                        cleaned = new StringBuilder();
+                    }
+                    cleaned.append(line, lastIndex, matcher.end(1));
+                    lastIndex = matcher.end();
+                    cleaned.append(line, matcher.start(2), lastIndex);
+                }
+            }
+            if (lastIndex == 0) {
+                return line;
+            }
+            cleaned.append(line, lastIndex, line.length());
+            line = cleaned.toString();
+            matcher.reset(line);
+            cleaned.setLength(0);
+        }
+    }
+
+    /**
+     * Removes the quotes in REALIZATION_EPOCH column (i.e. change the type from TEXT to
INTEGER).
+     * This is the 5th column.
+     */
+    private static String removeRealizationEpochQuotes(final String line) {
+        int index = getIndexForColumn(line, 5);
+        if (line.charAt(index) != '\'') {
+            return line;
+        }
+        final StringBuilder cleaned = new StringBuilder(line.substring(0, index));
+        if (line.charAt(++index) == '\'') {
+            cleaned.append("Null");
+        } else do {
+            cleaned.append(line.charAt(index));
+        }
+        while (line.charAt(++index) != '\'');
+        cleaned.append(line, index+1, line.length());
+        return cleaned.toString();
+    }
+
+    /**
+     * Returns the start index for the given column in the specified {@code VALUES} string.
+     * Column numbers start at 1.
+     */
+    private static int getIndexForColumn(final String line, int column) {
+        if (--column == 0) {
+            return 0;
+        }
+        boolean quote = false;
+        final int length = line.length();
+        for (int index=0; index<length; index++) {
+            switch (line.charAt(index)) {
+                case '\'': {
+                    if (index == 0 || line.charAt(index-1) != '\\') {
+                        quote = !quote;
+                    }
+                    break;
+                }
+                case ',': {
+                    if (!quote && --column==0) {
+                        return CharSequences.skipLeadingWhitespaces(line, index+1, length);
+                    }
+                    break;
+                }
+            }
+        }
+        return length;
+    }
+
+    /**
+     * Reformats a multi-line text as a single line text. For each occurrence of line feed
+     * (the {@code '\n'} character) found in the given buffer, this method performs the following
steps:
+     *
+     * <ol>
+     *   <li>Remove the line feed character and the {@linkplain Character#isWhitespace(char)
white spaces} around them.</li>
+     *   <li>If the last character before the line feed and the first character after
the line feed are both
+     *       {@linkplain Character#isLetterOrDigit(char) letter or digit}, then a space is
inserted between them.
+     *       Otherwise they will be no space.</li>
+     * </ol>
+     *
+     * This method is provided for {@link #execute(StringBuilder)} implementations, in order
to "compress"
+     * a multi-lines SQL statement on a single line before further processing by the caller.
+     *
+     * <p><b>Note:</b> current version does not use codepoint API
+     * on the assumption that it is not needed for EPSG's SQL files.</p>
+     *
+     * @param buffer The string in which to perform the removal.
+     */
+    static void removeLF(final StringBuilder buffer) {
+        int i = buffer.length();
+        while ((i = buffer.lastIndexOf("\n", i)) >= 0) {
+            final int length = buffer.length();
+            int nld = 0;
+            int upper = i;
+            while (++upper < length) {
+                final char c = buffer.charAt(upper);
+                if (!Character.isWhitespace(c)) {
+                    if (Character.isLetterOrDigit(c)) {
+                        nld++;
+                    }
+                    break;
+                }
+            }
+            while (i != 0) {
+                final char c = buffer.charAt(--i);
+                if (!Character.isWhitespace(c)) {
+                    if (Character.isLetterOrDigit(c)) {
+                        nld++;
+                    }
+                    i++;
+                    break;
+                }
+            }
+            if (nld == 2) {
+                upper--;
+            }
+            buffer.delete(i, upper);
+            if (nld == 2) {
+                buffer.setCharAt(i, ' ');
+            }
+        }
+    }
+}

Propchange: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain;charset=UTF-8

Added: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java?rev=1728885&view=auto
==============================================================================
--- sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
(added)
+++ sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
[UTF-8] Sat Feb  6 21:22:04 2016
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.referencing.factory.sql;
+
+import org.apache.sis.test.TestCase;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+/**
+ * Tests {@link EPSGDataFormatter}.
+ *
+ * @author  Martin Desruisseaux (IRD, Geomatys)
+ * @since   0.7
+ * @version 0.7
+ * @module
+ */
+public final strictfp class EPSGDataFormatterTest extends TestCase {
+    /**
+     * Tests the {@link EPSGDataFormatter#removeLF(StringBuilder)} method.
+     */
+    @Test
+    public void testRemoveLF() {
+        final StringBuilder buffer = new StringBuilder(" \nOne,\nTwo, \n Three Four\nFive
\nSix \n");
+        EPSGDataFormatter.removeLF(buffer);
+        assertEquals("One,Two,Three Four Five Six", buffer.toString());
+    }
+}

Propchange: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGDataFormatterTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain;charset=UTF-8

Modified: sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/test/suite/ReferencingTestSuite.java
URL: http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/test/suite/ReferencingTestSuite.java?rev=1728885&r1=1728884&r2=1728885&view=diff
==============================================================================
--- sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/test/suite/ReferencingTestSuite.java
[UTF-8] (original)
+++ sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/test/suite/ReferencingTestSuite.java
[UTF-8] Sat Feb  6 21:22:04 2016
@@ -202,6 +202,7 @@ import org.junit.BeforeClass;
     org.apache.sis.referencing.factory.MultiAuthoritiesFactoryTest.class,
     org.apache.sis.referencing.factory.sql.EPSGFactoryTest.class,
     org.apache.sis.referencing.factory.sql.EPSGInstallerTest.class,
+    org.apache.sis.referencing.factory.sql.EPSGDataFormatterTest.class,
     org.apache.sis.referencing.EPSGFactoryFallbackTest.class,
     org.apache.sis.referencing.AuthorityFactoriesTest.class,
     org.apache.sis.referencing.CRSTest.class,



Mime
View raw message