view backend/src/main/java/org/dive4elements/river/importer/Importer.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents a44200bf0547
children
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.dive4elements.artifacts.common.utils.XMLUtils;
import org.dive4elements.river.backend.utils.StringUtil;
import org.dive4elements.river.importer.parsers.AnnotationClassifier;
import org.dive4elements.river.importer.parsers.BundesWasserStrassenParser;
import org.dive4elements.river.importer.parsers.InfoGewParser;
import org.hibernate.HibernateException;
import org.hibernate.Transaction;
import org.slf4j.bridge.SLF4JBridgeHandler;
import org.w3c.dom.Document;

/**
 * Data Importer. Further processing happens per-river.
 *
 * <p>{@link #main(String[])} parses the info gew files given on the command
 * line and/or in the configuration, optionally links the parsed rivers with
 * their official numbers from the BWASTR id file and, unless a dry run is
 * configured, writes the rivers to the database.
 */
public class Importer
{
    static final String VERSION = "3.3.0";

    /** Private log. */
    private static final Logger log = Logger.getLogger(Importer.class);

    /** Name of the file with the official river numbers, looked up next to the first info gew file. */
    private static final String BWASTR_ID_CSV_FILE = "BWASTR_ID.csv";

    /** The rivers to be written by {@link #writeRivers()}. */
    protected List<ImportRiver> rivers;

    /** Creates an importer without rivers; set them with {@link #setRivers(List)}. */
    public Importer() {
    }

    /**
     * Creates an importer for the given rivers.
     *
     * @param rivers the rivers to be written to the database
     */
    public Importer(final List<ImportRiver> rivers) {
        this.rivers = rivers;
    }

    /** @return the rivers of this importer, as set by constructor or setter */
    public List<ImportRiver> getRivers() {
        return this.rivers;
    }

    /** @param rivers the rivers to be written to the database */
    public void setRivers(final List<ImportRiver> rivers) {
        this.rivers = rivers;
    }

    /** Write rivers and their dependencies/dependants to db. */
    public void writeRivers() {
        log.info("write rivers started");

        for (final ImportRiver river: this.rivers) {
            log.debug("writing river '" + river.getName() + "'");
            river.storeDependencies();
            // Flush after each river so that its pending changes are pushed to the database session.
            ImporterSession.getInstance().getDatabaseSession().flush();
        }

        log.info("write rivers finished");
    }

    /**
     * Writes all rivers within a single database transaction.
     *
     * <p>The transaction is committed if {@link #writeRivers()} succeeds. On any
     * {@link RuntimeException} a rollback is attempted and the original exception
     * is rethrown. For a {@link HibernateException} the chain of
     * {@link SQLException}s starting at its cause is logged first.
     *
     * @throws RuntimeException whatever beginning the transaction, writing the
     *         rivers or committing has thrown
     */
    public void writeToDatabase() {

        Transaction tx = null;

        try {
            tx = ImporterSession.getInstance().getDatabaseSession().beginTransaction();

            try {
                writeRivers();
            }
            catch (final HibernateException he) {
                // Log each SQLException linked via getNextException(), starting at the cause.
                Throwable t = he.getCause();
                while (t instanceof SQLException) {
                    final SQLException sqle = (SQLException) t;
                    log.error("SQL exeception chain:", sqle);
                    t = sqle.getNextException();
                }
                throw he;
            }

            tx.commit();
        }
        catch (final RuntimeException re) {
            if (tx != null) {
                try {
                    tx.rollback();
                }
                catch (final RuntimeException rollbackFailure) {
                    // Do not let a failing rollback hide the exception that caused it.
                    log.error("transaction rollback failed", rollbackFailure);
                }
            }
            throw re;
        }
    }

    /**
     * Creates the annotation classifier from the configured annotation types file.
     *
     * @return the classifier, or <code>null</code> if no annotation types file is
     *         configured, the file is not a readable regular file, or it cannot
     *         be parsed
     */
    public static AnnotationClassifier getAnnotationClassifier() {
        final String annotationTypes = Config.INSTANCE.getAnnotationTypes();

        if (annotationTypes == null) {
            log.info("no annotation types file configured.");
            return null;
        }

        final File file = new File(annotationTypes);

        log.info("use annotation types file '" + file + "'");

        if (!(file.isFile() && file.canRead())) {
            log.warn("annotation type file '" + file + "' is not readable.");
            return null;
        }

        final Document rules = XMLUtils.parseDocument(file, false, null);

        if (rules == null) {
            log.warn("cannot parse annotation types file.");
            return null;
        }

        return new AnnotationClassifier(rules);
    }


    /**
     * Starting point for importing river data.
     *
     * @param args paths of the info gew files to parse; a further file may be
     *        given by the configuration
     */
    public static void main(final String [] args) {

        configureLogging();

        log.info("IMPORTER version " + VERSION);
        log.info("START parsing rivers...");

        final InfoGewParser infoGewParser = new InfoGewParser(getAnnotationClassifier());

        // The BWASTR id file is expected in the directory of the first info gew file.
        File bwastrFile = null;

        // Main parsing loop for all river gew file paths in args
        // FIXME: Multiple rivers lead to reparsing the already parsed rivers again in InfoGewParser.parse...
        for (final String gewPath : args) {
            final File gewFile = parseInfoGew(infoGewParser, gewPath);
            if (bwastrFile == null) {
                bwastrFile = new File(gewFile.getParentFile(), BWASTR_ID_CSV_FILE);
            }
        }

        // Parse a single river gew file specified in the flys.backend.importer.infogew.file property
        // (seems to be an alternative to the args way)
        final String configuredGew = Config.INSTANCE.getInfoGewFile();
        if (configuredGew != null && configuredGew.length() > 0) {
            final File gewFile = parseInfoGew(infoGewParser, configuredGew);
            if (bwastrFile == null) {
                bwastrFile = new File(gewFile.getParentFile(), BWASTR_ID_CSV_FILE);
            }
        }

        // Look for official numbers: read bwastrFile (river-dir + BWASTR_ID_CSV_FILE).
        if (Config.INSTANCE.skipBWASTR()) {
            log.debug("Skip reading BWASTR_ID.csv");
        }
        else if (bwastrFile == null) {
            // No info gew file was given at all, so there is no directory to look in.
            log.warn("BWASTR-file could not be loaded: no info gew file given");
        }
        else {
            linkOfficialNumbers(infoGewParser.getRivers(), bwastrFile);
        }

        // Write all parsed objects to the database
        if (!Config.INSTANCE.dryRun()) {
            new Importer(infoGewParser.getRivers()).writeToDatabase();
        }
        else {
            log.info("Dry run, not writing to database.");
        }
    }

    /**
     * Parses one info gew file; logs the error and terminates the JVM with exit
     * code 1 if parsing fails.
     *
     * @param parser the parser collecting the rivers
     * @param gew path of the info gew file
     * @return the parsed file
     */
    private static File parseInfoGew(final InfoGewParser parser, final String gew) {
        log.info("Parsing info gew file: " + gew);
        final File gewFile = new File(gew);
        try {
            parser.parse(gewFile);
        }
        catch (final Exception e) {
            log.error("error parsing gew: " + gew, e);
            System.exit(1);
        }
        return gewFile;
    }

    /**
     * Reads the BWASTR id file and sets the official number of every river whose
     * name matches a key of the parsed map according to
     * {@link StringUtil#containsIgnoreCase}. An {@link IOException} while reading
     * is logged as a warning only, the rivers are left unchanged then.
     *
     * @param rivers the parsed rivers
     * @param bwastrFile the BWASTR id file
     */
    private static void linkOfficialNumbers(final List<ImportRiver> rivers, final File bwastrFile) {
        final BundesWasserStrassenParser bwastrIdParser = new BundesWasserStrassenParser();
        try {
            bwastrIdParser.parse(bwastrFile);
            final HashMap<String,Long> map = bwastrIdParser.getMap();

            // Now link rivers with official numbers.
            for (final ImportRiver river: rivers) {
                for (final Map.Entry<String, Long> entry: map.entrySet()) {
                    if (StringUtil.containsIgnoreCase(river.getName(), entry.getKey())) {
                        river.setOfficialNumber(entry.getValue());
                        log.debug(river.getName() + " is mapped to bwastr " + entry.getValue());
                    }
                }
            }
        }
        catch (final IOException ioe) {
            log.warn("BWASTR-file could not be loaded: " + ioe.getMessage());
        }
    }

    /**
     * Tries to load the Log4j configuration from the file named by the system
     * property 'log4j.configuration' and installs the SLF4J bridge handler.
     * Nothing is configured here if the property is not set or does not name a
     * readable regular file. Problems are reported on stderr because logging is
     * not set up yet.
     */
    private static void configureLogging() {
        final String configPath = System.getProperty("log4j.configuration");
        if (configPath == null) {
            // new File(null) would throw a NullPointerException; report the cause plainly instead.
            System.err.println("System property 'log4j.configuration' is not set.");
            return;
        }
        try {
            final File propFile = new File(configPath);
            if (propFile.isFile() && propFile.canRead()) {
                try {
                    PropertyConfigurator.configure(propFile.toURI().toURL());
                    SLF4JBridgeHandler.install();
                }
                catch (final MalformedURLException mue) {
                    mue.printStackTrace(System.err);
                }
            }
        }
        catch (final Exception e) {
            // Best effort: a broken logging setup must not stop the import.
            e.printStackTrace(System.err);
        }
    }

}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org