changeset 4935:c0a58558b817 dami

Importer: - Handle regular expressions for attribute names - Convert Strings to UTF-8 - Add regular expressions for hws_points values
author Andre Heinecke <aheinecke@intevation.de>
date Thu, 31 Jan 2013 12:23:41 +0100
parents 11b459a3eb5c
children 3dbe89cbf0c1
files flys-backend/contrib/shpimporter/hws.py flys-backend/contrib/shpimporter/importer.py flys-backend/contrib/shpimporter/utils.py
diffstat 3 files changed, 97 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/flys-backend/contrib/shpimporter/hws.py	Thu Jan 31 12:21:59 2013 +0100
+++ b/flys-backend/contrib/shpimporter/hws.py	Thu Jan 31 12:23:41 2013 +0100
@@ -91,16 +91,16 @@
 
 class HWSPoints(Importer):
     fieldmap = {
-            "Name" : "name",
-            "Quelle" : "source",
-            "Anmerkung" : "description",
-            "Stand" : "status_date",
-            "Verband" : "agency",
-            "Deich_KM" : "dike_km",
-            "Bereich" : "range",
-            "Höhe_SOLL" : "z_target",
-            "WSP_BfG100" : "rated_level",
-            "Hoehe_IST" : "z",
+            "name$" : "name",
+            "quelle$" : "source",
+            "anmerkung$" : "description",
+            "stand$" : "status_date",
+            "verband$" : "agency",
+            "Deich_{0,1}KM$" : "dike_km",
+            "Bereich$" : "range",
+            "H[oeö]{0,2}he_{0,1}SOLL$" : "z_target",
+            "(WSP_){0,1}BfG_{0,1}100$" : "rated_level",
+            "H[oeö]{0,2}he_{0,1}IST$" : "z",
         }
 
     def getPath(self, base):
@@ -132,53 +132,56 @@
 
         newFeat.SetGeometry(geometry)
 
-        newFeat.SetFID(feat.GetFID())
-
         newFeat.SetField("ogr_fid", feat.GetFID())
-
-        if self.IsFieldSet(feat, "Art"):
-            self.handled("Art")
-            kind_id = HWS_KIND.get(feat.GetField("Art"))
+        artname = self.searchField("art$")
+        if self.IsFieldSet(feat, artname):
+            self.handled(artname)
+            kind_id = HWS_KIND.get(feat.GetField(artname))
             if not kind_id:
-                print ("Unbekannte Art: %s" % \
-                        feat.GetField("Art"))
+                print ("Unknown Art: %s" % \
+                        feat.GetField(artname))
             else:
                 newFeat.SetField("kind_id", kind_id)
 
-        if self.IsFieldSet(feat, "Bundesland"):
-            self.handled("Bundesland")
-            fed_id = FED_STATES.get(feat.GetField("Bundesland"))
+        fname = self.searchField("Bundesland$")
+        if self.IsFieldSet(feat, fname):
+            self.handled(fname)
+            fed_id = FED_STATES.get(feat.GetField(fname))
 
             if not fed_id:
-                print ("Unbekanntes Bundesland: %s" % \
+                print ("Unknown Bundesland: %s" % \
                         feat.GetField("Bundesland"))
             else:
                 newFeat.SetField("fed_state_id", fed_id)
 
-        if self.IsFieldSet(feat, "river_id"):
-            self.handled("river_id")
-            if feat.GetField("river_id") != self.river_id:
-                print ("River_id mismatch between shapefile and"
-                     " importer parameter.")
-            newFeat.SetField("river_id", feat.GetField("river_id"))
-        else:
-            newFeat.SetField("river_id", self.river_id)
-
-        if self.IsFieldSet(feat, "Ufer"):
-            self.handled("Ufer")
-            shoreString = feat.GetField("Ufer")
+        fname = self.searchField("(ufer$)|(flussseite$)")
+        if self.IsFieldSet(feat, fname):
+            self.handled(fname)
+            shoreString = feat.GetField(fname)
             if "links" in shoreString.lower():
                 newFeat.SetField("shore_side", True)
             elif "rechts" in shoreString.lower():
                 newFeat.SetField("shore_side", False)
 
-        if not self.IsFieldSet(feat, "Name"):
-            self.handled("Name")
+
+        fname = self.searchField("river_{0,1}id$")
+        if self.IsFieldSet(feat, fname):
+            self.handled(fname)
+            if feat.GetField(fname) != self.river_id:
+                print ("River_id mismatch between shapefile and"
+                     " importer parameter.")
+            newFeat.SetField("river_id", feat.GetField(fname))
+        else:
+            newFeat.SetField("river_id", self.river_id)
+
+        fname = self.searchField("name$")
+        if not self.IsFieldSet(feat, fname):
             newFeat.SetField("name", args['name'])
 
-        if self.IsFieldSet(feat, "offiziell"):
-            self.handled("offiziell")
-            offiziell = feat.GetField("offiziell")
+        fname = self.searchField("offiziell$")
+        if self.IsFieldSet(feat, fname):
+            self.handled(fname)
+            offiziell = feat.GetField(fname)
             if offiziell == "1" or offiziell == 1:
                 newFeat.SetField("offiziell", True)
             else:
@@ -186,7 +189,8 @@
 
         if self.IsFieldSet(newFeat, "z") and \
             self.IsFieldSet(newFeat, "rated_level"):
-            self.handled("Freibord_m")
+            fname = self.searchField("freibord(_m){0,1}$")
+            self.handled(fname)
             z = newFeat.GetFieldAsDouble("z")
             rl = newFeat.GetFieldAsDouble("rated_level")
             newFeat.SetField("freeboard", z - rl)
--- a/flys-backend/contrib/shpimporter/importer.py	Thu Jan 31 12:21:59 2013 +0100
+++ b/flys-backend/contrib/shpimporter/importer.py	Thu Jan 31 12:23:41 2013 +0100
@@ -5,6 +5,7 @@
 import osr
 import shpimporter
 import utils
+import re
 
 class Importer:
 
@@ -16,6 +17,7 @@
         self.dest_srs.ImportFromEPSG(config.target_srs)
         self.handled_fields = []
         self.tracking_import = False
+        self.srcLayer = None
 
     def getKind(self, path):
         raise NotImplementedError("Importer.getKind is abstract!")
@@ -30,10 +32,39 @@
         raise NotImplementedError("Importer.getTablename is abstract!")
 
     def IsFieldSet(self, feat, name):
+        if not name:
+            return False
         if feat.GetFieldIndex(name) == -1:
             return False # Avoids an Error in IsFieldSet
         return feat.IsFieldSet(feat.GetFieldIndex(name))
 
+    def searchField(self, regex):
+        """
+        Searches for a field in the current src layer that matches
+        the expression regex.
+        Throws an exception if more than one field matches.
+        Field names are compared case-insensitively (re.IGNORECASE).
+        @param regex: The regex to look for
+
+        @returns: The field name as a string, or None if nothing matches
+        """
+
+        if not hasattr(self.srcLayer, "fieldnames"):
+            self.srcLayer.fieldnames = []
+            for i in range(0, self.srcLayer.GetLayerDefn().GetFieldCount()):
+                self.srcLayer.fieldnames.append(
+                    self.srcLayer.GetLayerDefn().GetFieldDefn(i).GetNameRef())
+
+        result = None
+        for name in self.srcLayer.fieldnames:
+            match = re.match(regex, name, re.IGNORECASE)
+            if match:
+                if result:
+                    raise Exception("More then one field matches: %s" % regex)
+                else:
+                    result = match.group(0)
+        return result
+
     def IsDoubleFieldSet(self, feat, name):
         try:
             isset = feat.GetFieldAsDouble(name)
@@ -90,15 +121,19 @@
         """
         Checks the mapping dictonary for key value pairs to
         copy from the source to the destination feature.
+        The keys can be regular expressions that are matched
+        against the source fieldnames.
 
         The Key is the attribute of the source feature to be copied
         into the target attribute named by the dict's value.
         """
         self.tracking_import = True
-        self.handled_fields.extend(mapping.keys())
         for key, value in mapping.items():
-            if src.GetFieldIndex(key) == -1:
+            realname = self.searchField(key)
+            if realname == None:
                 continue
+            if not realname in self.handled_fields:
+                self.handled_fields.append(realname)
             # 0 OFTInteger, Simple 32bit integer
             # 1 OFTIntegerList, List of 32bit integers
             # 2 OFTReal, Double Precision floating point
@@ -111,11 +146,11 @@
             # 9 OFTDate, Date
             # 10 OFTTime, Time
             # 11 OFTDateTime, Date and Time
-            if src.IsFieldSet(src.GetFieldIndex(key)):
-                if src.GetFieldType(key) == 2:
-                    target.SetField(value, src.GetFieldAsDouble(key))
+            if src.IsFieldSet(src.GetFieldIndex(realname)):
+                if src.GetFieldType(realname) == 2:
+                    target.SetField(value, src.GetFieldAsDouble(realname))
                 else:
-                    target.SetField(value, src.GetField(key))
+                    target.SetField(value, utils.getUTF8(src.GetField(realname)))
 
     def shape2Database(self, srcLayer, name, path):
         destLayer = self.dbconn.GetLayerByName(self.getTablename())
@@ -132,6 +167,7 @@
         shpimporter.DEBUG("Try to add %i features to database." % count)
 
         srcLayer.ResetReading()
+        self.srcLayer = srcLayer
 
         geomType    = -1
         success     = 0
--- a/flys-backend/contrib/shpimporter/utils.py	Thu Jan 31 12:21:59 2013 +0100
+++ b/flys-backend/contrib/shpimporter/utils.py	Thu Jan 31 12:23:41 2013 +0100
@@ -20,6 +20,17 @@
 
     return shapes
 
+def getUTF8(string):
+    """
+    Tries to convert the string to a UTF-8 encoding by first checking if it
+    is UTF-8 and then trying cp1252
+    """
+    try:
+        return unicode.encode(unicode(string, "UTF-8"), "UTF-8")
+    except UnicodeDecodeError:
+        # Probably European Windows names so let's try again
+        return unicode.encode(unicode(string, "cp1252"), "UTF-8")
+
 def getUTF8Path(path):
     """
     Tries to convert path to utf-8 by first checking the filesystemencoding

http://dive4elements.wald.intevation.org