Mercurial > dive4elements > river
changeset 4935:c0a58558b817 dami
Importer: - Handle regular expressions for attribute names
- Convert Strings to UTF-8
- Add regular expressions for hws_points values
author | Andre Heinecke <aheinecke@intevation.de> |
---|---|
date | Thu, 31 Jan 2013 12:23:41 +0100 |
parents | 11b459a3eb5c |
children | 3dbe89cbf0c1 |
files | flys-backend/contrib/shpimporter/hws.py flys-backend/contrib/shpimporter/importer.py flys-backend/contrib/shpimporter/utils.py |
diffstat | 3 files changed, 97 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/flys-backend/contrib/shpimporter/hws.py Thu Jan 31 12:21:59 2013 +0100 +++ b/flys-backend/contrib/shpimporter/hws.py Thu Jan 31 12:23:41 2013 +0100 @@ -91,16 +91,16 @@ class HWSPoints(Importer): fieldmap = { - "Name" : "name", - "Quelle" : "source", - "Anmerkung" : "description", - "Stand" : "status_date", - "Verband" : "agency", - "Deich_KM" : "dike_km", - "Bereich" : "range", - "Höhe_SOLL" : "z_target", - "WSP_BfG100" : "rated_level", - "Hoehe_IST" : "z", + "name$" : "name", + "quelle$" : "source", + "anmerkung$" : "description", + "stand$" : "status_date", + "verband$" : "agency", + "Deich_{0,1}KM$" : "dike_km", + "Bereich$" : "range", + "H[oeö]{0,2}he_{0,1}SOLL$" : "z_target", + "(WSP_){0,1}BfG_{0,1}100$" : "rated_level", + "H[oeö]{0,2}he_{0,1}IST$" : "z", } def getPath(self, base): @@ -132,53 +132,56 @@ newFeat.SetGeometry(geometry) - newFeat.SetFID(feat.GetFID()) - newFeat.SetField("ogr_fid", feat.GetFID()) - - if self.IsFieldSet(feat, "Art"): - self.handled("Art") - kind_id = HWS_KIND.get(feat.GetField("Art")) + artname = self.searchField("art$") + if self.IsFieldSet(feat, artname): + self.handled(artname) + kind_id = HWS_KIND.get(feat.GetField(artname)) if not kind_id: - print ("Unbekannte Art: %s" % \ - feat.GetField("Art")) + print ("Unknown Art: %s" % \ + feat.GetField(artname)) else: newFeat.SetField("kind_id", kind_id) - if self.IsFieldSet(feat, "Bundesland"): - self.handled("Bundesland") - fed_id = FED_STATES.get(feat.GetField("Bundesland")) + fname = self.searchField("Bundesland$") + if self.IsFieldSet(feat, fname): + self.handled(fname) + fed_id = FED_STATES.get(feat.GetField(fname)) if not fed_id: - print ("Unbekanntes Bundesland: %s" % \ + print ("Unknown Bundesland: %s" % \ feat.GetField("Bundesland")) else: newFeat.SetField("fed_state_id", fed_id) - if self.IsFieldSet(feat, "river_id"): - self.handled("river_id") - if feat.GetField("river_id") != self.river_id: - print ("River_id mismatch between shapefile and" - " importer parameter.") - newFeat.SetField("river_id", feat.GetField("river_id")) - else: - newFeat.SetField("river_id", self.river_id) - - if self.IsFieldSet(feat, "Ufer"): - self.handled("Ufer") - shoreString = feat.GetField("Ufer") + fname = self.searchField("(ufer$)|(flussseite$)") + if self.IsFieldSet(feat, fname): + self.handled(fname) + shoreString = feat.GetField(fname) if "links" in shoreString.lower(): newFeat.SetField("shore_side", True) elif "rechts" in shoreString.lower(): newFeat.SetField("shore_side", False) - if not self.IsFieldSet(feat, "Name"): - self.handled("Name") + + fname = self.searchField("river_{0,1}id$") + if self.IsFieldSet(feat, fname): + self.handled(fname) + if feat.GetField(fname) != self.river_id: + print ("River_id mismatch between shapefile and" + " importer parameter.") + newFeat.SetField("river_id", feat.GetField(fname)) + else: + newFeat.SetField("river_id", self.river_id) + + fname = self.searchField("name$") + if not self.IsFieldSet(feat, fname): newFeat.SetField("name", args['name']) - if self.IsFieldSet(feat, "offiziell"): - self.handled("offiziell") - offiziell = feat.GetField("offiziell") + fname = self.searchField("offiziell$") + if self.IsFieldSet(feat, fname): + self.handled(fname) + offiziell = feat.GetField(fname) if offiziell == "1" or offiziell == 1: newFeat.SetField("offiziell", True) else: @@ -186,7 +189,8 @@ if self.IsFieldSet(newFeat, "z") and \ self.IsFieldSet(newFeat, "rated_level"): - self.handled("Freibord_m") + fname = self.searchField("freibord(_m){0,1}$") + self.handled(fname) z = newFeat.GetFieldAsDouble("z") rl = newFeat.GetFieldAsDouble("rated_level") newFeat.SetField("freeboard", z - rl)
--- a/flys-backend/contrib/shpimporter/importer.py Thu Jan 31 12:21:59 2013 +0100 +++ b/flys-backend/contrib/shpimporter/importer.py Thu Jan 31 12:23:41 2013 +0100 @@ -5,6 +5,7 @@ import osr import shpimporter import utils +import re class Importer: @@ -16,6 +17,7 @@ self.dest_srs.ImportFromEPSG(config.target_srs) self.handled_fields = [] self.tracking_import = False + self.srcLayer = None def getKind(self, path): raise NotImplementedError("Importer.getKind is abstract!") @@ -30,10 +32,39 @@ raise NotImplementedError("Importer.getTablename is abstract!") def IsFieldSet(self, feat, name): + if not name: + return False if feat.GetFieldIndex(name) == -1: return False # Avoids an Error in IsFieldSet return feat.IsFieldSet(feat.GetFieldIndex(name)) + def searchField(self, regex): + """ + Searches for a field in the current src layer that matches + the expression regex. + Throws an exception if more then one field matches + @param feat: The feature to search for attributes + @param regex: The regex to look for + + @returns: The field name as a string + """ + + if not hasattr(self.srcLayer, "fieldnames"): + self.srcLayer.fieldnames = [] + for i in range(0, self.srcLayer.GetLayerDefn().GetFieldCount()): + self.srcLayer.fieldnames.append( + self.srcLayer.GetLayerDefn().GetFieldDefn(i).GetNameRef()) + + result = None + for name in self.srcLayer.fieldnames: + match = re.match(regex, name, re.IGNORECASE) + if match: + if result: + raise Exception("More then one field matches: %s" % regex) + else: + result = match.group(0) + return result + def IsDoubleFieldSet(self, feat, name): try: isset = feat.GetFieldAsDouble(name) @@ -90,15 +121,19 @@ """ Checks the mapping dictonary for key value pairs to copy from the source to the destination feature. + The keys can be reguar expressions that are matched + agains the source fieldnames The Key is the attribute of the source feature to be copied into the target attribute named by the dict's value. """ self.tracking_import = True - self.handled_fields.extend(mapping.keys()) for key, value in mapping.items(): - if src.GetFieldIndex(key) == -1: + realname = self.searchField(key) + if realname == None: continue + if not realname in self.handled_fields: + self.handled_fields.append(realname) # 0 OFTInteger, Simple 32bit integer # 1 OFTIntegerList, List of 32bit integers # 2 OFTReal, Double Precision floating point @@ -111,11 +146,11 @@ # 9 OFTDate, Date # 10 OFTTime, Time # 11 OFTDateTime, Date and Time - if src.IsFieldSet(src.GetFieldIndex(key)): - if src.GetFieldType(key) == 2: - target.SetField(value, src.GetFieldAsDouble(key)) + if src.IsFieldSet(src.GetFieldIndex(realname)): + if src.GetFieldType(realname) == 2: + target.SetField(value, src.GetFieldAsDouble(realname)) else: - target.SetField(value, src.GetField(key)) + target.SetField(value, utils.getUTF8(src.GetField(realname))) def shape2Database(self, srcLayer, name, path): destLayer = self.dbconn.GetLayerByName(self.getTablename()) @@ -132,6 +167,7 @@ shpimporter.DEBUG("Try to add %i features to database." % count) srcLayer.ResetReading() + self.srcLayer = srcLayer geomType = -1 success = 0
--- a/flys-backend/contrib/shpimporter/utils.py Thu Jan 31 12:21:59 2013 +0100 +++ b/flys-backend/contrib/shpimporter/utils.py Thu Jan 31 12:23:41 2013 +0100 @@ -20,6 +20,17 @@ return shapes +def getUTF8(string): + """ + Tries to convert the string to a UTF-8 encoding by first checking if it + is UTF-8 and then trying cp1252 + """ + try: + return unicode.encode(unicode(string, "UTF-8"), "UTF-8") + except UnicodeDecodeError: + # Probably European Windows names so lets try again + return unicode.encode(unicode(string, "cp1252"), "UTF-8") + def getUTF8Path(path): """ Tries to convert path to utf-8 by first checking the filesystemencoding