annotate backend/contrib/shpimporter/utils.py @ 5841:8bd9b551456c

Added header to Java files in ETL process.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 17:06:50 +0200
parents 5aa05a7a34b7
children
rev   line source
2798
5a654f2e35bc Added a python tool to import shapefiles into database.
Ingo Weinzierl <ingo.weinzierl@intevation.de>
parents:
diff changeset
1 import os
4874
b1d7e600b43b (importer) Add utility function to convert paths to utf-8
Andre Heinecke <aheinecke@intevation.de>
parents: 3654
diff changeset
2 import sys
5006
769593a84606 Importer: Behold, Logging!
Andre Heinecke <aheinecke@intevation.de>
parents: 4995
diff changeset
3 import logging
769593a84606 Importer: Behold, Logging!
Andre Heinecke <aheinecke@intevation.de>
parents: 4995
diff changeset
4
4970
174fbaa3d344 Add handling of River Names and remove target_src parameter
Andre Heinecke <aheinecke@intevation.de>
parents: 4935
diff changeset
5 try:
174fbaa3d344 Add handling of River Names and remove target_src parameter
Andre Heinecke <aheinecke@intevation.de>
parents: 4935
diff changeset
6 from osgeo import ogr
5077
c5187ab9f571 Fix Syntax Errror
Andre Heinecke <aheinecke@intevation.de>
parents: 5036
diff changeset
7 except ImportError:
4970
174fbaa3d344 Add handling of River Names and remove target_src parameter
Andre Heinecke <aheinecke@intevation.de>
parents: 4935
diff changeset
8 import ogr
2798
5a654f2e35bc Added a python tool to import shapefiles into database.
Ingo Weinzierl <ingo.weinzierl@intevation.de>
parents:
diff changeset
9
5006
769593a84606 Importer: Behold, Logging!
Andre Heinecke <aheinecke@intevation.de>
parents: 4995
diff changeset
# Module wide logger of the importer utilities
logger = logging.getLogger("utils")

# File extension that identifies a shapefile
SHP = '.shp'

# Statements to look up the id of a river by its name. The first one uses
# the "%s" bind placeholder, the second one the ":s" placeholder that is
# selected for oracle connections.
SQL_SELECT_RIVER_ID = "SELECT id FROM rivers WHERE name = %s"
SQL_SELECT_RIVER_ID_ORA = "SELECT id FROM rivers WHERE name = :s"
2798
5a654f2e35bc Added a python tool to import shapefiles into database.
Ingo Weinzierl <ingo.weinzierl@intevation.de>
parents:
diff changeset
15
5a654f2e35bc Added a python tool to import shapefiles into database.
Ingo Weinzierl <ingo.weinzierl@intevation.de>
parents:
diff changeset
def findShapefiles(path):
    """
    Recursively collects all shapefiles found below "path".

    Returns a list of (name, filepath) tuples. name is the file name
    without the trailing ".shp" extension, filepath is the directory
    reported by os.walk and the file name joined by a "/".
    """
    shapes = []

    for root, dirs, files in os.walk(path):
        if not files:
            continue

        logger.debug("Processing directory '%s' with %i files ",
                     root, len(files))

        for f in files:
            # Only a trailing extension counts. Comparing str.find() offsets
            # accepted every three character name without the extension
            # (find() returns -1) and missed names containing ".shp" twice.
            if f.endswith(SHP):
                # Cut the extension off the end only. str.replace() would
                # also remove a ".shp" from the middle of the name.
                shapes.append((f[:-len(SHP)], root + "/" + f))

    return shapes
5a654f2e35bc Added a python tool to import shapefiles into database.
Ingo Weinzierl <ingo.weinzierl@intevation.de>
parents:
diff changeset
31
5036
d6860fca89e4 Pull information about the backend into dgm.
Andre Heinecke <aheinecke@intevation.de>
parents: 5006
diff changeset
def getRiverId(dbconn, name, oracle):
    """
    Returns the id of the river "name" or 0 if no such river was found.
    Dbconn must be a python database connection api compliant object.
    If oracle is true the statement with the ":s" bind placeholder is
    executed, otherwise the one with the "%s" placeholder.
    """
    if oracle:
        # This should not be necessary. But I don't know how to make
        # the placeholder work both ways. aheinecke - 02/2013
        stmt = SQL_SELECT_RIVER_ID_ORA
    else:
        stmt = SQL_SELECT_RIVER_ID

    cur = dbconn.cursor()
    try:
        cur.execute(stmt, (getUTF8(name),))
        row = cur.fetchone()
    finally:
        # Release the cursor even if the lookup fails
        cur.close()

    if row:
        return row[0]
    return 0
174fbaa3d344 Add handling of River Names and remove target_src parameter
Andre Heinecke <aheinecke@intevation.de>
parents: 4935
diff changeset
50
4935
c0a58558b817 Importer: - Handle regular expressions for attribute names
Andre Heinecke <aheinecke@intevation.de>
parents: 4887
diff changeset
def getUTF8(string):
    """
    Tries to convert the string to a UTF-8 encoding by first checking if it
    is UTF-8 and then trying cp1252.
    Unicode objects are encoded directly as there is no encoding to guess.
    Returns a UTF-8 encoded byte string or throws a UnicodeDecodeError if
    the string can not be decoded with either encoding.
    """
    if isinstance(string, type(u"")):
        # Already decoded text. Trying to decode it again raises a TypeError.
        return string.encode("UTF-8")
    try:
        return string.decode("UTF-8").encode("UTF-8")
    except UnicodeDecodeError:
        # Probably European Windows names so lets try again
        return string.decode("cp1252").encode("UTF-8")
c0a58558b817 Importer: - Handle regular expressions for attribute names
Andre Heinecke <aheinecke@intevation.de>
parents: 4887
diff changeset
61
4874
b1d7e600b43b (importer) Add utility function to convert paths to utf-8
Andre Heinecke <aheinecke@intevation.de>
parents: 3654
diff changeset
def getUTF8Path(path):
    """
    Tries to convert path to utf-8 by first checking the filesystemencoding
    and trying the default windows encoding afterwards.
    Unicode objects are encoded directly as there is no encoding to guess.
    Returns a UTF-8 encoded byte string or throws a UnicodeDecodeError
    """
    if isinstance(path, type(u"")):
        # Already decoded text. Trying to decode it again raises a TypeError.
        return path.encode("UTF-8")
    # The filesystemencoding may be None if it could not be determined
    fs_encoding = sys.getfilesystemencoding() or "UTF-8"
    try:
        return path.decode(fs_encoding).encode("UTF-8")
    except UnicodeDecodeError:
        # Probably European Windows names so lets try again
        return path.decode("cp1252").encode("UTF-8")
4995
998b29c8d2fd Improve debug output for unsupported features and skipped shapefiles
Andre Heinecke <aheinecke@intevation.de>
parents: 4976
diff changeset
73
998b29c8d2fd Improve debug output for unsupported features and skipped shapefiles
Andre Heinecke <aheinecke@intevation.de>
parents: 4976
diff changeset
# Maps the OGR geometry type constants to their names for debug output.
# The byte order constants ogr.wkbXDR (0) and ogr.wkbNDR (1) are left out
# on purpose: they share their values with ogr.wkbUnknown and ogr.wkbPoint
# and listing them made geometry type 0 show up as 'wkbXDR'.
WKB_MAP = {
    ogr.wkb25Bit: 'wkb25Bit',
    ogr.wkbGeometryCollection: 'wkbGeometryCollection',
    ogr.wkbGeometryCollection25D: 'wkbGeometryCollection25D',
    ogr.wkbLineString: 'wkbLineString',
    ogr.wkbLineString25D: 'wkbLineString25D',
    ogr.wkbLinearRing: 'wkbLinearRing',
    ogr.wkbMultiLineString: 'wkbMultiLineString',
    ogr.wkbMultiLineString25D: 'wkbMultiLineString25D',
    ogr.wkbMultiPoint: 'wkbMultiPoint',
    ogr.wkbMultiPoint25D: 'wkbMultiPoint25D',
    ogr.wkbMultiPolygon: 'wkbMultiPolygon',
    ogr.wkbMultiPolygon25D: 'wkbMultiPolygon25D',
    ogr.wkbNone: 'wkbNone',
    ogr.wkbPoint: 'wkbPoint',
    ogr.wkbPoint25D: 'wkbPoint25D',
    ogr.wkbPolygon: 'wkbPolygon',
    ogr.wkbPolygon25D: 'wkbPolygon25D',
    ogr.wkbUnknown: 'wkbUnknown',
}
998b29c8d2fd Improve debug output for unsupported features and skipped shapefiles
Andre Heinecke <aheinecke@intevation.de>
parents: 4976
diff changeset
96
998b29c8d2fd Improve debug output for unsupported features and skipped shapefiles
Andre Heinecke <aheinecke@intevation.de>
parents: 4976
diff changeset
def getWkbString(type):
    """
    Returns the name WKB_MAP holds for the geometry type "type" or
    "Unknown" if the map has no name for it.
    """
    label = WKB_MAP.get(type)
    if not label:
        return "Unknown"
    return label
998b29c8d2fd Improve debug output for unsupported features and skipped shapefiles
Andre Heinecke <aheinecke@intevation.de>
parents: 4976
diff changeset
99
5137
04eb62eae722 River Axes and HWS_lines are now multiline
Andre Heinecke <aheinecke@intevation.de>
parents: 5077
diff changeset
def convertToMultiLine(feature):
    """
    Converts the geometry of a feature to a multiline geometry.
    The feature is modified in place and returned. A feature without a
    geometry is returned unchanged.
    """
    geometry = feature.GetGeometryRef()
    if geometry is None:
        # Nothing to convert
        return feature
    # SRS information is lost while forcing to multiline
    srs = geometry.GetSpatialReference()
    geometry = ogr.ForceToMultiLineString(geometry)
    geometry.AssignSpatialReference(srs)
    feature.SetGeometry(geometry)
    return feature
5294
3f35d9db48c1 Force UESG to multipolygon and import source field for messungen
Andre Heinecke <aheinecke@intevation.de>
parents: 5219
diff changeset
111
3f35d9db48c1 Force UESG to multipolygon and import source field for messungen
Andre Heinecke <aheinecke@intevation.de>
parents: 5219
diff changeset
def convertToMultiPolygon(feature):
    """
    Converts the geometry of a feature to a multipolygon geometry.
    The feature is modified in place and returned. A feature without a
    geometry is returned unchanged.
    """
    geometry = feature.GetGeometryRef()
    if geometry is None:
        # Nothing to convert
        return feature
    # SRS information is lost while forcing to multipolygon
    srs = geometry.GetSpatialReference()
    geometry = ogr.ForceToMultiPolygon(geometry)
    geometry.AssignSpatialReference(srs)
    feature.SetGeometry(geometry)
    return feature

http://dive4elements.wald.intevation.org