comparison flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java @ 1206:c7370734b872

Prevent parsing and storing PRF duplicates. flys-backend/trunk@2315 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 11 Jul 2011 09:31:52 +0000
parents 31895d24387e
children 7121a40671ff
comparison
equal deleted inserted replaced
1205:5f1506fc7636 1206:c7370734b872
1 package de.intevation.flys.utils; 1 package de.intevation.flys.utils;
2 2
3 import java.io.File; 3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.FileInputStream;
4 7
5 import java.util.Stack; 8 import java.util.Stack;
9 import java.util.List;
10 import java.util.Set;
11 import java.util.HashSet;
12 import java.util.ArrayList;
6 13
7 import org.apache.log4j.Logger; 14 import org.apache.log4j.Logger;
15
16 import java.security.MessageDigest;
17 import java.security.NoSuchAlgorithmException;
8 18
9 public class FileTools 19 public class FileTools
10 { 20 {
11 private static Logger log = Logger.getLogger(FileTools.class); 21 private static Logger log = Logger.getLogger(FileTools.class);
22
23 public static final String DIGEST =
24 System.getProperty("flys.backend.file.cmp.digest", "MD5");
12 25
13 private FileTools() { 26 private FileTools() {
14 } 27 }
15 28
16 public static File repair(File file) { 29 public static File repair(File file) {
72 return file; 85 return file;
73 } 86 }
74 87
75 return curr; 88 return curr;
76 } 89 }
90
91 public static class HashedFile
92 implements Comparable<HashedFile>
93 {
94 protected File file;
95 protected long length;
96 protected byte [] hash;
97
98 public HashedFile(File file) {
99 this.file = file;
100 length = file.length();
101 }
102
103 public File getFile() {
104 return file;
105 }
106
107 protected byte [] getHash() {
108 if (hash == null) {
109 InputStream in = null;
110
111 try {
112 in = new FileInputStream(file);
113
114 MessageDigest digest = MessageDigest.getInstance(DIGEST);
115
116 byte [] buf = new byte[40*1024];
117 int r;
118
119 while ((r = in.read(buf)) >= 0) {
120 digest.update(buf, 0, r);
121 }
122
123 hash = digest.digest();
124 }
125 catch (IOException ioe) {
126 log.error(ioe);
127 hash = new byte[0];
128 }
129 catch (NoSuchAlgorithmException nsae) {
130 log.error(nsae);
131 hash = new byte[0];
132 }
133 finally {
134 if (in != null) {
135 try {
136 in.close();
137 }
138 catch (IOException ioe) {
139 log.error(ioe);
140 }
141 }
142 }
143 }
144 return hash;
145 }
146
147 @Override
148 public int compareTo(HashedFile other) {
149 if (length < other.length) return -1;
150 if (length > other.length) return +1;
151 return 0;
152 }
153
154 private static int compare(byte [] a, byte [] b) {
155 if (a.length < b.length) return -1;
156 if (a.length > b.length) return +1;
157 for (int i = 0; i < a.length; ++i) {
158 int x = a[i] & 0xff;
159 int y = b[i] & 0xff;
160 if (x < y) return -1;
161 if (x > y) return +1;
162 }
163 return 0;
164 }
165
166 @Override
167 public boolean equals(Object other) {
168 return other instanceof HashedFile
169 && ((HashedFile)other).compareTo(this) == 0;
170 }
171
172 @Override
173 public int hashCode() {
174 return (int)(length ^ (length >>> 32));
175 }
176 } // class HashedFile
177
178 public static List<File> uniqueFiles(List<File> files) {
179
180 Set<HashedFile> set = new HashSet<HashedFile>();
181
182 for (File file: files) {
183 if (!set.add(new HashedFile(file))) {
184 log.warn("file '" + file + "' is a duplicate.");
185 }
186 }
187
188 ArrayList<File> out = new ArrayList<File>(set.size());
189 for (HashedFile hf: set) {
190 out.add(hf.file);
191 }
192
193 return out;
194 }
77 } 195 }
78 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : 196 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org