Mercurial > dive4elements > river
diff flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java @ 1206:c7370734b872
Prevent parsing and storing PRF duplicates.
flys-backend/trunk@2315 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Mon, 11 Jul 2011 09:31:52 +0000 |
parents | 31895d24387e |
children | 7121a40671ff |
line wrap: on
line diff
--- a/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java Fri Jul 08 09:45:48 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java Mon Jul 11 09:31:52 2011 +0000 @@ -1,15 +1,28 @@ package de.intevation.flys.utils; import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.FileInputStream; import java.util.Stack; +import java.util.List; +import java.util.Set; +import java.util.HashSet; +import java.util.ArrayList; import org.apache.log4j.Logger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + public class FileTools { private static Logger log = Logger.getLogger(FileTools.class); + public static final String DIGEST = + System.getProperty("flys.backend.file.cmp.digest", "MD5"); + private FileTools() { } @@ -74,5 +87,110 @@ return curr; } + + public static class HashedFile + implements Comparable<HashedFile> + { + protected File file; + protected long length; + protected byte [] hash; + + public HashedFile(File file) { + this.file = file; + length = file.length(); + } + + public File getFile() { + return file; + } + + protected byte [] getHash() { + if (hash == null) { + InputStream in = null; + + try { + in = new FileInputStream(file); + + MessageDigest digest = MessageDigest.getInstance(DIGEST); + + byte [] buf = new byte[40*1024]; + int r; + + while ((r = in.read(buf)) >= 0) { + digest.update(buf, 0, r); + } + + hash = digest.digest(); + } + catch (IOException ioe) { + log.error(ioe); + hash = new byte[0]; + } + catch (NoSuchAlgorithmException nsae) { + log.error(nsae); + hash = new byte[0]; + } + finally { + if (in != null) { + try { + in.close(); + } + catch (IOException ioe) { + log.error(ioe); + } + } + } + } + return hash; + } + + @Override + public int compareTo(HashedFile other) { + if (length < other.length) return -1; + if (length > other.length) return +1; + return 0; + } + + private static int compare(byte [] a, byte [] b) { + if (a.length < b.length) return -1; + if (a.length > b.length) return +1; + for (int i = 0; i < a.length; ++i) { + int x = a[i] & 0xff; + int y = b[i] & 0xff; + if (x < y) return -1; + if (x > y) return +1; + } + return 0; + } + + @Override + public boolean equals(Object other) { + return other instanceof HashedFile + && ((HashedFile)other).compareTo(this) == 0; + } + + @Override + public int hashCode() { + return (int)(length ^ (length >>> 32)); + } + } // class HashedFile + + public static List<File> uniqueFiles(List<File> files) { + + Set<HashedFile> set = new HashSet<HashedFile>(); + + for (File file: files) { + if (!set.add(new HashedFile(file))) { + log.warn("file '" + file + "' is a duplicate."); + } + } + + ArrayList<File> out = new ArrayList<File>(set.size()); + for (HashedFile hf: set) { + out.add(hf.file); + } + + return out; + } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :