diff flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java @ 1206:c7370734b872

Prevent parsing and storing PRF duplicates. flys-backend/trunk@2315 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 11 Jul 2011 09:31:52 +0000
parents 31895d24387e
children 7121a40671ff
line wrap: on
line diff
--- a/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java	Fri Jul 08 09:45:48 2011 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java	Mon Jul 11 09:31:52 2011 +0000
@@ -1,15 +1,28 @@
 package de.intevation.flys.utils;
 
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.FileInputStream;
 
 import java.util.Stack;
+import java.util.List;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.ArrayList;
 
 import org.apache.log4j.Logger;
 
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
 public class FileTools
 {
     private static Logger log = Logger.getLogger(FileTools.class);
 
+    /**
+     * Name of the message digest algorithm handed to
+     * {@link MessageDigest#getInstance(String)} when file contents are
+     * hashed. Read once, at class initialization, from the system property
+     * <code>flys.backend.file.cmp.digest</code>; falls back to "MD5" when
+     * the property is not set.
+     */
+    public static final String DIGEST =
+        System.getProperty("flys.backend.file.cmp.digest", "MD5");
+
+    /** Private constructor: this class cannot be instantiated from outside. */
     private FileTools() {
     }
 
@@ -74,5 +87,110 @@
 
         return curr;
     }
+
+    /**
+     * Wrapper around a {@link File} that defines equality by content:
+     * two instances are equal when their files have the same length and
+     * the same message digest (algorithm: {@link FileTools#DIGEST}).
+     *
+     * The length is read once in the constructor. The digest is computed
+     * lazily on first demand and then cached, so a file is only read
+     * when another file of exactly the same length has to be told apart.
+     */
+    public static class HashedFile
+    implements Comparable<HashedFile>
+    {
+        /** The wrapped file. */
+        protected File    file;
+        /** Length of the file in bytes, as reported at construction time. */
+        protected long    length;
+        /** Cached digest of the file content; null until first requested. */
+        protected byte [] hash;
+
+        /**
+         * Wraps the given file and records its current length.
+         * The content is not read here.
+         *
+         * @param file The file to wrap.
+         */
+        public HashedFile(File file) {
+            this.file = file;
+            length = file.length();
+        }
+
+        /**
+         * @return The wrapped file.
+         */
+        public File getFile() {
+            return file;
+        }
+
+        /**
+         * Returns the digest of the file content, computing and caching
+         * it on the first call.
+         *
+         * If the file cannot be read or the configured digest algorithm
+         * is not available, the problem is logged and an empty array is
+         * cached instead. Consequently two files of the same length that
+         * both fail to hash compare as equal.
+         *
+         * @return The digest bytes, or an empty array on failure.
+         */
+        protected byte [] getHash() {
+            if (hash == null) {
+                InputStream in = null;
+
+                try {
+                    in = new FileInputStream(file);
+
+                    MessageDigest digest = MessageDigest.getInstance(DIGEST);
+
+                    byte [] buf = new byte[40*1024];
+                    int r;
+
+                    // read() returns -1 at end of stream.
+                    while ((r = in.read(buf)) >= 0) {
+                        digest.update(buf, 0, r);
+                    }
+
+                    hash = digest.digest();
+                }
+                catch (IOException ioe) {
+                    log.error("cannot hash file '" + file + "'", ioe);
+                    hash = new byte[0];
+                }
+                catch (NoSuchAlgorithmException nsae) {
+                    log.error("unknown digest '" + DIGEST + "'", nsae);
+                    hash = new byte[0];
+                }
+                finally {
+                    if (in != null) {
+                        try {
+                            in.close();
+                        }
+                        catch (IOException ioe) {
+                            log.error("cannot close file '" + file + "'", ioe);
+                        }
+                    }
+                }
+            }
+            return hash;
+        }
+
+        /**
+         * Orders by file length first and, for files of equal length,
+         * by their content digests. The digest is only computed in the
+         * equal-length case, which keeps the common path cheap.
+         *
+         * @param other The instance to compare with.
+         * @return Negative, zero or positive as this instance is less
+         *         than, equal to or greater than the other one.
+         */
+        @Override
+        public int compareTo(HashedFile other) {
+            if (other == this) return 0;
+            if (length < other.length) return -1;
+            if (length > other.length) return +1;
+            // Same size does not imply same content: let the digests decide.
+            return compare(getHash(), other.getHash());
+        }
+
+        /**
+         * Compares two byte arrays: the shorter one sorts first, arrays
+         * of equal length are compared bytewise as unsigned values.
+         */
+        private static int compare(byte [] a, byte [] b) {
+            if (a.length < b.length) return -1;
+            if (a.length > b.length) return +1;
+            for (int i = 0; i < a.length; ++i) {
+                int x = a[i] & 0xff;
+                int y = b[i] & 0xff;
+                if (x < y) return -1;
+                if (x > y) return +1;
+            }
+            return 0;
+        }
+
+        /**
+         * Two instances are equal if {@link #compareTo(HashedFile)}
+         * yields zero, i.e. same length and same digest.
+         */
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) return true;
+            return other instanceof HashedFile
+                && ((HashedFile)other).compareTo(this) == 0;
+        }
+
+        /**
+         * Hash code derived from the length only, so that putting an
+         * instance into a hash based collection does not force the file
+         * to be read. Equal instances have equal lengths, which keeps
+         * this consistent with {@link #equals(Object)}.
+         */
+        @Override
+        public int hashCode() {
+            return (int)(length ^ (length >>> 32));
+        }
+    } // class HashedFile
+
+    /**
+     * Removes duplicates from a list of files. Two files count as
+     * duplicates when their {@link HashedFile} wrappers are equal.
+     * Of each group of duplicates the first file in the list is kept;
+     * every dropped file is reported with a warning in the log.
+     *
+     * The result preserves the order of the input list.
+     *
+     * @param files The files to filter. The list itself is not modified.
+     * @return A new list with the files that are not duplicates of an
+     *         earlier entry.
+     */
+    public static List<File> uniqueFiles(List<File> files) {
+
+        Set<HashedFile> seen = new HashSet<HashedFile>();
+        List<File>      out  = new ArrayList<File>(files.size());
+
+        for (File file: files) {
+            // Set.add() returns false if an equal element is already present.
+            if (seen.add(new HashedFile(file))) {
+                out.add(file);
+            }
+            else {
+                log.warn("file '" + file + "' is a duplicate.");
+            }
+        }
+
+        return out;
+    }
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org