Smartssolutions

Tuesday, May 17, 2011

How to compare two folders contents using C#

This algorithm will help you compare the contents of two folders.

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;


namespace Namespace
{
    /// <summary>
    /// Console demo that decides whether two sets of files have the same names and
    /// the same contents by comparing the MD5 hash of each file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Demo entry point: hashes two hard-coded sets of files and prints whether
        /// both sets have the same contents.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The demo expects these files to exist already. Give them identical content
            // for a "same" result, then change one of them to see the other outcome.
            IEnumerable<FileInfo> folder1 = GetFiles(@"C:\temp\folder1\text1.txt", @"C:\temp\folder1\text2.txt");
            IEnumerable<FileInfo> folder2 = GetFiles(@"C:\temp\folder2\text1.txt", @"C:\temp\folder2\text2.txt");

            // One dictionary per folder: content hash -> file name.
            Dictionary<byte[], string> dictionary1 = HashFiles(folder1);
            Dictionary<byte[], string> dictionary2 = HashFiles(folder2);

            if (CompareFolders(dictionary1, dictionary2))
            {
                Console.WriteLine("Folders have the same contents");
            }
            else
            {
                Console.WriteLine("Folders don't have the same contents");
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Hashes every file in <paramref name="files"/> and records the result
        /// together with the file name.
        /// </summary>
        /// <param name="files">The files to hash; each one must exist and be readable.</param>
        /// <returns>
        /// A dictionary mapping each file's content hash to its name. The keys are arrays,
        /// which the default comparer compares by reference, so two files with identical
        /// content still produce two separate entries.
        /// </returns>
        static Dictionary<byte[], string> HashFiles(IEnumerable<FileInfo> files)
        {
            Dictionary<byte[], string> hashes = new Dictionary<byte[], string>();

            foreach (FileInfo file in files)
            {
                // Dispose the stream as soon as the hash is computed so the file
                // handle is not held until the garbage collector runs.
                using (FileStream stream = file.OpenRead())
                {
                    hashes.Add(FileContentHach(stream), file.Name);
                }
            }

            return hashes;
        }

        /// <summary>
        /// Compares the hashed contents of two folders. Folders holding a different
        /// number of files are reported as different without comparing any entry.
        /// </summary>
        /// <param name="Dictionary1">Content hash and file name of every file in the first folder.</param>
        /// <param name="Dictionary2">Content hash and file name of every file in the second folder.</param>
        /// <returns><c>true</c> when both folders hold the same files; otherwise <c>false</c>.</returns>
        internal static bool CompareFolders(Dictionary<byte[], string> Dictionary1, Dictionary<byte[], string> Dictionary2)
        {
            if (Dictionary1.Count != Dictionary2.Count)
            {
                return false;
            }

            return CompareTwoDictionaries(Dictionary1, Dictionary2);
        }

        /// <summary>
        /// Compares two dictionaries entry by entry. Each entry pairs the hash of a
        /// file's content (key) with the file's name (value).
        /// </summary>
        /// <remarks>
        /// Entries are compared position by position in enumeration order, so both
        /// dictionaries must have been filled in the same file order. The enumeration
        /// order of <c>Dictionary</c> is not a documented guarantee; this method relies
        /// on entries coming back in insertion order when nothing has been removed.
        /// </remarks>
        /// <param name="Dictionary1">Content hash and file name of every file in the first folder.</param>
        /// <param name="Dictionary2">Content hash and file name of every file in the second folder.</param>
        /// <returns>
        /// <c>true</c> when both dictionaries have the same number of entries and every
        /// pair of entries has the same file name and the same hash; otherwise <c>false</c>.
        /// </returns>
        internal static bool CompareTwoDictionaries(Dictionary<byte[], string> Dictionary1, Dictionary<byte[], string> Dictionary2)
        {
            KeyValuePair<byte[], string>[] entries1 = Dictionary1.ToArray();
            KeyValuePair<byte[], string>[] entries2 = Dictionary2.ToArray();

            // Guard against direct callers that skipped the count check in CompareFolders;
            // without it the loop below could index past the end of the shorter array.
            if (entries1.Length != entries2.Length)
            {
                return false;
            }

            for (int i = 0; i < entries1.Length; i++)
            {
                // Compare the i-th names. The previous version always compared index 0,
                // so a renamed file anywhere but the first position went unnoticed.
                if (!string.Equals(entries1[i].Value, entries2[i].Value, StringComparison.Ordinal))
                {
                    return false;
                }

                if (!CompareByteArrays(entries1[i].Key, entries2[i].Key))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Wraps each given path in a <see cref="FileInfo"/>. The sequence is produced
        /// lazily, one item per path, in the order the paths were passed.
        /// </summary>
        /// <remarks>
        /// Each path is handed to the <see cref="FileInfo"/> constructor unchanged:
        /// wildcard patterns such as <c>*.*</c> are not expanded, and no check is made
        /// that the file exists.
        /// </remarks>
        /// <param name="filepaths">Paths of the individual files.</param>
        /// <returns>One <see cref="FileInfo"/> per path.</returns>
        static IEnumerable<FileInfo> GetFiles(params string[] filepaths)
        {
            foreach (string path in filepaths)
            {
                yield return new FileInfo(path);
            }
        }

        /// <summary>
        /// Computes the MD5 hash of everything that can be read from
        /// <paramref name="stream"/>, starting at its current position.
        /// </summary>
        /// <remarks>
        /// The stream is read but not closed; the caller owns it. MD5 is adequate for
        /// spotting accidental differences but is not collision resistant, so do not
        /// rely on it where files may be crafted by an attacker.
        /// </remarks>
        /// <param name="stream">The open file stream to hash.</param>
        /// <returns>The MD5 hash bytes of the stream's content.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        internal static byte[] FileContentHach(FileStream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            // The using block releases the algorithm even when ComputeHash throws.
            using (MD5 hachAlgo = MD5.Create())
            {
                return hachAlgo.ComputeHash(stream);
            }
        }

        /// <summary>
        /// Compares two byte arrays element by element.
        /// </summary>
        /// <param name="array1">First array of bytes.</param>
        /// <param name="array2">Second array of bytes.</param>
        /// <returns>
        /// <c>true</c> when both arrays are the same instance (including both <c>null</c>)
        /// or have the same length and the same byte at every position; otherwise <c>false</c>.
        /// </returns>
        internal static bool CompareByteArrays(byte[] array1, byte[] array2)
        {
            if (ReferenceEquals(array1, array2))
            {
                return true;
            }

            if (array1 == null || array2 == null)
            {
                return false;
            }

            // Arrays of different length can never match. Returning here also keeps the
            // loop below from indexing past the end of the shorter array.
            if (array1.Length != array2.Length)
            {
                return false;
            }

            for (int i = 0; i < array1.Length; i++)
            {
                if (array1[i] != array2[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}


1 comment:

  1. Hi, I have an ArgumentException problem which says "Illegal characters in path".

    static IEnumerable GetFiles(params string[] filepaths)
    {
    foreach (var item in filepaths){
    yield return new FileInfo(item); } }

    Initially it works well if the IEnumerable is declared in this manner:

    IEnumerable Folder1 = GetFiles( @"C:\temp\folder1\text1.txt", @"C:\temp\folder1\text2.txt");
    IEnumerable Folder2 = GetFiles( @"C:\temp\folder2\text1.txt", @"C:\temp\folder2\text2.txt");

    But when I tried to change it to this, the exception occurs:

    IEnumerable Folder1 = GetFiles(@"C:\temp\folder1\*.*");
    IEnumerable Folder2 = GetFiles(@"C:\temp\folder2\*.*");

    What did I do wrong? I'm trying to make it read all the files in the folder without typing the files' names into the GetFiles() method.

    ReplyDelete