Encoding management

This is a fairly simple class, but an important one: it manages the encoding needed to read and write file content correctly.
The supported encodings are as follows (but you can add others):
  • ANSI
  • UTF-8
  • UTF-8 without BOM
  • UTF-16 Little Endian
  • UTF-16 Big Endian
  • UTF-32 Little Endian

Code

Here is the code:

using System;
using System.IO;
using System.Linq;
using System.Text;

namespace MyNamespace
{
    /// <summary>
    /// Helpers for choosing the text encoding used to read and write files:
    /// the configured default encoding, and a best-effort detection of the
    /// encoding of an existing file.
    /// </summary>
    /// <author>Marco Macciò</author>
    internal static class EncodingUtil
    {
        // Length in bytes of the longest byte order mark among the encodings
        // probed by GetFileEncoding (UTF-32 Little Endian: FF FE 00 00).
        private const int MaxPreambleLength = 4;

        #region Internal Methods

        /// <summary>
        /// Returns the encoding selected by the integer configuration parameter "Encoding".
        /// </summary>
        /// <returns>
        /// 0: ANSI (<see cref="Encoding.Default"/>); 1: UTF-8 with BOM; 2: UTF-8 without BOM;
        /// 3: UTF-16 Little Endian; 4: UTF-16 Big Endian; 5: UTF-32 Little Endian.
        /// Any other value yields UTF-8 with BOM.
        /// </returns>
        internal static Encoding GetDefaultEncoding()
        {
            switch (ConfigUtil.GetIntParameter("Encoding"))
            {
                case 0: //ANSI
                    return Encoding.Default;
                case 1: //UTF-8
                    return new UTF8Encoding(true);
                case 2: //UTF-8 without BOM
                    return new UTF8Encoding(false);
                case 3: //UTF-16 Little Endian
                    return Encoding.Unicode;
                case 4: //UTF-16 Big Endian
                    return Encoding.BigEndianUnicode;
                case 5: //UTF-32 Little Endian
                    return Encoding.UTF32;

                default: //UTF-8
                    return new UTF8Encoding(true);
            }
        }

        /// <summary>
        /// Detects the encoding of a file. A byte order mark is looked for first; when
        /// there is none, the content is examined to choose between UTF-8 without BOM
        /// and ANSI (<see cref="Encoding.Default"/>).
        /// </summary>
        /// <param name="fileName">Path of the file to inspect.</param>
        /// <param name="withBOM">
        /// Set to <c>false</c> only when the file is identified as UTF-8 without BOM;
        /// <c>true</c> in every other case (including the ANSI result and the I/O error fallback).
        /// </param>
        /// <returns>
        /// The detected encoding, or <see cref="Encoding.Default"/> when the file
        /// cannot be read because of an <see cref="IOException"/>.
        /// </returns>
        /// <remarks>
        /// Only <see cref="IOException"/> is handled here; other exceptions raised while
        /// opening or reading the file propagate to the caller. When no byte order mark
        /// is found the whole file is loaded in memory, and a length that does not fit
        /// in an <see cref="Int32"/> makes <see cref="Convert.ToInt32(long)"/> throw.
        /// </remarks>
        internal static Encoding GetFileEncoding(String fileName, out bool withBOM)
        {
            withBOM = true;
            Encoding result = null;
            FileInfo fileInfo = new FileInfo(fileName);

            try
            {
                using (FileStream fileStream = fileInfo.OpenRead())
                {
                    result = DetectFromPreamble(fileStream);

                    if (result == null) //Encoding is "new UTF8Encoding(false)" or "Encoding.Default"?
                    {
                        if (IsUtf8WithoutBom(fileStream))
                        {
                            withBOM = false;
                            result = new UTF8Encoding(false);
                        }
                        else
                        {
                            result = Encoding.Default;
                        }
                    }
                }
            }
            catch (IOException)
            {
                // Deliberate best effort: an unreadable file falls back to
                // Encoding.Default (see the return statement below).
            }

            return result ?? Encoding.Default;
        }

        #endregion Internal Methods

        #region Private Methods

        /// <summary>
        /// Returns the encoding whose byte order mark starts the stream, or null when none matches.
        /// </summary>
        private static Encoding DetectFromPreamble(Stream stream)
        {
            // UTF-32 LE (FF FE 00 00) must be probed before UTF-16 LE (FF FE),
            // because the UTF-16 mark is a prefix of the UTF-32 one.
            Encoding[] candidates = { Encoding.BigEndianUnicode,
                                      Encoding.UTF32,
                                      new UTF8Encoding(true),
                                      Encoding.Unicode };

            byte[] head = new byte[MaxPreambleLength];
            stream.Position = 0;
            int headLength = ReadFully(stream, head);

            foreach (Encoding candidate in candidates)
            {
                if (StartsWith(head, headLength, candidate.GetPreamble()))
                {
                    return candidate;
                }
            }

            return null;
        }

        /// <summary>
        /// Decides, for a stream without byte order mark, whether its content should be
        /// treated as UTF-8 without BOM (true) or as ANSI (false).
        /// </summary>
        private static bool IsUtf8WithoutBom(Stream stream)
        {
            byte[] content = new byte[Convert.ToInt32(stream.Length)];

            // Rewind: the preamble probe has already consumed the first bytes.
            stream.Position = 0;
            int contentLength = ReadFully(stream, content);

            String ansiText = Encoding.Default.GetString(content, 0, contentLength);
            String utf8Text = new UTF8Encoding(false).GetString(content, 0, contentLength);

            if (ansiText == utf8Text)
            {
                // Both decodings agree (e.g. the content is pure ASCII), so the content
                // cannot tell them apart: let the configured default decide.
                // Equals is required here; Encoding does not overload ==, so == would
                // compare references and never match a freshly created instance.
                return GetDefaultEncoding().Equals(new UTF8Encoding(false));
            }

            // Otherwise look for the replacement character U+FFFD, which the UTF-8
            // decoder emits for invalid byte sequences: its presence means "ANSI".
            return utf8Text.IndexOf('\uFFFD') < 0;
        }

        /// <summary>
        /// Reads from the stream until the buffer is full or the end of the stream is
        /// reached, and returns the number of bytes actually read.
        /// </summary>
        private static int ReadFully(Stream stream, byte[] buffer)
        {
            int total = 0;

            // Stream.Read may return fewer bytes than requested, so keep reading.
            while (total < buffer.Length)
            {
                int read = stream.Read(buffer, total, buffer.Length - total);

                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            return total;
        }

        /// <summary>
        /// Tells whether the first <paramref name="dataLength"/> bytes of
        /// <paramref name="data"/> begin with the non-empty <paramref name="prefix"/>.
        /// </summary>
        private static bool StartsWith(byte[] data, int dataLength, byte[] prefix)
        {
            if (prefix.Length == 0 || prefix.Length > dataLength)
            {
                return false;
            }

            for (int i = 0; i < prefix.Length; i++)
            {
                if (data[i] != prefix[i])
                {
                    return false;
                }
            }

            return true;
        }

        #endregion Private Methods
    }
}

Last edited Mar 5, 2013 at 8:36 AM by MarcoCav, version 5

Comments

No comments yet.