using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace ReadFile
{
    /// <summary>
    /// Form that, when loaded, runs a small demonstration comparing two ways of
    /// decoding a byte stream that is read from a file in fixed-size chunks:
    /// stateless <see cref="Encoding.GetString(byte[], int, int)"/> per chunk versus a
    /// stateful <see cref="Decoder"/> shared across chunks.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Creates the form and initializes its designer-generated components.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Load handler: runs the decoding demonstration.</summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            Test();
        }

        /// <summary>
        /// Encodes a test string, writes the bytes to <c>.\characters.bin</c>, then reads the
        /// file back 10 bytes at a time and decodes it twice: once with
        /// <c>enc.GetString</c> on each chunk independently, and once with a single
        /// <see cref="Decoder"/> that carries state between chunks. Each result is compared
        /// with the original string and the outcome is written to the console.
        /// </summary>
        public void Test()
        {
            const string path = @".\characters.bin";
            const int count = 10;   // Number of bytes to read at a time.
            const int growth = 50;  // Minimum number of chars added when the char buffer grows.

            // Use default replacement fallback for invalid encoding.
            // (A UnicodeEncoding(true, false, false) can be substituted here to run the
            // same demonstration with UTF-16.)
            UTF8Encoding enc = new UTF8Encoding();

            // Define a string with various Unicode characters.
            string str1 = "AB YZ 19 DEGREE \xb0 BULLET \x95 \uD800\udc05 \u00e4";
            str1 += "Unicode characters. \u00a9 \u010C s \u0062\u0308";
            Console.WriteLine("Created original string...\n");

            // Convert string to byte array and persist it.
            byte[] bytes = enc.GetBytes(str1);
            using (FileStream fs = File.Create(path))
            using (BinaryWriter bw = new BinaryWriter(fs))
            {
                bw.Write(bytes);
            }

            byte[] bytesRead = new byte[count]; // Buffer (byte array).
            int read;                           // Number of bytes actually read.

            // Try using the Encoding object for all operations. Each chunk is decoded on
            // its own, so a multi-byte sequence that straddles a chunk boundary cannot be
            // decoded correctly and is replaced by the fallback character.
            StringBuilder chunkDecoded = new StringBuilder();
            using (FileStream fsIn = File.OpenRead(path))
            using (BinaryReader br = new BinaryReader(fsIn))
            {
                // Read returns 0 only at end of stream; a short read is not treated as EOF.
                while ((read = br.Read(bytesRead, 0, count)) > 0)
                {
                    chunkDecoded.Append(enc.GetString(bytesRead, 0, read));
                }
            }
            string str2 = chunkDecoded.ToString(); // Decoded string.

            // NOTE: the label below says "UnicodeEncoding", but enc is a UTF8Encoding here.
            Console.WriteLine("Decoded string using UnicodeEncoding.GetString()...");
            CompareForEquality(str1, str2);
            Console.WriteLine();

            // Use a Decoder for all operations. The decoder keeps incomplete trailing
            // bytes from one call and combines them with the bytes of the next call.
            Decoder decoder = enc.GetDecoder();
            char[] chars = new char[growth];
            int index = 0; // Next character to write in array.
            using (FileStream fsIn = File.OpenRead(path))
            using (BinaryReader br = new BinaryReader(fsIn))
            {
                while ((read = br.Read(bytesRead, 0, count)) > 0)
                {
                    // Make sure the char buffer can hold everything this chunk produces.
                    int needed = index + decoder.GetCharCount(bytesRead, 0, read);
                    if (needed > chars.Length)
                    {
                        Array.Resize(ref chars, Math.Max(needed, chars.Length + growth));
                    }

                    // GetChars returns the number of chars written to the array.
                    index += decoder.GetChars(bytesRead, 0, read, chars, index);
                }
            }

            // Instantiate a string with the decoded characters.
            string str3 = new String(chars, 0, index);
            Console.WriteLine("Decoded string using UnicodeEncoding.Decoder.GetString()...");
            CompareForEquality(str1, str3);
        }

        /// <summary>
        /// Writes whether <paramref name="original"/> and <paramref name="decoded"/> are
        /// ordinally equal; when they differ, also writes both strings followed by the
        /// hexadecimal value of each UTF-16 code unit they contain.
        /// </summary>
        /// <param name="original">The string that was encoded.</param>
        /// <param name="decoded">The string obtained by decoding the bytes.</param>
        private static void CompareForEquality(string original, string decoded)
        {
            bool result = original.Equals(decoded, StringComparison.Ordinal);
            Console.WriteLine("original = decoded: {0}", result);
            if (result)
            {
                return;
            }

            Console.WriteLine("Code points in original string:{0}", original);
            WriteCodeUnits(original);
            Console.WriteLine("Code points in decoded string:{0}", decoded);
            WriteCodeUnits(decoded);
        }

        /// <summary>
        /// Writes each UTF-16 code unit of <paramref name="text"/> as a four-digit
        /// hexadecimal number followed by a space, then ends the line.
        /// </summary>
        /// <param name="text">The string whose code units are written.</param>
        private static void WriteCodeUnits(string text)
        {
            foreach (char ch in text)
            {
                Console.Write("{0} ", ((int)ch).ToString("X4"));
            }
            Console.WriteLine();
        }

        // NOTE(review): the listing below does not correspond to the current code. It has
        // no code points for the "DEGREE \xb0 BULLET \x95" part of str1, and it omits the
        // string that is now printed after each "Code points in ... string:" label. Treat
        // it as illustrative of the expected False/True pattern only.
        //
        // The example displays the following output:
        //    Created original string...
        //
        //    Decoded string using UnicodeEncoding.GetString()...
        //    original = decoded: False
        //    Code points in original string:
        //    0041 0042 0020 0059 005A 0020 0031 0039 0020 D800 DC05 0020 00E4 0055 006E 0069 0063 006F
        //    0064 0065 0020 0063 0068 0061 0072 0061 0063 0074 0065 0072 0073 002E 0020 00A9 0020 010C
        //    0020 0073 0020 0062 0308
        //    Code points in decoded string:
        //    0041 0042 0020 0059 005A 0020 0031 0039 0020 FFFD FFFD 0020 00E4 0055 006E 0069 0063 006F
        //    0064 0065 0020 0063 0068 0061 0072 0061 0063 0074 0065 0072 0073 002E 0020 00A9 0020 010C
        //    0020 0073 0020 0062 0308
        //
        //    Decoded string using UnicodeEncoding.Decoder.GetString()...
        //    original = decoded: True
    }
}