122 lines
4.1 KiB
C#
122 lines
4.1 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.ComponentModel;
|
|
using System.Data;
|
|
using System.Drawing;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
using System.Windows.Forms;
|
|
using System.IO;
|
|
|
|
namespace ReadFile
{
    /// <summary>
    /// Demo form that writes a UTF-8 encoded string to a file and then decodes the
    /// file again in small fixed-size chunks, once with <see cref="Encoding.GetString(byte[], int, int)"/>
    /// and once with a stateful <see cref="Decoder"/>, printing whether each result
    /// matches the original string.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Scratch file the sample writes and then reads back (relative to the working directory).</summary>
        private const string DataFilePath = @".\characters.bin";

        /// <summary>Number of bytes requested from the file per read.</summary>
        private const int ChunkSize = 10;

        /// <summary>Initial size, and minimum growth step, of the decoder's output buffer.</summary>
        private const int CharBufferIncrement = 50;

        /// <summary>
        /// Creates the form and runs the designer-generated initialization.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Load handler: runs the encoding round-trip demo.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            Test();
        }

        /// <summary>
        /// Encodes a sample string as UTF-8, writes it to <c>.\characters.bin</c>, then
        /// decodes the file in 10-byte chunks in two ways and reports, via the console,
        /// whether each decoded string equals the original.
        /// </summary>
        /// <remarks>
        /// The chunk size is deliberately small. For the sample string the 10-byte
        /// boundaries fall inside the UTF-8 encodings of U+10005 and U+010C, so decoding
        /// each chunk independently with <c>GetString</c> yields replacement characters,
        /// whereas the <see cref="Decoder"/> carries the partial sequence over to the
        /// next chunk.
        /// </remarks>
        public void Test()
        {
            // Parameterless constructor: invalid byte sequences are replaced, not thrown.
            UTF8Encoding enc = new UTF8Encoding();

            // Define a string with various Unicode characters, including a surrogate
            // pair and a combining sequence.
            string str1 = "AB YZ 19 DEGREE \xb0 BULLET \x95 \uD800\udc05 \u00e4";
            str1 += "Unicode characters. \u00a9 \u010C s \u0062\u0308";
            Console.WriteLine("Created original string...\n");

            // Persist the raw encoded bytes (no length prefix, no BOM).
            File.WriteAllBytes(DataFilePath, enc.GetBytes(str1));

            // Pass 1: decode every chunk on its own with the Encoding object.
            string str2 = DecodeChunksWithGetString(enc);
            Console.WriteLine("Decoded string using UTF8Encoding.GetString()...");
            CompareForEquality(str1, str2);
            Console.WriteLine();

            // Pass 2: decode the same chunks through a single stateful Decoder.
            string str3 = DecodeChunksWithDecoder(enc);
            Console.WriteLine("Decoded string using UTF8Encoding.GetDecoder().GetChars()...");
            CompareForEquality(str1, str3);
        }

        /// <summary>
        /// Reads the data file chunk by chunk and decodes each chunk independently.
        /// </summary>
        /// <param name="encoding">Encoding used to decode each chunk.</param>
        /// <returns>The concatenation of the independently decoded chunks.</returns>
        private static string DecodeChunksWithGetString(Encoding encoding)
        {
            StringBuilder decoded = new StringBuilder();
            byte[] buffer = new byte[ChunkSize];

            using (FileStream input = File.OpenRead(DataFilePath))
            {
                int read;
                // Read returns 0 only at end of stream; a short read is not EOF.
                while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                {
                    decoded.Append(encoding.GetString(buffer, 0, read));
                }
            }

            return decoded.ToString();
        }

        /// <summary>
        /// Reads the data file chunk by chunk and decodes it through one stateful decoder.
        /// </summary>
        /// <param name="encoding">Encoding that supplies the decoder.</param>
        /// <returns>The decoded text.</returns>
        private static string DecodeChunksWithDecoder(Encoding encoding)
        {
            Decoder decoder = encoding.GetDecoder();
            char[] chars = new char[CharBufferIncrement];
            int index = 0; // Next free position in chars.
            byte[] buffer = new byte[ChunkSize];

            using (FileStream input = File.OpenRead(DataFilePath))
            {
                int read;
                while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                {
                    chars = EnsureCapacity(chars, index + decoder.GetCharCount(buffer, 0, read));
                    index += decoder.GetChars(buffer, 0, read, chars, index);
                }
            }

            // Flush so that a trailing incomplete byte sequence is surfaced through the
            // decoder's fallback instead of being silently dropped.
            chars = EnsureCapacity(chars, index + decoder.GetCharCount(buffer, 0, 0, true));
            index += decoder.GetChars(buffer, 0, 0, chars, index, true);

            return new string(chars, 0, index);
        }

        /// <summary>
        /// Returns <paramref name="chars"/>, enlarged if necessary so that it can hold
        /// at least <paramref name="required"/> characters.
        /// </summary>
        /// <param name="chars">Current buffer.</param>
        /// <param name="required">Minimum length needed.</param>
        /// <returns>The same array if large enough; otherwise a resized copy.</returns>
        private static char[] EnsureCapacity(char[] chars, int required)
        {
            if (required > chars.Length)
            {
                Array.Resize(ref chars, Math.Max(required, chars.Length + CharBufferIncrement));
            }

            return chars;
        }

        /// <summary>
        /// Writes whether two strings are ordinally equal and, if they are not, dumps
        /// the UTF-16 code units of both.
        /// </summary>
        /// <param name="original">The string that was encoded.</param>
        /// <param name="decoded">The string obtained by decoding.</param>
        private static void CompareForEquality(string original, string decoded)
        {
            // Static Equals tolerates null on either side.
            bool result = string.Equals(original, decoded, StringComparison.Ordinal);
            Console.WriteLine("original = decoded: {0}", result);
            if (result)
            {
                return;
            }

            Console.WriteLine("Code points in original string:{0}", original);
            WriteCodeUnits(original);

            Console.WriteLine("Code points in decoded string:{0}", decoded);
            WriteCodeUnits(decoded);
        }

        /// <summary>
        /// Writes each UTF-16 code unit of <paramref name="text"/> as four hex digits
        /// followed by a space, then ends the line.
        /// </summary>
        /// <param name="text">Text to dump; <c>null</c> is treated as empty.</param>
        private static void WriteCodeUnits(string text)
        {
            foreach (char ch in text ?? string.Empty)
            {
                Console.Write("{0:X4} ", (ushort)ch);
            }

            Console.WriteLine();
        }

        // Expected console output (shape only; the hex dumps depend on the sample string):
        //    Created original string...
        //
        //    Decoded string using UTF8Encoding.GetString()...
        //    original = decoded: False
        //    Code points in original string:<original text>
        //    <hex code units of the original>
        //    Code points in decoded string:<decoded text>
        //    <hex code units of the decoded text, with FFFD where a chunk boundary split a character>
        //
        //    Decoded string using UTF8Encoding.GetDecoder().GetChars()...
        //    original = decoded: True
    }
}
|