// zhao
// 2021-07-19 8347f2fbddbd25369359dcb2da1233ac48a19fdc
using System;
using System.Collections.Generic;
using System.Text;
 
namespace QiHe.CodeLib
{
    /// <summary>
    /// TextEncoding
    /// </summary>
    public class TextEncoding
    {
        /// <summary>
        /// check if all charaters in text are ASCII charater
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        public static bool FitsInASCIIEncoding(string text)
        {
            if(string.IsNullOrEmpty(text))return true;
            byte[] bytes = Encoding.UTF8.GetBytes(text);
            for (int i = 0; i < bytes.Length; i++)
            {
                if (bytes[i] > 127)
                {
                    return false;
                }
            }
            return true;
        }
 
        /// <summary>
        /// Encodings the is right.
        /// </summary>
        /// <param name="encoding">The encoding.</param>
        /// <param name="data">The data.</param>
        /// <returns></returns>
        public static bool EncodingIsRight(Encoding encoding, byte[] data)
        {
            string text = encoding.GetString(data);
            byte[] bytes = encoding.GetBytes(text);
            if (Algorithm.ArrayEqual(bytes, data))
            {
                return true;
            }
            else
            {
                return false;
            }
        }
 
        /// <summary>
        /// Safes the decode string.
        /// </summary>
        /// <param name="encoding">The encoding.</param>
        /// <param name="data">The data.</param>
        /// <returns></returns>
        public static object SafeDecodeString(Encoding encoding, byte[] data)
        {
            string text = encoding.GetString(data);
            byte[] bytes = encoding.GetBytes(text);
            if (Algorithm.ArrayEqual(bytes, data))
            {
                return text;
            }
            else
            {
                return data;
            }
        }
 
        /// <summary>
        /// Group text by encoding.
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        public static List<Pair<string, string>> GroupTextByEncoding(string text)
        {
            List<Pair<string, string>> frags = new List<Pair<string, string>>();
            StringBuilder buffer = new StringBuilder();
            string encoding = "ascii";
            foreach (char ch in text)
            {
                if (encoding == "ascii" && ch > 0x7f)
                {
                    if (buffer.Length > 0)
                    {
                        frags.Add(new Pair<string, string>("ascii", buffer.ToString()));
                        buffer.Length = 0;
                    }
                    encoding = "unicode";
                }
                else if (encoding == "unicode" && ch <= 0x7f)
                {
                    if (buffer.Length > 0)
                    {
                        frags.Add(new Pair<string, string>("unicode", buffer.ToString()));
                        buffer.Length = 0;
                    }
                    encoding = "ascii";
                }
                buffer.Append(ch);
            }
            if (buffer.Length > 0)
            {
                frags.Add(new Pair<string, string>(encoding, buffer.ToString()));
            }
            return frags;
        }
 
        public static string RemoveByteOrderMark(string text)
        {
            if (text[0] == 0xFEFF || text[0] == 0xFFFE)
            {
                return text.Substring(1);
            }
            else
            {
                return text;
            }
        }
    }
}