/
Pinyin.cs
129 lines (113 loc) · 5.22 KB
/
Pinyin.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace CC_CEDICT.WindowsPhone
{
/// <summary>
/// One Pinyin syllable written in numbered-tone notation (for example "ni3" or "lu:4").
/// Parses the syllable and tone, renders the tone as a diacritic on the correct vowel,
/// and orders instances by syllable (case-insensitively) and then by tone.
/// </summary>
public class Pinyin : IComparable
{
    /// <summary>The constructor input after "u:" / "U:" has been rewritten to "v" / "V".</summary>
    public string Original;

    /// <summary>The letters of the syllable, without the trailing tone digit.</summary>
    public string Syllable;

    /// <summary>Tone of a syllable; the numeric values match the tone digits 1-5.</summary>
    public enum Tones { Unknown = 0, Flat = 1, Rising = 2, FallingRising = 3, Falling = 4, Neutral = 5 }

    /// <summary>Tone parsed from the input, or <see cref="Tones.Unknown"/> when no digit was given.</summary>
    public Tones Tone = Tones.Unknown;

    // Letters followed by an optional tone digit. Built once instead of on every construction.
    // TODO: this could be more discerning
    static readonly Regex syllablePattern = new Regex("^([a-zA-Z]+)([1-5])?$");

    /// <summary>
    /// Parses a numbered-tone Pinyin syllable.
    /// </summary>
    /// <param name="input">Letters optionally followed by one tone digit 1-5; "u:" may be used for u-umlaut.</param>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="FormatException">The input is not letters followed by an optional tone digit.</exception>
    public Pinyin(string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        // 'v' / 'V' stand in for u-umlaut internally; the markup table has rows for both cases.
        Original = input.Replace("u:", "v").Replace("U:", "V");

        Match match = syllablePattern.Match(Original);
        if (!match.Success)
        {
            throw new FormatException(String.Format("Invalid Pinyin: '{0}'", Original));
        }

        Syllable = match.Groups[1].Value;
        if (match.Groups[2].Success)
        {
            // The pattern guarantees a single ASCII digit in the range 1-5.
            Tone = (Tones)(match.Groups[2].Value[0] - '0');
        }
    }

    #region Pinyin markup implementation

    // For each vowel: index 0 is the unmarked form, indexes 1-4 carry the diacritic for tones 1-4.
    // The 'v' / 'V' rows hold the u-umlaut variants.
    static readonly Dictionary<char, char[]> markupTable = new Dictionary<char, char[]>
    {
        { 'a', new char[5]{ 'a', '\u0101', '\u00e1', '\u01ce', '\u00e0' } },
        { 'A', new char[5]{ 'A', '\u0100', '\u00c1', '\u01cd', '\u00c0' } },
        { 'e', new char[5]{ 'e', '\u0113', '\u00e9', '\u011b', '\u00e8' } },
        { 'E', new char[5]{ 'E', '\u0112', '\u00c9', '\u011a', '\u00c8' } },
        { 'i', new char[5]{ 'i', '\u012b', '\u00ed', '\u01d0', '\u00ec' } },
        { 'I', new char[5]{ 'I', '\u012a', '\u00cd', '\u01cf', '\u00cc' } },
        { 'o', new char[5]{ 'o', '\u014d', '\u00f3', '\u01d2', '\u00f2' } },
        { 'O', new char[5]{ 'O', '\u014c', '\u00d3', '\u01d1', '\u00d2' } },
        { 'u', new char[5]{ 'u', '\u016b', '\u00fa', '\u01d4', '\u00f9' } },
        { 'U', new char[5]{ 'U', '\u016a', '\u00da', '\u01d3', '\u00d9' } },
        { 'v', new char[5]{ '\u00fc', '\u01d6', '\u01d8', '\u01da', '\u01dc' } },
        { 'V', new char[5]{ '\u00dc', '\u01d5', '\u01d7', '\u01d9', '\u01db' } }
    };

    const string initial = "(?:[bcdfghjklmnpqrstwxyz]|[csz]h)";
    const string vowel = "[aeiouv]";
    const string priorityVowel = "[aeo]";
    const string secondaryVowel = "[iuv]";

    // CultureInvariant keeps the case-insensitive matching independent of the current culture
    // (for example, 'I' must match 'i' even under Turkish casing rules).
    const RegexOptions markupOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    // Exactly one vowel after the optional initial: that vowel takes the mark.
    static readonly Regex singleVowel = new Regex("^(" + initial + ")?(" + vowel + ")(?!" + vowel + ")", markupOptions);

    // Several vowels, the first being a/e/o: the first vowel takes the mark.
    static readonly Regex multiVowelPriority = new Regex("^(" + initial + ")?(" + priorityVowel + ")(?=" + vowel + ")", markupOptions);

    // Several vowels, the first being i/u/v: the second vowel takes the mark.
    static readonly Regex multiVowelNormal = new Regex("^(" + initial + "?" + secondaryVowel + ")(" + vowel + ")", markupOptions);

    // Tried in this order; the first rule that changes the syllable wins.
    static readonly Regex[] markupRules = new Regex[] { singleVowel, multiVowelPriority, multiVowelNormal };

    string _markedup = null;

    /// <summary>
    /// The syllable with its tone rendered as a diacritic and any remaining 'v' shown as u-umlaut.
    /// Syllables with an unknown or neutral tone get no diacritic. The value is computed on first
    /// access and cached, so later changes to <see cref="Syllable"/> or <see cref="Tone"/> are not reflected.
    /// </summary>
    public string MarkedUp
    {
        get
        {
            if (_markedup == null)
            {
                _markedup = BuildMarkedUp();
            }
            return _markedup;
        }
    }

    // Produces the marked-up form of Syllable for the current Tone.
    private string BuildMarkedUp()
    {
        string result = Syllable;

        if (Tone != Tones.Unknown && Tone != Tones.Neutral)
        {
            MatchEvaluator evaluator = new MatchEvaluator(this.MarkupEvaluator);
            foreach (Regex rule in markupRules)
            {
                string candidate = rule.Replace(Syllable, evaluator);
                if (candidate != Syllable)
                {
                    result = candidate;
                    break;
                }
            }
        }

        // special case, e.g. lve - the e takes the tone mark so v still needs to become u-umlaut
        return result
            .Replace('v', markupTable['v'][0])
            .Replace('V', markupTable['V'][0]);
    }

    /// <summary>
    /// Regex replacement callback: returns the matched text with the captured vowel swapped for
    /// its tone-marked form. The tone-placement patterns in this class always yield three groups
    /// (whole match, prefix, vowel); the two-group form (whole match, vowel) is also accepted.
    /// </summary>
    /// <param name="match">A match whose last capture group is a single vowel present in the markup table.</param>
    /// <returns>The prefix group (if any) followed by the tone-marked vowel.</returns>
    /// <exception cref="FormatException">The match has neither two nor three groups.</exception>
    public string MarkupEvaluator(Match match)
    {
        // Tones 1-4 index straight into the table row; 5 wraps to 0, the unmarked form.
        int toneIndex = (int)Tone % 5;
        char target;
        switch (match.Groups.Count)
        {
            case 2:
                target = match.Groups[1].Value[0];
                return markupTable[target][toneIndex].ToString();
            case 3:
                target = match.Groups[2].Value[0];
                return match.Groups[1].Value + markupTable[target][toneIndex].ToString();
            default:
                throw new FormatException("confused.com");
        }
    }

    #endregion

    #region IComparable interface

    /// <summary>
    /// Orders by syllable (ordinal, ignoring case) and then by tone number.
    /// </summary>
    /// <param name="obj">Another <see cref="Pinyin"/>, or null.</param>
    /// <returns>
    /// Negative, zero or positive when this instance sorts before, equal to or after
    /// <paramref name="obj"/>. Any instance sorts after null.
    /// </returns>
    /// <exception cref="InvalidCastException"><paramref name="obj"/> is not a <see cref="Pinyin"/>.</exception>
    public int CompareTo(object obj)
    {
        // IComparable contract: every instance is greater than null.
        if (obj == null)
        {
            return 1;
        }

        Pinyin other = (Pinyin)obj;

        // Ordinal comparison keeps the ordering identical on every device culture.
        int bySyllable = string.Compare(this.Syllable, other.Syllable, StringComparison.OrdinalIgnoreCase);
        if (bySyllable != 0)
        {
            return bySyllable;
        }
        return ((int)this.Tone).CompareTo((int)other.Tone);
    }

    #endregion
}
}