feat: 支持含飞键双拼方案更新中英混输词库 (#685)
This commit is contained in:
parent
8a2a06ecc6
commit
72876379b2
@ -108,17 +108,21 @@ var digitMap = map[string]string{
|
||||
}
|
||||
|
||||
type schema struct {
|
||||
name string
|
||||
desc string
|
||||
path string
|
||||
mapping map[string]string
|
||||
file *os.File
|
||||
name string
|
||||
desc string
|
||||
combinationType string
|
||||
path string
|
||||
mapping map[string]string
|
||||
additionalMapping map[string]string
|
||||
excludingMapping map[string]string
|
||||
file *os.File
|
||||
}
|
||||
|
||||
var doublePinyin = schema{
|
||||
name: "cn_en_double_pinyin",
|
||||
desc: "自然码双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_double_pinyin.txt"),
|
||||
name: "cn_en_double_pinyin",
|
||||
desc: "自然码双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_double_pinyin.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "aa",
|
||||
@ -169,9 +173,10 @@ var doublePinyin = schema{
|
||||
}
|
||||
|
||||
var doublePinyinFlypy = schema{
|
||||
name: "cn_en_flypy",
|
||||
desc: "小鹤双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_flypy.txt"),
|
||||
name: "cn_en_flypy",
|
||||
desc: "小鹤双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_flypy.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "aa",
|
||||
@ -222,9 +227,10 @@ var doublePinyinFlypy = schema{
|
||||
}
|
||||
|
||||
var doublePinyinMSPY = schema{
|
||||
name: "cn_en_mspy",
|
||||
desc: "微软双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_mspy.txt"),
|
||||
name: "cn_en_mspy",
|
||||
desc: "微软双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_mspy.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "oa",
|
||||
@ -276,9 +282,10 @@ var doublePinyinMSPY = schema{
|
||||
}
|
||||
|
||||
var doublePinyinSogou = schema{
|
||||
name: "cn_en_sogou",
|
||||
desc: "搜狗双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_sogou.txt"),
|
||||
name: "cn_en_sogou",
|
||||
desc: "搜狗双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_sogou.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "oa",
|
||||
@ -330,9 +337,10 @@ var doublePinyinSogou = schema{
|
||||
}
|
||||
|
||||
var doublePinyinZiGuang = schema{
|
||||
name: "cn_en_ziguang",
|
||||
desc: "紫光双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_ziguang.txt"),
|
||||
name: "cn_en_ziguang",
|
||||
desc: "紫光双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_ziguang.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "oa",
|
||||
@ -383,9 +391,10 @@ var doublePinyinZiGuang = schema{
|
||||
}
|
||||
|
||||
var doublePinyinABC = schema{
|
||||
name: "cn_en_abc",
|
||||
desc: "智能 ABC 双拼",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_abc.txt"),
|
||||
name: "cn_en_abc",
|
||||
desc: "智能 ABC 双拼",
|
||||
combinationType: "unique",
|
||||
path: filepath.Join(RimeDir, "en_dicts/cn_en_abc.txt"),
|
||||
mapping: map[string]string{
|
||||
// 零声母
|
||||
"-a-": "oa",
|
||||
@ -447,7 +456,7 @@ func CnEn() {
|
||||
defer cnEnTXT.Close()
|
||||
|
||||
schemas := []schema{
|
||||
{name: "cn_en", desc: "全拼", path: filepath.Join(RimeDir, "en_dicts/cn_en.txt")},
|
||||
{name: "cn_en", desc: "全拼", combinationType: "unique", path: filepath.Join(RimeDir, "en_dicts/cn_en.txt")},
|
||||
doublePinyin,
|
||||
doublePinyinFlypy,
|
||||
doublePinyinMSPY,
|
||||
@ -482,17 +491,35 @@ func CnEn() {
|
||||
}
|
||||
uniq.Add(line)
|
||||
for _, schema := range schemas {
|
||||
code := textToPinyin(line, schema)
|
||||
_, err := schema.file.WriteString(line + "\t" + code + "\n")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
lowerCode := strings.ToLower(code)
|
||||
if code != lowerCode {
|
||||
_, err := schema.file.WriteString(line + "\t" + lowerCode + "\n")
|
||||
if schema.combinationType != "multi" {
|
||||
code := textToPinyin(line, schema)
|
||||
_, err := schema.file.WriteString(line + "\t" + code + "\n")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
lowerCode := strings.ToLower(code)
|
||||
if code != lowerCode {
|
||||
_, err := schema.file.WriteString(line + "\t" + lowerCode + "\n")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
codes := textToPinyinMulti(line, schema)
|
||||
for _, code := range codes {
|
||||
_, err := schema.file.WriteString(line + "\t" + code + "\n")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
lowerCode := strings.ToLower(code)
|
||||
if code != lowerCode {
|
||||
_, err := schema.file.WriteString(line + "\t" + lowerCode + "\n")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -558,6 +585,45 @@ func textToPinyin(text string, s schema) string {
|
||||
return code
|
||||
}
|
||||
|
||||
func textToPinyinMulti(text string, s schema) []string {
|
||||
parts := splitMixedWords(text)
|
||||
map4DoublePinyins := make(map[int][]string)
|
||||
for index, part := range parts {
|
||||
if digit, ok := digitMap[part]; ok { // 数字
|
||||
map4DoublePinyins[index] = convertToDoublePinyinMulti(hanPinyin[digit][0], s)
|
||||
} else if len(hanPinyin[part]) > 1 { // 多音字,按字典指定的读音
|
||||
if value, ok := polyphones[text+" > "+part]; ok {
|
||||
map4DoublePinyins[index] = convertToDoublePinyinMulti(value, s)
|
||||
} else {
|
||||
log.Fatalln("❌ 多音字未指定读音", text, part)
|
||||
}
|
||||
} else if len(hanPinyin[part]) == 1 {
|
||||
// 非多音字汉字,按唯一的读音
|
||||
map4DoublePinyins[index] = convertToDoublePinyinMulti(hanPinyin[part][0], s)
|
||||
}
|
||||
}
|
||||
|
||||
var result = make([]string, 0)
|
||||
return stepFurther(parts, 0, "", map4DoublePinyins, result)
|
||||
}
|
||||
|
||||
func stepFurther(parts []string, index int, arranged string, map4DoublePinyins map[int][]string, result []string) []string {
|
||||
if index >= len(parts) {
|
||||
result = append(result, arranged)
|
||||
return result
|
||||
}
|
||||
if combinations, ok := map4DoublePinyins[index]; ok {
|
||||
// 数字或汉字
|
||||
for _, combination := range combinations {
|
||||
result = stepFurther(parts, index+1, arranged+combination, map4DoublePinyins, result)
|
||||
}
|
||||
} else {
|
||||
// 英文字母
|
||||
result = stepFurther(parts, index+1, arranged+parts[index], map4DoublePinyins, result)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// 中英文分割,去掉间隔号和横杠
|
||||
// "哆啦A梦" → ["哆", "啦", "A", "梦"]
|
||||
// "QQ号" → ["QQ", "号"]
|
||||
@ -609,3 +675,51 @@ func convertToDoublePinyin(code string, s schema) string {
|
||||
|
||||
return initial + final
|
||||
}
|
||||
|
||||
func convertToDoublePinyinMulti(code string, s schema) []string {
|
||||
// 零声母
|
||||
i := []string{"a", "e", "o", "ai", "ei", "ou", "an", "en", "ang", "eng", "ao", "er"}
|
||||
if contains(i, code) {
|
||||
return []string{s.mapping["-"+code+"-"]}
|
||||
}
|
||||
|
||||
// 分割为声母和韵母
|
||||
consonantRegexp := regexp.MustCompile(`^(b|p|m|f|d|t|n|l|g|k|h|j|q|x|zh|ch|sh|r|z|c|s|y|w)`)
|
||||
initial := consonantRegexp.FindString(code)
|
||||
final := consonantRegexp.ReplaceAllString(code, "")
|
||||
|
||||
// 声母转换
|
||||
isRetroflex := initial == "zh" || initial == "ch" || initial == "sh"
|
||||
if isRetroflex {
|
||||
initial = s.mapping[initial]
|
||||
}
|
||||
// 韵母转换
|
||||
if len(final) > 1 {
|
||||
final = s.mapping[final]
|
||||
}
|
||||
|
||||
var result []string
|
||||
if isRetroflex || len(final) > 1 {
|
||||
leadings := strings.Split(initial, ",")
|
||||
followings := strings.Split(final, ",")
|
||||
for _, leading := range leadings {
|
||||
for _, following := range followings {
|
||||
if exclusion, ok := s.excludingMapping[code]; ok {
|
||||
if exclusion == (leading + following) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
result = append(result, leading+following)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// 其余单个的声母和韵母不转换
|
||||
result = append(result, initial+final)
|
||||
}
|
||||
|
||||
if addition, ok := s.additionalMapping[code]; ok {
|
||||
result = append(result, addition)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user