dict: 日常更新

This commit is contained in:
Dvel 2023-09-08 19:36:27 +08:00
parent 06467aa0b3
commit 7ebf8ff1fa
10 changed files with 1239 additions and 1375 deletions

View File

@ -33,7 +33,7 @@
# 需要注音的字词设定在 others/script/rime/需要注音.txt
---
name: 8105
version: "2023-09-03"
version: "2023-09-08"
sort: by_weight
...
### 按需启用
@ -66,6 +66,8 @@ sort: by_weight
# 觍 tian
# 𪨊 song
# 磺 huang
# 屮 cao
# 芔 cao
### 小时候没学过的拼音。。。
@ -613,6 +615,8 @@ sort: by_weight
糙 cao 12840
槽 cao 12412
漕 cao 2663
屮 cao 2222
芔 cao 1111
艚 cao 50
螬 cao 26
𥕢 cao 0
@ -3586,15 +3590,15 @@ sort: by_weight
铹 lao 2
络 lao 1
𫭼 lao 0
了 le 3481243
乐 le 777260
勒 le 149722
叻 le 525
饹 le 55
泐 le 34
鳓 le 22
仂 le 15
簕 le 8
了 le 9
乐 le 8
勒 le 7
叻 le 6
饹 le 5
泐 le 4
鳓 le 4
仂 le 3
簕 le 2
嘞 le 1
雷 lei 560379
类 lei 542775
@ -3773,7 +3777,6 @@ sort: by_weight
廖 liao 31067
撩 liao 21011
寥 liao 20857
了 liao 20000
僚 liao 19624
缭 liao 16457
燎 liao 9587
@ -3795,6 +3798,7 @@ sort: by_weight
憭 liao 6
簝 liao 4
橑 liao 1
了 liao 0
𪤗 liao 0
列 lie 244324
裂 lie 174922

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
#
---
name: others
version: "2023-09-03"
version: "2023-09-08"
sort: by_weight
...
##### 容错词
@ -144,6 +144,8 @@ sort: by_weight
歇斯底里 jie si di li
暖和 nuan huo
暖和 nuan he
模棱两可 mo leng liang ke
模棱两可 mo ling liang ke
### 错字
曾经 ceng jing
曾今 ceng jin

File diff suppressed because it is too large Load Diff

View File

@ -14,7 +14,7 @@
# 转化应当大写的单词
---
name: en
version: "2023-08-15"
version: "2023-09-08"
sort: by_weight
...
# +_+
@ -19161,7 +19161,7 @@ toxicity toxicity
toxicology toxicology
toxins toxins
toy toy
toyota toyota
Toyota Toyota
toys toys
# tp tp
# tr tr

View File

@ -7,7 +7,7 @@
#
---
name: en_ext
version: "2023-09-03"
version: "2023-09-08"
sort: by_weight
...
# 一些杂项
@ -2263,3 +2263,5 @@ decode decode
KeyCastr KeyCastr
Laugh Tale LaughTale
Joy Boy JoyBoy
gotcha gotcha
wombat wombat

View File

@ -72,6 +72,7 @@ local corrections = {
["tai xing shan"] = { text = "太行山", comment = "tai hang shan" },
["jie si di li"] = { text = "歇斯底里", comment = "xie si di li" },
["nuan he"] = { text = "暖和", comment = "nuan huo" },
["mo ling liang ke"] = { text = "模棱两可", comment = "mo leng liang ke" },
-- 错字
["ceng jin"] = { text = "曾今", comment = "曾经" },
["an nai"] = { text = "按耐", comment = "按捺(na)" },

View File

@ -3,6 +3,7 @@ package rime
import (
"bufio"
"fmt"
mapset "github.com/deckarep/golang-set/v2"
"log"
"os"
"strconv"
@ -17,8 +18,9 @@ func Temp() {
// GeneratePinyinTest("都挺长的")
// GeneratePinyinTest("血条长")
// findP(ExtPath, "谁")
// findP(BasePath, "血")
Pinyin(ExtPath)
AddWeight(ExtPath, 100)
}
// 列出字表中多音字的状况:是否参与自动注音
@ -107,6 +109,7 @@ func findP(dictPath string, ch string) {
isMark := false
sc := bufio.NewScanner(file)
set := mapset.NewSet[string]() // 去重用的
for sc.Scan() {
line := sc.Text()
if !isMark {
@ -125,9 +128,10 @@ func findP(dictPath string, ch string) {
log.Fatalln("len(parts) != 3", line)
}
text := parts[0]
if strings.Contains(text, ch) && utf8.RuneCountInString(text) >= 3 {
if strings.Contains(text, ch) && utf8.RuneCountInString(text) >= 3 && !set.Contains(text) {
outFile.WriteString(line + "\n")
} else {
set.Add(text)
lines = append(lines, line)
}
}

View File

@ -1,4 +1,8 @@
# 在 # -_- 上面是错词;下面是包含错词但不是错词。
阿帕契
德鲁依
唐三才
塔克拉马干
元文件
桃花园
桃花缘