rime-ice/others/script/rime/emoji.go

138 lines
3.1 KiB
Go
Raw Normal View History

2022-10-30 16:47:40 +01:00
package rime
import (
"bufio"
2023-02-06 20:32:24 +01:00
"fmt"
2022-10-30 16:47:40 +01:00
mapset "github.com/deckarep/golang-set/v2"
"log"
"os"
"regexp"
"strings"
"time"
"unicode/utf8"
)
// Locations of the OpenCC emoji files used by UpdateEmojiTXT.
var (
	// emojiTXT is the generated file that UpdateEmojiTXT writes.
	emojiTXT = "/Users/dvel/Library/Rime/opencc/emoji.txt"
	// mappingTXT is the hand-maintained mapping file that UpdateEmojiTXT reads.
	mappingTXT = "/Users/dvel/Library/Rime/opencc/emoji-map.txt"
)
// OrderedMap pairs a map of string slices with a slice of its keys so the
// entries can be iterated in a stable order chosen by the code filling it.
type OrderedMap struct {
	// keys lists the map keys in the order they should be iterated.
	keys []string
	// m holds the values collected for each key.
	m map[string][]string
}
// CheckEmoji 检查 Emoji
// 检查 emoji-map.txt 格式书写问题
// 检查所有词条是否与 base+sogou+ext 词库存在差集
2022-10-30 16:47:40 +01:00
func CheckEmoji() {
// 控制台输出
defer printlnTimeCost("检查 Emoji 差集", time.Now())
// 打开文件
file, err := os.Open(EmojiPath)
if err != nil {
log.Fatal(err)
}
defer file.Close()
// 将 Emoji 加入 set为检测差集做准备
emojiSet := mapset.NewSet[string]()
sc := bufio.NewScanner(file)
for sc.Scan() {
line := sc.Text()
2023-02-06 20:32:24 +01:00
// 过滤空行
if line == "" {
continue
}
2022-10-30 16:47:40 +01:00
// 过滤注释
if strings.Contains(line, "#") {
continue
}
// 检查是否包含 Tab
if strings.Contains(line, "\t") {
2023-02-06 20:32:24 +01:00
fmt.Println("❌ 此行包含 Tab", line)
2022-10-30 16:47:40 +01:00
}
// 开头结尾无效的空格
if strings.HasPrefix(line, " ") || strings.HasSuffix(line, " ") {
2023-02-06 20:32:24 +01:00
fmt.Println("❌ unexpected space:", line)
2022-10-30 16:47:40 +01:00
}
parts := strings.Split(line, " ")
if len(parts) < 2 {
2023-02-06 20:32:24 +01:00
fmt.Println("❌ invalid line:", line)
2022-10-30 16:47:40 +01:00
}
// 加入 emojiSet顺便用一个 tempSet 查重
tempSet := mapset.NewSet[string]()
for _, word := range parts[1:] {
emojiSet.Add(word)
if tempSet.Contains(word) {
2023-02-06 20:32:24 +01:00
fmt.Println("❌ 此行有重复映射:", line)
2022-10-30 16:47:40 +01:00
} else {
tempSet.Add(word)
}
}
}
// 检查 emoji 中的词条是否与 base+sogou+ext 词库存在差集
for _, word := range emojiSet.Difference(BaseSet.Union(SogouSet).Union(ExtSet)).ToSlice() {
2022-10-30 16:47:40 +01:00
// 去除英文字母
if match, _ := regexp.MatchString(`[A-Za-z]+`, word); match {
continue
}
// 去除一个字的
if utf8.RuneCountInString(word) == 1 {
continue
}
2023-02-06 20:32:24 +01:00
fmt.Println("❌ Emoji 差集:", word)
2022-10-30 16:47:40 +01:00
}
}
// UpdateEmojiTXT generates (or overwrites) emoji.txt from emoji-map.txt.
//
// Every line of the mapping file that does not start with "#" is split on
// single spaces: the first field is the value, and each remaining field is a
// word that maps to it. For every distinct word, in order of first appearance,
// one line is written to the output file:
//
//	word<TAB>word value1 value2 ...
//
// The process is terminated via log.Fatal if either file cannot be opened,
// read or written.
func UpdateEmojiTXT() {
	// Print the elapsed time when the update finishes.
	defer printlnTimeCost("更新 emoji.txt", time.Now())

	// Read emoji-map.txt.
	mappingFile, err := os.Open(mappingTXT)
	if err != nil {
		log.Fatal(err)
	}
	defer mappingFile.Close()

	om := &OrderedMap{m: make(map[string][]string)}
	sc := bufio.NewScanner(mappingFile)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "#") {
			continue
		}
		arr := strings.Split(line, " ")
		for _, word := range arr[1:] {
			// om.m and om.keys always hold the same set of words, so a map
			// lookup replaces a linear scan of om.keys.
			if _, seen := om.m[word]; !seen {
				om.keys = append(om.keys, word)
			}
			om.m[word] = append(om.m[word], arr[0])
		}
	}
	// Abort before touching emoji.txt if the mapping file was only partly read.
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}

	// Write emoji.txt.
	emojiFile, err := os.OpenFile(emojiTXT, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		log.Fatalln(err)
	}
	defer emojiFile.Close()

	// Buffer the output so each line does not cost a separate write call.
	w := bufio.NewWriter(emojiFile)
	for _, key := range om.keys {
		line := key + "\t" + key + " " + strings.Join(om.m[key], " ") + "\n"
		if _, err := w.WriteString(line); err != nil {
			log.Fatal(err)
		}
	}
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
	if err := emojiFile.Sync(); err != nil {
		log.Fatal(err)
	}
}