From b6d39175b07928f430a8a416c8ae8e95ea6023b5 Mon Sep 17 00:00:00 2001 From: Mirtle Date: Tue, 30 May 2023 22:06:38 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E8=8B=B1=E6=96=87=E8=AF=8D=E6=B1=87?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E5=A4=A7=E5=86=99=E8=BD=AC=E6=8D=A2=20(#305)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 示例: 输入小写,得到词库中的原样:latex → LaTeX 输入首字母大写,得到首字母大写:Hello → Hello 输入前2~n个字母大写,得到全大写:HEllo → HELLO 同上,输入全大写,得到全大写:HELLO → HELLO --- double_pinyin.schema.yaml | 3 +- double_pinyin_flypy.schema.yaml | 3 +- double_pinyin_mspy.schema.yaml | 3 +- double_pinyin_ziguang.schema.yaml | 3 +- lua/autocap_filter.lua | 62 +++++++++++++++++++++++++++++++ melt_eng.schema.yaml | 37 ++++++++++-------- rime.lua | 4 +- rime_ice.schema.yaml | 3 +- 8 files changed, 96 insertions(+), 22 deletions(-) create mode 100644 lua/autocap_filter.lua diff --git a/double_pinyin.schema.yaml b/double_pinyin.schema.yaml index e6c82bb..bc0ed36 100644 --- a/double_pinyin.schema.yaml +++ b/double_pinyin.schema.yaml @@ -78,8 +78,9 @@ engine: filters: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - - uniquifier # 去重 + - lua_filter@autocap_filter # 英文自动大写 - lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置 + - uniquifier # 去重 # Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字 diff --git a/double_pinyin_flypy.schema.yaml b/double_pinyin_flypy.schema.yaml index f528438..1e97a34 100644 --- a/double_pinyin_flypy.schema.yaml +++ b/double_pinyin_flypy.schema.yaml @@ -78,8 +78,9 @@ engine: filters: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - - uniquifier # 去重 + - lua_filter@autocap_filter # 英文自动大写 - lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置 + - uniquifier # 去重 # Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字 diff --git a/double_pinyin_mspy.schema.yaml b/double_pinyin_mspy.schema.yaml index 2bb9203..bba7db2 100644 --- a/double_pinyin_mspy.schema.yaml +++ b/double_pinyin_mspy.schema.yaml @@ -78,8 +78,9 @@ engine: filters: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - - uniquifier # 去重 + - lua_filter@autocap_filter # 英文自动大写 - lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置 + - uniquifier # 去重 # Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字 diff --git a/double_pinyin_ziguang.schema.yaml b/double_pinyin_ziguang.schema.yaml index d7d21ae..8a779df 100644 --- a/double_pinyin_ziguang.schema.yaml +++ b/double_pinyin_ziguang.schema.yaml @@ -78,8 +78,9 @@ engine: filters: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - - uniquifier # 去重 + - lua_filter@autocap_filter # 英文自动大写 - lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置 + - uniquifier # 去重 # Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字 diff --git a/lua/autocap_filter.lua b/lua/autocap_filter.lua new file mode 100644 index 0000000..8b0dc16 --- /dev/null +++ b/lua/autocap_filter.lua @@ -0,0 +1,62 @@ +--[[ + #302@abcdefg233 #305@Mirtle + + 自动大写英文词汇: + - 部分规则不做转换 + - 输入首字母大写,候选词转换为首字母大写: Hello → Hello + - 输入至少前 2 个字母大写,候选词转换为全部大写: HEllo → HELLO + + 大写时无法动态调整词频 +--]] +local function autocap_filter(input, env) + local code = env.engine.context.input -- 输入码 + local codeLen = #code + local codeAllUCase = false + local codeUCase = false + -- 不转换: + if codeLen == 1 or -- 码长为 1 + code:find("^[%l%p]") -- 输入码首位为小写字母或标点 + then -- 输入码不满足条件不判断候选项 + for cand in input:iter() do + yield(cand) + end + return + ---- 输入码全大写 + -- elseif code == code:upper() then + -- codeAllUCase = true + -- 输入码前 2 - n 位大写 + elseif code:find("^%u%u+.*") then + codeAllUCase = true + -- 输入码首位大写 + elseif code:find("^%u.*") then + codeUCase = true + end + + local pureCode = code:gsub("[%s%p]", "") -- 删除标点和空格的输入码 + for cand in input:iter() do + local text = cand.text -- 候选词 + local pureText = text:gsub("[%s%p]", "") -- 删除标点和空格的候选词 + -- 不转换: + if + text:find("[^%w%p%s]") or -- 候选词包含非字母和数字、非标点符号、非空格的字符 + text:find("%s") or -- 候选词中包含空格 + pureText:find("^" .. code) or -- 输入码完全匹配候选词 + (cand.type ~= "completion" and -- 单词与其对应的编码不一致 + pureCode:lower() ~= pureText:lower()) -- 例如 PS - Photoshop + then + yield(cand) + -- 输入码前 2~10 位大写,候选词转换为全大写 + elseif codeAllUCase then + text = text:upper() + yield(Candidate(cand.type, 0, codeLen, text, cand.comment)) + -- 输入码首位大写,候选词转换为首位大写 + elseif codeUCase then + text = text:gsub("^%a", string.upper) + yield(Candidate(cand.type, 0, codeLen, text, cand.comment)) + else + yield(cand) + end + end +end + +return autocap_filter diff --git a/melt_eng.schema.yaml b/melt_eng.schema.yaml index 1272183..a72bc9f 100644 --- a/melt_eng.schema.yaml +++ b/melt_eng.schema.yaml @@ -2,8 +2,7 @@ # vim: set sw=2 sts=2 et: # encoding: utf-8 # -# 复制自 https://github.com/tumuyan/rime-melt - +# 复制自 https://github.com/tumuyan/rime-melt,修改了一点拼写派生 schema: schema_id: melt_eng @@ -12,13 +11,12 @@ schema: author: - Patrick - tumuyan - description: - Easy English Nano,只包含少量常用词汇,方便中英文混合输入度方案调用。 + description: Easy English Nano,只包含少量常用词汇,方便中英文混合输入度方案调用。 switches: - name: ascii_mode reset: 0 - states: [ ASCII-OFF, ASCII-ON ] + states: [ASCII-OFF, ASCII-ON] engine: processors: @@ -26,7 +24,7 @@ engine: - key_binder - speller - recognizer -#關閉標點符號轉換(對應symbols.yaml) - punctuator + #關閉標點符號轉換(對應symbols.yaml) - punctuator - selector - navigator - express_editor @@ -46,16 +44,25 @@ speller: alphabet: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA-_ delimiter: " '" algebra: -# 拼写规则的本质,是把字典按照规则改写,而不是把输入的按键码转写。 + # 为编码派生新的拼写: + # 删除特殊字符 - derive/['\-_+,.]+// -# 把大小写混写的单词转写为全小写 - - derive/^(.+)$/\L$1/ -# 把小写的单词转写为全大写写 - - derive/^([a-z].+)$/\U$1/ -# 把单词转写为首字母大写 - - derive/^([a-zA-Z])/\U$1/ -# 把小写的单词转写为前2字母大写 - - derive/^([a-z][a-z])/\U$1/ + # 全小写 + - derive/^.+$/\L$0/ + # 全大写 + - derive/^.+$/\U$0/ + # 首字母大写 + - derive/^./\U$0/ + # 前 2~10 个字母大写 + - derive/^([a-z]{2})/\U$1/ + - derive/^([a-z]{3})/\U$1/ + - derive/^([a-z]{4})/\U$1/ + - derive/^([a-z]{5})/\U$1/ + - derive/^([a-z]{6})/\U$1/ + - derive/^([a-z]{7})/\U$1/ + - derive/^([a-z]{8})/\U$1/ + - derive/^([a-z]{9})/\U$1/ + - derive/^([a-z]{10})/\U$1/ translator: dictionary: melt_eng spelling_hints: 9 diff --git a/rime.lua b/rime.lua index 46512cd..50c7d85 100644 --- a/rime.lua +++ b/rime.lua @@ -25,8 +25,8 @@ unicode = require("unicode") -- 数字、人民币大写,R 开头 number_translator = require("number_translator") - - +-- 自动大写英文词汇 +autocap_filter = require("autocap_filter") -- 默认未启用: diff --git a/rime_ice.schema.yaml b/rime_ice.schema.yaml index 8a5496e..e79a2e7 100644 --- a/rime_ice.schema.yaml +++ b/rime_ice.schema.yaml @@ -69,10 +69,11 @@ engine: filters: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - - uniquifier # 去重 + - lua_filter@autocap_filter # 英文自动大写 - lua_filter@v_filter # v 模式 symbols 优先(否则是英文优先) - lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置 - lua_filter@long_word_filter # 长词优先 + - uniquifier # 去重 # Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字