diff --git a/default.yaml b/default.yaml index 92c38b7..13624ce 100644 --- a/default.yaml +++ b/default.yaml @@ -168,6 +168,9 @@ key_binder: # Lua 配置: 以词定字(上屏当前词句的第一个或最后一个字) select_first_character: select_last_character: "grave" + # Lua 配置: 词条隐藏、降频 + # turn_down_cand: "Control+j" # 匹配当前输入码后隐藏指定的候选字词 或候选词条放到第四候选位置 + # drop_cand: "Control+d" # 强制删词, 无视输入的编码 bindings: # Tab / Shift+Tab 切换光标至下/上一个拼音 diff --git a/double_pinyin.schema.yaml b/double_pinyin.schema.yaml index 5834835..45e1da3 100644 --- a/double_pinyin.schema.yaml +++ b/double_pinyin.schema.yaml @@ -51,7 +51,8 @@ switches: # 输入引擎 engine: processors: - - lua_processor@*select_character # 以词定字 + - lua_processor@*select_character # 以词定字 + # - lua_processor@*cold_word_drop.processor # 词条隐藏、降频 - ascii_composer - recognizer - key_binder @@ -78,7 +79,8 @@ engine: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 # - lua_filter@*is_in_user_dict # 为用户词典中(输入过)的内容结尾加上一个星号 * - - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 + - lua_filter@*cold_word_drop.filter # 强制删词, 词条降频(选中高亮的词条放到第四位) + - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 - uniquifier # 去重 diff --git a/double_pinyin_flypy.schema.yaml b/double_pinyin_flypy.schema.yaml index 43120a3..9c688f1 100644 --- a/double_pinyin_flypy.schema.yaml +++ b/double_pinyin_flypy.schema.yaml @@ -51,7 +51,8 @@ switches: # 输入引擎 engine: processors: - - lua_processor@*select_character # 以词定字 + - lua_processor@*select_character # 以词定字 + # - lua_processor@*cold_word_drop.processor # 词条隐藏、降频 - ascii_composer - recognizer - key_binder @@ -78,6 +79,7 @@ engine: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 # - lua_filter@*is_in_user_dict # 为用户词典中(输入过)的内容结尾加上一个星号 * + # - lua_filter@*cold_word_drop.filter # 词条隐藏、降频 - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 - uniquifier # 去重 diff --git a/double_pinyin_mspy.schema.yaml b/double_pinyin_mspy.schema.yaml index 4ae53e5..e0fc872 100644 --- a/double_pinyin_mspy.schema.yaml +++ 
b/double_pinyin_mspy.schema.yaml @@ -51,7 +51,8 @@ switches: # 输入引擎 engine: processors: - - lua_processor@*select_character # 以词定字 + - lua_processor@*select_character # 以词定字 + # - lua_processor@*cold_word_drop.processor # 词条隐藏、降频 - ascii_composer - recognizer - key_binder @@ -78,6 +79,7 @@ engine: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 # - lua_filter@*is_in_user_dict # 为用户词典中(输入过)的内容结尾加上一个星号 * + # - lua_filter@*cold_word_drop.filter # 词条隐藏、降频 - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 - uniquifier # 去重 diff --git a/double_pinyin_ziguang.schema.yaml b/double_pinyin_ziguang.schema.yaml index 3a70080..ae932a6 100644 --- a/double_pinyin_ziguang.schema.yaml +++ b/double_pinyin_ziguang.schema.yaml @@ -51,7 +51,8 @@ switches: # 输入引擎 engine: processors: - - lua_processor@*select_character # 以词定字 + - lua_processor@*select_character # 以词定字 + # - lua_processor@*cold_word_drop.processor # 词条隐藏、降频 - ascii_composer - recognizer - key_binder @@ -78,7 +79,8 @@ engine: - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 # - lua_filter@*is_in_user_dict # 为用户词典中(输入过)的内容结尾加上一个星号 * - - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 + - lua_filter@*cold_word_drop.filter # 强制删词, 词条降频(选中高亮的词条放到第四位) + - lua_filter@*reduce_english_filter # 降低部分英语单词在候选项的位置 - uniquifier # 去重 diff --git a/lua/cold_word_drop/debugtool.lua b/lua/cold_word_drop/debugtool.lua new file mode 100644 index 0000000..354d216 --- /dev/null +++ b/lua/cold_word_drop/debugtool.lua @@ -0,0 +1,80 @@ +#! /usr/bin/env lua +-- +-- debugtool.lua +-- Copyright (C) 2021 Shewer Lu +-- +-- Distributed under terms of the MIT license. +-- +-- puts(tag,...) +-- DEBUG --> log.error +-- WARN --> log.warning +-- INFO --> log.info +-- CONSOLE --> print +-- +-- ex: +-- test.lua +-- +-- local puts = require 'tools/debugtool' +-- --set tag D103 C102 +-- local D103= DEBUG .. "103" +-- local C102= CONSOLE .. 
"102" +-- local C103= nil +-- +-- +-- puts(ERROR,__FILE__(),__LINE__(),__FUNC__(), 1, 2 , 3 ) +-- --> log.error( "error" .. tran_msg(...)) +-- +-- puts(DEBUG,__FILE__(),__LINE__(),__FUNC__(), 1, 2 , 3 ) +-- --> log.error( DEBUG .. tran_msg(...)) +-- +-- puts(D103,__FILE__(),__LINE__(),__FUNC__(), 1 2 3) +-- --> log.error("trace103" .. tran_msg(...) +-- +-- puts(C102,__FILE__(),__LINE__(),__FUNC__(), 1 2 3) +-- --> print("console103" .. tran_msg(...) +-- +-- puts(C103,__FILE__(),__LINE__(),__FUNC__(), 1 2 3) +-- --> pass +-- +-- +-- +-- puts(DEBUG,__FILE__(),__LINE__(),__FUNC__() , ...) +-- puts(INFO,__FILE__(),__LINE__(),__FUNC__() , ...) +-- +-- global variable +function __FILE__(n) n=n or 2 return debug.getinfo(n,'S').soruce end +function __LINE__(n) n=n or 2 return debug.getinfo(n, 'l').currentline end +function __FUNC__(n) n=n or 2 return debug.getinfo(n, 'n').name end +INFO="log" +WARN="warn" +ERROR="error" +DEBUG="trace" +CONSOLE="console" + + + + +local function tran_msg(...) + local msg="\t" + for i,k in next, {...} do msg = msg .. ": " .. tostring(k) end + return msg +end +local function puts( tag , ...) + if type(tag) ~= "string" then return end + + if INFO and tag:match("^" .. INFO) then + (log and log.info or print)( tag .. tran_msg(...)) + elseif WARN and tag:match("^" .. WARN) then + (log and log.warning or print)(tag .. tran_msg(...)) + elseif ERROR and tag:match("^" .. ERROR) then + (log and log.error or print)(tag .. tran_msg(...)) + elseif DEBUG and tag:match("^" .. DEBUG) then + (log and log.error or print)(tag .. tran_msg(...)) + elseif CONSOLE and tag:match( "^" .. CONSOLE ) then + ( print)( tag .. 
--- cold_word_drop/filter.lua
-- librime-lua filter that hides "dropped"/"hidden" candidates and pushes
-- "turned-down" candidates out of the first few menu positions.
--
-- The three word lists are plain Lua modules living next to this file.
-- They are shared (via the `require` cache) with the processor, which
-- mutates them at runtime, so they are consulted afresh on every call.

local drop_list = require("cold_word_drop.drop_words")
local hide_list = require("cold_word_drop.hide_words")
local turndown_freq_list = require("cold_word_drop.turndown_freq_words")

-- Turned-down words are placed after this many regular candidates
-- (i.e. they appear from the fourth position on).
local DEMOTE_AFTER = 3
-- Upper bound on how many candidates are buffered while looking for
-- regular candidates to fill the leading positions.
local MAX_DEFERRED = 50

--- Linear membership test on an array; tolerates a nil list.
-- Kept local so the filter does not depend on `table.find_index`, which is
-- only defined when cold_word_drop.metatable has been loaded by the
-- processor -- and the processor may be disabled in the schema.
local function contains(list, value)
    if list == nil then return false end
    for _, item in ipairs(list) do
        if item == value then return true end
    end
    return false
end

--- Filter entry point.
-- @param input  translation stream; candidates are read with input:iter()
-- @param env    filter environment (unused)
local function filter(input, env)
    -- Set view of the drop list: O(1) lookup per candidate instead of a scan.
    local dropped = {}
    for _, word in ipairs(drop_list) do
        dropped[word] = true
    end

    -- A candidate is suppressed when its text is dropped outright, or when it
    -- is hidden for exactly the code that produced it.
    local function is_suppressed(cand)
        if dropped[cand.text] then return true end
        local code = (string.gsub(cand.preedit, ' ', ''))
        return contains(hide_list[cand.text], code)
    end

    local emitted = 0
    local deferred = {}

    for cand in input:iter() do
        if not is_suppressed(cand) then
            local code = (string.gsub(cand.preedit, ' ', ''))
            local demoted = contains(turndown_freq_list[cand.text], code)
            if emitted < DEMOTE_AFTER and not demoted then
                emitted = emitted + 1
                ---@diagnostic disable-next-line: undefined-global
                yield(cand)
            else
                -- Either a turned-down word, or the leading slots are full.
                deferred[#deferred + 1] = cand
            end
        end
        if #deferred > MAX_DEFERRED then
            break
        end
    end

    for _, cand in ipairs(deferred) do
        ---@diagnostic disable-next-line: undefined-global
        yield(cand)
    end

    -- Remainder of the stream. Dropped/hidden words are suppressed here too,
    -- so they cannot resurface on later pages of the menu.
    for cand in input:iter() do
        if not is_suppressed(cand) then
            ---@diagnostic disable-next-line: undefined-global
            yield(cand)
        end
    end
end

return filter
--- cold_word_drop/metatable.lua
-- Extends the global `table` library with list helpers and lets plain
-- tables call them as methods (`t:insert(x)`, `t:find_index(x)`, ...).
-- NOTE: this module deliberately installs globals (`orgtype`, `type`,
-- `metatable`, `metatable_chk`) and adds fields to `table`.

-- Keep a handle on the builtin `type`. The `or` guard stops a second
-- execution of this file from capturing the wrapper installed below.
orgtype = orgtype or type

--- `type` replacement: a table carrying a `_cname` field reports that name.
function type(obj)
    local _type = orgtype(obj)
    if "table" == _type and obj._cname then
        return obj._cname
    end
    return _type
end

--- Give a table the `table` library as its method set.
-- Called with a table: that table is decorated and returned.
-- Called with anything else: the arguments are packed into a new table.
function metatable(...)
    local first = ...
    if first and type(first) == "table" then
        return setmetatable(first, { __index = table })
    else
        return setmetatable({ ... }, { __index = table })
    end
end

--- Decorate `tab` with `metatable` unless it already answers to `each`.
-- Non-table values are returned unchanged.
function metatable_chk(tab)
    if "table" == type(tab)
    then
        return (tab.each and tab) or metatable(tab)
    else
        return tab
    end
end

--- Call func(value, index) for indices 1..#tab; returns tab.
table.eachi = function (tab, func)
    for i = 1, #tab do
        func(tab[i], i)
    end
    return tab
end

--- Call func(value, index) over the ipairs sequence; returns tab.
table.eacha = function (tab, func)
    for i, v in ipairs(tab) do
        func(v, i)
    end
    return tab
end

--- Call func(value, key) for every pair; returns tab.
table.each = function (tab, func)
    for k, v in pairs(tab) do
        func(v, k)
    end
    return tab
end

--- Index of the first sequence element equal to elm, or nil.
table.find_index = function (tab, elm, ...)
    local _, i = table.find(tab, elm, ...)
    return i
end

--- First sequence element equal to elm. Returns value, index (or nothing).
-- The third parameter is accepted for signature compatibility and unused.
table.find = function (tab, elm, func)
    for i, v in ipairs(tab) do
        if elm == v then
            return v, i
        end
    end
end

--- Like table.find, but `elm` may be a predicate called as elm(v, i, ...).
-- Returns value, index of the first hit (or nothing). This function
-- previously had an empty body and always returned nothing.
table.find_with_func = function (tab, elm, ...)
    if type(elm) ~= "function" then
        return table.find(tab, elm)
    end
    for i, v in ipairs(tab) do
        if elm(v, i, ...) then
            return v, i
        end
    end
end

--- Remove the first element equal to elm; returns it, or nil if absent.
table.delete = function (tab, elm, ...)
    local index = table.find_index(tab, elm)
    return index and table.remove(tab, index)
end

--- Collect every value that equals elm, or for which elm(v, k, ...) is true.
table.find_all = function (tab, elm, ...)
    local tmptab = setmetatable({}, { __index = table })
    local _func = (type(elm) == "function" and elm) or function (v, k, ...) return v == elm end
    for k, v in pairs(tab) do
        if _func(v, k, ...) then
            tmptab:insert(v)
        end
    end
    return tmptab
end
table.select = table.find_all

--- Left fold: acc = func(v, acc) over the sequence, starting from arg.
-- Returns the final accumulator and the initial argument.
table.reduce = function (tab, func, arg)
    local acc = arg
    for _, v in ipairs(tab) do
        acc = func(v, acc)
    end
    return acc, arg
end

--- New list holding func(v, i) for each sequence element (identity default).
table.map = function (tab, func)
    local newtab = setmetatable({}, { __index = table })
    func = func or function (v, i) return v, i end
    for i, v in ipairs(tab) do
        newtab[i] = func(v, i)
    end
    return newtab
end

--- New list holding func(k, v) per pair; default yields { k, v } pairs.
table.map_hash = function (tab, func)
    local newtab = setmetatable({}, { __index = table })
    func = func or function (k, v) return { k, v } end
    for k, v in pairs(tab) do
        newtab:insert(func(k, v))
    end
    return newtab
end

-- Stack/queue helpers. They call the library functions directly so they
-- also work on tables that were never passed through `metatable`.

--- Append elm.
function table:push(elm)
    table.insert(self, elm)
end

table.append = table.push

--- Remove and return the last element.
function table:pop()
    return table.remove(self, #self)
end

--- Remove and return the first element.
function table:shift()
    return table.remove(self, 1)
end

--- Insert elm at the front.
function table:unshift(elm)
    table.insert(self, 1, elm)
end

--- Number of key/value pairs (array and hash part together).
function table.len(t)
    local leng = 0
    for _ in pairs(t) do
        leng = leng + 1
    end
    return leng;
end

--- Serialize a value into Lua source text.
-- Numbers and booleans use tostring, strings use %q (so quotes, backslashes
-- and newlines survive a round trip), tables are emitted recursively.
-- Sequence items are written positionally and in order; every other key is
-- written as `[key] = value`. Returns nil for nil and raises an error for
-- any other type.
function table.serialize(obj)
    local t = type(obj)
    if t == "number" or t == "boolean" then
        return tostring(obj)
    elseif t == "string" then
        return string.format("%q", obj)
    elseif t == "nil" then
        return nil
    elseif t ~= "table" then
        error("can not serialize a " .. t .. " type.")
    end

    local count = #obj
    -- Short lists stay on one line; longer ones get one record per line.
    local record_sep = count < 4 and ", " or ",\n"
    local record_prefix = count < 4 and "" or "\t"

    local buf = { "{ " }
    for i = 1, count do
        buf[#buf + 1] = record_prefix .. (table.serialize(obj[i]) or "nil") .. record_sep
    end
    for k, v in pairs(obj) do
        local positional = orgtype(k) == "number" and k >= 1 and k <= count and k % 1 == 0
        if not positional then
            buf[#buf + 1] = "\t[" .. table.serialize(k) .. "] = " .. table.serialize(v) .. ",\n"
        end
    end
    buf[#buf + 1] = "}"
    return table.concat(buf)
end
",\n" + end + end + -- local metatable = getmetatable(obj) + -- if metatable ~= nil and type(metatable.__index) == "table" then + -- for k, v in pairs(metatable.__index) do + -- serialize_str = serialize_str .. "[" .. table.serialize(k) .. "]=" .. table.serialize(v) .. ",\n" + -- end + -- end + serialize_str = serialize_str .. "}" + elseif t == "nil" then + return nil + else + error("can not serialize a " .. t .. " type.") + end + return serialize_str +end diff --git a/lua/cold_word_drop/processor.lua b/lua/cold_word_drop/processor.lua new file mode 100644 index 0000000..b8787d5 --- /dev/null +++ b/lua/cold_word_drop/processor.lua @@ -0,0 +1,146 @@ + +require('cold_word_drop.string') +require("cold_word_drop.metatable") +-- local puts = require("tools/debugtool") +local drop_list = require("cold_word_drop.drop_words") +local hide_list = require("cold_word_drop.hide_words") +local turndown_freq_list = require("cold_word_drop.turndown_freq_words") +local tbls = { + ['drop_list'] = drop_list, + ['hide_list'] = hide_list, + ['turndown_freq_list'] = turndown_freq_list +} +-- local cold_word_drop = {} + + +local function get_record_filername(record_type) + local system = io.popen("uname -s"):read("*l") + local filename = nil + -- body + if system == "Darwin" then + filename = string.format("%s/Library/Rime/lua/cold_word_drop/%s_words.lua", os.getenv('HOME'), record_type) + elseif system == "Linux" then + filename = string.format("%s/.config/ibus/rime/lua/cold_word_drop/%s_words.lua", os.getenv('HOME'), record_type) + else + filename = string.format("%%APPDATA%%\\Rime\\lua\\cold_word_drop\\%s_words.lua", record_type) + end + return filename +end + +local function write_word_to_file(record_type) + -- local filename = string.format("%s/Library/Rime/lua/cold_word_drop/%s_words.lua", os.getenv('HOME'), record_type) + local filename = get_record_filername(record_type) + local record_header = string.format("local %s_words =\n", record_type) + local record_tailer = 
string.format("\nreturn %s_words", record_type) + local fd = assert(io.open(filename, "w")) --打开 + fd:setvbuf("line") + fd:write(record_header) --写入文件头部 + -- df:flush() --刷新 + local x = string.format("%s_list", record_type) + local record = table.serialize(tbls[x]) -- lua 的 table 对象 序列化为字符串 + fd:write(record) --写入 序列化的字符串 + fd:write(record_tailer) --写入文件尾部, 结束记录 + fd:close() --关闭 +end + +local function check_encode_matched(cand_code, word, input_code_tbl, reversedb) + if #cand_code < 1 and utf8.len(word) > 1 then -- 二字词以上的词条反查, 需要逐个字去反查 + local word_cand_code = string.split(word, "") + for i, v in ipairs(word_cand_code) do + -- 如有 `[` 引导的辅助码情况, 去掉引导符及之后的所有形码字符 + local char_code = string.gsub(reversedb:lookup(v), '%[%l%l', '') + local _char_preedit_code = input_code_tbl[i] or " " + -- 如有 `[` 引导的辅助码情况, 同上, 去掉之 + local char_preedit_code = string.gsub(_char_preedit_code, '%[%l+', '') + if not string.match(char_code, char_preedit_code) then + -- 输入编码串和词条反查结果不匹配(考虑到多音字, 开启了模糊音, 纠错音), 返回false, 表示隐藏这个词条 + return false + end + end + end + -- 输入编码串和词条反查结果匹配, 返回true, 表示对这个词条降频 + return true +end + +local function append_word_to_droplist(ctx, action_type, reversedb) + local word = ctx.word + local input_code = ctx.code + if action_type == 'drop' then + table.insert(drop_list, word) -- 高亮选中的词条插入到 drop_list + return true + end + local input_code_tbl = string.split(input_code, " ") + local cand_code = reversedb:lookup(word) or "" -- 反查候选项文字编码 + -- 二字词 的匹配检查, 匹配返回true, 不匹配返回false + local match_result = check_encode_matched(cand_code, word, input_code_tbl, reversedb) + local ccand_code = string.gsub(cand_code, '%[%l%l', '') + -- 如有 `[` 引导的辅助码情况, 去掉引导符及之后的所有形码字符 + local input_str = string.gsub(input_code, '%[%l+', '') + local input_code_str = table.concat(input_code_tbl, '') + -- 单字和二字词 的匹配检查, 如果匹配, 降频 + if string.match(ccand_code, input_str) or match_result then + if turndown_freq_list[word] then + table.insert(turndown_freq_list[word], input_code_str) + else + 
turndown_freq_list[word] = { input_code_str } + end + return 'turndown_freq' + end + + -- 单字和二字词 如果不匹配 就隐藏 + if not hide_list[word] then + hide_list[word] = { input_code_str } + return true + else + -- 隐藏的词条如果已经在 hide_list 中, 则将输入串追加到 值表中, 如: ['藏'] = {'chang', 'zhang'} + if not table.find_index(hide_list[word], input_code_str) then + table.insert(hide_list[word], input_code_str) + return true + else + return false + end + end +end + +local function processor(key, env) + local engine = env.engine + local config = engine.schema.config + local context = engine.context + -- local top_cand_text = context:get_commit_text() + -- local preedit_code = context.input + local preedit_code = context:get_script_text() + local turndown_cand_key = config:get_string("key_binder/turn_down_cand") or "Control+j" + local drop_cand_key = config:get_string("key_binder/drop_cand") or "Control+d" + local action_map = { + [turndown_cand_key] = 'hide', + [drop_cand_key] = 'drop' + } + + -- local schema_id = config:get_string("schema/schema_id") + local schema_id = config:get_string("translator/dictionary") -- 多方案共用字典取主方案名称 + ---@diagnostic disable-next-line: undefined-global + local reversedb = ReverseLookup(schema_id) + if key:repr() == turndown_cand_key or key:repr() == drop_cand_key then + local cand = context:get_selected_candidate() + local action_type = action_map[key:repr()] + local ctx_map = { + ['word'] = cand.text, + ['code'] = preedit_code + } + local res = append_word_to_droplist(ctx_map, action_type, reversedb) + + context:refresh_non_confirmed_composition() -- 刷新当前输入法候选菜单, 实现看到实时效果 + if type(res) == "boolean" then + -- 期望被删的词和隐藏的词条写入文件(drop_words.lua, hide_words.lua) + write_word_to_file(action_type) + else + -- 期望 要调整词频的词条写入 turndown_freq_words.lua 文件 + write_word_to_file(res) + end + return 1 -- kAccept + end + + return 2 -- kNoop, 不做任何操作, 交给下个组件处理 +end + +return processor \ No newline at end of file diff --git a/lua/cold_word_drop/string.lua b/lua/cold_word_drop/string.lua 
--- cold_word_drop/string.lua
-- String helpers installed on the standard `string` and `utf8` tables:
--   string.split(str, sp, sp1)   split into fields
--   utf8.gsub(str, si, ei)       substring by character index
--   string.utf8_len / string.utf8_offset / string.utf8_sub   method aliases

--- Cut `str` into fields, each matched by the anchored `field_pattern`,
-- with exactly one delimiter character between consecutive fields.
-- Scanning explicitly (instead of gmatch with a `*` pattern) keeps the
-- result identical on every Lua release: older ones report an extra empty
-- match after each field.
local function split_fields(str, field_pattern)
    local fields = {}
    local pos, len = 1, #str
    while true do
        -- The pattern can match the empty string, so this always succeeds;
        -- an empty field gives e == pos - 1.
        local _, e = str:find(field_pattern, pos)
        fields[#fields + 1] = str:sub(pos, e)
        if e >= len then break end
        pos = e + 2 -- step over the delimiter
    end
    return fields
end

--- Split a string.
-- @param str string
-- @param sp  separator (default " "):
--            ""           -> list of UTF-8 characters
--            one char     -> split on that character, empty fields kept
--            longer       -> treated as a Lua pattern; every match is first
--                            replaced by `sp1`, then the text is split on
--                            the character(s) of `sp1`
-- @param sp1 replacement used for a multi-character `sp` (default "^")
-- @return table list of fields
function string.split(str, sp, sp1)
    sp = type(sp) == "string" and sp or " "
    if #sp == 0 then
        local chars = {}
        for ch in str:gmatch("([%z\1-\127\194-\244][\128-\191]*)") do
            chars[#chars + 1] = ch
        end
        return chars
    end

    local class
    if #sp == 1 then
        class = (sp == "%" and "%%" or sp)
    else
        sp1 = sp1 or "^"
        str = str:gsub(sp, sp1)
        class = sp1
    end
    return split_fields(str, "^[^" .. class .. "]*")
end

--- Substring by character position, following string.sub conventions:
-- indices are 1-based and inclusive, negative indices count from the end
-- (-1 is the last character).
-- @param si first character (default 1)
-- @param ei last character (default: end of string)
function utf8.gsub(str, si, ei)
    local len = #str
    -- Byte offset where character `i` starts, clamped to the string bounds.
    local function byte_index(i)
        if i >= 0 then
            return utf8.offset(str, i) or len + 1
        end
        return utf8.offset(str, i) or 1
    end

    local first = byte_index(si or 1)
    local last
    if ei == nil or ei == -1 then
        last = len
    else
        -- The character after `ei` starts one byte past the wanted end. For
        -- negative `ei` this was previously computed from `ei` itself, which
        -- cut one character too many.
        last = byte_index(ei + 1) - 1
    end
    return str:sub(first, last)
end

string.utf8_len = utf8.len
string.utf8_offset = utf8.offset
string.utf8_sub = utf8.gsub
return true
# - lua_filter@*cold_word_drop.filter # 词条隐藏、降频 - lua_filter@*long_word_filter # 长词优先 - uniquifier # 去重