rime-ice/lua/search.lua

248 lines
8.0 KiB
Lua
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Copyright (C) Mirtle <mirtle.cn@outlook.com>
-- [CC BY 3.0 DEED](https://creativecommons.org/licenses/by/3.0/deed)
-- 使用说明:<https://github.com/mirtlecn/rime-radical-pinyin/blob/master/search.lua.md>
-- 感谢 [AuxFilter](https://github.com/HowcanoeWang/rime-lua-aux-code/blob/main/lua/aux_code.lua) 提供参考
local function alt_lua_punc(s)
if s then
return s:gsub("([%.%+%-%*%?%[%]%^%$%(%)%%])", "%%%1")
else
return ''
end
end
local f = {}
-- 逻辑
-- 当在 engine 出直接指定了 namespace 则使用该 namespace 进行 schema 匹配
-- 当在 search_in_cand 节点下指定了 schema 和 db 则进行相应匹配
-- 当该节点下 schema 为 0 或者 false或者不存在时不进行相应匹配
function f.init(env)
local config = env.engine.schema.config
local ns = 'search'
-- f.mem_main = Memory(env.engine, env.engine.schema)
-- local rules = config:get_list('preedit_rules')
-- if rules then
-- f.projection = Projection()
-- f.projection:load(rules)
-- end
f.schema = config:get_string(ns .. '/schema')
if f.schema == 'false' or f.schema == '0' then
goto checkdb
end
if not env.name_space:find('^%*') then
f.schema = env.name_space
end
if f.schema then
f.mem = Memory(env.engine, Schema(f.schema))
end
f.schema_search_limit = config:get_int(ns .. "/schema_search_limit") or 1000
::checkdb::
f.db = config:get_list(ns .. '/db')
f.if_schema_lookup = false
f.if_reverse_lookup = false
if f.schema and f.mem then
f.if_schema_lookup = true
-- log.error('if_schema_lookup: ' .. 'true')
end
if f.db then
f.wildcard = config:get_string(ns .. "/wildcard") or "'"
f.if_reverse_lookup = true
-- log.error('if_reverse_lookup: ' .. 'true')
end
f.sort = config:get_bool(ns .. "/show_other_cands")
-- 反引号作为查找的引导符号,需要加入 speller 的字母表当中
f.search_key = config:get_string("key_binder/search") or config:get_string(ns .. "/key") or '`'
-- 处理一下输入码,如果还有没有上屏的词,保留辅助码,否则,清除上屏码
f.search_key_string = alt_lua_punc(f.search_key)
-- 如果不使用任何反查手段,则不接管选词逻辑
if not f.if_reverse_lookup and not f.if_schema_lookup then
return
end
-- 接管选词逻辑,是词组则始终保留引导码,否则直接上屏
env.notifier = env.engine.context.select_notifier:connect(function(ctx)
if not ctx.input:find("^[a-z;]+" .. f.search_key_string) then
return
end
local preedit = ctx:get_preedit()
local no_search_string = ctx.input:match("^(.-)" .. f.search_key_string)
-- log.warning('[no_search_string]: '..no_search_string)
local edit = preedit.text:match('^(.-)' .. f.search_key_string)
-- log.warning('[edit]: ' .. edit)
ctx.input = no_search_string
if edit and edit:match('[a-z;]') then
ctx.input = ctx.input .. f.search_key
else
ctx:commit()
-- local t = f.entry()
-- log.warning(edit .. '|' .. no_search_string)
-- 手动推入历史记录
-- ctx.commit_history:push("user_phrase", edit)
-- 手动写入用户词库
-- f.update_dict_entry(edit, no_search_string)
end
end)
end
-- function f.update_dict_entry(s, code)
-- local codeLen = #code
-- if s == '' or (#code % 2 ~= 0) then
-- log.warning('Ignored!' .. s)
-- return 0
-- end
-- local e = DictEntry()
-- e.text = s
-- local custom_code = {}
-- for i = 1, #code, 2 do
-- local s = code:sub(i, i + 1)
-- local c = f.projection:apply(s, true)
-- table.insert(custom_code, c)
-- end
-- e.custom_code = table.concat(custom_code, " ") .. ' '
-- log.info("[search.lua]: " .. e.text .. ' ' .. e.custom_code)
-- f.mem_main:update_userdict(e, 1, "")
-- end
-- 查询反查词典当中的匹配项,并且返回字表
function f.dict_init(search_string)
local dict_table = {}
if f.mem:dict_lookup(search_string, true, f.schema_search_limit) then
for entry in f.mem:iter_dict() do
-- log.error('text: ' .. entry.text .. ' code: ' .. entry.comment)
-- table.insert(dict_table, entry.text)
dict_table[entry.text] = true
-- dict_table[entry.text] = entry.comment
end
end
return dict_table
end
-- 通过 schema 的方式查询(以码查字,然后轮询匹配,非常慢,但能够匹配到算法转换过的码)
function f.dict_match(table, text)
-- for i, dict in ipairs(table) do
-- if text == dict then
-- return true
-- end
-- end
if table[text] == true then
return true
end
return false
end
-- 通过 reverse db 查询(以字查码,然后比对辅码是否相同,比校快,但只能匹配未经算法转换的码)
function f.reverse_lookup(text, s)
local list = f.db
s = s:gsub(f.wildcard, '.*')
-- log.error(s)
for i = 0, list.size - 1 do
local code = ReverseLookup(list:get_value_at(i).value):lookup(text)
if code:find(' ' .. s) or code:find('^' .. s) then
return true
end
end
return false
end
function f.func(input, env)
local input_code = env.engine.context.input
-- 当且仅当当输入码中含有辅码引导符号,并有有辅码存在,进入匹配逻辑
-- 当无任何查询方式存在,直接上屏
if (input_code:find("^[a-z;]+" .. f.search_key_string .. '.+$')) and (f.if_reverse_lookup or f.if_schema_lookup) then
f.search_string = input_code:match("^.*" .. f.search_key_string .. "(.*)$")
else
for cand in input:iter() do
yield(cand)
end
return
end
-- 查字时是否单字优先
local if_single_char_first = env.engine.context:get_option("search_single_char")
local dict_table
if f.if_schema_lookup then
dict_table = f.dict_init(f.search_string)
end
local other_cand = {}
local long_word_cands = {}
for cand in input:iter() do
local type = cand.type -- 类型
local text = cand.text -- 候选文字
local comment = cand.comment
-- if utf8.len(text) > 1 and if_single_char_first then
-- table.insert(other_cand_last, cand)
-- goto skip
-- end
-- 处理经过 simplify 转化过的候选,使之能够正确匹配
if cand:get_dynamic_type() == "Shadow" then
local originalCand = cand:get_genuine()
cand = ShadowCandidate(originalCand, originalCand.type, cand.text, cand.comment)
type = cand.type
text = cand.text
end
-- 只有 script_translator 下的用户词和词才去匹配
if (type == 'phrase' or type == 'user_phrase') then
-- 当候选多于一个汉字,则取第一个匹配
if utf8.len(text) > 1 then
text = text:sub(1, utf8.offset(text, 2) - 1)
end
else
table.insert(other_cand, cand)
goto skip
end
-- 匹配逻辑
if (f.if_reverse_lookup and f.reverse_lookup(text, f.search_string)) or
(f.if_schema_lookup and f.dict_match(dict_table, text)) then
if if_single_char_first and utf8.len(cand.text) > 1 then
table.insert(long_word_cands, cand)
else
yield(cand)
end
else
table.insert(other_cand, cand)
end
::skip::
end
-- 上屏其余的候选
for i, cand in ipairs(long_word_cands) do
yield(cand)
end
if f.sort then
for i, cand in ipairs(other_cand) do
yield(cand)
end
end
end
function f.fini(env)
if not f.if_reverse_lookup and not f.if_schema_lookup then
return
end
env.notifier:disconnect()
-- env.commit_notifier:disconnect()
end
return f