2024-05-08 05:24:18 +02:00
|
|
|
|
-- Copyright (C) [Mirtle](https://github.com/mirtlecn)
|
|
|
|
|
-- License: CC BY-SA 4.0 (https://creativecommons.org/licenses/by-sa/4.0/)
|
2024-02-04 16:27:50 +01:00
|
|
|
|
-- 使用说明:<https://github.com/mirtlecn/rime-radical-pinyin/blob/master/search.lua.md>
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Escape Lua pattern magic characters so a string can be matched literally.
-- @param s string|nil text to escape
-- @return string the escaped text, or '' when `s` is nil
local function alt_lua_punc( s )
    if not s then return '' end
    -- The outer parentheses drop gsub's second result (the substitution
    -- count) so callers always receive exactly one value.
    return (s:gsub( '([%.%+%-%*%?%[%]%^%$%(%)%%])', '%%%1' ))
end
|
|
|
|
|
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Find the character positions at which `char` occurs in `text`.
-- Positions are counted in UTF-8 characters, starting at 1.
-- @param text string UTF-8 text to scan
-- @param char string a single UTF-8 character to look for
-- @return table set of positions: pos[i] == true when character i equals `char`
local function get_pos( text, char )
    local pos = {}
    if not text or not char then return pos end
    -- Plain search: `char` is literal text, not a Lua pattern.
    if not text:find( char, 1, true ) then return pos end
    -- Invalid UTF-8 cannot be split into characters; report no positions.
    if not utf8.len( text ) then return pos end

    -- Walk the string once by byte offsets instead of repeatedly stripping
    -- the first character with a pattern built from unescaped text.
    local index = 0
    local start = 1
    local byte_len = #text
    while start <= byte_len do
        local next_start = utf8.offset( text, 2, start ) or (byte_len + 1)
        index = index + 1
        if text:sub( start, next_start - 1 ) == char then pos[index] = true end
        start = next_start
    end
    return pos
end
|
|
|
|
|
|
|
|
|
|
--- Manually write a committed phrase into the user dictionary.
-- Per the original note this currently only works for double-pinyin input:
-- the code is consumed two bytes at a time.
-- @param s    committed text
-- @param code raw input code for the text
-- @param mem  object providing update_userdict (and, when available,
--             start_session / finish_session)
-- @param proj object whose apply() converts each two-byte code
-- @return 0 when the entry is ignored; nothing otherwise
local function update_dict_entry( s, code, mem, proj )
    -- Ignore empty text, text whose character count equals its byte count,
    -- and codes with an odd number of bytes.
    local ignored = #s == 0
    if not ignored then ignored = utf8.len( s ) == #s end
    if not ignored then ignored = #code % 2 ~= 0 end
    if ignored then
        log.info( '[search.lua]: Ignored' .. s )
        return 0
    end

    local entry = DictEntry()
    entry.text = s

    -- Character positions in the text that hold the '·' separator.
    local dot_at = {}
    if s:find( '·' ) and (utf8.len( s ) > 1) then dot_at = get_pos( s, '·' ) end

    local syllables = {}
    local index = 0
    for start = 1, #code, 2 do
        index = index + 1
        local converted = proj:apply( code:sub( start, start + 1 ), true )
        -- A 'dian' code sitting on a '·' position is left out of the entry code.
        local is_dot_code = converted == 'dian' and dot_at[index]
        if not is_dot_code then table.insert( syllables, converted ) end
    end

    entry.custom_code = table.concat( syllables, ' ' ) .. ' '
    if mem.start_session then mem:start_session() end -- new on librime 2024.05
    mem:update_userdict( entry, 1, '' )
    if mem.finish_session then mem:finish_session() end -- new on librime 2024.05
end
|
|
|
|
|
|
|
|
|
|
-- Schema-based lookup: look characters up by auxiliary code, then compare them
-- with the candidates. Slow, but can match codes produced by algebra rules.
--- Query the schema dictionary and return the set of matching entries.
-- @param search_string   auxiliary code to look up
-- @param mem             object providing dict_lookup / iter_dict
-- @param search_limit    limit forwarded to dict_lookup
-- @param code_projection optional projection applied to the code first
-- @return table set keyed by entry text (value true)
local function dict_init( search_string, mem, search_limit, code_projection )
    local query = search_string
    if code_projection then
        -- old librime(<= 2023.06) do not return original string when apply failed
        local projected = code_projection:apply( search_string, true )
        if projected and #projected > 0 then query = projected end
    end

    local found = {}
    if not mem:dict_lookup( query, true, search_limit ) then return found end
    for entry in mem:iter_dict() do found[entry.text] = true end
    return found
end
|
|
|
|
|
|
|
|
|
|
--- Check whether a candidate text is present in a lookup set.
-- @param dict_table table|nil set keyed by text, as built by dict_init
-- @param text       string candidate text to test
-- @return boolean true only when dict_table[text] is exactly true
local function dict_match( dict_table, text )
    -- The parameter is deliberately not called `table`, which would shadow
    -- the standard library inside this function.
    if type( dict_table ) ~= 'table' or text == nil then return false end
    return dict_table[text] == true
end
|
|
|
|
|
|
|
|
|
|
-- Reverse-db lookup: look the code up by character, then compare it with the
-- auxiliary code. Fast, but only matches codes that were not transformed by
-- algebra rules.
--- Test whether any code of `text` in the reverse dbs matches the auxiliary code.
-- @param code_projection optional projection applied to the auxiliary code
-- @param db_table        list of objects providing lookup(text)
-- @param wildcard        optional pattern; each match in `s` becomes '.*'
-- @param text            character to look up
-- @param s               auxiliary code; it is used as a Lua pattern
-- @param global_match    when truthy match anywhere in a code, otherwise
--                        only at the start of a code
-- @return boolean
local function reverse_lookup( code_projection, db_table, wildcard, text, s, global_match )
    -- `s` comes from user input and is used as a pattern, so a malformed one
    -- (for example a lone '[') would raise; treat that as "no match".
    local function matches( subject, pattern )
        local ok, found = pcall( string.find, subject, pattern )
        return ok and found ~= nil
    end

    if wildcard then s = s:gsub( wildcard, '.*' ) end
    if code_projection then
        -- old librime do not return original string when apply failed
        local p = code_projection:apply( s, true )
        if p and #p > 0 then s = p end
    end

    -- Loop-invariant patterns for the non-global case.
    local after_space = ' ' .. s
    local at_start = '^' .. s

    for _, db in ipairs( db_table ) do
        local code = db:lookup( text )
        if code then
            for part in code:gmatch( '%S+' ) do
                if global_match then
                    if matches( part, s ) then return true end
                elseif matches( part, after_space ) or matches( part, at_start ) then
                    return true
                end
            end
        end
    end
    return false
end
|
|
|
|
|
|
|
|
|
|
--- Emit a matched candidate now, or hold it back when single characters go first.
-- @param if_single_char_first when truthy, candidates longer than one
--                             character are collected instead of yielded
-- @param cand                 candidate (its .text is inspected)
-- @param long_word_cands      list that receives the held-back candidates
local function handle_long_cand(if_single_char_first, cand, long_word_cands)
    if not if_single_char_first or utf8.len( cand.text ) <= 1 then
        yield( cand )
        return
    end
    table.insert( long_word_cands, cand )
end
|
|
|
|
|
|
2024-01-29 11:51:45 +01:00
|
|
|
|
-- Module table; the init / func / tags_match / fini callbacks are attached to
-- it below and it is returned at the end of the file.
local f = {}
|
|
|
|
|
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Read the `search` configuration and wire up the context notifiers.
-- Sets env.if_schema_lookup / env.if_reverse_lookup and returns early (before
-- connecting any notifier) when script_translator is missing from
-- engine/translators or when neither lookup method is configured.
-- @param env filter environment (engine, name_space) supplied by the host
function f.init( env )
    local config = env.engine.schema.config
    local ns = 'search'
    env.if_schema_lookup = false
    env.if_reverse_lookup = false

    -- Config: only the script_translator engine is supported.
    local engine = config:get_list( 'engine/translators' )
    local engine_table = {}
    if engine then -- a missing list is treated the same as an empty one
        for i = 0, engine.size - 1 do engine_table[engine:get_value_at( i ).value] = true end
    end
    if not engine_table['script_translator'] then
        log.error( '[search.lua]: script_translator not found in engine/translators, search.lua will not work' )
        return
    end

    -- Config: how auxiliary codes are looked up
    -- --
    -- When a namespace is given directly in the engine section, that
    -- namespace is used as the schema to match against.
    -- When schema and db are set under the search node, the corresponding
    -- lookups are enabled.
    -- When schema is 0, false or absent, schema lookup is disabled.
    -- --
    local schema_name = config:get_string( ns .. '/schema' )
    if not env.name_space:find( '^%*' ) then schema_name = env.name_space end
    local schema_enabled = schema_name and schema_name ~= 'false' and schema_name ~= '0' and #schema_name > 0
    if schema_enabled then
        env.search = Memory( env.engine, Schema( schema_name ) )
        if env.search then
            env.if_schema_lookup = true
            env.search_limit = config:get_int( ns .. '/schema_search_limit' ) or 1000
        end
    end

    local db = config:get_list( ns .. '/db' )
    if db and db.size > 0 then
        -- Apply the '*' default BEFORE escaping: alt_lua_punc returns '' for a
        -- nil argument, and '' is truthy, so `alt_lua_punc( nil ) or '*'`
        -- never fell back; an empty wildcard pattern then matched at every
        -- position of the auxiliary code.
        local wildcard = config:get_string( ns .. '/wildcard' )
        if not wildcard or #wildcard == 0 then wildcard = '*' end
        env.wildcard = alt_lua_punc( wildcard )
        env.db_table = {}
        for i = 0, db.size - 1 do table.insert( env.db_table, ReverseLookup( db:get_value_at( i ).value ) ) end
        env.if_reverse_lookup = true
    end
    if not env.if_reverse_lookup and not env.if_schema_lookup then return end

    -- Config: auxiliary-code conversion rules
    -- --
    -- Example: - xlit/ABCD/1234/ lets ABCD be typed for 1234 (terra pinyin tones)
    local fuma_format = config:get_list( ns .. '/fuma_format' )
    if fuma_format and fuma_format.size > 0 then
        env.code_projection = Projection()
        env.code_projection:load( fuma_format )
    else
        env.code_projection = nil
    end

    -- Config: whether to also show candidates that do not match the auxiliary code
    env.show_other_cands = config:get_bool( ns .. '/show_other_cands' )

    -- Config: key that introduces the auxiliary code, backtick ` by default
    local search_key = config:get_string( 'key_binder/search' ) or config:get_string( ns .. '/key' ) or '`'
    env.search_key_alt = alt_lua_punc( search_key )
    local code_pattern = config:get_string( ns .. '/code_pattern' ) or '[a-z]'

    -- Config: segment tags
    local tag = config:get_list( ns .. '/tags' )
    if tag and tag.size > 0 then
        env.tag = {}
        for i = 0, tag.size - 1 do table.insert( env.tag, tag:get_value_at( i ).value ) end
    else
        env.tag = { 'abc' }
    end

    -- Config: manual write to the user dictionary
    local rules = config:get_list( ns .. '/input2code_format' )
    if rules and rules.size > 0 then
        env.projection = Projection()
        env.projection:load( rules )
        env.mem = Memory( env.engine, env.engine.schema )
    end

    -- On commit: push to the commit history and, when input-to-code rules
    -- are configured (env.mem is set), write the entry to the user dictionary.
    env.commit_notifier = env.engine.context.commit_notifier:connect(
        function( ctx )
            if not (env.have_select_commit and env.commit_code) then return end
            local commit_text = ctx:get_commit_text()
            if env.mem then update_dict_entry( commit_text, env.commit_code, env.mem, env.projection ) end
            ctx.commit_history:push( 'search.lua', commit_text )
            env.have_select_commit = false
        end
    )

    -- Take over selection: while the preedit before the search key still
    -- matches code_pattern, keep the search key in the input; otherwise strip
    -- the auxiliary code and commit directly.
    env.notifier = env.engine.context.select_notifier:connect(
        function( ctx )
            local prefix_pattern = '^(.-)' .. env.search_key_alt
            -- Input up to (not including) the first search key.
            local no_search_string = ctx.input:match( prefix_pattern )
            if not no_search_string or #no_search_string == 0 then return end

            local preedit = ctx:get_preedit()
            local edit = preedit.text:match( prefix_pattern )
            env.have_select_commit = true

            if edit and edit:match( code_pattern ) then
                ctx.input = no_search_string .. search_key
            else
                ctx.input = no_search_string
                env.commit_code = no_search_string
                ctx:commit()
            end
        end
    )
end
|
|
|
|
|
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Filter candidates by the auxiliary code typed after the search key.
-- Candidates pass through untouched unless the input contains the search key
-- followed by an auxiliary code and at least one lookup method is enabled.
-- Matching candidates are yielded first; candidates of type 'sentence' are
-- dropped; non-matching ones are yielded last only when
-- env.show_other_cands is set.
-- @param input translation whose iter() yields candidates
-- @param env   filter environment prepared by f.init
function f.func( input, env )
    -- Check that lookup is configured BEFORE building the pattern:
    -- env.search_key_alt is nil when f.init returned early, and concatenating
    -- it would raise.
    local lookup_enabled = env.if_reverse_lookup or env.if_schema_lookup
    local code, fuma
    if lookup_enabled and env.search_key_alt then
        code, fuma = env.engine.context.input:match( '^(.-)' .. env.search_key_alt .. '(.+)$' )
    end
    if not lookup_enabled or (not code or #code == 0) or (not fuma or #fuma == 0) then
        for cand in input:iter() do yield( cand ) end
        return
    end

    local if_single_char_first = env.engine.context:get_option( 'search_single_char' )
    local dict_table
    local fuma_2
    local other_cand = {}
    local long_word_cands = {}

    -- NOTE(review): the schema lookup runs on the auxiliary code before it is
    -- split at a second search key; kept as in the original.
    if env.if_schema_lookup then dict_table = dict_init( fuma, env.search, env.search_limit, env.code_projection ) end

    -- A second search key splits the auxiliary code in two.
    if fuma:find( env.search_key_alt ) then fuma, fuma_2 = fuma:match( '^(.-)' .. env.search_key_alt .. '(.*)$' ) end

    -- Two-code matching is only used with reverse lookup and no schema lookup.
    local two_code_mode = fuma_2 and #fuma_2 > 0 and env.if_reverse_lookup and not env.if_schema_lookup

    for cand in input:iter() do
        if cand.type ~= 'sentence' then
            local cand_text = cand.text
            local text = cand_text
            local text_2 = nil

            -- For multi-character candidates take the first and second
            -- characters. Slice by byte offsets: the text is never used as a
            -- pattern, so magic characters in a candidate cannot break it.
            local char_count = utf8.len( cand_text )
            if char_count and char_count > 1 then
                local second_start = utf8.offset( cand_text, 2 )
                local third_start = utf8.offset( cand_text, 3 )
                text = cand_text:sub( 1, second_start - 1 )
                text_2 = cand_text:sub( second_start, third_start - 1 )
            end

            local matched
            if two_code_mode then
                -- First code matches the first character, and the second code
                -- matches the second character OR matches anywhere in the
                -- first character's code.
                matched = reverse_lookup( env.code_projection, env.db_table, env.wildcard, text, fuma ) and
                              ((text_2 and reverse_lookup( env.code_projection, env.db_table, env.wildcard, text_2, fuma_2 )) or
                                  reverse_lookup( env.code_projection, env.db_table, env.wildcard, text, fuma_2, true ))
            else
                -- Match the first character against the auxiliary code.
                matched = (env.if_reverse_lookup and reverse_lookup( env.code_projection, env.db_table, env.wildcard, text, fuma )) or
                              (env.if_schema_lookup and dict_match( dict_table, text ))
            end

            if matched then
                handle_long_cand(if_single_char_first, cand, long_word_cands)
            else
                table.insert( other_cand, cand )
            end
        end
    end

    -- Emit the remaining candidates: held-back long words first, then (when
    -- enabled) the ones that did not match.
    for _, cand in ipairs( long_word_cands ) do yield( cand ) end
    if env.show_other_cands then for _, cand in ipairs( other_cand ) do yield( cand ) end end
end
|
|
|
|
|
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Report whether the segment carries any of the configured tags.
-- @param seg segment whose `tags` set is inspected
-- @param env filter environment; env.tag is the list of tags to accept
-- @return boolean
function f.tags_match( seg, env )
    -- env.tag is unset when f.init returned early; ipairs( nil ) would raise,
    -- so report "no match" instead.
    local tags = env.tag
    if not tags then return false end
    for _, v in ipairs( tags ) do if seg.tags[v] then return true end end
    return false
end
|
|
|
|
|
|
2024-05-08 05:24:18 +02:00
|
|
|
|
--- Tear down what f.init set up.
-- Does nothing unless at least one lookup method was enabled; otherwise
-- disconnects both notifiers and, when a Memory object is held, releases it
-- and runs a full garbage collection.
-- @param env filter environment prepared by f.init
function f.fini( env )
    local lookup_enabled = env.if_reverse_lookup or env.if_schema_lookup
    if not lookup_enabled then return end

    env.notifier:disconnect()
    env.commit_notifier:disconnect()

    if not (env.mem or env.search) then return end
    env.mem = nil
    env.search = nil
    collectgarbage( 'collect' )
end
|
|
|
|
|
|
|
|
|
|
-- Export the module table with its init / func / tags_match / fini callbacks.
return f
|