fix #220 (again)

This commit is contained in:
Dvel 2023-04-25 21:11:00 +08:00
parent 10943c808a
commit 93992ec6d3
2 changed files with 27 additions and 45 deletions

View File

@ -21,7 +21,8 @@ function date_translator(input, seg, env)
local cand = Candidate("date", seg.start, seg._end, os.date("%Y.%m.%d"), "")
cand.quality = 100
yield(cand)
local cand = Candidate("date", seg.start, seg._end, os.date("%Y 年 ")..tostring(tonumber(os.date("%m")))..os.date(" 月 %d 日"), "")
local cand = Candidate("date", seg.start, seg._end,
os.date("%Y 年 ") .. tostring(tonumber(os.date("%m"))) .. os.date(" 月 %d 日"), "")
cand.quality = 100
yield(cand)
end
@ -62,13 +63,10 @@ function date_translator(input, seg, env)
cand.quality = 100
yield(cand)
end
-- 输出内存
if input == "gccount" then
local cand = Candidate("date", seg.start, seg._end, ("%.f"):format(collectgarbage('count')), "")
cand.quality = 100
yield(cand)
end
-- -- 输出内存
-- local cand = Candidate("date", seg.start, seg._end, ("%.f"):format(collectgarbage('count')), "")
-- cand.quality = 100
-- yield(cand)
-- if input == "xxx" then
-- collectgarbage()
-- local cand = Candidate("date", seg.start, seg._end, "collectgarbage()", "")
@ -168,47 +166,31 @@ function long_word_filter(input, env)
local count = config:get_int(env.name_space .. "/count") or 2
local idx = config:get_int(env.name_space .. "/idx") or 4
local code = env.engine.context.input -- 当前编码
if string.find(code, "[aeo]") then -- 要提升的词汇的拼音一定是包含 a o e 的
local l = {}
local firstWordLength = 0 -- 记录第一个候选词的长度,提前的候选词至少要比第一个候选词长
local done = 0 -- 记录筛选了多少个词条(只提升 count 个词的权重)
local i = 1
for cand in input:iter() do
-- 找到要提升的词
local leng = utf8.len(cand.text)
if (firstWordLength < 1 or i < idx) then
i = i + 1
firstWordLength = leng
yield(cand)
elseif ((leng > firstWordLength) and (done < count)) and (string.find(cand.text, "[%w%p%s]+") == nil) then
yield(cand)
done = done + 1
else
table.insert(l, cand)
end
-- 找齐了或者 l 太大了,就不找了
if (done == count) or (#l > 50) then
break
end
end
-- yield l
for _, cand in ipairs(l) do
local l = {}
local firstWordLength = 0 -- 记录第一个候选词的长度,提前的候选词至少要比第一个候选词长
local done = 0 -- 记录筛选了多少个词条(只提升 count 个词的权重)
local i = 1
for cand in input:iter() do
-- 找到要提升的词
local leng = utf8.len(cand.text)
if (firstWordLength < 1 or i < idx) then
i = i + 1
firstWordLength = leng
yield(cand)
end
-- l 弄完了立马给丫回收了
l = nil
if collectgarbage('count') < 3000 then
collectgarbage("step")
elseif ((leng > firstWordLength) and (done < count)) and (string.find(cand.text, "[%w%p%s]+") == nil) then
yield(cand)
done = done + 1
else
collectgarbage('collect')
table.insert(l, cand)
end
-- yield 其他
for cand in input:iter() do
yield(cand)
-- 找齐了或者 l 太大了,就不找了
if (done == count) or (#l > 50) then
break
end
end
for _, cand in ipairs(l) do
yield(cand)
end
for cand in input:iter() do
yield(cand)
end

View File

@ -66,11 +66,11 @@ engine:
- reverse_lookup_translator@liangfen # 反查,两分拼字
- lua_translator@unicode # Unicode
filters:
- lua_filter@long_word_filter # 长词优先
- simplifier@emoji # Emoji
- simplifier@traditionalize # 简繁切换
- lua_filter@v_filter # v 模式 symbols 优先(否则是英文优先)
- lua_filter@reduce_english_filter # 降低部分英语单词在候选项的位置
- lua_filter@long_word_filter # 长词优先
- uniquifier # 去重