style: Lua 统一缩进 4 空格

This commit is contained in:
Dvel 2024-02-08 18:39:49 +08:00
parent 1ffdce6a84
commit efcb4e2bdd
17 changed files with 1054 additions and 1012 deletions

View File

@ -42,39 +42,51 @@
-- puts(INFO,__FILE__(),__LINE__(),__FUNC__() , ...)
--
-- global variable
--- Source name of the function running at stack level `n`.
-- @param n stack level passed to debug.getinfo; defaults to 2 (the caller)
-- @return the `source` field, or nil when that stack level does not exist
function __FILE__(n)
    local info = debug.getinfo(n or 2, 'S')
    return info and info.source
end

--- Line currently executing at stack level `n`.
-- @param n stack level passed to debug.getinfo; defaults to 2 (the caller)
-- @return the `currentline` field, or nil when that stack level does not exist
function __LINE__(n)
    local info = debug.getinfo(n or 2, 'l')
    return info and info.currentline
end

--- Name of the function running at stack level `n`.
-- @param n stack level passed to debug.getinfo; defaults to 2 (the caller)
-- @return the `name` field (may itself be nil), or nil when the level does not exist
function __FUNC__(n)
    local info = debug.getinfo(n or 2, 'n')
    return info and info.name
end

-- Level tags. `puts` in this file matches them as prefixes of its `tag` argument.
INFO = "log"
WARN = "warn"
ERROR = "error"
DEBUG = "trace"
CONSOLE = "console"
--- Render the variadic arguments as one log-message suffix.
-- The result is a tab followed by ": <tostring(value)>" for every argument,
-- in call order.
-- @param ... values to render
-- @return the formatted string ("\t" alone when called without arguments)
local function tran_msg(...)
    local parts = { "\t" }
    -- select("#", ...) counts nil arguments too, so they are printed as "nil"
    -- instead of being skipped.
    for i = 1, select("#", ...) do
        parts[#parts + 1] = ": " .. tostring((select(i, ...)))
    end
    return table.concat(parts)
end
--- Write a tagged message to the sink selected by the tag's prefix.
-- The tag is compared, in order, against the global level strings INFO, WARN,
-- ERROR, DEBUG and CONSOLE (each is used as an anchored Lua pattern). A level
-- whose global is nil/false is skipped. When the global `log` table is absent
-- the message goes to `print` instead.
-- @param tag string whose prefix selects the level; non-strings are ignored
-- @param ... values appended to the tag via tran_msg
local function puts(tag, ...)
    if type(tag) ~= "string" then return end

    local sink
    if INFO and tag:match("^" .. INFO) then
        sink = log and log.info or print
    elseif WARN and tag:match("^" .. WARN) then
        sink = log and log.warning or print
    elseif ERROR and tag:match("^" .. ERROR) then
        sink = log and log.error or print
    elseif DEBUG and tag:match("^" .. DEBUG) then
        -- DEBUG messages are routed to log.error, as in the original code.
        sink = log and log.error or print
    elseif CONSOLE and tag:match("^" .. CONSOLE) then
        sink = print
    else
        return
    end
    sink(tag .. tran_msg(...))
end
return puts

View File

@ -1,4 +1,4 @@
-- List of candidate texts returned by this module.
-- NOTE(review): presumably the table loaded through
-- require("cold_word_drop.drop_words") — confirm against the file path.
local drop_words = {
    "示~例~",
}
return drop_words

View File

@ -1,4 +1,3 @@
local drop_list = require("cold_word_drop.drop_words")
local hide_list = require("cold_word_drop.hide_words")
local turndown_freq_list = require("cold_word_drop.turndown_freq_words")
@ -24,8 +23,8 @@ local function filter(input, env)
i = i + 1
---@diagnostic disable-next-line: undefined-global
yield(cand)
else
table.insert(cands, cand)
else
table.insert(cands, cand)
end
else
table.insert(cands, cand)
@ -43,13 +42,13 @@ local function filter(input, env)
(hide_list[cand.text] and table.find_index(hide_list[cand.text], cpreedit_code))
)
then
---@diagnostic disable-next-line: undefined-global
---@diagnostic disable-next-line: undefined-global
yield(cand)
end
end
for cand in input:iter() do
yield(cand)
end
for cand in input:iter() do
yield(cand)
end
end
return filter

View File

@ -1,4 +1,4 @@
-- Map from candidate text to a list of input codes.
-- NOTE(review): presumably the table loaded through
-- require("cold_word_drop.hide_words") — confirm against the file path.
local hide_words = {
    ["示~例~"] = { "shil", "shili", },
}
return hide_words

View File

@ -27,29 +27,29 @@ function metatable_chk(tab)
end
end
--- Call `func(value, index)` for every index from 1 to #tab.
-- @param tab array-like table
-- @param func callback receiving the value first, then the index
-- @return `tab` itself, so calls can be chained
table.eachi = function(tab, func)
    for i = 1, #tab do
        func(tab[i], i)
    end
    return tab
end
--- Call `func(value, index)` for every element visited by ipairs.
-- Unlike table.eachi, iteration stops at the first nil element.
-- @param tab array-like table
-- @param func callback receiving the value first, then the index
-- @return `tab` itself, so calls can be chained
table.eacha = function(tab, func)
    for i, v in ipairs(tab) do
        func(v, i)
    end
    return tab
end
--- Call `func(value, key)` for every pair visited by pairs.
-- Visiting order is whatever pairs yields and must not be relied on.
-- @param tab any table
-- @param func callback receiving the value first, then the key
-- @return `tab` itself, so calls can be chained
table.each = function(tab, func)
    for k, v in pairs(tab) do
        func(v, k)
    end
    return tab
end
--- Position of `elm` in `tab`, as reported by table.find.
-- All arguments are forwarded to table.find; only its second result
-- (the index) is returned.
-- @param tab table to search
-- @param elm element to look for
-- @return the index, or nil when table.find reports no match
table.find_index = function(tab, elm, ...)
    local _, i = table.find(tab, elm, ...)
    return i
end
table.find = function (tab, elm, func)
table.find = function(tab, elm, func)
for i, v in ipairs(tab) do
if elm == v then
return v, i
@ -57,17 +57,17 @@ table.find = function (tab, elm, func)
end
end
--- Look up `elm` in `tab` through table.find.
-- NOTE(review): the lookup result is discarded and nothing is returned, and
-- the extra arguments are not forwarded. This looks unfinished; the behaviour
-- is kept unchanged until the intended contract is confirmed.
table.find_with_func = function(tab, elm, ...)
    local i, v = table.find(tab, elm)
end
--- Remove the first occurrence of `elm` from `tab`.
-- The position is resolved with table.find_index and the element is removed
-- with table.remove, so later elements shift down by one.
-- @param tab array-like table, modified in place
-- @param elm element to remove
-- @return the removed element, or nil when `elm` was not found
table.delete = function(tab, elm, ...)
    local index = table.find_index(tab, elm)
    return index and table.remove(tab, index)
end
table.find_all = function (tab, elm, ...)
table.find_all = function(tab, elm, ...)
local tmptab = setmetatable({}, { __index = table })
local _func = (type(elm) == "function" and elm) or function (v, k, ...) return v == elm end
local _func = (type(elm) == "function" and elm) or function(v, k, ...) return v == elm end
for k, v in pairs(tab) do
if _func(v, k, ...) then
tmptab:insert(v)
@ -77,7 +77,7 @@ table.find_all = function (tab, elm, ...)
end
table.select = table.find_all
table.reduce = function (tab, func, arg)
table.reduce = function(tab, func, arg)
local new, old = arg, arg
for i, v in ipairs(tab) do
new, old = func(v, new)
@ -85,17 +85,17 @@ table.reduce = function (tab, func, arg)
return new, arg
end
--- Build a new array by applying `func` to every element visited by ipairs.
-- Only the first value returned by `func` is stored. The result has the
-- `table` library as its __index, so it supports method-style calls such as
-- `result:insert(x)`.
-- @param tab array-like table (not modified)
-- @param func callback `func(value, index)`; defaults to the identity
-- @return the new array
table.map = function(tab, func)
    local newtab = setmetatable({}, { __index = table })
    func = func or function(v, i) return v, i end
    for i, v in ipairs(tab) do
        newtab[i] = func(v, i)
    end
    return newtab
end
table.map_hash = function (tab, func) -- table to list of array { key, v}
table.map_hash = function(tab, func) -- table to list of array { key, v}
local newtab = setmetatable({}, { __index = table })
func = func or function (k, v) return { k, v } end
func = func or function(k, v) return { k, v } end
for k, v in pairs(tab) do
newtab:insert(func(k, v))
end

View File

@ -1,4 +1,3 @@
require('cold_word_drop.string')
require("cold_word_drop.metatable")
-- local puts = require("tools/debugtool")
@ -146,7 +145,7 @@ local function processor(key, env)
return 1 -- kAccept
end
return 2 -- kNoop, 不做任何操作, 交给下个组件处理
return 2 -- kNoop, 不做任何操作, 交给下个组件处理
end
return processor

View File

@ -3,38 +3,39 @@
-- string.utf8_len = utf8.len
-- string.utf8_offset= utf8.offset
-- string.utf8_sub= utf8.sub
--- Split `str` into a list of pieces.
-- The separator decides the strategy:
--   * not a string: a single space is used;
--   * empty string: `str` is split into UTF-8 characters (one lead byte
--     followed by its continuation bytes);
--   * one character: pieces are the runs of characters other than it;
--   * longer: every match of `sp` (used as a Lua pattern) is first replaced
--     by `sp1`, then the text is split on `sp1`.
-- Because the run pattern uses `*`, empty pieces can appear in the result;
-- whether they do depends on how string.gmatch treats empty matches.
-- @param str text to split
-- @param sp separator (see above)
-- @param sp1 stand-in character for multi-character separators, default "^"
-- @return array of pieces
function string.split(str, sp, sp1)
    sp = type(sp) == "string" and sp or " "

    local pattern
    if #sp == 0 then
        pattern = "([%z\1-\127\194-\244][\128-\191]*)"
    elseif #sp == 1 then
        -- "%" must be escaped to stay literal inside the character set.
        pattern = "[^" .. (sp == "%" and "%%" or sp) .. "]*"
    else
        sp1 = sp1 or "^"
        str = str:gsub(sp, sp1)
        pattern = "[^" .. sp1 .. "]*"
    end

    local tab = {}
    for piece in str:gmatch(pattern) do
        tab[#tab + 1] = piece
    end
    return tab
end
--- Substring by UTF-8 character positions (the character-wise counterpart of
-- string.sub; despite its name it performs no substitution).
-- Positions are 1-based and inclusive; negative positions count from the end,
-- so `ei = -1` means "through the last character". Positions beyond either
-- end are clamped to the string.
-- @param str UTF-8 encoded string
-- @param si first character position
-- @param ei last character position; defaults to the character count
-- @return the selected substring
function utf8.gsub(str, si, ei)
    -- Byte offset where character `i` starts, clamped when out of range.
    local function byte_index(ustr, i)
        local pos = utf8.offset(ustr, i)
        if pos then return pos end
        return i >= 0 and #ustr + 1 or 1
    end

    local first = byte_index(str, si)
    ei = ei or utf8.len(str)

    local last
    if ei == -1 then
        -- "Start of the character after the last one" cannot be expressed
        -- as an offset from the end, so take the byte length directly.
        last = #str
    else
        -- The end byte is the byte just before the start of character ei + 1.
        last = byte_index(str, ei + 1) - 1
    end
    return str:sub(first, last)
end

-- Method-style aliases, e.g. ("文字"):utf8_len().
string.utf8_len = utf8.len
string.utf8_offset = utf8.offset
string.utf8_sub = utf8.gsub
return true

View File

@ -1,4 +1,4 @@
-- Map from candidate text to a list of input codes.
-- NOTE(review): presumably the table loaded through
-- require("cold_word_drop.turndown_freq_words") — confirm against the file path.
local turndown_freq_words = {
    ["示~例~"] = { "shili", },
}
return turndown_freq_words

View File

@ -3,79 +3,79 @@
-- 不提升包含英文、数字、emoji、假名的候选项
-- Inclusive code-point ranges that isEmoji treats as emoji.
local EMOJI_RANGES = {
    { 0x1F600, 0x1F64F }, -- Emoticons
    { 0x1F300, 0x1F5FF }, -- Misc Symbols and Pictographs
    { 0x1F680, 0x1F6FF }, -- Transport and Map
    { 0x2600,  0x26FF },  -- Misc symbols
    { 0x2700,  0x27BF },  -- Dingbats
    { 0xFE00,  0xFE0F },  -- Variation Selectors
    { 0x1F900, 0x1F9FF }, -- Supplemental Symbols and Pictographs
    { 0x1F1E6, 0x1F1FF }, -- Flags (iOS)
}

--- Tell whether `text` contains at least one code point in an emoji range.
-- @param text UTF-8 encoded string
-- @return true when any code point falls inside EMOJI_RANGES, false otherwise
local function isEmoji(text)
    for _, char in utf8.codes(text) do
        for _, range in ipairs(EMOJI_RANGES) do
            if char >= range[1] and char <= range[2] then
                return true
            end
        end
    end
    return false
end
--- Tell whether `str` contains at least one Japanese kana character.
-- @param str UTF-8 encoded string
-- @return true when any code point is hiragana or katakana, false otherwise
local function containsJapaneseKana(str)
    for _, code in utf8.codes(str) do
        -- Hiragana block: U+3040 .. U+309F
        if code >= 0x3040 and code <= 0x309F then
            return true
        end
        -- Katakana block: U+30A0 .. U+30FF
        if code >= 0x30A0 and code <= 0x30FF then
            return true
        end
    end
    return false
end
local M = {}

--- Read the filter settings from the schema configuration.
-- A leading "*" is stripped from `env.name_space`, then two integers are read
-- from below that name space and stored on the module table:
--   M.count  how many candidates to promote (default 2)
--   M.idx    position at which promoted candidates are inserted (default 4)
-- @param env component environment providing `name_space` and `engine.schema.config`
function M.init(env)
    local config = env.engine.schema.config
    -- "%*" is a literal asterisk; only one at the very start is removed.
    env.name_space = env.name_space:gsub("^%*", "")
    M.count = config:get_int(env.name_space .. "/count") or 2
    M.idx = config:get_int(env.name_space .. "/idx") or 4
end
--- Move up to M.count longer candidates forward, starting at position M.idx.
-- Candidates before position M.idx pass through untouched. From there on, a
-- candidate is promoted (yielded immediately) only when it is longer than the
-- first candidate and contains no ASCII letter/digit, kana or emoji; all
-- others are held back. Scanning stops once M.count candidates were promoted
-- or more than 50 are held back; then the held-back candidates and the rest
-- of the input are yielded in their original order.
-- @param input candidate stream; `input:iter()` is expected to continue where
--        the previous loop stopped
function M.func(input)
    local held_back = {}
    local first_len = 0 -- length of the first candidate; promoted ones must be longer
    local promoted = 0  -- number of candidates promoted so far
    local position = 1
    for cand in input:iter() do
        local len = utf8.len(cand.text)
        -- Only the first candidate's length serves as the reference.
        if first_len < 1 then
            first_len = len
        end
        if position < M.idx then
            -- Candidates before M.idx are left alone.
            position = position + 1
            yield(cand)
        elseif len <= first_len or cand.text:find("[%a%d]") or containsJapaneseKana(cand.text) or isEmoji(cand.text) then
            held_back[#held_back + 1] = cand
        else
            -- Long word: yield it right away.
            yield(cand)
            promoted = promoted + 1
        end
        -- Stop once enough were promoted or too many are held back.
        if promoted == M.count or #held_back > 50 then
            break
        end
    end
    -- Emit the held-back candidates, then whatever is left of the input.
    for _, cand in ipairs(held_back) do
        yield(cand)
    end
    for cand in input:iter() do
        yield(cand)
    end
end
return M

File diff suppressed because it is too large Load Diff

View File

@ -3,116 +3,149 @@
-- 触发前缀默认为 recognizer/patterns/number 的第 2 个字符,即 R
--- Split a numeric string into integer digits, decimal point and decimals.
-- Matching is anchored at the start of `str`; each part may be empty.
-- @param str text such as "12.34"
-- @return table with string fields `int`, `dot` ("." or "") and `dec`
local function splitNumPart(str)
    local int, dot, dec = string.match(str, "^(%d*)(%.?)(%d*)")
    return { int = int, dot = dot, dec = dec }
end
--- Truncate a number to `n` decimal places.
-- The value is scaled by 10^n, rounded down with math.floor and scaled back,
-- so digits beyond `n` places are dropped rather than rounded.
-- @param nNum number, or a value convertible with tonumber
-- @param n number of decimal places; fractional values are floored and
--        negative values are treated as 0 (default 0)
-- @return the truncated number
local function GetPreciseDecimal(nNum, n)
    if type(nNum) ~= "number" then nNum = tonumber(nNum) end
    n = math.floor(n or 0)
    if n < 0 then n = 0 end
    local scale = 10 ^ n
    return math.floor(nNum * scale) / scale
end
local function decimal_func(str, posMap, valMap)
local dec
posMap = posMap or {[1]=""; [2]=""; [3]=""; [4]=""}
valMap = valMap or {[0]=""; ""; ""; "" ;""; ""; ""; ""; ""; ""}
if #str>4 then dec = string.sub(tostring(str), 1, 4) else dec =tostring(str) end
dec = string.gsub(dec, "0+$", "")
local dec
posMap = posMap or { [1] = "", [2] = "", [3] = "", [4] = "" }
valMap = valMap or { [0] = "", "", "", "", "", "", "", "", "", "" }
if #str > 4 then dec = string.sub(tostring(str), 1, 4) else dec = tostring(str) end
dec = string.gsub(dec, "0+$", "")
if dec == "" then return "" end
if dec == "" then return "" end
local result = ""
for pos =1, #dec do
local val = tonumber(string.sub(dec, pos, pos))
if val~=0 then result = result .. valMap[val] .. posMap[pos] else result = result .. valMap[val] end
end
result=result:gsub(valMap[0]..valMap[0] ,valMap[0])
return result:gsub(valMap[0]..valMap[0] ,valMap[0])
local result = ""
for pos = 1, #dec do
local val = tonumber(string.sub(dec, pos, pos))
if val ~= 0 then result = result .. valMap[val] .. posMap[pos] else result = result .. valMap[val] end
end
result = result:gsub(valMap[0] .. valMap[0], valMap[0])
return result:gsub(valMap[0] .. valMap[0], valMap[0])
end
-- 把数字串按千分位四位数分割,进行转换为中文
local function formatNum(num,t)
local digitUnit,wordFigure
local result=""
num=tostring(num)
if tonumber(t) < 1 then digitUnit = {"", "", "",""} else digitUnit = {"","","",""} end
if tonumber(t) <1 then
wordFigure = {"","","","","","","","","",""}
else wordFigure = {"","","","","","","","","",""} end
if string.len(num)>4 or tonumber(num)==0 then return wordFigure[1] end
local lens=string.len(num)
for i=1,lens do
local n=wordFigure[tonumber(string.sub(num,-i,-i))+1]
if n~=wordFigure[1] then result=n .. digitUnit[i] .. result else result=n .. result end
end
result=result:gsub(wordFigure[1]..wordFigure[1] ,wordFigure[1])
result=result:gsub(wordFigure[1].."$","") result=result:gsub(wordFigure[1].."$","")
local function formatNum(num, t)
local digitUnit, wordFigure
local result = ""
num = tostring(num)
if tonumber(t) < 1 then digitUnit = { "", "", "", "" } else digitUnit = { "", "", "", "" } end
if tonumber(t) < 1 then
wordFigure = { "", "", "", "", "", "", "", "", "", "" }
else
wordFigure = { "", "", "", "", "", "", "", "", "", "" }
end
if string.len(num) > 4 or tonumber(num) == 0 then return wordFigure[1] end
local lens = string.len(num)
for i = 1, lens do
local n = wordFigure[tonumber(string.sub(num, -i, -i)) + 1]
if n ~= wordFigure[1] then result = n .. digitUnit[i] .. result else result = n .. result end
end
result = result:gsub(wordFigure[1] .. wordFigure[1], wordFigure[1])
result = result:gsub(wordFigure[1] .. "$", "")
result = result:gsub(wordFigure[1] .. "$", "")
return result
return result
end
-- 数值转换为中文
local function number2cnChar(num,flag,digitUnit,wordFigure) --flag=0中文小写反之为大写
local st,result
num=tostring(num) result=""
local num1,num2=math.modf(num)
if tonumber(num2)==0 then
if tonumber(flag) < 1 then
digitUnit = digitUnit or {[1]="";[2]="亿"} wordFigure = wordFigure or {[1]=""; [2]=""; [3]=""; [4]=""}
else
digitUnit = digitUnit or {[1]="";[2]="亿"} wordFigure = wordFigure or {[1]=""; [2]=""; [3]=""; [4]=""}
end
local lens=string.len(num1)
if lens<5 then result=formatNum(num1,flag) elseif lens<9 then result=formatNum(string.sub(num1,1,-5),flag) .. digitUnit[1].. formatNum(string.sub(num1,-4,-1),flag)
elseif lens<13 then result=formatNum(string.sub(num1,1,-9),flag) .. digitUnit[2] .. formatNum(string.sub(num1,-8,-5),flag) .. digitUnit[1] .. formatNum(string.sub(num1,-4,-1),flag) else result="" end
result=result:gsub("^" .. wordFigure[1],"") result=result:gsub(wordFigure[1] .. digitUnit[1],"") result=result:gsub(wordFigure[1] .. digitUnit[2],"")
result=result:gsub(wordFigure[1] .. wordFigure[1],wordFigure[1]) result=result:gsub(wordFigure[1] .. "$","")
if lens>4 then result=result:gsub("^"..wordFigure[2].. wordFigure[3],wordFigure[3]) end
if result~="" then result=result .. wordFigure[4] else result="数值超限!" end
else return "数值超限!" end
local function number2cnChar(num, flag, digitUnit, wordFigure) --flag=0中文小写反之为大写
local st, result
num = tostring(num)
result = ""
local num1, num2 = math.modf(num)
if tonumber(num2) == 0 then
if tonumber(flag) < 1 then
digitUnit = digitUnit or { [1] = "", [2] = "亿" }
wordFigure = wordFigure or { [1] = "", [2] = "", [3] = "", [4] = "" }
else
digitUnit = digitUnit or { [1] = "", [2] = "亿" }
wordFigure = wordFigure or { [1] = "", [2] = "", [3] = "", [4] = "" }
end
local lens = string.len(num1)
if lens < 5 then
result = formatNum(num1, flag)
elseif lens < 9 then
result = formatNum(string.sub(num1, 1, -5), flag) .. digitUnit[1] .. formatNum(string.sub(num1, -4, -1), flag)
elseif lens < 13 then
result = formatNum(string.sub(num1, 1, -9), flag) ..
digitUnit[2] ..
formatNum(string.sub(num1, -8, -5), flag) .. digitUnit[1] .. formatNum(string.sub(num1, -4, -1), flag)
else
result = ""
end
result = result:gsub("^" .. wordFigure[1], "")
result = result:gsub(wordFigure[1] .. digitUnit[1], "")
result = result:gsub(wordFigure[1] .. digitUnit[2], "")
result = result:gsub(wordFigure[1] .. wordFigure[1], wordFigure[1])
result = result:gsub(wordFigure[1] .. "$", "")
if lens > 4 then result = result:gsub("^" .. wordFigure[2] .. wordFigure[3], wordFigure[3]) end
if result ~= "" then result = result .. wordFigure[4] else result = "数值超限!" end
else
return "数值超限!"
end
return result
return result
end
local function number2zh(num,t)
local result,wordFigure
result=""
if tonumber(t) <1 then
wordFigure = {"","","","","","","","","",""}
else wordFigure = {"","","","","","","","","",""} end
if tostring(num)==nil then return "" end
for pos=1,string.len(num) do
result=result..wordFigure[tonumber(string.sub(num, pos, pos)+1)]
end
result=result:gsub(wordFigure[1] .. wordFigure[1],wordFigure[1])
return result:gsub(wordFigure[1] .. wordFigure[1],wordFigure[1])
local function number2zh(num, t)
local result, wordFigure
result = ""
if tonumber(t) < 1 then
wordFigure = { "", "", "", "", "", "", "", "", "", "" }
else
wordFigure = { "", "", "", "", "", "", "", "", "", "" }
end
if tostring(num) == nil then return "" end
for pos = 1, string.len(num) do
result = result .. wordFigure[tonumber(string.sub(num, pos, pos) + 1)]
end
result = result:gsub(wordFigure[1] .. wordFigure[1], wordFigure[1])
return result:gsub(wordFigure[1] .. wordFigure[1], wordFigure[1])
end
local function number_translatorFunc(num)
local numberPart=splitNumPart(num)
local result={}
if numberPart.dot~="" then
table.insert(result,{number2cnChar(numberPart.int,0,{"", "亿"},{"","","",""})..number2zh(numberPart.dec,0),"〔数字小写〕"})
table.insert(result,{number2cnChar(numberPart.int,1,{"", ""},{"","","",""})..number2zh(numberPart.dec,1),"〔数字大写〕"})
else
table.insert(result,{number2cnChar(numberPart.int,0,{"", "亿"},{"","","",""}),"〔数字小写〕"})
table.insert(result,{number2cnChar(numberPart.int,1,{"", ""},{"","","",""}),"〔数字大写〕"})
end
table.insert(result,{number2cnChar(numberPart.int,0)..decimal_func(numberPart.dec,{[1]=""; [2]=""; [3]=""; [4]=""},{[0]=""; ""; ""; "" ;""; ""; ""; ""; ""; ""}),"〔金额小写〕"})
table.insert(result,{number2cnChar(numberPart.int,1)..decimal_func(numberPart.dec,{[1]=""; [2]=""; [3]=""; [4]=""},{[0]=""; ""; ""; "" ;""; ""; ""; ""; ""; ""}),"〔金额大写〕"})
return result
local numberPart = splitNumPart(num)
local result = {}
if numberPart.dot ~= "" then
table.insert(result,
{ number2cnChar(numberPart.int, 0, { "", "亿" }, { "", "", "", "" }) .. number2zh(numberPart.dec, 0),
"〔数字小写〕" })
table.insert(result,
{ number2cnChar(numberPart.int, 1, { "", "" }, { "", "", "", "" }) .. number2zh(numberPart.dec, 1),
"〔数字大写〕" })
else
table.insert(result, { number2cnChar(numberPart.int, 0, { "", "亿" }, { "", "", "", "" }), "〔数字小写〕" })
table.insert(result, { number2cnChar(numberPart.int, 1, { "", "" }, { "", "", "", "" }), "〔数字大写〕" })
end
table.insert(result,
{ number2cnChar(numberPart.int, 0) ..
decimal_func(numberPart.dec, { [1] = "", [2] = "", [3] = "", [4] = "" },
{ [0] = "", "", "", "", "", "", "", "", "", "" }), "〔金额小写〕" })
table.insert(result,
{ number2cnChar(numberPart.int, 1) ..
decimal_func(numberPart.dec, { [1] = "", [2] = "", [3] = "", [4] = "" },
{ [0] = "", "", "", "", "", "", "", "", "", "" }), "〔金额大写〕" })
return result
end
local function number_translator(input, seg, env)
-- 获取 recognizer/patterns/number 的第 2 个字符作为触发前缀
env.number_keyword = env.number_keyword or env.engine.schema.config:get_string('recognizer/patterns/number'):sub(2, 2)
-- 获取 recognizer/patterns/number 的第 2 个字符作为触发前缀
env.number_keyword = env.number_keyword or
env.engine.schema.config:get_string('recognizer/patterns/number'):sub(2, 2)
local str, num, numberPart
if env.number_keyword ~= '' and input:sub(1, 1) == env.number_keyword then
str = string.gsub(input, "^(%a+)", "")

View File

@ -2,197 +2,197 @@
-- Powered By ChatGPT
--- Search an array for a value.
-- @param list array-like table, scanned with ipairs
-- @param str value to look for (compared with ==)
-- @return true and the 1-based index of the first match, or false and 0
local function is_in_list(list, str)
    for i, v in ipairs(list) do
        if v == str then
            return true, i
        end
    end
    return false, 0
end
local M = {}

-- Stand-in byte substituted for the textual separator before splitting.
local PIN_DELIMITER = "\0"

-- Split the candidate part of an entry into a list of texts.
-- Texts are separated by " > " when that sequence occurs, otherwise by spaces.
local function split_pin_texts(texts)
    local separator = texts:find(" > ") and " > " or " "
    local normalized = texts:gsub(separator, PIN_DELIMITER)
    local result = {}
    for text in normalized:gmatch("[^" .. PIN_DELIMITER .. "]+") do
        result[#result + 1] = text
    end
    return result
end

-- Append `texts` to the auto-generated abbreviation `key`, unless the
-- configuration defines that key explicitly. The bucket is always a table of
-- its own, so appending to it never changes another key's list.
local function add_abbreviation(pin_cands, explicit_keys, key, texts)
    if explicit_keys[key] then return end
    local bucket = pin_cands[key]
    if bucket == nil then
        bucket = {}
        pin_cands[key] = bucket
    end
    for _, text in ipairs(texts) do
        bucket[#bucket + 1] = text
    end
end

--- Build M.pin_cands, the map from typed code to the candidates to pin.
--
-- Each entry of the configured list has the form "<code>\t<texts>":
--   'l    了 啦'     -> M.pin_cands["l"]  = {"了", "啦"}
--   'ta   他 她 它'  -> M.pin_cands["ta"] = {"他", "她", "它"}
--
-- Spaces in the code are removed for the key. When the code contains spaces,
-- an abbreviation ending in the first letter of the last syllable is generated
-- too, and one ending in zh/ch/sh when the last syllable starts that way:
--   'ni hao  你好 拟好' -> "nihao" and "nih"
--   'zhi chi 支持'      -> "zhichi", "zhic" and "zhich"
--
-- Several entries may feed the same generated abbreviation; their texts are
-- appended in configuration order:
--   'da zhuan 大专' + 'da zhong 大众'
--       -> "dazhuan" = {"大专"}, "dazhong" = {"大众"},
--          "daz" = "dazh" = {"大专", "大众"}
--
-- A key written explicitly in the configuration always wins over a generated
-- one: adding 'da z 打字' makes "daz" = {"打字"} while "dazh" stays generated.
--
-- Entries that do not match "<code>\t<texts>" are ignored. Nothing is done
-- when the list is not configured.
-- @param env component environment providing `name_space` and `engine.schema.config`
function M.init(env)
    env.name_space = env.name_space:gsub("^%*", "")
    local list = env.engine.schema.config:get_list(env.name_space)
    if not list then return end -- no configuration found -> stop

    -- First pass: parse every entry and record which keys are explicit, so
    -- that generated abbreviations never override them.
    local entries = {}
    local explicit_keys = {}
    for i = 0, list.size - 1 do
        local preedit, texts = list:get_value_at(i).value:match("([^\t]+)\t(.+)")
        if preedit and texts then
            local key = preedit:gsub(" ", "")
            explicit_keys[key] = true
            entries[#entries + 1] = { preedit = preedit, key = key, texts = split_pin_texts(texts) }
        end
    end

    -- Second pass: register the full spelling and the generated abbreviations.
    M.pin_cands = {}
    for _, entry in ipairs(entries) do
        M.pin_cands[entry.key] = entry.texts

        if entry.preedit:find(" ") then
            local preceding_part, last_part = entry.preedit:match("^(.+)%s(%S+)$")
            if preceding_part and last_part then
                local stem = preceding_part:gsub(" ", "")
                -- First letter of the last syllable, e.g. "ni hao" -> "nih".
                add_abbreviation(M.pin_cands, explicit_keys, stem .. last_part:sub(1, 1), entry.texts)
                -- zh/ch/sh initial of the last syllable, e.g. "zhi chi" -> "zhich".
                if last_part:match("^[zcs]h") then
                    add_abbreviation(M.pin_cands, explicit_keys, stem .. last_part:sub(1, 2), entry.texts)
                end
            end
        end
    end
end
function M.func(input, env)
-- 当前输入框的 preedit未经过方案 translator/preedit_format 转换
-- 输入 nihaoshij 则为 nihaoshij选择了「你好」后变成 你好shij
local full_preedit = env.engine.context:get_preedit().text
-- 非汉字部分的 preedit如 shij
local letter_only_preedit = string.gsub(full_preedit, "[^a-zA-Z]", "")
-- 是否正在选词(已经选择了至少一个字词,如 `你好shij` 这种状态)
-- local isSelecting = full_preedit ~= letter_only_preedit
-- 当前输入框的 preedit未经过方案 translator/preedit_format 转换
-- 输入 nihaoshij 则为 nihaoshij选择了「你好」后变成 你好shij
local full_preedit = env.engine.context:get_preedit().text
-- 非汉字部分的 preedit如 shij
local letter_only_preedit = string.gsub(full_preedit, "[^a-zA-Z]", "")
-- 是否正在选词(已经选择了至少一个字词,如 `你好shij` 这种状态)
-- local isSelecting = full_preedit ~= letter_only_preedit
-- If there is no configuration, no filtering will be performed
if not M.pin_cands or #letter_only_preedit == 0 then
for cand in input:iter() do yield(cand) end
return
end
-- If there is no configuration, no filtering will be performed
if not M.pin_cands or #letter_only_preedit == 0 then
for cand in input:iter() do yield(cand) end
return
end
local pined = {} -- 提升的候选项
local others = {} -- 其余候选项
local pined_count = 0
local pined = {} -- 提升的候选项
local others = {} -- 其余候选项
local pined_count = 0
for cand in input:iter() do
local cand_preedit_no_spaces = cand.preedit:gsub(" ", "")
for cand in input:iter() do
local cand_preedit_no_spaces = cand.preedit:gsub(" ", "")
-- 无关的输入直接 break
if string.find(letter_only_preedit, "^" .. cand_preedit_no_spaces) == nil then
yield(cand)
break
end
-- 无关的输入直接 break
if string.find(letter_only_preedit, "^" .. cand_preedit_no_spaces) == nil then
yield(cand)
break
end
local texts = M.pin_cands[cand_preedit_no_spaces]
local texts = M.pin_cands[cand_preedit_no_spaces]
-- 跳过不需要处理的部分,对后续的候选项排序
if texts == nil then
yield(cand)
else
-- 给 pined 几个空字符串占位元素,后面直接 pined[idx] = cand 确保 pined 与 texts 顺序一致
if #pined < #texts then
for _ = 1, #texts do
table.insert(pined, '')
end
end
-- 处理简繁转换后的问题
local cand_text = cand.text
if cand:get_dynamic_type() == "Shadow" then
-- handle cands converted by simplifier
local originalCand = cand:get_genuine()
if #originalCand.text == #cand_text and not is_in_list({ "", "", "", "", "", "", "", "" }, cand.text) then
-- 笑|😄 cand_text = 😄; 麼|么 cand_text = 麼;
cand_text = originalCand.text
end
end
-- 要置顶的放到 pined 中,其余的放到 others
local ok, idx = is_in_list(texts, cand_text)
if ok then
pined[idx] = cand
pined_count = pined_count + 1
else
table.insert(others, cand)
end
-- 找齐了或者 others 太大了,就不找了,一般前 5 个就找完了
if pined_count == #texts or #others > 50 then
break
end
end
end
-- 跳过不需要处理的部分,对后续的候选项排序
if texts == nil then
yield(cand)
else
-- 给 pined 几个空字符串占位元素,后面直接 pined[idx] = cand 确保 pined 与 texts 顺序一致
if #pined < #texts then
for _ = 1, #texts do
table.insert(pined, '')
end
end
-- 处理简繁转换后的问题
local cand_text = cand.text
if cand:get_dynamic_type() == "Shadow" then
-- handle cands converted by simplifier
local originalCand = cand:get_genuine()
if #originalCand.text == #cand_text and not is_in_list({ "", "", "", "", "", "", "", "" }, cand.text) then
-- 笑|😄 cand_text = 😄; 麼|么 cand_text = 麼;
cand_text = originalCand.text
end
end
-- 要置顶的放到 pined 中,其余的放到 others
local ok, idx = is_in_list(texts, cand_text)
if ok then
pined[idx] = cand
pined_count = pined_count + 1
else
table.insert(others, cand)
end
-- 找齐了或者 others 太大了,就不找了,一般前 5 个就找完了
if pined_count == #texts or #others > 50 then
break
end
end
end
-- yield pined others 及后续的候选项
if pined_count > 0 then
-- 如果因配置写了这个编码没有的字词,导致没有找齐,删掉空字符串占位元素
local i = 1
while i <= #pined do
if pined[i] == '' then
table.remove(pined, i)
else
i = i + 1
end
end
for _, cand in ipairs(pined) do
yield(cand)
end
end
for _, cand in ipairs(others) do
yield(cand)
end
for cand in input:iter() do
yield(cand)
end
-- yield pined others 及后续的候选项
if pined_count > 0 then
-- 如果因配置写了这个编码没有的字词,导致没有找齐,删掉空字符串占位元素
local i = 1
while i <= #pined do
if pined[i] == '' then
table.remove(pined, i)
else
i = i + 1
end
end
for _, cand in ipairs(pined) do
yield(cand)
end
end
for _, cand in ipairs(others) do
yield(cand)
end
for cand in input:iter() do
yield(cand)
end
end
return M

View File

@ -95,7 +95,6 @@ function f.init(env)
-- f.update_dict_entry(edit, no_search_string)
end
end)
end
-- function f.update_dict_entry(s, code)
@ -233,7 +232,6 @@ function f.func(input, env)
yield(cand)
end
end
end
function f.fini(env)

View File

@ -3,23 +3,23 @@
-- 示例:输入 U62fc 得到「拼」
-- 触发前缀默认为 recognizer/patterns/unicode 的第 2 个字符,即 U
--- Translator turning a hexadecimal code point into its character.
-- The trigger prefix is the 2nd character of the schema value
-- 'recognizer/patterns/unicode' and is cached on `env.unicode_keyword`.
-- For input "<prefix><hex>" with at least two hex digits, the character with
-- that code point is yielded; when the code is below 0x10000, the sixteen
-- characters obtained by appending one more hex digit are yielded as well.
-- Codes above U+10FFFF are ignored, since utf8.char would otherwise raise
-- on very long hex input.
-- @param input the raw input string
-- @param seg current segment; must carry the "unicode" tag
-- @param env component environment providing `engine.schema.config`
local function unicode(input, seg, env)
    if env.unicode_keyword == nil then
        local pattern = env.engine.schema.config:get_string('recognizer/patterns/unicode')
        -- A missing pattern leaves the keyword empty, which disables the translator.
        env.unicode_keyword = pattern and pattern:sub(2, 2) or ''
    end
    local keyword = env.unicode_keyword
    if not (seg:has_tag("unicode") and keyword ~= '' and input:sub(1, 1) == keyword) then
        return
    end

    local ucodestr = input:match(keyword .. "(%x+)")
    if not ucodestr or #ucodestr <= 1 then
        return
    end

    local code = tonumber(ucodestr, 16)
    -- Very long hex strings may wrap around to a negative integer.
    if not code or code < 0 or code > 0x10FFFF then
        return
    end

    yield(Candidate("unicode", seg.start, seg._end, utf8.char(code), string.format("U%x", code)))
    if code < 0x10000 then
        -- Preview the characters reachable by typing one more hex digit.
        for i = 0, 15 do
            local next_text = utf8.char(code * 16 + i)
            yield(Candidate("unicode", seg.start, seg._end, next_text, string.format("U%x~%x", code, i)))
        end
    end
end
return unicode

View File

@ -4,7 +4,7 @@
-- 感谢改进 @[t123yh](https://github.com/t123yh) @[Shewer Lu](https://github.com/shewer)
local function v_filter(input, env)
local code = env.engine.context.input -- 当前编码
env.v_spec_arr = env.v_spec_arr or Set({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9","Vs."})
env.v_spec_arr = env.v_spec_arr or Set({ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "Vs." })
-- 仅当当前输入以 v 开头,并且编码长度为 2才进行处理
if (string.len(code) == 2 and string.find(code, "^v")) then
local l = {}
@ -12,7 +12,7 @@ local function v_filter(input, env)
-- 特殊情况处理
if (env.v_spec_arr[cand.text]) then
yield(cand)
-- 候选项为单个字符的,提到前面来。
-- 候选项为单个字符的,提到前面来。
elseif (utf8.len(cand.text) == 1) then
yield(cand)
else

View File

@ -83,18 +83,18 @@ cold_word_drop_filter = require("cold_word_drop.filter")
-- 详情 https://github.com/hchunhui/librime-lua/issues/307
-- 这样也不会导致卡顿,那就每次都调用一下吧,内存稳稳的
--- Run one incremental garbage-collection step.
-- The full-collection call is intentionally left commented out.
function force_gc()
    -- collectgarbage()
    collectgarbage("step")
end
-- 临时用的
--- Temporary debugging filter.
-- Re-yields every candidate wrapped in a ShadowCandidate that keeps its type
-- and text but whose comment reads
-- "<context input> - <context preedit text> - <candidate preedit>".
-- @param input candidate stream
-- @param env component environment providing `engine.context`
function debug_checker(input, env)
    for cand in input:iter() do
        local context = env.engine.context
        local comment = context.input .. " - " .. context:get_preedit().text .. " - " .. cand.preedit
        yield(ShadowCandidate(cand, cand.type, cand.text, comment))
    end
end