mirror of
https://github.com/iDvel/rime-ice.git
synced 2026-05-14 00:30:37 +08:00
lua: 简化和修复 cold_word_drop 逻辑 (#923)
* update cold_word_drop * Delete lua/cold_word_drop/debugtool.lua * Delete lua/cold_word_drop/turndown_freq_words.lua
This commit is contained in:
@@ -1,92 +0,0 @@
|
|||||||
#! /usr/bin/env lua
|
|
||||||
--
|
|
||||||
-- debugtool.lua
|
|
||||||
-- Copyright (C) 2021 Shewer Lu <shewer@gmail.com>
|
|
||||||
--
|
|
||||||
-- Distributed under terms of the MIT license.
|
|
||||||
--
|
|
||||||
-- puts(tag,...)
|
|
||||||
-- DEBUG --> log.error
|
|
||||||
-- WARN --> log.warning
|
|
||||||
-- INFO --> log.info
|
|
||||||
-- CONSOLE --> print
|
|
||||||
--
|
|
||||||
-- ex:
|
|
||||||
-- test.lua
|
|
||||||
--
|
|
||||||
-- local puts = require 'tools/debugtool'
|
|
||||||
-- --set tag D103 C102
|
|
||||||
-- local D103= DEBUG .. "103"
|
|
||||||
-- local C102= CONSOLE .. "102"
|
|
||||||
-- local C103= nil
|
|
||||||
--
|
|
||||||
--
|
|
||||||
-- puts(ERROR,__FILE__(),__LINE__(),__FUNC__(), 1, 2 , 3 )
|
|
||||||
-- --> log.error( "error" .. tran_msg(...))
|
|
||||||
--
|
|
||||||
-- puts(DEBUG,__FILE__(),__LINE__(),__FUNC__(), 1, 2 , 3 )
|
|
||||||
-- --> log.error( DEBUG .. tran_msg(...))
|
|
||||||
--
|
|
||||||
-- puts(D103,__FILE__(),__LINE__(),__FUNC__(), 1, 2, 3)
|
|
||||||
-- --> log.error("trace103" .. tran_msg(...))
|
|
||||||
--
|
|
||||||
-- puts(C102,__FILE__(),__LINE__(),__FUNC__(), 1, 2, 3)
|
|
||||||
-- --> print("console102" .. tran_msg(...))
|
|
||||||
--
|
|
||||||
-- puts(C103,__FILE__(),__LINE__(),__FUNC__(), 1, 2, 3)
|
|
||||||
-- --> pass
|
|
||||||
--
|
|
||||||
--
|
|
||||||
--
|
|
||||||
-- puts(DEBUG,__FILE__(),__LINE__(),__FUNC__() , ...)
|
|
||||||
-- puts(INFO,__FILE__(),__LINE__(),__FUNC__() , ...)
|
|
||||||
--
|
|
||||||
-- global variable
|
|
||||||
--- Return the source identifier of the function at stack level `n`.
-- @param n stack level handed to `debug.getinfo`; defaults to 2, i.e. the
--        function that called this helper.
-- @return the `source` field reported by `debug.getinfo`, or nil when there is
--         no frame at that level.
function __FILE__(n)
    n = n or 2
    -- debug.getinfo returns nil for a level beyond the stack; guard it instead
    -- of raising "attempt to index a nil value" from a debugging helper.
    local info = debug.getinfo(n, 'S')
    return info and info.source
end
|
|
||||||
|
|
||||||
--- Return the line currently executing in the function at stack level `n`.
-- @param n stack level handed to `debug.getinfo`; defaults to 2, i.e. the
--        function that called this helper.
-- @return the `currentline` field reported by `debug.getinfo`, or nil when
--         there is no frame at that level.
function __LINE__(n)
    n = n or 2
    -- debug.getinfo returns nil for a level beyond the stack; guard it instead
    -- of raising "attempt to index a nil value" from a debugging helper.
    local info = debug.getinfo(n, 'l')
    return info and info.currentline
end
|
|
||||||
|
|
||||||
--- Return the name of the function at stack level `n`.
-- @param n stack level handed to `debug.getinfo`; defaults to 2, i.e. the
--        function that called this helper.
-- @return the `name` field reported by `debug.getinfo` (which may itself be
--         nil), or nil when there is no frame at that level.
function __FUNC__(n)
    n = n or 2
    -- debug.getinfo returns nil for a level beyond the stack; guard it instead
    -- of raising "attempt to index a nil value" from a debugging helper.
    local info = debug.getinfo(n, 'n')
    return info and info.name
end
|
|
||||||
|
|
||||||
INFO = "log"
|
|
||||||
WARN = "warn"
|
|
||||||
ERROR = "error"
|
|
||||||
DEBUG = "trace"
|
|
||||||
CONSOLE = "console"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--- Format variadic arguments into the message tail appended by `puts`.
-- The result starts with a tab; every argument is converted with `tostring`
-- and prefixed with ": ".
-- @param ... values to render; nil values are kept and rendered as "nil".
-- @return the formatted string ("\t" when called without arguments).
local function tran_msg(...)
    -- select("#", ...) counts embedded and trailing nils, which walking the
    -- packed table `{ ... }` would skip, and it fixes the argument order.
    local count = select("#", ...)
    local parts = { "\t" }
    for i = 1, count do
        parts[i + 1] = ": " .. tostring((select(i, ...)))
    end
    return table.concat(parts)
end
|
|
||||||
--- Emit a tagged log line through the sink selected by the tag's prefix.
-- The start of `tag` is compared against the global level names INFO, WARN,
-- ERROR, DEBUG and CONSOLE, in that order; a level set to nil/false is skipped.
-- @param tag string such as DEBUG or DEBUG .. "103"; a non-string tag (for
--        example nil) makes the call a no-op.
-- @param ... values appended to the message via `tran_msg`.
local function puts(tag, ...)
    if type(tag) ~= "string" then return end

    -- Plain prefix comparison: the level names are literal text, so they must
    -- not be spliced into a Lua pattern where magic characters would misfire.
    local function has_prefix(level)
        if not level then return false end
        local name = tostring(level)
        return tag:sub(1, #name) == name
    end

    local sink
    if has_prefix(INFO) then
        sink = log and log.info or print
    elseif has_prefix(WARN) then
        sink = log and log.warning or print
    elseif has_prefix(ERROR) then
        sink = log and log.error or print
    elseif has_prefix(DEBUG) then
        -- DEBUG-tagged output is routed to log.error as well.
        sink = log and log.error or print
    elseif has_prefix(CONSOLE) then
        sink = print
    else
        -- Unknown tag: nothing is written.
        return
    end

    sink(tag .. tran_msg(...))
end
|
|
||||||
|
|
||||||
return puts
|
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
local drop_words =
|
local drop_words =
|
||||||
{ "示~例~",
|
{ "示~例~", "肏女人", }
|
||||||
}
|
return drop_words
|
||||||
return drop_words
|
|
||||||
@@ -1,54 +1,60 @@
|
|||||||
local drop_list = require("cold_word_drop.drop_words")
|
local filter = {}
|
||||||
local hide_list = require("cold_word_drop.hide_words")
|
|
||||||
local turndown_freq_list = require("cold_word_drop.turndown_freq_words")
|
|
||||||
|
|
||||||
local function filter(input, env)
|
function filter.init(env)
|
||||||
local idx = 3 -- 降频的词条放到第三个后面, 即第四位, 可在 yaml 里配置
|
local engine = env.engine
|
||||||
local i = 1
|
local config = engine.schema.config
|
||||||
local cands = {}
|
env.word_reduce_idx = config:get_int("cold_word_reduce/idx") or 4
|
||||||
local context = env.engine.context
|
env.drop_words = require("cold_word_drop.drop_words") or {}
|
||||||
local preedit_code = context.input
|
env.hide_words = require("cold_word_drop.hide_words") or {}
|
||||||
|
env.reduce_freq_words = require("cold_word_drop.reduce_freq_words") or {}
|
||||||
|
end
|
||||||
|
|
||||||
for cand in input:iter() do
|
function filter.func(input, env)
|
||||||
local cpreedit_code = string.gsub(cand.preedit, ' ', '')
|
local cands = {}
|
||||||
if (i <= idx) then
|
local context = env.engine.context
|
||||||
local tfl = turndown_freq_list[cand.text] or nil
|
local preedit_str = context.input:gsub(" ", "")
|
||||||
-- 前三个 候选项排除 要调整词频的词条, 要删的(实际假性删词, 彻底隐藏罢了) 和要隐藏的词条
|
local drop_words = env.drop_words
|
||||||
if not
|
local hide_words = env.hide_words
|
||||||
((tfl and table.find_index(tfl, cpreedit_code)) or
|
local word_reduce_idx = env.word_reduce_idx
|
||||||
table.find_index(drop_list, cand.text) or
|
local reduce_freq_words = env.reduce_freq_words
|
||||||
(hide_list[cand.text] and table.find_index(hide_list[cand.text], cpreedit_code))
|
for cand in input:iter() do
|
||||||
)
|
local cand_text = cand.text:gsub(" ", "")
|
||||||
then
|
local preedit_code = cand.preedit:gsub(" ", "") or preedit_str
|
||||||
i = i + 1
|
|
||||||
---@diagnostic disable-next-line: undefined-global
|
local reduce_freq_list = reduce_freq_words[cand_text] or {}
|
||||||
yield(cand)
|
if word_reduce_idx > 1 then
|
||||||
else
|
-- 前三个 候选项排除 要调整词频的词条, 要删的(实际假性删词, 彻底隐藏罢了) 和要隐藏的词条
|
||||||
table.insert(cands, cand)
|
if reduce_freq_list and table.find_index(reduce_freq_list, preedit_code) then
|
||||||
end
|
table.insert(cands, cand)
|
||||||
else
|
elseif
|
||||||
table.insert(cands, cand)
|
not (
|
||||||
end
|
table.find_index(drop_words, cand_text)
|
||||||
if (#cands > 50) then
|
or (hide_words[cand_text] and table.find_index(hide_words[cand_text], preedit_code))
|
||||||
break
|
|
||||||
end
|
)
|
||||||
end
|
then
|
||||||
for _, cand in ipairs(cands) do
|
yield(cand)
|
||||||
local cpreedit_code = string.gsub(cand.preedit, ' ', '')
|
word_reduce_idx = word_reduce_idx - 1
|
||||||
if not
|
end
|
||||||
-- 要删的 和要隐藏的词条不显示
|
else
|
||||||
(
|
if
|
||||||
table.find_index(drop_list, cand.text) or
|
not (
|
||||||
(hide_list[cand.text] and table.find_index(hide_list[cand.text], cpreedit_code))
|
table.find_index(drop_words, cand_text)
|
||||||
)
|
or (hide_words[cand_text] and table.find_index(hide_words[cand_text], preedit_code))
|
||||||
then
|
)
|
||||||
---@diagnostic disable-next-line: undefined-global
|
then
|
||||||
yield(cand)
|
table.insert(cands, cand)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
for cand in input:iter() do
|
|
||||||
yield(cand)
|
if #cands >= 80 then
|
||||||
end
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
for _, cand in ipairs(cands) do
|
||||||
|
yield(cand)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return filter
|
return filter
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
local hide_words =
|
local hide_words =
|
||||||
{ ["示~例~"] = { "shil", "shili", },
|
{ ["示~例~"] = { "shil", "shili", },
|
||||||
|
["么特瑞"] = { "meter", },
|
||||||
}
|
}
|
||||||
return hide_words
|
return hide_words
|
||||||
48
lua/cold_word_drop/logger.lua
Normal file
48
lua/cold_word_drop/logger.lua
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
-- runLog.lua
|
||||||
|
-- Copyright (C) 2023 yaoyuan.dou <douyaoyuan@126.com>
|
||||||
|
|
||||||
|
local M = {}
|
||||||
|
local dbgFlg = true
|
||||||
|
|
||||||
|
-- Set the debug switch and echo the new value to standard output.
function M.setDbg(flg)
    dbgFlg = flg
    local state = tostring(dbgFlg)
    print('runLog dbgFlg is ' .. state)
end
|
||||||
|
|
||||||
|
-- Directory of this script including the trailing path separator ("" when the
-- chunk source has no directory part). The file name itself is stripped, so the
-- result no longer depends on the script being named "runLog.lua" or on its
-- name having exactly that length.
local current_path = debug.getinfo(1, 'S').source:match("^@?(.*[/\\])") or ""
-- Log file used by M.writeLog, placed next to this script.
M.logDoc = current_path .. 'runLog.txt'
|
||||||
|
|
||||||
|
--- Append one timestamped entry to the log file `M.logDoc`.
-- Each entry is "<date time>[<_VERSION>]<tab><logStr>".
-- @param logStr text to record; defaults to "nothing" when nil/false.
-- @param newLineFlg when false the entry is written without a trailing
--        newline, so the next entry continues on the same line; nil (omitted)
--        and true terminate the line.
M.writeLog = function(logStr, newLineFlg)
    logStr = logStr or "nothing"

    -- Default only when the flag is omitted: `not newLineFlg` would also
    -- overwrite an explicit false, which made the parameter meaningless.
    if newLineFlg == nil then newLineFlg = true end

    local f = io.open(M.logDoc, 'a')
    -- If the log file cannot be opened the entry is dropped silently, so that
    -- logging never interrupts the caller.
    if f then
        local timeStamp = os.date("%Y/%m/%d %H:%M:%S")
        local lineEnd = newLineFlg and '\n' or ''
        f:write(timeStamp .. '[' .. _VERSION .. ']' .. '\t' .. logStr .. lineEnd)
        f:close()
    end
end
|
||||||
|
|
||||||
|
--===========================test========================
|
||||||
|
-- Self-test: when the debug switch is on, write three sample entries through
-- M.writeLog. printPrefix falls back to a single space when nil; the value is
-- not used any further in this function.
function M.test(printPrefix)
    if printPrefix == nil then printPrefix = ' ' end

    -- Nothing is written while debugging is switched off.
    if not dbgFlg then
        return
    end

    M.writeLog('this is a test string on new line', true)
    M.writeLog('this is a test string appending the last line', false)
    M.writeLog('runLogDoc is: ' .. M.logDoc, true)
end
|
||||||
|
|
||||||
|
-- Initialization hook: any setup work the module needs can be run here.
-- It is currently empty and ignores its arguments.
M.init = function(...)
end
|
||||||
|
|
||||||
|
M.init()
|
||||||
|
|
||||||
|
return M
|
||||||
@@ -2,162 +2,167 @@
|
|||||||
orgtype = type
|
orgtype = type
|
||||||
|
|
||||||
function type(obj)
|
function type(obj)
|
||||||
local _type = orgtype(obj)
|
local _type = orgtype(obj)
|
||||||
if "table" == _type and obj._cname then
|
if "table" == _type and obj._cname then
|
||||||
return obj._cname
|
return obj._cname
|
||||||
end
|
end
|
||||||
return _type
|
return _type
|
||||||
end
|
end
|
||||||
|
|
||||||
function metatable(...)
|
function metatable(...)
|
||||||
if ... and type(...) == "table" then
|
if ... and type(...) == "table" then
|
||||||
return setmetatable(..., { __index = table })
|
return setmetatable(..., { __index = table })
|
||||||
else
|
else
|
||||||
return setmetatable({ ... }, { __index = table })
|
return setmetatable({ ... }, { __index = table })
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- chech metatble
|
-- chech metatble
|
||||||
function metatable_chk(tab)
|
function metatable_chk(tab)
|
||||||
if "table" == type(tab)
|
if "table" == type(tab) then
|
||||||
then
|
return (tab.each and tab) or metatable(tab)
|
||||||
return (tab.each and tab) or metatable(tab)
|
else
|
||||||
else
|
return tab
|
||||||
return tab
|
end
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
table.eachi = function(tab, func)
|
table.eachi = function(tab, func)
|
||||||
for i = 1, #tab do
|
for i = 1, #tab do
|
||||||
func(tab[i], i)
|
func(tab[i], i)
|
||||||
end
|
end
|
||||||
return tab
|
return tab
|
||||||
end
|
end
|
||||||
table.eacha = function(tab, func)
|
table.eacha = function(tab, func)
|
||||||
for i, v in ipairs(tab) do
|
for i, v in ipairs(tab) do
|
||||||
func(v, i)
|
func(v, i)
|
||||||
end
|
end
|
||||||
return tab
|
return tab
|
||||||
end
|
end
|
||||||
table.each = function(tab, func)
|
table.each = function(tab, func)
|
||||||
for k, v in pairs(tab) do
|
for k, v in pairs(tab) do
|
||||||
func(v, k)
|
func(v, k)
|
||||||
end
|
end
|
||||||
return tab
|
return tab
|
||||||
end
|
end
|
||||||
table.find_index = function(tab, elm, ...)
|
table.find_index = function(tab, elm, ...)
|
||||||
local _, i = table.find(tab, elm, ...)
|
local _, i = table.find(tab, elm, ...)
|
||||||
return i
|
return i
|
||||||
end
|
end
|
||||||
table.find = function(tab, elm, func)
|
table.find = function(tab, elm, func)
|
||||||
for i, v in ipairs(tab) do
|
for i, v in ipairs(tab) do
|
||||||
if elm == v then
|
if elm == v then
|
||||||
return v, i
|
return v, i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
table.find_with_func = function(tab, elm, ...)
|
table.find_with_func = function(tab, elm, ...)
|
||||||
local i, v = table.find(tab, elm)
|
local i, v = table.find(tab, elm)
|
||||||
end
|
end
|
||||||
table.delete = function(tab, elm, ...)
|
table.delete = function(tab, elm, ...)
|
||||||
local index = table.find_index(tab, elm)
|
local index = table.find_index(tab, elm)
|
||||||
return index and table.remove(tab, index)
|
return index and table.remove(tab, index)
|
||||||
end
|
end
|
||||||
|
|
||||||
table.find_all = function(tab, elm, ...)
|
table.find_all = function(tab, elm, ...)
|
||||||
local tmptab = setmetatable({}, { __index = table })
|
local tmptab = setmetatable({}, { __index = table })
|
||||||
local _func = (type(elm) == "function" and elm) or function(v, k, ...) return v == elm end
|
local _func = (type(elm) == "function" and elm) or function(v, k, ...)
|
||||||
for k, v in pairs(tab) do
|
return v == elm
|
||||||
if _func(v, k, ...) then
|
end
|
||||||
tmptab:insert(v)
|
for k, v in pairs(tab) do
|
||||||
end
|
if _func(v, k, ...) then
|
||||||
end
|
tmptab:insert(v)
|
||||||
return tmptab
|
end
|
||||||
|
end
|
||||||
|
return tmptab
|
||||||
end
|
end
|
||||||
table.select = table.find_all
|
table.select = table.find_all
|
||||||
|
|
||||||
table.reduce = function(tab, func, arg)
|
table.reduce = function(tab, func, arg)
|
||||||
local new, old = arg, arg
|
local new, old = arg, arg
|
||||||
for i, v in ipairs(tab) do
|
for i, v in ipairs(tab) do
|
||||||
new, old = func(v, new)
|
new, old = func(v, new)
|
||||||
end
|
end
|
||||||
return new, arg
|
return new, arg
|
||||||
end
|
end
|
||||||
|
|
||||||
table.map = function(tab, func)
|
table.map = function(tab, func)
|
||||||
local newtab = setmetatable({}, { __index = table })
|
local newtab = setmetatable({}, { __index = table })
|
||||||
func = func or function(v, i) return v, i end
|
func = func or function(v, i)
|
||||||
for i, v in ipairs(tab) do
|
return v, i
|
||||||
newtab[i] = func(v, i)
|
end
|
||||||
end
|
for i, v in ipairs(tab) do
|
||||||
return newtab
|
newtab[i] = func(v, i)
|
||||||
|
end
|
||||||
|
return newtab
|
||||||
end
|
end
|
||||||
table.map_hash = function(tab, func) -- table to list of array { key, v}
|
table.map_hash = function(tab, func) -- table to list of array { key, v}
|
||||||
local newtab = setmetatable({}, { __index = table })
|
local newtab = setmetatable({}, { __index = table })
|
||||||
func = func or function(k, v) return { k, v } end
|
func = func or function(k, v)
|
||||||
for k, v in pairs(tab) do
|
return { k, v }
|
||||||
newtab:insert(func(k, v))
|
end
|
||||||
end
|
for k, v in pairs(tab) do
|
||||||
return newtab
|
newtab:insert(func(k, v))
|
||||||
|
end
|
||||||
|
return newtab
|
||||||
end
|
end
|
||||||
function table:push(elm)
|
function table:push(elm)
|
||||||
self:insert(elm)
|
self:insert(elm)
|
||||||
end
|
end
|
||||||
|
|
||||||
table.append = table.push
|
table.append = table.push
|
||||||
function table:pop()
|
function table:pop()
|
||||||
return self:remove(#self)
|
return self:remove(#self)
|
||||||
end
|
end
|
||||||
|
|
||||||
function table:shift()
|
function table:shift()
|
||||||
self:remove(1)
|
self:remove(1)
|
||||||
end
|
end
|
||||||
|
|
||||||
function table:unshift(elm)
|
function table:unshift(elm)
|
||||||
self:insert(1, elm)
|
self:insert(1, elm)
|
||||||
end
|
end
|
||||||
|
|
||||||
function table.len(t)
|
function table.len(t)
|
||||||
local leng = 0
|
local leng = 0
|
||||||
for k, v in pairs(t) do
|
for k, v in pairs(t) do
|
||||||
leng = leng + 1
|
leng = leng + 1
|
||||||
end
|
end
|
||||||
return leng;
|
return leng
|
||||||
end
|
end
|
||||||
|
|
||||||
-- table to string 序列化
|
-- table to string 序列化
|
||||||
function table.serialize(obj)
|
function table.serialize(obj)
|
||||||
local serialize_str = ""
|
local serialize_str = ""
|
||||||
local t = type(obj)
|
local t = type(obj)
|
||||||
if t == "number" then
|
if t == "number" then
|
||||||
serialize_str = serialize_str .. obj
|
serialize_str = serialize_str .. obj
|
||||||
elseif t == "boolean" then
|
elseif t == "boolean" then
|
||||||
serialize_str = serialize_str .. tostring(obj)
|
serialize_str = serialize_str .. tostring(obj)
|
||||||
elseif t == "string" then
|
elseif t == "string" then
|
||||||
serialize_str = serialize_str .. string.format("%q", obj)
|
serialize_str = serialize_str .. string.format("%q", obj)
|
||||||
elseif t == "table" then
|
elseif t == "table" then
|
||||||
serialize_str = serialize_str .. "{ "
|
serialize_str = serialize_str .. "{ "
|
||||||
local record_sep = #obj < 4 and ", " or ",\n"
|
local record_sep = #obj < 4 and ", " or ",\n"
|
||||||
local record_prefix = #obj < 4 and "" or "\t"
|
local record_prefix = #obj < 4 and "" or "\t"
|
||||||
for k, v in pairs(obj) do
|
for k, v in pairs(obj) do
|
||||||
if type(k) == "number" then
|
if type(k) == "number" then
|
||||||
serialize_str = serialize_str .. record_prefix .. '"' .. v .. '"' .. record_sep
|
serialize_str = serialize_str .. record_prefix .. '"' .. v .. '"' .. record_sep
|
||||||
else
|
else
|
||||||
serialize_str = serialize_str .. "\t[" .. table.serialize(k) .. "] = " .. table.serialize(v) .. ",\n"
|
serialize_str = serialize_str .. "\t[" .. table.serialize(k) .. "] = " .. table.serialize(v) .. ",\n"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
-- local metatable = getmetatable(obj)
|
-- local metatable = getmetatable(obj)
|
||||||
-- if metatable ~= nil and type(metatable.__index) == "table" then
|
-- if metatable ~= nil and type(metatable.__index) == "table" then
|
||||||
-- for k, v in pairs(metatable.__index) do
|
-- for k, v in pairs(metatable.__index) do
|
||||||
-- serialize_str = serialize_str .. "[" .. table.serialize(k) .. "]=" .. table.serialize(v) .. ",\n"
|
-- serialize_str = serialize_str .. "[" .. table.serialize(k) .. "]=" .. table.serialize(v) .. ",\n"
|
||||||
-- end
|
-- end
|
||||||
-- end
|
-- end
|
||||||
serialize_str = serialize_str .. "}"
|
serialize_str = serialize_str .. "}"
|
||||||
elseif t == "nil" then
|
elseif t == "nil" then
|
||||||
return nil
|
return nil
|
||||||
else
|
else
|
||||||
error("can not serialize a " .. t .. " type.")
|
error("can not serialize a " .. t .. " type.")
|
||||||
end
|
end
|
||||||
return serialize_str
|
return serialize_str
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -1,151 +1,120 @@
|
|||||||
require('cold_word_drop.string')
|
require("cold_word_drop.string")
|
||||||
require("cold_word_drop.metatable")
|
require("cold_word_drop.metatable")
|
||||||
-- local puts = require("tools/debugtool")
|
local processor = {}
|
||||||
local drop_list = require("cold_word_drop.drop_words")
|
|
||||||
local hide_list = require("cold_word_drop.hide_words")
|
|
||||||
local turndown_freq_list = require("cold_word_drop.turndown_freq_words")
|
|
||||||
local tbls = {
|
|
||||||
['drop_list'] = drop_list,
|
|
||||||
['hide_list'] = hide_list,
|
|
||||||
['turndown_freq_list'] = turndown_freq_list
|
|
||||||
}
|
|
||||||
-- local cold_word_drop = {}
|
|
||||||
|
|
||||||
|
|
||||||
local function get_record_filername(record_type)
|
local function get_record_filername(record_type)
|
||||||
local user_distribute_name = rime_api:get_distribution_name()
|
local user_distribute_name = rime_api:get_distribution_code_name()
|
||||||
if user_distribute_name == '小狼毫' then
|
if user_distribute_name:lower():match("weasel") then
|
||||||
return string.format("%s\\Rime\\lua\\cold_word_drop\\%s_words.lua", os.getenv("APPDATA"), record_type)
|
return string.format("%s\\lua\\cold_word_drop\\%s_words.lua", rime_api:get_user_data_dir(), record_type)
|
||||||
end
|
elseif user_distribute_name:lower():match("squirrel") then
|
||||||
|
return string.format("%s/lua/cold_word_drop/%s_words.lua", rime_api:get_user_data_dir(), record_type)
|
||||||
local system = io.popen("uname -s"):read("*l")
|
elseif user_distribute_name:lower():match("fcitx") then
|
||||||
local filename = nil
|
return string.format("%s/lua/cold_word_drop/%s_words.lua", rime_api:get_user_data_dir(), record_type)
|
||||||
-- body
|
elseif user_distribute_name:lower():match("ibus") then
|
||||||
if system == "Darwin" then
|
return string.format(
|
||||||
filename = string.format("%s/Library/Rime/lua/cold_word_drop/%s_words.lua", os.getenv('HOME'), record_type)
|
"%s/rime/lua/cold_word_drop/%s_words.lua",
|
||||||
elseif system == "Linux" then
|
os.getenv("HOME") .. "/.config/ibus",
|
||||||
filename = string.format("%s/%s/rime/lua/cold_word_drop/%s_words.lua",
|
record_type
|
||||||
os.getenv('HOME'),
|
)
|
||||||
(string.find(os.getenv('GTK_IM_MODULE'), 'fcitx') and '.local/share/fcitx5' or '.config/ibus'),
|
end
|
||||||
record_type)
|
|
||||||
end
|
|
||||||
return filename
|
|
||||||
end
|
end
|
||||||
|
|
||||||
local function write_word_to_file(record_type)
|
local function write_word_to_file(env, record_type)
|
||||||
-- local filename = string.format("%s/Library/Rime/lua/cold_word_drop/%s_words.lua", os.getenv('HOME'), record_type)
|
local filename = get_record_filername(record_type)
|
||||||
local filename = get_record_filername(record_type)
|
local record_header = string.format("local %s_words =\n", record_type)
|
||||||
local record_header = string.format("local %s_words =\n", record_type)
|
local record_tailer = string.format("\nreturn %s_words", record_type)
|
||||||
local record_tailer = string.format("\nreturn %s_words", record_type)
|
if not filename then
|
||||||
local fd = assert(io.open(filename, "w")) --打开
|
return false
|
||||||
fd:setvbuf("line")
|
end
|
||||||
fd:write(record_header) --写入文件头部
|
local fd = assert(io.open(filename, "w")) --打开
|
||||||
-- df:flush() --刷新
|
-- fd:flush() --刷新
|
||||||
local x = string.format("%s_list", record_type)
|
local x = string.format("%s_list", record_type)
|
||||||
local record = table.serialize(tbls[x]) -- lua 的 table 对象 序列化为字符串
|
local record = table.serialize(env.tbls[x]) -- lua 的 table 对象 序列化为字符串
|
||||||
fd:write(record) --写入 序列化的字符串
|
fd:setvbuf("line")
|
||||||
fd:write(record_tailer) --写入文件尾部, 结束记录
|
fd:write(record_header) --写入文件头部
|
||||||
fd:close() --关闭
|
fd:write(record) --写入 序列化的字符串
|
||||||
|
fd:write(record_tailer) --写入文件尾部, 结束记录
|
||||||
|
fd:close() --关闭
|
||||||
end
|
end
|
||||||
|
|
||||||
local function check_encode_matched(cand_code, word, input_code_tbl, reversedb)
|
local function append_word_to_droplist(env, ctx, action_type)
|
||||||
if #cand_code < 1 and utf8.len(word) > 1 then -- 二字词以上的词条反查, 需要逐个字去反查
|
local word = ctx.word:gsub(" ", "")
|
||||||
local word_cand_code = string.split(word, "")
|
local input_code = ctx.code:gsub(" ", "")
|
||||||
for i, v in ipairs(word_cand_code) do
|
|
||||||
-- 如有 `[` 引导的辅助码情况, 去掉引导符及之后的所有形码字符
|
if action_type == "drop" then
|
||||||
local char_code = string.gsub(reversedb:lookup(v), '%[%l%l', '')
|
table.insert(env.drop_words, word) -- 高亮选中的词条插入到 drop_list
|
||||||
local _char_preedit_code = input_code_tbl[i] or " "
|
return true
|
||||||
-- 如有 `[` 引导的辅助码情况, 同上, 去掉之
|
end
|
||||||
local char_preedit_code = string.gsub(_char_preedit_code, '%[%l+', '')
|
|
||||||
if not string.match(char_code, char_preedit_code) then
|
if action_type == "hide" then
|
||||||
-- 输入编码串和词条反查结果不匹配(考虑到多音字, 开启了模糊音, 纠错音), 返回false, 表示隐藏这个词条
|
if not env.hide_words[word] then
|
||||||
return false
|
env.hide_words[word] = { input_code }
|
||||||
end
|
-- 隐藏的词条如果已经在 hide_list 中, 则将输入串追加到 值表中, 如: ['藏'] = {'chang', 'zhang'}
|
||||||
end
|
elseif not table.find_index(env.hide_words[word], input_code) then
|
||||||
end
|
table.insert(env.hide_words[word], input_code)
|
||||||
-- 输入编码串和词条反查结果匹配, 返回true, 表示对这个词条降频
|
end
|
||||||
return true
|
return true
|
||||||
|
end
|
||||||
|
|
||||||
|
if action_type == "reduce_freq" then
|
||||||
|
if env.reduce_freq_words[word] then
|
||||||
|
table.insert(env.reduce_freq_words[word], input_code)
|
||||||
|
else
|
||||||
|
env.reduce_freq_words[word] = { input_code }
|
||||||
|
end
|
||||||
|
return true
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
local function append_word_to_droplist(ctx, action_type, reversedb)
|
function processor.init(env)
|
||||||
local word = ctx.word
|
local engine = env.engine
|
||||||
local input_code = ctx.code
|
local config = engine.schema.config
|
||||||
if action_type == 'drop' then
|
env.drop_cand_key = config:get_string("key_binder/drop_cand") or "Control+d"
|
||||||
table.insert(drop_list, word) -- 高亮选中的词条插入到 drop_list
|
env.hide_cand_key = config:get_string("key_binder/hide_cand") or "Control+x"
|
||||||
return true
|
env.reduce_cand_key = config:get_string("key_binder/reduce_freq_cand") or "Control+j"
|
||||||
end
|
env.drop_words = require("cold_word_drop.drop_words") or {}
|
||||||
local input_code_tbl = string.split(input_code, " ")
|
env.hide_words = require("cold_word_drop.hide_words") or {}
|
||||||
local cand_code = reversedb:lookup(word) or "" -- 反查候选项文字编码
|
env.reduce_freq_words = require("cold_word_drop.reduce_freq_words") or {}
|
||||||
-- 二字词 的匹配检查, 匹配返回true, 不匹配返回false
|
env.tbls = {
|
||||||
local match_result = check_encode_matched(cand_code, word, input_code_tbl, reversedb)
|
["drop_list"] = env.drop_words,
|
||||||
local ccand_code = string.gsub(cand_code, '%[%l%l', '')
|
["hide_list"] = env.hide_words,
|
||||||
-- 如有 `[` 引导的辅助码情况, 去掉引导符及之后的所有形码字符
|
["reduce_freq_list"] = env.reduce_freq_words,
|
||||||
local input_str = string.gsub(input_code, '%[%l+', '')
|
}
|
||||||
local input_code_str = table.concat(input_code_tbl, '')
|
|
||||||
-- 单字和二字词 的匹配检查, 如果匹配, 降频
|
|
||||||
if string.match(ccand_code, input_str) or match_result then
|
|
||||||
if turndown_freq_list[word] then
|
|
||||||
table.insert(turndown_freq_list[word], input_code_str)
|
|
||||||
else
|
|
||||||
turndown_freq_list[word] = { input_code_str }
|
|
||||||
end
|
|
||||||
return 'turndown_freq'
|
|
||||||
end
|
|
||||||
|
|
||||||
-- 单字和二字词 如果不匹配 就隐藏
|
|
||||||
if not hide_list[word] then
|
|
||||||
hide_list[word] = { input_code_str }
|
|
||||||
return true
|
|
||||||
else
|
|
||||||
-- 隐藏的词条如果已经在 hide_list 中, 则将输入串追加到 值表中, 如: ['藏'] = {'chang', 'zhang'}
|
|
||||||
if not table.find_index(hide_list[word], input_code_str) then
|
|
||||||
table.insert(hide_list[word], input_code_str)
|
|
||||||
return true
|
|
||||||
else
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
local function processor(key, env)
|
function processor.func(key, env)
|
||||||
local engine = env.engine
|
local engine = env.engine
|
||||||
local config = engine.schema.config
|
local context = engine.context
|
||||||
local context = engine.context
|
local preedit_code = context:get_script_text()
|
||||||
-- local top_cand_text = context:get_commit_text()
|
local action_map = {
|
||||||
-- local preedit_code = context.input
|
[env.drop_cand_key] = "drop",
|
||||||
local preedit_code = context:get_script_text()
|
[env.hide_cand_key] = "hide",
|
||||||
local turndown_cand_key = config:get_string("key_binder/turn_down_cand") or "Control+j"
|
[env.reduce_cand_key] = "reduce_freq",
|
||||||
local drop_cand_key = config:get_string("key_binder/drop_cand") or "Control+d"
|
}
|
||||||
local action_map = {
|
|
||||||
[turndown_cand_key] = 'hide',
|
|
||||||
[drop_cand_key] = 'drop'
|
|
||||||
}
|
|
||||||
|
|
||||||
-- local schema_id = config:get_string("schema/schema_id")
|
if context:has_menu() and action_map[key:repr()] then
|
||||||
local schema_id = config:get_string("translator/dictionary") -- 多方案共用字典取主方案名称
|
local cand = context:get_selected_candidate()
|
||||||
---@diagnostic disable-next-line: undefined-global
|
local action_type = action_map[key:repr()]
|
||||||
local reversedb = ReverseLookup(schema_id)
|
local ctx_map = {
|
||||||
if key:repr() == turndown_cand_key or key:repr() == drop_cand_key then
|
["word"] = cand.text,
|
||||||
local cand = context:get_selected_candidate()
|
["code"] = preedit_code,
|
||||||
local action_type = action_map[key:repr()]
|
}
|
||||||
local ctx_map = {
|
local res = append_word_to_droplist(env, ctx_map, action_type)
|
||||||
['word'] = cand.text,
|
|
||||||
['code'] = preedit_code
|
|
||||||
}
|
|
||||||
local res = append_word_to_droplist(ctx_map, action_type, reversedb)
|
|
||||||
|
|
||||||
context:refresh_non_confirmed_composition() -- 刷新当前输入法候选菜单, 实现看到实时效果
|
context:refresh_non_confirmed_composition() -- 刷新当前输入法候选菜单, 实现看到实时效果
|
||||||
if type(res) == "boolean" then
|
if not res then
|
||||||
-- 期望被删的词和隐藏的词条写入文件(drop_words.lua, hide_words.lua)
|
return 2
|
||||||
write_word_to_file(action_type)
|
end
|
||||||
else
|
|
||||||
-- 期望 要调整词频的词条写入 turndown_freq_words.lua 文件
|
|
||||||
write_word_to_file(res)
|
|
||||||
end
|
|
||||||
return 1 -- kAccept
|
|
||||||
end
|
|
||||||
|
|
||||||
return 2 -- kNoop, 不做任何操作, 交给下个组件处理
|
if res then
|
||||||
|
-- 期望被删的词和隐藏的词条写入文件(drop_words.lua, hide_words.lua)
|
||||||
|
write_word_to_file(env, action_type)
|
||||||
|
end
|
||||||
|
|
||||||
|
return 1 -- kAccept
|
||||||
|
end
|
||||||
|
|
||||||
|
return 2 -- kNoop, 不做任何操作, 交给下个组件处理
|
||||||
end
|
end
|
||||||
|
|
||||||
return processor
|
return processor
|
||||||
|
|||||||
5
lua/cold_word_drop/reduce_freq_words.lua
Normal file
5
lua/cold_word_drop/reduce_freq_words.lua
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
local reduce_freq_words =
|
||||||
|
{ ["示~例~"] = { "shili", },
|
||||||
|
["颜射"] = { "yanshe", },
|
||||||
|
}
|
||||||
|
return reduce_freq_words
|
||||||
@@ -4,35 +4,34 @@
|
|||||||
-- string.utf8_offset= utf8.offset
|
-- string.utf8_offset= utf8.offset
|
||||||
-- string.utf8_sub= utf8.sub
|
-- string.utf8_sub= utf8.sub
|
||||||
function string.split(str, sp, sp1)
|
function string.split(str, sp, sp1)
|
||||||
sp = type(sp) == "string" and sp or " "
|
sp = type(sp) == "string" and sp or " "
|
||||||
if #sp == 0 then
|
if #sp == 0 then
|
||||||
sp = "([%z\1-\127\194-\244][\128-\191]*)"
|
sp = "([%z\1-\127\194-\244][\128-\191]*)"
|
||||||
elseif #sp == 1 then
|
elseif #sp == 1 then
|
||||||
sp = "[^" .. (sp == "%" and "%%" or sp) .. "]*"
|
sp = "[^" .. (sp == "%" and "%%" or sp) .. "]*"
|
||||||
else
|
else
|
||||||
sp1 = sp1 or "^"
|
sp1 = sp1 or "^"
|
||||||
str = str:gsub(sp, sp1)
|
str = str:gsub(sp, sp1)
|
||||||
sp = "[^" .. sp1 .. "]*"
|
sp = "[^" .. sp1 .. "]*"
|
||||||
end
|
end
|
||||||
|
|
||||||
local tab = {}
|
local tab = {}
|
||||||
for v in str:gmatch(sp) do
|
for v in str:gmatch(sp) do
|
||||||
table.insert(tab, v)
|
table.insert(tab, v)
|
||||||
end
|
end
|
||||||
return tab
|
return tab
|
||||||
end
|
end
|
||||||
|
|
||||||
function utf8.gsub(str, si, ei)
|
function utf8.gsub(str, si, ei)
|
||||||
local function index(ustr, i)
|
local function index(ustr, i)
|
||||||
return i >= 0 and (ustr:utf8_offset(i) or ustr:len() + 1)
|
return i >= 0 and (ustr:utf8_offset(i) or ustr:len() + 1) or (ustr:utf8_offset(i) or 1)
|
||||||
or (ustr:utf8_offset(i) or 1)
|
end
|
||||||
end
|
|
||||||
|
|
||||||
local u_si = index(str, si)
|
local u_si = index(str, si)
|
||||||
ei = ei or str:utf8_len()
|
ei = ei or str:utf8_len()
|
||||||
ei = ei >= 0 and ei + 1 or ei
|
ei = ei >= 0 and ei + 1 or ei
|
||||||
local u_ei = index(str, ei) - 1
|
local u_ei = index(str, ei) - 1
|
||||||
return str:sub(u_si, u_ei)
|
return str:sub(u_si, u_ei)
|
||||||
end
|
end
|
||||||
|
|
||||||
string.utf8_len = utf8.len
|
string.utf8_len = utf8.len
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
local turndown_freq_words =
|
|
||||||
{ ["示~例~"] = { "shili", },
|
|
||||||
}
|
|
||||||
return turndown_freq_words
|
|
||||||
Reference in New Issue
Block a user