rime-ice/double_pinyin_sogou.schema.yaml

# Rime schema
# encoding: utf-8

# 复制自 rime_ice.schema.yaml ，适配了双拼。
# 双拼键位映射复制自 https://github.com/rime/rime-double-pinyin
# 双拼之间主要就是 speller/algebra 和 translator/preedit_format 这里有区别。
#
# 全拼的自定义文本和双拼有些冲突，改成了 custom_phrase_double ，需要创建 custom_phrase_double.txt 文件。
#
# 目前配置中有个别功能仍然是全拼拼写，不能自动适配，所以需要手动修改一下：
# - 英文中部分符号的派生规则：在英文方案文件 melt_eng.schema.yaml > speller > algebra 修改为对应的双拼拼写运算
# - 部件拆字的拼写规则：在部件拆字方案 radical_pinyin.schema.yaml > speller > algebra 修改为对应的双拼拼写运算
# 打个补丁一劳永逸，补丁写法参考 [常见问题](https://github.com/iDvel/rime-ice/issues/133)


# 方案说明
schema:
  schema_id: double_pinyin_sogou
  name: 搜狗双拼
  version: "1"
  author:
    - Dvel
  description: |
    雾凇拼音 - 搜狗双拼
    https://github.com/iDvel/rime-ice
  dependencies:
    - melt_eng        # 英文输入，作为次翻译器挂载到拼音方案
    - radical_pinyin  # 部件拆字，反查及辅码


# 开关
# reset: 默认状态。注释掉后，切换窗口时不会重置到默认状态。
# states: 方案选单显示的名称。可以注释掉，仍可以通过快捷键切换。
# abbrev: 默认的缩写取 states 的第一个字符，abbrev 可自定义一个字符
switches:
  - name: ascii_mode
    states: [ 中, Ａ ]
  - name: ascii_punct  # 中英标点
    states: [ ¥, $ ]
  - name: traditionalization
    states: [ 简, 繁 ]
  - name: emoji
    states: [ 💀, 😄 ]
    reset: 1
  - name: full_shape
    states: [ 半角, 全角 ]
  - name: search_single_char  # search.lua 的功能开关，辅码查词时是否单字优先
    abbrev: [词, 单]
    states: [正常, 单字]


# 输入引擎
engine:
  processors:
    - lua_processor@select_character          # 以词定字
    - ascii_composer
    - recognizer
    - key_binder
    - speller
    - punctuator
    - selector
    - navigator
    - express_editor
  segmentors:
    - ascii_segmentor
    - matcher
    - abc_segmentor
    - affix_segmentor@radical_lookup  # 部件拆字自定义 tag
    - punct_segmentor
    - fallback_segmentor
  translators:
    - punct_translator
    - script_translator
    - lua_translator@date_translator    # 时间、日期、星期
    - lua_translator@lunar              # 农历
    - table_translator@custom_phrase    # 自定义短语 custom_phrase_double.txt
    - table_translator@cn_en            # 中英混合词汇
    - table_translator@melt_eng         # 英文输入
    - table_translator@radical_lookup   # 部件拆字反查
    - lua_translator@unicode            # Unicode
    - lua_translator@number_translator  # 数字、金额大写
    - lua_translator@force_gc           # 暴力 GC
  filters:
    - lua_filter@corrector                          # 错音错字提示
    - reverse_lookup_filter@radical_reverse_lookup  # 部件拆字滤镜
    - lua_filter@autocap_filter                     # 英文自动大写
    - lua_filter@pin_cand_filter                    # 置顶候选项（顺序要求：置顶候选项 > Emoji > 简繁切换）
    - lua_filter@reduce_english_filter              # 降低部分英语单词在候选项的位置
    - simplifier@emoji                              # Emoji
    - simplifier@traditionalize                     # 简繁切换
    - lua_filter@search@radical_pinyin              # 部件拆字辅码
    - uniquifier                                    # 去重


# Lua 配置: 日期、时间、星期、ISO 8601、时间戳的触发关键字
date_translator:
  date: date            # 日期： 2022-11-29
  time: time            # 时间： 18:13
  week: week            # 星期： 星期二
  datetime: datetime    # ISO 8601： 2022-11-29T18:13:11+08:00
  timestamp: timestamp  # 时间戳： 1669716794


# Lua 配置：农历的触发关键字
lunar: lunar  # 农历： 二〇二三年冬月二十 癸卯年（兔）冬月二十


# Lua 配置：为 corrector 格式化 comment，占位符为 {comment}
# 默认 "{comment}" 输入 hun dun 时会在「馄饨」旁边生成 hún tun 的 comment
# 例如左右加个括号 "({comment})" 就会变成 (hún tun)
corrector: "{comment}"


# Lua 配置: 降低部分英语单词在候选项的位置。
# 详细介绍 https://dvel.me/posts/make-rime-en-better/#短单词置顶的问题
# 正常情况： 输入 rug 得到 「1.rug 2.如果 …… 」
# 降低之后： 输入 rug 得到 「1.如果 2.rug …… 」
# 几种模式：
# all     降低脚本内置的单词（所有 3~4 位长度、前 2~3 位是完整拼音、最后一位是声母），words 作为自定义的额外补充
# custom  完全自定义，只降低 words 里的
# none    不降低任何单词，相当于没有启用这个 Lua
# （匹配的是编码，不是单词）
reduce_english_filter:
  mode: custom  # all | custom | none
  idx: 2        # 降低到第 idx 个位置
  # 自定义的单词列表，示例列表没有降低部分常用单词，如 and cat mail Mac but bad shit ……
  words: [
    aid, ann,
    bail, bait, bam, band, bans, bat, bay, bend, bent, benz, bib, bid, bien, biz, boc, bop, bos, bud, buf, bach, bench, bush,
    cab, cad, cain, cam, cans, cap, cef, chad, chan, chap, chef, cher, chew, chic, chin, chip, chit, coup, cum, cunt, cur, couch,
    dab, dag, dal, dam, dent, dew, dial, diet, dim, din, dip, dis, dit, doug, dub, dug, dunn,
    fab, fax, fob, fog, foul, fur,
    gag, gail, gain, gal, gam, gaol, ged, gel, ger, guam, gus, gut,
    hail, ham, hank, hans, hat, hay, heil, heir, hem, hep, hud, hum, hung, hunk, hut, hush,
    jim, jug,
    kat,
    lab, lad, lag, laid, lam, laos, lap, lat, lax, lay, led, leg, lex, liam, lib, lid, lied, lien, lies, linn, lip, lit, liz, lob, lug, lund, lung, lux, lash, loch, lush,
    mag, maid, mann, mar, mat, med, mel, mend, mens, ment, mil, mins, mint, mob, moc, mop, mos, mot, mud, mug, mum, mesh,
    nap, nat, nay, neil, nib, nip, noun, nous, nun, nut, nail, nash,
    pac, paid, pail, pain, pair, pak, pal, pam, pans, pant, pap, par, pat, paw, pax, pens, pic, pier, pies, pins, pint, pit, pix, pod, pop, pos, pot, pour, pow, pub, pinch, pouch,
    rand, rant, rent, rep, res, ret, rex, rib, rid, rig, rim, rub, rug, rum, runc, runs, ranch,
    sac, sail, sal, sam, sans, sap, saw, sax, sew, sham, shaw, shin, sig, sin, sip, sis, suit, sung, suns, sup, sur, sus,
    tad, tail, taj, tar, tax, tec, ted, tel, ter, tex, tic, tied, tier, ties, tim, tin, tit, tour, tout, tum,
    wag, wand, womens, wap, wax, weir, won,
    yan, yen,
    zach
  ]


# Lua 配置: 置顶候选项
# 注释太长了，请参考 pin_cand_filter.lua 开头的说明书。
pin_cand_filter:
  # 格式：编码<Tab>字词1<Space>字词2……
  - d	的


# 主翻译器，拼音
translator:
  dictionary: rime_ice          # 挂载词库 rime_ice.dict.yaml
  enable_word_completion: true  # 大于 4 音节的词条自动补全，librime > 1.11.2
  prism: double_pinyin_sogou    # 多方案共用一个词库时，为避免冲突，需要用 prism 指定一个名字。
  spelling_hints: 8             # corrector.lua ：为了让错音错字提示的 Lua 同时适配全拼双拼，将拼音显示在 comment 中
  always_show_comments: true    # corrector.lua ：Rime 默认在 preedit 等于 comment 时取消显示 comment，这里强制一直显示，供 corrector.lua 做判断用。
  initial_quality: 1.2          # 拼音的权重应该比英文大
  comment_format:               # 标记拼音注释，供 corrector.lua 做判断用
    - xform/^/［/
    - xform/$/］/
  preedit_format:               # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符
    - xform/([aoe])(\w)/0$2/
    - xform/([bpmnljqxy])n/$1in/
    - xform/(\w)g/$1eng/
    - xform/(\w)q/$1iu/
    - xform/([gkhvuirzcs])w/$1ua/
    - xform/(\w)w/$1ia/
    - xform/([dtnlgkhjqxyvuirzcs])r/$1uan/
    - xform/0r/er/
    - xform/([dtgkhvuirzcs])v/$1ui/
    - xform/(\w)v/$1ve/
    - xform/(\w)t/$1ve/
    - xform/([gkhvuirzcs])y/$1uai/
    - xform/(\w)y/$1v/
    - xform/([dtnlgkhvuirzcs])o/$1uo/
    - xform/(\w)p/$1un/
    - xform/([jqx])s/$1iong/
    - xform/(\w)s/$1ong/
    - xform/([jqxnlb])d/$1iang/
    - xform/(\w)d/$1uang/
    - xform/(\w)f/$1en/
    - xform/(\w)h/$1ang/
    - xform/(\w)j/$1an/
    - xform/(\w)k/$1ao/
    - xform/(\w)l/$1ai/
    - xform/(\w)z/$1ei/
    - xform/(\w)x/$1ie/
    - xform/(\w)c/$1iao/
    - xform/(\w)b/$1ou/
    - xform/(\w)m/$1ian/
    - xform/(\w);/$1ing/
    - xform/0(\w)/$1/
    - "xform/(^|[ '])v/$1zh/"
    - "xform/(^|[ '])i/$1ch/"
    - "xform/(^|[ '])u/$1sh/"
    - xform/([jqxy])v/$1u/
    - xform/([nl])v/$1ü/
    - xform/ü/v/  # ü 显示为 v


# 次翻译器，英文
melt_eng:
  dictionary: melt_eng     # 挂载词库 melt_eng.dict.yaml
  enable_sentence: false   # 禁止造句
  enable_user_dict: false  # 禁用用户词典
  initial_quality: 1.1     # 初始权重
  comment_format:          # 自定义提示码
    - xform/.*//           # 清空提示码


# 中英混合词汇
cn_en:
  dictionary: ""
  user_dict: en_dicts/cn_en_sogou
  db_class: stabledb
  enable_completion: true
  enable_sentence: false
  initial_quality: 0.5
  comment_format:
    - xform/^.+$//


# 自定义短语
custom_phrase:
  dictionary: ""
  user_dict: custom_phrase_double  # 需要手动创建 custom_phrase_double.txt 文件
  db_class: stabledb
  enable_completion: false # 补全提示
  enable_sentence: false   # 禁止造句
  initial_quality: 99      # custom_phrase 的权重应该比 pinyin 和 melt_eng 大


# Emoji
emoji:
  option_name: emoji
  opencc_config: emoji.json
  inherit_comment: false  # 在 corrector.lua 及反查中，emoji 返回空注释


# 简繁切换
traditionalize:
  option_name: traditionalization
  opencc_config: s2t.json  # s2t.json | s2hk.json | s2tw.json | s2twp.json
  tips: none               # 转换提示: all 都显示 | char 仅单字显示 | none 不显示。
  tags: [ abc, number, gregorian_to_lunar ]  # 限制在对应 tag，不对其他如反查的内容做简繁转换


# 标点符号
# punctuator 下面有三个子项：
#   full_shape 全角标点映射
#   half_shape 半角标点映射
#   symbols    Rime 的预设配置是以 '/' 前缀开头输出一系列字符，自定义的 symbols_caps_v.yaml 修改成了 'V' 开头。
punctuator:
  full_shape:
    __include: default:/punctuator/full_shape  # 从 default.yaml 导入配置
  half_shape:
    __include: default:/punctuator/half_shape  # 从 default.yaml 导入配置
  symbols:
    __include: symbols_caps_v:/symbols         # 从 symbols_caps_v.yaml 导入配置


# 部件拆字反查
radical_lookup:
  tag: radical_lookup
  dictionary: radical_pinyin
  enable_user_dict: false
  prefix: "uU"  # 反查前缀（反查时前缀会消失影响打英文所以设定为两个字母，或可改成一个非字母符号），与 recognizer/patterns/radical_lookup 匹配
  tips: "  〔拆字〕"
  comment_format:
    - erase/^.*$//
# 部件拆字滤镜
radical_reverse_lookup:
  tags: [ radical_lookup ]
  # dictionary 为拼音标注来源。目前是显示本方案词库的注音，可去部件拆字方案下载更全的、带声调的、已编译好的词典
  # https://github.com/mirtlecn/rime-radical-pinyin?tab=readme-ov-file#%E5%8F%8D%E6%9F%A5%E5%B8%A6%E5%A3%B0%E8%B0%83%E6%B3%A8%E9%9F%B3
  dictionary: rime_ice
  # comment_format:     # 自定义 comment，例如在左右加上括号
  #  - xform/^/(/
  #  - xform/$/)/


# 处理符合特定规则的输入码，如网址、反查

recognizer:
  import_preset: default  # 从 default.yaml 继承通用的
  patterns:  # 再增加方案专有的：
    punct: "^V([0-9]|10|[A-Za-z]+)$"    # 响应 symbols_caps_v.yaml 的 symbols
    radical_lookup: "^uU[a-z;]+$"       # 响应部件拆字的反查，与 radical_lookup/prefix 匹配
    unicode: "^U[a-f0-9]+"              # 脚本将自动获取第 2 个字符 U 作为触发前缀，响应 lua_translator@unicode，输出 Unicode 字符
    number: "^R[0-9]+[.]?[0-9]*"        # 脚本将自动获取第 2 个字符 R 作为触发前缀，响应 lua_translator@number_translator，数字金额大写
    gregorian_to_lunar: "^N[0-9]{1,8}"  # 脚本将自动获取第 2 个字符 N 作为触发前缀，响应 lua_translator@lunar，公历转农历，输入 N20240115 得到「二〇二三年腊月初五」


# 从 default 继承快捷键
key_binder:
  import_preset: default  # 从 default.yaml 继承通用的
  search: "`"             # 辅码引导符，要添加到 speller/alphabet
  # bindings:             # 也可以再增加方案专有的


# 拼写设定
speller:
  # 如果不想让什么标点直接上屏，可以加在 alphabet，或者编辑标点符号为两个及以上的映射
  alphabet: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA;`
  # initials 定义仅作为始码的按键，排除 ` ; 让单个的 ` ; 可以直接上屏
  initials: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA
  delimiter: " '"  # 第一位<空格>是拼音之间的分隔符；第二位<'>表示可以手动输入单引号来分割拼音。
  algebra:
    - erase/^xx$/
    - derive/^([jqxy])u$/$1v/
    - derive/^([aoe].*)$/o$1/
    - xform/^([ae])(.*)$/$1$1$2/
    - xform/iu$/Ⓠ/
    - xform/[iu]a$/Ⓦ/
    - xform/er$|[uv]an$/Ⓡ/
    - xform/[uv]e$/Ⓣ/
    - xform/v$|uai$/Ⓨ/
    - xform/^sh/Ⓤ/
    - xform/^ch/Ⓘ/
    - xform/^zh/Ⓥ/
    - xform/uo$/Ⓞ/
    - xform/[uv]n$/Ⓟ/
    - xform/(.)i?ong$/$1Ⓢ/
    - xform/[iu]ang$/Ⓓ/
    - xform/(.)en$/$1Ⓕ/
    - xform/(.)eng$/$1Ⓖ/
    - xform/(.)ang$/$1Ⓗ/
    - xform/ian$/Ⓜ/
    - xform/(.)an$/$1Ⓙ/
    - xform/iao$/Ⓒ/
    - xform/(.)ao$/$1Ⓚ/
    - xform/(.)ai$/$1Ⓛ/
    - xform/(.)ei$/$1Ⓩ/
    - xform/ie$/Ⓧ/
    - xform/ui$/Ⓥ/
    - xform/(.)ou$/$1Ⓑ/
    - xform/in$/Ⓝ/
    - xform/ing$/;/
    - xlit/ⓆⓌⓇⓉⓎⓊⒾⓄⓅⓈⒹⒻⒼⒽⓂⒿⒸⓀⓁⓏⓍⓋⒷⓃ/qwrtyuiopsdfghmjcklzxvbn/
    # - abbrev/^(.).+$/$1/  # 首字母简拼，开启后会导致 3 个字母时 kj'x 变成 k'jx 的问题