Skip to content

Commit

Permalink
092wubi_secret
Browse files Browse the repository at this point in the history
  • Loading branch information
bczhc committed Sep 18, 2024
1 parent 8f4256a commit 3c07486
Show file tree
Hide file tree
Showing 10 changed files with 7,503 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@
/easy-type.txt
/ci-build/
/sync/
/092wubi_secret.dict.yaml
/092wubi_secret.schema.yaml
/092wubi_secret.custom.yaml
4 changes: 2 additions & 2 deletions 092wubi.dict.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52856,8 +52856,8 @@ HIV hivz
基本上 hjh
桌上 hjha
基本上 hjha
旧书 hjhb
馄饨 hjhb
旧书 hjhb
瞬发 hjhd
上星期 hjhe
长时期 hjhe
Expand Down Expand Up @@ -125859,8 +125859,8 @@ SIM卡 simm
中距离 slyb
核辐射 slyf
中国话 slyq
森 sm
中央 sm
森 sm
松 sm
桥 sm
桥式 smaa
Expand Down
28 changes: 28 additions & 0 deletions create-secret-dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
#
# Builds the Rust tools, generates the obfuscated dictionary and derives the
# matching "092wubi_secret" schema and custom files from the plain 092wubi ones.

# Stop at the first failing step: a failed build or generator run must not be
# followed by moving/rewriting stale files.
set -euo pipefail

cargo build --manifest-path=tools-rs/Cargo.toml -r
tools-rs/target/release/secret-mkdict

secret_dict=092wubi_secret.dict.yaml
secret_schema=092wubi_secret.schema.yaml
mv out "$secret_dict"

# `sed -ie` / `sed -it` are parsed as "-i with backup suffix e/t" and leave
# stray `*.yamle` / `*.yamlt` files behind. Use an explicit suffix (accepted by
# both GNU and BSD sed) plus `-e`, then remove the backup.
sed -i.bak -e 's/092wubi/092wubi_secret/g' "$secret_dict"
rm -f "$secret_dict.bak"

cp 092wubi.schema.yaml "$secret_schema"
# The expressions run in order on every line, exactly like the separate passes.
sed -i.bak \
    -e 's/schema_id: 092wubi/schema_id: 092wubi_secret/g' \
    -e 's/dictionary: 092wubi/dictionary: 092wubi_secret/g' \
    -e 's/- table_translator@translator_extend//g' \
    -e 's/name: "五笔"/name: "092wubi_secret"/g' \
    "$secret_schema"
rm -f "$secret_schema.bak"

cp 092wubi.custom.yaml 092wubi_secret.custom.yaml

# Register the generated schema in default.custom.yaml by applying the inline
# unified-diff hunk below, which adds `- schema: 092wubi_secret` after the
# `latin_international` entry of the schema list.
# NOTE(review): the hunk relies on its context lines matching the current
# default.custom.yaml — confirm it still applies if that file has changed.
patch default.custom.yaml << EOF
@@ -10,6 +10,7 @@ patch:
- schema: luna_pinyin
- schema: ipa_yunlong
- schema: latin_international
+ - schema: 092wubi_secret
"switcher/hotkeys": # 将默认的 F4热键 更改为ctrl+~
- "Control+grave"
- "Control+F8"
EOF
42 changes: 38 additions & 4 deletions tools-rs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tools-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,15 @@ edition = "2021"
[dependencies]
rime-api = "=0.12.1"
clap = { version = "4.5.3", features = ["derive"] }
once_cell = "1.18.0"
unicode-normalization = "0.1.22"
anyhow = "1.0.89"
regex = "1.10.6"

[[bin]]
name = "secret-mkdict"
path = "src/secret_dict/mkdict.rs"

[[bin]]
name = "secret-decode"
path = "src/secret_dict/decode.rs"
2 changes: 2 additions & 0 deletions tools-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::path::PathBuf;

pub mod secret_dict;

#[derive(clap::Parser, Debug)]
pub struct Args {
pub user_data_dir: PathBuf,
Expand Down
8 changes: 8 additions & 0 deletions tools-rs/src/secret_dict/decode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use std::io::{stdin, Read};
use tools_rs::secret_dict::decode;

/// Reads everything from standard input, runs it through [`decode`] and
/// prints the result followed by a newline.
///
/// # Panics
/// Panics if standard input cannot be read into a `String`.
fn main() {
    let mut input = String::new();
    let mut source = stdin();
    source.read_to_string(&mut input).unwrap();

    let plain = decode(&input);
    println!("{plain}");
}
120 changes: 120 additions & 0 deletions tools-rs/src/secret_dict/mkdict.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader, Seek, SeekFrom, Write};
use std::sync::Mutex;
use anyhow::anyhow;
use tools_rs::secret_dict::{encode_pua, CHARS_MAP, PINYIN_INITIAL_TABLE, RIME_ROOT};
use unicode_normalization::UnicodeNormalization;

fn main() -> anyhow::Result<()> {
let out_dict_path = RIME_ROOT.join("out");
let mut out_dict = File::options()
.write(true)
.read(true)
.truncate(true)
.create(true)
.open(out_dict_path)?;

let mut file = File::open(RIME_ROOT.join("092wubi.dict.yaml"))?;
let reader = BufReader::new(file.try_clone()?);
let header = reader
.lines()
.map(|x| x.unwrap())
.take_while(|x| x != "...")
.collect::<Vec<_>>()
.join("\n")
+ "\n...\n";

out_dict.write_all(header.as_bytes())?;

file.seek(SeekFrom::Start(0))?;
let reader = BufReader::new(file);
for x in reader
.lines()
.skip_while(|x| x.as_ref().unwrap() != "...")
.skip(1)
{
let line = x?;
let split = line.split('\t').collect::<Vec<_>>();
if split.len() != 2 {
continue;
}
let word = split[0];
let code = split[1];

let mut new_entry_buf = String::new();
for c in word.chars() {
if CHARS_MAP.contains_key(&c) {
let Some(pinyin_initial) = char_pinyin_initial(c) else {
continue
};
new_entry_buf.push(pinyin_initial);
new_entry_buf.push(encode_pua(c).unwrap());
} else {
new_entry_buf.push(c);
}
}
writeln!(&mut out_dict, "{}\t{}", new_entry_buf, code)?;
}
Ok(())
}

// paste from some old code
/// Returns `true` when `codepoint` lies in one of the CJK ideograph ranges
/// listed below, and `false` for every other value.
pub fn han_char_range(codepoint: u32) -> bool {
    matches!(
        codepoint,
        // CJK Unified Ideographs
        0x4e00..=0x9fff
        // CJK Unified Ideographs Extension A
        | 0x3400..=0x4dbf
        // CJK Unified Ideographs Extension B
        | 0x20000..=0x2a6df
        // CJK Unified Ideographs Extension C
        | 0x2a700..=0x2b73f
        // CJK Unified Ideographs Extension D
        | 0x2b740..=0x2b81f
        // CJK Unified Ideographs Extension E
        | 0x2b820..=0x2ceaf
        // CJK Compatibility Ideographs
        | 0xf900..=0xfaff
        // CJK Compatibility Ideographs Supplement
        | 0x2f800..=0x2fa1f
    )
}

/// Lookup from a character to the [`PINYIN_INITIAL_TABLE`] marker of its
/// pinyin initial, built on first access from `opencc/PYCharacters.txt`
/// under [`RIME_ROOT`].
///
/// For each line with at least two tab-separated fields, the key is the first
/// character of the first field. The letter is taken from the *second*
/// character of the second field (presumably the first one is an opening
/// bracket — verify against the data file), reduced to the first character of
/// its NFD decomposition and lower-cased. Lines that do not yield an ASCII
/// letter `a`–`z` this way are skipped instead of panicking.
///
/// NOTE(review): the map is never mutated after construction, so the `Mutex`
/// is not needed for correctness; it is kept because the accessor locks it.
///
/// # Panics
/// Initialization panics if the file cannot be opened or read.
static PINYIN_OPENCC_MAP: Lazy<Mutex<HashMap<char, char>>> = Lazy::new(|| {
    let file = File::open(RIME_ROOT.join("opencc/PYCharacters.txt"))
        .expect("cannot open opencc/PYCharacters.txt");
    let mut map = HashMap::new();
    for line in BufReader::new(file).lines() {
        let line = line.expect("cannot read opencc/PYCharacters.txt");
        let mut fields = line.split('\t');
        let (Some(han), Some(pinyin)) = (fields.next(), fields.next()) else {
            continue;
        };
        let Some(key) = han.chars().next() else {
            continue;
        };
        // Strip the tone mark via NFD, then insist on a plain a–z letter so
        // the table index below can neither underflow nor go out of bounds.
        let Some(letter) = pinyin
            .chars()
            .nth(1)
            .and_then(|c| c.nfd().next())
            .map(|c| c.to_ascii_lowercase())
            .filter(char::is_ascii_lowercase)
        else {
            continue;
        };
        let index = usize::from(letter as u8 - b'a');
        map.insert(key, PINYIN_INITIAL_TABLE[index]);
    }
    Mutex::new(map)
});

/// Looks up `c` in [`PINYIN_OPENCC_MAP`] and returns its pinyin-initial
/// marker, or `None` when the character has no entry.
///
/// # Panics
/// Panics if the map's mutex is poisoned.
fn char_pinyin_initial(c: char) -> Option<char> {
    PINYIN_OPENCC_MAP.lock().unwrap().get(&c).copied()
}
64 changes: 64 additions & 0 deletions tools-rs/src/secret_dict/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use once_cell::sync::Lazy;
use std::collections::{HashMap, HashSet};
use std::iter::Iterator;
use std::path::Path;

/// Parent directory of this crate's manifest directory
/// (`CARGO_MANIFEST_DIR`, captured at compile time); the tools resolve the
/// dictionary and OpenCC data files relative to it.
///
/// # Panics
/// First access panics if the manifest directory has no parent.
pub static RIME_ROOT: Lazy<&Path> = Lazy::new(|| {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    manifest_dir.parent().unwrap()
});

/// Base code point for encoded characters: `encode_pua` adds a character's
/// table index to this value and `decode_pua` subtracts it again.
/// U+10ABCD lies in Unicode's Supplementary Private Use Area-B.
pub const PUA_START: u32 = 0x10abcd;
/// The 26 circled capital letters `Ⓐ`–`Ⓩ`, in alphabetical order, so that
/// index `0` is `Ⓐ` and index `25` is `Ⓩ`.
///
/// # Panics
/// First access panics if the literal does not contain exactly 26 characters.
pub static PINYIN_INITIAL_TABLE: Lazy<[char; 26]> = Lazy::new(|| {
    let circled: Vec<char> = "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ".chars().collect();
    <[char; 26]>::try_from(circled).unwrap()
});

/// Set view of [`PINYIN_INITIAL_TABLE`], used for membership tests on the
/// marker characters.
pub static PINYIN_INITIAL_SET: Lazy<HashSet<char>> =
    Lazy::new(|| HashSet::from_iter(PINYIN_INITIAL_TABLE.iter().copied()));

/// Contents of `table.txt` (next to this source file), embedded at compile
/// time. `CHARS_ARR` reads it line by line, using the first character of each
/// line.
pub static CHARS_TABLE_TXT: &str = include_str!("table.txt");

/// First character of every line of [`CHARS_TABLE_TXT`], in file order; a
/// character's position in this list is its encoding index.
///
/// # Panics
/// First access panics if the table contains an empty line.
pub static CHARS_ARR: Lazy<Vec<char>> = Lazy::new(|| {
    let mut table = Vec::new();
    for row in CHARS_TABLE_TXT.lines() {
        let first = row.chars().next().unwrap();
        table.push(first);
    }
    table
});

/// Reverse lookup of [`CHARS_ARR`]: maps each table character to its index.
/// If a character occurs more than once, the last occurrence wins.
///
/// # Panics
/// First access panics if the table has more entries than a `u16` can index;
/// a plain `as u16` cast would silently wrap and map distinct characters to
/// the same index.
pub static CHARS_MAP: Lazy<HashMap<char, u16>> = Lazy::new(|| {
    CHARS_ARR
        .iter()
        .enumerate()
        .map(|(i, &c)| {
            let index = u16::try_from(i)
                .expect("table.txt has more entries than a u16 index can address");
            (c, index)
        })
        .collect()
});

/// Encodes a table character as the code point `PUA_START + index`.
///
/// Returns `None` when `c` is not in [`CHARS_MAP`] or when the resulting
/// value is not a valid `char`.
pub fn encode_pua(c: char) -> Option<char> {
    CHARS_MAP
        .get(&c)
        .and_then(|&index| char::from_u32(PUA_START + u32::from(index)))
}

/// Reverses [`encode_pua`]: returns the table character whose index is
/// `pua - PUA_START`.
///
/// Returns `None` when `pua` lies below [`PUA_START`] or past the end of
/// [`CHARS_ARR`]. The subtraction is checked, so an ordinary character no
/// longer triggers an arithmetic-overflow panic in debug builds.
pub fn decode_pua(pua: char) -> Option<char> {
    let offset = u32::from(pua).checked_sub(PUA_START)?;
    let index = usize::try_from(offset).ok()?;
    CHARS_ARR.get(index).copied()
}

pub fn decode(text: &str) -> String {
let mut decoded = String::new();
let mut escape_mode = false;
for c in text.chars() {
if PINYIN_INITIAL_SET.contains(&c) {
escape_mode = true;
continue;
} else if escape_mode {
let decoded_char = decode_pua(c).expect("failed to decode");
decoded.push(decoded_char);
escape_mode = false;
continue;
} else {
decoded.push(c);
}
}
decoded
}
Loading

0 comments on commit 3c07486

Please sign in to comment.