-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
7,503 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
# Build the obfuscated ("secret") variant of the 092wubi Rime schema.
#
# Steps: compile the Rust helpers, run secret-mkdict to produce the raw
# dictionary file `out`, then derive the *_secret dict/schema/custom files
# from their 092wubi counterparts.
# NOTE(review): all paths are relative, so this is presumably meant to be run
# from the repository root -- confirm.

# Abort on the first failing step so a failed build or a missing input never
# leaves half-generated files behind.
set -euo pipefail

cargo build --manifest-path=tools-rs/Cargo.toml -r
tools-rs/target/release/secret-mkdict

secret_dict=092wubi_secret.dict.yaml
secret_schema=092wubi_secret.schema.yaml
mv out "$secret_dict"

# `sed -ie` / `sed -it` are parsed as `-i` with the backup suffix "e" / "t"
# and leave stray "<file>e" / "<file>t" copies behind. `-i.bak` followed by
# an explicit rm is accepted by both GNU and BSD sed.
sed -i.bak -e 's/092wubi/092wubi_secret/g' "$secret_dict"
rm -f "$secret_dict.bak"

cp 092wubi.schema.yaml "$secret_schema"
# One sed run; the expressions are applied to each line in the order given.
sed -i.bak \
    -e 's/schema_id: 092wubi/schema_id: 092wubi_secret/g' \
    -e 's/dictionary: 092wubi/dictionary: 092wubi_secret/g' \
    -e 's/- table_translator@translator_extend//g' \
    -e 's/name: "五笔"/name: "092wubi_secret"/g' \
    "$secret_schema"
rm -f "$secret_schema.bak"

cp 092wubi.custom.yaml 092wubi_secret.custom.yaml
|
||
# Register the new schema by applying an inline unified diff to
# default.custom.yaml; the single added line is `- schema: 092wubi_secret`.
# NOTE(review): patch(1) expects context lines to begin with a space and is
# sensitive to the YAML indentation of the target file -- confirm the hunk
# below still carries its original leading whitespace.
patch default.custom.yaml << EOF | ||
@@ -10,6 +10,7 @@ patch: | ||
- schema: luna_pinyin | ||
- schema: ipa_yunlong | ||
- schema: latin_international | ||
+ - schema: 092wubi_secret | ||
"switcher/hotkeys": # 将默认的 F4热键 更改为ctrl+~ | ||
- "Control+grave" | ||
- "Control+F8" | ||
EOF |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
use std::io::{stdin, Read}; | ||
use tools_rs::secret_dict::decode; | ||
|
||
fn main() { | ||
let mut text = String::new(); | ||
stdin().read_to_string(&mut text).unwrap(); | ||
println!("{}", decode(&text)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
use once_cell::sync::Lazy; | ||
use std::collections::HashMap; | ||
use std::fs::File; | ||
use std::io::{BufRead, BufReader, Seek, SeekFrom, Write}; | ||
use std::sync::Mutex; | ||
use anyhow::anyhow; | ||
use tools_rs::secret_dict::{encode_pua, CHARS_MAP, PINYIN_INITIAL_TABLE, RIME_ROOT}; | ||
use unicode_normalization::UnicodeNormalization; | ||
|
||
fn main() -> anyhow::Result<()> { | ||
let out_dict_path = RIME_ROOT.join("out"); | ||
let mut out_dict = File::options() | ||
.write(true) | ||
.read(true) | ||
.truncate(true) | ||
.create(true) | ||
.open(out_dict_path)?; | ||
|
||
let mut file = File::open(RIME_ROOT.join("092wubi.dict.yaml"))?; | ||
let reader = BufReader::new(file.try_clone()?); | ||
let header = reader | ||
.lines() | ||
.map(|x| x.unwrap()) | ||
.take_while(|x| x != "...") | ||
.collect::<Vec<_>>() | ||
.join("\n") | ||
+ "\n...\n"; | ||
|
||
out_dict.write_all(header.as_bytes())?; | ||
|
||
file.seek(SeekFrom::Start(0))?; | ||
let reader = BufReader::new(file); | ||
for x in reader | ||
.lines() | ||
.skip_while(|x| x.as_ref().unwrap() != "...") | ||
.skip(1) | ||
{ | ||
let line = x?; | ||
let split = line.split('\t').collect::<Vec<_>>(); | ||
if split.len() != 2 { | ||
continue; | ||
} | ||
let word = split[0]; | ||
let code = split[1]; | ||
|
||
let mut new_entry_buf = String::new(); | ||
for c in word.chars() { | ||
if CHARS_MAP.contains_key(&c) { | ||
let Some(pinyin_initial) = char_pinyin_initial(c) else { | ||
continue | ||
}; | ||
new_entry_buf.push(pinyin_initial); | ||
new_entry_buf.push(encode_pua(c).unwrap()); | ||
} else { | ||
new_entry_buf.push(c); | ||
} | ||
} | ||
writeln!(&mut out_dict, "{}\t{}", new_entry_buf, code)?; | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Returns `true` when `codepoint` falls inside one of the Unicode blocks
/// that hold Han ideographs.
///
/// Whole blocks are matched, so unassigned code points inside a block also
/// return `true`.
pub fn han_char_range(codepoint: u32) -> bool {
    matches!(
        codepoint,
        0x3400..=0x4dbf // CJK Unified Ideographs Extension A
            | 0x4e00..=0x9fff // CJK Unified Ideographs
            | 0xf900..=0xfaff // CJK Compatibility Ideographs
            | 0x20000..=0x2a6df // Extension B
            | 0x2a700..=0x2b73f // Extension C
            | 0x2b740..=0x2b81f // Extension D
            | 0x2b820..=0x2ceaf // Extension E
            | 0x2ceb0..=0x2ebef // Extension F
            | 0x2ebf0..=0x2ee5f // Extension I
            | 0x2f800..=0x2fa1f // CJK Compatibility Ideographs Supplement
            | 0x30000..=0x3134f // Extension G
            | 0x31350..=0x323af // Extension H
    )
}
|
||
static PINYIN_OPENCC_MAP: Lazy<Mutex<HashMap<char, char>>> = Lazy::new(|| { | ||
let file = File::open(RIME_ROOT.join("opencc/PYCharacters.txt")).unwrap(); | ||
let reader = BufReader::new(file); | ||
let mut map = HashMap::new(); | ||
for line in reader.lines().map(|x| x.unwrap()) { | ||
let split = line.split('\t').collect::<Vec<_>>(); | ||
if split.len() < 2 { | ||
continue; | ||
} | ||
map.insert( | ||
split[0].chars().next().unwrap(), | ||
PINYIN_INITIAL_TABLE[(split[1] | ||
.chars() | ||
.nth(1) | ||
.unwrap() | ||
.nfd() | ||
.next() | ||
.unwrap() | ||
.to_ascii_lowercase() as u32 | ||
- 'a' as u32) as usize], | ||
); | ||
} | ||
Mutex::new(map) | ||
}); | ||
|
||
fn char_pinyin_initial(c: char) -> Option<char> { | ||
let guard = PINYIN_OPENCC_MAP.lock().unwrap(); | ||
guard.get(&c).copied() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
use once_cell::sync::Lazy; | ||
use std::collections::{HashMap, HashSet}; | ||
use std::iter::Iterator; | ||
use std::path::Path; | ||
|
||
pub static RIME_ROOT: Lazy<&Path> = | ||
Lazy::new(|| Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap()); | ||
|
||
/// First code point used for encoded characters: the table entry with index
/// `i` is represented by the code point `PUA_START + i`.
pub const PUA_START: u32 = 0x10_ABCD;
pub static PINYIN_INITIAL_TABLE: Lazy<[char; 26]> = Lazy::new(|| { | ||
"ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" | ||
.chars() | ||
.collect::<Vec<_>>() | ||
.try_into() | ||
.unwrap() | ||
}); | ||
|
||
pub static PINYIN_INITIAL_SET: Lazy<HashSet<char>> = | ||
Lazy::new(|| PINYIN_INITIAL_TABLE.iter().copied().collect()); | ||
|
||
pub static CHARS_TABLE_TXT: &str = include_str!("table.txt"); | ||
|
||
pub static CHARS_ARR: Lazy<Vec<char>> = Lazy::new(|| { | ||
CHARS_TABLE_TXT | ||
.lines() | ||
.map(|x| x.chars().next().unwrap()) | ||
.collect() | ||
}); | ||
|
||
pub static CHARS_MAP: Lazy<HashMap<char, u16>> = Lazy::new(|| { | ||
CHARS_ARR | ||
.iter() | ||
.enumerate() | ||
.map(|(i, &c)| (c, i as u16)) | ||
.collect() | ||
}); | ||
|
||
pub fn encode_pua(c: char) -> Option<char> { | ||
let &index = CHARS_MAP.get(&c)?; | ||
char::from_u32(PUA_START + index as u32) | ||
} | ||
|
||
pub fn decode_pua(pua: char) -> Option<char> { | ||
Some(*CHARS_ARR.get((pua as u32 - PUA_START) as usize)?) | ||
} | ||
|
||
pub fn decode(text: &str) -> String { | ||
let mut decoded = String::new(); | ||
let mut escape_mode = false; | ||
for c in text.chars() { | ||
if PINYIN_INITIAL_SET.contains(&c) { | ||
escape_mode = true; | ||
continue; | ||
} else if escape_mode { | ||
let decoded_char = decode_pua(c).expect("failed to decode"); | ||
decoded.push(decoded_char); | ||
escape_mode = false; | ||
continue; | ||
} else { | ||
decoded.push(c); | ||
} | ||
} | ||
decoded | ||
} |
Oops, something went wrong.