1 个不稳定版本
0.1.0 | 2024年3月26日 |
---|
#717 在 文本处理
81KB
2.5K SLoC
bmpf_py Rust 中的注音拼音库
特点
- 它处理注音(注音符號)和汉语拼音(漢語拼音)的解析,识别音节(含声调),生成音节对象(Syllable)
- 它支持解析带声调标记的标准形式的拼音。
- 它识别尾随数字表示声调,并以字母 'v' 代替不易访问的 'ü' 的 ascii 拼音;
- 它支持注音和拼音之间的相互转换;
- 它支持 ASCII 拼音和带标记的标准形式之间的相互转换;
- 它在 CPU 和内存方面都很轻量且高效'
- 它经过了彻底的测试;
- 它易于使用;
- 它只处理小写拼音;
bmpf_py 注音拼音處理函式庫(RUST)
特点
- 对「注音符號」和「漢語拼音」进行綴字分析,识别音節(含聲調),生成音節對象(Syllable)
- 支持非標準 ASCII拼音(以 v 代 ü,以數字標調)
- 实现注音、拼音、ASCII拼音两两相互轉換
- 轻量、高效
- 充分測試
- 簡潔易用
- 只支持小寫拼音
用法 · 用法
use bpmf_py::bpmf::*;
fn main() {
//Assemble a mandarin syllable from parts directly .
let shuai4 = Syllable::new(Init::Shi, Med::Wu, Rime::Ai, Tone::Fall);
println!("Constructed: '{shuai4}'"); //outputs: ㄕㄨㄞˋ
//Four enums represent bopomofo parts:
//They all implement From<ENUM> for char trait
assert_eq!(char::from(Init::Shi), 'ㄕ');
assert_eq!(char::from(Med::Yu), 'ㄩ');
assert_eq!(char::from(Rime::Er), 'ㄦ');
assert_eq!(char::from(Tone::Fall), 'ˋ');
//They all have a default value which corresponds to '\0'
assert_eq!(char::from(Init::NoInit), '\0');
//All discriminants of these four enums have a unique name so it's okay to
// bring them all into the scope if messing up the namespace is'nt your concern
use Init::*;
use Med::*;
use Rime::*;
use Tone::*;
let qiang2 = Syllable::new(Qi, Yi, Ang, Rise);
println!("Constructed '{}'", qiang2); //outputs: "ㄑㄧㄤˊ"
//Parsing
//The parser will skip all whitespaces and the syllable separator "'"
let mut txt = "\t ㄎㄨㄟˋ 'ㄖㄣˊ";
//Parse bopomofo
let (mut syl, mut remainder) = Syllable::parse_bopomofo(txt).unwrap();
assert_eq!(syl, Syllable::new(Ke, Wu, Ei, Fall));
assert_eq!(remainder, " 'ㄖㄣˊ");
//continue to parse the next syllable
(syl, remainder) = Syllable::parse_bopomofo(remainder).unwrap();
assert_eq!(syl, Syllable::new(Ri, NoMed, En, Rise));
assert_eq!(remainder, "");
//If the text is ill-formed it returns An Error
txt = "万X尢";
if let Err(ParseBopomofoError) = Syllable::parse_bopomofo(txt) {
println!("Failed to parse '{}'", txt)
}
//parsing pinyin and ascii pinyin are similar:
txt = "ráo";
(syl, _) = Syllable::parse_pinyin(txt).unwrap();
println!("Pinyin '{txt}' parsed to '{}'", syl.to_pinyin()); //outputs:'ráo'
//***Ascii pinyin*** is a commonly used alternative form of standard pinyin,
//in which letter 'ü' is substituted with 'v' and all vowel letters with
//diacritical tone marks are not used, instead tones are indicated with
//a trailing number. E.g. zhuǎng is spelt as zhuang3
txt = "lve4";
(syl, _) = Syllable::parse_ascii_pinyin(txt).unwrap();
println!("Ascii pinyin '{txt}' parsed to '{}'", syl.to_pinyin()); //outputs:'lüè'
//Even unshortened form is recognized
txt = "qiou2";
(syl, _) = Syllable::parse_ascii_pinyin(txt).unwrap();
println!(
"Long form ascii pinyin '{txt}' parsed to '{}'",
syl.to_pinyin()
); //outputs:'qiú'
//FromStr trait is implemented, both bopomofo and pinyin(standard form)
//are recognized
txt = "ㄑㄧㄠˇ";
syl = txt.parse().unwrap();
println!("Bopomofo '{txt}' recognized as '{}'", syl); //outputs: 'ㄑㄧㄠˇ'
txt = "qiǎo";
syl = txt.parse().unwrap();
println!("Pinyin {txt} recognized as '{}'", syl); //also outputs: 'ㄑㄧㄠˇ'
//The Syllable object implemented Eq and Ord
syl = Syllable::new(Ri, Wu, Ang, Dip); // a fabricated sound
let (syl2, _) = Syllable::parse_ascii_pinyin("ruang3").unwrap();
assert_eq!(syl, syl2);
let orig_syllables = ["zhuan4", "an3", "an1", "bo2", "qi3"];
let mut sorted_syllables: Vec<Syllable> = orig_syllables
.into_iter()
.map(Syllable::parse_ascii_pinyin)
.map(|res| res.unwrap().0)
.collect();
sorted_syllables.sort();
println!(
"{:?} sorted: {:?}",
orig_syllables,
sorted_syllables
.iter()
.map(Syllable::to_ascii_pinyin)
.collect::<Vec<String>>()
);
//outputs: [bo2, qi3, zhuan4, an1, an3]
//The order conforms to the order of bopomofo: b p ... i u ü
//For you convenience the following conversion functions are also provided:
//They all returns Option<String>
println!("\n- - -\nCalling convenient functions performing direct conversion");
println!("{}", pinyin_to_ascii_pinyin("ráo").unwrap()); //outputs: rao2
println!("{}", ascii_pinyin_to_pinyin("rao2").unwrap()); //outputs: ráo
println!("{}", bopomofo_to_pinyin("ㄑㄩㄥ").unwrap()); //outputs: "qiōng"
println!("{}", pinyin_to_bopomofo("qiōng").unwrap()); //outputs: "ㄑㄩㄥ
println!("{}", ascii_pinyin_to_bopomofo("qiong1").unwrap()); //outputs: "ㄑㄩㄥ
println!("{}", bopomofo_to_ascii_pinyin("ㄑㄩㄥ").unwrap()); //outputs: "qiong1
}
依赖
~105KB