9 个稳定版本
1.5.0 | 2024 年 5 月 28 日 |
---|---|
1.4.0 | 2023 年 11 月 16 日 |
1.3.3 | 2023 年 10 月 18 日 |
1.2.0 | 2023 年 4 月 14 日 |
0.0.1 | 2021 年 4 月 29 日 |
在国际化 (i18n) 类别中排名第 31
每月下载量 32,706 次
被 24 个包使用(其中 7 个直接使用)
3MB
28K SLoC
icu_collator
根据语言相关约定比较字符串。
此模块既作为独立的包 (`icu_collator`) 发布,也作为 `icu` 包的一部分发布。有关 ICU4X 项目的更多详细信息,请参阅后者。
`Collator` 是该组件的主要结构体。它接受一组参数,据此从数据提供程序收集必要的数据;一旦实例化,就可以用于比较字符串。
请参阅 ICU 用户指南中有关排序的部分,其中提供了 介绍 并解释了 基本概念。
示例
`Collator` 最基本的用途是提供区域设置感知的排序:
use core::cmp::Ordering;
use icu::collator::*;
use icu::locid::locale;

// Spanish collator using the traditional collation ("es-u-co-trad"),
// comparing at primary strength only.
let mut primary_es = CollatorOptions::new();
primary_es.strength = Some(Strength::Primary);
let spanish: Collator =
    Collator::try_new(&locale!("es-u-co-trad").into(), primary_es).unwrap();
// Traditional Spanish ordering places "pollo" after "polvo".
assert_eq!(spanish.compare("pollo", "polvo"), Ordering::Greater);

// English collator, again at primary strength only.
let mut primary_en = CollatorOptions::new();
primary_en.strength = Some(Strength::Primary);
let english: Collator =
    Collator::try_new(&locale!("en").into(), primary_en).unwrap();
// English rules place "pollo" before "polvo".
assert_eq!(english.compare("pollo", "polvo"), Ordering::Less);
`CollatorOptions` 的示例
结构体 `CollatorOptions` 用于为 `Collator` 配置特定的自定义行为。有关更多详细信息,请参阅 `CollatorOptions` 的文档。以下是一些基本描述和示例。
强度
确定字符串是否不同的敏感程度。
use core::cmp::Ordering;
use icu::collator::*;

// Each section builds a collator for the default locale at one strength and
// checks the same six string pairs against the ordering expected there.

// Primary level
let mut primary_opts = CollatorOptions::new();
primary_opts.strength = Some(Strength::Primary);
let primary: Collator =
    Collator::try_new(&Default::default(), primary_opts).unwrap();
for (left, right, expected) in [
    ("a", "b", Ordering::Less), // primary
    ("as", "às", Ordering::Equal), // secondary
    ("às", "at", Ordering::Less),
    ("ao", "Ao", Ordering::Equal), // tertiary
    ("Ao", "aò", Ordering::Equal),
    ("A", "Ⓐ", Ordering::Equal),
] {
    assert_eq!(primary.compare(left, right), expected);
}

// Secondary level
let mut secondary_opts = CollatorOptions::new();
secondary_opts.strength = Some(Strength::Secondary);
let secondary: Collator =
    Collator::try_new(&Default::default(), secondary_opts).unwrap();
for (left, right, expected) in [
    ("a", "b", Ordering::Less), // primary
    ("as", "às", Ordering::Less), // secondary
    ("às", "at", Ordering::Less),
    ("ao", "Ao", Ordering::Equal), // tertiary
    ("Ao", "aò", Ordering::Less),
    ("A", "Ⓐ", Ordering::Equal),
] {
    assert_eq!(secondary.compare(left, right), expected);
}

// Tertiary level
let mut tertiary_opts = CollatorOptions::new();
tertiary_opts.strength = Some(Strength::Tertiary);
let tertiary: Collator =
    Collator::try_new(&Default::default(), tertiary_opts).unwrap();
for (left, right, expected) in [
    ("a", "b", Ordering::Less), // primary
    ("as", "às", Ordering::Less), // secondary
    ("às", "at", Ordering::Less),
    ("ao", "Ao", Ordering::Less), // tertiary
    ("Ao", "aò", Ordering::Less),
    ("A", "Ⓐ", Ordering::Less),
] {
    assert_eq!(tertiary.compare(left, right), expected);
}
备用处理
允许对某些自定义排序顺序进行备用处理,包括可以选择忽略针对此类自定义字符串的特殊处理。具体来说,备用处理用于控制 Unicode 排序算法中所谓“可变”(variable) 字符的处理方式:空白、标点符号和符号。
请注意,`AlternateHandling::ShiftTrimmed` 和 `AlternateHandling::Blanked` 尚未实现。除泰语外,默认值为 `AlternateHandling::NonIgnorable`。
use core::cmp::Ordering;
use icu::collator::*;

// With `NonIgnorable` alternate handling, differences among whitespace,
// punctuation and symbols carry the same importance as differences among
// letters.
let mut tertiary_non_ignorable = CollatorOptions::new();
tertiary_non_ignorable.strength = Some(Strength::Tertiary);
tertiary_non_ignorable.alternate_handling = Some(AlternateHandling::NonIgnorable);
let non_ignorable: Collator =
    Collator::try_new(&Default::default(), tertiary_non_ignorable).unwrap();
for (left, right, expected) in [
    ("di Silva", "Di Silva", Ordering::Less),
    ("Di Silva", "diSilva", Ordering::Less),
    ("diSilva", "U.S.A.", Ordering::Less),
    ("U.S.A.", "USA", Ordering::Less),
] {
    assert_eq!(non_ignorable.compare(left, right), expected);
}

// With `Shifted` alternate handling, those characters are of only minor
// importance. `Shifted` is often combined with Strength set to Quaternary.
let mut tertiary_shifted = CollatorOptions::new();
tertiary_shifted.strength = Some(Strength::Tertiary);
tertiary_shifted.alternate_handling = Some(AlternateHandling::Shifted);
let shifted_l3: Collator =
    Collator::try_new(&Default::default(), tertiary_shifted).unwrap();
for (left, right, expected) in [
    ("di Silva", "diSilva", Ordering::Equal),
    ("diSilva", "Di Silva", Ordering::Less),
    ("Di Silva", "U.S.A.", Ordering::Less),
    ("U.S.A.", "USA", Ordering::Equal),
] {
    assert_eq!(shifted_l3.compare(left, right), expected);
}

// Same `Shifted` handling, now at quaternary strength.
let mut quaternary_shifted = CollatorOptions::new();
quaternary_shifted.strength = Some(Strength::Quaternary);
quaternary_shifted.alternate_handling = Some(AlternateHandling::Shifted);
let shifted_l4: Collator =
    Collator::try_new(&Default::default(), quaternary_shifted).unwrap();
for (left, right, expected) in [
    ("di Silva", "diSilva", Ordering::Less),
    ("diSilva", "Di Silva", Ordering::Less),
    ("Di Silva", "U.S.A.", Ordering::Less),
    ("U.S.A.", "USA", Ordering::Less),
] {
    assert_eq!(shifted_l4.compare(left, right), expected);
}
大小写级别
是否在排序时区分大小写。开启后,即使排序强度比三级更粗略(例如一级或二级),也能区分大小写,而不必仅仅为了获得大小写差异就使用三级强度。
use core::cmp::Ordering;
use icu::collator::*;

// Every collator below targets the default locale and is checked against the
// same three string pairs; only strength and case level vary.

// Primary
let mut primary_opts = CollatorOptions::new();
primary_opts.strength = Some(Strength::Primary);
primary_opts.case_level = Some(CaseLevel::Off);
let primary = Collator::try_new(&Default::default(), primary_opts).unwrap();
for (left, right, expected) in [
    ("ⓓⓔⓐⓛ", "DEAL", Ordering::Equal),
    ("dejavu", "dejAvu", Ordering::Equal),
    ("dejavu", "déjavu", Ordering::Equal),
] {
    assert_eq!(primary.compare(left, right), expected);
}

// Primary with case level on
let mut primary_case_opts = CollatorOptions::new();
primary_case_opts.strength = Some(Strength::Primary);
primary_case_opts.case_level = Some(CaseLevel::On);
let primary_and_case =
    Collator::try_new(&Default::default(), primary_case_opts).unwrap();
for (left, right, expected) in [
    ("ⓓⓔⓐⓛ", "DEAL", Ordering::Less),
    ("dejavu", "dejAvu", Ordering::Less),
    ("dejavu", "déjavu", Ordering::Equal),
] {
    assert_eq!(primary_and_case.compare(left, right), expected);
}

// Secondary with case level on
let mut secondary_case_opts = CollatorOptions::new();
secondary_case_opts.strength = Some(Strength::Secondary);
secondary_case_opts.case_level = Some(CaseLevel::On);
let secondary_and_case =
    Collator::try_new(&Default::default(), secondary_case_opts).unwrap();
for (left, right, expected) in [
    ("ⓓⓔⓐⓛ", "DEAL", Ordering::Less),
    ("dejavu", "dejAvu", Ordering::Less),
    ("dejavu", "déjavu", Ordering::Less), // secondary difference
] {
    assert_eq!(secondary_and_case.compare(left, right), expected);
}

// Tertiary
let mut tertiary_opts = CollatorOptions::new();
tertiary_opts.strength = Some(Strength::Tertiary);
tertiary_opts.case_level = Some(CaseLevel::Off);
let tertiary = Collator::try_new(&Default::default(), tertiary_opts).unwrap();
for (left, right, expected) in [
    ("ⓓⓔⓐⓛ", "DEAL", Ordering::Less),
    ("dejavu", "dejAvu", Ordering::Less),
    ("dejavu", "déjavu", Ordering::Less),
] {
    assert_eq!(tertiary.compare(left, right), expected);
}
大小写优先
是否交换大写和小写的排序顺序。
反向二级
以逆序比较第二级。默认值为 `false`(关闭),加拿大法语除外。
数值
当设置为 `true`(开启)时,任何十进制数字序列都会在一级(主要级别)上按其数值排序。
use core::cmp::Ordering;
use icu::collator::*;

// Numeric ordering disabled: "a10b" is expected to sort before "a2b".
let mut numeric_off = CollatorOptions::new();
numeric_off.numeric = Some(Numeric::Off);
let plain: Collator =
    Collator::try_new(&Default::default(), numeric_off).unwrap();
assert_eq!(plain.compare("a10b", "a2b"), Ordering::Less);

// Numeric ordering enabled: "a10b" is expected to sort after "a2b".
let mut numeric_on = CollatorOptions::new();
numeric_on.numeric = Some(Numeric::On);
let by_value: Collator =
    Collator::try_new(&Default::default(), numeric_on).unwrap();
assert_eq!(by_value.compare("a10b", "a2b"), Ordering::Greater);
更多信息
有关开发、作者、贡献等更多信息,请访问 ICU4X 主页。