#diff #lcs #algorithm #myers #byte #slice #sequences

已删除 diffr-lib

Myers 差分算法的实现

3 个版本

0.1.3 2019年12月8日
0.1.2 2019年9月7日
0.1.1 2019年8月27日

#17 in #lcs

Download history 13/week @ 2024-04-22 9/week @ 2024-05-27 23/week @ 2024-06-03 18/week @ 2024-06-10 10/week @ 2024-06-17 17/week @ 2024-06-24 13/week @ 2024-07-01 5/week @ 2024-07-08 7/week @ 2024-07-15 12/week @ 2024-07-22 38/week @ 2024-07-29

每月63次下载

MIT 许可证

50KB
1.5K SLoC

diffr_lib

该crate实现了E. Myers论文《An O(ND) Difference Algorithm and Its Variations》中描述的各种算法。

主要入口点是 diff,它允许计算两个字节切片序列之间的最长公共子序列。

请注意,当前API尚未稳定。

用法

将此添加到您的 Cargo.toml

[dependencies]
diffr-lib = "0.1.3"
use diffr_lib::{diff, DiffInput, HashedSpan, Tokenization};
use std::collections::HashSet;

/// Example: compute the longest common subsequence (LCS) of two
/// line-tokenized byte buffers with `diffr_lib::diff`, then print the
/// shared lines and the lines unique to each side.
fn main() {
    // Build a `HashedSpan` covering bytes [lo, hi) of the underlying data.
    // A real caller would derive `hash` from the span's contents; `0` is
    // sufficient for this self-contained example.
    fn line(lo: usize, hi: usize) -> HashedSpan {
        HashedSpan { lo, hi, hash: 0 }
    }

    let old_data = b"I need to buy apples.\n\
I need to run the laundry.\n\
I need to wash the dog.\n\
I need to get the car detailed.";
    // One token per line; each (lo, hi) pair excludes the trailing '\n'.
    let old_tokens = vec![line(0, 21), line(22, 48), line(49, 72), line(73, 104)];

    let new_data = b"I need to buy apples.\n\
I need to do the laundry.\n\
I need to wash the car.\n\
I need to get the dog detailed.";
    let new_tokens = vec![line(0, 21), line(22, 47), line(48, 71), line(72, 103)];

    // Fixed: the original example swapped the two sides, constructing
    // `added` from the OLD data and `removed` from the NEW data.
    // `removed` must hold the old sequence (the x side of the diff) and
    // `added` the new sequence (the y side); otherwise the `x0`/`y0`
    // indices used below would address the wrong token vectors.
    let input = DiffInput {
        added: Tokenization::new(new_data, &new_tokens),
        removed: Tokenization::new(old_data, &old_tokens),
    };
    let mut scratch = vec![]; // working memory reused by the diff algorithm
    let mut shared_blocks = vec![]; // output: maximal runs of shared tokens
    diff(&input, &mut scratch, &mut shared_blocks);

    // Token indices (per side) that belong to some shared block.
    let mut old_shared = HashSet::new();
    let mut new_shared = HashSet::new();

    println!("LCS:");
    for shared_block in &shared_blocks {
        // Each block pairs `len` tokens starting at x0 (old/removed side)
        // with `len` tokens starting at y0 (new/added side).
        for offset in 0..shared_block.len as usize {
            let old_line_index = shared_block.x0 as usize + offset;
            let old_line = old_tokens[old_line_index];
            old_shared.insert(old_line_index);
            let new_line_index = shared_block.y0 as usize + offset;
            new_shared.insert(new_line_index);
            // Shared lines are identical on both sides, so printing the
            // old side's bytes is sufficient.
            println!(
                "\t{}",
                String::from_utf8_lossy(&old_data[old_line.lo..old_line.hi])
            );
        }
    }

    println!("unique to old data: ");
    for i in 0..old_tokens.len() {
        if !old_shared.contains(&i) {
            let old_line = old_tokens[i];
            println!(
                "\t{}",
                String::from_utf8_lossy(&old_data[old_line.lo..old_line.hi])
            );
        }
    }

    println!("unique to new data: ");
    for i in 0..new_tokens.len() {
        if !new_shared.contains(&i) {
            let new_line = new_tokens[i];
            println!(
                "\t{}",
                String::from_utf8_lossy(&new_data[new_line.lo..new_line.hi])
            );
        }
    }
}

无运行时依赖