1个不稳定版本
使用旧的Rust 2015
| 0.1.0 | 2018年2月5日 |
|---|---|
在“硬件支持”类别中排名 #1717
在rjoin中使用
110KB
2.5K SLoC
csvroll
一个低级、非FSM、零拷贝的CSV解析器。
采用 MIT 或 Unlicense 双重许可
文档
用法
将以下内容添加到您的 Cargo.toml:
[dependencies]
csvroll = "0.1"
并将以下内容添加到您的 crate 根目录:
extern crate csvroll;
解析器使用SIMD,该功能尚未稳定,因此您需要使用nightly通道。
示例
extern crate rollbuf;
use rollbuf::RollBuf;
use csvroll::parser::{Parser, Index,};
use csvroll::index_builder::IndexBuilder;
// One scenario for the example below: an input, a buffer capacity, and the
// sequence of consume amounts with the output expected before each of them.
struct TestCase {
    // Text whose bytes are handed to `RollBuf::with_capacity` as the data source.
    input: String,
    // Capacity argument passed to `RollBuf::with_capacity`.
    buf_len: usize,
    // Amounts passed to `parser.consume(..)`, one per parse step.
    consume: Vec<usize>,
    // Expected `parser.output()` per step: the buffer contents (as text) and the index.
    want: Vec<(String, Index)>,
}
// Table of scenarios driven by the loop below. Each `want` entry pairs the
// expected buffer contents with the expected `Index`, built via
// `Index::from_parts(<ranges>, <integers>)`.
let test_cases = vec![
    // Capacity 7, consuming 1 after every parse.
    TestCase {
        input: String::from("a\nb\nc,d,e"),
        buf_len: 7,
        consume: vec![1, 1, 1, 1],
        want: vec![
            (
                String::from("a\nb\nc,d"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![1, 2]),
            ),
            (
                String::from("b\nc,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![1]),
            ),
            (
                String::from("c,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![3]),
            ),
            (String::new(), Index::from_parts(Vec::new(), Vec::new())),
        ],
    },
    // Same input and capacity, but the first step consumes 2.
    TestCase {
        input: String::from("a\nb\nc,d,e"),
        buf_len: 7,
        consume: vec![2, 1, 1],
        want: vec![
            (
                String::from("a\nb\nc,d"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![1, 2]),
            ),
            (
                String::from("c,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![3]),
            ),
            (String::new(), Index::from_parts(Vec::new(), Vec::new())),
        ],
    },
    // Same input and capacity with a consume of 0 in the second step: the
    // following parse is expected to show the same bytes with a longer index.
    TestCase {
        input: String::from("a\nb\nc,d,e"),
        buf_len: 7,
        consume: vec![1, 0, 1, 1, 1],
        want: vec![
            (
                String::from("a\nb\nc,d"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![1, 2]),
            ),
            (
                String::from("b\nc,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![1]),
            ),
            (
                String::from("b\nc,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5, 6..7], vec![1, 4]),
            ),
            (
                String::from("c,d,e"),
                Index::from_parts(vec![0..1, 2..3, 4..5], vec![3]),
            ),
            (String::new(), Index::from_parts(Vec::new(), Vec::new())),
        ],
    },
    // Capacity 4; the input ends with a newline.
    TestCase {
        input: String::from("a\nb\nc\n"),
        buf_len: 4,
        consume: vec![2, 1, 1],
        want: vec![
            (
                String::from("a\nb\n"),
                Index::from_parts(vec![0..1, 2..3], vec![1, 2]),
            ),
            (
                String::from("c\n"),
                Index::from_parts(vec![0..1], vec![1]),
            ),
            (String::new(), Index::from_parts(Vec::new(), Vec::new())),
        ],
    },
    // Capacity 4; the input ends with a trailing comma, and the expected index
    // contains the empty range 2..2.
    TestCase {
        input: String::from("a\nb\nc,"),
        buf_len: 4,
        consume: vec![2, 1, 1],
        want: vec![
            (
                String::from("a\nb\n"),
                Index::from_parts(vec![0..1, 2..3], vec![1, 2]),
            ),
            (
                String::from("c,"),
                Index::from_parts(vec![0..1, 2..2], vec![2]),
            ),
            (String::new(), Index::from_parts(Vec::new(), Vec::new())),
        ],
    },
];
// Run every scenario: build a parser over the input with the requested buffer
// capacity, then alternate parse / check / consume for each step.
for (case_no, case) in test_cases.into_iter().enumerate() {
    println!("test case: {}", case_no);
    let TestCase { input, buf_len, consume, want } = case;

    // The parser is assembled from a rolling buffer over the input bytes and
    // an index builder configured with b',' and b'\n'.
    let mut parser = Parser::from_parts(
        RollBuf::with_capacity(buf_len, input.as_bytes()),
        IndexBuilder::new(b',', b'\n'),
    );

    // `zip` stops at the shorter of `consume` and `want`.
    for (step, (&amount, &(ref text, ref index))) in consume.iter().zip(&want).enumerate() {
        println!("parse: {}", step);
        parser.parse().unwrap();
        // The output must match the expected bytes and index for this step.
        assert_eq!(parser.output(), (text.as_bytes(), index));
        parser.consume(amount);
    }
}
依赖项
~555KB
~10K SLoC