3 个不稳定版本
0.2.0 | 2023年11月21日 |
---|---|
0.1.1 | 2023年10月26日 |
0.1.0 | 2023年10月24日 |
在“解析器实现”类别中排名 #1371
29KB
383 行
此 crate 构建于 yap
的接口之上,可以方便地对流进行解析。
原因
目前已经有许多旨在辅助解析的 crate。其中 nom
、winnow
、chumsky
、combine
支持解析值流。
nom
:
- 没有明显的方法向解析器指示流的结束。
- 库的用户必须以明显不同于非流式解析器的方式来实现流式解析器。
- 解析以块为单位进行。解析大小不固定的块时,可能需要从头重新解析该块,重复已经完成的工作。
winnow
:
- 解析以块为单位进行。解析大小不固定的块时,可能需要从头重新解析该块,重复已经完成的工作。
chumsky
并非以速度为设计目标。
combine
复杂。
使用此 crate 时,只需更改最初的 tokens 声明,即可复用已经编写好的 yap
解析器。
use std::{
fs::File,
io::{self, BufReader, Read},
};
use yap_streaming::{
// Allows you to use `.into_tokens()` on strings and slices,
// to get an instance of `Tokens`:
IntoTokens,
// Allows you to get an instance of `Tokens` that supports streams:
StrStreamTokens,
// This trait has all of the parsing methods on it:
Tokens,
};
// Write parser
// =========================================
/// Arithmetic operator recognised by the example parser.
///
/// `parse_op` produces these from the characters `'+'`, `'-'` and `'x'`.
#[derive(Debug, PartialEq)]
enum Op {
    /// Addition, written `+`.
    Plus,
    /// Subtraction, written `-`.
    Minus,
    /// Multiplication, written `x`.
    Multiply,
}
/// One element of the parsed expression: either an operator or a number.
#[derive(Debug, PartialEq)]
enum OpOrDigit {
    /// An operator sitting between two numbers.
    Op(Op),
    /// An unsigned number parsed from a run of decimal digits.
    Digit(u32),
}
// `Tokens` gives us `next`, plus `location`/`set_location` for rewinding.
/// Tries to read a single operator character (`+`, `-` or `x`).
///
/// Returns `None` when there is no next token. If the next token is not an
/// operator, the position saved before reading is restored and `None` is
/// returned.
fn parse_op(t: &mut impl Tokens<Item = char>) -> Option<Op> {
    // Remember where we started so a failed match can be undone.
    let start = t.location();
    let op = match t.next()? {
        '+' => Op::Plus,
        '-' => Op::Minus,
        'x' => Op::Multiply,
        _ => {
            t.set_location(start);
            return None;
        }
    };
    Some(op)
}
// `Tokens` also offers helpers such as `take_while` and `parse`.
/// Reads a run of ASCII decimal digits and parses it as a `u32`.
///
/// The digits are buffered into a `String` for parsing. Returns `None` when
/// the parse fails.
fn parse_digits(t: &mut impl Tokens<Item = char>) -> Option<u32> {
    t.take_while(char::is_ascii_digit)
        .parse::<u32, String>()
        .ok()
}
/// Turns a stream of `char`s into a stream of [`OpOrDigit`] values.
///
/// Built from the `sep_by_all` and `surrounded_by` combinators: numbers are
/// the items and operators are the separators. The returned value borrows `t`.
fn parse_all(t: &mut impl Tokens<Item = char>) -> impl Tokens<Item = OpOrDigit> + '_ {
    t.sep_by_all(
        // Item: a number wrapped by a parser that skips ASCII whitespace.
        |item| {
            item.surrounded_by(
                |inner| Some(OpOrDigit::Digit(parse_digits(inner)?)),
                |padding| {
                    padding.skip_while(char::is_ascii_whitespace);
                },
            )
        },
        // Separator: a single operator character.
        |separator| Some(OpOrDigit::Op(parse_op(separator)?)),
    )
}
// With the input parsed into `OpOrDigit`s, compute the result.
/// Parses `t` with [`parse_all`] and evaluates the expression.
///
/// Evaluation is strictly left to right with no operator precedence: the
/// running total starts at `0` with a pending `+`, each operator replaces the
/// pending one, and each number is combined into the total using the pending
/// operator. Arithmetic is plain `u32` arithmetic, so overflow (including
/// subtracting below zero) is not handled specially.
fn eval(t: &mut impl Tokens<Item = char>) -> u32 {
    let (total, _pending_op) = parse_all(t).into_iter().fold(
        (0u32, Op::Plus),
        |(total, pending_op), item| match item {
            // A new operator only changes what happens to the next number.
            OpOrDigit::Op(next_op) => (total, next_op),
            OpOrDigit::Digit(n) => {
                let updated = match pending_op {
                    Op::Plus => total + n,
                    Op::Minus => total - n,
                    Op::Multiply => total * n,
                };
                (updated, pending_op)
            }
        },
    );
    total
}
// Use parser
// =========================================
// Wrap an in-memory string in something implementing `Tokens`.
let mut tokens = "10 + 2 x 12-4,foobar".into_tokens();
// Evaluate it; the trailing `,foobar` does not contribute to the result.
assert_eq!(eval(&mut tokens), 140);
// The same parser can consume a stream instead of an in-memory buffer.
// [`std::io::Read::read_to_end()`] would work here, but not if the file were too
// large to fit in memory, or if the input came from a network socket.
let mut io_err = None;
let reader = BufReader::new(File::open("examples/opOrDigit.txt").expect("open file"));
let file_chars = reader.bytes().map_while(|byte| match byte {
    Ok(b) if b.is_ascii() => Some(b as char),
    Ok(_) => {
        // This simple example parser only makes sense with ascii values,
        // so record the problem and stop feeding the parser.
        io_err = Some(io::ErrorKind::InvalidData.into());
        None
    }
    Err(e) => {
        // Stop on an io error. Alternatives: panic, retry the byte, or carry an
        // error variant and parse `Result<char, ParseError>` instead.
        io_err = Some(e);
        None
    }
});
// Convert to something implementing `Tokens`.
// For a stream whose items are not `char`, use [`yap_streaming::StreamTokens`] instead.
let mut tokens = StrStreamTokens::new(file_chars);
// Evaluate the streamed input.
assert_eq!(eval(&mut tokens), 140);
// Confirm that reading the file produced no io errors.
assert!(io_err.is_none());
依赖项
~115KB