3 个不稳定版本

0.2.0	2023年11月21日
0.1.1	2023年10月26日
0.1.0	2023年10月24日

在“解析器实现”类别中排名第 1371

MIT 许可证

29KB
383 行

此 crate 基于 yap 的接口构建，可以方便地对流（stream）进行解析。

原因

已经存在许多 crate，旨在帮助解析。其中 nom、winnow、chumsky、combine 支持解析值流。

nom:

没有明显的方法向解析器指示流的结束。
库的用户必须以与非流式解析器明显不同的方式来实现流式解析器。
解析以块（chunk）为单位进行。解析大小不定的块时，可能需要从头重新解析该块，造成重复工作。

winnow:

解析以块（chunk）为单位进行。解析大小不定的块时，可能需要从头重新解析该块，造成重复工作。

chumsky 并非为速度而设计。

combine 复杂。

使用此 crate 时，只需更改最初的 tokens 声明，即可直接复用已经编写好的 yap 解析器。

use std::{
    fs::File,
    io::{self, BufReader, Read},
};
use yap_streaming::{
    // Allows you to use `.into_tokens()` on strings and slices,
    // to get an instance of `Tokens`:
    IntoTokens,
    // Allows you to get an instance of `Tokens` that supports streams:
    StrStreamTokens,
    // This trait has all of the parsing methods on it:
    Tokens,
};

// Write parser
// =========================================

// An arithmetic operator recognised by the example parser.
#[derive(Debug, PartialEq)]
enum Op {
    // Written as `+` in the input.
    Plus,
    // Written as `-` in the input.
    Minus,
    // Written as `x` in the input.
    Multiply,
}
// One parsed item of the expression: either an operator or a number.
#[derive(Debug, PartialEq)]
enum OpOrDigit {
    // An operator sitting between two numbers.
    Op(Op),
    // A number, already converted to `u32`.
    Digit(u32),
}

// Tries to read a single operator character (`-`, `+` or `x`).
//
// `Tokens` builds on `Iterator`, so `next` hands us the next char. If that char is
// not an operator we rewind to where we started, so the caller sees it untouched.
// Returns `None` when the input is exhausted or the char is not an operator.
fn parse_op(t: &mut impl Tokens<Item = char>) -> Option<Op> {
    let start = t.location();
    let op = match t.next()? {
        '-' => Op::Minus,
        '+' => Op::Plus,
        'x' => Op::Multiply,
        _ => {
            // Not an operator: undo the `next()` above before bailing out.
            t.set_location(start);
            return None;
        }
    };
    Some(op)
}

// Reads a run of ASCII digits and converts it to a `u32`.
//
// Besides `next`, `Tokens` offers helpers such as `take_while` and `parse`.
// Returns `None` when the collected text does not convert to a `u32`.
fn parse_digits(t: &mut impl Tokens<Item = char>) -> Option<u32> {
    let mut digits = t.take_while(|c| c.is_ascii_digit());
    let parsed = digits.parse::<u32, String>();
    parsed.ok()
}

// Builds the token stream for a whole expression using the `sep_by_all` and
// `surrounded_by` combinators: numbers (with any ASCII whitespace around them
// skipped) separated by operators, with both numbers and operators yielded.
fn parse_all(t: &mut impl Tokens<Item = char>) -> impl Tokens<Item = OpOrDigit> + '_ {
    t.sep_by_all(
        // Item parser: a number, ignoring whitespace on either side.
        |toks| {
            toks.surrounded_by(
                |inner| {
                    let n = parse_digits(inner)?;
                    Some(OpOrDigit::Digit(n))
                },
                |ws| {
                    ws.skip_while(|c| c.is_ascii_whitespace());
                },
            )
        },
        // Separator parser: a single operator.
        |toks| {
            let op = parse_op(toks)?;
            Some(OpOrDigit::Op(op))
        },
    )
}

// Now we've parsed our input into OpOrDigits, let's calculate the result.
//
// Evaluation is strictly left to right (no operator precedence): each operator is
// remembered and applied to the running total when the next number arrives. The
// total starts at 0 with an implicit leading `+`.
//
// Panics if the running total leaves the `u32` range (for example "2 - 5"). The
// arithmetic is checked explicitly so that release builds fail loudly instead of
// silently wrapping around, matching what debug builds already do.
fn eval(t: &mut impl Tokens<Item = char>) -> u32 {
    let mut pending_op = Op::Plus;
    let mut total: u32 = 0;
    for item in parse_all(t) {
        match item {
            OpOrDigit::Op(op) => pending_op = op,
            OpOrDigit::Digit(n) => {
                let next = match pending_op {
                    Op::Plus => total.checked_add(n),
                    Op::Minus => total.checked_sub(n),
                    Op::Multiply => total.checked_mul(n),
                };
                total = next.expect("arithmetic overflow while evaluating expression");
            }
        }
    }
    total
}

// Use parser
// =========================================

// Turn the input string into something implementing `Tokens`.
let input = "10 + 2 x 12-4,foobar";
let mut tokens = input.into_tokens();
// Parse and evaluate left to right: ((10 + 2) x 12) - 4 = 140.
// The trailing `,foobar` does not affect the result.
let result = eval(&mut tokens);
assert_eq!(result, 140);

// Instead of parsing an in-memory buffer we can use `yap_streaming` to parse a stream.
// Reading everything up front with [`std::io::Read::read_to_end()`] would also work, but
// not for a file too large to fit in memory, nor for input arriving from a network socket.
let mut io_err = None;
let reader = BufReader::new(File::open("examples/opOrDigit.txt").expect("open file"));
let file_chars = reader.bytes().map_while(|byte| match byte {
    // This simple example parser only makes sense with ascii values.
    Ok(b) if b.is_ascii() => Some(char::from(b)),
    Ok(_) => {
        // Don't parse any further if non-ascii input; remember why we stopped.
        io_err = Some(io::Error::from(io::ErrorKind::InvalidData));
        None
    }
    Err(e) => {
        // Don't parse any further if io error.
        // Alternatively could panic, retry the byte,
        // or include as an error variant and parse Result<char, ParseError> instead.
        io_err = Some(e);
        None
    }
});

// Convert to something implementing `Tokens`.
// If parsing a stream not of `char` use [`yap_streaming::StreamTokens`] instead.
let mut tokens = StrStreamTokens::new(file_chars);
// Parse
let streamed = eval(&mut tokens);
assert_eq!(streamed, 140);
// Check that parse encountered no io errors.
assert!(io_err.is_none());

依赖项

~115KB