17 个版本
使用旧的 Rust 2015
0.1.18 | 2021 年 8 月 19 日 |
---|---|
0.1.16 | 2020 年 1 月 16 日 |
0.1.15 | 2018 年 4 月 19 日 |
0.1.9 | 2018 年 1 月 9 日 |
0.1.1 | 2017 年 10 月 20 日 |
#89 在 解析器工具
44 每月下载量
在 2 crates 中使用
27KB
740 行
lexer
基于插件的词法分析器
extern crate lexer;
use std::fmt::{self, Write};
use lexer::{Input, Reader, ReaderResult, Readers, ReadersBuilder, State, TokenMeta};
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum TokenValue {
Number(isize),
String(String),
Keyword(String),
Identifier(String),
List(Vec<Token>),
}
impl fmt::Display for TokenValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
&TokenValue::Number(ref n) => write!(f, "{}", n),
&TokenValue::String(ref s) => write!(f, "{:?}", s),
&TokenValue::Keyword(ref s) => write!(f, ":{}", s),
&TokenValue::Identifier(ref s) => write!(f, "{}", s),
&TokenValue::List(ref list) => {
f.write_char('(')?;
let mut index = 0;
for token in list {
write!(f, "{}", token.value())?;
index += 1;
if index < list.len() {
f.write_str(", ")?;
}
}
f.write_char(')')
}
}
}
}
pub type Token = lexer::Token<TokenValue>;
pub type TokenError = lexer::TokenError<&'static str>;
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct WhitespaceReader;
impl Reader<Token, TokenError> for WhitespaceReader {
fn read(
&self,
_: &Readers<Token, TokenError>,
input: &mut dyn Input,
_: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
if is_whitespace(ch) {
while let Some(ch) = input.peek(next, 0) {
if is_whitespace(ch) {
input.read(next);
} else {
break;
}
}
ReaderResult::Empty
} else {
ReaderResult::None
}
}
None => ReaderResult::None,
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct NumberReader;
impl Reader<Token, TokenError> for NumberReader {
fn read(
&self,
_: &Readers<Token, TokenError>,
input: &mut dyn Input,
current: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
if ch.is_numeric() || ch == '-' {
let mut string = String::new();
string.push(ch);
while let Some(ch) = input.peek(next, 0) {
if ch.is_numeric() || ch == '_' {
input.read(next);
string.push(ch);
} else {
break;
}
}
ReaderResult::Some(Token::new(
TokenMeta::new_state_meta(current, next),
TokenValue::Number(string.parse().unwrap()),
))
} else {
ReaderResult::None
}
}
None => ReaderResult::None,
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct StringReader;
impl Reader<Token, TokenError> for StringReader {
fn read(
&self,
_: &Readers<Token, TokenError>,
input: &mut dyn Input,
current: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
if ch == '"' {
let mut string = String::new();
while let Some(ch) = input.read(next) {
if ch == '"' {
break;
} else {
string.push(ch);
}
}
ReaderResult::Some(Token::new(
TokenMeta::new_state_meta(current, next),
TokenValue::String(string),
))
} else {
ReaderResult::None
}
}
None => ReaderResult::None,
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct KeywordReader;
impl Reader<Token, TokenError> for KeywordReader {
fn read(
&self,
_: &Readers<Token, TokenError>,
input: &mut dyn Input,
current: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
if ch == ':' {
let mut string = String::new();
while let Some(ch) = input.peek(next, 0) {
if is_closer(ch) || is_whitespace(ch) {
break;
} else {
input.read(next);
string.push(ch);
}
}
ReaderResult::Some(Token::new(
TokenMeta::new_state_meta(current, next),
TokenValue::Keyword(string),
))
} else {
ReaderResult::None
}
}
None => ReaderResult::None,
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct ListReader;
impl Reader<Token, TokenError> for ListReader {
fn read(
&self,
readers: &Readers<Token, TokenError>,
input: &mut dyn Input,
current: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
if ch == '(' {
let mut list = Vec::new();
while let Some(ch) = input.peek(next, 0) {
if ch == ')' {
input.read(next);
break;
} else {
match lexer::read(readers, input, next) {
Some(Ok(token)) => {
list.push(token);
}
Some(Err(error)) => {
return ReaderResult::Err(error);
}
_ => {
break;
}
}
}
}
ReaderResult::Some(Token::new(
TokenMeta::new_state_meta(current, next),
TokenValue::List(list),
))
} else {
ReaderResult::None
}
}
None => ReaderResult::None,
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct IdentifierReader;
impl Reader<Token, TokenError> for IdentifierReader {
fn read(
&self,
_: &Readers<Token, TokenError>,
input: &mut dyn Input,
current: &State,
next: &mut State,
) -> ReaderResult<Token, TokenError> {
match input.read(next) {
Some(ch) => {
let mut string = String::new();
string.push(ch);
while let Some(ch) = input.peek(next, 0) {
if is_closer(ch) || is_whitespace(ch) {
break;
} else {
input.read(next);
string.push(ch);
}
}
ReaderResult::Some(Token::new(
TokenMeta::new_state_meta(current, next),
TokenValue::Identifier(string),
))
}
None => ReaderResult::None,
}
}
}
#[inline]
fn is_whitespace(ch: char) -> bool {
ch.is_whitespace() || ch == ','
}
fn is_closer(ch: char) -> bool {
ch == ')'
}
pub fn readers() -> lexer::Readers<Token, TokenError> {
ReadersBuilder::new()
.add(WhitespaceReader)
.add(NumberReader)
.add(StringReader)
.add(KeywordReader)
.add(ListReader)
.add(IdentifierReader)
.build()
}
fn main() {
let readers = readers();
let string = "(def-fn hello () (println :Hello, \"World!\"))";
let tokens = readers.read(string.chars());
let tokens: Vec<Token> = tokens.map(Result::unwrap).collect();
assert_eq!(tokens.len(), 1);
if let Some(&TokenValue::List(ref tokens)) = tokens.get(0).map(Token::value) {
let first = tokens.first().unwrap();
assert_eq!(first.meta().col_start(), 1);
assert_eq!(first.meta().col_end(), 7);
assert_eq!(first.meta().col_count(), 6);
assert_eq!(first.meta().line_start(), 1);
assert_eq!(first.meta().line_end(), 1);
assert_eq!(first.meta().line_count(), 0);
assert_eq!(first.meta().len(), 6);
}
}
依赖
~0.5–1.1MB
~26K SLoC