-
This is my AST, it includes ...
use logos::Span;
/// Expression node of the AST.
///
/// Every variant carries a [`Span`] as its last field (presumably the source
/// range the expression was parsed from).
#[derive(Debug, Clone)]
pub enum Expr<'a> {
    /// Integer value.
    Int(i64, Span),
    /// Floating-point value.
    Float(f64, Span),
    /// String slice borrowed for `'a`.
    String(&'a str, Span),
    /// An operator applied to a single shared, interior-mutable operand.
    UnaryOperation(UnaryOperator, Rc<RefCell<Expr<'a>>>, Span),
    /// An operator applied to two shared, interior-mutable operands.
    BinaryOperation(BinaryOperator, Rc<RefCell<Expr<'a>>>, Rc<RefCell<Expr<'a>>>, Span),
}
/// Operators that take a single operand.
#[derive(Debug, Clone)]
pub enum UnaryOperator {
    /// The `Minus` operator together with a `Span` (presumably the operator's own location).
    Minus(Span),
}
/// Operators that take two operands.
///
/// Each variant carries a `Span` (presumably the operator's own location).
#[derive(Debug, Clone)]
pub enum BinaryOperator {
    /// The `Add` operator.
    Add(Span),
    /// The `Subtract` operator.
    Subtract(Span),
    /// The `Multiply` operator.
    Multiply(Span),
    /// The `Divide` operator.
    Divide(Span),
    /// The `Modulo` operator.
    Modulo(Span),
}
This is my custom lexer wrapping logos
use logos::{Logos, Span, SpannedIter};
...
/// Outcome of producing one token: `Ok((start, token, end))` with the token
/// placed between two positions of type `Pos`, or `Err(error)` of type `E`.
pub type Spanned<Tok, Pos, E> = Result<(Pos, Tok, Pos), E>;
/// Errors reported by the lexer.
#[derive(Debug)]
pub enum LexicalError {
    /// The input could not be turned into a token.
    InvalidToken,
}
/// Custom lexer wrapping the Logos-generated one.
pub struct Lexer<'input> {
    /// Underlying stream of tokens with their spans; this replaces the
    /// character iterator a hand-written lexer would hold.
    token_stream: SpannedIter<'input, Token<'input>>,
}
impl<'input> Lexer<'input> {
    /// Creates a lexer that tokenizes `input`.
    pub fn new(input: &'input str) -> Self {
        // `Token::lexer` is supplied by the `Logos` trait; `.spanned()` turns it
        // into the span-carrying iterator stored in the struct.
        let token_stream = Token::lexer(input).spanned();
        Self { token_stream }
    }
}
impl<'input> Iterator for Lexer<'input> {
    /// One lexed token: the token paired with its `Span`, wrapped between the
    /// span's start and end positions.
    ///
    /// NOTE(review): the original author reports that making the token a tuple
    /// "doesn't work" with the grammar; the item type is kept unchanged here.
    type Item = Spanned<(Token<'input>, Span), usize, LexicalError>;

    /// Pulls the next token from the underlying Logos stream.
    ///
    /// Returns `None` once the stream is exhausted, `Some(Err(LexicalError::InvalidToken))`
    /// when Logos reports an error, and `Some(Ok((start, (token, span), end)))` otherwise.
    fn next(&mut self) -> Option<Self::Item> {
        let (token, span) = self.token_stream.next()?;
        Some(match token {
            Ok(token) => {
                // `Span` is not `Copy`: read both offsets *before* `span` is moved
                // into the token tuple, otherwise `span.end` is a use after move.
                let (start, end) = (span.start, span.end);
                Ok((start, (token, span), end))
            }
            Err(()) => Err(LexicalError::InvalidToken),
        })
    }
}
This is my grammar ...
use crate::ast::*;
use crate::lexer::*;
use logos::Span;
grammar<'input>(input: &'input str);
// Expressions: numeric literals and parenthesised sub-expressions.
pub Expr: Rc<RefCell<Expr<'input>>> = {
    // The `extern` block binds two values for "Int" and "Float", so each
    // terminal arrives as a `(&'input str, Span)` pair: `.0` is the matched
    // text and `.1` the span required by the AST variants.
    // NOTE: `parse().unwrap()` panics if the text cannot be converted
    // (e.g. an integer literal that does not fit in `i64`).
    <t:"Int"> => Rc::new(RefCell::new(Expr::Int(t.0.parse().unwrap(), t.1))),
    <t:"Float"> => Rc::new(RefCell::new(Expr::Float(t.0.parse().unwrap(), t.1))),
    "LParen" <Expr> "RParen",
};
// Connects the grammar's terminals to the tokens produced by the external lexer.
extern {
    // `usize` positions, matching the `span.start` / `span.end` the lexer emits.
    type Location = usize;
    type Error = LexicalError;

    // NOTE(review): the lexer yields `(Token<'input>, Span)` pairs while the
    // token type named here is `Token<'input>` -- confirm these agree.
    enum Token<'input> {
        "Int" => (Token::Int(<&'input str>), <Span>),
        "Float" => (Token::Float(<&'input str>), <Span>),
        "LParen" => (Token::LParen, <Span>),
        "RParen" => (Token::RParen, <Span>),
    }
}
|
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
Let me know if that works for you, but here is how it's done in Nickel: The type of token produced by the Logos wrapper is defined as `Spanned = (usize, YourToken, usize)`. The `Item` type of the Logos wrapper is `Result<Spanned, SomeCustomError>`. However, if you don't have custom errors I believe you can just use `Item = Spanned` directly and call it a day. We abstracted that away in a macro because we have a lot of position information to put in our AST: https://github.com/tweag/nickel/blob/d899cf5848e6ac0958062376696aa39389de580e/core/src/parser/grammar.lalrpop#L79. With that, as long as the type that you produce has a |
Beta Was this translation helpful? Give feedback.
Let me know if that works for you, but here is how it's done in Nickel:
- The type of token produced by the Logos wrapper is defined here as `Spanned = (usize, YourToken, usize)`: https://github.com/tweag/nickel/blob/d899cf5848e6ac0958062376696aa39389de580e/core/src/parser/lexer.rs#L476
- The `Item` type of the Logos wrapper is `Result<Spanned, SomeCustomError>`, defined here: https://github.com/tweag/nickel/blob/d899cf5848e6ac0958062376696aa39389de580e/core/src/parser/lexer.rs#L950

However, if you don't have custom errors I believe you can just use `Item = Spanned` directly and call it a day. Then LALRPOP recognizes the tuple and allows capturing position information directly in the grammar rules wit…