-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathlexer.rs
More file actions
117 lines (110 loc) · 2.94 KB
/
lexer.rs
File metadata and controls
117 lines (110 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//< ch-0 ch-4 ch-5 ch-6 lexer-tokens-use
// Re-export every `Token` variant at module scope so the lexer/parser code
// below can write `Def`, `Ident(..)`, etc. instead of `Token::Def`.
// The `//<` / `//>` lines are literate-programming chapter markers used to
// extract per-chapter snippets for the tutorial text — keep them intact.
pub use self::Token::{
Def,
Extern,
//> ch-0 lexer-tokens-use
If,
Then,
Else,
For,
In,
//> ch-4
Binary,
Unary,
//> ch-5
Var,
//< ch-0 ch-4 ch-5 lexer-tokens-use
Delimiter,
OpeningParenthesis,
ClosingParenthesis,
Comma,
Ident,
Number,
Operator
};
//> lexer-tokens-use
//< lexer-tokens if-lexer for-lexer mutable-var-lexer
// The token kinds produced by `tokenize`. Variants accumulate across the
// tutorial chapters; the `//<` / `//>` lines are literate-programming markers
// delimiting per-chapter snippets — do not remove or reorder them.
#[derive(PartialEq, Clone, Debug)]
pub enum Token {
// Keywords.
Def,
Extern,
//> ch-0 lexer-tokens
If,
Then,
Else,
//> if-lexer
For,
In,
//> ch-4 for-lexer
Binary,
Unary,
//> ch-5
Var,
//< ch-0 ch-4 ch-5 lexer-tokens if-lexer for-lexer
// Punctuation.
Delimiter, //';' character
OpeningParenthesis,
ClosingParenthesis,
Comma,
// Payload-carrying tokens.
Ident(String),    // identifier name
Number(f64),      // numeric literal value
Operator(String)  // any other non-whitespace character
}
//> lexer-tokens
//< lexer-tokenize
/// Splits `input` into a vector of `Token`s.
///
/// Comments start with `#` and run to the end of the line; they are stripped
/// before tokenization. Any non-whitespace character not matched by a more
/// specific pattern becomes an `Operator` token, which lets later chapters
/// support user-defined operators.
///
/// # Panics
/// Panics if a matched number literal fails to parse as `f64` (which would
/// indicate a bug in the number regex itself, not bad user input).
pub fn tokenize(input: &str) -> Vec<Token> {
    // Regex for comments: '#' through end of line. `.` does not match `\n`,
    // so the newline itself is left in place; crucially, omitting a trailing
    // `\n` from the pattern means a comment on the last line is removed even
    // when the input lacks a terminating newline (the old pattern `#.*\n`
    // silently leaked such a comment into the token stream).
    let comment_re = regex!(r"(?m)#.*");
    // Remove comments from the input stream.
    let preprocessed = comment_re.replace_all(input, "");

    let mut result = Vec::new();

    // Regex for tokens: a union of straightforward regexes for the different
    // token types. Operators are matched like identifiers and separated later.
    let token_re = regex!(concat!(
        r"(?P<ident>\p{Alphabetic}\w*)|",
        r"(?P<number>\d+\.?\d*)|",
        r"(?P<delimiter>;)|",
        r"(?P<oppar>\()|",
        r"(?P<clpar>\))|",
        r"(?P<comma>,)|",
        r"(?P<operator>\S)"));

    for cap in token_re.captures_iter(preprocessed.as_str()) {
        let token = if cap.name("ident").is_some() {
            // Keywords first; anything else alphabetic is a plain identifier.
            match cap.name("ident").unwrap() {
                "def" => Def,
                "extern" => Extern,
//> ch-0 lexer-tokenize
                "if" => If,
                "then" => Then,
                "else" => Else,
//> if-lexer
                "for" => For,
                "in" => In,
//> ch-4 for-lexer
                "binary" => Binary,
                "unary" => Unary,
//> ch-5
                "var" => Var,
//< ch-0 ch-4 ch-5 lexer-tokenize if-lexer for-lexer
                ident => Ident(ident.to_string())
            }
        } else if cap.name("number").is_some() {
            // The number regex admits only digits and one optional dot, so a
            // failure here is a lexer bug rather than malformed user input.
            match cap.name("number").unwrap().parse() {
                Ok(number) => Number(number),
                Err(_) => panic!("Lexer failed trying to parse number")
            }
        } else if cap.name("delimiter").is_some() {
            Delimiter
        } else if cap.name("oppar").is_some() {
            OpeningParenthesis
        } else if cap.name("clpar").is_some() {
            ClosingParenthesis
        } else if cap.name("comma").is_some() {
            Comma
        } else {
            // Any other single non-whitespace character is an operator.
            Operator(cap.name("operator").unwrap().to_string())
        };

        result.push(token)
    }

    result
}
//> ch-0 ch-4 ch-5 ch-6 lexer-tokenize if-lexer for-lexer mutable-var-lexer