@@ -49,6 +49,7 @@ the latest Rust and on improvinvg the way it uses LLVM.
4949 * [ Mutable variables in Kaleidoscope] ( #mutable-variables-in-kaleidoscope )
5050 * [ Adjusting variables for mutation] ( #adjusting-variables-for-mutation )
5151 * [ Assignment operator] ( #assignmnet-operator )
52+ * [ User-defined local variables] ( #user-defined-local-variables )
5253
5354## Chapter 0. Introduction
5455
@@ -4353,3 +4354,206 @@ test(123);
43534354```
43544355
43554356This will first print ` 123 ` and then ` 4 ` showing that our assignment operator really works.
4357+
4358+ ### User-defined local variables
4359+
4360+ As with every change in syntax, the introduction of local variables starts with the lexer
4361+ (the grammar was already defined above):
4362+
4363+ ``` rust
4364+ #[derive(PartialEq , Clone , Debug )]
4365+ pub enum Token { // lexical tokens produced by `tokenize`
4366+ Def , // keyword `def`
4367+ Extern , // keyword `extern`
4368+ If , // keyword `if`
4369+ Then , // keyword `then`
4370+ Else , // keyword `else`
4371+ For , // keyword `for`
4372+ In , // keyword `in`
4373+ Binary , // keyword `binary`
4374+ Unary , // keyword `unary`
4375+ Var , // keyword `var`, introduces local variable bindings
4376+ Delimiter , // ';' character
4377+ OpeningParenthesis , // '(' character
4378+ ClosingParenthesis , // ')' character
4379+ Comma , // ',' character
4380+ Ident (String ), // identifier that is not one of the keywords above
4381+ Number (f64 ), // numeric literal, already parsed to f64
4382+ Operator (String ) // any other single non-whitespace character
4383+ }
4384+
4385+ pub fn tokenize (input : & str ) -> Vec <Token > { // converts source text into a flat vector of tokens, in input order
4386+ // regex for comments (start with #, end with the line end)
4387+ let comment_re = regex! (r " (?m)#.*\n" );
4388+ // remove comments from the input stream before tokenizing
4389+ let preprocessed = comment_re . replace_all (input , " \ n" );
4390+
4391+ let mut result = Vec :: new ();
4392+
4393+ // regex for a token: a union of straightforward named groups, one per token type
4394+ // keywords are matched by the `ident` group and separated from plain identifiers in the match below
4395+ let token_re = regex! (concat! (
4396+ r " (?P<ident>\p{Alphabetic}\w*)|" , // identifier or keyword
4397+ r " (?P<number>\d+\.?\d*)|" , // digits with an optional fractional part
4398+ r " (?P<delimiter>;)|" ,
4399+ r " (?P<oppar>\()|" ,
4400+ r " (?P<clpar>\))|" ,
4401+ r " (?P<comma>,)|" ,
4402+ r " (?P<operator>\S)" )); // fallback: any single non-whitespace character
4403+
4404+ for cap in token_re . captures_iter (preprocessed . as_str ()) {
4405+ let token = if cap . name (" ident" ). is_some () { // exactly one named group is expected to be set per match
4406+ match cap . name (" ident" ). unwrap () {
4407+ " def" => Def ,
4408+ " extern" => Extern ,
4409+ " if" => If ,
4410+ " then" => Then ,
4411+ " else" => Else ,
4412+ " for" => For ,
4413+ " in" => In ,
4414+ " binary" => Binary ,
4415+ " unary" => Unary ,
4416+ " var" => Var , // the only keyword added for local variables
4417+ ident => Ident (ident . to_string ()) // anything else is an ordinary identifier
4418+ }
4419+ } else if cap . name (" number" ). is_some () {
4420+ match cap . name (" number" ). unwrap (). parse () {
4421+ Ok (number ) => Number (number ),
4422+ Err (_ ) => panic! (" Lexer failed trying to parse number" ) // aborts instead of returning an error
4423+ }
4424+ } else if cap . name (" delimiter" ). is_some () {
4425+ Delimiter
4426+ } else if cap . name (" oppar" ). is_some () {
4427+ OpeningParenthesis
4428+ } else if cap . name (" clpar" ). is_some () {
4429+ ClosingParenthesis
4430+ } else if cap . name (" comma" ). is_some () {
4431+ Comma
4432+ } else {
4433+ Operator (cap . name (" operator" ). unwrap (). to_string ()) // no other group matched, so `operator` must have
4434+ };
4435+
4436+ result . push (token )
4437+ }
4438+
4439+ result
4440+ }
4441+ ```
4442+
4443+ We just add the new keyword ` var ` here.
4444+
4445+ Then we change the parser:
4446+
4447+ ``` rust
4448+ #[derive(PartialEq , Clone , Debug )]
4449+ pub enum Expression { // AST node for a Kaleidoscope expression
4450+ LiteralExpr (f64 ), // numeric literal
4451+ VariableExpr (String ), // presumably a reference to a variable by name -- confirm against parse_ident_expr
4452+ UnaryExpr (String , Box <Expression >), // presumably (operator name, operand) -- confirm against parse_unary_expr
4453+ BinaryExpr (String , Box <Expression >, Box <Expression >), // presumably (operator name, lhs, rhs) -- confirm against the binary parser
4454+ ConditionalExpr {cond_expr : Box <Expression >, then_expr : Box <Expression >, else_expr : Box <Expression >},
4455+ LoopExpr {var_name : String , start_expr : Box <Expression >, end_expr : Box <Expression >, step_expr : Box <Expression >, body_expr : Box <Expression >},
4456+ VarExpr {vars : Vec <(String , Expression )>, body_expr : Box <Expression >}, // `var` expression: (name, initializer) pairs plus the body they are visible in
4457+ CallExpr (String , Vec <Expression >) // presumably (callee name, arguments) -- confirm against parse_ident_expr
4458+ }
4459+
4460+ fn parse_primary_expr (tokens : & mut Vec <Token >, settings : & mut ParserSettings ) -> PartParsingResult <Expression > { // dispatches to a specific expression parser based on the next token
4461+ match tokens . last () { // the next token to consume is the last element of the vector
4462+ Some (& Ident (_ )) => parse_ident_expr (tokens , settings ),
4463+ Some (& Number (_ )) => parse_literal_expr (tokens , settings ),
4464+ Some (& If ) => parse_conditional_expr (tokens , settings ),
4465+ Some (& For ) => parse_loop_expr (tokens , settings ),
4466+ Some (& Var ) => parse_var_expr (tokens , settings ), // new arm: `var` starts a local variable expression
4467+ Some (& Operator (_ )) => parse_unary_expr (tokens , settings ), // an operator in primary position is treated as a unary operator
4468+ Some (& OpeningParenthesis ) => parse_parenthesis_expr (tokens , settings ),
4469+ None => return NotComplete , // no tokens left: input is incomplete rather than erroneous
4470+ _ => error (" unknow token when expecting an expression" )
4471+ }
4472+ }
4473+
4474+ fn parse_var_expr (tokens : & mut Vec <Token >, settings : & mut ParserSettings ) -> PartParsingResult <Expression > { // parses `var name [= init] {, name [= init]} in body` into a VarExpr
4475+ tokens . pop (); // drop the `Var` token that parse_primary_expr dispatched on
4476+ let mut parsed_tokens = vec! [Var ]; // consumed tokens are recorded here and returned alongside the result
4477+ let mut vars = Vec :: new (); // (name, initializer) pairs in source order
4478+
4479+ loop { // one iteration per binding in the comma-separated list
4480+ let var_name = expect_token! ( // NOTE(review): expect_token! is defined elsewhere; presumably it consumes the next token and fails with the message if it does not match
4481+ [Ident (name ), Ident (name . clone ()), name ] <= tokens ,
4482+ parsed_tokens , " expected identifier list after var" );
4483+
4484+ let init_expr = expect_token! (
4485+ [Operator (op ), Operator (op . clone ()), {
4486+ if op . as_str () != " =" { // the only operator accepted after a variable name is '='
4487+ return error (" expected '=' in variable initialization" )
4488+ }
4489+ parse_try! (parse_expr , tokens , settings , parsed_tokens )
4490+ }]
4491+ else {LiteralExpr (0.0 )} // no initializer given: the variable defaults to 0
4492+ <= tokens , parsed_tokens );
4493+
4494+ vars . push ((var_name , init_expr ));
4495+
4496+ expect_token! (
4497+ [Comma , Comma , ()]
4498+ else {break } // no comma after a binding ends the list
4499+ <= tokens , parsed_tokens );
4500+ }
4501+
4502+ expect_token! ( // the binding list must be followed by `in`
4503+ [In , In , ()] <= tokens ,
4504+ parsed_tokens , " expected 'in' after var" );
4505+
4506+ let body_expr = parse_try! (parse_expr , tokens , settings , parsed_tokens ); // expression in which the bindings are visible
4507+
4508+ Good (VarExpr {vars : vars , body_expr : box body_expr }, parsed_tokens )
4509+ }
4510+ ```
4511+
4512+ Here we add a new AST entry, namely the var expression. It consists of a vector of binding/value pairs
4513+ and the body expression. Then we dispatch on the ` Var ` token in the primary expression parsing
4514+ function. In the var expression parsing function we straightforwardly parse the list
4515+ of bindings (if no value is provided, we set it to 0). Finally we parse the body expression.
4516+
4517+ Builder changes follow:
4518+
4519+ ``` rust
4520+ & parser :: VarExpr {ref vars , ref body_expr } => { // code generation for a `var ... in body` expression
4521+ let mut old_bindings = Vec :: new (); // previous binding (if any) of each name, in the same order as `vars`
4522+ let function = context . builder. get_insert_block (). get_parent ();
4523+ for var in vars . iter () {
4524+ let (ref name , ref init_expr ) = * var ;
4525+ let (init_value , _ ) = try ! (init_expr . codegen (context , module_provider )); // emitted before the name is rebound, so the initializer sees the outer binding of `name`
4526+ let variable = create_entry_block_alloca (context , & function , name );
4527+ context . builder. build_store (init_value , variable );
4528+ old_bindings . push (context . named_values. remove (name )); // remember what this name referred to before shadowing it
4529+ context . named_values. insert (name . clone (), variable );
4530+ }
4531+
4532+ let (body_value , _ ) = try ! (body_expr . codegen (context , module_provider )); // the body is generated with the new bindings in place
4533+
4534+ let mut old_iter = old_bindings . iter ();
4535+ for var in vars . iter () { // walk names and saved bindings in lockstep to undo the shadowing
4536+ let (ref name , _ ) = * var ;
4537+ context . named_values. remove (name );
4538+
4539+ match old_iter . next () {
4540+ Some (& Some (value )) => {context . named_values. insert (name . clone (), value );}, // restore the binding that existed before
4541+ _ => () // the name was previously unbound, so it stays removed
4542+ };
4543+ }
4544+
4545+ Ok ((body_value , false )) // the value of the whole expression is the value of its body
4546+ }
4547+ ```
4548+
4549+ We save the old bindings, generate new ones and create allocas for them, insert them into the context
4550+ and then generate code for the body expression. At the end we restore the old bindings.
4551+
4552+ That's all we needed to add properly scoped mutable local variables. LLVM allowed us
4553+ to avoid dirtying our hands with "iterated dominance frontier" and to keep our code concise and simple.
4554+
4555+ [ The full code for this chapter] ( https://github.com/jauhien/iron-kaleidoscope/tree/master/chapters/6 )
4556+ is available. This chapter finishes the main part of the tutorial about writing a REPL using LLVM.
4557+
4558+ The next parts will cover different topics (like debug information, different JITs, etc.), but the
4559+ main work is done.
0 commit comments