Skip to content

Commit 266e589

Browse files
committed
add more
1 parent 51f91fd commit 266e589

File tree

4 files changed

+115
-5
lines changed

4 files changed

+115
-5
lines changed

.sconsign.dblite

0 Bytes
Binary file not shown.

lexer.c

Lines changed: 115 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,28 @@ bool program_load(char *file_name, program_t *program) {
2828
}
2929

3030
// ----
31-
#define TOKEN_MAX_LENGTH 16
31+
#define TOKEN_MAX_LENGTH 32
32+
33+
/*
 * Category tag stored in token_t.tp.
 * The enumerators keep their implicit numbering (0..5); the order is part of
 * the interface and must not change.
 */
typedef enum _token_type {
    tk_identifier = 0,
    tk_boolean,
    tk_number,
    tk_character,   /* set by token_character() for #\... literals */
    tk_string,
    tk_single
} token_type;
3236

3337
typedef struct {
3438
char token[TOKEN_MAX_LENGTH];
3539
int len;
40+
token_type tp;
3641
} token_t;
3742

43+
/*
 * Copy the run of non-blank characters that starts at program->iter into
 * token->token.
 *
 * The copy stops at the first blank (space or tab, per isblank()) or at the
 * NUL terminator of program->code. On success token->token is NUL-terminated
 * and token->len holds the number of characters copied (possibly 0 when the
 * cursor already sits on a blank).
 *
 * The cursor (program->iter / program->col) is NOT advanced; the caller
 * decides how much input to consume.
 *
 * Returns false when the run does not fit in TOKEN_MAX_LENGTH - 1 characters
 * (one slot is reserved for the terminating NUL), true otherwise.
 *
 * NOTE(review): a newline is not a blank for isblank(); confirm whether line
 * ends should also terminate a token here.
 */
bool token_loop_until_blank(program_t *program, token_t *token) {
    const char *cursor = program->code + program->iter;
    int count = 0;

    while (cursor[count] != '\0' && !isblank((unsigned char)cursor[count])) {
        /* Bound the write index, not the absolute source position. */
        if (count == TOKEN_MAX_LENGTH - 1) {
            return false;
        }
        token->token[count] = cursor[count];
        count++;
    }

    token->token[count] = '\0';
    token->len = count;
    return true;
}
52+
3853
bool token_any(program_t *program, token_t *token, char any[]) {
3954
char c[] = { program->code[program->iter] } ;
4055
int idx = strspn(c, any);
@@ -49,10 +64,10 @@ bool is_reserved(char _c) {
4964
return strspn(c, reserved);
5065
}
5166

52-
/* Characters that form a complete token on their own. */
const char token_single[] = "()'`,.";

/*
 * Report whether _c is one of the characters listed in token_single.
 *
 * strspn() needs a NUL-terminated string, so the probe is built as a proper
 * one-character string; a bare one-element array would make strspn() read
 * past the end of the array. The NUL character itself never matches.
 */
bool is_token_single(char _c) {
    const char probe[] = { _c, '\0' };
    return strspn(probe, token_single) != 0;
}
5772

5873
// comma (,) is also a single-character token (see token_single); the same applies to ,@
@@ -84,7 +99,102 @@ bool is_special_subquent(char _c) {
8499
return strspn(c, special_subquent);
85100
}
86101

87-
// const char peculiar_identifier = "+-..."
102+
/*
 * Recognise a token that starts with '.' at the current cursor:
 *   - "."   followed by a blank (space or tab)
 *   - "..." (three consecutive periods)
 *
 * On a match the text is stored NUL-terminated in token->token, token->len is
 * set, and program->iter / program->col are advanced past the lexeme.
 * Returns false, leaving program and token untouched, when neither form is
 * present at the cursor.
 */
bool token_period(program_t *program, token_t *token) {
    const char *cursor = program->code + program->iter;

    /* Checking the first character first keeps cursor[1] inside the string. */
    if (cursor[0] != '.') {
        return false;
    }

    if (isblank((unsigned char)cursor[1])) {
        token->token[0] = '.';
        token->token[1] = '\0';
        token->len = 1;
        token->tp = tk_single;      /* '.' is listed in token_single */
        program->iter++;
        program->col++;
        return true;
    }

    /* Compare at the cursor, not at the start of the whole buffer. */
    if (strncmp(cursor, "...", 3) == 0) {
        memcpy(token->token, "...", 4);     /* three periods plus the NUL */
        token->len = 3;
        /* NOTE(review): "..." is classified as an identifier (Scheme's
         * peculiar identifier); confirm against the parser's expectations. */
        token->tp = tk_identifier;
        program->iter += 3;
        program->col += 3;
        return true;
    }

    return false;
}
118+
119+
/* #(, #t, #f, #\<char>, #\<char name>, #i, #e, #b, #o, #d, #x */
120+
/*
 * Placeholder for tokens introduced by '#'. Scanning is not implemented yet,
 * so the function consumes nothing and reports "no token".
 *
 * The explicit return matters: a bool function that falls off its end yields
 * an indeterminate value, and using it is undefined behaviour.
 */
bool token_sharp(program_t *program, token_t *token) {
    (void)program;
    (void)token;
    return false;
}
122+
123+
/* ---- character #\<character name>
124+
125+
#\nul Unicode character 0
126+
#\alarm Unicode character 7
127+
#\backspace Unicode character 8
128+
#\tab Unicode character 9
129+
#\newline Unicode character 10 (newline character)
130+
#\linefeed Unicode character 10
131+
#\vtab Unicode character 11
132+
#\page Unicode character 12
133+
#\return Unicode character 13
134+
#\esc Unicode character 27
135+
#\space Unicode character 32 (space character)
136+
#\delete Unicode character 127
137+
138+
-- not supported
139+
#\xhh character encoded in hexadecimal (>= 1 hexadecimal digit)
140+
#\uhhhh character encoded in hexadecimal (exactly 4 hexadecimal digits)
141+
#\Uhhhhhhhh character encoded in hexadecimal (exactly 8 hexadecimal digits)
142+
143+
*/
144+
145+
/*
 * Recognise a character literal at the current cursor.
 *
 * Two forms are handled:
 *   - #\c      a single printable ASCII character followed by a blank
 *   - #\name   a run of non-blank characters (e.g. a character name)
 *
 * In both cases the text after the "#\" prefix is stored NUL-terminated in
 * token->token, token->len is its length, token->tp is tk_character, and
 * program->iter / program->col are advanced past the whole lexeme.
 *
 * Returns false, without consuming input, when the cursor is not at "#\",
 * when nothing follows the prefix, or when the name does not fit in the
 * token buffer.
 *
 * NOTE(review): the name is not validated against the list of supported
 * character names; confirm where that check should live.
 */
bool token_character(program_t *program, token_t *token) {
    const char *cursor = program->code + program->iter;
    const int capacity = (int)sizeof token->token;
    const char *name;
    int len = 0;

    /* Short-circuit so nothing past the string terminator is ever read. */
    if (cursor[0] != '#' || cursor[1] != '\\') {
        return false;
    }
    name = cursor + 2;

    /* Single printable character followed by a blank: "#\c ". */
    if (name[0] > 32 && name[0] < 127 && isblank((unsigned char)name[1])) {
        token->token[0] = name[0];
        token->token[1] = '\0';
        token->len = 1;
        token->tp = tk_character;
        /* The lexeme is three characters long: '#', '\\' and the character. */
        program->iter += 3;
        program->col += 3;
        return true;
    }

    /* Otherwise collect everything up to the next blank or end of input. */
    while (name[len] != '\0' && !isblank((unsigned char)name[len])) {
        if (len == capacity - 1) {
            return false;       /* name too long for the token buffer */
        }
        token->token[len] = name[len];
        len++;
    }
    if (len == 0) {
        return false;           /* "#\" with nothing usable after it */
    }

    token->token[len] = '\0';
    token->len = len;
    token->tp = tk_character;
    program->iter += 2 + len;
    program->col += 2 + len;
    return true;
}
162+
163+
164+
/* ---- string escape code
165+
166+
\a Unicode character 7
167+
\b Unicode character 8
168+
\t Unicode character 9
169+
\n Unicode character 10 (newline character)
170+
\v Unicode character 11
171+
\f Unicode character 12
172+
\r Unicode character 13
173+
\" "
174+
\\ \
175+
\| |
176+
\? ?
177+
178+
\ooo character encoded in octal (1 to 3 octal digits, first digit must
179+
be less than 4 when there are 3 octal digits)
180+
181+
\xhh character encoded in hexadecimal (>= 1 hexadecimal digit)
182+
\uhhhh character encoded in hexadecimal (exactly 4 hexadecimal digits)
183+
\Uhhhhhhhh character encoded in hexadecimal (exactly 8 hexadecimal digits)
184+
\<space> Unicode character 32 (space character)
185+
186+
\<newline><whitespace-except-newline>*
187+
This sequence expands to nothing (it is useful for splitting a long
188+
string literal on multiple lines while respecting proper indentation
189+
of the source code)
190+
191+
*/
192+
193+
/* only two kinds of escape are supported: \\ and \" */
194+
/*
 * Recognise a string literal at the current cursor.
 *
 * Only the opening-quote check exists so far. Scanning the body is not
 * implemented, so the function always reports "no token" and consumes
 * nothing. The explicit return avoids falling off the end of a bool function,
 * which is undefined behaviour when the result is used.
 */
bool token_string(program_t *program, token_t *token) {
    if (program->code[program->iter] != '"') {
        return false;
    }

    /* TODO: scan up to the closing quote and fill in token. */
    (void)token;
    return false;
}
88198

89199
/*
90200
bool token_identifier(program_t *program, token_t *token) {

lexer.o

961 Bytes
Binary file not shown.

word.exe

1.13 KB
Binary file not shown.

0 commit comments

Comments
 (0)