@@ -28,13 +28,28 @@ bool program_load(char *file_name, program_t *program) {
2828}
2929
3030// ----
/* Size in bytes of the text buffer embedded in token_t. */
#define TOKEN_MAX_LENGTH 32

/* Category recorded for a scanned token. */
typedef enum _token_type {
    tk_identifier,
    tk_boolean,
    tk_number,
    tk_character,
    tk_string,
    tk_single
} token_type;

/* One scanned token: its text, the length of that text and its category. */
typedef struct {
    char token[TOKEN_MAX_LENGTH]; /* token text */
    int len;                      /* number of characters stored in `token` */
    token_type tp;                /* category of the token */
} token_t;
3742
43+ bool token_loop_until_blank (program_t * program , token_t * token ) {
44+ char * code = program -> code ;
45+ int iter = 0 ;
46+ while ( isblank (code [iter ]) == false ) {
47+ if ( program -> iter + iter == TOKEN_MAX_LENGTH ) { return false; }
48+ token -> token [iter ] = code [program -> iter + iter ];
49+ iter ++ ;
50+ }
51+ }
52+
3853bool token_any (program_t * program , token_t * token , char any []) {
3954 char c [] = { program -> code [program -> iter ] } ;
4055 int idx = strspn (c , any );
@@ -49,10 +64,10 @@ bool is_reserved(char _c) {
4964 return strspn (c , reserved );
5065}
5166
/* Characters that are listed as single-character tokens. */
const char token_single[] = "()'`,.";

/*
 * Report whether `_c` is one of the characters in token_single.
 *
 * Returns true for ( ) ' ` , and . and false for everything else,
 * including the NUL character.
 */
bool is_token_single(char _c) {
    /* strspn() needs a NUL-terminated string, so terminate explicitly. */
    const char probe[2] = { _c, '\0' };

    return strspn(probe, token_single) != 0;
}
5772
// comma (,) is also in token_single; ,@ is treated the same way
@@ -84,7 +99,102 @@ bool is_special_subquent(char _c) {
8499 return strspn (c , special_subquent );
85100}
86101
87- // const char peculiar_identifier = "+-..."
102+ bool token_period (program_t * program , token_t * token ) {
103+ char c = program -> code [program -> iter ];
104+ char n = program -> code [program -> iter + 1 ];
105+ char * code = program -> code ;
106+ if ( c == '.' && isblank (n ) ) {
107+ program -> iter ++ , program -> col ++ ;
108+ token -> token [0 ] = '.' , token -> len = 1 ;
109+ return true;
110+ } else if ( strncmp (code , "..." , 3 ) == 0 ) {
111+ program -> iter += 3 , program -> col += 3 ;
112+ strncpy (token -> token , "." , 3 );
113+ token -> len = 3 ;
114+ return true;
115+ }
116+ return false;
117+ }
118+
119+ /* #(, #t, #f, #\<char>, #\<char name>, #i, #e, #b, #o, #d, #x */
120+ bool token_sharp (program_t * program , token_t * token ) {
121+ }
122+
123+ /* ---- character #\<character name>
124+
125+ #\nul Unicode character 0
126+ #\alarm Unicode character 7
127+ #\backspace Unicode character 8
128+ #\tab Unicode character 9
129+ #\newline Unicode character 10 (newline character)
130+ #\linefeed Unicode character 10
131+ #\vtab Unicode character 11
132+ #\page Unicode character 12
133+ #\return Unicode character 13
134+ #\esc Unicode character 27
135+ #\space Unicode character 32 (space character)
136+ #\delete Unicode character 127
137+
138+ -- not supported
139+ #\xhh character encoded in hexadecimal (>= 1 hexadecimal digit)
140+ #\uhhhh character encoded in hexadecimal (exactly 4 hexadecimal digits)
141+ #\Uhhhhhhhh character encoded in hexadecimal (exactly 8 hexadecimal digits)
142+
143+ */
144+
145+ bool token_character (program_t * program , token_t * token ) {
146+ char _1 = program -> code [program -> iter ];
147+ char _2 = program -> code [program -> iter + 1 ];
148+ char _3 = program -> code [program -> iter + 2 ];
149+ char _4 = program -> code [program -> iter + 3 ];
150+ if ( _1 == '#' && _2 == '\\' ) {
151+ if ( _3 > 32 && _3 < 127 && isblank (_4 ) ) {
152+ program -> iter += 2 , program -> col += 2 ;
153+ token -> token [0 ] = _3 , token -> len = 1 , token -> tp = tk_character ;
154+ return true;
155+ }
156+ token_loop_until_blank (program , token );
157+ token -> len = 1 , token -> tp = tk_character ;
158+ return true;
159+ }
160+ return false;
161+ }
162+
163+
164+ /* ---- string escape code
165+
166+ \a Unicode character 7
167+ \b Unicode character 8
168+ \t Unicode character 9
169+ \n Unicode character 10 (newline character)
170+ \v Unicode character 11
171+ \f Unicode character 12
172+ \r Unicode character 13
173+ \" "
174+ \\ \
175+ \| |
176+ \? ?
177+
178+ \ooo character encoded in octal (1 to 3 octal digits, first digit must
179+ be less than 4 when there are 3 octal digits)
180+
181+ \xhh character encoded in hexadecimal (>= 1 hexadecimal digit)
182+ \uhhhh character encoded in hexadecimal (exactly 4 hexadecimal digits)
183+ \Uhhhhhhhh character encoded in hexadecimal (exactly 8 hexadecimal digits)
184+ \<space> Unicode character 32 (space character)
185+
186+ \<newline><whitespace-except-newline>*
187+ This sequence expands to nothing (it is useful for splitting a long
188+ string literal on multiple lines while respecting proper indentation
189+ of the source code)
190+
191+ */
192+
193+ /* only support two kind of escape, \\ and \" */
194+ bool token_string (program_t * program , token_t * token ) {
195+ if ( program -> code [program -> iter ] != '"' ) return false;
196+
197+ }
88198
89199/*
90200bool token_identifier(program_t *program, token_t *token) {
0 commit comments