diff options
author | Alexis Lockwood | 2021-07-04 16:22:45 -0400 |
---|---|---|
committer | Alexis Lockwood | 2021-07-04 16:23:38 -0400 |
commit | f94009bdb8c04d16d2ad9d0a03ed693e2fc5ee93 (patch) | |
tree | 13ba11a2346b09c514154658761b9fda2f68f19f | |
parent | 079f5e05706fecaa580dc880fbc1f194d4d4f120 (diff) |
Style improvements
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | lib/ls.c | 6 | ||||
-rw-r--r-- | lib/ls_goto.c | 15 | ||||
-rw-r--r-- | lib/ls_internal.c | 9 | ||||
-rw-r--r-- | lib/ls_internal.h | 16 | ||||
-rw-r--r-- | lib/ls_kw_impl.c | 1 | ||||
-rw-r--r-- | lib/ls_kw_impl_GOSUB_RETURN.c | 64 | ||||
-rw-r--r-- | lib/ls_lex.c | 353 |
8 files changed, 262 insertions, 208 deletions
@@ -122,6 +122,12 @@ The following things use pool entries: `size_t`, that wouldn't have any effect on 8-bit anyway, though it'd make the pool more wasteful on 32 bit. +## Coding style + +I'm trying to follow a fairly consistent coding style now, please see +[my style guide](https://git.alexisvl.rocks/avlstyle/plain/avlstyle.7.html?h=trunk) +for that. + # THE SCRIPTING LANGUAGE ## Positions @@ -75,8 +75,7 @@ ls_print_value(FILE * stream, ls_value_t * value, ls_value_t * first) memcpy(buf, value->body.str.value, 4); buf[4] = 0; fprintf(stream, - "[str ] len = %"PRIu16", " - "chunk = <"POOL_F">, val = %s", + "[str ] len = %"PRIu16", chunk = <"POOL_F">, val = %s", value->body.str.length, TO_POOL_N(value->body.str.chunk), buf @@ -84,8 +83,7 @@ ls_print_value(FILE * stream, ls_value_t * value, ls_value_t * first) break; case LS_TY_LIST: fprintf(stream, - "[list ] len = %"PRIu16", first = <"POOL_F">, " - "memo = %"PRIu16" @ <"POOL_F">", + "[list ] len = %"PRIu16", first = <"POOL_F">, memo = %"PRIu16" @ <"POOL_F">", value->body.list.length, TO_POOL_N(value->body.list.first), value->body.list.memo_i, diff --git a/lib/ls_goto.c b/lib/ls_goto.c index 1259ea5..7a24ae5 100644 --- a/lib/ls_goto.c +++ b/lib/ls_goto.c @@ -90,15 +90,14 @@ ls_goto_num(ls_t * self, bool backward, uint16_t num) } if (target == LS_ADDR_NULL) - { + goto throw_undef; + + self->_pc = target; + return; + throw_undef: - self->_pc = pc_saved; - ls_throw_err(self, LS_UNDEFINED_LABEL); - } - else - { - self->_pc = target; - } + self->_pc = pc_saved; + ls_throw_err(self, LS_UNDEFINED_LABEL); } void diff --git a/lib/ls_internal.c b/lib/ls_internal.c index 2e77e2a..9749a11 100644 --- a/lib/ls_internal.c +++ b/lib/ls_internal.c @@ -99,13 +99,14 @@ ls_convert_kw(char const * word) // TODO PROGMEM if (!strcasecmp(kwdef->name, word)) return (ls_kw_t) n_kw + LS_KW_OFFSET; - } while (!LS_HASHMAP_ENTRY_IS_SENTINEL(ls_kw_hashmap[i++])); + } + while (!LS_HASHMAP_ENTRY_IS_SENTINEL(ls_kw_hashmap[i++])); return LS_NOT_A_KW; 
} void -ls_throw_err(ls_t * self, ls_error_t e) +ls_throw_err_f(ls_t * self, ls_error_t e) { if (!e) e = LS_INTERNAL_ERROR; @@ -132,7 +133,6 @@ ls_fetch_at(ls_t * self, ls_addr_t pc) ls_throw_err(self, LS_INTERNAL_ERROR); else ls_throw_err(self, (ls_error_t) -rc); - return 0; } ls_value_t * @@ -263,9 +263,6 @@ ls_read_int_var(ls_t * self, ls_value_t * var) return var->body.int_var.value; else ls_throw_err(self, LS_TYPE_MISMATCH); - - // Unreachable - return 0; } void diff --git a/lib/ls_internal.h b/lib/ls_internal.h index 3630db7..d9c5086 100644 --- a/lib/ls_internal.h +++ b/lib/ls_internal.h @@ -60,6 +60,18 @@ #define LS_TRY(e) \ do { ls_error_t __err = (e); if (__err) return __err; } while (0) +/// Throw an error. In normal builds, this just calls through to ls_throw_err_f. +/// In debug builds it reports the file and line as well. +#ifdef NDEBUG +#define ls_throw_err ls_throw_err_f +#else +#define ls_throw_err(self, e) do { \ + fprintf(stderr, ">throwing %d from %s:%d\n", (e), __FILE__, \ + __LINE__); \ + ls_throw_err_f((self), (e)); \ + } while (0) +#endif + #ifdef __GNUC__ #define ls_noreturn __attribute__((noreturn)) #else @@ -126,7 +138,9 @@ ls_kw_t ls_convert_kw(char const * word); /// Throw an error, to be caught by the setjmp in the interpreter. If the error /// is LS_OK, no throw occurs. -ls_noreturn void ls_throw_err(ls_t * self, ls_error_t e); +/// +/// This function is not used directly; it is wrapped by macro ls_throw_err(). +ls_noreturn void ls_throw_err_f(ls_t * self, ls_error_t e); /// Fetch the current character. Throws if the fetcher returns an error other /// than LS_NO_PROGRAM. If there is no character, returns nul. 
diff --git a/lib/ls_kw_impl.c b/lib/ls_kw_impl.c index 4c3c343..5e537b1 100644 --- a/lib/ls_kw_impl.c +++ b/lib/ls_kw_impl.c @@ -66,7 +66,6 @@ ls_kw_fun_IF(ls_t * self) if (cond.body.integer.value == 0) { ls_consume_to_eol(self); - return; } else { diff --git a/lib/ls_kw_impl_GOSUB_RETURN.c b/lib/ls_kw_impl_GOSUB_RETURN.c index 6a729d8..3ef04b4 100644 --- a/lib/ls_kw_impl_GOSUB_RETURN.c +++ b/lib/ls_kw_impl_GOSUB_RETURN.c @@ -20,6 +20,12 @@ // --- PRIVATE DATATYPES ------------------------------------------------------- // --- PRIVATE CONSTANTS ------------------------------------------------------- // --- PRIVATE FUNCTION PROTOTYPES --------------------------------------------- + +/// Parse function arguments into the newly created scope. +/// @pre opening LS_OP_LPAREN has already been lexed +/// @pre self->_callstack points to the new scope +static void _parse_args(ls_t * self); + // --- PUBLIC VARIABLES -------------------------------------------------------- // --- PRIVATE VARIABLES ------------------------------------------------------- // --- PUBLIC FUNCTIONS -------------------------------------------------------- @@ -52,31 +58,7 @@ ls_kw_fun_GOSUB(ls_t * self) tok = ls_lex(self); if (tok == LS_OP_LPAREN) { - while (tok != LS_OP_RPAREN) - { - tok = ls_lex(self); - - if (tok == LS_TOK_WORD) - { - ls_value_t * var = ls_new_var(self, - self->_token.word); - - tok = ls_lex(self); - if (tok != LS_OP_EQ) - ls_throw_err(self, LS_SYNTAX_ERROR); - - // Evaluate the expression in the prev scope - // We had to create the variable already to - // avoid losing the buffer content - self->_callstack = self->_callstack->prev; - ls_value_t val; - ls_eval_expr(self, &val, LS_TOK_NONE); - ls_write_var(self, var, &val); - self->_callstack = frame; - } - else if (tok != LS_OP_RPAREN && tok != LS_TOK_COMMA) - ls_throw_err(self, LS_SYNTAX_ERROR); - } + _parse_args(self); tok = ls_lex(self); } @@ -134,3 +116,35 @@ ls_kw_fun_RETURN(ls_t * self) } // --- PRIVATE FUNCTION DEFINITIONS 
-------------------------------------------- + +static void _parse_args(ls_t * self) +{ + ls_token_t tok; + ls_value_t * frame = self->_callstack; + + while (tok != LS_OP_RPAREN) + { + tok = ls_lex(self); + + if (tok == LS_TOK_WORD) + { + ls_value_t * var = ls_new_var(self, + self->_token.word); + + tok = ls_lex(self); + if (tok != LS_OP_EQ) + ls_throw_err(self, LS_SYNTAX_ERROR); + + // Evaluate the expression in the prev scope + // We had to create the variable already to + // avoid losing the buffer content + self->_callstack = self->_callstack->prev; + ls_value_t val; + ls_eval_expr(self, &val, LS_TOK_NONE); + ls_write_var(self, var, &val); + self->_callstack = frame; + } + else if (tok != LS_OP_RPAREN && tok != LS_TOK_COMMA) + ls_throw_err(self, LS_SYNTAX_ERROR); + } +} diff --git a/lib/ls_lex.c b/lib/ls_lex.c index 89571b3..3e866ec 100644 --- a/lib/ls_lex.c +++ b/lib/ls_lex.c @@ -38,9 +38,106 @@ typedef enum { } ch_kind_t; // --- PRIVATE CONSTANTS ------------------------------------------------------- + +// The LUT encodes ASCII values 0x20 to 0x7E. Anything below 0x20 is treated as +// whitespace (with \n specifically as a linesep), anything above 0x7F is an +// abbreviated keyword, and 0x7F (delete char) is also treated as whitespace. +// +// The LUT encoding is rather packed but the code to unpack it is quite a bit +// smaller than the actual excess LUT space would be without packing, on small +// architectures. +// +// First packing: +// Values 0x20 to 0x5F are found at position (value - 0x20)/2, with the +// even-numbered entry in the low nibble and the odd-numbered in the high +// nibble. +// +// Second packing: +// Because 0x40...0x5F and 0x60...0x7F all have the same character kind except +// 0x7F, the second range is folded onto the first. Values 0x60 to 0x7F are +// found at position (value - 0x40)/2, overlapping value-0x20. +// +// Yeah I kinda did that for fun. 
Sorry it's dumb + +#define K(a, b) ((uint8_t)(a) | ((uint8_t)(b) << 4)) +static const uint8_t _ch_kind_lut[32] = { + K(CH_KIND_SPACE, CH_KIND_OPER), // space ! + K(CH_KIND_STR, CH_KIND_SIGIL), // " # + K(CH_KIND_SIGIL, CH_KIND_SIGIL), // $ % + K(CH_KIND_DIGIT, CH_KIND_REM), // & ' + K(CH_KIND_OPER, CH_KIND_OPER), // ( ) + K(CH_KIND_OPER, CH_KIND_OPER), // * + + K(CH_KIND_COMMA, CH_KIND_OPER), // , - + K(CH_KIND_DIGIT, CH_KIND_OPER), // . / + K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 0 1 + K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 2 3 + K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 4 5 + K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 6 7 + K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 8 9 + K(CH_KIND_LABEL, CH_KIND_SEP), // : ; + K(CH_KIND_OPER, CH_KIND_OPER), // < = + K(CH_KIND_OPER, CH_KIND_OPER), // > ? + + K(CH_KIND_OPER, CH_KIND_LETTER), // @` Aa + K(CH_KIND_LETTER, CH_KIND_LETTER), // Bb Cc + K(CH_KIND_LETTER, CH_KIND_LETTER), // Dd Ee + K(CH_KIND_LETTER, CH_KIND_LETTER), // Ff Gg + K(CH_KIND_LETTER, CH_KIND_LETTER), // Hh Ii + K(CH_KIND_LETTER, CH_KIND_LETTER), // Jj Kk + K(CH_KIND_LETTER, CH_KIND_LETTER), // Ll Mm + K(CH_KIND_LETTER, CH_KIND_LETTER), // Nn Oo + K(CH_KIND_LETTER, CH_KIND_LETTER), // Pp Qq + K(CH_KIND_LETTER, CH_KIND_LETTER), // Rr Ss + K(CH_KIND_LETTER, CH_KIND_LETTER), // Tt Uu + K(CH_KIND_LETTER, CH_KIND_LETTER), // Vv Ww + K(CH_KIND_LETTER, CH_KIND_LETTER), // Xx Yy + K(CH_KIND_LETTER, CH_KIND_OPER), // Zz [{ + K(CH_KIND_OPER, CH_KIND_OPER), // \| ]} + K(CH_KIND_OPER , CH_KIND_LETTER), // ^~ _del +}; +#undef K + +static const ls_uchar _ops[][2] = { + [LS_OP_LEQ] = "<=", + [LS_OP_GEQ] = ">=", + [LS_OP_NEQ] = "<>", + [LS_OP_LPAREN] = "(", + [LS_OP_RPAREN] = ")", + [LS_OP_MOD] = "%", + [LS_OP_MUL] = "*", + [LS_OP_ADD] = "+", + [LS_OP_SUB] = "-", + [LS_OP_DIV] = "/", + [LS_OP_POW] = "^", + [LS_OP_LT] = "<", + [LS_OP_EQ] = "=", + [LS_OP_GT] = ">", +}; + + // --- PRIVATE FUNCTION PROTOTYPES --------------------------------------------- -static ch_kind_t _ident_ch_kind(unsigned char ch); 
+/// Identify what kind of character ch is. Handles all values of ls_uchar. +static ch_kind_t _ident_ch_kind(ls_uchar ch); + +/// Advance through whitespace and comments to the beginning of the next +/// interesting token. Returns the character it stopped on. +static ls_uchar _advance_to_token(ls_t * self); + +/// Record a numeric label into the label cache. The label is set to point at +/// the current pc. +/// +/// @param val - numeric label value +/// TODO this should go in a new source file +static void _record_num_label(ls_t * self, ls_addr_t val); + +/// Record a string label into the label cache. The label is set to point at +/// the current pc. +/// +/// @param word - string value +/// TODO this should go in a new source file +static void _record_str_label(ls_t * self, char const * word); + static ls_token_t _lex_oper(ls_t * self, ls_uchar ch[2]); static ls_token_t _lex_num(ls_t * self, ls_uchar ch[2]); static ls_token_t _lex_word(ls_t * self, ls_uchar ch[2]); @@ -54,70 +151,47 @@ static ls_token_t _lex_kw(ls_t * self, ls_uchar ch[2]); ls_token_t ls_lex(ls_t * self) { - unsigned char ch[2]; - ch_kind_t ch_kind; - bool skip_rem = false; - - do { - ch[0] = ls_fetch(self); - ch_kind = _ident_ch_kind(ch[0]); - self->_pc++; - - if (ch_kind == CH_KIND_REM) - skip_rem = true; - else if (ch_kind == CH_KIND_LINESEP || ch_kind == CH_KIND_END) - skip_rem = false; - } while (ch_kind == CH_KIND_SPACE || skip_rem); - + ls_uchar ch[2]; + ch[0] = _advance_to_token(self); ch[1] = ls_fetch(self); - switch (ch_kind) + switch (_ident_ch_kind(ch[0])) { case CH_KIND_OPER: return _lex_oper(self, ch); - break; case CH_KIND_DIGIT: return _lex_num(self, ch); - break; case CH_KIND_LETTER: return _lex_word(self, ch); - break; case CH_KIND_STR: return _lex_str(self, ch); - break; case CH_KIND_SIGIL: ls_throw_err(self, LS_SYNTAX_ERROR); - break; case CH_KIND_COMMA: return LS_TOK_COMMA; - break; case CH_KIND_LINESEP: case CH_KIND_SEP: return LS_TOK_STATEMENT_SEP; - break; case 
CH_KIND_LABEL: // These are actually parsed inside digit/word parsers ls_throw_err(self, LS_SYNTAX_ERROR); - break; case CH_KIND_KW: return _lex_kw(self, ch); - break; case CH_KIND_END: return LS_TOK_NONE; - break; + case CH_KIND_INVALID: default: ls_throw_err(self, LS_SYNTAX_ERROR); - break; } } @@ -137,68 +211,8 @@ ls_consume_to_eol(ls_t * self) // --- PRIVATE FUNCTION DEFINITIONS -------------------------------------------- static ch_kind_t -_ident_ch_kind(unsigned char ch) +_ident_ch_kind(ls_uchar ch) { - // The LUT encodes ASCII values 0x20 to 0x7E. Anything below - // 0x20 is treated as whitespace (with \n specifically as a linesep), - // anything above 0x7F is an abbreviated keyword, and 0x7F (delete - // char) is also treated as whitespace. - // - // The LUT encoding is rather packed but the code to unpack it is - // quite a bit smaller than the actual excess LUT space would be - // without packing, on small architectures. - // - // First packing: - // Values 0x20 to 0x5F are found at position (value - 0x20)/2, with - // the even-numbered entry in the low nibble and the odd-numbered in - // the high nibble. - // - // Second packing: - // Because 0x40...0x5F and 0x60...0x7F all have the same character kind - // except 0x7F, the second range is folded onto the first. Values - // 0x60 to 0x7F are found at position (value - 0x40)/2, overlapping - // value-0x20. - // - // Yeah I kinda did that for fun. Sorry it's dumb - -#define K(a, b) ((uint8_t)(a) | ((uint8_t)(b) << 4)) - static const uint8_t ch_kind_lut[32] = { - K(CH_KIND_SPACE, CH_KIND_OPER), // space ! - K(CH_KIND_STR, CH_KIND_SIGIL), // " # - K(CH_KIND_SIGIL, CH_KIND_SIGIL), // $ % - K(CH_KIND_DIGIT, CH_KIND_REM), // & ' - K(CH_KIND_OPER, CH_KIND_OPER), // ( ) - K(CH_KIND_OPER, CH_KIND_OPER), // * + - K(CH_KIND_COMMA, CH_KIND_OPER), // , - - K(CH_KIND_DIGIT, CH_KIND_OPER), // . 
/ - K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 0 1 - K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 2 3 - K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 4 5 - K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 6 7 - K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 8 9 - K(CH_KIND_LABEL, CH_KIND_SEP), // : ; - K(CH_KIND_OPER, CH_KIND_OPER), // < = - K(CH_KIND_OPER, CH_KIND_OPER), // > ? - - K(CH_KIND_OPER, CH_KIND_LETTER), // @` Aa - K(CH_KIND_LETTER, CH_KIND_LETTER), // Bb Cc - K(CH_KIND_LETTER, CH_KIND_LETTER), // Dd Ee - K(CH_KIND_LETTER, CH_KIND_LETTER), // Ff Gg - K(CH_KIND_LETTER, CH_KIND_LETTER), // Hh Ii - K(CH_KIND_LETTER, CH_KIND_LETTER), // Jj Kk - K(CH_KIND_LETTER, CH_KIND_LETTER), // Ll Mm - K(CH_KIND_LETTER, CH_KIND_LETTER), // Nn Oo - K(CH_KIND_LETTER, CH_KIND_LETTER), // Pp Qq - K(CH_KIND_LETTER, CH_KIND_LETTER), // Rr Ss - K(CH_KIND_LETTER, CH_KIND_LETTER), // Tt Uu - K(CH_KIND_LETTER, CH_KIND_LETTER), // Vv Ww - K(CH_KIND_LETTER, CH_KIND_LETTER), // Xx Yy - K(CH_KIND_LETTER, CH_KIND_OPER), // Zz [{ - K(CH_KIND_OPER, CH_KIND_OPER), // \| ]} - K(CH_KIND_OPER , CH_KIND_LETTER), // ^~ _del - }; -#undef K - if (ch >= 0x80) return CH_KIND_KW; else if (ch == 0x7F) @@ -209,7 +223,7 @@ _ident_ch_kind(unsigned char ch) ch -= 0x20; ch -= 0x20; - uint8_t k = ch_kind_lut[ch / 2]; + uint8_t k = _ch_kind_lut[ch / 2]; if (ch & 1) // this is one instruction on avr k = k << 4 | k >> 4; @@ -223,39 +237,89 @@ _ident_ch_kind(unsigned char ch) return CH_KIND_SPACE; } -static ls_token_t -_lex_oper(ls_t * self, ls_uchar ch[2]) +static ls_uchar +_advance_to_token(ls_t * self) +{ + ls_uchar ch; + ch_kind_t ch_kind; + bool skip_rem = false; + + do { + ch = ls_fetch(self); + ch_kind = _ident_ch_kind(ch); + self->_pc++; + + if (ch_kind == CH_KIND_REM) + skip_rem = true; + else if (ch_kind == CH_KIND_LINESEP || ch_kind == CH_KIND_END) + skip_rem = false; + } + while (ch_kind == CH_KIND_SPACE || skip_rem); + + return ch; +} + +static void +_record_num_label(ls_t * self, ls_addr_t val) { - static const unsigned char ops[][2] = { - 
[LS_OP_LEQ] = "<=", - [LS_OP_GEQ] = ">=", - [LS_OP_NEQ] = "<>", - [LS_OP_LPAREN] = "(", - [LS_OP_RPAREN] = ")", - [LS_OP_MOD] = "%", - [LS_OP_MUL] = "*", - [LS_OP_ADD] = "+", - [LS_OP_SUB] = "-", - [LS_OP_DIV] = "/", - [LS_OP_POW] = "^", - [LS_OP_LT] = "<", - [LS_OP_EQ] = "=", - [LS_OP_GT] = ">", + uint8_t i = (self->_label_cache_i + 1) % LS_LABEL_CACHE_SIZE; + self->_label_cache_i = i; + + self->_label_cache[i] = (ls_label_cache_t) { + .pc = self->_pc, + .num = val, }; +} + +static void +_record_str_label(ls_t * self, char const * word) +{ + // The label might already exist. If it does, we're either + // seeing it again or shadowing it. + + ls_value_t * label = NULL; + for (ls_value_t * i = self->_labels; i; i = i->next) + { + if (!strncmp(word, i->body.label.ident, LS_IDENT_LEN)) + { + label = i; + break; + } + } - for (uint8_t i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) + if (label) { - if (ops[i][1]) { - if (ch[0] == ops[i][0] && ch[1] == ops[i][1]) + if (label->body.label.pc != self->_pc) + ls_throw_err(self, LS_DUPLICATE_DEFINITION); + } + else + { + label = ls_alloc(self); + label->ty = LS_TY_LABEL; + label->next = self->_labels; + strncpy(label->body.label.ident, word, LS_IDENT_LEN); + label->body.label.pc = self->_pc; + self->_labels = label; + } +} + +static ls_token_t +_lex_oper(ls_t * self, ls_uchar ch[2]) +{ + for (uint8_t i = 0; i < sizeof(_ops)/sizeof(_ops[0]); i++) + { + if (_ops[i][1]) + { + if (ch[0] == _ops[i][0] && ch[1] == _ops[i][1]) { self->_pc++; // consume second char return (ls_token_t) i; } - } else { - if (ch[0] == ops[i][0]) - { + } + else + { + if (ch[0] == _ops[i][0]) return (ls_token_t) i; - } } } @@ -265,49 +329,43 @@ _lex_oper(ls_t * self, ls_uchar ch[2]) static ls_token_t _lex_num(ls_t * self, ls_uchar ch[2]) { - uint8_t radix = 10; + int8_t radix = 10; ls_token_t tok = LS_TOK_NUMBER; ls_int_t val = 0; if (ch[0] == '&') { - uint8_t nch = 2; radix = 8; + self->_pc += 2; ch[1] = (ls_uchar) toupper(ch[1]); if (ch[1] == 'H') radix = 
16; - else if (ch[1] == 'B') - radix = 2; else if (ch[1] == 'O') radix = 8; + else if (ch[1] == 'B') + radix = 2; else - nch = 1; + self->_pc--; // bare & without a letter - self->_pc = (ls_addr_t)(self->_pc + nch - 1); ch[0] = ls_fetch(self); self->_pc++; } for (;;) { - uint8_t digit = 0; + int8_t digit = 0; ch[0] = (ls_uchar) toupper(ch[0]); if (ch[0] >= '0' && ch[0] <= '9') - digit = (uint8_t)(ch[0] - '0'); + digit = (int8_t)(ch[0] - '0'); else if (ch[0] >= 'A' && ch[0] <= 'F') - digit = (uint8_t)(ch[0] - 'A' + 10); + digit = (int8_t)(ch[0] - 'A' + 10); else if (ch[0] == ':') { if (val > LS_ADDR_MAX || val < 0) ls_throw_err(self, LS_SYNTAX_ERROR); - self->_label_cache_i++; - self->_label_cache_i %= LS_LABEL_CACHE_SIZE; - ls_label_cache_t * lc - = &self->_label_cache[self->_label_cache_i]; - lc->pc = self->_pc; - lc->num = (ls_addr_t) val; + _record_num_label(self, (ls_addr_t) val); tok = LS_TOK_NUM_LABEL; break; } @@ -318,12 +376,11 @@ _lex_num(ls_t * self, ls_uchar ch[2]) } ch[0] = ls_fetch(self); - self->_pc++; if (digit >= radix) ls_throw_err(self, LS_SYNTAX_ERROR); - else if (tok != LS_TOK_NUM_LABEL) - val = val * radix + digit; + val = val * radix + digit; + self->_pc++; } self->_token.number = val; @@ -368,37 +425,7 @@ _lex_word(ls_t * self, ls_uchar ch[2]) return (ls_token_t) kw; } else if (tok == LS_TOK_STR_LABEL) - { - // The label might already exist. If it does, we're either - // seeing it again or shadowing it. 
- - ls_value_t * label = NULL; - for (ls_value_t * i = self->_labels; i; i = i->next) - { - if (!strncmp(self->_token.word, i->body.label.ident, - LS_IDENT_LEN)) - { - label = i; - break; - } - } - - if (label) - { - if (label->body.label.pc != self->_pc) - ls_throw_err(self, LS_DUPLICATE_DEFINITION); - } - else - { - label = ls_alloc(self); - label->ty = LS_TY_LABEL; - label->next = self->_labels; - strncpy(label->body.label.ident, self->_token.word, - LS_IDENT_LEN); - label->body.label.pc = self->_pc; - self->_labels = label; - } - } + _record_str_label(self, self->_token.word); return tok; } |