aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexis Lockwood2021-07-04 16:22:45 -0400
committerAlexis Lockwood2021-07-04 16:23:38 -0400
commitf94009bdb8c04d16d2ad9d0a03ed693e2fc5ee93 (patch)
tree13ba11a2346b09c514154658761b9fda2f68f19f
parent079f5e05706fecaa580dc880fbc1f194d4d4f120 (diff)
Style improvements
-rw-r--r--README.md6
-rw-r--r--lib/ls.c6
-rw-r--r--lib/ls_goto.c15
-rw-r--r--lib/ls_internal.c9
-rw-r--r--lib/ls_internal.h16
-rw-r--r--lib/ls_kw_impl.c1
-rw-r--r--lib/ls_kw_impl_GOSUB_RETURN.c64
-rw-r--r--lib/ls_lex.c353
8 files changed, 262 insertions, 208 deletions
diff --git a/README.md b/README.md
index 5cf54a4..29bbc66 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,12 @@ The following things use pool entries:
`size_t`, that wouldn't have any effect on 8-bit anyway, though it'd make
the pool more wasteful on 32 bit.
+## Coding style
+
+I'm trying to follow a fairly consistent coding style now, please see
+[my style guide](https://git.alexisvl.rocks/avlstyle/plain/avlstyle.7.html?h=trunk)
+for that.
+
# THE SCRIPTING LANGUAGE
## Positions
diff --git a/lib/ls.c b/lib/ls.c
index 1559d8a..ab704ce 100644
--- a/lib/ls.c
+++ b/lib/ls.c
@@ -75,8 +75,7 @@ ls_print_value(FILE * stream, ls_value_t * value, ls_value_t * first)
memcpy(buf, value->body.str.value, 4);
buf[4] = 0;
fprintf(stream,
- "[str ] len = %"PRIu16", "
- "chunk = <"POOL_F">, val = %s",
+ "[str ] len = %"PRIu16", chunk = <"POOL_F">, val = %s",
value->body.str.length,
TO_POOL_N(value->body.str.chunk),
buf
@@ -84,8 +83,7 @@ ls_print_value(FILE * stream, ls_value_t * value, ls_value_t * first)
break;
case LS_TY_LIST:
fprintf(stream,
- "[list ] len = %"PRIu16", first = <"POOL_F">, "
- "memo = %"PRIu16" @ <"POOL_F">",
+ "[list ] len = %"PRIu16", first = <"POOL_F">, memo = %"PRIu16" @ <"POOL_F">",
value->body.list.length,
TO_POOL_N(value->body.list.first),
value->body.list.memo_i,
diff --git a/lib/ls_goto.c b/lib/ls_goto.c
index 1259ea5..7a24ae5 100644
--- a/lib/ls_goto.c
+++ b/lib/ls_goto.c
@@ -90,15 +90,14 @@ ls_goto_num(ls_t * self, bool backward, uint16_t num)
}
if (target == LS_ADDR_NULL)
- {
+ goto throw_undef;
+
+ self->_pc = target;
+ return;
+
throw_undef:
- self->_pc = pc_saved;
- ls_throw_err(self, LS_UNDEFINED_LABEL);
- }
- else
- {
- self->_pc = target;
- }
+ self->_pc = pc_saved;
+ ls_throw_err(self, LS_UNDEFINED_LABEL);
}
void
diff --git a/lib/ls_internal.c b/lib/ls_internal.c
index 2e77e2a..9749a11 100644
--- a/lib/ls_internal.c
+++ b/lib/ls_internal.c
@@ -99,13 +99,14 @@ ls_convert_kw(char const * word)
// TODO PROGMEM
if (!strcasecmp(kwdef->name, word))
return (ls_kw_t) n_kw + LS_KW_OFFSET;
- } while (!LS_HASHMAP_ENTRY_IS_SENTINEL(ls_kw_hashmap[i++]));
+ }
+ while (!LS_HASHMAP_ENTRY_IS_SENTINEL(ls_kw_hashmap[i++]));
return LS_NOT_A_KW;
}
void
-ls_throw_err(ls_t * self, ls_error_t e)
+ls_throw_err_f(ls_t * self, ls_error_t e)
{
if (!e)
e = LS_INTERNAL_ERROR;
@@ -132,7 +133,6 @@ ls_fetch_at(ls_t * self, ls_addr_t pc)
ls_throw_err(self, LS_INTERNAL_ERROR);
else
ls_throw_err(self, (ls_error_t) -rc);
- return 0;
}
ls_value_t *
@@ -263,9 +263,6 @@ ls_read_int_var(ls_t * self, ls_value_t * var)
return var->body.int_var.value;
else
ls_throw_err(self, LS_TYPE_MISMATCH);
-
- // Unreachable
- return 0;
}
void
diff --git a/lib/ls_internal.h b/lib/ls_internal.h
index 3630db7..d9c5086 100644
--- a/lib/ls_internal.h
+++ b/lib/ls_internal.h
@@ -60,6 +60,18 @@
#define LS_TRY(e) \
do { ls_error_t __err = (e); if (__err) return __err; } while (0)
+/// Throw an error. When NDEBUG is defined, this just calls through to
+/// ls_throw_err_f. Otherwise it first prints the error, file and line to stderr.
+#ifdef NDEBUG
+#define ls_throw_err ls_throw_err_f
+#else
+#define ls_throw_err(self, e) do { \
+ fprintf(stderr, ">throwing %d from %s:%d\n", (e), __FILE__, \
+ __LINE__); \
+ ls_throw_err_f((self), (e)); \
+ } while (0)
+#endif
+
#ifdef __GNUC__
#define ls_noreturn __attribute__((noreturn))
#else
@@ -126,7 +138,9 @@ ls_kw_t ls_convert_kw(char const * word);
/// Throw an error, to be caught by the setjmp in the interpreter. This function
/// does not return; a zero error code (LS_OK) is thrown as LS_INTERNAL_ERROR.
-ls_noreturn void ls_throw_err(ls_t * self, ls_error_t e);
+///
+/// This function is not used directly; it is wrapped by macro ls_throw_err().
+ls_noreturn void ls_throw_err_f(ls_t * self, ls_error_t e);
/// Fetch the current character. Throws if the fetcher returns an error other
/// than LS_NO_PROGRAM. If there is no character, returns nul.
diff --git a/lib/ls_kw_impl.c b/lib/ls_kw_impl.c
index 4c3c343..5e537b1 100644
--- a/lib/ls_kw_impl.c
+++ b/lib/ls_kw_impl.c
@@ -66,7 +66,6 @@ ls_kw_fun_IF(ls_t * self)
if (cond.body.integer.value == 0)
{
ls_consume_to_eol(self);
- return;
}
else
{
diff --git a/lib/ls_kw_impl_GOSUB_RETURN.c b/lib/ls_kw_impl_GOSUB_RETURN.c
index 6a729d8..3ef04b4 100644
--- a/lib/ls_kw_impl_GOSUB_RETURN.c
+++ b/lib/ls_kw_impl_GOSUB_RETURN.c
@@ -20,6 +20,12 @@
// --- PRIVATE DATATYPES -------------------------------------------------------
// --- PRIVATE CONSTANTS -------------------------------------------------------
// --- PRIVATE FUNCTION PROTOTYPES ---------------------------------------------
+
+/// Parse function arguments into the newly created scope.
+/// @pre opening LS_OP_LPAREN has already been lexed
+/// @pre self->_callstack points to the new scope
+static void _parse_args(ls_t * self);
+
// --- PUBLIC VARIABLES --------------------------------------------------------
// --- PRIVATE VARIABLES -------------------------------------------------------
// --- PUBLIC FUNCTIONS --------------------------------------------------------
@@ -52,31 +58,7 @@ ls_kw_fun_GOSUB(ls_t * self)
tok = ls_lex(self);
if (tok == LS_OP_LPAREN)
{
- while (tok != LS_OP_RPAREN)
- {
- tok = ls_lex(self);
-
- if (tok == LS_TOK_WORD)
- {
- ls_value_t * var = ls_new_var(self,
- self->_token.word);
-
- tok = ls_lex(self);
- if (tok != LS_OP_EQ)
- ls_throw_err(self, LS_SYNTAX_ERROR);
-
- // Evaluate the expression in the prev scope
- // We had to create the variable already to
- // avoid losing the buffer content
- self->_callstack = self->_callstack->prev;
- ls_value_t val;
- ls_eval_expr(self, &val, LS_TOK_NONE);
- ls_write_var(self, var, &val);
- self->_callstack = frame;
- }
- else if (tok != LS_OP_RPAREN && tok != LS_TOK_COMMA)
- ls_throw_err(self, LS_SYNTAX_ERROR);
- }
+ _parse_args(self);
tok = ls_lex(self);
}
@@ -134,3 +116,35 @@ ls_kw_fun_RETURN(ls_t * self)
}
// --- PRIVATE FUNCTION DEFINITIONS --------------------------------------------
+
+/// Parse `name = expr` arguments up to and including the closing
+/// LS_OP_RPAREN. Each variable is created in the scope self->_callstack
+/// points to on entry, while its value expression is evaluated with
+/// self->_callstack temporarily set to the previous scope.
+///
+/// Throws LS_SYNTAX_ERROR if a name is not followed by LS_OP_EQ, or if a token
+/// other than a word, comma or closing paren is found between arguments.
+static void _parse_args(ls_t * self)
+{
+	ls_token_t tok;
+	ls_value_t * frame = self->_callstack;
+
+	// The opening paren has already been consumed by the caller, so at
+	// least one token must always be lexed. A post-test loop guarantees
+	// tok is assigned before it is first compared (a pre-test loop would
+	// read it uninitialized).
+	do {
+		tok = ls_lex(self);
+
+		if (tok == LS_TOK_WORD)
+		{
+			ls_value_t * var = ls_new_var(self,
+				self->_token.word);
+
+			tok = ls_lex(self);
+			if (tok != LS_OP_EQ)
+				ls_throw_err(self, LS_SYNTAX_ERROR);
+
+			// Evaluate the expression in the prev scope
+			// We had to create the variable already to
+			// avoid losing the buffer content
+			self->_callstack = self->_callstack->prev;
+			ls_value_t val;
+			ls_eval_expr(self, &val, LS_TOK_NONE);
+			ls_write_var(self, var, &val);
+			self->_callstack = frame;
+		}
+		else if (tok != LS_OP_RPAREN && tok != LS_TOK_COMMA)
+			ls_throw_err(self, LS_SYNTAX_ERROR);
+	}
+	while (tok != LS_OP_RPAREN);
+}
diff --git a/lib/ls_lex.c b/lib/ls_lex.c
index 89571b3..3e866ec 100644
--- a/lib/ls_lex.c
+++ b/lib/ls_lex.c
@@ -38,9 +38,106 @@ typedef enum {
} ch_kind_t;
// --- PRIVATE CONSTANTS -------------------------------------------------------
+
+// The LUT encodes ASCII values 0x20 to 0x7E. Anything below 0x20 is treated as
+// whitespace (with \n specifically as a linesep), anything above 0x7F is an
+// abbreviated keyword, and 0x7F (delete char) is also treated as whitespace.
+//
+// The LUT encoding is rather packed but the code to unpack it is quite a bit
+// smaller than the actual excess LUT space would be without packing, on small
+// architectures.
+//
+// First packing:
+// Values 0x20 to 0x5F are found at position (value - 0x20)/2, with the
+// even-numbered entry in the low nibble and the odd-numbered in the high
+// nibble.
+//
+// Second packing:
+// Because 0x40...0x5F and 0x60...0x7F all have the same character kind except
+// 0x7F, the second range is folded onto the first. Values 0x60 to 0x7F are
+// found at position (value - 0x40)/2, overlapping value-0x20.
+//
+// Yeah I kinda did that for fun. Sorry it's dumb
+
+#define K(a, b) ((uint8_t)(a) | ((uint8_t)(b) << 4))
+static const uint8_t _ch_kind_lut[32] = {
+ K(CH_KIND_SPACE, CH_KIND_OPER), // space !
+ K(CH_KIND_STR, CH_KIND_SIGIL), // " #
+ K(CH_KIND_SIGIL, CH_KIND_SIGIL), // $ %
+ K(CH_KIND_DIGIT, CH_KIND_REM), // & '
+ K(CH_KIND_OPER, CH_KIND_OPER), // ( )
+ K(CH_KIND_OPER, CH_KIND_OPER), // * +
+ K(CH_KIND_COMMA, CH_KIND_OPER), // , -
+ K(CH_KIND_DIGIT, CH_KIND_OPER), // . /
+ K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 0 1
+ K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 2 3
+ K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 4 5
+ K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 6 7
+ K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 8 9
+ K(CH_KIND_LABEL, CH_KIND_SEP), // : ;
+ K(CH_KIND_OPER, CH_KIND_OPER), // < =
+ K(CH_KIND_OPER, CH_KIND_OPER), // > ?
+
+ K(CH_KIND_OPER, CH_KIND_LETTER), // @` Aa
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Bb Cc
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Dd Ee
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Ff Gg
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Hh Ii
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Jj Kk
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Ll Mm
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Nn Oo
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Pp Qq
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Rr Ss
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Tt Uu
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Vv Ww
+ K(CH_KIND_LETTER, CH_KIND_LETTER), // Xx Yy
+ K(CH_KIND_LETTER, CH_KIND_OPER), // Zz [{
+ K(CH_KIND_OPER, CH_KIND_OPER), // \| ]}
+ K(CH_KIND_OPER , CH_KIND_LETTER), // ^~ _del
+};
+#undef K
+
+static const ls_uchar _ops[][2] = {
+ [LS_OP_LEQ] = "<=",
+ [LS_OP_GEQ] = ">=",
+ [LS_OP_NEQ] = "<>",
+ [LS_OP_LPAREN] = "(",
+ [LS_OP_RPAREN] = ")",
+ [LS_OP_MOD] = "%",
+ [LS_OP_MUL] = "*",
+ [LS_OP_ADD] = "+",
+ [LS_OP_SUB] = "-",
+ [LS_OP_DIV] = "/",
+ [LS_OP_POW] = "^",
+ [LS_OP_LT] = "<",
+ [LS_OP_EQ] = "=",
+ [LS_OP_GT] = ">",
+};
+
+
// --- PRIVATE FUNCTION PROTOTYPES ---------------------------------------------
-static ch_kind_t _ident_ch_kind(unsigned char ch);
+/// Identify what kind of character ch is. Handles all values of ls_uchar.
+static ch_kind_t _ident_ch_kind(ls_uchar ch);
+
+/// Advance through whitespace and comments to the beginning of the next
+/// interesting token. Returns the character it stopped on.
+static ls_uchar _advance_to_token(ls_t * self);
+
+/// Record a numeric label into the label cache. The label is set to point at
+/// the current pc.
+///
+/// @param val - numeric label value
+/// TODO this should go in a new source file
+static void _record_num_label(ls_t * self, ls_addr_t val);
+
+/// Record a string label into the label cache. The label is set to point at
+/// the current pc.
+///
+/// @param word - string value
+/// TODO this should go in a new source file
+static void _record_str_label(ls_t * self, char const * word);
+
static ls_token_t _lex_oper(ls_t * self, ls_uchar ch[2]);
static ls_token_t _lex_num(ls_t * self, ls_uchar ch[2]);
static ls_token_t _lex_word(ls_t * self, ls_uchar ch[2]);
@@ -54,70 +151,47 @@ static ls_token_t _lex_kw(ls_t * self, ls_uchar ch[2]);
ls_token_t
ls_lex(ls_t * self)
{
- unsigned char ch[2];
- ch_kind_t ch_kind;
- bool skip_rem = false;
-
- do {
- ch[0] = ls_fetch(self);
- ch_kind = _ident_ch_kind(ch[0]);
- self->_pc++;
-
- if (ch_kind == CH_KIND_REM)
- skip_rem = true;
- else if (ch_kind == CH_KIND_LINESEP || ch_kind == CH_KIND_END)
- skip_rem = false;
- } while (ch_kind == CH_KIND_SPACE || skip_rem);
-
+ ls_uchar ch[2];
+ ch[0] = _advance_to_token(self);
ch[1] = ls_fetch(self);
- switch (ch_kind)
+ switch (_ident_ch_kind(ch[0]))
{
case CH_KIND_OPER:
return _lex_oper(self, ch);
- break;
case CH_KIND_DIGIT:
return _lex_num(self, ch);
- break;
case CH_KIND_LETTER:
return _lex_word(self, ch);
- break;
case CH_KIND_STR:
return _lex_str(self, ch);
- break;
case CH_KIND_SIGIL:
ls_throw_err(self, LS_SYNTAX_ERROR);
- break;
case CH_KIND_COMMA:
return LS_TOK_COMMA;
- break;
case CH_KIND_LINESEP:
case CH_KIND_SEP:
return LS_TOK_STATEMENT_SEP;
- break;
case CH_KIND_LABEL:
// These are actually parsed inside digit/word parsers
ls_throw_err(self, LS_SYNTAX_ERROR);
- break;
case CH_KIND_KW:
return _lex_kw(self, ch);
- break;
case CH_KIND_END:
return LS_TOK_NONE;
- break;
+
case CH_KIND_INVALID:
default:
ls_throw_err(self, LS_SYNTAX_ERROR);
- break;
}
}
@@ -137,68 +211,8 @@ ls_consume_to_eol(ls_t * self)
// --- PRIVATE FUNCTION DEFINITIONS --------------------------------------------
static ch_kind_t
-_ident_ch_kind(unsigned char ch)
+_ident_ch_kind(ls_uchar ch)
{
- // The LUT encodes ASCII values 0x20 to 0x7E. Anything below
- // 0x20 is treated as whitespace (with \n specifically as a linesep),
- // anything above 0x7F is an abbreviated keyword, and 0x7F (delete
- // char) is also treated as whitespace.
- //
- // The LUT encoding is rather packed but the code to unpack it is
- // quite a bit smaller than the actual excess LUT space would be
- // without packing, on small architectures.
- //
- // First packing:
- // Values 0x20 to 0x5F are found at position (value - 0x20)/2, with
- // the even-numbered entry in the low nibble and the odd-numbered in
- // the high nibble.
- //
- // Second packing:
- // Because 0x40...0x5F and 0x60...0x7F all have the same character kind
- // except 0x7F, the second range is folded onto the first. Values
- // 0x60 to 0x7F are found at position (value - 0x40)/2, overlapping
- // value-0x20.
- //
- // Yeah I kinda did that for fun. Sorry it's dumb
-
-#define K(a, b) ((uint8_t)(a) | ((uint8_t)(b) << 4))
- static const uint8_t ch_kind_lut[32] = {
- K(CH_KIND_SPACE, CH_KIND_OPER), // space !
- K(CH_KIND_STR, CH_KIND_SIGIL), // " #
- K(CH_KIND_SIGIL, CH_KIND_SIGIL), // $ %
- K(CH_KIND_DIGIT, CH_KIND_REM), // & '
- K(CH_KIND_OPER, CH_KIND_OPER), // ( )
- K(CH_KIND_OPER, CH_KIND_OPER), // * +
- K(CH_KIND_COMMA, CH_KIND_OPER), // , -
- K(CH_KIND_DIGIT, CH_KIND_OPER), // . /
- K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 0 1
- K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 2 3
- K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 4 5
- K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 6 7
- K(CH_KIND_DIGIT, CH_KIND_DIGIT), // 8 9
- K(CH_KIND_LABEL, CH_KIND_SEP), // : ;
- K(CH_KIND_OPER, CH_KIND_OPER), // < =
- K(CH_KIND_OPER, CH_KIND_OPER), // > ?
-
- K(CH_KIND_OPER, CH_KIND_LETTER), // @` Aa
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Bb Cc
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Dd Ee
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Ff Gg
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Hh Ii
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Jj Kk
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Ll Mm
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Nn Oo
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Pp Qq
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Rr Ss
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Tt Uu
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Vv Ww
- K(CH_KIND_LETTER, CH_KIND_LETTER), // Xx Yy
- K(CH_KIND_LETTER, CH_KIND_OPER), // Zz [{
- K(CH_KIND_OPER, CH_KIND_OPER), // \| ]}
- K(CH_KIND_OPER , CH_KIND_LETTER), // ^~ _del
- };
-#undef K
-
if (ch >= 0x80)
return CH_KIND_KW;
else if (ch == 0x7F)
@@ -209,7 +223,7 @@ _ident_ch_kind(unsigned char ch)
ch -= 0x20;
ch -= 0x20;
- uint8_t k = ch_kind_lut[ch / 2];
+ uint8_t k = _ch_kind_lut[ch / 2];
if (ch & 1)
// this is one instruction on avr
k = k << 4 | k >> 4;
@@ -223,39 +237,89 @@ _ident_ch_kind(unsigned char ch)
return CH_KIND_SPACE;
}
-static ls_token_t
-_lex_oper(ls_t * self, ls_uchar ch[2])
+static ls_uchar
+_advance_to_token(ls_t * self)
+{
+ ls_uchar ch;
+ ch_kind_t ch_kind;
+ bool skip_rem = false;
+
+ do {
+ ch = ls_fetch(self);
+ ch_kind = _ident_ch_kind(ch);
+ self->_pc++;
+
+ if (ch_kind == CH_KIND_REM)
+ skip_rem = true;
+ else if (ch_kind == CH_KIND_LINESEP || ch_kind == CH_KIND_END)
+ skip_rem = false;
+ }
+ while (ch_kind == CH_KIND_SPACE || skip_rem);
+
+ return ch;
+}
+
+static void
+_record_num_label(ls_t * self, ls_addr_t val)
{
- static const unsigned char ops[][2] = {
- [LS_OP_LEQ] = "<=",
- [LS_OP_GEQ] = ">=",
- [LS_OP_NEQ] = "<>",
- [LS_OP_LPAREN] = "(",
- [LS_OP_RPAREN] = ")",
- [LS_OP_MOD] = "%",
- [LS_OP_MUL] = "*",
- [LS_OP_ADD] = "+",
- [LS_OP_SUB] = "-",
- [LS_OP_DIV] = "/",
- [LS_OP_POW] = "^",
- [LS_OP_LT] = "<",
- [LS_OP_EQ] = "=",
- [LS_OP_GT] = ">",
+ uint8_t i = (self->_label_cache_i + 1) % LS_LABEL_CACHE_SIZE;
+ self->_label_cache_i = i;
+
+ self->_label_cache[i] = (ls_label_cache_t) {
+ .pc = self->_pc,
+ .num = val,
};
+}
+
+static void
+_record_str_label(ls_t * self, char const * word)
+{
+ // The label might already exist. If it does, we're either
+ // seeing it again or shadowing it.
+
+ ls_value_t * label = NULL;
+ for (ls_value_t * i = self->_labels; i; i = i->next)
+ {
+ if (!strncmp(word, i->body.label.ident, LS_IDENT_LEN))
+ {
+ label = i;
+ break;
+ }
+ }
- for (uint8_t i = 0; i < sizeof(ops)/sizeof(ops[0]); i++)
+ if (label)
{
- if (ops[i][1]) {
- if (ch[0] == ops[i][0] && ch[1] == ops[i][1])
+ if (label->body.label.pc != self->_pc)
+ ls_throw_err(self, LS_DUPLICATE_DEFINITION);
+ }
+ else
+ {
+ label = ls_alloc(self);
+ label->ty = LS_TY_LABEL;
+ label->next = self->_labels;
+ strncpy(label->body.label.ident, word, LS_IDENT_LEN);
+ label->body.label.pc = self->_pc;
+ self->_labels = label;
+ }
+}
+
+static ls_token_t
+_lex_oper(ls_t * self, ls_uchar ch[2])
+{
+ for (uint8_t i = 0; i < sizeof(_ops)/sizeof(_ops[0]); i++)
+ {
+ if (_ops[i][1])
+ {
+ if (ch[0] == _ops[i][0] && ch[1] == _ops[i][1])
{
self->_pc++; // consume second char
return (ls_token_t) i;
}
- } else {
- if (ch[0] == ops[i][0])
- {
+ }
+ else
+ {
+ if (ch[0] == _ops[i][0])
return (ls_token_t) i;
- }
}
}
@@ -265,49 +329,43 @@ _lex_oper(ls_t * self, ls_uchar ch[2])
static ls_token_t
_lex_num(ls_t * self, ls_uchar ch[2])
{
- uint8_t radix = 10;
+ int8_t radix = 10;
ls_token_t tok = LS_TOK_NUMBER;
ls_int_t val = 0;
if (ch[0] == '&')
{
- uint8_t nch = 2;
radix = 8;
+ self->_pc += 2;
ch[1] = (ls_uchar) toupper(ch[1]);
if (ch[1] == 'H')
radix = 16;
- else if (ch[1] == 'B')
- radix = 2;
else if (ch[1] == 'O')
radix = 8;
+ else if (ch[1] == 'B')
+ radix = 2;
else
- nch = 1;
+ self->_pc--; // bare & without a letter
- self->_pc = (ls_addr_t)(self->_pc + nch - 1);
ch[0] = ls_fetch(self);
self->_pc++;
}
for (;;)
{
- uint8_t digit = 0;
+ int8_t digit = 0;
ch[0] = (ls_uchar) toupper(ch[0]);
if (ch[0] >= '0' && ch[0] <= '9')
- digit = (uint8_t)(ch[0] - '0');
+ digit = (int8_t)(ch[0] - '0');
else if (ch[0] >= 'A' && ch[0] <= 'F')
- digit = (uint8_t)(ch[0] - 'A' + 10);
+ digit = (int8_t)(ch[0] - 'A' + 10);
else if (ch[0] == ':')
{
if (val > LS_ADDR_MAX || val < 0)
ls_throw_err(self, LS_SYNTAX_ERROR);
- self->_label_cache_i++;
- self->_label_cache_i %= LS_LABEL_CACHE_SIZE;
- ls_label_cache_t * lc
- = &self->_label_cache[self->_label_cache_i];
- lc->pc = self->_pc;
- lc->num = (ls_addr_t) val;
+ _record_num_label(self, (ls_addr_t) val);
tok = LS_TOK_NUM_LABEL;
break;
}
@@ -318,12 +376,11 @@ _lex_num(ls_t * self, ls_uchar ch[2])
}
ch[0] = ls_fetch(self);
- self->_pc++;
if (digit >= radix)
ls_throw_err(self, LS_SYNTAX_ERROR);
- else if (tok != LS_TOK_NUM_LABEL)
- val = val * radix + digit;
+ val = val * radix + digit;
+ self->_pc++;
}
self->_token.number = val;
@@ -368,37 +425,7 @@ _lex_word(ls_t * self, ls_uchar ch[2])
return (ls_token_t) kw;
}
else if (tok == LS_TOK_STR_LABEL)
- {
- // The label might already exist. If it does, we're either
- // seeing it again or shadowing it.
-
- ls_value_t * label = NULL;
- for (ls_value_t * i = self->_labels; i; i = i->next)
- {
- if (!strncmp(self->_token.word, i->body.label.ident,
- LS_IDENT_LEN))
- {
- label = i;
- break;
- }
- }
-
- if (label)
- {
- if (label->body.label.pc != self->_pc)
- ls_throw_err(self, LS_DUPLICATE_DEFINITION);
- }
- else
- {
- label = ls_alloc(self);
- label->ty = LS_TY_LABEL;
- label->next = self->_labels;
- strncpy(label->body.label.ident, self->_token.word,
- LS_IDENT_LEN);
- label->body.label.pc = self->_pc;
- self->_labels = label;
- }
- }
+ _record_str_label(self, self->_token.word);
return tok;
}