aboutsummaryrefslogtreecommitdiff
path: root/lib/ls_types.h
blob: 7b8d218ac334d873f537c22fd6ea190bcbeb538c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
// This software disclaims copyright. Do what you want with it. Be gay, do
// crime. Originally written by Alexis Lockwood in 2021. Ⓐ

// --- LITTLESCRIPT TYPES ------------------------------------------------------
// Values/types needed at runtime are allocated "dynamically". This is done via
// a linked-list pool of fixed size blocks. This header defines all types in
// that pool.

#ifndef LS_TYPES_H
#define LS_TYPES_H

// --- DEPENDENCIES ------------------------------------------------------------

// Supporting modules
// NOTE that ls_kws.h is included at the BOTTOM of this file. This is a slight
// hack to avoid having to define some types in it that gen_kws.py would have
// to know about.

// Standard headers
#include <stdbool.h>
#include <stddef.h>
#include <inttypes.h>

// --- MACROS ------------------------------------------------------------------

/// Size, in chars, of the `ident` arrays in the variable and label types
/// declared in this header.
#define LS_IDENT_LEN		6
/// NOTE(review): not referenced in this header; presumably the buffer size
/// used when a word may be either an identifier or a keyword -- confirm
/// against the lexer.
#define LS_IDENT_OR_KW_LEN	9

// First value of each type group. A type's group is carried in its two most
// significant bits (of eight), which LS_TY_GRP_MASK selects.
#define LS_TY_SIMPLE_START	0x00
#define LS_TY_COMPLEX_START	0x40
#define LS_TY_CHILD_START	0x80
#define LS_TY_INTERNAL_START	0xC0
#define LS_TY_GRP_MASK		0xC0

/// Isolate the group bits of a type so they can be compared against one of
/// the LS_TY_*_START values.
#define LS_TY_GROUP_OF(ty) ((ty) & LS_TY_GRP_MASK)

/// True for Simple types: usable directly in the script, and holding no
/// pointers to other values. A SIMPLE type must leave its own PREV and NEXT
/// pointers alone, so that a container holding it is free to use them.
#define LS_TY_IS_SIMPLE(ty) (LS_TY_GROUP_OF(ty) == LS_TY_SIMPLE_START)
/// True for Complex types: usable directly in the script, and holding
/// pointers to other values. A COMPLEX type must leave its own PREV and NEXT
/// pointers alone, so that a container holding it is free to use them.
#define LS_TY_IS_COMPLEX(ty) (LS_TY_GROUP_OF(ty) == LS_TY_COMPLEX_START)
/// True for Child types: not usable directly in the script, but pointed to
/// by Complex types.
#define LS_TY_IS_CHILD(ty) (LS_TY_GROUP_OF(ty) == LS_TY_CHILD_START)
/// True for Internal types. It is always an error for one of these to be the
/// target of a script variable.
#define LS_TY_IS_INTERNAL(ty) (LS_TY_GROUP_OF(ty) == LS_TY_INTERNAL_START)

/// Sentinel standing in for "no source address". Address 0 is a real source
/// location, so the top of the 16-bit range is used as the null value.
#define LS_ADDR_NULL UINT16_MAX
/// Highest address value below the null sentinel.
#define LS_ADDR_MAX (LS_ADDR_NULL - 1)

/// True when a token ends a statement: a newline, a semicolon, or the end of
/// the token stream. Note that the argument is evaluated more than once.
#define LS_TOK_EOS(tok) ( \
		((tok) == LS_TOK_NEWLINE) \
		|| ((tok) == LS_TOK_SEMICOLON) \
		|| ((tok) == LS_TOK_NONE))

/// True when a token is an operator. The invalid operator LS_NO_OP counts as
/// an operator for this check.
#define LS_TOK_OPER(tok) ((tok) <= (ls_token_ty_t) LS_NO_OP)
/// True when a token is a keyword, i.e. lies at or above LS_KW_OFFSET.
#define LS_TOK_KEYWORD(tok) ((tok) >= (ls_token_ty_t) LS_KW_OFFSET)

/// Produce the signature of the handler function for keyword `n`, that is
/// `void ls_kw_fun_<n>(struct ls_s * self)`. The code generated by gen_kws.py
/// uses this macro.
#define LS_KW_FUN(n) void ls_kw_fun_ ## n (struct ls_s * self)


// --- ACCESSORY DEFINITIONS ---------------------------------------------------

// Forward declaration of the main Value type. The full definition
// (ls_value_t) comes later in this header; the type bodies declared before it
// only hold pointers to it.
struct ls_value_s;

// Forward declaration of ls_t/struct ls_s. It is not defined in this header;
// here it is only needed as the pointer parameter of LS_KW_FUN().
struct ls_s;

/// Basic integer type (signed, 32 bits). This is used for integers in
/// scripts.
typedef int32_t ls_int_t;

/// Unsigned 8-bit char as returned from the fetcher. This is only used when
/// dealing with source at the byte level; once ls_lex parses it, it becomes
/// char.
typedef unsigned char ls_uchar;

/// Addresses into the source (unsigned, 16 bits). Note that because the
/// source starts at address 0, zero cannot serve as the null value;
/// LS_ADDR_NULL is used instead.
typedef uint16_t ls_addr_t;

// --- SIMPLE TYPES ------------------------------------------------------------

/// Integer value. Appears as the `integer` member of the ls_value_t body
/// union.
typedef struct {
	/// The integer itself, in the script integer type.
	ls_int_t value;
} ls_ty_int_t;

// --- COMPLEX TYPES -----------------------------------------------------------

/// String variable (the head of a string). The text is held in pieces: the
/// head stores up to four characters itself, and the chunks it links to
/// (ls_ty_str_chunk_t) store eight characters each. The head holds fewer
/// characters than a chunk because it also has to carry the length and the
/// link to the chunks.
typedef struct {
	/// NOTE(review): presumably the total length of the string across the
	/// head and all chunks -- confirm against the string code.
	uint16_t length;
	struct ls_value_s * chunk; ///< Pointer to first chunk
	/// Characters stored directly in the head.
	char value[4];
} ls_ty_str_t;

/// List. The items contained by the list are linked up by their PREV and NEXT
/// pointers. The list itself has available P/N, so lists could nest (though
/// the current version of LS does not allow it).
///
/// To make "random" access to the next or previous item faster, memo and
/// memo_i should store the pointer and index of the most recently accessed
/// item. When accessing an item at random, it can be walked to from this
/// instead of from the beginning.
typedef struct {
	struct ls_value_s * first; ///< Pointer to first item
	/// Most recently accessed item (see above).
	struct ls_value_s * memo;
	/// Index of the item `memo` points at.
	uint16_t memo_i;
	/// NOTE(review): presumably the number of items in the list --
	/// confirm against the list code.
	uint16_t length;
} ls_ty_list_t;

// --- CHILD TYPES -------------------------------------------------------------

/// String chunk. Used by ls_ty_str_t. The PREV and NEXT pointers are used to
/// link up the chunks. The first chunk should point back at the string head,
/// and the last chunk should point forward to NULL.
typedef struct {
	/// Eight characters of string data.
	char value[8];
} ls_ty_str_chunk_t;

// --- INTERNAL TYPES ----------------------------------------------------------

/// Integer variable. This contains a variable name and a direct integer value.
/// Variables start with this type, and then switch to ls_ty_var_t if assigned
/// a non-integer value.
typedef struct {
	/// Variable name.
	/// NOTE(review): whether a name of exactly LS_IDENT_LEN characters is
	/// NUL-terminated cannot be told from this header -- confirm.
	char ident[LS_IDENT_LEN];
	/// The variable's integer value, stored directly.
	ls_int_t value;
} ls_ty_int_var_t;

/// Generic variable. This points at a subtype. It should never be used for
/// integers (integer code assumes non-TY_INT_VAR variables do not contain
/// ints, as an optimization).
typedef struct {
	/// Variable name, same size as in ls_ty_int_var_t.
	char ident[LS_IDENT_LEN];
	/// The value this variable refers to.
	struct ls_value_s * value;
} ls_ty_var_t;

/// Label. This points to the PC of the start of the line just after the label.
typedef struct {
	/// Label name.
	char ident[LS_IDENT_LEN];
	/// Source address of the start of the line following the label.
	ls_addr_t pc;
} ls_ty_label_t;

/// Function context stackframe. Used in function calls, and one is also used
/// as the global context. NEXT pointer should point into a linked list of
/// variables.
typedef struct {
	/// "Program counter" (byte address into source text) of execution.
	/// When executing a GOSUB, this should be left pointing at the
	/// beginning of that GOSUB statement. The RETURN statement will
	/// examine this for an AS keyword to accept any return value, then
	/// advance it to the next statement.
	///
	/// The global context sets this to LS_ADDR_NULL.
	ls_addr_t pc;

	/// Byte address into the source text of the next element in a DATA
	/// statement to read. Defaults to LS_ADDR_NULL, which means the next
	/// READ should find the nearest DATA statement forward and initialize
	/// it to that.
	ls_addr_t readptr;

	// Keep flags at 8 bits or fewer
	// NOTE(review): presumably so the flags add only one byte to the
	// frame body -- confirm the intended size budget.
	/// Whether currently executing an IF statement
	bool executing_if : 1;
} ls_ty_sctx_call_t;

/// FOR loop stackframe. NEXT pointer should point at the loop iterator
/// variable.
typedef struct {
	/// Terminating value
	ls_int_t term;
	/// Step value. Only 16 bits wide, unlike the terminating value.
	/// NOTE(review): presumably LS_FOR_STEP_TOO_LARGE is reported for
	/// steps that do not fit -- confirm against the FOR implementation.
	int16_t step;
	/// Program counter of the statement after FOR
	ls_addr_t for_pc;
} ls_ty_sctx_for_t;

/// WHILE loop stackframe. The NEXT pointer is not used here, because WHILE
/// loops do not create a scope.
typedef struct {
	/// Program counter of the condition in the WHILE statement.
	ls_addr_t while_pc;
} ls_ty_sctx_while_t;

// --- VARIANT/POOL TYPE -------------------------------------------------------

/// All possible types of values. This enum is guaranteed to fit in 8 bits, and
/// has four groups of types that can be identified by the two most significant
/// bits (see LS_TY_GRP_MASK and the LS_TY_IS_*() macros). Within a group the
/// enumerators count up from that group's LS_TY_*_START value.
typedef enum {
	// --- Simple group ---
	// NOTE(review): LS_TY_PRISTINE and LS_TY_NOT_ALLOC presumably describe
	// the allocation state of a pool block rather than a script value --
	// confirm against the allocator.
	LS_TY_PRISTINE = LS_TY_SIMPLE_START,
	LS_TY_NOT_ALLOC,
	LS_TY_INT,

	// --- Complex group ---
	LS_TY_STR = LS_TY_COMPLEX_START,
	LS_TY_LIST,

	// --- Child group ---
	LS_TY_STR_CHUNK = LS_TY_CHILD_START,

	// --- Internal group ---
	LS_TY_INT_VAR = LS_TY_INTERNAL_START,
	LS_TY_VAR,
	LS_TY_LABEL,
	LS_TY_SCTX_CALL,
	LS_TY_SCTX_FOR,
	LS_TY_SCTX_WHILE,
	LS_TY_NULL, // Has no body
} ls_ty_t;

/// Main variant type: a type tag, two link pointers, and a union holding the
/// body for whichever type the tag names.
typedef struct ls_value_s {
	/// Type tag, stored in 8 bits.
	/// NOTE(review): a bit-field of enum type is implementation-defined in
	/// ISO C. If a compiler treats ls_ty_t as signed, values of 0x80 and
	/// above (the Child and Internal groups) would not survive a signed
	/// 8-bit field -- confirm on every target toolchain.
	ls_ty_t ty : 8;
	/// Link pointers. Used as follows:
	///
	/// Simple, Complex types: must not use in the type itself. These types
	///   may be inside containers, which will use them to collect the
	///   values.
	/// Child types: use as needed. Typically used to collect chunks.
	/// Internal types:
	///   - Stack frames: PREV points at the stack frame above this one.
	///     NEXT points at the variable scope.
	///   - Others: as defined in those types' documentation.
	struct ls_value_s * prev, * next;
	/// Type-specific payload. Members are grouped in the same order as the
	/// type groups: simple, complex, child, internal.
	union {
		ls_ty_int_t		integer;

		ls_ty_str_t		str;
		ls_ty_list_t		list;

		ls_ty_str_chunk_t	str_chunk;

		ls_ty_int_var_t		int_var;
		ls_ty_var_t		var;
		ls_ty_label_t		label;
		ls_ty_sctx_call_t	sctx_call;
		ls_ty_sctx_for_t	sctx_for;
		ls_ty_sctx_while_t	sctx_while;
	} body;
} ls_value_t;

// --- OTHER TYPES -------------------------------------------------------------
/// Label cache entry. When a numeric label is found, it's added to the label
/// cache, so recently seen labels can be jumped to more directly without
/// needing to scan the code.
typedef struct {
	/// Source address associated with the cached label.
	ls_addr_t pc;
	/// The numeric label itself.
	uint16_t num;
} ls_label_cache_t;

/// Error codes. LS_OK is the first enumerator and therefore zero; every other
/// code is nonzero.
typedef enum {
	LS_OK,

	LS_BAD_KEYWORD, // this keyword not accepted here
	LS_DUPLICATE_DEFINITION,
	LS_EXPECTED_IDENT,
	LS_EXPECTED_KEYWORD,
	LS_EXPECTED_NUMBER,
	LS_FOR_NEXT_MISMATCH,
	LS_FOR_STEP_TOO_LARGE,
	LS_INTERNAL_ERROR, // things that should fail an assert
	LS_INVALID_EXPR,
	LS_IO_ERROR,
	LS_LABEL_RANGE,
	LS_MISSING_QUOTE,
	LS_NUMBER_FORMAT,
	LS_OUT_OF_MEMORY,
	LS_STOPPED,
	LS_SYNTAX_ERROR,
	LS_TYPE_MISMATCH,
	LS_UNDEFINED_LABEL,
	LS_UNDEFINED_VARIABLE,
	LS_WHILE_WEND_MISMATCH,
	LS_SCANNING_STMT_IN_IF,

	/// To be returned by a fetcher if there is no more data. This is not
	/// strictly an error condition; the parser should be allowed to read
	/// past the end.
	LS_NO_MORE_PROGRAM,

	// NOTE(review): presumably an end marker (one past the last real
	// code) rather than an error in its own right -- confirm usage.
	LS_ERROR_T_TOP
} ls_error_t;

/// Operators. Values start at zero and end at LS_NO_OP, so every operator
/// compares less than or equal to LS_NO_OP; LS_TOK_OPER() relies on this, and
/// ls_token_ty_t continues numbering from LS_NO_OP + 1.
typedef enum {
	// Double char
	LS_OP_LEQ,
	LS_OP_GEQ,
	LS_OP_NEQ,

	// Single char
	LS_OP_LPAREN,
	LS_OP_RPAREN,
	LS_OP_MOD,
	LS_OP_MUL,
	LS_OP_ADD,
	LS_OP_SUB,
	LS_OP_DIV,
	LS_OP_POW,
	LS_OP_LT,
	LS_OP_EQ,
	LS_OP_GT,

	// Any operators that are detected directly in text by ls_lex() should
	// be located contiguously above this point, so that these enum values
	// may be indices into a non-sparse lookup table.

	// Virtual
	LS_OP_NEG, // LS_OP_SUB becomes LS_OP_NEG in context

	// Keywords. Must be located contiguously starting at LS_OP_FIRST_KW
	LS_OP_FIRST_KW,
	// LS_OP_ABS shares the value of LS_OP_FIRST_KW.
	LS_OP_ABS = LS_OP_FIRST_KW,
	LS_OP_AND,
	LS_OP_EQV,
	LS_OP_IMP,
	LS_OP_NOT,
	LS_OP_OR,
	LS_OP_XOR,

	// The invalid operator; also the highest ls_op_t value.
	LS_NO_OP
} ls_op_t;

/// The token type enum coexists with ls_op_t and ls_kw_t, all of which have
/// mutually non-overlapping ranges. This simplifies comparisons to check
/// whether a token is a certain operator or keyword - just check the type.
/// See LS_TOK_OPER() and LS_TOK_KEYWORD().
///
/// Note that as returned by ls_lex, the following are NOT considered
/// operators: commas, and keywords that act like operators (abs, not, and...)
typedef enum {
	// --- ls_op_t here ---

	/// Integer and floating point values. Numbered from just above
	/// LS_NO_OP so that this enum does not overlap ls_op_t.
	LS_TOK_NUMBER = LS_NO_OP + 1,

	/// Idents
	LS_TOK_WORD,

	/// String labels
	LS_TOK_STR_LABEL,

	/// Numeric labels
	LS_TOK_NUM_LABEL,

	/// String literals
	LS_TOK_STRING,

	/// Comma
	LS_TOK_COMMA,

	/// All other "tokens". Normally only used internally by the lexer -
	/// lexer will throw a syntax error on invalid tokens.
	LS_TOK_INVALID,

	/// Statement separators: newline and ;
	///
	/// In most cases, you don't need to care which is which - just use
	/// LS_TOK_EOS() to check for all end-of-statements including end-of-
	/// file
	LS_TOK_NEWLINE,
	LS_TOK_SEMICOLON,

	/// End of stream
	LS_TOK_NONE,

	// --- ls_kw_t here ---
} ls_token_ty_t;

/// Decoded token. Holds a value of either ls_token_ty_t, ls_op_t, or ls_kw_t
/// (which do not overlap), stored in 8 bits.
typedef uint8_t ls_token_t;

// --- PUBLIC CONSTANTS --------------------------------------------------------
// --- PUBLIC VARIABLES --------------------------------------------------------
// --- PUBLIC FUNCTIONS --------------------------------------------------------

// --- UGLY DEPENDENCY HACKS, SEE ABOVE ----------------------------------------
#include "ls_kws.h"

#endif // !defined(LS_TYPES_H)