diff options
author | Alexis Lockwood | 2021-06-27 18:20:43 -0400 |
---|---|---|
committer | Alexis Lockwood | 2021-06-27 18:20:43 -0400 |
commit | 3e8ec839dd2d0270b7ddac9feaea15c3f7883728 (patch) | |
tree | 1d1c6182730428db97c1baa7511268ca7b72de66 | |
parent | e560c7b9fd333be5855d7da29d492bc9176b7b84 (diff) |
Tidy minifier
-rw-r--r-- | ls_minify.c | 353 |
1 files changed, 223 insertions, 130 deletions
diff --git a/ls_minify.c b/ls_minify.c
index 363360f..bc0cca7 100644
--- a/ls_minify.c
+++ b/ls_minify.c
@@ -28,6 +28,15 @@ typedef struct {
     size_t len;
 } file_fetcher_ctx_t;
 
+typedef struct {
+    ls_context_t ls;
+    ls_token_t tok;
+    ls_token_ty_t last_type;
+    file_fetcher_ctx_t * fctx;
+    bool add_space;
+    FILE * f_out;
+} minifier_t;
+
 // --- PRIVATE CONSTANTS -------------------------------------------------------
 
 // --- PRIVATE FUNCTION PROTOTYPES ---------------------------------------------
@@ -35,6 +44,16 @@ static int _fetcher(void * ctx, uint16_t loc);
 static void _usage(char const * argv0, bool short_text);
 static void _minify(file_fetcher_ctx_t * fctx, FILE * f_out);
 
+static void _min_number(minifier_t * min, ls_token_t tok);
+static void _min_word(minifier_t * min, ls_token_t tok);
+static void _min_str_label(minifier_t * min, ls_token_t tok);
+static void _min_num_label(minifier_t * min, ls_token_t tok);
+static void _min_keyword(minifier_t * min, ls_token_t tok);
+static void _min_string(minifier_t * min, ls_token_t tok);
+static void _min_operator(minifier_t * min, ls_token_t tok);
+static void _min_comma(minifier_t * min, ls_token_t tok);
+static void _min_sep(minifier_t * min, ls_token_t tok);
+
 // --- PUBLIC VARIABLES --------------------------------------------------------
 
 // --- PRIVATE VARIABLES -------------------------------------------------------
@@ -161,178 +180,252 @@ static void _usage(char const * argv0, bool short_text)
 
 static void _minify(file_fetcher_ctx_t * fctx, FILE * f_out)
 {
-    ls_context_t ctx = {0};
+    minifier_t min = {.fctx = fctx};
     ls_value_t pool[100]; // for labels
-    ls_init_ctx(&ctx, pool, sizeof pool / sizeof pool[0]);
-    ctx.fetcher = _fetcher;
-    ctx.fetcher_ctx = (void *) fctx;
-    if (setjmp(ctx.error_jmp_buf))
+    ls_init_ctx(&min.ls, pool, sizeof pool / sizeof pool[0]);
+    min.ls.fetcher = _fetcher;
+    min.ls.fetcher_ctx = (void *) fctx;
+
+    if (setjmp(min.ls.error_jmp_buf))
     {
         uint16_t line = 0, col = 0;
-        ls_translate_pc(&ctx, ctx.pc, &line, &col);
+        ls_translate_pc(&min.ls, min.ls.pc, &line, &col);
         fprintf(stderr, "error %d at %u:%u",
-            (int) ctx.error, line, col);
+            (int) min.ls.error, line, col);
         exit(EXIT_FAILURE);
     }
 
-    ls_token_t tok = {.ty = LS_TOK_NONE};
-    ls_token_ty_t last_type = LS_TOK_NONE;
+    min.f_out = f_out;
+    min.tok = (ls_token_t) {.ty = LS_TOK_NONE};
+    min.last_type = LS_TOK_NONE;
 
     // TODO: actually collect idents and label numbers, and minify them
     // too
     bool done = false;
     while (!done)
     {
-        ls_lex(&ctx, &tok);
+        ls_lex(&min.ls, &min.tok);
 
-        bool add_space =
-            last_type == LS_TOK_NUMBER ||
-            last_type == LS_TOK_WORD ||
-            (last_type == LS_TOK_KEYWORD && un_minify) ||
-            (last_type == LS_TOK_OPERATOR && un_minify) ||
-            (last_type == LS_TOK_COMMA && un_minify);
+        min.add_space =
+            min.last_type == LS_TOK_NUMBER ||
+            min.last_type == LS_TOK_WORD ||
+            (min.last_type == LS_TOK_KEYWORD && un_minify) ||
+            (min.last_type == LS_TOK_OPERATOR && un_minify) ||
+            (min.last_type == LS_TOK_COMMA && un_minify);
 
-        switch (tok.ty)
+        switch (min.tok.ty)
         {
         case LS_TOK_NUMBER:
-            if (add_space)
-                fprintf(f_out, " ");
-            fprintf(f_out, "%"PRId32, tok.body.number_val);
+            _min_number(&min, min.tok);
             break;
+
         case LS_TOK_WORD:
-            if (add_space)
-                fprintf(f_out, " ");
-            assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
-            tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
-            fprintf(f_out, "%s", tok.body.word_val);
+            _min_word(&min, min.tok);
             break;
+
         case LS_TOK_STR_LABEL:
-            if (add_space)
-                fprintf(f_out, " ");
-            assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
-            tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
-            fprintf(f_out, "%s:", tok.body.word_val);
+            _min_str_label(&min, min.tok);
             break;
+
         case LS_TOK_NUM_LABEL:
-            if (add_space)
-                fprintf(f_out, " ");
-            assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
-            tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
-            fprintf(f_out, "%"PRId32":", tok.body.number_val);
+            _min_num_label(&min, min.tok);
             break;
+
         case LS_TOK_KEYWORD:
-            if (tok.body.keyword_val == LS_KW_REM)
-            {
-                if (un_minify)
-                {
-                    if (add_space)
-                        fprintf(f_out, " ");
-                    fprintf(f_out, "REM ");
-                }
-                else if (keep_rems)
-                {
-                    uint8_t c = LS_KW_REM;
-                    fwrite(&c, 1, 1, f_out);
-                }
-
-                ls_addr_t pc_start;
-                ls_addr_t pc_end;
-                for (pc_start = ctx.pc;
-                    (fctx->s[pc_start] == ' ' || fctx->s[pc_start] == '\t')
-                    && pc_start < fctx->len;
-                    pc_start++);
-                for (pc_end = pc_start;
-                    fctx->s[pc_end] != '\n' && pc_end < fctx->len;
-                    pc_end++);
-                if (un_minify || keep_rems)
-                {
-                    fwrite(&fctx->s[pc_start],
-                        (size_t)(pc_end - pc_start),
-                        1, f_out);
-                    fprintf(f_out, "\n");
-                    // We just emitted a \n, make sure the
-                    // next statement reflects that
-                    tok.ty = LS_TOK_STATEMENT_SEP;
-                }
-                else
-                {
-                    // Skipped entirely...
-                    tok.ty = last_type;
-                }
-                ctx.pc = (ls_addr_t)(pc_end + 1);
-            }
-            else if (!un_minify)
-            {
-                uint8_t c = tok.body.keyword_val;
-                fwrite(&c, 1, 1, f_out);
-            }
-            else
-            {
-                if (add_space)
-                    fprintf(f_out, " ");
-                fprintf(f_out, "%s",
-                    ls_kwmap[tok.body.keyword_val -
-                        LS_KW_OFFSET].name
-                );
-            }
+            _min_keyword(&min, min.tok);
             break;
+
         case LS_TOK_STRING:
-            if (un_minify)
-                fprintf(f_out, " ");
-            fprintf(f_out, "\"");
-            for (ls_addr_t i = tok.body.string_val.pc_start;
-                i <= tok.body.string_val.pc_end;
-                i++)
-            {
-                fprintf(f_out, "%c", fctx->s[i]);
-            }
-            fprintf(f_out, "\"");
+            _min_string(&min, min.tok);
             break;
+
         case LS_TOK_OPERATOR:
-        {
-            // TODO: this could be centralized
-            static const unsigned char ops[][3] = {
-                [OPER_LEQ] = "<=",
-                [OPER_GEQ] = ">=",
-                [OPER_NEQ] = "<>",
-                [OPER_LPAREN] = "(",
-                [OPER_RPAREN] = ")",
-                [OPER_MOD] = "%",
-                [OPER_MUL] = "*",
-                [OPER_ADD] = "+",
-                [OPER_SUB] = "-",
-                [OPER_DIV] = "/",
-                [OPER_POW] = "^",
-                [OPER_LT] = "<",
-                [OPER_EQ] = "=",
-                [OPER_GT] = ">",
-            };
-            assert(tok.body.oper_val <= OPER_GT);
-            if (un_minify)
-                fprintf(f_out, " ");
-            fprintf(f_out, "%s", ops[tok.body.oper_val]);
+            _min_operator(&min, min.tok);
             break;
-        }
+
         case LS_TOK_COMMA:
-            fprintf(f_out, ",");
+            _min_comma(&min, min.tok);
             break;
+
         case LS_TOK_STATEMENT_SEP:
-            if (last_type != LS_TOK_STATEMENT_SEP &&
-                last_type != LS_TOK_STR_LABEL &&
-                last_type != LS_TOK_NUM_LABEL
-            )
-                fprintf(f_out, "\n");
+            _min_sep(&min, min.tok);
             break;
+
         case LS_TOK_INVALID:
         default:
-            ls_throw_err(&ctx, LS_SYNTAX_ERROR);
+            ls_throw_err(&min.ls, LS_SYNTAX_ERROR);
             break;
+
         case LS_TOK_NONE:
             done = true;
             break;
         }
 
-        last_type = tok.ty;
+        min.last_type = min.tok.ty;
     }
 }
+
+static void _min_number(minifier_t * min, ls_token_t tok)
+{
+    if (min->add_space)
+        fprintf(min->f_out, " ");
+    fprintf(min->f_out, "%"PRId32, tok.body.number_val);
+}
+
+static void _min_word(minifier_t * min, ls_token_t tok)
+{
+    if (min->add_space)
+        fprintf(min->f_out, " ");
+    assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
+    tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
+    fprintf(min->f_out, "%s", tok.body.word_val);
+}
+
+static void _min_str_label(minifier_t * min, ls_token_t tok)
+{
+    if (min->add_space)
+        fprintf(min->f_out, " ");
+    assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
+    tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
+    fprintf(min->f_out, "%s:%s", tok.body.word_val,
+        un_minify ? " " : "");
+}
+
+static void _min_num_label(minifier_t * min, ls_token_t tok)
+{
+    if (min->add_space)
+        fprintf(min->f_out, " ");
+    assert(tok.body.word_val[LS_IDENT_OR_KW_LEN] == 0);
+    tok.body.word_val[LS_IDENT_OR_KW_LEN] = 0;
+    fprintf(min->f_out, "%"PRId32":%s", tok.body.number_val,
+        un_minify ? " " : "");
+}
+
+static void _min_keyword(minifier_t * min, ls_token_t tok)
+{
+    if (tok.body.keyword_val == LS_KW_REM)
+    {
+        if (un_minify)
+        {
+            if (min->add_space)
+                fprintf(min->f_out, " ");
+            fprintf(min->f_out, "REM ");
+        }
+        else if (keep_rems)
+        {
+            uint8_t c = LS_KW_REM;
+            fwrite(&c, 1, 1, min->f_out);
+        }
+
+        ls_addr_t pc_start;
+        ls_addr_t pc_end;
+        for (pc_start = min->ls.pc;; pc_start++)
+        {
+            if (pc_start >= min->fctx->len)
+                break;
+            if (!(min->fctx->s[pc_start] == ' '
+                || min->fctx->s[pc_start] == '\t'))
+                break;
+        }
+        for (pc_end = pc_start;; pc_end++)
+        {
+            if (pc_end >= min->fctx->len)
+                break;
+            if (min->fctx->s[pc_end] == '\n')
+                break;
+        }
+
+        if (un_minify || keep_rems)
+        {
+            fwrite(&min->fctx->s[pc_start],
+                (size_t)(pc_end - pc_start), 1, min->f_out);
+
+            fprintf(min->f_out, "\n");
+
+            // We just emitted a \n, make sure the
+            // next statement reflects that
+            min->tok.ty = LS_TOK_STATEMENT_SEP;
+        }
+        else
+        {
+            // Skipped entirely...
+            min->tok.ty = min->last_type;
+        }
+        min->ls.pc = (ls_addr_t)(pc_end + 1);
+    }
+    else if (!un_minify)
+    {
+        uint8_t c = tok.body.keyword_val;
+        fwrite(&c, 1, 1, min->f_out);
+    }
+    else
+    {
+        if (min->add_space)
+            fprintf(min->f_out, " ");
+        fprintf(min->f_out, "%s",
+            ls_kwmap[tok.body.keyword_val - LS_KW_OFFSET].name);
+    }
+}
+
+static void _min_string(minifier_t * min, ls_token_t tok)
+{
+    if (un_minify)
+        fprintf(min->f_out, " ");
+
+    fprintf(min->f_out, "\"");
+
+    for (ls_addr_t i = tok.body.string_val.pc_start;
+        i <= tok.body.string_val.pc_end;
+        i++)
+    {
+        fprintf(min->f_out, "%c", min->fctx->s[i]);
+    }
+
+    fprintf(min->f_out, "\"");
+}
+
+static void _min_operator(minifier_t * min, ls_token_t tok)
+{
+    // TODO: this could be centralized
+    static const unsigned char ops[][3] = {
+        [OPER_LEQ] = "<=",
+        [OPER_GEQ] = ">=",
+        [OPER_NEQ] = "<>",
+        [OPER_LPAREN] = "(",
+        [OPER_RPAREN] = ")",
+        [OPER_MOD] = "%",
+        [OPER_MUL] = "*",
+        [OPER_ADD] = "+",
+        [OPER_SUB] = "-",
+        [OPER_DIV] = "/",
+        [OPER_POW] = "^",
+        [OPER_LT] = "<",
+        [OPER_EQ] = "=",
+        [OPER_GT] = ">",
+    };
+
+    assert(tok.body.oper_val <= OPER_GT);
+
+    if (un_minify)
+        fprintf(min->f_out, " ");
+
+    fprintf(min->f_out, "%s", ops[tok.body.oper_val]);
+}
+
+static void _min_comma(minifier_t * min, ls_token_t tok)
+{
+    (void) tok;
+    fprintf(min->f_out, ",");
+}
+
+static void _min_sep(minifier_t * min, ls_token_t tok)
+{
+    (void) tok;
+
+    if (min->last_type != LS_TOK_STATEMENT_SEP &&
+        min->last_type != LS_TOK_STR_LABEL &&
+        min->last_type != LS_TOK_NUM_LABEL
+    )
+        fprintf(min->f_out, "\n");
+}