diff --git a/net.ser b/net.ser new file mode 100644 index 0000000..4ebcf0f --- /dev/null +++ b/net.ser @@ -0,0 +1,119 @@ +// Abs -> { whatever: u16, field1: { a: (invalid)[^1] }, field2: A[1] } + +// AbsArr -> { whatever: u16, field1: { a: (invalid)[^1] }, field2: A[1] }&[] +// AbsArrArr -> { whatever: u16, field1: { a: (invalid)[^1] }, field2: A[1] }&[]&[][4] + +// A -> { a: (invalid)[^1] } + +// AA -> { a: (invalid)[^1] }[1] + +// RBASIC -> (invalid) +// RM1 -> (invalid) +// RM2 -> (invalid) +// RM3 -> (invalid) + +// R1 -> (invalid)&[4]&[] +// R2 -> (invalid)&[4] +// R3 -> (invalid)&[4] +// R4 -> (invalid) +// R5 -> (invalid) + +// SA -> { field: { field: SA } } +// SC -> { field: SC&[] } +// Node -> { data: u8, children: Node&[] } +// Recur -> { field: Recur } +// SB -> { field: { field: SB } } + +// bool -> bool +// char -> char +// i16 -> i16 +// i32 -> i32 +// i64 -> i64 +// i8 -> i8 +// u16 -> u16 +// u32 -> u32 +// u64 -> u64 +// u8 -> u8 + +// Struct Layout: S2 +// field[0].field.len align(2) size(2) +// field[1].field.len align(2) size(2) +// field[0].field.data align(0) size(0) +// field[1].field.data align(0) size(0) +// Struct Layout: SC +// field.len align(2) size(2) +// field.data align(0) size(0) +// Struct Layout: Node +// children.len align(2) size(2) +// data align(1) size(1) +// children.data align(0) size(0) +// Struct Layout: S1 +// field.len align(1) size(1) +// field.data align(0) size(0) +// Struct Layout: Abs +// whatever align(2) size(2) +// Struct Layout: S3 +// field.len align(2) size(2) +// field.data align(0) size(0) +// Struct Layout: Rel +// a align(2) size(2) + +struct Abs { + id: u16, + min: u32, + max: u32, + fuzz: u32, + flat: u32, + res: u32, +} + +struct Rel { + id: u16, +} + +struct Key { + id: u16, +} + +const ABS_CNT = 64; +const REL_CNT = 16; +const KEY_CNT = 768; + +struct Tag { + name: char[], +} + +messages Device { + Info { + slot: u8, + index: u8, + + abs: Abs[^ABS_CNT], + rel: Rel[^REL_CNT], + key: Key[^KEY_CNT], + 
} + Report { + slot: u8, + index: u8, + + abs: u32[^ABS_CNT], + rel: u32[^REL_CNT], + key: u8[^KEY_CNT], + } + ControllerState { + index: u16, + led: u8[3], + small_rumble: u8, + big_rumble: u8, + flash_on: u8, + flash_off: u8, + } + #[versioned] + Request { + requests: Tag[][], + request_count: u16, + } + Destroy { + index: u16, + } +} diff --git a/ser/.ccls b/ser/.ccls new file mode 100644 index 0000000..e671fa2 --- /dev/null +++ b/ser/.ccls @@ -0,0 +1 @@ +clang diff --git a/ser/.clang-format b/ser/.clang-format new file mode 100644 index 0000000..6355949 --- /dev/null +++ b/ser/.clang-format @@ -0,0 +1,12 @@ +# vi:ft=yaml +BasedOnStyle: LLVM +IndentWidth: 4 +AlignArrayOfStructures: Left +PointerAlignment: Right +ColumnLimit: 130 +IncludeBlocks: Regroup +BinPackArguments: false +BinPackParameters: false +AlignAfterOpenBracket: BlockIndent +AllowAllArgumentsOnNextLine: false +AlignEscapedNewlines: DontAlign diff --git a/ser/.gitignore b/ser/.gitignore new file mode 100644 index 0000000..af3a786 --- /dev/null +++ b/ser/.gitignore @@ -0,0 +1,3 @@ +.ccls-cache +objects +ser diff --git a/ser/Makefile b/ser/Makefile new file mode 100644 index 0000000..85adcb8 --- /dev/null +++ b/ser/Makefile @@ -0,0 +1,31 @@ +CC=gcc +CFLAGS=-std=c2x -pedantic -g -Wall -fsanitize=address +LDFLAGS=-lm + +BUILD_DIR=./objects +BIN=./ser +SOURCES=$(wildcard *.c) + +OBJECTS:=$(patsubst %.c,$(BUILD_DIR)/%.o,$(SOURCES)) +DEPS:=$(patsubst %.c,$(BUILD_DIR)/%.d,$(SOURCES)) + +.PHONY: run build clean + +run: $(BIN) + @echo "[exec] $<" + $(BIN) +build: $(BIN) + +-include $(DEPS) + +$(BIN): $(OBJECTS) + @echo "[ld] $@" + $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ +$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR) + @echo "[cc] $<" + $(CC) -MMD $(CFLAGS) -c $< -o $@ +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) +clean: + rm -rf $(BUILD_DIR) + rm -f $(BIN) diff --git a/ser/arena_allocator.c b/ser/arena_allocator.c new file mode 100644 index 0000000..53d66b9 --- /dev/null +++ b/ser/arena_allocator.c @@ -0,0 +1,39 @@ +#include 
"arena_allocator.h" + +#include "assert.h" +#include "vector.h" + +#include +#include + +static ArenaBlock arena_block_alloc(size_t size) { + size = size < ARENA_BLOCK_SIZE ? ARENA_BLOCK_SIZE : size; + byte *ptr = malloc(size); + assert_alloc(ptr); + return (ArenaBlock){.data = ptr, .size = size, .end = ptr + size}; +} + +void arena_block_drop(ArenaBlock block) { free(block.data); } + +ArenaAllocator arena_init() { + ArenaBlock block = arena_block_alloc(ARENA_BLOCK_SIZE); + ArenaBlockVec blocks = vec_init(); + vec_grow(&blocks, 256); + vec_push(&blocks, block); + ArenaBlock *last = blocks.data; + return (ArenaAllocator){.blocks = blocks, .ptr = last->data, .last = last}; +} +void *arena_alloc(ArenaAllocator *alloc, size_t size) { + if (alloc->ptr + size > alloc->last->end) { + ArenaBlock block = arena_block_alloc(size); + vec_push(&alloc->blocks, block); + ArenaBlock *last = &alloc->blocks.data[alloc->blocks.len - 1]; + alloc->ptr = last->data; + alloc->last = last; + } + + byte *ptr = alloc->ptr; + alloc->ptr += size; + return ptr; +} +void arena_drop(ArenaAllocator arena) { vec_drop(arena.blocks); } diff --git a/ser/arena_allocator.h b/ser/arena_allocator.h new file mode 100644 index 0000000..bd4a67a --- /dev/null +++ b/ser/arena_allocator.h @@ -0,0 +1,35 @@ +#ifndef ARENA_ALLOCATOR_H +#define ARENA_ALLOCATOR_H +#include "utils.h" +#include "vector_impl.h" + +#include +#include + +#define ARENA_BLOCK_SIZE 4096 + +typedef struct { + size_t size; + byte *data; + byte *end; +} ArenaBlock; + +void arena_block_drop(ArenaBlock block); + +VECTOR_IMPL(ArenaBlock, ArenaBlockVec, arena_block, arena_block_drop); + +// Simple growing arena allocator +typedef struct { + ArenaBlockVec blocks; + ArenaBlock *last; + byte *ptr; +} ArenaAllocator; + +// Create a new arena allocator +ArenaAllocator arena_init(); +// Allocate size bytes in the arena +void *arena_alloc(ArenaAllocator *alloc, size_t size); +// Destroy the arena, freeing its memory +void arena_drop(ArenaAllocator 
arena); + +#endif diff --git a/ser/assert.h b/ser/assert.h new file mode 100644 index 0000000..42d8350 --- /dev/null +++ b/ser/assert.h @@ -0,0 +1,37 @@ +#ifndef ASSERT_H +#define ASSERT_H + +#include "log.h" + +// Basic assertion macro (always checks) +#ifdef LOG_DISABLE +#define assert(c, fmt, ...) \ + do { \ + if (!(c)) { \ + fprintf(stderr, fmt "\n" __VA_OPT__(, ) __VA_ARGS__); \ + exit(1); \ + } \ + } while (false) +#else // LOG_DISABLE +#define assert(c, ...) \ + do { \ + if (!(c)) { \ + log_error(__VA_ARGS__); \ + exit(1); \ + } \ + } while (false) +#endif // LOG_DISABLE + +// Only check if NDEBUG isn't defined +#ifdef NDEBUG +#define debug_assert(c, ...) (void)0 +#else +#define debug_assert(c, ...) assert(c, __VA_ARGS__) +#endif + +#define assert_eq(a, b, ...) assert(a == b, __VA_ARGS__) + +// Assert allocation succeeded (var != NULL) +#define assert_alloc(var) debug_assert(var != NULL, "Failed to allocate memory for " #var " (Out of memory ?)") + +#endif diff --git a/ser/ast.c b/ser/ast.c new file mode 100644 index 0000000..5b0e087 --- /dev/null +++ b/ser/ast.c @@ -0,0 +1,121 @@ +#include "ast.h" + +#include "arena_allocator.h" +#include "vector.h" + +AstContext ast_init() { + return (AstContext){ + .root = NULL, + .alloc = arena_init(), + }; +} + +static void ast_node_drop(AstNode *node) { + switch (node->tag) { + case ATStruct: + vec_drop(node->struct_.fields); + break; + case ATMessage: + vec_drop(node->message.fields); + break; + case ATMessages: + for (size_t i = 0; i < node->messages.children.len; i++) { + ast_node_drop((AstNode *)&node->messages.children.data[i]); + } + vec_drop(node->messages.children); + break; + case ATItems: + for (size_t i = 0; i < node->items.items.len; i++) { + ast_node_drop((AstNode *)&node->items.items.data[i]); + } + vec_drop(node->items.items); + break; + default: + break; + } +} + +void ast_drop(AstContext ctx) { + if (ctx.root != NULL) { + ast_node_drop(ctx.root); + } + arena_drop(ctx.alloc); +} + +static void 
print(AstNode *node, uint32_t indent) { + const uint32_t I = 4; + switch (node->tag) { + case ATNumber: + fprintf(stderr, "%*sAstNumber(%.*s)\n", indent, "", node->number.token.span.len, node->number.token.lexeme); + break; + case ATIdent: + fprintf(stderr, "%*sAstIdent(%.*s)\n", indent, "", node->ident.token.span.len, node->ident.token.lexeme); + break; + case ATVersion: + fprintf(stderr, "%*sAstVersion:\n", indent, ""); + print((AstNode *)&node->version.version, indent + I); + break; + case ATNoSize: + fprintf(stderr, "%*sAstSize(none)\n", indent, ""); + break; + case ATMaxSize: + fprintf(stderr, "%*sAstSize(max):\n", indent, ""); + print((AstNode *)&node->size.value, indent + I); + break; + case ATFixedSize: + fprintf(stderr, "%*sAstSize(fixed):\n", indent, ""); + print((AstNode *)&node->size.value, indent + I); + break; + case ATHeapArray: + fprintf(stderr, "%*sAstArray(heap):\n", indent, ""); + print((AstNode *)node->array.type, indent + I); + print((AstNode *)&node->array.size, indent + I); + break; + case ATFieldArray: + fprintf(stderr, "%*sAstArray(field):\n", indent, ""); + print((AstNode *)node->array.type, indent + I); + print((AstNode *)&node->array.size, indent + I); + break; + case ATField: + fprintf(stderr, "%*sAstField(%.*s):\n", indent, "", node->field.name.span.len, node->field.name.lexeme); + print((AstNode *)&node->field.type, indent + I); + break; + case ATStruct: + fprintf(stderr, "%*sAstStruct(%.*s):\n", indent, "", node->struct_.ident.span.len, node->struct_.ident.lexeme); + for (size_t i = 0; i < node->struct_.fields.len; i++) { + print((AstNode *)&node->struct_.fields.data[i], indent + I); + } + break; + case ATMessage: + fprintf(stderr, "%*sAstMessage(%.*s):\n", indent, "", node->message.ident.span.len, node->message.ident.lexeme); + for (size_t i = 0; i < node->message.fields.len; i++) { + print((AstNode *)&node->message.fields.data[i], indent + I); + } + break; + case ATAttribute: + fprintf(stderr, "%*sAstAttribute(%.*s)\n", indent, "", 
node->attribute.ident.span.len, node->attribute.ident.lexeme); + break; + case ATMessages: + fprintf(stderr, "%*sAstMessages(%.*s):\n", indent, "", node->messages.name.span.len, node->messages.name.lexeme); + for (size_t i = 0; i < node->messages.children.len; i++) { + print((AstNode *)&node->messages.children.data[i], indent + I); + } + break; + case ATTypeDecl: + fprintf(stderr, "%*sAstTypeDecl(%.*s):\n", indent, "", node->type_decl.name.span.len, node->type_decl.name.lexeme); + print((AstNode *)&node->type_decl.value, indent + I); + break; + case ATConstant: + fprintf(stderr, "%*sAstConstant(%.*s):\n", indent, "", node->constant.name.span.len, node->constant.name.lexeme); + print((AstNode *)&node->constant.value, indent + I); + break; + case ATItems: + fprintf(stderr, "%*sAstItems:\n", indent, ""); + for (size_t i = 0; i < node->items.items.len; i++) { + print((AstNode *)&node->items.items.data[i], indent + I); + } + break; + } +} + +void ast_print(AstNode *node) { print(node, 0); } diff --git a/ser/ast.h b/ser/ast.h new file mode 100644 index 0000000..0236e19 --- /dev/null +++ b/ser/ast.h @@ -0,0 +1,329 @@ +#ifndef AST_H +#define AST_H +#include "arena_allocator.h" +#include "lexer.h" +#include "source.h" +#include "vector_impl.h" + +typedef enum { + ATNumber, + ATVersion, + ATIdent, + ATHeapArray, + ATFieldArray, + ATMaxSize, + ATFixedSize, + ATNoSize, + ATField, + ATAttribute, + ATStruct, + ATMessage, + ATMessages, + ATTypeDecl, + ATConstant, + ATItems, +} AstTag; + +typedef struct { + AstTag tag; + Span span; + Token token; +} AstNumber; + +typedef struct { + AstTag tag; + Span span; + Token token; +} AstIdent; + +typedef struct { + AstTag tag; + Span span; + AstNumber version; +} AstVersion; + +typedef struct { + AstTag tag; + Span span; + AstNumber value; +} AstSize; + +typedef struct { + AstTag tag; + Span span; + struct AstType *type; + AstSize size; +} AstArray; + +typedef union { + AstTag tag; + AstIdent ident; + AstArray array; +} AstType; + +typedef 
struct { + AstTag tag; + Span span; + Token name; + AstType type; +} AstField; + +VECTOR_IMPL(AstField, AstFieldVec, ast_field); + +typedef struct { + AstTag tag; + Span span; + Token ident; + AstFieldVec fields; +} AstStruct; + +typedef struct { + AstTag tag; + Span span; + Token ident; + AstFieldVec fields; +} AstMessage; + +typedef struct { + AstTag tag; + Span span; + Token ident; +} AstAttribute; + +typedef union { + AstTag tag; + AstMessage message; + AstAttribute attribute; +} AstAttributeOrMessage; + +VECTOR_IMPL(AstAttributeOrMessage, AstAttributeOrMessageVec, ast_attribute_or_message); + +typedef struct { + AstTag tag; + Span span; + Token name; + AstAttributeOrMessageVec children; +} AstMessages; + +typedef struct { + AstTag tag; + Span span; + Token name; + AstType value; +} AstTypeDecl; + +typedef struct { + AstTag tag; + Span span; + Token name; + AstNumber value; +} AstConstant; + +typedef union { + AstTag tag; + AstTypeDecl type_decl; + AstVersion version; + AstStruct struct_; + AstMessages messages; + AstConstant constant; +} AstItem; + +VECTOR_IMPL(AstItem, AstItemVec, ast_item); + +typedef struct { + AstTag tag; + Span span; + AstItemVec items; +} AstItems; + +typedef union { + AstTag tag; + AstNumber number; + AstIdent ident; + AstVersion version; + AstSize size; + AstArray array; + AstType type; + AstField field; + AstStruct struct_; + AstMessage message; + AstAttribute attribute; + AstAttributeOrMessage attribute_or_message; + AstMessages messages; + AstTypeDecl type_decl; + AstConstant constant; + AstItems items; +} AstNode; + +typedef struct { + AstNode *root; + ArenaAllocator alloc; +} AstContext; + +AstContext ast_init(); + +void ast_drop(AstContext ctx); + +static inline AstNumber ast_number(AstContext ctx, Span span, Token lit) { + AstNumber res; + res.tag = ATNumber; + res.span = span; + res.token = lit; + return res; +} + +static inline AstIdent ast_ident(AstContext ctx, Span span, Token ident) { + AstIdent res; + res.tag = ATIdent; + 
res.span = span; + res.token = ident; + return res; +} + +static inline AstVersion ast_version(AstContext ctx, Span span, AstNumber number) { + AstVersion res; + res.tag = ATVersion; + res.span = span; + res.version = number; + return res; +} + +static inline AstArray ast_heap_array(AstContext ctx, Span span, AstType *type, AstSize size) { + AstArray res; + res.tag = ATHeapArray; + res.span = span; + res.type = (struct AstType *)type; + res.size = size; + return res; +} + +static inline AstArray ast_field_array(AstContext ctx, Span span, AstType *type, AstSize size) { + AstArray res; + res.tag = ATFieldArray; + res.span = span; + res.type = (struct AstType *)type; + res.size = size; + return res; +} + +static inline AstSize ast_max_size(AstContext ctx, Span span, AstNumber size) { + AstSize res; + res.tag = ATMaxSize; + res.span = span; + res.value = size; + return res; +} + +static inline AstSize ast_fixed_size(AstContext ctx, Span span, AstNumber size) { + AstSize res; + res.tag = ATFixedSize; + res.span = span; + res.value = size; + return res; +} + +static inline AstSize ast_no_size(AstContext ctx, Span span) { + AstSize res; + res.tag = ATNoSize; + res.span = span; + return res; +} + +static inline AstField ast_field(AstContext ctx, Span span, Token name, AstType type) { + AstField res; + res.tag = ATField; + res.span = span; + res.name = name; + res.type = type; + return res; +} + +static inline AstStruct ast_struct(AstContext ctx, Span span, Token name, AstFieldVec fields) { + AstStruct res; + res.tag = ATStruct; + res.span = span; + res.ident = name; + res.fields = fields; + return res; +} + +static inline AstMessage ast_message(AstContext ctx, Span span, Token name, AstFieldVec fields) { + AstMessage res; + res.tag = ATMessage; + res.span = span; + res.ident = name; + res.fields = fields; + return res; +} + +static inline AstAttribute ast_attribute(AstContext ctx, Span span, Token attribute) { + AstAttribute res; + res.tag = ATAttribute; + res.span = span; 
+ res.ident = attribute; + return res; +} + +static inline AstMessages ast_messages(AstContext ctx, Span span, Token name, AstAttributeOrMessageVec children) { + AstMessages res; + res.tag = ATMessages; + res.span = span; + res.name = name; + res.children = children; + return res; +} + +static inline AstConstant ast_constant(AstContext ctx, Span span, Token name, AstNumber value) { + AstConstant res; + res.tag = ATConstant; + res.span = span; + res.name = name; + res.value = value; + return res; +} + +static inline AstTypeDecl ast_type_decl(AstContext ctx, Span span, Token name, AstType type) { + AstTypeDecl res; + res.tag = ATTypeDecl; + res.span = span; + res.name = name; + res.value = type; + return res; +} + +static inline AstItems ast_items(AstContext ctx, Span span, AstItemVec items) { + AstItems res; + res.tag = ATItems; + res.span = span; + res.items = items; + return res; +} + +void ast_print(AstNode *node); + +static inline const char *ast_tag_to_string(AstTag tag) { +#define _case(c) \ + case AT##c: \ + return #c + switch (tag) { + _case(Number); + _case(Version); + _case(Ident); + _case(HeapArray); + _case(FieldArray); + _case(MaxSize); + _case(FixedSize); + _case(NoSize); + _case(Field); + _case(Struct); + _case(Message); + _case(Attribute); + _case(Messages); + _case(TypeDecl); + _case(Constant); + _case(Items); + } +#undef _case +} + +#endif diff --git a/ser/codegen.c b/ser/codegen.c new file mode 100644 index 0000000..f14ff3d --- /dev/null +++ b/ser/codegen.c @@ -0,0 +1,168 @@ +#include "codegen.h" + +#include +#include + +static void buffered_writer_write(void *w, const char *data, size_t len) { + // We don't use vec_push array because we want the string to be null terminated at all time (while not really including the + // null character in the string / len) + BufferedWriter *bw = (BufferedWriter *)w; + vec_grow(&bw->buf, bw->buf.len + len + 1); + memcpy(&bw->buf.data[bw->buf.len], data, len); + bw->buf.data[bw->buf.len + len] = '\0'; + 
bw->buf.len += len; +} +static void buffered_writer_format(void *w, const char *fmt, va_list args) { + BufferedWriter *bw = (BufferedWriter *)w; + va_list args2; + va_copy(args2, args); + size_t cap = bw->buf.cap - bw->buf.len; + char *ptr = &bw->buf.data[bw->buf.len]; + int len = vsnprintf(ptr, cap, fmt, args); + if (cap <= len) { + // The writing failed + vec_grow(&bw->buf, bw->buf.len + len + 1); + ptr = &bw->buf.data[bw->buf.len]; + vsnprintf(ptr, len + 1, fmt, args2); + } + va_end(args2); + bw->buf.len += len; +} +static void file_writer_write(void *w, const char *data, size_t len) { + FileWriter *fw = (FileWriter *)w; + fwrite(data, 1, len, fw->fd); +} +static void file_writer_format(void *w, const char *fmt, va_list args) { + FileWriter *fw = (FileWriter *)w; + vfprintf(fw->fd, fmt, args); +} + +BufferedWriter buffered_writer_init() { + CharVec buf = vec_init(); + vec_grow(&buf, 512); + return (BufferedWriter){.w.write = buffered_writer_write, .w.format = buffered_writer_format, .buf = buf}; +} +void buffered_writer_drop(BufferedWriter w) { vec_drop(w.buf); } +FileWriter file_writer_init(const char *path) { + FILE *fd = fopen(path, "w"); + assert(fd != NULL, "couldn't open output file"); + return (FileWriter){.w.write = file_writer_write, .w.format = file_writer_format, .fd = fd}; +} +FileWriter file_writer_from_fd(FILE *fd) { + return (FileWriter){.w.write = file_writer_write, .w.format = file_writer_format, .fd = fd}; +} +void file_writer_drop(FileWriter w) { fclose(w.fd); } + +void wt_write(Writer *w, const char *data, size_t len) { w->write(w, data, len); } +void wt_format(Writer *w, const char *fmt, ...) 
{ + va_list args; + va_start(args, fmt); + w->format(w, fmt, args); + va_end(args); +} + +typedef struct { + StructObject *obj; + PointerVec dependencies; +} StructDependencies; + +void strdeps_drop(void *item) { + StructDependencies *deps = (StructDependencies *)item; + vec_drop(deps->dependencies); +} + +impl_hashmap( + strdeps, StructDependencies, { return hash(state, (byte *)&v->obj, sizeof(StructObject *)); }, { return a->obj == b->obj; } +); + +int struct_object_compare(const void *a, const void *b) { + const StructObject *sa = *(StructObject **)a; + const StructObject *sb = *(StructObject **)b; + size_t len = sa->name.len < sb->name.len ? sa->name.len : sb->name.len; + return strncmp(sa->name.ptr, sb->name.ptr, len); +} + +void define_structs(Program *p, Writer *w, void (*define)(Writer *w, StructObject *obj)) { + Hashmap *dependencies = hashmap_init(strdeps_hash, strdeps_equal, strdeps_drop, sizeof(StructDependencies)); + + TypeDef *td = NULL; + while (hashmap_iter(p->typedefs, &td)) { + if (td->value->kind != TypeStruct) + continue; + StructObject *obj = (StructObject *)&td->value->type.struct_; + + StructDependencies deps = {.obj = obj, .dependencies = vec_init()}; + for (size_t i = 0; i < obj->fields.len; i++) { + TypeObject *type = obj->fields.data[i].type; + // Skip through the field arrays + while (type->kind == TypeArray && !type->type.array.heap) { + type = type->type.array.type; + } + + if (type->kind != TypeStruct) + continue; + + vec_push(&deps.dependencies, &type->type.struct_); + } + + hashmap_set(dependencies, &deps); + } + + PointerVec to_define = vec_init(); + size_t pass = 0; + do { + vec_clear(&to_define); + + StructDependencies *deps = NULL; + while (hashmap_iter(dependencies, &deps)) { + bool dependencies_met = true; + for (size_t i = 0; i < deps->dependencies.len; i++) { + if (hashmap_has(dependencies, &(StructDependencies){.obj = deps->dependencies.data[i]})) { + dependencies_met = false; + break; + } + } + + if (!dependencies_met) + 
continue; + vec_push(&to_define, deps->obj); + } + + qsort(to_define.data, to_define.len, sizeof(StructObject *), struct_object_compare); + + for (size_t i = 0; i < to_define.len; i++) { + StructObject *s = to_define.data[i]; + define(w, s); + hashmap_delete(dependencies, &(StructDependencies){.obj = s}); + } + pass++; + } while (to_define.len > 0); + + if (dependencies->count > 0) { + log_error("cyclic struct dependency without indirection couldn't be resolved"); + } + + hashmap_drop(dependencies); + vec_drop(to_define); +} + +char *pascal_to_snake_case(StringSlice str) { + CharVec res = vec_init(); + vec_grow(&res, str.len + 4); + for (size_t i = 0; i < str.len; i++) { + if (i == 0) { + vec_push(&res, tolower(str.ptr[i])); + continue; + } + + char c = str.ptr[i]; + if (isupper(c)) { + vec_push(&res, '_'); + } + vec_push(&res, tolower(c)); + } + + vec_push(&res, '\0'); + + return res.data; +} diff --git a/ser/codegen.h b/ser/codegen.h new file mode 100644 index 0000000..5692463 --- /dev/null +++ b/ser/codegen.h @@ -0,0 +1,62 @@ +#ifndef CODEGEN_H +#define CODEGEN_H +#include "eval.h" +#include "vector.h" + +#include +#include +#include +#include + +// Struct used to define the relative alignment when working with structs +typedef struct { + Alignment align; + uint8_t offset; +} CurrentAlignment; + +typedef struct { + void (*write)(void *w, const char *data, size_t len); + void (*format)(void *w, const char *fmt, va_list args); +} Writer; + +typedef struct { + Writer w; + CharVec buf; +} BufferedWriter; + +typedef struct { + Writer w; + FILE *fd; +} FileWriter; + +BufferedWriter buffered_writer_init(); +void buffered_writer_drop(BufferedWriter w); +FileWriter file_writer_init(const char *path); +FileWriter file_writer_from_fd(FILE *fd); +void file_writer_drop(FileWriter w); + +void wt_write(Writer *w, const char *data, size_t len); +void wt_format(Writer *w, const char *fmt, ...); + +// Define the structs of a program in the correct order (respecting direct 
dependencies) +void define_structs(Program *p, Writer *w, void (*define)(Writer *w, StructObject *)); +char *pascal_to_snake_case(StringSlice str); + +// Check if c is aligned to alignment to +static inline bool calign_is_aligned(CurrentAlignment c, Alignment to) { + assert(to.value <= c.align.value, "Can't know if calign is aligned to aligment if major alignment is less"); + return (c.offset & to.mask) == 0; +} +// Add offset to the offset of c +static inline CurrentAlignment calign_add(CurrentAlignment c, uint8_t offset) { + c.offset += offset; + c.offset &= c.align.mask; + return c; +} +// Compute the number of bytes of padding needed to be aligned to a from c. +static inline uint8_t calign_to(CurrentAlignment c, Alignment a) { + assert(a.value <= c.align.value, "Can't align when major alignment is less than requested alignment"); + return (-c.offset) & a.mask; +} + +#endif diff --git a/ser/codegen_c.c b/ser/codegen_c.c new file mode 100644 index 0000000..686d03e --- /dev/null +++ b/ser/codegen_c.c @@ -0,0 +1,624 @@ +#include "codegen_c.h" + +#include "vector.h" +#include "vector_impl.h" + +#define INDENT 4 + +typedef enum { + MTPointer, + MTArray, +} ModifierType; + +typedef struct { + ModifierType type; + uint64_t size; +} Modifier; + +#define MOD_PTR ((Modifier){.type = MTPointer}) +#define MOD_ARRAY(s) ((Modifier){.type = MTArray, .size = s}) + +VECTOR_IMPL(Modifier, ModifierVec, modifier); + +static inline const char *array_size_type(uint64_t size) { + if (size <= UINT8_MAX) { + return "uint8_t"; + } else if (size <= UINT16_MAX) { + return "uint16_t"; + } else if (size <= UINT32_MAX) { + return "uint32_t"; + } else { + return "uint64_t"; + } +} + +void write_field(Writer *w, Field f, Modifier *mods, size_t len, uint32_t indent); +// Wrte the *base* type type with indentation +void write_type(Writer *w, TypeObject *type, uint32_t indent) { + if (type->kind == TypePrimitif) { +#define _case(x, s) \ + case Primitif_##x: \ + wt_format(w, "%*s" #s " ", indent, 
""); \ + break + switch (type->type.primitif) { + _case(u8, uint8_t); + _case(u16, uint16_t); + _case(u32, uint32_t); + _case(u64, uint64_t); + _case(i8, uint8_t); + _case(i16, uint16_t); + _case(i32, uint32_t); + _case(i64, uint64_t); + _case(f32, float); + _case(f64, double); + _case(char, char); + _case(bool, bool); + } +#undef _case + } else if (type->kind == TypeStruct) { + wt_format(w, "%*sstruct %.*s ", indent, "", type->type.struct_.name.len, type->type.struct_.name.ptr); + } else { + if (type->type.array.sizing == SizingMax) { + const char *len_type = array_size_type(type->type.array.size); + wt_format(w, "%*sstruct {\n%*s%s len;\n", indent, "", indent + INDENT, "", len_type); + Field f = {.name = STRING_SLICE("data"), .type = type->type.array.type}; + Modifier mod; + if (type->type.array.heap) { + mod = MOD_PTR; + } else { + mod = MOD_ARRAY(type->type.array.size); + } + write_field(w, f, &mod, 1, indent + INDENT); + wt_format(w, ";\n%*s} ", indent, ""); + } else { + log_error("Called write_type on a non base type"); + } + } +} + +// Algorithm to handle c types here: +// 0. Given a field with a name and a type. +// let a base type be a type without modifiers (any type that isn't an array with fixed sizing). +// let modifiers be a sequence of array or pointer (array have size), the type of a modifier is +// either array or pointer. +// let * be a pointer modifier and [n] be an array modifier of size n +// 1. initialize a list of modifers, a base type variable, +// current = type (of the field) +// while(is_array(current) && is_fixed_size(current)) do +// if is_heap(current) then +// push(modifiers, *) +// else +// push(modifiers, [size(current)]) +// end +// current = element_type(current) +// end +// base_type = current +// 3. we now have a base type, a list of modifiers and a field name. +// 4. emit base_type " " +// 5. 
walk modifiers in reverse (let m and p be the current and previous modifiers) +// if type(m) == pointer then +// if type(m) != type(p) then +// emit "(" +// end +// emit "*" +// end +// 6. emit field_name +// 7. walk modifiers forward (let m and p be the current and previous modifiers) +// if type(m) == array then +// if type(m) != type(p) then +// emit "(" +// end +// emit "[" size(m) "]" +// end +// 8. Examples +// given the field: +// foo: char&[1][2][3]&[4][5]&[6]&[7], +// 3. +// base_type = char +// modifiers = {*, *, [5], *, [3], [2], *} +// field_name = foo +// result = "" +// 4. +// result = "char " +// 5. +// result = "char *(*(**" +// 6. +// result = "char *(*(**foo" +// 7. +// result = "char *(*(**foo)[5])[3][2]" +// +// in a trivial case the algoritm works as expected: +// bar: char, +// 3. +// base_type = char +// modifiers = {} +// field_name = bar +// result = "" +// 4. +// result = "char " +// 5. +// result = "char " +// 6. +// result = "char bar" +// 7. +// result = "char bar" + +void write_field(Writer *w, Field f, Modifier *mods, size_t len, uint32_t indent) { + TypeObject *type = f.type; + ModifierVec modifiers = vec_init(); + TypeObject *current = type; + _vec_modifier_push_array(&modifiers, mods, len); + while (current->kind == TypeArray && current->type.array.sizing == SizingFixed) { + if (current->type.array.heap) { + _vec_modifier_push(&modifiers, MOD_PTR); + } else { + _vec_modifier_push(&modifiers, MOD_ARRAY(current->type.array.size)); + } + current = current->type.array.type; + } + TypeObject *base_type = current; + + write_type(w, base_type, indent); + for (int i = modifiers.len - 1; i >= 0; i--) { + Modifier m = modifiers.data[i]; + if (m.type != MTPointer) + continue; + if (i == modifiers.len - 1 || modifiers.data[i + 1].type == m.type) { + wt_format(w, "*"); + } else { + wt_format(w, "(*"); + } + } + wt_format(w, "%.*s", f.name.len, f.name.ptr); + for (size_t i = 0; i < modifiers.len; i++) { + Modifier m = modifiers.data[i]; + if 
(m.type != MTArray) + continue; + if (i == 0 || modifiers.data[i - 1].type == m.type) { + wt_format(w, "[%lu]", m.size); + } else { + wt_format(w, ")[%lu]", m.size); + } + } + _vec_modifier_drop(modifiers); +} + +void write_struct(Writer *w, StructObject *obj) { + wt_format(w, "typedef struct %.*s {\n", obj->name.len, obj->name.ptr); + + for (size_t i = 0; i < obj->fields.len; i++) { + Field f = obj->fields.data[i]; + write_field(w, f, NULL, 0, INDENT); + wt_format(w, ";\n", f.name.len, f.name.ptr); + } + + wt_format(w, "} %.*s;\n\n", obj->name.len, obj->name.ptr); +} + +void write_accessor(Writer *w, TypeObject *base_type, FieldAccessor fa, bool ptr) { + if (fa.indices.len == 0) + return; + + if (ptr) { + wt_write(w, "->", 2); + } else { + wt_write(w, ".", 1); + } + + TypeObject *t = base_type; + for (size_t j = 0; j < fa.indices.len; j++) { + uint64_t index = fa.indices.data[j]; + + if (t->kind == TypeStruct) { + if (j != 0) + wt_write(w, ".", 1); + StructObject *st = (StructObject *)&t->type.struct_; + wt_write(w, st->fields.data[index].name.ptr, st->fields.data[index].name.len); + t = st->fields.data[index].type; + } else if (t->kind == TypeArray) { + if (t->type.array.sizing == SizingMax) { + if (j != 0) + wt_write(w, ".", 1); + if (index == 0) { + uint64_t size = t->type.array.size; + wt_write(w, "len", 3); + const TypeObject *type; + if (size <= UINT8_MAX) { + type = &PRIMITIF_u8; + } else if (size <= UINT16_MAX) { + type = &PRIMITIF_u16; + } else if (size <= UINT32_MAX) { + type = &PRIMITIF_u32; + } else { + type = &PRIMITIF_u64; + } + t = (TypeObject *)type; + } else { + wt_write(w, "data", 4); + t = t->type.array.type; + } + } else { + wt_format(w, "[%lu]", index); + t = t->type.array.type; + } + } + } +} + +void write_type_serialization( + Writer *w, const char *base, bool ptr, Layout *layout, CurrentAlignment al, Hashmap *layouts, size_t indent, size_t depth +) { + if (layout->fields.len == 0) + return; + + Alignment align = al.align; + size_t offset = 
al.offset; + + offset += calign_to(al, layout->fields.data[0].type->align); + + size_t i = 0; + for (; i < layout->fields.len && layout->fields.data[i].size != 0; i++) { + FieldAccessor fa = layout->fields.data[i]; + wt_format(w, "%*s*(", indent, ""); + write_type(w, fa.type, 0); + wt_format(w, "*)&buf[%lu] = %s", offset, base); + write_accessor(w, layout->type, fa, ptr); + wt_write(w, ";\n", 2); + + offset += fa.size; + al = calign_add(al, fa.size); + } + + if (i < layout->fields.len) { + offset += calign_to(al, layout->fields.data[i].type->align); + wt_format(w, "%*sbuf += %lu;\n", indent, "", offset); + + for (; i < layout->fields.len; i++) { + FieldAccessor farr = layout->fields.data[i]; + wt_format(w, "%*sfor(size_t i = 0; i < %s", indent, "", base); + FieldAccessor flen = field_accessor_clone(&farr); + // Access the length instead of data + flen.indices.data[flen.indices.len - 1] = 0; + write_accessor(w, layout->type, flen, ptr); + field_accessor_drop(flen); + char *vname = msprintf("e%lu", depth); + wt_format(w, "; i++) {\n%*stypeof(%s", indent, "", base); + write_accessor(w, layout->type, farr, ptr); + wt_format(w, "[i]) %s = %s", vname, base); + write_accessor(w, layout->type, farr, ptr); + wt_format(w, "[i];\n"); + + Layout *arr_layout = hashmap_get(layouts, &(Layout){.type = farr.type}); + assert(arr_layout != NULL, "Type has no layout (How ?)"); + write_type_serialization( + w, + vname, + false, + arr_layout, + (CurrentAlignment){.align = farr.type->align, .offset = 0}, + layouts, + indent + INDENT, + depth + 1 + ); + wt_format(w, "%*s}\n", indent, ""); + free(vname); + } + wt_format(w, "%*sbuf = (byte*)(((uintptr_t)buf - %u) & -%u);\n", indent, "", align.mask, align.value); + } else { + offset += calign_to(al, align); + wt_format(w, "%*sbuf += %lu;\n", indent, "", offset); + } +} + +void write_type_deserialization( + Writer *w, const char *base, bool ptr, Layout *layout, CurrentAlignment al, Hashmap *layouts, size_t indent, size_t depth +) { + if 
(layout->fields.len == 0) + return; + + Alignment align = al.align; + size_t offset = al.offset; + + offset += calign_to(al, layout->fields.data[0].type->align); + + char *deref = ""; + if (layout->type->kind == TypePrimitif) { + deref = "*"; + } + + size_t i = 0; + for (; i < layout->fields.len && layout->fields.data[i].size != 0; i++) { + FieldAccessor fa = layout->fields.data[i]; + wt_format(w, "%*s%s%s", indent, "", deref, base); + write_accessor(w, layout->type, fa, ptr); + wt_format(w, " = *("); + write_type(w, fa.type, 0); + wt_format(w, "*)&buf[%lu]", offset, base); + wt_write(w, ";\n", 2); + + offset += fa.size; + al = calign_add(al, fa.size); + } + + if (i < layout->fields.len) { + offset += calign_to(al, layout->fields.data[i].type->align); + wt_format(w, "%*sbuf += %lu;\n", indent, "", offset); + + for (; i < layout->fields.len; i++) { + FieldAccessor farr = layout->fields.data[i]; + wt_format(w, "%*sfor(size_t i = 0; i < %s", indent, "", base); + FieldAccessor flen = field_accessor_clone(&farr); + // Access the length instead of data + flen.indices.data[flen.indices.len - 1] = 0; + write_accessor(w, layout->type, flen, ptr); + field_accessor_drop(flen); + char *vname = msprintf("e%lu", depth); + wt_format(w, "; i++) {\n%*stypeof(&%s", indent + INDENT, "", base); + write_accessor(w, layout->type, farr, ptr); + wt_format(w, "[i]) %s = &%s", vname, base); + write_accessor(w, layout->type, farr, ptr); + wt_format(w, "[i];\n"); + + Layout *arr_layout = hashmap_get(layouts, &(Layout){.type = farr.type}); + assert(arr_layout != NULL, "Type has no layout (How ?)"); + write_type_deserialization( + w, + vname, + true, + arr_layout, + (CurrentAlignment){.align = farr.type->align, .offset = 0}, + layouts, + indent + INDENT, + depth + 1 + ); + wt_format(w, "%*s}\n", indent, ""); + free(vname); + } + wt_format(w, "%*sbuf = (byte*)(((uintptr_t)buf - %u) & -%u);\n", indent, "", align.mask, align.value); + } else { + offset += calign_to(al, align); + wt_format(w, 
"%*sbuf += %lu;\n", indent, "", offset); + } +} + +void codegen_c(Writer *header, Writer *source, const char *name, Program *p) { + wt_format( + header, + "// Generated file, do not edit (its not like it'll explode if you do, but its better not to)\n" + "#include \n" + "#include \n" + "#include \n" + "\n" + "typedef unsigned char byte;\n" + "\n" + ); + wt_format( + source, + "// Generated file, do not edit (its not like it'll explode if you do, but its better not to)\n" + "#include \"%s.h\"\n" + "#include \n" + "\n", + name + ); + + define_structs(p, header, write_struct); + + for (size_t i = 0; i < p->messages.len; i++) { + MessagesObject msgs = p->messages.data[i]; + + wt_format(header, "// %.*s\n\n", msgs.name.len, msgs.name.ptr); + wt_format(header, "typedef enum %.*sTag {\n", msgs.name.len, msgs.name.ptr); + for (size_t j = 0; j < msgs.messages.len; j++) { + wt_format( + header, + "%*s%.*sTag%.*s = %lu,\n", + INDENT, + "", + msgs.name.len, + msgs.name.ptr, + msgs.messages.data[j].name.len, + msgs.messages.data[j].name.ptr, + j + ); + } + wt_format(header, "} %.*sTag;\n\n", msgs.name.len, msgs.name.ptr); + + for (size_t j = 0; j < msgs.messages.len; j++) { + MessageObject msg = msgs.messages.data[j]; + + wt_format(header, "typedef struct %.*s%.*s {\n", msgs.name.len, msgs.name.ptr, msg.name.len, msg.name.ptr); + wt_format(header, "%*s%.*sTag tag;\n", INDENT, "", msgs.name.len, msgs.name.ptr); + + for (size_t k = 0; k < msg.fields.len; k++) { + Field f = msg.fields.data[k]; + write_field(header, f, NULL, 0, INDENT); + wt_format(header, ";\n"); + } + if (msg.attributes & Attr_versioned) { + write_field( + header, + (Field){.name.ptr = "_version", .name.len = 8, .type = (TypeObject *)&PRIMITIF_u64}, + NULL, + 0, + INDENT + ); + wt_format(header, ";\n"); + } + + wt_format(header, "} %.*s%.*s;\n\n", msgs.name.len, msgs.name.ptr, msg.name.len, msg.name.ptr); + } + + wt_format(header, "typedef union %.*sMessage {\n", msgs.name.len, msgs.name.ptr); + wt_format(header, 
"%*s%.*sTag tag;\n", INDENT, "", msgs.name.len, msgs.name.ptr); + for (size_t j = 0; j < msgs.messages.len; j++) { + MessageObject msg = msgs.messages.data[j]; + char *field = pascal_to_snake_case(msg.name); + wt_format(header, "%*s%.*s%.*s %s;\n", INDENT, "", msgs.name.len, msgs.name.ptr, msg.name.len, msg.name.ptr, field); + free(field); + } + wt_format(header, "} %.*sMessage;\n\n", msgs.name.len, msgs.name.ptr); + + char *name = pascal_to_snake_case(msgs.name); + wt_format( + header, + "// Serialize the message msg to buffer dst of size len, returns the length of the serialized message, or -1 on " + "error (buffer overflow)\n" + ); + wt_format(header, "int msg_%s_serialize(byte *dst, size_t len, %.*sMessage *msg);\n", name, msgs.name.len, msgs.name.ptr); + wt_format( + header, + "// Deserialize the message in the buffer src of size len into dst, return the length of the serialized message or " + "-1 on error.\n" + ); + wt_format( + header, + "int msg_%s_deserialize(const byte *src, size_t len, %.*sMessage *dst);\n\n", + name, + msgs.name.len, + msgs.name.ptr + ); + + char *tag_type = msprintf("%.*sTag", msgs.name.len, msgs.name.ptr); + PointerVec message_tos = vec_init(); + + for (size_t j = 0; j < msgs.messages.len; j++) { + MessageObject m = msgs.messages.data[j]; + TypeObject *to = malloc(sizeof(TypeObject)); + assert_alloc(to); + { + StructObject obj = {.name = m.name, .fields = vec_clone(&m.fields)}; + if (m.attributes & Attr_versioned) { + vec_push(&obj.fields, ((Field){.name.ptr = "_version", .name.len = 8, .type = (TypeObject *)&PRIMITIF_u64})); + } + to->type.struct_ = *(struct StructObject *)&obj; + to->kind = TypeStruct; + to->align = ALIGN_8; + } + Layout layout = type_layout(to); + vec_push(&message_tos, to); + + hashmap_set(p->layouts, &layout); + } + + { + wt_format( + source, + "int msg_%s_serialize(byte *buf, size_t len, %.*sMessage *msg) {\n", + name, + msgs.name.len, + msgs.name.ptr + ); + + wt_format(source, "%*sbyte *base_buf = buf;\n%*s%s 
tag = msg->tag;\n", INDENT, "", INDENT, "", tag_type); + wt_format(source, "%*sswitch(tag) {\n", INDENT, ""); + + for (size_t j = 0; j < msgs.messages.len; j++) { + MessageObject m = msgs.messages.data[j]; + TypeObject *mtype = message_tos.data[j]; + Layout *layout = hashmap_get(p->layouts, &(Layout){.type = mtype}); + assert(layout != NULL, "What ?"); + char *snake_case_name = pascal_to_snake_case(m.name); + char *base = msprintf("msg->%s", snake_case_name); + + wt_format(source, "%*scase %s%.*s: {\n", INDENT, "", tag_type, m.name.len, m.name.ptr); + wt_format(source, "%*s*(uint16_t *)buf = %s%.*s;\n", INDENT * 2, "", tag_type, m.name.len, m.name.ptr); + if (m.attributes & Attr_versioned) { + wt_format(source, "%*smsg->%s._version = %luUL;\n", INDENT * 2, "", snake_case_name, msgs.version); + } + write_type_serialization( + source, + base, + false, + layout, + (CurrentAlignment){.align = ALIGN_8, .offset = 2}, + p->layouts, + INDENT * 2, + 0 + ); + wt_format(source, "%*sbreak;\n%*s}\n", INDENT * 2, "", INDENT, ""); + + free(base); + free(snake_case_name); + } + wt_format(source, "%*s}\n", INDENT, ""); + wt_format(source, "%*sbuf = (byte*)(((uintptr_t)buf - %u) & -%u);\n", INDENT, "", ALIGN_8.mask, ALIGN_8.value); + wt_format(source, "%*sreturn (int)(buf - base_buf);\n", INDENT, ""); + wt_format(source, "}\n"); + } + + { + wt_format( + source, + "\nint msg_%s_deserialize(const byte *buf, size_t len, %.*sMessage *msg) {\n", + name, + msgs.name.len, + msgs.name.ptr + ); + + wt_format(source, "%*sconst byte *base_buf = buf;\n%*s%s tag = *(uint16_t*)buf;\n", INDENT, "", INDENT, "", tag_type); + wt_format(source, "%*sswitch(tag) {\n", INDENT, ""); + + for (size_t j = 0; j < msgs.messages.len; j++) { + MessageObject m = msgs.messages.data[j]; + TypeObject *mtype = message_tos.data[j]; + Layout *layout = hashmap_get(p->layouts, &(Layout){.type = mtype}); + assert(layout != NULL, "What ?"); + char *snake_case_name = pascal_to_snake_case(m.name); + char *base = 
msprintf("msg->%s", snake_case_name); + + wt_format(source, "%*scase %s%.*s: {\n", INDENT, "", tag_type, m.name.len, m.name.ptr); + wt_format(source, "%*smsg->tag = %s%.*s;\n", INDENT * 2, "", tag_type, m.name.len, m.name.ptr); + write_type_deserialization( + source, + base, + false, + layout, + (CurrentAlignment){.align = ALIGN_8, .offset = 2}, + p->layouts, + INDENT * 2, + 0 + ); + if (m.attributes & Attr_versioned) { + wt_format( + source, + "%*sif(msg->%s._version != %luUL) {\n%*sprintf(\"Mismatched message version: peers aren't the same " + "version.\\n\");\n%*s}\n", + INDENT * 2, + "", + snake_case_name, + msgs.version, + INDENT * 3, + "", + INDENT * 2, + "" + ); + } + wt_format(source, "%*sbreak;\n%*s}\n", INDENT * 2, "", INDENT, ""); + + free(base); + free(snake_case_name); + } + wt_format(source, "%*s}\n", INDENT, ""); + wt_format(source, "%*sreturn (int)(buf - base_buf);\n", INDENT, ""); + wt_format(source, "}\n"); + } + + for (size_t j = 0; j < message_tos.len; j++) { + TypeObject *to = message_tos.data[j]; + StructObject *s = (StructObject *)&to->type.struct_; + + vec_drop(s->fields); + free(to); + } + + vec_drop(message_tos); + + free(tag_type); + free(name); + } +} + +typedef struct { + uint16_t field_count; + char *a[4]; +} AA; diff --git a/ser/codegen_c.h b/ser/codegen_c.h new file mode 100644 index 0000000..25ec215 --- /dev/null +++ b/ser/codegen_c.h @@ -0,0 +1,8 @@ +#ifndef CODEGEN_C_H +#define CODEGEN_C_H + +#include "codegen.h" + +void codegen_c(Writer *header, Writer *source, const char *name, Program *p); + +#endif diff --git a/ser/eval.c b/ser/eval.c new file mode 100644 index 0000000..a88c2df --- /dev/null +++ b/ser/eval.c @@ -0,0 +1,1320 @@ +#include "eval.h" + +#include "ast.h" +#include "gen_vec.h" +#include "hashmap.h" +#include "vector.h" +#include "vector_impl.h" + +#include +#include + +#define PRIMITIF_TO(name, al) \ + const TypeObject PRIMITIF_##name = {.kind = TypePrimitif, .align = _ALIGN_##al, .type.primitif = Primitif_##name} 
+PRIMITIF_TO(u8, 2); +PRIMITIF_TO(u16, 2); +PRIMITIF_TO(u32, 4); +PRIMITIF_TO(u64, 8); +PRIMITIF_TO(i8, 1); +PRIMITIF_TO(i16, 2); +PRIMITIF_TO(i32, 4); +PRIMITIF_TO(i64, 8); +PRIMITIF_TO(f32, 4); +PRIMITIF_TO(f64, 8); +PRIMITIF_TO(char, 1); +PRIMITIF_TO(bool, 1); +#undef PRIMITIF_TO + +void array_drop(Array a) { free(a.type); } + +void type_drop(TypeObject t) { + if (t.kind == TypeArray) { + array_drop(t.type.array); + } +} + +void struct_drop(StructObject s) { vec_drop(s.fields); } + +void message_drop(MessageObject m) { vec_drop(m.fields); } + +void messages_drop(MessagesObject m) { vec_drop(m.messages); } + +static Alignment max_alignment(Alignment a, Alignment b) { + if (a.value > b.value) { + return a; + } else { + return b; + } +} + +static char *attributes_to_string(Attributes attrs, bool and) { + uint32_t count = 0; + Attributes attributes[ATTRIBUTES_COUNT]; +#define handle(a) \ + if (attrs & Attr_##a) \ + attributes[count++] = Attr_##a; + handle(versioned); +#undef handle + CharVec res = vec_init(); + for (size_t i = 0; i < count; i++) { + if (i == 0) { + } else if (i < count - 1) { + vec_push_array(&res, ", ", 2); + } else if (and) { + vec_push_array(&res, " and ", 5); + } else { + vec_push_array(&res, " or ", 4); + } + +#define _case(x) \ + case Attr_##x: \ + vec_push_array(&res, #x, sizeof(#x) - 1); \ + break + switch (attributes[i]) { + _case(versioned); + default: + vec_push_array(&res, "(invalid attribute)", 19); + break; + } +#undef _case + } + vec_push(&res, '\0'); + return res.data; +} + +static inline EvalError err_duplicate_def(Span first, Span second, AstTag type, StringSlice ident) { + return (EvalError){ + .dup = {.tag = EETDuplicateDefinition, .first = first, .second = second, .type = type, .ident = ident} + }; +} + +static inline EvalError err_unknown(Span span, AstTag type, StringSlice ident) { + return (EvalError){ + .unk = {.tag = EETUnknown, .span = span, .type = type, .ident = ident} + }; +} + +static inline EvalError err_empty(Span 
span, AstTag type, StringSlice ident) { + return (EvalError){ + .empty = {.tag = EETEmptyType, .span = span, .type = type, .ident = ident} + }; +} + +void eval_error_report(Source *src, EvalError *err) { + switch (err->tag) { + case EETUnknown: { + EvalErrorUnknown unk = err->unk; + ReportSpan span = {.span = unk.span, .sev = ReportSeverityError}; + const char *type = ""; + switch (unk.type) { + case ATConstant: + type = "constant"; + break; + case ATAttribute: + type = "attribute"; + break; + default: + type = "identifier"; + break; + } + char *help = NULL; + if (unk.type == ATAttribute) { + char *attributes = attributes_to_string(~0, false); + help = msprintf("expected %s", attributes); + free(attributes); + } + source_report( + src, + unk.span.loc, + ReportSeverityError, + &span, + 1, + help, + "Unknown %s '%.*s'", + type, + unk.ident.len, + unk.ident.ptr + ); + if (help != NULL) { + free(help); + } + break; + } + case EETDuplicateDefinition: { + EvalErrorDuplicateDefinition dup = err->dup; + ReportSpan spans[] = { + {.span = dup.first, + .sev = ReportSeverityNote, + .message = msprintf("first definition of '%.*s' here", dup.ident.len, dup.ident.ptr)}, + {.span = dup.second, .sev = ReportSeverityError, .message = "redefined here"} + }; + const char *type = ""; + switch (dup.type) { + case ATConstant: + type = "constant"; + break; + case ATIdent: + type = "identifier"; + break; + case ATField: + type = "field"; + break; + default: + break; + } + source_report( + src, + dup.second.loc, + ReportSeverityError, + spans, + 2, + NULL, + "Duplicate definition of %s '%.*s'", + type, + dup.ident.len, + dup.ident.ptr + ); + free((char *)spans[0].message); + break; + } + case EETCycle: { + EvalErrorCycle cycle = err->cycle; + const char *type = ""; + if (cycle.type == ATConstant) { + type = "constant"; + } else if (cycle.type == ATTypeDecl) { + type = "type declaration"; + } + // Check if the spans are ordered + bool spans_ascending = true; + bool spans_descending = true; + 
for (size_t i = 1; i < cycle.spans.len; i++) { + int comp = span_compare(&cycle.spans.data[i - 1], &cycle.spans.data[i]); + spans_ascending = spans_ascending && comp >= 0; + spans_descending = spans_descending && comp <= 0; + if (!spans_ascending && !spans_descending) + break; + } + bool ordered = spans_ascending | spans_descending; + if (ordered) { + // If they are, we can print the info on a span each (less noisy output) + ReportSpanVec spans = vec_init(); + vec_grow(&spans, cycle.spans.len); + + for (size_t i = 0; i < cycle.spans.len; i++) { + ReportSeverity sev; + char *message; + StringSlice name = cycle.idents.data[i]; + StringSlice next_name = cycle.idents.data[(i + 1) % cycle.idents.len]; + + if (cycle.spans.len == 1) { // Special case for a constant equal to itself + sev = ReportSeverityError; + message = msprintf("%.*s requires evaluating itself", name.len, name.ptr); + } else if (i == 0) { // First equality + sev = ReportSeverityError; + message = msprintf("%.*s requires evaluating %.*s", name.len, name.ptr, next_name.len, next_name.ptr); + } else if (i < cycle.spans.len - 1) { + sev = ReportSeverityNote; + message = msprintf("... which requires %.*s ...", next_name.len, next_name.ptr); + } else { // Looparound + sev = ReportSeverityNote; + message = msprintf("... 
which again requires %.*s", next_name.len, next_name.ptr); + } + + vec_push(&spans, ((ReportSpan){.span = cycle.spans.data[i], .sev = sev, .message = message})); + } + + source_report( + src, + cycle.spans.data[0].loc, + ReportSeverityError, + spans.data, + spans.len, + NULL, + "cycle detected when evaluating %s '%.*s'", + type, + cycle.idents.data[0].len, + cycle.idents.data[0].ptr + ); + + for (size_t i = 0; i < spans.len; i++) { + free((char *)spans.data[i].message); + } + vec_drop(spans); + } else { + // If they aren't we have to use a report per span (because the lines are not ordered) + ReportSpan span; + + span.span = cycle.spans.data[0]; + span.sev = ReportSeverityError; + if (cycle.spans.len >= 2) { + span.message = NULL; + } else { + span.message = msprintf("%.*s requires evaluating itself", cycle.idents.data[0].len, cycle.idents.data[0].ptr); + } + + source_report( + src, + cycle.spans.data[0].loc, + ReportSeverityError, + &span, + 1, + NULL, + "cycle detected when evaluating %s '%.*s'", + type, + cycle.idents.data[0].len, + cycle.idents.data[0].ptr + ); + + if (span.message != NULL) { + free((char *)span.message); + } + + span.sev = ReportSeverityNote; + span.message = NULL; + for (size_t i = 1; i < cycle.idents.len; i++) { + span.span = cycle.spans.data[i]; + StringSlice name = cycle.idents.data[i]; + + if (i == cycle.idents.len - 1) { + span.message = + msprintf("which again requires evaluating %.*s", cycle.idents.data[0].len, cycle.idents.data[0].ptr); + } + source_report( + src, + span.span.loc, + ReportSeverityNote, + &span, + 1, + NULL, + "... 
which requires evaluating %.*s ...", + name.len, + name.ptr + ); + } + free((char *)span.message); + } + break; + } + case EETInfiniteStruct: { + EvalErrorInfiniteStruct infs = err->infs; + ReportSpanVec spans = vec_init(); + CharVec structs = vec_init(); + vec_grow(&spans, infs.fields.len + infs.structs.len); + + vec_push(&structs, '\''); + SpannedStringSlice last_struct = infs.structs.data[infs.structs.len - 1]; + vec_push_array(&structs, last_struct.slice.ptr, last_struct.span.len); + vec_push(&structs, '\''); + for (int i = infs.structs.len - 2; i >= 0; i--) { + if (i == 1) { + vec_push_array(&structs, " and ", 5); + } else { + vec_push_array(&structs, ", ", 2); + } + SpannedStringSlice s = infs.structs.data[i]; + vec_push(&structs, '\''); + vec_push_array(&structs, s.slice.ptr, s.slice.len); + vec_push(&structs, '\''); + } + vec_push(&structs, '\0'); + + for (int i = infs.structs.len - 1; i >= 0; i--) { + ReportSpan span[] = { + {.sev = ReportSeverityError, .message = NULL, .span = infs.structs.data[i].span}, + {.sev = ReportSeverityNote, .message = "recursive without limit", .span = infs.fields.data[i].span } + }; + vec_push_array(&spans, span, 2); + } + + source_report( + src, + infs.structs.data[0].span.loc, + ReportSeverityError, + spans.data, + spans.len, + "insert some limiting indirection ('[]', '&[]', or '&[^max size]') to break the cycle", + "recursive struct%s %s ha%s infinite size", + infs.structs.len > 1 ? "s" : "", + structs.data, + infs.structs.len > 1 ? 
"ve" : "s" + ); + vec_drop(spans); + vec_drop(structs); + break; + } + case EETEmptyType: { + EvalErrorEmptyType empty = err->empty; + char *type = ""; + if (empty.type == ATStruct) { + type = "struct"; + } else if (empty.type == ATMessage) { + type = "message"; + } + + ReportSpan span = {.span = empty.span, .sev = ReportSeverityError, .message = "zero sized types aren't allowed"}; + + source_report( + src, + empty.span.loc, + ReportSeverityError, + &span, + 1, + NULL, + "%s '%.*s' doesn't have any field", + type, + empty.ident.len, + empty.ident.ptr + ); + break; + } + } + fprintf(stderr, "\n"); +} + +void eval_error_drop(EvalError err) { + switch (err.tag) { + case EETCycle: + vec_drop(err.cycle.idents); + vec_drop(err.cycle.spans); + break; + case EETInfiniteStruct: + vec_drop(err.infs.structs); + vec_drop(err.infs.fields); + default: + break; + } +} + +static inline StringSlice string_slice_from_token(Token t) { return (StringSlice){.ptr = t.lexeme, .len = t.span.len}; } + +static SpannedStringSlice sss_from_token(Token t) { + return (SpannedStringSlice){.slice.ptr = t.lexeme, .slice.len = t.span.len, .span = t.span}; +} + +typedef struct { + Hashmap *constants; + Hashmap *typedefs; + Hashmap *layouts; + Hashmap *unresolved; + Hashmap *names; + PointerVec type_objects; + AstItemVec *items; + EvalErrorVec errors; + MessagesObjectVec messages; +} EvaluationContext; + +typedef struct { + StringSlice constant; + Token value; + Span name_span; + Span span; +} UnresolvedConstant; + +typedef struct { + StringSlice type; + AstNode value; + Span name_span; + Span span; +} UnresolvedTypeDef; + +typedef struct { + TypeObject *type; + StringSlice name; +} TypeName; + +impl_hashmap_delegate(unconst, UnresolvedConstant, string_slice, constant); +impl_hashmap_delegate(const, Constant, string_slice, name); +impl_hashmap_delegate(untypd, UnresolvedTypeDef, string_slice, type); +impl_hashmap_delegate(typedef, TypeDef, string_slice, name); +impl_hashmap( + layout, Layout, { 
return hash(state, (byte *)&v->type, sizeof(TypeObject *)); }, { return a->type == b->type; } +); +impl_hashmap( + typename, TypeName, { return hash(state, (byte *)&v->type, sizeof(TypeObject *)); }, { return a->type == b->type; } +); + +static uint64_t get_ast_number_value(EvaluationContext *ctx, AstNumber number) { + if (number.token.type == Number) { + return number.token.lit; + } else { // The token is an Ident + StringSlice ident = string_slice_from_token(number.token); + Constant *c = hashmap_get(ctx->constants, &(Constant){.name = ident}); + if (c != NULL) { + // If the constant is invalid we make up a value to continue checking for errors + // (Since it is invalid there already has been at least one and we know this code + // can't go to the next stage) + return c->valid ? c->valid : 0; + } else { + // This constant doesn't exist: raise an error and return dummy value to continue + vec_push(&ctx->errors, err_unknown(number.token.span, ATConstant, ident)); + return 0; + } + } +} + +static Sizing ast_size_to_sizing(EvaluationContext *ctx, AstSize size, uint64_t *value) { + if (size.tag == ATMaxSize) { + *value = get_ast_number_value(ctx, size.value); + return SizingMax; + } else if (size.tag == ATFixedSize) { + *value = get_ast_number_value(ctx, size.value); + return SizingFixed; + } else { + *value = UINT16_MAX; + return SizingMax; + } +} + +static void _type_print(Hashmap *type_set, TypeObject *type) { + if (type == NULL) { + fprintf(stderr, "(invalid)"); + return; + } + + if (hashmap_set(type_set, &type)) { + if (type->kind == TypeStruct) { + fprintf(stderr, "%.*s", type->type.struct_.name.len, type->type.struct_.name.ptr); + } else { + fprintf(stderr, "(recursion)"); + } + return; + }; + + if (type->kind == TypePrimitif) { +#define _case(t) \ + case Primitif_##t: \ + fprintf(stderr, #t); \ + break + switch (type->type.primitif) { + _case(u8); + _case(u16); + _case(u32); + _case(u64); + _case(i8); + _case(i16); + _case(i32); + _case(i64); + _case(f32); + 
_case(f64); + _case(char); + _case(bool); + } +#undef _case + } else if (type->kind == TypeArray) { + _type_print(type_set, (TypeObject *)type->type.array.type); + if (type->type.array.heap) + fprintf(stderr, "&"); + if (type->type.array.sizing == SizingFixed) + fprintf(stderr, "[%lu]", type->type.array.size); + else if (type->type.array.sizing == SizingMax) + fprintf(stderr, "[^%lu]", type->type.array.size); + else + fprintf(stderr, "[]"); + } else { + StructObject s = *(StructObject *)&type->type.struct_; + fprintf(stderr, "{ "); + for (size_t i = 0; i < s.fields.len; i++) { + fprintf(stderr, "%.*s: ", s.fields.data[i].name.len, s.fields.data[i].name.ptr); + _type_print(type_set, s.fields.data[i].type); + if (i < s.fields.len - 1) { + fprintf(stderr, ", "); + } + } + fprintf(stderr, " }"); + } +} + +__attribute__((unused)) static void type_print(TypeObject *type) { + Hashmap *type_set = hashmap_init(pointer_hash, pointer_equal, NULL, sizeof(TypeObject *)); + _type_print(type_set, type); + hashmap_drop(type_set); +} + +static TypeObject *resolve_type(EvaluationContext *ctx, SpannedStringSlice name); + +static TypeObject *ast_type_to_type_obj(EvaluationContext *ctx, AstType type) { + if (type.tag == ATHeapArray || type.tag == ATFieldArray) { + TypeObject *res = malloc(sizeof(TypeObject)); + assert_alloc(res); + vec_push(&ctx->type_objects, res); + res->kind = TypeArray; + res->type.array.heap = type.tag == ATHeapArray; + res->type.array.sizing = ast_size_to_sizing(ctx, type.array.size, &res->type.array.size); + res->type.array.type = (struct TypeObject *)ast_type_to_type_obj(ctx, *(AstType *)type.array.type); + res->align.value = 0; + return res; + } else { // Otherwise the type is an identifier + return resolve_type(ctx, sss_from_token(type.ident.token)); + } +} + +static TypeObject *resolve_type(EvaluationContext *ctx, SpannedStringSlice name) { + TypeDef *type_def = hashmap_get(ctx->typedefs, &(TypeDef){.name = name.slice}); + if (type_def != NULL) { // Type is 
already resolved + return type_def->value; + } + + // Type isn't defined anywhere + if (ctx->unresolved == NULL || !hashmap_has(ctx->unresolved, &(UnresolvedTypeDef){.type = name.slice})) { + vec_push(&ctx->errors, err_unknown(name.span, ATIdent, name.slice)); + return NULL; + } + + UnresolvedTypeDef *untd = hashmap_get(ctx->unresolved, &(UnresolvedTypeDef){.type = name.slice}); + + if (untd->value.tag == ATIdent || untd->value.tag == ATFieldArray || untd->value.tag == ATHeapArray) { + hashmap_set(ctx->typedefs, &(TypeDef){.name = name.slice, .value = NULL}); + TypeObject *value = ast_type_to_type_obj(ctx, *(AstType *)&untd->value); + hashmap_set(ctx->typedefs, &(TypeDef){.name = name.slice, .value = value}); + return value; + } else { // Otherwise the value is a struct + AstStruct str = untd->value.struct_; + TypeObject *value = malloc(sizeof(TypeObject)); + { + FieldVec fields = vec_init(); + vec_grow(&fields, str.fields.len); + assert_alloc(value); + vec_push(&ctx->type_objects, value); + value->kind = TypeStruct; + value->type.struct_.fields = *(AnyVec *)&fields; + value->type.struct_.name = name.slice; + value->align.value = 0; + hashmap_set(ctx->typedefs, &(TypeDef){.name = name.slice, .value = value}); + } + StructObject *stro = (StructObject *)&value->type.struct_; + + for (size_t i = 0; i < str.fields.len; i++) { + Field f; + f.name = string_slice_from_token(str.fields.data[i].name); + f.name_span = str.fields.data[i].name.span; + f.type = ast_type_to_type_obj(ctx, str.fields.data[i].type); + vec_push(&stro->fields, f); + } + + return value; + } +} + +// Check struct object for direct recursion, returns true if the struct contains a reference to rec somewhere +static bool check_for_recursion( + EvaluationContext *ctx, EvalErrorInfiniteStruct *err, Hashmap *checked, Hashmap *invalids, TypeObject *rec, StructObject *str +) { + // Shortcircuit if we already checked this struct + // This also avoids running into recursion + // (In the case of invalids there 
already has been an error, so we don't produce another) + if (hashmap_set(checked, &str) || hashmap_has(invalids, &str)) { + return false; + } + + for (size_t i = 0; i < str->fields.len; i++) { + Field f = str->fields.data[i]; + + TypeObject *type = f.type; + if (type == NULL) + continue; + + // Non heap arrays work very much the same as regular fields, Fixed size array as well (with added indirection) + while (type->kind == TypeArray && (!type->type.array.heap || type->type.array.sizing == SizingFixed)) { + type = type->type.array.type; + } + + // Anything else won't recurse: primitives can't, and heap arrays add indirection + // (Field arrays have been eliminated above) + if (type->kind != TypeStruct) { + continue; + } + + // If we got here the type is a struct + StructObject *obj = (StructObject *)&type->type.struct_; + + if (type == rec || check_for_recursion(ctx, err, checked, invalids, rec, obj)) { + // The struct contains rec + + UnresolvedTypeDef *unr = hashmap_get(ctx->unresolved, &(UnresolvedTypeDef){.type = str->name}); + SpannedStringSlice struct_ = {.slice = unr->type, .span = unr->name_span}; + AstField af = unr->value.struct_.fields.data[i]; + // af can be either ATFieldArray or ATIdent + while (af.type.tag == ATFieldArray || af.type.tag == ATHeapArray) { + af.type = *(AstType *)af.type.array.type; + } + SpannedStringSlice field = sss_from_token(af.type.ident.token); + + vec_push(&err->structs, struct_); + vec_push(&err->fields, field); + + hashmap_set(invalids, &str); + + return true; + } + } + + return false; +} + +static Alignment resolve_alignment(TypeObject *type, Hashmap *seen) { + // Check if the type has already been resolved + if (type->align.value != 0) { + return type->align; + } + + // Avoid cycles: if we already have seen this type (but not resolved), no need to check it again + // (since we're computing the max) + if (hashmap_set(seen, &type)) { + return ALIGN_1; + } + + if (type->kind == TypeStruct) { + Alignment res = ALIGN_1; + 
StructObject *s = (StructObject *)&type->type.struct_; + for (size_t i = 0; i < s->fields.len; i++) { + res = max_alignment(res, resolve_alignment(s->fields.data[i].type, seen)); + } + return res; + } + + // Type is type array (since primitive already have an alignment) + debug_assert(type->kind == TypeArray, ""); + + if (type->type.array.sizing == SizingMax) { + uint64_t size = type->type.array.size; + Alignment res; + if (size <= UINT8_MAX) { + res = ALIGN_1; + } else if (size <= UINT16_MAX) { + res = ALIGN_2; + } else if (size <= UINT32_MAX) { + res = ALIGN_4; + } else { + res = ALIGN_8; + } + res = max_alignment(res, resolve_alignment(type->type.array.type, seen)); + return res; + } + + // Type is fixed size array + return resolve_alignment(type->type.array.type, seen); +} + +void field_accessor_drop(FieldAccessor fa) { vec_drop(fa.indices); } +FieldAccessor field_accessor_clone(FieldAccessor *fa) { + return (FieldAccessor){.type = fa->type, .size = fa->size, .indices = vec_clone(&fa->indices)}; +} + +void layout_drop(void *l) { vec_drop(((Layout *)l)->fields); } + +static void add_fields(FieldAccessorVec *v, TypeObject *t, const uint64_t *base, size_t len) { + if (t->kind == TypePrimitif) { + FieldAccessor fa = {.indices = vec_init()}; +#define _case(typ, n) \ + case Primitif_##typ: \ + fa.size = n; \ + fa.type = (TypeObject *)&PRIMITIF_##typ; \ + break; + switch (t->type.primitif) { + _case(bool, 1); + _case(char, 1); + _case(i8, 1); + _case(u8, 1); + _case(i16, 2); + _case(u16, 2); + _case(i32, 4); + _case(u32, 4); + _case(f32, 4); + _case(i64, 8); + _case(u64, 8); + _case(f64, 8); + } +#undef _case + vec_push_array(&fa.indices, base, len); + vec_push(v, fa); + } else if (t->kind == TypeStruct) { + StructObject *s = (StructObject *)&t->type.struct_; + UInt64Vec new_base = vec_init(); + vec_grow(&new_base, len + 1); + vec_push_array(&new_base, base, len); + vec_push(&new_base, 0); + for (size_t i = 0; i < s->fields.len; i++) { + new_base.data[len] = i; + 
add_fields(v, s->fields.data[i].type, new_base.data, new_base.len); + } + vec_drop(new_base); + } else { // Type is array + if (t->type.array.sizing == SizingMax) { + FieldAccessor fa = {.indices = vec_init()}; + FieldAccessor fl = {.indices = vec_init()}; + vec_grow(&fa.indices, len + 1); + vec_grow(&fl.indices, len + 1); + vec_push_array(&fa.indices, base, len); + vec_push_array(&fl.indices, base, len); + vec_push(&fa.indices, 1); + vec_push(&fl.indices, 0); + + fa.size = 0; + fa.type = t->type.array.type; + + uint64_t size = t->type.array.size; + if (size <= UINT8_MAX) { + fl.size = 1; + fl.type = (TypeObject *)&PRIMITIF_u8; + } else if (size <= UINT16_MAX) { + fl.size = 2; + fl.type = (TypeObject *)&PRIMITIF_u16; + } else if (size <= UINT32_MAX) { + fl.size = 4; + fl.type = (TypeObject *)&PRIMITIF_u32; + } else { + fl.size = 8; + fl.type = (TypeObject *)&PRIMITIF_u64; + } + + vec_push(v, fa); + vec_push(v, fl); + } else { + UInt64Vec new_base = vec_init(); + vec_grow(&new_base, len + 1); + vec_push_array(&new_base, base, len); + vec_push(&new_base, 0); + for (size_t i = 0; i < t->type.array.size; i++) { + new_base.data[len] = i; + add_fields(v, t->type.array.type, new_base.data, new_base.len); + } + vec_drop(new_base); + } + } +} + +static int fa_compare(const void *a, const void *b) { + const FieldAccessor *fa = (const FieldAccessor *)a; + const FieldAccessor *fb = (const FieldAccessor *)b; + if (fb->size != 0 && fa->size == 0) + return 1; + if (fa->size != 0 && fb->size == 0) + return -1; + return (int)fb->type->align.value - (int)fa->type->align.value; +} + +Layout type_layout(TypeObject *type) { + Layout l = {.type = type, .fields = vec_init()}; + add_fields(&l.fields, type, NULL, 0); + qsort(l.fields.data, l.fields.len, sizeof(FieldAccessor), fa_compare); + return l; +} + +static void resolve_types(EvaluationContext *ctx) { + AstItemVec *items = ctx->items; + Hashmap *untypds = hashmap_init(untypd_hash, untypd_equal, NULL, sizeof(UnresolvedTypeDef)); + + 
ctx->unresolved = untypds; + + // Get the unresolved type definitions in the map and report duplicates + for (int i = 0; i < items->len; i++) { + if (items->data[i].tag != ATStruct && items->data[i].tag != ATTypeDecl) { + continue; + } + + UnresolvedTypeDef td; + if (items->data[i].tag == ATTypeDecl) { + AstTypeDecl t = items->data[i].type_decl; + td.type = string_slice_from_token(t.name); + td.span = t.span; + td.name_span = t.name.span; + td.value.type = t.value; + } else { + AstStruct s = items->data[i].struct_; + td.type = string_slice_from_token(s.ident); + td.span = s.span; + td.name_span = s.ident.span; + td.value.struct_ = s; + + if (s.fields.len == 0) { + vec_push(&ctx->errors, err_empty(s.ident.span, ATStruct, td.type)); + } + } + + UnresolvedTypeDef *original = hashmap_get(untypds, &td); + if (original != NULL) { + vec_push(&ctx->errors, err_duplicate_def(original->name_span, td.name_span, ATIdent, original->type)); + vec_take(items, i); + i--; + // Update value to last definition + hashmap_set(untypds, &td); + } else { + hashmap_set(untypds, &td); + } + } + + // Check for type declarations cycles / and resolve type declarations (give them a value) + for (int i = 0; i < items->len; i++) { + if (items->data[i].tag != ATTypeDecl) { + continue; + } + + hashmap_clear(ctx->names); + + AstTypeDecl td = items->data[i].type_decl; + StringSlice name = string_slice_from_token(td.name); + hashmap_set(ctx->names, &name); + bool valid = true; + AstType value = td.value; + + SpanVec spans = vec_init(); + StringSliceVec idents = vec_init(); + vec_push(&spans, td.span); + vec_push(&idents, name); + while (true) { + // Skip indirections + while (value.tag == ATFieldArray || value.tag == ATHeapArray) { + value = *(AstType *)value.array.type; + } + // Value is now an AstIdent. 
+ SpannedStringSlice next = sss_from_token(value.ident.token); + + if (hashmap_set(ctx->names, &next.slice)) { + // We evaluate to a type we've already visited: cycle + + size_t index; + // Loop over idents (members of the cycle), set them as invalid and find the index + // of the first member of the cycle, the members before aren't actually part of it: + // A = B, B = C, C = B, A isn't part of the cycle (B <-> C) and shouldn't be reported + // (but is invalid) + for (size_t i = 0; i < idents.len; i++) { + if (string_slice_equal(&idents.data[i], &next.slice)) { + index = i; + } + + hashmap_set(ctx->typedefs, &(TypeDef){.name = idents.data[i], .value = NULL}); + } + + vec_splice(&spans, 0, index); + vec_splice(&idents, 0, index); + + EvalErrorCycle cycle; + cycle.tag = EETCycle; + cycle.type = ATTypeDecl; + cycle.spans = spans; + cycle.idents = idents; + // reinitialize the vectors to be dropped at the end. + // vec_init doesn't do any allocation so this is free + spans = (SpanVec)vec_init(); + idents = (StringSliceVec)vec_init(); + + EvalError err = {.cycle = cycle}; + + vec_push(&ctx->errors, err); + break; + } + + TypeDef *resolved = hashmap_get(ctx->typedefs, &(TypeDef){.name = next.slice}); + if (resolved != NULL) { + // The type declaration evaluates to a resolved type (a primitif type, or an invalid type) + if (resolved->value == NULL) { + // the type it evaluates to is invalid, so it is too + valid = false; + break; + } + // The type it evaluates to is valid: the type declaration doesn't contain any cycle + break; + } + + UnresolvedTypeDef *unr = hashmap_get(untypds, &(UnresolvedTypeDef){.type = next.slice}); + if (unr == NULL) { // The type evaluates to an unknown identifier + // Report error and set as invalid + vec_push(&ctx->errors, err_unknown(next.span, ATIdent, next.slice)); + valid = false; + break; + } + + if (unr->value.tag == ATStruct) { + // The type declarations evaluates to an (unresolved) struct: it can't cycle + break; + } else { + // The 
type declarations evaluates to another type declarations: we continue checking + vec_push(&spans, unr->span); + vec_push(&idents, next.slice); + value = unr->value.type; + } + } + + vec_drop(spans); + vec_drop(idents); + + if (!valid) { + // Set invalid + hashmap_set(ctx->typedefs, &(TypeDef){.name = name, .value = NULL}); + } + } + + hashmap_clear(ctx->names); + + // Resolves types (this accepts recursive types) + for (int i = 0; i < items->len; i++) { + if (items->data[i].tag != ATStruct && items->data[i].tag != ATTypeDecl) { + continue; + } + + SpannedStringSlice name; + if (items->data[i].tag == ATStruct) { + name = sss_from_token(items->data[i].struct_.ident); + } else { + name = sss_from_token(items->data[i].type_decl.name); + } + + resolve_type(ctx, name); + } + + Hashmap *checked = hashmap_init(pointer_hash, pointer_equal, NULL, sizeof(StructObject *)); + Hashmap *invalids = hashmap_init(pointer_hash, pointer_equal, NULL, sizeof(StructObject *)); + // Check for recursive types without indirections (infinite size) + for (int i = 0; i < items->len; i++) { + // TypeDecl can't be recursive + if (items->data[i].tag != ATStruct) { + continue; + } + + TypeDef *td = hashmap_get(ctx->typedefs, &(TypeDef){.name = string_slice_from_token(items->data[i].struct_.ident)}); + TypeObject *start = td->value; + StructObject *str = (StructObject *)&start->type.struct_; + + EvalErrorInfiniteStruct err = {.tag = EETInfiniteStruct, .fields = vec_init(), .structs = vec_init()}; + if (check_for_recursion(ctx, &err, checked, invalids, start, str)) { + EvalError e = {.infs = err}; + vec_push(&ctx->errors, e); + }; + hashmap_clear(checked); + } + + // Check structs for duplicate fields + Hashmap *names = hashmap_init(sss_hash, sss_equal, NULL, sizeof(SpannedStringSlice)); + for (int i = 0; i < items->len; i++) { + if (items->data[i].tag != ATStruct) { + continue; + } + + TypeDef *td = hashmap_get(ctx->typedefs, &(TypeDef){.name = 
string_slice_from_token(items->data[i].struct_.ident)}); + StructObject *str = (StructObject *)&td->value->type.struct_; + for (size_t i = 0; i < str->fields.len; i++) { + Field f = str->fields.data[i]; + SpannedStringSlice *prev = hashmap_get(names, &(SpannedStringSlice){.slice = f.name}); + if (prev != NULL) { + vec_push(&ctx->errors, err_duplicate_def(prev->span, f.name_span, ATField, f.name)); + continue; + } + hashmap_set(names, &(SpannedStringSlice){.slice = f.name, .span = f.name_span}); + } + hashmap_clear(names); + } + hashmap_drop(names); + + hashmap_drop(checked); + hashmap_drop(invalids); + hashmap_drop(untypds); + ctx->unresolved = NULL; +} + +static void resolve_constants(EvaluationContext *ctx) { + AstItemVec *items = ctx->items; + Hashmap *unconsts = hashmap_init(unconst_hash, unconst_equal, NULL, sizeof(UnresolvedConstant)); + Hashmap *names = ctx->names; + Hashmap *constants = ctx->constants; + + // Load unresolved constants into map (and check for duplicates) + for (int i = 0; i < items->len; i++) { + if (items->data[i].tag != ATConstant) { + continue; + } + + AstConstant c = items->data[i].constant; + UnresolvedConstant constant = + {.constant = string_slice_from_token(c.name), .name_span = c.name.span, .span = c.span, .value = c.value.token}; + UnresolvedConstant *original = hashmap_get(unconsts, &constant); + + if (original != NULL) { + vec_push(&ctx->errors, err_duplicate_def(original->name_span, constant.name_span, ATConstant, original->constant)); + vec_take(items, i); + i--; + // Update value to last + hashmap_set(unconsts, &constant); + } else { + hashmap_set(unconsts, &constant); + } + } + + for (size_t i = 0; i < items->len; i++) { + if (items->data[i].tag != ATConstant) { + continue; + } + + UnresolvedConstant *unc = + hashmap_get(unconsts, &(UnresolvedConstant){.constant = string_slice_from_token(items->data[i].constant.name)}); + hashmap_clear(names); + hashmap_set(names, &unc->constant); + Token value = unc->value; + while 
(value.type == Ident) { + StringSlice ident = string_slice_from_token(value); + Constant *resolved = hashmap_get(constants, &(Constant){.name = ident}); + // If the constant is set to another that is already resolved + if (resolved != NULL) { + if (!resolved->valid) { + // If the constant is invalid, break here, we know we won't be resolving this + break; + } + // We expect a token out of this loop, but we don't have one here, so we make one up that works + // only value.lit and value.type are read + value.type = Number; + value.lit = resolved->value; + break; + } + + if (hashmap_has(names, &ident)) { // Cycle detected on ident + EvalErrorCycle cycle; + cycle.tag = EETCycle; + cycle.type = ATConstant; + cycle.spans = (SpanVec)vec_init(); + cycle.idents = (StringSliceVec)vec_init(); + + // Walk the cycle again, keeping track of the spans, and marking every member + // as invalid + UnresolvedConstant *start = hashmap_get(unconsts, &(UnresolvedConstant){.constant = ident}); + UnresolvedConstant *cur = start; + do { + vec_push(&cycle.spans, cur->span); + vec_push(&cycle.idents, cur->constant); + hashmap_set(constants, &(Constant){.name = cur->constant, .value = 0, .valid = false}); + cur = hashmap_get(unconsts, &(UnresolvedConstant){.constant = string_slice_from_token(cur->value)}); + } while (cur != start); + + EvalError err = {.cycle = cycle}; + + vec_push(&ctx->errors, err); + break; + } + + // Get the constant the current is set to + UnresolvedConstant *c = hashmap_get(unconsts, &(UnresolvedConstant){.constant = ident}); + if (c == NULL) { // Constant doesn't exist + // throw error and mark invalid + vec_push(&ctx->errors, err_unknown(unc->value.span, ATConstant, ident)); + break; + } + + hashmap_set(names, &ident); + value = c->value; + } + + if (value.type == Ident) { // Constant couldn't be resolved + hashmap_set(constants, &(Constant){.name = unc->constant, .value = 0, .valid = false}); + } else { + hashmap_set(constants, &(Constant){.name = unc->constant, 
.value = value.lit, .valid = true}); + } + } + + hashmap_drop(unconsts); + hashmap_clear(names); +} + +static void resolve_messages(EvaluationContext *ctx) { + AstItemVec *items = ctx->items; + Hashmap *names = hashmap_init(sss_hash, sss_equal, NULL, sizeof(SpannedStringSlice)); + Hashmap *field_names = hashmap_init(sss_hash, sss_equal, NULL, sizeof(SpannedStringSlice)); + + ctx->messages = (MessagesObjectVec)vec_init(); + uint64_t version = ~0; + for (size_t i = 0; i < items->len; i++) { + if (items->data[i].tag == ATVersion) { + AstVersion v = items->data[i].version; + version = get_ast_number_value(ctx, v.version); + continue; + } + if (items->data[i].tag != ATMessages) { + continue; + } + AstMessages m = items->data[i].messages; + SpannedStringSlice name = sss_from_token(m.name); + Attributes attrs = AttrNone; + + MessagesObject res; + res.name = name.slice; + res.messages = (MessageObjectVec)vec_init(); + res.version = version; + + SpannedStringSlice *prev_name = hashmap_get(names, &name); + if (prev_name != NULL) { + vec_push(&ctx->errors, err_duplicate_def(prev_name->span, name.span, ATIdent, name.slice)); + } else { + hashmap_set(names, &name); + } + + for (size_t j = 0; j < m.children.len; j++) { + if (m.children.data[j].tag == ATAttribute) { + AstAttribute attr = m.children.data[j].attribute; + const char *a = attr.ident.lexeme; + uint32_t len = attr.ident.span.len; +#define _case(x) \ + if (strncmp(#x, a, sizeof(#x) - 1 > len ? 
sizeof(#x) - 1 : len) == 0) { \ + attrs |= Attr_##x; \ + continue; \ + } + _case(versioned); + + // If we get to here none of the above matched + vec_push(&ctx->errors, err_unknown(attr.ident.span, ATAttribute, string_slice_from_token(attr.ident))); +#undef _case + } else { + AstMessage msg = m.children.data[j].message; + + SpannedStringSlice name = sss_from_token(msg.ident); + + SpannedStringSlice *prev_name = hashmap_get(names, &name); + if (prev_name != NULL) { + vec_push(&ctx->errors, err_duplicate_def(prev_name->span, name.span, ATIdent, name.slice)); + } else { + hashmap_set(names, &name); + } + + MessageObject message; + message.name = name.slice; + message.attributes = attrs; + message.fields = (FieldVec)vec_init(); + vec_grow(&message.fields, msg.fields.len); + + if (msg.fields.len == 0) { + vec_push(&ctx->errors, err_empty(msg.ident.span, ATMessage, message.name)); + } + for (size_t k = 0; k < msg.fields.len; k++) { + Field f; + f.name = string_slice_from_token(msg.fields.data[k].name); + f.name_span = msg.fields.data[k].name.span; + f.type = ast_type_to_type_obj(ctx, msg.fields.data[k].type); + vec_push(&message.fields, f); + + SpannedStringSlice *prev = hashmap_get(field_names, &(SpannedStringSlice){.slice = f.name}); + if (prev != NULL) { + vec_push(&ctx->errors, err_duplicate_def(prev->span, f.name_span, ATField, f.name)); + continue; + } + hashmap_set(field_names, &(SpannedStringSlice){.slice = f.name, .span = f.name_span}); + } + + hashmap_clear(field_names); + + vec_push(&res.messages, message); + + // Reset attributes after a message + attrs = AttrNone; + } + } + + vec_push(&ctx->messages, res); + version = ~0; + } + + hashmap_drop(names); + hashmap_drop(field_names); +} + +void resolve_additional_type_info(EvaluationContext *ctx) { + // Resolve alignment of all living type objects + Hashmap *seen = hashmap_init(pointer_hash, pointer_equal, NULL, sizeof(TypeObject *)); + for (size_t i = 0; i < ctx->type_objects.len; i++) { + ((TypeObject 
*)ctx->type_objects.data[i])->align = resolve_alignment(ctx->type_objects.data[i], seen); + hashmap_clear(seen); + } + + // Compute type layouts + Hashmap *layouts = hashmap_init(layout_hash, layout_equal, layout_drop, sizeof(Layout)); + for (size_t i = 0; i < ctx->type_objects.len; i++) { + Layout l = type_layout(ctx->type_objects.data[i]); + hashmap_set(layouts, &l); + } +#define _case(x) \ + { \ + Layout l = type_layout((TypeObject *)&PRIMITIF_##x); \ + hashmap_set(layouts, &l); \ + } + _case(u8); + _case(u16); + _case(u32); + _case(u64); + _case(i8); + _case(i16); + _case(i32); + _case(i64); + _case(char); + _case(bool); +#undef _case + + ctx->layouts = layouts; + + hashmap_drop(seen); +} + +void program_drop(Program p) { + for (size_t i = 0; i < p.type_objects.len; i++) { + TypeObject *ptr = p.type_objects.data[i]; + if (ptr->kind == TypeStruct) { + StructObject *str = (StructObject *)&ptr->type.struct_; + vec_drop(str->fields); + } + free(ptr); + } + vec_drop(p.type_objects); + + hashmap_drop(p.typedefs); + hashmap_drop(p.layouts); + vec_drop(p.messages); +} + +// Resolve statics of an AST (constants and type declarations); +EvaluationResult resolve_statics(AstContext *ctx) { + EvaluationContext ectx; + // resolved constants: value is a number, and the constant may be invalid + ectx.constants = hashmap_init(const_hash, const_equal, NULL, sizeof(Constant)); + ectx.typedefs = hashmap_init(typedef_hash, typedef_equal, NULL, sizeof(TypeDef)); + + // Set of names used to check for cycles + ectx.names = hashmap_init(string_slice_hash, string_slice_equal, NULL, sizeof(StringSlice)); + ectx.unresolved = NULL; + ectx.items = &ctx->root->items.items; + ectx.errors = (EvalErrorVec)vec_init(); + ectx.type_objects = (PointerVec)vec_init(); + + { +#define add_prim(type_name, type_size) \ + do { \ + hashmap_set( \ + ectx.typedefs, \ + &(TypeDef){.name.ptr = #type_name, .name.len = sizeof(#type_name) - 1, .value = (TypeObject *)&PRIMITIF_##type_name} \ + ); \ + } while (0) + 
add_prim(u8, 1); + add_prim(u16, 2); + add_prim(u32, 4); + add_prim(u64, 8); + add_prim(i8, 1); + add_prim(i16, 2); + add_prim(i32, 4); + add_prim(i64, 8); + add_prim(f32, 4); + add_prim(f64, 8); + add_prim(char, 1); + add_prim(bool, 1); +#undef add_prim + } + + resolve_constants(&ectx); + resolve_types(&ectx); + resolve_messages(&ectx); + resolve_additional_type_info(&ectx); + + hashmap_drop(ectx.names); + hashmap_drop(ectx.constants); + + Program p; + p.typedefs = ectx.typedefs; + p.layouts = ectx.layouts; + p.type_objects = ectx.type_objects; + p.messages = ectx.messages; + + return (EvaluationResult){.program = p, .errors = ectx.errors}; +} diff --git a/ser/eval.h b/ser/eval.h new file mode 100644 index 0000000..0fbd3ce --- /dev/null +++ b/ser/eval.h @@ -0,0 +1,251 @@ +#ifndef EVAL_H +#define EVAL_H +#include "ast.h" +#include "source.h" +#include "utils.h" +#include "vector_impl.h" + +#include +#include + +#define _ALIGN_1 \ + { .po2 = 0, .mask = 0, .value = 1 } +#define _ALIGN_2 \ + { .po2 = 1, .mask = 1, .value = 2 } +#define _ALIGN_4 \ + { .po2 = 2, .mask = 3, .value = 4 } +#define _ALIGN_8 \ + { .po2 = 3, .mask = 7, .value = 8 } + +#define ALIGN_1 ((Alignment)_ALIGN_1) +#define ALIGN_2 ((Alignment)_ALIGN_2) +#define ALIGN_4 ((Alignment)_ALIGN_4) +#define ALIGN_8 ((Alignment)_ALIGN_8) + +typedef struct { + uint8_t po2; + uint8_t mask; + uint8_t value; +} Alignment; + +static inline uint32_t align(uint32_t v, Alignment align) { return (((v - 1) >> align.po2) + 1) << align.po2; } + +typedef enum { + SizingMax, + SizingFixed, +} Sizing; + +typedef enum { + Primitif_u8, + Primitif_u16, + Primitif_u32, + Primitif_u64, + Primitif_i8, + Primitif_i16, + Primitif_i32, + Primitif_i64, + Primitif_f32, + Primitif_f64, + Primitif_char, + Primitif_bool, +} PrimitifType; + +typedef struct { + Sizing sizing; + uint64_t size; + bool heap; + struct TypeObject *type; +} Array; + +void array_drop(Array a); + +typedef enum { + TypeArray, + TypePrimitif, + TypeStruct, +} 
TypeKind; + +// Definition of StructObject used by TypeUnion +// Must match with later StructObject +struct StructObject { + StringSlice name; + AnyVec fields; +}; + +typedef union { + Array array; + PrimitifType primitif; + struct StructObject struct_; +} TypeUnion; + +typedef struct TypeObject { + TypeKind kind; + Alignment align; + TypeUnion type; +} TypeObject; + +void type_drop(TypeObject t); + +typedef struct { + StringSlice name; + Span name_span; + TypeObject *type; +} Field; + +VECTOR_IMPL(Field, FieldVec, field); + +typedef struct { + StringSlice name; + FieldVec fields; +} StructObject; + +void struct_drop(StructObject s); + +typedef struct { + StringSlice name; + TypeObject *value; +} TypeDef; + +void type_decl_drop(TypeDef t); + +typedef enum : uint32_t { + AttrNone = 0, + Attr_versioned = 1 << 0, +} Attributes; + +static const uint32_t ATTRIBUTES_COUNT = 1; + +typedef struct { + StringSlice name; + FieldVec fields; + Attributes attributes; +} MessageObject; + +void message_drop(MessageObject msg); + +VECTOR_IMPL(MessageObject, MessageObjectVec, message_object, message_drop); + +typedef struct { + StringSlice name; + MessageObjectVec messages; + uint64_t version; +} MessagesObject; + +void messages_drop(MessagesObject msg); + +VECTOR_IMPL(MessagesObject, MessagesObjectVec, messages_object, messages_drop); + +typedef struct { + StringSlice name; + bool valid; + uint64_t value; +} Constant; + +typedef struct { + UInt64Vec indices; + // Size of the field, or 0 if it isn't constant + uint64_t size; + TypeObject *type; +} FieldAccessor; + +void field_accessor_drop(FieldAccessor fa); +FieldAccessor field_accessor_clone(FieldAccessor *fa); + +VECTOR_IMPL(FieldAccessor, FieldAccessorVec, field_accessor, field_accessor_drop); + +typedef struct { + FieldAccessorVec fields; + TypeObject *type; +} Layout; + +Layout type_layout(TypeObject *to); + +void layout_drop(void *l); + +typedef struct { + Hashmap *typedefs; + Hashmap *layouts; + MessagesObjectVec messages; + 
PointerVec type_objects; +} Program; + +void program_drop(Program p); + +typedef enum { + EETDuplicateDefinition, + EETUnknown, + EETCycle, + EETInfiniteStruct, + EETEmptyType, +} EvalErrorTag; + +typedef struct { + EvalErrorTag tag; + Span first; + Span second; + StringSlice ident; + AstTag type; +} EvalErrorDuplicateDefinition; + +typedef struct { + EvalErrorTag tag; + Span span; + StringSlice ident; + AstTag type; +} EvalErrorUnknown; + +typedef struct { + EvalErrorTag tag; + SpanVec spans; + StringSliceVec idents; + AstTag type; +} EvalErrorCycle; + +typedef struct { + EvalErrorTag tag; + SpannedStringSliceVec structs; + SpannedStringSliceVec fields; +} EvalErrorInfiniteStruct; + +typedef struct { + EvalErrorTag tag; + Span span; + StringSlice ident; + AstTag type; +} EvalErrorEmptyType; + +typedef union { + EvalErrorTag tag; + EvalErrorDuplicateDefinition dup; + EvalErrorUnknown unk; + EvalErrorCycle cycle; + EvalErrorInfiniteStruct infs; + EvalErrorEmptyType empty; +} EvalError; + +void eval_error_drop(EvalError err); +void eval_error_report(Source *src, EvalError *err); + +VECTOR_IMPL(EvalError, EvalErrorVec, eval_error, eval_error_drop); + +typedef struct { + EvalErrorVec errors; + Program program; +} EvaluationResult; + +EvaluationResult resolve_statics(AstContext *ctx); + +extern const TypeObject PRIMITIF_u8; +extern const TypeObject PRIMITIF_u16; +extern const TypeObject PRIMITIF_u32; +extern const TypeObject PRIMITIF_u64; +extern const TypeObject PRIMITIF_i8; +extern const TypeObject PRIMITIF_i16; +extern const TypeObject PRIMITIF_i32; +extern const TypeObject PRIMITIF_i64; +extern const TypeObject PRIMITIF_f32; +extern const TypeObject PRIMITIF_f64; +extern const TypeObject PRIMITIF_char; +extern const TypeObject PRIMITIF_bool; + +#endif diff --git a/ser/gen_vec.c b/ser/gen_vec.c new file mode 100644 index 0000000..83612e9 --- /dev/null +++ b/ser/gen_vec.c @@ -0,0 +1,88 @@ +#include "gen_vec.h" + +#include "assert.h" + +#include + +#define NONE (~0) + 
+typedef struct { + uint64_t gen; + size_t next_free; +} Entry; + +GenVec genvec_init(size_t data_size, DropFunction drop) { + GenVec res; + res.len = 0; + res.count = 0; + res.size = data_size; + res.entry_size = data_size + sizeof(uint64_t); + res.drop = drop; + res.cap = 0; + res.data = NULL; + res.last_free = NONE; + res.gen = 1; + return res; +} + +static void genvec_grow(GenVec *v, size_t cap) { + if (v->cap >= cap) + return; + cap = v->cap * 2 > cap ? v->cap * 2 : cap; + if (v->cap != 0) { + v->data = realloc(v->data, cap * v->entry_size); + } else { + v->data = malloc(cap * v->entry_size); + } + assert_alloc(v->data); + v->cap = cap; +} + +GenIndex genvec_push(GenVec *v, void *item) { + if (v->last_free == NONE) { + genvec_grow(v, v->len + 1); + byte *ptr = v->data + v->len++ * v->entry_size; + ((Entry *)ptr)->gen = v->gen; + memcpy(ptr + sizeof(Entry), item, v->size); + v->count++; + return (GenIndex){.gen = v->gen, .index = v->len - 1}; + } else { + size_t index = v->last_free; + byte *ptr = v->data + index * v->entry_size; + Entry *entry = (Entry *)ptr; + v->last_free = entry->next_free; + entry->gen = v->gen; + memcpy(ptr + sizeof(Entry), item, v->size); + v->count++; + return (GenIndex){.gen = v->gen, .index = index}; + } +} + +void genvec_remove(GenVec *v, GenIndex idx) { + byte *ptr = v->data + idx.index * v->entry_size; + Entry *entry = (Entry *)ptr; + if (!entry->gen || entry->gen != idx.gen) + return; + entry->gen = 0; + entry->next_free = v->last_free; + v->last_free = idx.index; + if (v->drop != NULL) { + v->drop(ptr + sizeof(Entry)); + } + v->count--; + v->gen++; +} + +void *genvec_get(GenVec *v, GenIndex idx) { + byte *ptr = v->data + idx.index * v->entry_size; + Entry *entry = (Entry *)ptr; + if (!entry->gen || entry->gen != idx.gen) + return NULL; + return ptr + sizeof(Entry); +} + +void genvec_drop(GenVec v) { + if (v.cap >= 0) { + free(v.data); + } +} diff --git a/ser/gen_vec.h b/ser/gen_vec.h new file mode 100644 index 0000000..7188f7e 
--- /dev/null +++ b/ser/gen_vec.h @@ -0,0 +1,32 @@ +#ifndef GEN_VEC_H +#define GEN_VEC_H +#include +#include +#include +typedef unsigned char byte; +typedef void (*DropFunction)(void *item); + +typedef struct { + size_t size; + size_t entry_size; + size_t cap; + size_t len; + size_t count; + uint64_t gen; + byte *data; + size_t last_free; + DropFunction drop; +} GenVec; + +typedef struct { + uint64_t gen; + size_t index; +} GenIndex; + +GenVec genvec_init(size_t data_size, DropFunction drop); +GenIndex genvec_push(GenVec *v, void *item); +void genvec_remove(GenVec *v, GenIndex idx); +void *genvec_get(GenVec *v, GenIndex idx); +void genvec_drop(GenVec v); + +#endif diff --git a/ser/grammar.bnf b/ser/grammar.bnf new file mode 100644 index 0000000..37bd741 --- /dev/null +++ b/ser/grammar.bnf @@ -0,0 +1,19 @@ +items -> item* ; +item -> align | type_decl | struct | messages | constant; + +type_decl -> "type" IDENT "=" type ";" +align -> "align" "(" number ")" ";" +struct -> "struct" IDENT "{" field ("," field)* ","? "}" ; +messages -> "messages" IDENT "{" message* "}" ; +constant -> "const" IDENT "=" number ";" ; + +field -> IDENT ":" type ; +number -> NUMBER | IDENT ; +message -> IDENT "{" field ("," field)* ","? 
"}" ; + +type -> IDENT | heap_array | field_array ; +heap_array -> type "&" "[" "]" | type "[" "]" + | type "&" "[" max_size | fixed_size "]" ; +field_array -> type "[" max_size | fixed_size "]" ; +max_size -> "^" number ; +fixed_size -> number ; diff --git a/ser/hashmap.c b/ser/hashmap.c new file mode 100644 index 0000000..34ca951 --- /dev/null +++ b/ser/hashmap.c @@ -0,0 +1,346 @@ +#include "hashmap.h" + +#include "assert.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include + +#if __BYTE_ORDER__ == __LITTLE_ENDIAN +#define U32TO8_LE(p, v) (*(uint32_t *)(p) = v) +#define U8TO32_LE(p) (*(uint32_t *)(p)) +#else +#define U32TO8_LE(p, v) \ + do { \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); \ + } while (0) + +#define U8TO32_LE(p) (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) +#endif + +#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b)))) +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 5); \ + v1 ^= v0; \ + v0 = ROTL(v0, 16); \ + v2 += v3; \ + v3 = ROTL(v3, 8); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 7); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v2; \ + v2 = ROTL(v2, 16); \ + } while (0) + +// Kinda useless check +_Static_assert(sizeof(uint32_t) == 4, "uint32_t isn't 4 bytes"); + +uint32_t hash(Hasher state, const byte *data, const size_t len) { + uint32_t v0 = 0, v1 = 0, v2 = UINT32_C(0x6c796765), v3 = UINT32_C(0x74656462); + uint32_t k0 = U8TO32_LE((byte *)&state.key), k1 = U8TO32_LE(((byte *)&state.key) + 4); + uint32_t m; + // Pointer to the end of the last 4 byte block + const byte *end = data + len - (len % sizeof(uint32_t)); + const int left = len % sizeof(uint32_t); + uint32_t b = ((uint32_t)len) << 24; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for (; data != end; data += 4) { + m = U8TO32_LE(data); + v3 ^= m; + for (int i 
= 0; i < 2; i++) { + SIPROUND; + } + v0 ^= m; + } + + switch (left) { + case 3: + b |= ((uint32_t)data[2]) << 16; + case 2: + b |= ((uint32_t)data[1]) << 8; + case 1: + b |= ((uint32_t)data[0]); + } + + v3 ^= b; + v2 ^= 0xff; + + for (int i = 0; i < 4; i++) { + SIPROUND; + } + + return v1 ^ v3; +} + +Hasher hasher_init() { + static Hasher HASHER = {.key = UINT64_C(0x5E3514A61CC01657)}; + static uint64_t COUNT = 0; + struct timespec ts; + timespec_get(&ts, TIME_UTC); + ts.tv_nsec += COUNT++; + ts.tv_sec ^= ts.tv_nsec; + uint64_t k; + ((uint32_t *)&k)[0] = hash(HASHER, (byte *)&ts.tv_sec, sizeof(ts.tv_sec)); + ((uint32_t *)&k)[1] = hash(HASHER, (byte *)&ts.tv_nsec, sizeof(ts.tv_nsec)); + // return (Hasher){.key = k}; + // TODO: change that back + return (Hasher){.key = 113223440}; +} + +// Must be a power of 2 +#define HASHMAP_BASE_CAP 64 +#define MAX_ITEMS(cap) (cap / (2)) + +typedef struct { + uint32_t hash; + bool occupied; +} __attribute__((aligned(8))) Bucket; + +Hashmap *hashmap_init(HashFunction hash, EqualFunction equal, DropFunction drop, size_t data_size) { + size_t aligned_size = (((data_size - 1) >> 3) + 1) << 3; + size_t entry_size = sizeof(Bucket) + aligned_size; + byte *alloc = malloc(sizeof(Hashmap)); + byte *buckets = malloc(HASHMAP_BASE_CAP * entry_size); + assert_alloc(alloc); + assert_alloc(buckets); + Hashmap *map = (Hashmap *)alloc; + map->size = data_size; + map->aligned_size = aligned_size; + map->entry_size = sizeof(Bucket) + aligned_size; + map->cap = HASHMAP_BASE_CAP; + map->mask = HASHMAP_BASE_CAP - 1; + map->count = 0; + map->max = MAX_ITEMS(HASHMAP_BASE_CAP); + map->state = hasher_init(); + map->hash = hash; + map->equal = equal; + map->drop = drop; + map->alloc = alloc; + map->buckets = buckets; + map->buckets_end = map->buckets + HASHMAP_BASE_CAP * map->entry_size; + + for (size_t i = 0; i < HASHMAP_BASE_CAP; i++) { + ((Bucket *)buckets)->occupied = false; + buckets += map->entry_size; + } + + return map; +} + +// Return the first 
empty bucket or the first matching bucket +static inline __attribute__((always_inline)) byte *hashmap_bucket(Hashmap *map, const void *item, uint32_t hash, size_t *rindex) { + int32_t index = hash & map->mask; + byte *ptr = map->buckets + index * map->entry_size; + while (((Bucket *)ptr)->occupied && (((Bucket *)ptr)->hash != hash || !map->equal(item, ptr + sizeof(Bucket)))) { + ptr += map->entry_size; + index++; + if (ptr >= map->buckets_end) { + ptr = map->buckets; + index = 0; + } + } + if (rindex != NULL) { + *rindex = index; + } + return ptr; +} + +static bool hashmap_insert(Hashmap *map, const void *item, uint32_t hash) { + byte *ptr = hashmap_bucket(map, item, hash, NULL); + Bucket *bucket = (Bucket *)ptr; + void *dst = ptr + sizeof(Bucket); + bool replace = bucket->occupied; + if (map->drop != NULL && replace) { + map->drop(dst); + } + + bucket->hash = hash; + bucket->occupied = true; + memcpy(dst, item, map->size); + if (!replace) { + map->count++; + } + return replace; +} + +// Grow hashmap to double the size +static void hashmap_grow(Hashmap *map) { + byte *old_buckets = map->buckets; + size_t old_cap = map->cap; + + map->cap *= 2; + map->mask = map->cap - 1; + map->count = 0; + map->max = MAX_ITEMS(map->cap); + map->buckets = malloc(map->cap * map->entry_size); + assert_alloc(map->buckets); + map->buckets_end = map->buckets + map->cap * map->entry_size; + + for (byte *ptr = map->buckets; ptr < map->buckets_end; ptr += map->entry_size) { + ((Bucket *)ptr)->occupied = false; + } + + byte *ptr = old_buckets; + for (size_t i = 0; i < old_cap; i++) { + Bucket *bucket = (Bucket *)ptr; + void *item = ptr + sizeof(Bucket); + if (bucket->occupied) { + hashmap_insert(map, item, bucket->hash); + } + ptr += map->entry_size; + } + + free(old_buckets); +} + +bool hashmap_set(Hashmap *map, const void *item) { + if (map->count >= map->max) { + hashmap_grow(map); + } + + uint32_t hash = map->hash(map->state, item); + return hashmap_insert(map, item, hash); +} + +void 
*hashmap_get(Hashmap *map, const void *key) { + uint32_t hash = map->hash(map->state, key); + byte *ptr = hashmap_bucket(map, key, hash, NULL); + Bucket *bucket = (Bucket *)ptr; + void *res = ptr + sizeof(Bucket); + if (!bucket->occupied) { + return NULL; + } else { + return res; + } +} + +bool hashmap_has(Hashmap *map, const void *key) { + uint32_t hash = map->hash(map->state, key); + byte *ptr = hashmap_bucket(map, key, hash, NULL); + Bucket *bucket = (Bucket *)ptr; + + return bucket->occupied; +} + +bool hashmap_take(Hashmap *map, const void *key, void *dst) { + uint32_t hash = map->hash(map->state, key); + byte *ptr = hashmap_bucket(map, key, hash, NULL); + Bucket *bucket = (Bucket *)ptr; + void *item = ptr + sizeof(Bucket); + + if (!bucket->occupied) { + return false; + } + + map->count--; + if (dst == NULL && map->drop != NULL) { + map->drop(item); + } else if (dst != NULL) { + memcpy(dst, item, map->size); + } + + byte *nptr = ptr; + while (true) { + // Kinda jank ? better solution ? 
+ size_t index = (uintptr_t)(ptr - map->buckets) / map->entry_size; + + nptr += map->entry_size; + if (nptr >= map->buckets_end) { + nptr = map->buckets; + } + + while (((Bucket *)nptr)->occupied && (((Bucket *)nptr)->hash & map->mask) > index) { + nptr += map->entry_size; + if (nptr >= map->buckets_end) { + nptr = map->buckets; + } + } + + if (!((Bucket *)nptr)->occupied) { + bucket->occupied = false; + return true; + } + + *bucket = *(Bucket *)nptr; + memcpy(item, nptr + sizeof(Bucket), map->size); + + ptr = nptr; + bucket = (Bucket *)ptr; + item = ptr + sizeof(Bucket); + } +} + +void hashmap_clear(Hashmap *map) { + if (map->count == 0) + return; + + for (byte *ptr = map->buckets; ptr < map->buckets_end; ptr += map->entry_size) { + if (map->drop != NULL) { + map->drop(ptr + sizeof(Bucket)); + } + ((Bucket *)ptr)->occupied = false; + } + map->count = 0; +} + +bool hashmap_iter(Hashmap *map, void *iter_) { + void **iter = (void **)iter_; + if (*iter == NULL) { + if (map->count == 0) { + return false; + } + byte *ptr = map->buckets; + while (!((Bucket *)ptr)->occupied) { + ptr += map->entry_size; + } + *iter = ptr + sizeof(Bucket); + return true; + } + + byte *ptr = ((byte *)(*iter)) - sizeof(Bucket); + ptr += map->entry_size; + if (ptr >= map->buckets_end) + return false; + while (!((Bucket *)ptr)->occupied) { + ptr += map->entry_size; + if (ptr >= map->buckets_end) { + return false; + } + } + + *iter = ptr + sizeof(Bucket); + return true; +} + +void hashmap_drop(Hashmap *map) { + if (map->drop != NULL) { + byte *ptr = map->buckets; + for (size_t i = 0; i < map->cap; i++) { + Bucket *bucket = (Bucket *)ptr; + if (bucket->occupied) { + void *item = ptr + sizeof(Bucket); + map->drop(item); + } + ptr += map->entry_size; + } + } + + free(map->buckets); + free(map->alloc); +} diff --git a/ser/hashmap.h b/ser/hashmap.h new file mode 100644 index 0000000..4952b35 --- /dev/null +++ b/ser/hashmap.h @@ -0,0 +1,87 @@ +#ifndef HASHMAP_H +#define HASHMAP_H + +typedef unsigned 
char byte;
+
+#include "gen_vec.h"
+
+#include
+#include
+#include
+
+// State used to seed the hash function
+typedef struct {
+    uint64_t key;
+} Hasher;
+
+// Create new hasher with a pseudo random state
+Hasher hasher_init();
+// Hash given data with hasher
+uint32_t hash(Hasher state, const byte *data, const size_t len);
+
+// Callbacks provided by the user of the hashmap
+// Hash an item (or a key) with the given hasher state
+typedef uint32_t (*HashFunction)(Hasher state, const void *item);
+// Compare two items (or keys) for equality
+typedef bool (*EqualFunction)(const void *a, const void *b);
+// Release the resources owned by an item, may be NULL
+typedef void (*DropFunction)(void *item);
+
+typedef struct {
+    // Size in bytes of one item (amount copied in and out of a slot)
+    size_t size;
+    // NOTE(review): presumably `size` rounded up to the required alignment - confirm in hashmap_init
+    size_t aligned_size;
+    // Distance in bytes between two consecutive slots (Bucket header + item)
+    size_t entry_size;
+    // Number of slots
+    size_t cap;
+    // Mask applied to a hash to get a slot index
+    size_t mask;
+    // Number of occupied slots
+    size_t count;
+    // NOTE(review): presumably the occupancy threshold that triggers a resize - confirm
+    size_t max;
+    // Hasher state passed to `hash`
+    Hasher state;
+    // First slot and one past the last slot
+    byte *buckets;
+    byte *buckets_end;
+    // NOTE(review): freed alongside `buckets` in hashmap_drop, exact role not visible here
+    byte *alloc;
+    HashFunction hash;
+    EqualFunction equal;
+    DropFunction drop;
+} Hashmap;
+
+typedef struct {
+    Hashmap *map;
+    GenVec items;
+} StableHashmap;
+
+// Initialize a new hashmap
+Hashmap *hashmap_init(HashFunction hash, EqualFunction equal, DropFunction drop, size_t data_size);
+// Insert value in hashmap, returns true if the value was overwritten
+bool hashmap_set(Hashmap *map, const void *item);
+// Get value of hashmap, return NULL if not found
+void *hashmap_get(Hashmap *map, const void *key);
+// Take a value from a hashmap and put it into dst (if dst is NULL the value is dropped instead),
+// returns false if the key was not found
+bool hashmap_take(Hashmap *map, const void *key, void *dst);
+// Destroy hashmap
+void hashmap_drop(Hashmap *map);
+// Delete entry from hashmap, returns false if the key was not found
+static inline __attribute__((always_inline)) bool hashmap_delete(Hashmap *map, const void *key) {
+    return hashmap_take(map, key, NULL);
+}
+// Check if hashmap contains key
+bool hashmap_has(Hashmap *map, const void *key);
+// Clear hashmap of all entries
+void hashmap_clear(Hashmap *map);
+// Iterate hashmap: iter points to a `void *` cursor that must start as NULL, returns false when
+// there are no more items
+bool hashmap_iter(Hashmap *map, void *iter);
+
+// Define prefix##_hash and prefix##_equal for `type`.
+// `hash` is the body of the hash function (has access to `state` and `v`), `equal` is the body
+// of the equality function (has access to `a` and `b`).
+#define impl_hashmap(prefix, type, hash, equal) \
+    uint32_t prefix##_hash(Hasher state, const void *_v) { \
+        type *v = (type *)_v; \
+        hash \
+    } \
+    bool prefix##_equal(const void *_a, const void *_b) { \
+        type *a = (type *)_a; \
+        type *b = (type *)_b; \
+        equal \
+    } \
+    _Static_assert(1, "Semicolon required")
+// Same as impl_hashmap, but hashes and compares the `accessor` member of `type` with the
+// delegate##_hash and delegate##_equal functions
+#define impl_hashmap_delegate(prefix, type, delegate, accessor) \
+    impl_hashmap( \
+        prefix, \
+        type, \
+        { return delegate##_hash(state, &v->accessor); }, \
+        { return delegate##_equal(&a->accessor, &b->accessor); } \
+    )
+
+#endif
diff --git a/ser/lexer.c b/ser/lexer.c
new file mode 100644
index 0000000..ecc06ee
--- /dev/null
+++ b/ser/lexer.c
@@ -0,0 +1,372 @@
+#include "lexer.h"
+
+#include "vector.h"
+
+#include
+#include
+
+// State of the lexer
+typedef struct {
+    // Offset of the first character of the lexeme being scanned
+    uint32_t start;
+    // Offset of the next character to read
+    uint32_t current;
+    Source *src;
+    // Location of the next character to read
+    Location loc;
+    // Location of the lexeme being scanned
+    Location start_loc;
+    TokenVec tokens;
+    LexingErrorVec errors;
+} Lexer;
+
+// Build a token
+// NOTE(review): IF_DEBUG is defined outside this view; presumably src->ref_count is only tracked in
+// debug builds - confirm.
+static inline __attribute__((always_inline)) Token
+token(Source *src, TokenType type, const char *lexeme, uint32_t len, uint64_t lit, Location loc) {
+    IF_DEBUG(src->ref_count++);
+    return (Token){
+        .src = src,
+        .lit = lit,
+        .span.loc = loc,
+        .span.len = len,
+        .type = type,
+        .lexeme = lexeme,
+    };
+}
+// Build a lexing error covering `len` characters starting at `loc`
+static inline __attribute__((always_inline)) LexingError
+lexing_error(Source *src, LexingErrorType type, Location loc, uint32_t len) {
+    IF_DEBUG(src->ref_count++);
+    return (LexingError){
+        .src = src,
+        .type = type,
+        .span.loc = loc,
+        .span.len = len,
+    };
+}
+
+void token_drop(Token t) { IF_DEBUG(t.src->ref_count--); }
+
+void lexing_error_drop(LexingError e) { IF_DEBUG(e.src->ref_count--); }
+
+// Destroy both the tokens and the errors of a lexing result
+void lexing_result_drop(LexingResult res) {
+    vec_drop(res.tokens);
+    vec_drop(res.errors);
+}
+
+// Create a lexer positioned at the start of `src`
+static Lexer lexer_init(Source *src) {
+    TokenVec tokens = vec_init();
+    // NOTE(review): presumably reserves room for 256 tokens up front - confirm vec_grow semantics
+    vec_grow(&tokens, 256);
+    return (Lexer){
+        .start = 0,
+        .current = 0,
+        .src = src,
+        .loc = location(1, 0, 0),
+        .start_loc = location(1, 1, 0),
+        .tokens = tokens,
+        .errors = vec_init(),
+    };
+}
+
+// Push a token of `len` characters starting at lex->start (located at lex->start_loc)
+static void lexer_add_token(Lexer *lex, TokenType type, uint32_t len, uint64_t lit) {
+    vec_push(&lex->tokens, token(lex->src, type, &lex->src->str[lex->start], len, lit, lex->start_loc));
+}
+
+static void lexer_add_error(Lexer *lex, LexingErrorType type,
uint32_t len) {
+    vec_push(&lex->errors, lexing_error(lex->src, type, lex->start_loc, len));
+}
+
+// Consume and return the next character, keeping lex->loc in sync (a newline starts a new line
+// and resets the column)
+static char lexer_advance(Lexer *lex) {
+    char c = lex->src->str[lex->current++];
+    lex->loc.offset = lex->current;
+    lex->loc.column++;
+    if (c == '\n') {
+        lex->loc.line++;
+        lex->loc.column = 0;
+    }
+    return c;
+}
+
+// Consume the next character if it is `exp`, returns whether it was consumed
+static bool lexer_match(Lexer *lex, char exp) {
+    if (lex->current >= lex->src->len)
+        return false;
+    if (lex->src->str[lex->current] != exp)
+        return false;
+    lexer_advance(lex);
+    return true;
+}
+
+// Consume the next character unless it is `unexp` (or the end of the source was reached),
+// returns whether it was consumed
+static bool lexer_match_not(Lexer *lex, char unexp) {
+    if (lex->current >= lex->src->len)
+        return false;
+    if (lex->src->str[lex->current] == unexp)
+        return false;
+    lexer_advance(lex);
+    return true;
+}
+
+// Next character, without consuming it
+// NOTE(review): not bounds checked, relies on src->str[src->len] being readable (presumably the
+// NUL terminator) - confirm for every way a Source can be created.
+static char lexer_peek(Lexer *lex) { return lex->src->str[lex->current]; }
+
+inline static bool is_digit(char c) { return c >= '0' && c <= '9'; }
+inline static uint64_t to_digit(char c) { return c - '0'; }
+inline static bool is_ident_start(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
+inline static bool is_ident(char c) { return is_ident_start(c) || is_digit(c) || c == '_'; }
+
+// Scan a decimal number literal whose first digit has already been consumed.
+// A Number token is always added; if the value doesn't fit in 64 bits a
+// LexingErrorNumberLiteralOverflow error is added as well (the token's value has wrapped then).
+static void lexer_scan_number(Lexer *lex) {
+    // Get first digit (the one we already passed)
+    uint64_t lit = to_digit(lex->src->str[lex->start]);
+    uint32_t len = 1;
+    char c = lexer_peek(lex);
+    bool overflow = false;
+    while (is_digit(c)) {
+        uint64_t digit = to_digit(c);
+        // lit * 10 + digit doesn't fit iff lit > (UINT64_MAX - digit) / 10. Comparing the wrapped
+        // result with the previous value is not enough: the product can wrap and still be larger.
+        if (lit > (UINT64_MAX - digit) / 10) {
+            overflow = true;
+        }
+        lit = lit * 10 + digit;
+        lexer_advance(lex);
+        c = lexer_peek(lex);
+        len++;
+    }
+
+    if (overflow) {
+        lexer_add_error(lex, LexingErrorNumberLiteralOverflow, len);
+    }
+
+    lexer_add_token(lex, Number, len, lit);
+}
+
+static uint32_t u32max(uint32_t a, uint32_t b) { return a > b ?
a : b; }
+
+// Scan an identifier or a keyword whose first character has already been consumed
+static inline __attribute__((always_inline)) void lexer_scan_ident(Lexer *lex) {
+    uint32_t len = 1;
+    while (is_ident(lexer_peek(lex))) {
+        lexer_advance(lex);
+        len++;
+    }
+    const char *s = &lex->src->str[lex->start];
+// The comparison covers the longest of the keyword and the lexeme, so that neither a prefix of a
+// keyword nor an identifier starting with a keyword matches.
+#define handle(x, str) else if (strncmp(str, s, u32max(sizeof(str) - 1, len)) == 0) lexer_add_token(lex, x, len, 0)
+    if (false)
+        ;
+    handle(Messages, "messages");
+    handle(Struct, "struct");
+    handle(Version, "version");
+    handle(Const, "const");
+    handle(Type, "type");
+    else lexer_add_token(lex, Ident, len, 0);
+#undef handle
+}
+
+// Scan a single lexeme starting at lex->current: adds at most one token or one error, whitespace
+// and `//` comments produce nothing.
+static void lexer_scan(Lexer *lex) {
+    char c = lexer_advance(lex);
+    switch (c) {
+    case '(':
+        lexer_add_token(lex, LeftParen, 1, 0);
+        break;
+    case ')':
+        lexer_add_token(lex, RightParen, 1, 0);
+        break;
+    case '{':
+        lexer_add_token(lex, LeftBrace, 1, 0);
+        break;
+    case '}':
+        lexer_add_token(lex, RightBrace, 1, 0);
+        break;
+    case '[':
+        lexer_add_token(lex, LeftBracket, 1, 0);
+        break;
+    case ']':
+        lexer_add_token(lex, RightBracket, 1, 0);
+        break;
+    case ',':
+        lexer_add_token(lex, Comma, 1, 0);
+        break;
+    case ';':
+        lexer_add_token(lex, Semicolon, 1, 0);
+        break;
+    case '&':
+        lexer_add_token(lex, Ampersand, 1, 0);
+        break;
+    case '^':
+        lexer_add_token(lex, Caret, 1, 0);
+        break;
+    case ':':
+        lexer_add_token(lex, Colon, 1, 0);
+        break;
+    case '=':
+        lexer_add_token(lex, Equal, 1, 0);
+        break;
+    case '#':
+        lexer_add_token(lex, Hash, 1, 0);
+        // Without this break a '#' fell through to the comment handling below, and "#/" swallowed
+        // the rest of the line.
+        break;
+    case '/':
+        if (lexer_match(lex, '/')) {
+            // Line comment: skip everything up to (but not including) the newline
+            while (lexer_match_not(lex, '\n'))
+                ;
+        } else {
+            // A lone '/' is not part of the language, report it instead of silently dropping it
+            lexer_add_error(lex, LexingErrorUnexpectedCharacter, 1);
+        }
+        break;
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+        break;
+    default:
+        if (is_digit(c)) {
+            lexer_scan_number(lex);
+        } else if (is_ident_start(c)) {
+            lexer_scan_ident(lex);
+        } else {
+            // Try to merge with the last error if possible
+            if (lex->errors.len > 0) {
+                LexingError *last_err = &lex->errors.data[lex->errors.len - 1];
+                if (last_err->span.loc.line == lex->loc.line && last_err->type == LexingErrorUnexpectedCharacter &&
+                    last_err->span.loc.column +
last_err->span.len == lex->start_loc.column) {
+                    // The previous error ends right where this character starts: extend it
+                    last_err->span.len++;
+                    break;
+                }
+            }
+            lexer_add_error(lex, LexingErrorUnexpectedCharacter, 1);
+        }
+    }
+}
+
+// Scan the whole source, then append an Eof token of length 0 located at the end of the source
+static void lexer_lex(Lexer *lex) {
+    while (lex->current < lex->src->len) {
+        // Every lexeme starts where the previous one ended
+        lex->start = lex->current;
+        lex->start_loc = lex->loc;
+        lexer_scan(lex);
+    }
+
+    lex->start = lex->current;
+    lex->start_loc = lex->loc;
+
+    lexer_add_token(lex, Eof, 0, 0);
+}
+
+// Move the tokens and errors out of the lexer
+static LexingResult lexer_finish(Lexer lex) {
+    return (LexingResult){
+        .errors = lex.errors,
+        .tokens = lex.tokens,
+    };
+}
+
+// Lex `src` into tokens, the last token is always Eof.
+// The returned vectors are owned by the caller (see lexing_result_drop).
+LexingResult lex(Source *src) {
+    Lexer lex = lexer_init(src);
+    lexer_lex(&lex);
+    return lexer_finish(lex);
+}
+
+// Report a lexing error through source_report, with the error's span highlighted
+void lexing_error_report(LexingError *le) {
+    ReportSpan span = {.span = le->span, .sev = ReportSeverityError};
+#define report(fmt, ...) source_report(le->src, le->span.loc, ReportSeverityError, &span, 1, NULL, fmt, __VA_ARGS__);
+    switch (le->type) {
+    case LexingErrorUnexpectedCharacter:
+        report("Unexpected character%s '%.*s'", le->span.len > 1 ?
"s" : "", (int)le->span.len, &le->src->str[le->span.loc.offset]);
+        break;
+    case LexingErrorNumberLiteralOverflow:
+        // The precision of %.*s must be an int and UINT64_MAX is not an unsigned long on every
+        // platform, hence the casts.
+        report(
+            "number literal '%.*s' overflows max value of %llu",
+            (int)le->span.len,
+            &le->src->str[le->span.loc.offset],
+            (unsigned long long)UINT64_MAX
+        );
+        break;
+    default:
+        break;
+    }
+#undef report
+}
+
+// Human readable description of a set of token types, for example "':', ',' or identifier".
+// `t` is a bitwise OR of TokenType values. The returned string is NUL terminated and is taken
+// from the CharVec's buffer (parsing_error_report releases it with free).
+char *token_type_string(TokenType t) {
+    // Split the set into its individual types, in declaration order
+    TokenType types[TOKEN_TYPE_COUNT];
+    size_t count = 0;
+#define handle(type) \
+    if (t & type) \
+    types[count++] = type
+    handle(LeftParen);
+    handle(RightParen);
+    handle(LeftBrace);
+    handle(RightBrace);
+    handle(LeftBracket);
+    handle(RightBracket);
+    handle(Comma);
+    handle(Semicolon);
+    handle(Ampersand);
+    handle(Caret);
+    handle(Colon);
+    handle(Equal);
+    // Hash was missing from this list, so '#' never showed up in the description
+    handle(Hash);
+    handle(Ident);
+    handle(Number);
+    handle(Messages);
+    handle(Struct);
+    handle(Version);
+    handle(Const);
+    handle(Type);
+    handle(Eof);
+#undef handle
+    CharVec str = vec_init();
+    for (size_t i = 0; i < count; i++) {
+        // Separator: nothing before the first type, " or " before the last one, ", " otherwise
+        if (i == 0) {
+        } else if (i == count - 1) {
+            vec_push_array(&str, " or ", 4);
+        } else {
+            vec_push_array(&str, ", ", 2);
+        }
+
+        // The length passed with each string must not include the NUL terminator
+        switch (types[i]) {
+        case LeftParen:
+            vec_push_array(&str, "'('", 3);
+            break;
+        case RightParen:
+            vec_push_array(&str, "')'", 3);
+            break;
+        case LeftBrace:
+            vec_push_array(&str, "'{'", 3);
+            break;
+        case RightBrace:
+            vec_push_array(&str, "'}'", 3);
+            break;
+        case LeftBracket:
+            vec_push_array(&str, "'['", 3);
+            break;
+        case RightBracket:
+            vec_push_array(&str, "']'", 3);
+            break;
+        case Comma:
+            vec_push_array(&str, "','", 3);
+            break;
+        case Semicolon:
+            vec_push_array(&str, "';'", 3);
+            break;
+        case Ampersand:
+            vec_push_array(&str, "'&'", 3);
+            break;
+        case Caret:
+            vec_push_array(&str, "'^'", 3);
+            break;
+        case Colon:
+            vec_push_array(&str, "':'", 3);
+            break;
+        case Equal:
+            vec_push_array(&str, "'='", 3);
+            break;
+        case Hash:
+            vec_push_array(&str, "'#'", 3);
+            break;
+        case Ident:
+            vec_push_array(&str, "identifier", 10);
+            break;
+        case Number:
+            // 14 characters: a length of 15 also copied the NUL terminator, which cut the
+            // description short right after "number literal"
+            vec_push_array(&str, "number literal", 14);
+            break;
+        case
Messages:
+            vec_push_array(&str, "keyword messages", 16);
+            break;
+        case Struct:
+            vec_push_array(&str, "keyword struct", 14);
+            break;
+        case Version:
+            vec_push_array(&str, "keyword version", 15);
+            break;
+        case Const:
+            vec_push_array(&str, "keyword const", 13);
+            break;
+        case Type:
+            vec_push_array(&str, "keyword type", 12);
+            break;
+        case Eof:
+            vec_push_array(&str, "end of file", 11);
+            break;
+        }
+    }
+
+    // Terminate the string and hand the vector's buffer over to the caller
+    vec_push(&str, '\0');
+    return str.data;
+}
diff --git a/ser/lexer.h b/ser/lexer.h
new file mode 100644
index 0000000..de4a931
--- /dev/null
+++ b/ser/lexer.h
@@ -0,0 +1,107 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include "source.h"
+#include "vector_impl.h"
+
+#include
+
+// Type of a token. Each type is a distinct bit so that sets of types can be built with a
+// bitwise OR (used for expected tokens and synchronization points in the parser).
+typedef enum : uint32_t {
+    LeftParen = 1 << 0,
+    RightParen = 1 << 1,
+    LeftBrace = 1 << 2,
+    RightBrace = 1 << 3,
+    LeftBracket = 1 << 4,
+    RightBracket = 1 << 5,
+    Comma = 1 << 6,
+    Semicolon = 1 << 7,
+    Ampersand = 1 << 8,
+    Caret = 1 << 9,
+    Colon = 1 << 10,
+    Equal = 1 << 11,
+    Hash = 1 << 12,
+    Ident = 1 << 13,
+    Number = 1 << 14,
+    Messages = 1 << 15,
+    Struct = 1 << 16,
+    Version = 1 << 17,
+    Const = 1 << 18,
+    Type = 1 << 19,
+    Eof = 1 << 20,
+} TokenType;
+
+// Number of token types (must be kept in sync with TokenType)
+#define TOKEN_TYPE_COUNT 21
+
+typedef struct {
+    // The type of the token
+    TokenType type;
+    // Span of the lexeme (line, column, offset, length)
+    Span span;
+    // A pointer to the start of the lexeme (not null terminated)
+    const char *lexeme;
+    // Pointer to the source object
+    Source *src;
+    // In the case of a Number token: the parsed number
+    uint64_t lit;
+} Token;
+
+typedef enum : uint32_t {
+    LexingErrorNoError,
+    LexingErrorUnexpectedCharacter,
+    LexingErrorNumberLiteralOverflow,
+} LexingErrorType;
+
+typedef struct {
+    // Pointer to the source object
+    Source *src;
+    // Span of the offending characters
+    Span span;
+    LexingErrorType type;
+} LexingError;
+// Destroy the token
+void token_drop(Token t);
+// Destroy lexing error
+void lexing_error_drop(LexingError e);
+
+VECTOR_IMPL(Token, TokenVec, token, token_drop);
+VECTOR_IMPL(LexingError, LexingErrorVec, lexing_error,
lexing_error_drop);
+
+// Result of the lexing of a source
+typedef struct {
+    TokenVec tokens;
+    LexingErrorVec errors;
+} LexingResult;
+
+// Lex a source into tokens (the last token is always Eof)
+LexingResult lex(Source *src);
+
+// Destroy the tokens and errors of a lexing result
+void lexing_result_drop(LexingResult res);
+
+// Report a lexing error to the user
+void lexing_error_report(LexingError *le);
+
+// Name of a single token type (the name of the enum constant).
+// Returns "Unknown" for anything that isn't exactly one TokenType, such as a set of types.
+__attribute__((unused)) static inline const char *token_type_name(TokenType t) {
+#define _case(type) \
+    case type: \
+        return #type
+    switch (t) {
+        _case(LeftParen);
+        _case(RightParen);
+        _case(LeftBrace);
+        _case(RightBrace);
+        _case(LeftBracket);
+        _case(RightBracket);
+        _case(Comma);
+        _case(Semicolon);
+        _case(Ampersand);
+        _case(Caret);
+        _case(Colon);
+        _case(Equal);
+        _case(Hash);
+        _case(Ident);
+        _case(Number);
+        _case(Messages);
+        _case(Struct);
+        _case(Version);
+        _case(Const);
+        _case(Type);
+        _case(Eof);
+    }
+#undef _case
+    // Reaching the end of a non-void function is undefined behavior as soon as the caller uses
+    // the result, so values outside of the enum get an explicit answer.
+    return "Unknown";
+}
+// Human readable description of a set of token types, the returned string must be freed
+char *token_type_string(TokenType t);
+#endif
diff --git a/ser/log.c b/ser/log.c
new file mode 100644
index 0000000..da24e10
--- /dev/null
+++ b/ser/log.c
@@ -0,0 +1,143 @@
+#include "log.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define BASE_BUFFER_SIZE 1024
+#define SOURCE_BUFFER_SIZE 128
+// Shared formatting buffers, not protected against concurrent use (see TODO below)
+static char BASE_BUFFER[BASE_BUFFER_SIZE] = {0};
+static char SOURCE_BUFFER[SOURCE_BUFFER_SIZE] = {0};
+
+// TODO: mutex
+static Logger LOGGER = {.sevs = Info | Warning | Error};
+
+void logger_set_fd(FILE *fd) { LOGGER.fd = fd; }
+
+void logger_enable_severities(LogSeverities sevs) { LOGGER.sevs |= sevs; }
+
+void logger_disable_severities(LogSeverities sevs) { LOGGER.sevs &= ~sevs; }
+
+void logger_set_severities(LogSeverities sevs) { LOGGER.sevs = sevs; }
+
+void logger_init() { LOGGER.initialized = true; }
+
+// Logging function, should be rarely called by itself (use the log_* macros instead)
+// message takes the form: (file:line?) SEVERITY func > fmt ...
+// line can be ignored if negative
+void _log_severity(LogSeverity sev, const char *func, const char *file, const int line, char *fmt, ...)
{
+    if (!LOGGER.initialized) {
+        fprintf(stderr, "Trying to log, but the logger hasn't been initialized.\n");
+        return;
+    }
+
+    // Ignore if the logger doesn't have a configured target or if the severity is ignored.
+    if (LOGGER.fd == NULL || !(LOGGER.sevs & sev)) {
+        return;
+    }
+
+    // Format the source location ("(file:line)" or "(file)") into SOURCE_BUFFER
+    // NOTE(review): snprintf returns the untruncated length, so source_len can be larger than what
+    // SOURCE_BUFFER actually holds when the path is very long.
+    int source_len;
+    if (line >= 0) {
+        source_len = snprintf(SOURCE_BUFFER, SOURCE_BUFFER_SIZE, "(%s:%d)", file, line);
+    } else {
+        source_len = snprintf(SOURCE_BUFFER, SOURCE_BUFFER_SIZE, "(%s)", file);
+    }
+
+    // Keep track of width for alignment
+    if (source_len > LOGGER.source_width) {
+        LOGGER.source_width = source_len;
+    }
+
+    // "format" severity: color escape sequence followed by a 5 characters wide label
+    const char *sev_str;
+    switch (sev) {
+    case Trace:
+        sev_str = "\033[0;35mTRACE";
+        break;
+    case Debug:
+        sev_str = "\033[0;34mDEBUG";
+        break;
+    case Info:
+        sev_str = "\033[0;32mINFO ";
+        break;
+    case Warning:
+        sev_str = "\033[0;33mWARN ";
+        break;
+    case Error:
+        sev_str = "\033[0;31mERROR";
+        break;
+    default:
+        sev_str = "\033[0;31m?????";
+        break;
+    }
+
+    // no format for func since there is nothing to do
+
+    // SAFETY: func should always come from the __func__ macro, which shouldn't allow buffer overflow.
+    int func_len = (int)strlen(func);
+
+    // Keep track of width for alignment
+    if (func_len > LOGGER.func_width) {
+        LOGGER.func_width = func_len;
+    }
+
+    // Final string buffer
+    char *buf = BASE_BUFFER;
+    int prefix_len = snprintf(
+        buf,
+        BASE_BUFFER_SIZE / 2,
+        "\033[0;2m%-*s %s \033[0;1m%-*s \033[0;2m> ",
+        LOGGER.source_width,
+        SOURCE_BUFFER,
+        sev_str,
+        LOGGER.func_width,
+        func
+    );
+    // snprintf returns the length the prefix would have had without truncation (or a negative
+    // value on error): clamp it to what is actually stored, otherwise the message would be
+    // written past the prefix (or even past the buffer).
+    if (prefix_len < 0) {
+        prefix_len = 0;
+    } else if (prefix_len >= BASE_BUFFER_SIZE / 2) {
+        prefix_len = BASE_BUFFER_SIZE / 2 - 1;
+    }
+
+    const char *suffix = "\033[0m\n";
+    const int suffix_len = 5;
+
+    // max slice of the buffer used by the message
+    char *str = buf + prefix_len;
+    int str_size = BASE_BUFFER_SIZE - prefix_len - suffix_len; // keep room for the suffix
+
+    va_list args;
+    va_start(args, fmt);
+    int len = vsnprintf(str, str_size, fmt, args);
+    va_end(args);
+
+    // Formatting error: still emit the prefix and the suffix, with an empty message
+    if (len < 0) {
+        len = 0;
+    }
+
+    // Make sure we have enough space in the BASE_BUFFER, allocate if we don't
+    if (len >= str_size) {
+        buf = malloc((size_t)prefix_len + (size_t)len + (size_t)suffix_len);
+
+        // Check the allocation before deriving any pointer from it
+        if (buf == NULL) {
+            fprintf(stderr, "Couldn't allocate buffer (Out of memory ?), aborting...\n");
+            exit(1);
+        }
+        str = buf + prefix_len;
+
+        // Copy over prefix into new buffer
+        memcpy(buf, BASE_BUFFER, prefix_len * sizeof(char));
+
+        // The NUL terminator lands where the suffix is copied right after, so len + 1 fits
+        va_start(args, fmt);
+        vsnprintf(str, (size_t)len + 1, fmt, args);
+        va_end(args);
+    }
+
+    // Overwrites the NUL terminator of the message, the output is written with an explicit length
+    memcpy(buf + prefix_len + len, suffix, suffix_len * sizeof(char));
+
+    fwrite(buf, 1, prefix_len + len + suffix_len, LOGGER.fd);
+
+#ifdef LOG_FLUSH
+    fflush(LOGGER.fd);
+#endif
+
+    if (buf != BASE_BUFFER) {
+        free(buf);
+    }
+}
diff --git a/ser/log.h b/ser/log.h
new file mode 100644
index 0000000..d097fb0
--- /dev/null
+++ b/ser/log.h
@@ -0,0 +1,53 @@
+#ifndef LOG_H
+#define LOG_H
+
+#include
+#include
+#include
+
+// Bit field of severities
+typedef uint32_t LogSeverities;
+
+// The logger
+typedef struct {
+    // Set by logger_init, nothing is logged before that
+    bool initialized;
+    // Where the messages are written, nothing is logged while it is NULL
+    FILE *fd;
+    // Enabled severities
+    LogSeverities sevs;
+    // Widest source location / function name seen so far, used to align the messages
+    int source_width;
+    int func_width;
+} Logger;
+
+// A message's severity Error > Warning > Info > Debug > Trace
+typedef enum : LogSeverities {
+    Trace = 1 << 0,
+    Debug = 1 << 1,
+    Info = 1 << 2,
+    Warning = 1
<< 3,
+    Error = 1 << 4,
+} LogSeverity;
+
+// Needs to be here but log_* macros should be used instead
+void _log_severity(LogSeverity sev, const char *func, const char *file, const int line, char *fmt, ...);
+
+// Set the file descriptor for the logger
+void logger_set_fd(FILE *fd);
+// Enable the given severities, on top of the ones already enabled
+void logger_enable_severities(LogSeverities sevs);
+// Disable the given severities
+void logger_disable_severities(LogSeverities sevs);
+// Replace the set of enabled severities
+void logger_set_severities(LogSeverities sevs);
+// Mark the logger as initialized, must be called before logging
+void logger_init();
+
+// log_*(fmt, ...): log a printf-style message with the matching severity, tagged with the calling
+// function, file and line. All of them expand to a no-op when LOG_DISABLE is defined.
+#ifdef LOG_DISABLE
+#define log_trace(...) (void)0
+#define log_debug(...) (void)0
+#define log_info(...) (void)0
+#define log_warn(...) (void)0
+#define log_error(...) (void)0
+#else
+#define log_trace(...) _log_severity(Trace, __func__, __FILE__, __LINE__, __VA_ARGS__)
+#define log_debug(...) _log_severity(Debug, __func__, __FILE__, __LINE__, __VA_ARGS__)
+#define log_info(...) _log_severity(Info, __func__, __FILE__, __LINE__, __VA_ARGS__)
+#define log_warn(...) _log_severity(Warning, __func__, __FILE__, __LINE__, __VA_ARGS__)
+#define log_error(...) _log_severity(Error, __func__, __FILE__, __LINE__, __VA_ARGS__)
+#endif
+
+#endif
diff --git a/ser/macro_utils.h b/ser/macro_utils.h
new file mode 100644
index 0000000..45c3553
--- /dev/null
+++ b/ser/macro_utils.h
@@ -0,0 +1,66 @@
+#ifndef MACRO_UTILS_H
+#define MACRO_UTILS_H
+
+// Call the macro m with the given arguments
+#define CALL(m, ...) m(__VA_ARGS__)
+
+// Expands to nothing
+#define EMPTY()
+
+// EVALn(...) passes its arguments through n nested levels of expansion, EVAL is EVAL32
+#define EVAL(...) EVAL32(__VA_ARGS__)
+#define EVAL1024(...) EVAL512(EVAL512(__VA_ARGS__))
+#define EVAL512(...) EVAL256(EVAL256(__VA_ARGS__))
+#define EVAL256(...) EVAL128(EVAL128(__VA_ARGS__))
+#define EVAL128(...) EVAL64(EVAL64(__VA_ARGS__))
+#define EVAL64(...) EVAL32(EVAL32(__VA_ARGS__))
+#define EVAL32(...) EVAL16(EVAL16(__VA_ARGS__))
+#define EVAL16(...) EVAL8(EVAL8(__VA_ARGS__))
+#define EVAL8(...) EVAL4(EVAL4(__VA_ARGS__))
+#define EVAL4(...) EVAL2(EVAL2(__VA_ARGS__))
+#define EVAL2(...) EVAL1(EVAL1(__VA_ARGS__))
+#define EVAL1(...) __VA_ARGS__
+#define EVAL0(...)
+
+// SND: second argument, FST: first argument
+#define SND(a, b, ...) b
+#define FST(a, ...)
a
+// Paste two tokens together
+#define CAT(a, b) a##b
+
+// IS_PROBE(PROBE()) -> 1, IS_PROBE(anything that is a single argument) -> 0
+#define PROBE() ~, 1
+#define IS_PROBE(...) SND(__VA_ARGS__, 0)
+// _FAST_NOT(0) -> 1 _FAST_NOT(1) -> 0
+#define _FAST_NOT(x) CAT(_FAST_NOT_, x)()
+#define _FAST_NOT_0() 1
+#define _FAST_NOT_1() 0
+// NOT(0) -> 1 NOT(...) -> 0
+#define NOT(x) IS_PROBE(CAT(_NOT_, x))
+#define _NOT_0 PROBE()
+// BOOL(0) -> 0 BOOL(...) -> 1
+#define BOOL(x) _FAST_NOT(NOT(x))
+
+// Same as EVAL1 but different meaning
+#define KEEP(...) __VA_ARGS__
+// Drop / Delete the arguments
+#define DROP(...)
+
+// IF_ELSE(c)(then)(else): expands to `then` if c is not 0, to `else` otherwise
+#define IF_ELSE(c) FAST_IF_ELSE(BOOL(c))
+// IF_ELSE if c is known to be 0 or 1
+#define FAST_IF_ELSE(c) CAT(_IF_ELSE_, c)
+#define _IF_ELSE_0(...) KEEP
+#define _IF_ELSE_1(...) __VA_ARGS__ DROP
+
+// HAS_ARGS(...) -> 0 when called without arguments, 1 otherwise
+#define HAS_ARGS(...) BOOL(FST(_HAS_ARGS_ __VA_ARGS__)())
+#define _HAS_ARGS_() 0
+// IF_ELSE_ARGS(...)(then)(else): expands to `then` if arguments were given, to `else` otherwise
+#define IF_ELSE_ARGS(...) FAST_IF_ELSE(HAS_ARGS(__VA_ARGS__))
+
+// DEFERn(x): x followed by n levels of EMPTY, so that x is not invoked right away
+#define DEFER1(x) x EMPTY()
+#define DEFER2(x) x EMPTY EMPTY()()
+#define DEFER3(x) x EMPTY EMPTY EMPTY()()()
+#define DEFER4(x) x EMPTY EMPTY EMPTY EMPTY()()()()
+#define DEFER5(x) x EMPTY EMPTY EMPTY EMPTY EMPTY()()()()()
+
+// Apply the macro m to each argument (relies on __VA_OPT__)
+// NOTE(review): the recursion goes through DEFER1, so presumably MAP has to be wrapped in EVAL
+// to be fully expanded (as REVERSE does for _REVERSE) - confirm.
+#define MAP(m, fst, ...) m(fst) __VA_OPT__(DEFER1(_MAP)()(DEFER1(m), __VA_ARGS__))
+#define _MAP() MAP
+
+// Reverse the order of the arguments, expands to nothing without arguments
+#define REVERSE(...) IF_ELSE_ARGS(__VA_ARGS__)(EVAL(_REVERSE(__VA_ARGS__)))()
+#define _REVERSE(a, ...) __VA_OPT__(DEFER1(__REVERSE)()(__VA_ARGS__), ) a
+#define __REVERSE() _REVERSE
+
+#endif
diff --git a/ser/main.c b/ser/main.c
new file mode 100644
index 0000000..183cea7
--- /dev/null
+++ b/ser/main.c
@@ -0,0 +1,111 @@
+#include "ast.h"
+#include "codegen_c.h"
+#include "hashmap.h"
+#include "lexer.h"
+#include "log.h"
+#include "parser.h"
+#include "source.h"
+
+#include
+#include
+
+// Print the final "aborting" message (plural if there was more than one error) and exit with
+// status 1
+void abort_error(uint32_t error_count) {
+    fprintf(stderr, "\033[1;91merror\033[0m: aborting due to previous error%s\n", error_count > 1 ?
"s" : "");
+    exit(1);
+}
+
+// Code generation backends
+typedef enum {
+    BackendC,
+} Backend;
+
+// Parse the name of a backend, logs an error and exits with status 1 if it is unknown
+Backend parse_backend(const char *b) {
+    if (strcmp(b, "c") == 0) {
+        return BackendC;
+    } else {
+        log_error("Couldn't parse requested backend: got %s expected one of 'c'.", b);
+        exit(1);
+    }
+}
+
+// Entry point: argv[1] is the path of the source, argv[2] the backend and argv[3] the base path
+// of the output (".h" and ".c" are appended to it by the C backend).
+// Any lexing, parsing or evaluation error is reported and aborts with status 1.
+int main(int argc, char **argv) {
+    logger_set_fd(stderr);
+    logger_enable_severities(Info | Warning | Error);
+    logger_init();
+
+    if (argc != 4) {
+        fprintf(stderr, "Expected 3 arguments: ser \n");
+        // Stop here: argv[1..3] must not be read when they were not provided
+        return 1;
+    }
+
+    char *source_path = argv[1];
+    Backend back = parse_backend(argv[2]);
+    char *output = argv[3];
+
+    Source src;
+    SourceError serr = source_open(source_path, &src);
+
+    if (serr != SourceErrorNoError) {
+        log_error("Error when opening or reading source");
+        exit(1);
+    }
+
+    // Lexing
+    LexingResult lexing_result = lex(&src);
+    if (lexing_result.errors.len > 0) {
+        for (size_t i = 0; i < lexing_result.errors.len; i++) {
+            lexing_error_report(&lexing_result.errors.data[i]);
+        }
+        abort_error(lexing_result.errors.len);
+    }
+    vec_drop(lexing_result.errors);
+
+    // Parsing
+    ParsingResult parsing_result = parse(lexing_result.tokens);
+
+    if (parsing_result.errors.len > 0) {
+        for (size_t i = 0; i < parsing_result.errors.len; i++) {
+            parsing_error_report(&src, &parsing_result.errors.data[i]);
+        }
+        abort_error(parsing_result.errors.len);
+    }
+    vec_drop(parsing_result.errors);
+
+    // Evaluation
+    EvaluationResult evaluation_result = resolve_statics(&parsing_result.ctx);
+    if (evaluation_result.errors.len > 0) {
+        for (size_t i = 0; i < evaluation_result.errors.len; i++) {
+            eval_error_report(&src, &evaluation_result.errors.data[i]);
+        }
+        abort_error(evaluation_result.errors.len);
+    }
+    vec_drop(evaluation_result.errors);
+
+    // Code generation
+    switch (back) {
+    case BackendC: {
+        // Last component of the output path (everything after the last '/')
+        char *basename;
+        {
+            char *last_slash = strrchr(output, '/');
+            if (last_slash == NULL) {
+                basename = output;
+            } else {
+                basename = last_slash + 1;
+            }
+        }
+        char *header_path = msprintf("%s.h", output);
+        char *source_path = msprintf("%s.c", output);
+
+        FileWriter header =
file_writer_init(header_path);
+        FileWriter source = file_writer_init(source_path);
+
+        codegen_c((Writer *)&header, (Writer *)&source, basename, &evaluation_result.program);
+
+        file_writer_drop(header);
+        file_writer_drop(source);
+
+        free(source_path);
+        free(header_path);
+        break;
+    }
+    }
+
+    program_drop(evaluation_result.program);
+    ast_drop(parsing_result.ctx);
+    vec_drop(lexing_result.tokens);
+    source_drop(src);
+}
diff --git a/ser/parser.c b/ser/parser.c
new file mode 100644
index 0000000..2f66355
--- /dev/null
+++ b/ser/parser.c
@@ -0,0 +1,350 @@
+#include "parser.h"
+
+#include "ast.h"
+#include "lexer.h"
+#include "vector.h"
+
+#include
+#include
+
+// State of the parser
+typedef struct {
+    // Tokens to parse, the last one is expected to be Eof
+    TokenVec tokens;
+    ParsingErrorVec errors;
+    AstContext ctx;
+    // Index of the next token to consume
+    uint32_t current;
+} Parser;
+
+static Parser parser_init(TokenVec tokens) {
+    return (Parser){
+        .tokens = tokens,
+        .ctx = ast_init(),
+        .current = 0,
+        .errors = vec_init(),
+    };
+}
+
+// Build an "unexpected token" error, `type` is the set of token types that were expected
+inline static ParsingError err_expected(TokenType type, Span span) {
+    return (ParsingError){.span = span, .type = ParsingErrorUnexpectedToken, .data.type = type};
+}
+
+inline static void add_error(Parser *p, ParsingError err) { vec_push(&p->errors, err); }
+
+// Next token, without consuming it
+inline static Token peek(Parser *p) { return p->tokens.data[p->current]; }
+
+// Last consumed token. Before anything has been consumed there is no such token: the first token
+// is returned instead of reading tokens.data[-1] (reachable through span_end when the token
+// stream only holds Eof).
+inline static Token previous(Parser *p) { return p->tokens.data[p->current > 0 ? p->current - 1 : 0]; }
+
+// Whether the next token is of the given type
+static bool check(Parser *p, TokenType type) {
+    if (peek(p).type == Eof) {
+        return type == Eof;
+    }
+    return peek(p).type == type;
+}
+
+// Consume the next token and return it. Eof is never consumed: at the end of the input the
+// last consumed token is returned again.
+static Token advance(Parser *p) {
+    if (peek(p).type != Eof)
+        p->current++;
+    return previous(p);
+}
+
+// Consume the next token if it is of the given type, returns whether it was consumed
+static bool match(Parser *p, TokenType t) {
+    if (check(p, t)) {
+        advance(p);
+        return true;
+    }
+    return false;
+}
+
+// Skip tokens until the next one belongs to the given set of types (or is Eof)
+static void skip_until(Parser *p, TokenType type) {
+    while ((peek(p).type & (type | Eof)) == 0) {
+        advance(p);
+    }
+}
+
+// Consume the next token if it is of the given type (storing it in `res` when `res` is not
+// NULL), otherwise add an error and leave the token in place. Returns whether it was consumed.
+static bool consume(Parser *p, TokenType t, Token *res) {
+    if (peek(p).type == t) {
+        if (res != NULL) {
+            *res = advance(p);
+        } else {
+
advance(p);
+        }
+        return true;
+    }
+    add_error(p, err_expected(t, peek(p).span));
+    return false;
+}
+
+// Return false from the enclosing function if the given expression is false
+#define bubble(...) \
+    if (!(__VA_ARGS__)) { \
+        return false; \
+    }
+
+// Location of the next token
+static Location parser_loc(Parser *p) { return p->tokens.data[p->current].span.loc; }
+
+// Span going from `start` to the end of the last consumed token
+static inline Span span_end(Parser *p, Location start) {
+    Span prev = previous(p).span;
+    return span_from_to(
+        start,
+        (Location){.line = prev.loc.line, .column = prev.loc.column + prev.len, .offset = prev.loc.offset + prev.len}
+    );
+}
+
+// Parse a number: either a number literal or an identifier (both are stored as an AstNumber).
+// The token is consumed before its type is checked, so it stays consumed when an error is reported.
+static bool parse_number(Parser *p, AstNumber *res) {
+    Token t = advance(p);
+    if (t.type == Number) {
+        *res = ast_number(p->ctx, t.span, t);
+        return true;
+    }
+    if (t.type == Ident) {
+        *res = ast_number(p->ctx, t.span, t);
+        return true;
+    }
+    add_error(p, err_expected(Number | Ident, t.span));
+    return false;
+}
+
+// Parse an identifier (the token is consumed even if it is not one)
+static bool parse_ident(Parser *p, AstIdent *res) {
+    Token t = advance(p);
+    if (t.type == Ident) {
+        *res = ast_ident(p->ctx, t.span, t);
+        return true;
+    }
+    add_error(p, err_expected(Ident, t.span));
+    return false;
+}
+
+// Parse the size between the brackets of an array type:
+//   nothing -> no size, ^N -> max size, N -> fixed size
+// The brackets themselves are handled by the caller.
+static bool parse_size(Parser *p, AstSize *res) {
+    Location start = parser_loc(p);
+    if (check(p, RightBracket)) {
+        *res = ast_no_size(p->ctx, span_end(p, start));
+        return true;
+    }
+    AstNumber size;
+    if (match(p, Caret)) {
+        bubble(parse_number(p, &size));
+        *res = ast_max_size(p->ctx, span_end(p, start), size);
+        return true;
+    }
+    bubble(parse_number(p, &size));
+    *res = ast_fixed_size(p->ctx, span_end(p, start), size);
+    return true;
+}
+
+// Parse a type: an identifier followed by any number of array suffixes (`[size]` or `&[size]`).
+// Each suffix wraps the type parsed so far, which is moved to the AST arena.
+static bool parse_type(Parser *p, AstType *res) {
+    bubble(parse_ident(p, &res->ident));
+
+    Location start = parser_loc(p);
+    TokenType next = peek(p).type;
+    while (next == Ampersand || next == LeftBracket) {
+        // Copy of the element type, referenced by the array type built below
+        AstType *type = arena_alloc(&p->ctx.alloc, sizeof(AstType));
+        *type = *res;
+        AstSize size;
+        bool heap = match(p, Ampersand);
+        bubble(consume(p, LeftBracket, NULL));
+        bubble(parse_size(p, &size));
+        bubble(consume(p, RightBracket, NULL));
+        // An array without size is always a heap array
+        if (heap || size.tag ==
ATNoSize) {
+            res->array = ast_heap_array(p->ctx, span_end(p, start), type, size);
+        } else {
+            res->array = ast_field_array(p->ctx, span_end(p, start), type, size);
+        }
+        next = peek(p).type;
+    }
+    return true;
+}
+
+// Parse a field: `name: type`
+static bool parse_field(Parser *p, AstField *res) {
+    Token name;
+    AstType type;
+    Location start = parser_loc(p);
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, Colon, NULL));
+    bubble(parse_type(p, &type));
+    *res = ast_field(p->ctx, span_end(p, start), name, type);
+    return true;
+}
+
+// Parse a message: `Name { field, field, ... }` (a trailing comma is allowed).
+// After an invalid field, tokens are skipped up to the next ',', identifier or '}'.
+// NOTE(review): `fields` is not released when one of the bubble() calls returns early - confirm
+// whether vec_init allocates.
+static bool parse_message(Parser *p, AstMessage *res) {
+    Token name;
+    AstFieldVec fields = vec_init();
+    Location start = parser_loc(p);
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, LeftBrace, NULL));
+
+    AstField f;
+    do {
+        if (check(p, RightBrace)) {
+            break;
+        }
+        if (parse_field(p, &f)) {
+            vec_push(&fields, f);
+        } else {
+            skip_until(p, Comma | Ident | RightBrace);
+        }
+    } while (match(p, Comma));
+    bubble(consume(p, RightBrace, NULL));
+    *res = ast_message(p->ctx, span_end(p, start), name, fields);
+    return true;
+}
+
+// Parse an attribute: `#[name]`
+static bool parse_attribute(Parser *p, AstAttribute *res) {
+    Token ident;
+    Location start = parser_loc(p);
+    bubble(consume(p, Hash, NULL));
+    bubble(consume(p, LeftBracket, NULL));
+    bubble(consume(p, Ident, &ident));
+    bubble(consume(p, RightBracket, NULL));
+    *res = ast_attribute(p->ctx, span_end(p, start), ident);
+    return true;
+}
+
+// Parse a child of a `messages` block: an attribute (starts with '#') or a message (starts with
+// an identifier). Any other token is reported and left in place.
+static bool parse_attribute_or_message(Parser *p, AstAttributeOrMessage *res) {
+    if (check(p, Hash)) {
+        return parse_attribute(p, &res->attribute);
+    } else if (check(p, Ident)) {
+        return parse_message(p, &res->message);
+    } else {
+        vec_push(&p->errors, err_expected(Hash | Ident, peek(p).span));
+        return false;
+    }
+}
+
+// Parse a version item: `version(N);`
+static bool parse_version(Parser *p, AstVersion *res) {
+    AstNumber ver;
+    Location start = parser_loc(p);
+    bubble(consume(p, Version, NULL));
+    bubble(consume(p, LeftParen, NULL));
+    bubble(parse_number(p, &ver));
+    bubble(consume(p, RightParen, NULL));
+
bubble(consume(p, Semicolon, NULL));
+    *res = ast_version(p->ctx, span_end(p, start), ver);
+    return true;
+}
+
+// Parse a struct item: `struct Name { field, field, ... }` (a trailing comma is allowed).
+// After an invalid field, tokens are skipped up to the next ',', identifier or '}'.
+static bool parse_struct(Parser *p, AstStruct *res) {
+    Token name;
+    AstFieldVec fields = vec_init();
+    Location start = parser_loc(p);
+    bubble(consume(p, Struct, NULL));
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, LeftBrace, NULL));
+
+    AstField f;
+    do {
+        if (check(p, RightBrace)) {
+            break;
+        }
+        if (parse_field(p, &f)) {
+            vec_push(&fields, f);
+        } else {
+            skip_until(p, Comma | Ident | RightBrace);
+        }
+    } while (match(p, Comma));
+    bubble(consume(p, RightBrace, NULL));
+    *res = ast_struct(p->ctx, span_end(p, start), name, fields);
+    return true;
+}
+
+// Parse a type declaration item: `type Name = type;`
+static bool parse_type_decl(Parser *p, AstTypeDecl *res) {
+    Token name;
+    AstType type;
+    Location start = parser_loc(p);
+    bubble(consume(p, Type, NULL));
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, Equal, NULL));
+    bubble(parse_type(p, &type));
+    bubble(consume(p, Semicolon, NULL));
+    *res = ast_type_decl(p->ctx, span_end(p, start), name, type);
+    return true;
+}
+
+// Parse a messages item: `messages Name { ... }` where the block holds attributes and messages.
+// After an invalid child, tokens are skipped up to the next '}', '#' or identifier.
+// A block that is still open at the end of the input is reported as a missing '}'.
+static bool parse_messages(Parser *p, AstMessages *res) {
+    AstAttributeOrMessageVec children = vec_init();
+    AstAttributeOrMessage child;
+    Token name;
+    Location start = parser_loc(p);
+    bubble(consume(p, Messages, NULL));
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, LeftBrace, NULL));
+    while (!check(p, RightBrace)) {
+        // Eof is never consumed and skip_until stops on it: without this check an unterminated
+        // block would loop forever, adding an error on every iteration.
+        if (check(p, Eof)) {
+            break;
+        }
+        if (parse_attribute_or_message(p, &child)) {
+            vec_push(&children, child);
+        } else {
+            skip_until(p, RightBrace | Hash | Ident);
+        }
+    }
+    // Consumes the closing brace, or reports that it is missing
+    bubble(consume(p, RightBrace, NULL));
+    *res = ast_messages(p->ctx, span_end(p, start), name, children);
+    return true;
+}
+
+// Parse a constant item: `const NAME = N;`
+static bool parse_constant(Parser *p, AstConstant *res) {
+    Token name;
+    AstNumber value;
+    Location start = parser_loc(p);
+    bubble(consume(p, Const, NULL));
+    bubble(consume(p, Ident, &name));
+    bubble(consume(p, Equal, NULL));
+    bubble(parse_number(p, &value));
+    bubble(consume(p, Semicolon, NULL));
+    *res = ast_constant(p->ctx, span_end(p, start), name,
value);
+    return true;
+}
+
+// Parse a top level item, selected by its first token.
+// A token that can't start an item is reported and consumed.
+static bool parse_item(Parser *p, AstItem *res) {
+    switch (peek(p).type) {
+    case Version:
+        return parse_version(p, &res->version);
+    case Struct:
+        return parse_struct(p, &res->struct_);
+    case Type:
+        return parse_type_decl(p, &res->type_decl);
+    case Messages:
+        return parse_messages(p, &res->messages);
+    case Const:
+        return parse_constant(p, &res->constant);
+    default:
+        // Report the stray token instead of silently ignoring it. The caller then skips to the
+        // next item keyword, so a run of stray tokens only produces a single error.
+        add_error(p, err_expected(Version | Struct | Type | Messages | Const, peek(p).span));
+        advance(p);
+        return false;
+    }
+}
+
+// Parse all the items up to the end of the input.
+// After an invalid item, tokens are skipped up to the next keyword starting an item.
+// Always returns true: errors are accumulated in p->errors.
+static bool parse_items(Parser *p, AstItems *res) {
+    AstItemVec items = vec_init();
+    AstItem item;
+    Location start = parser_loc(p);
+    while (!check(p, Eof)) {
+        if (parse_item(p, &item)) {
+            vec_push(&items, item);
+        } else {
+            skip_until(p, Version | Struct | Type | Messages | Const);
+        }
+    }
+    // When no token was consumed (the input only holds Eof) there is no previous token for
+    // span_end to look at: use an empty span at the start instead.
+    Span span = p->current > 0 ? span_end(p, start) : span_from_to(start, start);
+    *res = ast_items(p->ctx, span, items);
+    return true;
+}
+
+// Parse the tokens into an AST. The root node is allocated in the AST arena and the parsing
+// errors are returned alongside the AST context.
+ParsingResult parse(TokenVec vec) {
+    Parser p = parser_init(vec);
+    AstNode *items = arena_alloc(&p.ctx.alloc, sizeof(AstNode));
+    parse_items(&p, &items->items);
+    p.ctx.root = items;
+    return (ParsingResult){.ctx = p.ctx, .errors = p.errors};
+}
+
+// Report a parsing error through source_report, with the error's span highlighted
+void parsing_error_report(Source *src, ParsingError *err) {
+    ReportSpan span = {.span = err->span, .sev = ReportSeverityError};
+#define report(fmt, ...)
source_report(src, err->span.loc, ReportSeverityError, &span, 1, NULL, fmt, __VA_ARGS__); + switch (err->type) { + case ParsingErrorUnexpectedToken: { + char *type = token_type_string(err->data.type); + span.message = msprintf("expected %s", type); + report("Expected %s, found '%.*s'", type, err->span.len, &src->str[err->span.loc.offset]); + free((char *)span.message); + free(type); + break; + } + default: + break; + } +#undef report +} diff --git a/ser/parser.h b/ser/parser.h new file mode 100644 index 0000000..c469d58 --- /dev/null +++ b/ser/parser.h @@ -0,0 +1,35 @@ +#ifndef PARSER_H +#define PARSER_H +#include "ast.h" +#include "lexer.h" +#include "source.h" +#include "vector.h" +#include "vector_impl.h" + +typedef union { + TokenType type; +} ParsingErrorData; + +typedef enum { + ParsingErrorNoError, + ParsingErrorUnexpectedToken, +} ParsingErrorType; + +typedef struct { + Span span; + ParsingErrorType type; + ParsingErrorData data; +} ParsingError; + +VECTOR_IMPL(ParsingError, ParsingErrorVec, parsing_error); + +typedef struct { + AstContext ctx; + ParsingErrorVec errors; +} ParsingResult; + +ParsingResult parse(TokenVec vec); + +void parsing_error_report(Source *src, ParsingError *err); + +#endif diff --git a/ser/source.c b/ser/source.c new file mode 100644 index 0000000..1c296e7 --- /dev/null +++ b/ser/source.c @@ -0,0 +1,297 @@ +#include "source.h" + +#include "assert.h" +#include "vector.h" + +#include +#include +#include +#include + +uint32_t sss_hash(Hasher state, const void *v) { + SpannedStringSlice *sss = (SpannedStringSlice *)v; + return string_slice_hash(state, &sss->slice); +} +bool sss_equal(const void *a, const void *b) { + SpannedStringSlice *sa = (SpannedStringSlice *)a; + SpannedStringSlice *sb = (SpannedStringSlice *)b; + return string_slice_equal(sa, sb); +} + +Source source_init(const char *str, uint32_t len) { + char *ptr = malloc(len + 1); + assert_alloc(ptr); + strncpy(ptr, str, len); + ptr[len] = '\0'; + // Will initlalize ref_count to 
0 in DEBUG mode as well + return (Source){.str = ptr, .len = len, .path = NULL}; +} +SourceError source_from_file(FILE *f, Source *src) { + fseek(f, 0, SEEK_END); + uint64_t len = ftell(f); + fseek(f, 0, SEEK_SET); + char *ptr = malloc(len + 1); + + if (fread(ptr, 1, len, f) != len) { + return SourceErrorReadFailed; + } + + IF_DEBUG(src->ref_count = 0); + src->str = ptr; + src->len = len; + src->path = NULL; + return SourceErrorNoError; +} +SourceError source_open(const char *path, Source *src) { + FILE *f = fopen(path, "r"); + if (f == NULL) { + return SourceErrorOpenFailed; + } + + SourceError err = source_from_file(f, src); + fclose(f); + + if (err == SourceErrorNoError) { + char *p = strdup(path); + assert_alloc(p); + src->path = p; + } + + return err; +} +void source_drop(Source src) { + IF_DEBUG({ + if (src.ref_count > 0) { + log_error("Trying to destroy currently used source, leaking instead"); + return; + } + }); + if (src.path != NULL) { + free((char *)src.path); + } + free((char *)src.str); +} + +int span_compare(const void *sa, const void *sb) { + Span *a = (Span *)sa; + Span *b = (Span *)sb; + int line = a->loc.line - b->loc.line; + if (line != 0) + return line; + int column = b->loc.column - a->loc.column; + if (column != 0) + return column; + return a->len - b->len; +} + +static int report_span_compare(const void *va, const void *vb) { + ReportSpan *a = (ReportSpan *)va; + ReportSpan *b = (ReportSpan *)vb; + return span_compare(&a->span, &b->span); +} + +void source_report( + const Source *src, + Location loc, + ReportSeverity sev, + const ReportSpan *pspans, + uint32_t span_count, + const char *help, + const char *fmt, + ... 
+) { + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + char *message = malloc(len + 1); + assert_alloc(message); + va_start(args, fmt); + vsnprintf(message, len + 1, fmt, args); + va_end(args); + + ReportSpanVec spans = vec_init(); + vec_push_array(&spans, pspans, span_count); + qsort(spans.data, spans.len, sizeof(ReportSpan), report_span_compare); + + const char *s; + switch (sev) { + case ReportSeverityError: + s = "\033[91merror"; + break; + case ReportSeverityWarning: + s = "\033[93mwarning"; + break; + case ReportSeverityNote: + s = "\033[92mnote"; + break; + default: + s = "?????"; + break; + } + const char *file; + if (src->path == NULL) { + file = ""; + } else { + file = src->path; + } + + uint32_t last_line, first_line; + if (spans.len > 0) { + last_line = spans.data[spans.len - 1].span.loc.line; + first_line = spans.data[0].span.loc.line; + } else { + last_line = loc.line; + first_line = loc.line; + } + + uint32_t pad = floor(log10(last_line)) + 2; + + fprintf( + stderr, + "\033[1m%s\033[0;1m: %s\n%*s\033[94m--> \033[0m%s:%d:%d\n%*s\033[1;94m|\n", + s, + message, + pad - 1, + "", + file, + loc.line, + loc.column, + pad, + "" + ); + + free(message); + + // The line of the span + StyledString line_str = styled_string_init(); + // Extra lines used when no more space in the sub + StyledStringVec sub_strs = vec_init(); + uint32_t line_length; + uint32_t offset; + + last_line = first_line - 1; + for (uint32_t i = 0; i < spans.len; i++) { + ReportSpan rspan = spans.data[i]; + Span span = rspan.span; + + offset = span.loc.offset - span.loc.column; + uint32_t line_end_off = offset; + while (line_end_off < src->len && src->str[line_end_off] != '\n') { + line_end_off++; + } + + uint32_t line_delta = span.loc.line - last_line; + + line_length = line_end_off - offset; + last_line = span.loc.line; + + styled_string_clear(&line_str); + vec_clear(&sub_strs); + vec_push(&sub_strs, styled_string_init()); + 
styled_string_set(&line_str, 0, NULL, &src->str[offset], line_length); + + while (i < spans.len && spans.data[i].span.loc.line == last_line) { + ReportSpan rspan = spans.data[i]; + Span span = rspan.span; + ReportSeverity span_sev = rspan.sev; + + const char *sev_style = "\033[1;97m"; + char underline = ' '; + switch (span_sev) { + case ReportSeverityError: + sev_style = "\033[1;91m"; + underline = '^'; + break; + case ReportSeverityWarning: + sev_style = "\033[1;93m"; + underline = '^'; + break; + case ReportSeverityNote: + sev_style = "\033[1;94m"; + underline = '-'; + break; + } + + styled_string_set_style(&line_str, span.loc.column, sev_style, span.len); + styled_string_set_style(sub_strs.data, span.loc.column, sev_style, span.len); + styled_string_fill(&sub_strs.data[0], span.loc.column, underline, span.len); + + // Not a loop, but I want break; + while (rspan.message != NULL) { + int mlen = strlen(rspan.message); + size_t index = span.loc.column + span.len + 1; + if (styled_string_available_space(&sub_strs.data[0], index, mlen + 1) > mlen) { + styled_string_set(&sub_strs.data[0], index, sev_style, rspan.message, mlen); + // We got the message in + break; + } + + index = span.loc.column; + + // We never put any message on the second sub string, so it needs to exist if we put one on the third + if (sub_strs.len == 1) { + vec_push(&sub_strs, styled_string_init()); + } + + // Start looking at the third sub line + size_t line = 2; + while (true) { + // The line doesn't exist yet: it is available + if (line >= sub_strs.len) { + vec_push(&sub_strs, styled_string_init()); + break; + } + // Check if the subline is ok + if (styled_string_available_space(&sub_strs.data[line], index, mlen + 1) > mlen) { + break; + } + // We couldn't find any space, continue on the next line. 
+ line++; + } + + for (size_t l = 1; l < line; l++) { + styled_string_set(&sub_strs.data[l], index, sev_style, "|", 1); + } + + styled_string_set(&sub_strs.data[line], index, sev_style, rspan.message, mlen); + break; + } + + i++; + } + // We exited the loop, i points to a span on the next line or to the end of spans + // Se decrement it because it'll get reincremented by the outer for loop + i--; + + // Print elipsies if we skipped more than a line + if (line_delta > 2) { + fprintf(stderr, "\033[1;94m...\n"); + } else if (line_delta > 1) { + uint32_t off_end = offset - 1; + uint32_t off_start = off_end; + while (src->str[off_start - 1] != '\n' && off_start > 0) { + off_start--; + } + uint32_t len = off_end - off_start; + fprintf(stderr, "\033[1;94m%*d |\033[0m %.*s\n", pad - 1, last_line - 1, len, &src->str[off_start]); + } + + char *line = styled_string_build(&line_str); + fprintf(stderr, "\033[1;94m%*d |\033[0m %s\n", pad - 1, last_line, line); + free(line); + for (size_t i = 0; i < sub_strs.len; i++) { + line = styled_string_build(&sub_strs.data[i]); + fprintf(stderr, "%*s\033[1;94m|\033[0m %s\n", pad, "", line); + free(line); + } + } + + styled_string_drop(line_str); + vec_drop(sub_strs); + vec_drop(spans); + + if (help != NULL) { + fprintf(stderr, "\033[1;96mhelp\033[0m: %s\n", help); + } +} diff --git a/ser/source.h b/ser/source.h new file mode 100644 index 0000000..05a921e --- /dev/null +++ b/ser/source.h @@ -0,0 +1,91 @@ +#ifndef SOURCE_H +#define SOURCE_H +#include "utils.h" +#include "vector_impl.h" + +#include +#include + +typedef struct { + uint32_t line; + uint32_t column; + uint32_t offset; +} Location; + +typedef struct { + Location loc; + uint32_t len; +} Span; + +typedef struct { + StringSlice slice; + Span span; +} SpannedStringSlice; + +int span_compare(const void *sa, const void *sb); + +bool sss_equal(const void *a, const void *b); +uint32_t sss_hash(Hasher state, const void *v); + +VECTOR_IMPL(Span, SpanVec, span); 
+VECTOR_IMPL(SpannedStringSlice, SpannedStringSliceVec, spanned_string_slice); + +typedef struct { + // The string content + const char *str; + // Path of the source file if available + const char *path; + uint32_t len; + IF_DEBUG(uint32_t ref_count;) +} Source; + +typedef enum : uint32_t { + SourceErrorNoError = 0, + SourceErrorReadFailed = 1, + SourceErrorOpenFailed = 2, +} SourceError; + +typedef enum { + ReportSeverityError, + ReportSeverityWarning, + ReportSeverityNote, +} ReportSeverity; + +typedef struct { + Span span; + ReportSeverity sev; + const char *message; +} ReportSpan; + +VECTOR_IMPL(ReportSpan, ReportSpanVec, report_span); + +static inline __attribute__((always_inline)) Location location(uint32_t line, uint32_t column, uint32_t offset) { + return (Location){.line = line, .column = column, .offset = offset}; +} + +// Initialize source from a string and its length (without null terminator), the string will be copied. +Source source_init(const char *str, uint32_t len); +// Try to initialize source from a FILE* +SourceError source_from_file(FILE *f, Source *src); +// Try to initialize source +SourceError source_open(const char *path, Source *src); +// Destroy source +void source_drop(Source src); +void source_report( + const Source *src, + Location loc, + ReportSeverity sev, + const ReportSpan *pspans, + uint32_t span_count, + const char *help, + const char *fmt, + ... +); + +static inline Span span_from_to(Location from, Location to) { + return (Span){ + .loc = from, + .len = to.offset - from.offset, + }; +} +#endif diff --git a/ser/utils.c b/ser/utils.c new file mode 100644 index 0000000..640ec2f --- /dev/null +++ b/ser/utils.c @@ -0,0 +1,147 @@ +#include "utils.h" + +#include "vector.h" + +#include +#include + +bool string_slice_equal(const void *_a, const void *_b) { + const StringSlice *a = (StringSlice *)_a; + const StringSlice *b = (StringSlice *)_b; + if (a->len != b->len) { + return false; + } + uint32_t len = a->len < b->len ? 
a->len : b->len; + return strncmp(a->ptr, b->ptr, len) == 0; +} + +uint32_t string_slice_hash(Hasher state, const void *_item) { + const StringSlice *item = (StringSlice *)_item; + return hash(state, (byte *)item->ptr, item->len); +} + +bool pointer_equal(const void *_a, const void *_b) { + const void *a = *(void **)_a; + const void *b = *(void **)_b; + return a == b; +} +uint32_t pointer_hash(Hasher state, const void *item) { return hash(state, (byte *)item, sizeof(void *)); } + +StyledString styled_string_init() { + return (StyledString){ + .chars = vec_init(), + .styles = vec_init(), + }; +} + +char *msprintf(const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + char *res = malloc(len + 1); + assert_alloc(res); + va_start(args, fmt); + vsnprintf(res, len + 1, fmt, args); + va_end(args); + return res; +} + +void styled_string_set(StyledString *str, size_t index, const char *style, const char *s, size_t len) { + if (index > str->chars.len) { + vec_fill_range(&str->chars, str->chars.len, index, ' '); + vec_fill_range(&str->styles, str->styles.len, index, NULL); + } + vec_set_array(&str->chars, index, s, len); + vec_fill_range(&str->styles, index, index + len, NULL); + str->styles.data[index] = style; + // Reset the style if there are characters after + if (style != NULL && str->chars.len > index + len) { + str->styles.data[index + len] = "\033[0m"; + } +} + +void styled_string_set_style(StyledString *str, size_t index, const char *style, size_t len) { + if (index > str->chars.len) { + vec_fill_range(&str->chars, str->chars.len, index, ' '); + vec_fill_range(&str->styles, str->styles.len, index, NULL); + } + if (index + len > str->chars.len) { + vec_fill_range(&str->chars, str->chars.len, index + len, ' '); + } + vec_fill_range(&str->styles, index, index + len, NULL); + str->styles.data[index] = style; + // Reset the style if there are characters after + if (style != NULL && str->chars.len > index 
+ len && str->styles.data[index + len] == NULL) { + str->styles.data[index + len] = "\033[0m"; + } +} + +void styled_string_clear(StyledString *str) { + vec_clear(&str->chars); + vec_clear(&str->styles); +} + +void styled_string_fill(StyledString *str, size_t index, char fill, size_t len) { + vec_fill_range(&str->chars, index, index + len, fill); +} + +void styled_string_push(StyledString *str, const char *style, const char *s) { + size_t len = strlen(s); + vec_push_array(&str->chars, s, len); + size_t index = str->styles.len; + vec_fill_range(&str->styles, index, str->chars.len, NULL); + str->styles.data[index] = style; +} + +char *styled_string_build(StyledString *str) { + CharVec res = vec_init(); + vec_grow(&res, str->chars.len + 1); + for (size_t i = 0; i < str->chars.len; i++) { + const char *style = str->styles.data[i]; + if (style != NULL) { + int len = strlen(style); + vec_push_array(&res, style, len); + } + vec_push(&res, str->chars.data[i]); + } + vec_push_array(&res, "\033[0m\0", 5); + return res.data; +} + +size_t styled_string_available_space(StyledString *str, size_t from, size_t stop_at) { + // We always have more space past the end of the string + if (from >= str->chars.len) + return stop_at; + size_t space = 0; + char *c = &str->chars.data[from]; + char *end = &str->chars.data[str->chars.len]; + while (space < stop_at && c < end && *c == ' ') { + space++; + c++; // Blasphemy + } + if (c == end || space == stop_at) { + // We either found enough space, or we got the end of the string (infinite space) + return stop_at; + } + return space; +} + +void styled_string_drop(StyledString str) { + vec_drop(str.styles); + vec_drop(str.chars); +} + +void charvec_push_str(CharVec *v, const char *str) { vec_push_array(v, str, strlen(str)); } + +void charvec_format(CharVec *v, const char *fmt, ...) 
{ + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + vec_grow(v, v->len + len + 1); + va_start(args, fmt); + vsnprintf(&v->data[v->len], len + 1, fmt, args); + va_end(args); + v->len += len; +} diff --git a/ser/utils.h b/ser/utils.h new file mode 100644 index 0000000..baaca65 --- /dev/null +++ b/ser/utils.h @@ -0,0 +1,60 @@ +#ifndef UTILS_H +#define UTILS_H + +#ifdef NDEBUG +#define IF_DEBUG(...) +#else +#define IF_DEBUG(...) __VA_ARGS__ +#endif + +#include +#include + +typedef unsigned char byte; + +#include "hashmap.h" +#include "vector_impl.h" + +#define STRING_SLICE(str) ((StringSlice){.ptr = str, .len = sizeof(str) - 1}) + +typedef struct { + const char *ptr; + uint32_t len; +} StringSlice; + +bool string_slice_equal(const void *a, const void *b); +uint32_t string_slice_hash(Hasher state, const void *item); + +bool pointer_equal(const void *a, const void *b); +uint32_t pointer_hash(Hasher state, const void *item); + +VECTOR_IMPL(void *, PointerVec, pointer); +VECTOR_IMPL(const char *, ConstStringVec, const_string); +VECTOR_IMPL(char, CharVec, char); +VECTOR_IMPL(uint64_t, UInt64Vec, uint64); +VECTOR_IMPL(CharVec, CharVec2, char_vec, _vec_char_drop); +VECTOR_IMPL(StringSlice, StringSliceVec, string_slice); + +// Styled strings are very mutable strings +typedef struct { + CharVec chars; + ConstStringVec styles; +} StyledString; + +StyledString styled_string_init(); +void styled_string_clear(StyledString *str); +void styled_string_set(StyledString *str, size_t index, const char *style, const char *s, size_t len); +void styled_string_set_style(StyledString *str, size_t index, const char *style, size_t len); +void styled_string_fill(StyledString *str, size_t index, char fill, size_t len); +size_t styled_string_available_space(StyledString *str, size_t from, size_t stop_at); +void styled_string_push(StyledString *str, const char *style, const char *s); +char *styled_string_build(StyledString *str); +void 
styled_string_drop(StyledString str); +// Printf to an allocated string +char *msprintf(const char *fmt, ...); +void charvec_push_str(CharVec *v, const char *str); +void charvec_format(CharVec *v, const char *fmt, ...); + +VECTOR_IMPL(StyledString, StyledStringVec, styled_string, styled_string_drop); + +#endif diff --git a/ser/vector.h b/ser/vector.h new file mode 100644 index 0000000..cda21e2 --- /dev/null +++ b/ser/vector.h @@ -0,0 +1,30 @@ +#ifndef VECTOR_H +#define VECTOR_H +#include "arena_allocator.h" +#include "ast.h" +#include "codegen.h" +#include "eval.h" +#include "lexer.h" +#include "parser.h" +#include "source.h" +#include "utils.h" + +// This files contains the generic vector macro, which are generated according the VECTOR_IMPL_LIST + +// clang-format: off +#define VECTOR_IMPL_LIST \ + (Token, TokenVec, token, token_drop), (LexingError, LexingErrorVec, lexing_error, lexing_error_drop), \ + (AstItem, AstItemVec, ast_item), (AstField, AstFieldVec, ast_field), \ + (AstAttributeOrMessage, AstAttributeOrMessageVec, ast_attribute_or_message), \ + (ArenaBlock, ArenaBlockVec, arena_block, arena_block_drop), (ParsingError, ParsingErrorVec, parsing_error), \ + (Field, FieldVec, field, field_drop), (EvalError, EvalErrorVec, eval_error), \ + (const char *, ConstStringVec, const_string), (StringSlice, StringSliceVec, string_slice), (char, CharVec, char), \ + (CharVec, CharVec2, char_vec, _vec_char_drop), (ReportSpan, ReportSpanVec, report_span), \ + (StyledString, StyledStringVec, styled_string, styled_string_drop), (Span, SpanVec, span), \ + (void *, PointerVec, pointer), (SpannedStringSlice, SpannedStringSliceVec, spanned_string_slice), \ + (MessageObject, MessageObjectVec, message_object, message_drop), \ + (MessagesObject, MessagesObjectVec, messages_object, messages_drop), (uint64_t, UInt64Vec, uint64), \ + (FieldAccessor, FieldAccessorVec, field_accessor, field_accessor_drop) +#include "vector_impl.h" +// clang-format: on +#endif diff --git 
// VECTOR_IMPL(T, V, qualifier[, drop_fn])
//
// Declares the growable array type `V` holding elements of type `T`
// ({data, len, cap}) together with a family of static inline helpers named
// _vec_<qualifier>_<operation>. When the optional `drop_fn` is given it is
// called on elements that the vector discards (drop, clear, and pop_opt when
// the caller does not take the value).
//
// Generated operations:
//   init / init_with_cap  create an empty vector (optionally pre-allocated)
//   drop                  release the elements and the storage
//   grow                  ensure capacity for at least `cap` elements
//   shrink                reduce the capacity to the current length
//   push / push_array     append one or several elements
//   clone                 shallow copy of the elements
//   pop / pop_opt         remove the last element
//   clear                 remove every element, keeping the storage
//   get / get_opt         read an element
//   take                  remove and return the element at `index`
//   fill_range / fill     overwrite a range with `item`, extending if needed
//   insert / insert_array insert at `index`, shifting the tail up
//   set_array             overwrite from `index`, extending if needed
//   splice                remove `count` elements starting at `index`
//
// Allocation results are checked with assert_alloc and preconditions with
// debug_assert (both come from the project's assert.h).
// The expansion ends with a _Static_assert so that the invocation must be
// followed by a semicolon.
// Comments inside the macro body must use the block form: a line comment
// would swallow the continuation lines that follow it.
#define VECTOR_IMPL(T, V, qualifier, ...) \
    typedef struct { \
        T *data; \
        size_t len; \
        size_t cap; \
    } V; \
    __attribute__((unused)) static inline V _vec_##qualifier##_init() { return (V){.data = NULL, .len = 0, .cap = 0}; } \
    __attribute__((unused)) static inline void _vec_##qualifier##_drop(V vec) { \
        __VA_OPT__({ \
            for (size_t i = 0; i < vec.len; i++) { \
                __VA_ARGS__(vec.data[i]); \
            } \
        }) \
        if (vec.data != NULL) { \
            free(vec.data); \
        } \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_grow(V *vec, size_t cap) { \
        if (cap <= vec->cap) \
            return; \
        /* First allocation: take exactly what was asked for. */ \
        if (vec->data == NULL || vec->cap == 0) { \
            vec->data = malloc(cap * sizeof(T)); \
            assert_alloc(vec->data); \
            vec->cap = cap; \
            return; \
        } \
        /* Later allocations at least double the capacity. */ \
        if (cap < 2 * vec->cap) { \
            cap = 2 * vec->cap; \
        } \
        if (cap < 4) { \
            cap = 4; \
        } \
        T *newp = realloc(vec->data, cap * sizeof(T)); \
        assert_alloc(newp); \
        vec->data = newp; \
        vec->cap = cap; \
    } \
    __attribute__((unused)) static inline V _vec_##qualifier##_init_with_cap(size_t cap) { \
        V vec = {.data = NULL, .len = 0, .cap = 0}; \
        _vec_##qualifier##_grow(&vec, cap); \
        return vec; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_shrink(V *vec) { \
        if (vec->len > 0) { \
            /* The size is in bytes: len elements of sizeof(T) each. */ \
            T *newp = realloc(vec->data, vec->len * sizeof(T)); \
            assert_alloc(newp); \
            vec->data = newp; \
            vec->cap = vec->len; \
        } else { \
            free(vec->data); \
            vec->data = NULL; \
            vec->cap = 0; \
        } \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_push(V *vec, T val) { \
        _vec_##qualifier##_grow(vec, vec->len + 1); \
        vec->data[vec->len++] = val; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_push_array(V *vec, T const *vals, size_t count) { \
        _vec_##qualifier##_grow(vec, vec->len + count); \
        for (size_t i = 0; i < count; i++) { \
            vec->data[vec->len++] = vals[i]; \
        } \
    } \
    __attribute__((unused)) static inline V _vec_##qualifier##_clone(V *vec) { \
        if (vec->len == 0) { \
            return (V){.data = NULL, .len = 0, .cap = 0}; \
        } \
        V res = {.data = NULL, .len = 0, .cap = 0}; \
        _vec_##qualifier##_grow(&res, vec->len); \
        _vec_##qualifier##_push_array(&res, vec->data, vec->len); \
        return res; \
    } \
    __attribute__((unused)) static inline bool _vec_##qualifier##_pop_opt(V *vec, T *val) { \
        if (vec->len == 0) \
            return false; \
        vec->len--; \
        if (val != NULL) { \
            *val = vec->data[vec->len]; \
        } \
        __VA_OPT__(else { __VA_ARGS__(vec->data[vec->len]); }) \
        return true; \
    } \
    __attribute__((unused)) static inline T _vec_##qualifier##_pop(V *vec) { \
        debug_assert(vec->len > 0, "Popping zero length %s", #V); \
        return vec->data[--vec->len]; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_clear(V *vec) { \
        __VA_OPT__({ \
            for (size_t i = 0; i < vec->len; i++) { \
                __VA_ARGS__(vec->data[i]); \
            } \
        }) \
        vec->len = 0; \
    } \
    __attribute__((unused)) static inline T _vec_##qualifier##_get(V *vec, size_t index) { \
        debug_assert(index < vec->len, "Out of bound index, on %s (index is %lu, but length is %lu)", #V, index, vec->len); \
        return vec->data[index]; \
    } \
    __attribute__((unused)) static inline bool _vec_##qualifier##_get_opt(V *vec, size_t index, T *val) { \
        if (index >= vec->len) { \
            return false; \
        } else if (val != NULL) { \
            *val = vec->data[index]; \
        } \
        return true; \
    } \
    __attribute__((unused)) static inline T _vec_##qualifier##_take(V *vec, size_t index) { \
        debug_assert(index < vec->len, "Out of bound index, on %s (index is %lu but length is %lu)", #V, index, vec->len); \
        T res = vec->data[index]; \
        /* Only the (len - index - 1) elements after index move down. */ \
        if (index != vec->len - 1) \
            memmove(&vec->data[index], &vec->data[index + 1], (vec->len - index - 1) * sizeof(T)); \
        vec->len--; \
        return res; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_fill_range(V *vec, size_t from, size_t to, T item) { \
        debug_assert(from <= vec->len, "Can't start fill past the end of a vector"); \
        _vec_##qualifier##_grow(vec, to); \
        for (size_t i = from; i < to; i++) { \
            vec->data[i] = item; \
        } \
        vec->len = vec->len > to ? vec->len : to; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_fill(V *vec, T item) { \
        _vec_##qualifier##_fill_range(vec, 0, vec->len, item); \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_insert(V *vec, size_t index, T val) { \
        debug_assert(index <= vec->len, "Can't insert past the end of vector"); \
        _vec_##qualifier##_grow(vec, vec->len + 1); \
        if (index < vec->len) { \
            memmove(&vec->data[index + 1], &vec->data[index], (vec->len - index) * sizeof(T)); \
        } \
        vec->data[index] = val; \
        vec->len++; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_insert_array(V *vec, size_t index, T const *vals, size_t count) { \
        debug_assert(index <= vec->len, "Can't insert past the end of vector"); \
        _vec_##qualifier##_grow(vec, vec->len + count); \
        if (index < vec->len) { \
            memmove(&vec->data[index + count], &vec->data[index], (vec->len - index) * sizeof(T)); \
        } \
        for (size_t i = 0; i < count; i++) { \
            vec->data[index + i] = vals[i]; \
        } \
        vec->len += count; \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_set_array(V *vec, size_t index, T const *vals, size_t count) { \
        debug_assert(index <= vec->len, "Can't start set past the end of vector"); \
        _vec_##qualifier##_grow(vec, index + count); \
        for (size_t i = 0; i < count; i++) { \
            vec->data[index + i] = vals[i]; \
        } \
        vec->len = vec->len > (index + count) ? vec->len : (index + count); \
    } \
    __attribute__((unused)) static inline void _vec_##qualifier##_splice(V *vec, size_t index, size_t count) { \
        debug_assert(index < vec->len, "Can't splice past end of vector"); \
        /* Removing more than the tail holds would make len wrap around. */ \
        debug_assert(count <= vec->len - index, "Can't splice past end of vector"); \
        if (count == 0) \
            return; \
        if (index + count < vec->len) { \
            memmove(&vec->data[index], &vec->data[index + count], (vec->len - index - count) * sizeof(T)); \
        } \
        vec->len -= count; \
    } \
    _Static_assert(1, "Semicolon required")
+#define vec_fill(vec, item) _VECTOR_GENERIC(*(vec), fill)(vec, item) +#define vec_insert(vec, index, val) _VECTOR_GENERIC(*(vec), insert)(vec, index, val) +#define vec_insert_array(vec, index, vals, count) _VECTOR_GENERIC(*(vec), insert_array)(vec, index, vals, count) +#define vec_set_array(vec, index, vals, count) _VECTOR_GENERIC(*(vec), set_array)(vec, index, vals, count) +#define vec_splice(vec, index, count) _VECTOR_GENERIC(*(vec), splice)(vec, index, count) +#define vec_foreach(vec, var, expr) \ + do { \ + for (size_t _i = 0; _i < (vec)->len; _i++) { \ + typeof(*(vec)->data) var = (vec)->data[_i]; \ + expr; \ + } \ + } while (false) + +#endif