From 265a0221e8c96710b9f4ddfcd04d420ff756083f Mon Sep 17 00:00:00 2001
From: Patrick MARIE
Date: Fri, 30 Aug 2024 09:53:32 +0200
Subject: [PATCH] implementing garbage collection (ch26)

---
 samples/ch26_gc.lox |  21 ++++
 src/chunk.zig       |  14 ++-
 src/compile.zig     |   7 +-
 src/constant.zig    |  11 +-
 src/main.zig        |  13 +-
 src/memory.zig      | 329 +++++++++++++++++++++++++++++++++++++++++++++
 src/native.zig      |   4 +-
 src/object.zig      |   4 +-
 src/table.zig       |  17 ++-
 src/utils.zig       |   7 --
 src/values.zig      |   4 +-
 src/vm.zig          |  54 +++++++--
 12 files changed, 446 insertions(+), 39 deletions(-)
 create mode 100644 samples/ch26_gc.lox
 create mode 100644 src/memory.zig

diff --git a/samples/ch26_gc.lox b/samples/ch26_gc.lox
new file mode 100644
index 0000000..ccd3323
--- /dev/null
+++ b/samples/ch26_gc.lox
@@ -0,0 +1,21 @@
+fun do_stuff(v) {
+    var s = v + v;
+    print s;
+}
+
+fun blah() {
+    var c = "ha" + "ha";
+
+    for(var i = 0; i < 100; i = i + 1) {
+        var d = "ha" + num2str(i);
+        do_stuff(d);
+    }
+
+    return "hiii";
+}
+
+var a = blah();
+
+a = "updated";
+// GC here.
+print a;
\ No newline at end of file
diff --git a/src/chunk.zig b/src/chunk.zig
index 2ed26b7..26479c6 100644
--- a/src/chunk.zig
+++ b/src/chunk.zig
@@ -5,12 +5,15 @@ const Allocator = std.mem.Allocator;
 const Value = @import("./values.zig").Value;
 const ValueArray = @import("./values.zig").ValueArray;
 const OpCode = @import("./opcode.zig").OpCode;
+const VM = @import("./vm.zig").VM;
+const ZloxAllocator = @import("./memory.zig").ZloxAllocator;
 
-const grow_capacity = @import("./utils.zig").grow_capacity;
+const constants = @import("./constant.zig");
 const utils = @import("./utils.zig");
 
 pub const Chunk = struct {
     allocator: Allocator,
+    vm: *VM,
 
     count: usize,
     capacity: usize,
@@ -18,10 +21,11 @@ pub const Chunk = struct {
     lines: []usize,
     constants: ValueArray,
 
-    pub fn new(allocator: Allocator) *Chunk {
+    pub fn new(allocator: Allocator, vm: *VM) *Chunk {
         var chunk: *Chunk = allocator.create(Chunk) catch unreachable;
 
         chunk.allocator = allocator;
+        chunk.vm = vm;
         chunk.count = 0;
         chunk.capacity = 0;
         chunk.code = &.{};
@@ -45,7 +49,8 @@ pub const Chunk = struct {
     pub fn write(self: *Chunk, byte: u8, line: usize) !void {
         if (self.capacity < self.count + 1) {
             const old_capacity = self.capacity;
-            self.capacity = grow_capacity(old_capacity);
+            self.capacity = ZloxAllocator.grow_capacity(old_capacity);
+
             self.code = try self.allocator.realloc(self.code, self.capacity);
             self.lines = try self.allocator.realloc(self.lines, self.capacity);
         }
@@ -151,7 +156,10 @@ pub const Chunk = struct {
     }
 
     pub fn add_constant(self: *Chunk, value: Value) !usize {
+        _ = try self.vm.push(value);
+        defer _ = self.vm.pop();
         try self.constants.write(value);
+
         return self.constants.count - 1;
     }
 };
diff --git a/src/compile.zig b/src/compile.zig
index 9831af1..b2e744b 100644
--- a/src/compile.zig
+++ b/src/compile.zig
@@ -41,7 +41,7 @@ const ParserRule = struct {
     precedence: Precedence,
 };
 
-const Parser = struct {
+pub const Parser = struct {
     compiler: *Compiler,
     current: ?Token,
     previous: ?Token,
@@ -865,7 +865,7 @@ const FunctionType = enum {
     Script,
 };
 
-const Compiler = struct {
+pub const Compiler = struct {
     enclosing: ?*Compiler,
     function: *Obj.Function,
 
@@ -925,6 +925,8 @@ pub fn compile(vm: *VM, contents: []const u8) !?*Obj.Function {
     var scanner = Scanner.init(contents);
     var parser = Parser.new(vm, &compiler, &scanner);
 
+    vm.parser = &parser;
+
     parser.advance();
 
     while (!parser.match(TokenType.EOF)) {
@@ -933,6 +935,7 @@ pub fn compile(vm: *VM, contents: []const u8) !?*Obj.Function {
 
     const function = try parser.end_parser();
 
+    vm.parser = null;
     if (!parser.had_error) {
         return function;
     } else {
diff --git a/src/constant.zig b/src/constant.zig
index 68a7740..0f390ff 100644
--- a/src/constant.zig
+++ b/src/constant.zig
@@ -10,7 +10,14 @@ pub const UINT8_COUNT = UINT8_MAX + 1;
 pub const FRAMES_MAX = 64;
 pub const STACK_MAX = (FRAMES_MAX * UINT8_MAX);
 
-pub const DEBUG_PRINT_CODE = true;
-pub const DEBUG_TRACE_EXECUTION = true;
+pub const DEBUG_PRINT_CODE = false;
+pub const DEBUG_TRACE_EXECUTION = false;
 pub const DEBUG_PRINT_INTERNAL_STRINGS = false;
 pub const DEBUG_PRINT_GLOBALS = false;
+
+pub const DEBUG_STRESS_GC = true;
+pub const DEBUG_LOG_GC = false;
+
+pub const USE_CUSTON_ALLOCATOR = true;
+
+pub const GC_HEAP_GROW_FACTOR = 2;
diff --git a/src/main.zig b/src/main.zig
index 07179c6..5f7bbd2 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -9,6 +9,8 @@ const OpCode = @import("./opcode.zig").OpCode;
 const VM = @import("./vm.zig").VM;
 const InterpretResult = @import("./vm.zig").InterpretResult;
 
+const ZloxAllocator = @import("./memory.zig").ZloxAllocator;
+
 // XXX imported to run tests.
 const Table = @import("./table.zig");
 
@@ -62,8 +64,15 @@ pub fn main() !void {
     const args = try std.process.argsAlloc(allocator);
     defer std.process.argsFree(allocator, args);
 
-    var vm = VM.new(allocator);
-    vm.init_vm();
+    var vm = VM.new();
+    // Function scope on purpose: the Allocator given to the VM points at this wrapper.
+    var zlox_allocator_generator = ZloxAllocator.init(allocator, &vm);
+    if (constants.USE_CUSTON_ALLOCATOR) {
+        vm.init_vm(zlox_allocator_generator.allocator());
+    } else {
+        vm.init_vm(allocator);
+    }
+
     defer vm.destroy();
 
     if (args.len == 1) {
diff --git a/src/memory.zig b/src/memory.zig
new file mode 100644
index 0000000..2d8247d
--- /dev/null
+++ b/src/memory.zig
@@ -0,0 +1,329 @@
+//! Garbage-collecting allocator wrapper for zlox (mark & sweep, chapter 26).
+
+const std = @import("std");
+
+const VM = @import("./vm.zig").VM;
+const Obj = @import("./object.zig").Obj;
+const ObjType = @import("./object.zig").ObjType;
+const Table = @import("./table.zig").Table;
+const Entry = @import("./table.zig").Entry;
+const Value = @import("./values.zig").Value;
+const Compiler = @import("./compile.zig").Compiler;
+const ValueArray = @import("./values.zig").ValueArray;
+
+const constants = @import("./constant.zig");
+
+/// Wraps a parent allocator, tracks the number of live bytes and runs a
+/// mark & sweep collection over the VM's objects when the budget is exceeded.
+pub const ZloxAllocator = struct {
+    parent_allocator: std.mem.Allocator,
+    vm: *VM,
+    /// Bytes currently handed out through this allocator.
+    bytes_allocated: usize,
+    /// Value of `bytes_allocated` above which the next collection starts.
+    next_gc: usize,
+    /// True while a collection is running; prevents re-entrant collections.
+    current_gc: bool,
+
+    const Self = @This();
+
+    /// Creates the wrapper; nothing is allocated until `allocator()` is used.
+    pub fn init(parent_allocator: std.mem.Allocator, vm: *VM) Self {
+        return .{
+            .parent_allocator = parent_allocator,
+            .vm = vm,
+            .bytes_allocated = 0,
+            .next_gc = 1024,
+            .current_gc = false,
+        };
+    }
+
+    /// No-op: the wrapper owns no resources of its own.
+    pub fn deinit(self: *Self) void {
+        _ = self;
+    }
+
+    /// Returns the `std.mem.Allocator` interface. It stores a pointer to
+    /// `self`, so `self` must outlive every use of the returned allocator.
+    pub fn allocator(self: *Self) std.mem.Allocator {
+        return .{
+            .ptr = self,
+            .vtable = &.{
+                .alloc = alloc,
+                .resize = resize,
+                .free = free,
+            },
+        };
+    }
+
+    /// vtable `alloc`: forwards to the parent, then collects if over budget.
+    /// NOTE: unlike `resize`, this path does not honor DEBUG_STRESS_GC.
+    fn alloc(ctx: *anyopaque, len: usize, ptr_align: u8, ret_addr: usize) ?[*]u8 {
+        const self: *Self = @ptrCast(@alignCast(ctx));
+
+        const res = self.parent_allocator.rawAlloc(len, ptr_align, ret_addr);
+
+        if (self.bytes_allocated > self.next_gc) {
+            self.collect_garbage();
+        }
+
+        // Only account for memory that was actually obtained.
+        if (res != null) {
+            self.bytes_allocated += len;
+        }
+
+        if (constants.DEBUG_LOG_GC) {
+            if (res == null) {
+                std.debug.print("GC: failed allocing buffer of size {d}\n", .{len});
+            } else {
+                std.debug.print("GC: allocing buffer {*} of size {d}\n", .{ res.?, len });
+            }
+        }
+
+        return res;
+    }
+
+    /// vtable `resize`: may collect before growing, then forwards to the parent.
+    fn resize(ctx: *anyopaque, buf: []u8, log2_buf_align: u8, new_len: usize, ret_addr: usize) bool {
+        const self: *Self = @ptrCast(@alignCast(ctx));
+
+        if (constants.DEBUG_LOG_GC) {
+            std.debug.print("GC: resizing buffer {*} from size {d} to size {d}\n", .{ buf, buf.len, new_len });
+        }
+
+        // Only a growing request can push the heap over its budget.
+        if (new_len > buf.len) {
+            const projected = self.bytes_allocated + (new_len - buf.len);
+            if (projected > self.next_gc or constants.DEBUG_STRESS_GC) {
+                self.collect_garbage();
+            }
+        }
+
+        const resized = self.parent_allocator.rawResize(buf, log2_buf_align, new_len, ret_addr);
+        if (resized) {
+            // "- old + new" (not "+ (new - old)") so a shrinking resize cannot underflow usize.
+            self.bytes_allocated = (self.bytes_allocated -| buf.len) + new_len;
+        }
+
+        return resized;
+    }
+
+    /// vtable `free`: returns the bytes to the GC budget and forwards to the parent.
+    fn free(ctx: *anyopaque, buf: []u8, log2_buf_align: u8, ret_addr: usize) void {
+        const self: *Self = @ptrCast(@alignCast(ctx));
+
+        if (constants.DEBUG_LOG_GC) {
+            std.debug.print("GC: freeing buffer {*} of size {d} ({d}/{d})\n", .{ buf.ptr, buf.len, self.bytes_allocated, self.next_gc });
+        }
+
+        // Saturating: a buffer whose allocation was never counted must not wrap the counter.
+        self.bytes_allocated -|= buf.len;
+
+        self.parent_allocator.rawFree(buf, log2_buf_align, ret_addr);
+    }
+
+    /// Points the collector at another VM.
+    pub fn set_vm(self: *Self, vm: *VM) void {
+        self.vm = vm;
+    }
+
+    /// Growth policy shared by the dynamic arrays: 8 slots first, then doubling.
+    pub fn grow_capacity(capacity: usize) usize {
+        if (capacity < 8) {
+            return 8;
+        }
+        return capacity * 2;
+    }
+
+    /// Runs one full mark & sweep cycle; a no-op when a cycle is already running.
+    pub fn collect_garbage(self: *Self) void {
+        if (self.current_gc) {
+            return;
+        }
+        if (comptime constants.DEBUG_LOG_GC == true) {
+            std.debug.print("\nGC: collect_garbage(): begin\n", .{});
+        }
+        self.current_gc = true;
+
+        self.mark_roots();
+        self.trace_references();
+        self.table_remove_white(&self.vm.strings);
+
+        self.sweep();
+
+        self.next_gc = self.bytes_allocated * constants.GC_HEAP_GROW_FACTOR;
+
+        if (comptime constants.DEBUG_LOG_GC == true) {
+            std.debug.print("GC: collect_garbage(): end\n\n", .{});
+        }
+        self.current_gc = false;
+    }
+
+    /// Marks every root: stack slots, call frames, open upvalues, globals and compiler functions.
+    pub fn mark_roots(self: *Self) void {
+        for (0..self.vm.stack_top) |stack_idx| {
+            self.mark_value(&self.vm.stack[stack_idx]);
+        }
+
+        for (0..self.vm.frame_count) |frame_idx| {
+            self.mark_object(&self.vm.frames[frame_idx].closure.obj);
+        }
+
+        var upvalue = self.vm.open_upvalues;
+        while (upvalue != null) {
+            self.mark_object(&upvalue.?.obj);
+            upvalue = upvalue.?.next;
+        }
+
+        self.mark_table(&self.vm.globals);
+
+        self.mark_compiler_roots();
+    }
+
+    /// Marks the object behind `value`, if it holds one.
+    pub fn mark_value(self: *Self, value: *Value) void {
+        if (value.is_obj()) {
+            self.mark_object(value.as_obj());
+        }
+    }
+
+    /// Marks `obj` and queues it on the gray stack; already-marked objects are skipped.
+    pub fn mark_object(self: *Self, obj: *Obj) void {
+        if (obj.is_marked) {
+            return;
+        }
+
+        if (constants.DEBUG_LOG_GC) {
+            std.debug.print("GC: mark {*} ", .{obj});
+            obj.print();
+            std.debug.print("\n", .{});
+        }
+        obj.is_marked = true;
+
+        if (self.vm.gray_capacity < self.vm.gray_count + 1) {
+            self.vm.gray_capacity = grow_capacity(self.vm.gray_capacity);
+            // This realloc re-enters the vtable above; the `current_gc` flag set by
+            // collect_garbage() keeps a collection in progress from being restarted.
+            self.vm.gray_stack = self.allocator().realloc(self.vm.gray_stack.?, self.vm.gray_capacity) catch {
+                @panic("failed to realloc gray stack");
+            };
+        }
+
+        self.vm.gray_stack.?[self.vm.gray_count] = obj;
+        self.vm.gray_count += 1;
+    }
+
+    /// Marks every key and value stored in `table`.
+    pub fn mark_table(self: *Self, table: *Table) void {
+        for (0..table.capacity) |idx| {
+            const entry = &table.entries[idx];
+            if (entry.key != null) {
+                self.mark_object(&entry.key.?.obj);
+            }
+
+            self.mark_value(&entry.value);
+        }
+    }
+
+    /// Marks the function of every compiler in the chain; does nothing when `vm.parser` is null.
+    pub fn mark_compiler_roots(self: *Self) void {
+        if (self.vm.parser == null) {
+            return;
+        }
+        var compiler: ?*Compiler = self.vm.parser.?.compiler;
+
+        while (compiler != null) {
+            self.mark_object(&compiler.?.function.obj);
+            compiler = compiler.?.enclosing;
+        }
+    }
+
+    /// Drains the gray stack, blackening each object popped from it.
+    pub fn trace_references(self: *Self) void {
+        while (self.vm.gray_count > 0) {
+            self.vm.gray_count -= 1;
+            const obj: *Obj = self.vm.gray_stack.?[self.vm.gray_count];
+            self.blacken_object(obj);
+        }
+    }
+
+    /// Marks everything `obj` refers to.
+    pub fn blacken_object(self: *Self, obj: *Obj) void {
+        if (constants.DEBUG_LOG_GC) {
+            std.debug.print("GC: {*} blacken ", .{obj});
+            obj.print();
+            std.debug.print("\n", .{});
+        }
+        switch (obj.kind) {
+            ObjType.Native, ObjType.String => {},
+            ObjType.Upvalue => self.mark_value(&obj.as_upvalue().closed),
+            ObjType.Function => {
+                const function: *Obj.Function = obj.as_function();
+                if (function.name != null) {
+                    self.mark_object(&function.name.?.obj);
+                }
+
+                self.mark_array(&function.chunk.constants);
+            },
+            ObjType.Closure => {
+                const closure: *Obj.Closure = obj.as_closure();
+                self.mark_object(&closure.function.obj);
+                for (0..closure.upvalue_count) |i| {
+                    if (closure.upvalues[i] != null) {
+                        self.mark_object(&closure.upvalues[i].?.obj);
+                    }
+                }
+            },
+        }
+    }
+
+    /// Marks every value of `value_array`.
+    pub fn mark_array(self: *Self, value_array: *ValueArray) void {
+        for (0..value_array.count) |i| {
+            self.mark_value(&value_array.values[i]);
+        }
+    }
+
+    /// Deletes the entries of `table` whose key was not marked.
+    pub fn table_remove_white(self: *Self, table: *Table) void {
+        _ = self;
+
+        for (0..table.capacity) |idx| {
+            const entry: *Entry = &table.entries[idx];
+            if (entry.key != null and !entry.key.?.obj.is_marked) {
+                _ = table.del(entry.key.?);
+            }
+        }
+    }
+
+    /// Unlinks and destroys every unmarked object; clears the mark on the survivors.
+    pub fn sweep(self: *Self) void {
+        var previous: ?*Obj = null;
+        var object: ?*Obj = self.vm.objects;
+
+        while (object != null) {
+            if (object.?.is_marked) {
+                object.?.is_marked = false;
+                previous = object;
+                object = object.?.next;
+            } else {
+                const unreached: *Obj = object.?;
+                object = object.?.next;
+
+                if (previous != null) {
+                    previous.?.next = object;
+                } else {
+                    self.vm.objects = object;
+                }
+
+                if (comptime constants.DEBUG_LOG_GC == true) {
+                    std.debug.print("GC: sweeping {*}: ", .{unreached});
+                    unreached.print();
+                    std.debug.print("\n", .{});
+                }
+
+                unreached.destroy();
+            }
+        }
+    }
+};
diff --git a/src/native.zig b/src/native.zig
index ddce08c..52adcb3 100644
--- a/src/native.zig
+++ b/src/native.zig
@@ -58,7 +58,7 @@ pub fn num2str(vm: *VM, arg_count: usize, args: []Value) Value {
         return Value.nil_val();
     };
 
-    const result = Obj.String.new(vm.allocator, str);
+    const result = Obj.String.new(vm, str);
 
-    return Value.obj_val(result);
+    return Value.obj_val(&result.obj);
 }
diff --git a/src/object.zig b/src/object.zig
index 19a7da5..e711f5a 100644
--- a/src/object.zig
+++ b/src/object.zig
@@ -22,6 +22,7 @@ pub const Obj = struct {
     kind: ObjType,
     allocator: Allocator,
     next: ?*Obj,
+    is_marked: bool,
 
     fn new(comptime T: type, vm: *VM, kind: ObjType) *T {
         const created_obj = vm.allocator.create(T) catch unreachable;
@@ -30,6 +31,7 @@ pub const Obj = struct {
             .kind = kind,
             .allocator = vm.allocator,
             .next = vm.objects,
+            .is_marked = false,
         };
 
         vm.objects = &created_obj.obj;
@@ -79,7 +81,7 @@ pub const Obj = struct {
             function_obj.arity = 0;
             function_obj.upvalue_count = 0;
 
-            function_obj.chunk = Chunk.new(vm.allocator);
+            function_obj.chunk = Chunk.new(vm.allocator, vm);
             function_obj.name = null;
 
             return function_obj;
diff --git a/src/table.zig b/src/table.zig
index fc72980..1b67a09 100644
--- a/src/table.zig
+++ b/src/table.zig
@@ -6,13 +6,13 @@ const constants = @import("./constant.zig");
 
 const Obj = @import("./object.zig").Obj;
 const Value = @import("./values.zig").Value;
+const ZloxAllocator = @import("./memory.zig").ZloxAllocator;
 
-const grow_capacity = @import("./utils.zig").grow_capacity;
 const compute_hash = @import("./utils.zig").compute_hash;
 
-const Entry = struct {
-    key: ?*Obj.String,
-    value: Value,
+pub const Entry = struct {
+    key: ?*Obj.String = null,
+    value: Value = Value.nil_val(),
 };
 
 pub const Table = struct {
@@ -43,7 +43,7 @@ pub const Table = struct {
         const current_capacity: f32 = @floatFromInt(self.capacity);
 
         if (current_count > current_capacity * constants.TABLE_MAX_LOAD) {
-            const capacity = grow_capacity(self.capacity);
+            const capacity = ZloxAllocator.grow_capacity(self.capacity);
             self.adjust_capacity(capacity);
         }
 
@@ -90,11 +90,10 @@ pub const Table = struct {
     }
 
     pub fn adjust_capacity(self: *Table, capacity: usize) void {
-        var entries = self.allocator.alloc(Entry, capacity) catch unreachable;
+        const entries = self.allocator.alloc(Entry, capacity) catch unreachable;
 
-        for (0..entries.len) |idx| {
-            entries[idx].key = null;
-            entries[idx].value = Value.nil_val();
+        for (entries) |*e| {
+            e.* = Entry{};
         }
 
         self.count = 0;
diff --git a/src/utils.zig b/src/utils.zig
index 7a3e01b..91f25ba 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -6,13 +6,6 @@ const Token = @import("./scanner.zig").Token;
 
 const print_value = @import("./values.zig").print_value;
 
-pub fn grow_capacity(capacity: usize) usize {
-    if (capacity < 8) {
-        return 8;
-    }
-    return capacity * 2;
-}
-
 pub fn simple_instruction(opcode_name: []const u8, offset: usize) usize {
     debug.print("{s:<16}\n", .{opcode_name});
 
diff --git a/src/values.zig b/src/values.zig
index 4deae4f..65c5cc0 100644
--- a/src/values.zig
+++ b/src/values.zig
@@ -4,7 +4,7 @@ const debug = std.debug;
 const Allocator = std.mem.Allocator;
 
 const Obj = @import("./object.zig").Obj;
-const utils = @import("./utils.zig");
+const ZloxAllocator = @import("./memory.zig").ZloxAllocator;
 
 pub const ValueType = enum {
     Bool,
@@ -158,7 +158,7 @@ pub const ValueArray = struct {
     pub fn write(self: *ValueArray, value: Value) !void {
         if (self.capacity < self.count + 1) {
             const old_capacity = self.capacity;
-            self.capacity = utils.grow_capacity(old_capacity);
+            self.capacity = ZloxAllocator.grow_capacity(old_capacity);
             self.values = try self.allocator.realloc(self.values, self.capacity);
         }
 
diff --git a/src/vm.zig b/src/vm.zig
index a966d38..d61e0e7 100644
--- a/src/vm.zig
+++ b/src/vm.zig
@@ -4,11 +4,14 @@ const Allocator = std.mem.Allocator;
 
 const constants = @import("./constant.zig");
 
+const ZloxAllocator = @import("./memory.zig").ZloxAllocator;
+
 const Chunk = @import("./chunk.zig").Chunk;
 const OpCode = @import("./opcode.zig").OpCode;
 const Value = @import("./values.zig").Value;
 const Obj = @import("./object.zig").Obj;
 const ObjType = @import("./object.zig").ObjType;
+const Parser = @import("./compile.zig").Parser;
 const NativeFn = @import("./object.zig").NativeFn;
 
 const Table = @import("./table.zig").Table;
@@ -42,25 +45,40 @@ pub const VM = struct {
     frame_count: usize,
     open_upvalues: ?*Obj.Upvalue,
     objects: ?*Obj,
+    parser: ?*Parser,
+    gray_count: usize,
+    gray_capacity: usize,
+    gray_stack: ?[]*Obj,
 
-    pub fn new(allocator: Allocator) VM {
-        return VM{
-            .allocator = allocator,
+    pub fn new() VM {
+        const vm = VM{
+            .allocator = undefined,
             .stack = undefined,
             .stack_top = 0,
-            .strings = Table.new(allocator),
-            .globals = Table.new(allocator),
+            .strings = undefined,
+            .globals = undefined,
             .frames = undefined,
             .frame_count = 0,
             .open_upvalues = null,
             .objects = null,
+            .parser = null,
+            .gray_capacity = 0,
+            .gray_count = 0,
+            .gray_stack = &.{},
         };
+
+        return vm;
     }
 
-    pub fn init_vm(self: *VM) void {
+    pub fn init_vm(self: *VM, allocator: Allocator) void {
+        self.allocator = allocator;
+        self.globals = Table.new(self.allocator);
+        self.strings = Table.new(self.allocator);
+
         self.define_native("clock", natives.clock);
         self.define_native("power", natives.power);
         self.define_native("str2num", natives.str2num);
+        self.define_native("num2str", natives.num2str);
     }
 
     pub fn destroy(self: *VM) void {
@@ -71,6 +89,10 @@ pub const VM = struct {
         self.strings.destroy();
         self.globals.destroy();
         self.destroy_objects();
+
+        if (self.gray_stack) |gray_stack| {
+            self.allocator.free(gray_stack);
+        }
     }
 
     pub fn destroy_objects(self: *VM) void {
@@ -82,6 +104,15 @@ pub const VM = struct {
         }
     }
 
+    pub fn dump_objects(self: *VM) void {
+        var obj = self.objects;
+        while (obj != null) {
+            const obj_next = obj.?.next;
+            std.debug.print("OBJ: {*}\n", .{obj.?});
+            obj = obj_next;
+        }
+    }
+
     inline fn current_chunk(self: *VM) *Chunk {
         return self.frames[self.frame_count - 1].closure.function.chunk;
     }
@@ -329,13 +360,15 @@ pub const VM = struct {
     }
 
     pub fn concatenate(self: *VM) !void {
-        const b = self.pop().as_cstring();
-        const a = self.pop().as_cstring();
+        const b = self.peek(0).as_cstring();
+        const a = self.peek(1).as_cstring();
 
         const concat_str = try std.mem.concat(self.current_chunk().allocator, u8, &.{ a, b });
-
         const string_obj = self.take_string(concat_str);
 
+        _ = self.pop();
+        _ = self.pop();
+
         try self.push(Value.obj_val(&string_obj.obj));
     }
 
@@ -395,7 +428,10 @@ pub const VM = struct {
 
     pub fn allocate_string(self: *VM, source: []const u8) *Obj.String {
         const obj_string = Obj.String.new(self, source);
+
+        self.push(Value.obj_val(&obj_string.obj)) catch @panic("VM stack overflow while rooting a new string");
         _ = self.strings.set(obj_string, Value.nil_val());
+        _ = self.pop();
 
         return obj_string;
     }