From 42646b65c2bbb3f901a10e1e4a265d2ae611b50f Mon Sep 17 00:00:00 2001 From: Patrick MARIE Date: Fri, 30 Aug 2024 09:53:32 +0200 Subject: [PATCH] implementing garbage collection (ch26) --- samples/ch24_fib.lox | 6 +- samples/ch26_gc.lox | 16 +++ src/chunk.zig | 14 ++- src/compile.zig | 6 +- src/constant.zig | 11 +- src/main.zig | 20 ++- src/memory.zig | 290 +++++++++++++++++++++++++++++++++++++++++++ src/native.zig | 4 +- src/object.zig | 4 +- src/table.zig | 19 ++- src/utils.zig | 7 -- src/values.zig | 4 +- src/vm.zig | 46 ++++++- 13 files changed, 405 insertions(+), 42 deletions(-) create mode 100644 samples/ch26_gc.lox create mode 100644 src/memory.zig diff --git a/samples/ch24_fib.lox b/samples/ch24_fib.lox index d2742e7..3cab42d 100644 --- a/samples/ch24_fib.lox +++ b/samples/ch24_fib.lox @@ -1,9 +1,5 @@ -fun fib(n) { - if (n < 2) return n; - return fib(n - 2) + fib(n - 1); -} var start = clock(); -print fib(20); +// print fib(20); print clock() - start; diff --git a/samples/ch26_gc.lox b/samples/ch26_gc.lox new file mode 100644 index 0000000..0090578 --- /dev/null +++ b/samples/ch26_gc.lox @@ -0,0 +1,16 @@ +fun blah() { + var c = "ha" + "ha"; + + for(var i = 0; i < 100; i = i + 1) { + // var d = "ha" + num2str(i); + // print d; + } + + return "hiii"; +} + +var a = blah(); + +a = "updated"; +// GC here. +print a; \ No newline at end of file diff --git a/src/chunk.zig b/src/chunk.zig index 2ed26b7..26479c6 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -5,12 +5,15 @@ const Allocator = std.mem.Allocator; const Value = @import("./values.zig").Value; const ValueArray = @import("./values.zig").ValueArray; const OpCode = @import("./opcode.zig").OpCode; +const VM = @import("./vm.zig").VM; +const ZloxAllocator = @import("./memory.zig").ZloxAllocator; -const grow_capacity = @import("./utils.zig").grow_capacity; +const constants = @import("./constant.zig"); const utils = @import("./utils.zig"); pub const Chunk = struct { allocator: Allocator, + vm: *VM, count: usize, capacity: usize, @@ -18,10 +21,11 @@ pub const Chunk = struct { lines: []usize, constants: ValueArray, - pub fn new(allocator: Allocator) *Chunk { + pub fn new(allocator: Allocator, vm: *VM) *Chunk { var chunk: *Chunk = allocator.create(Chunk) catch unreachable; chunk.allocator = allocator; + chunk.vm = vm; chunk.count = 0; chunk.capacity = 0; chunk.code = &.{}; @@ -45,7 +49,8 @@ pub const Chunk = struct { pub fn write(self: *Chunk, byte: u8, line: usize) !void { if (self.capacity < self.count + 1) { const old_capacity = self.capacity; - self.capacity = grow_capacity(old_capacity); + self.capacity = ZloxAllocator.grow_capacity(old_capacity); + self.code = try self.allocator.realloc(self.code, self.capacity); self.lines = try self.allocator.realloc(self.lines, self.capacity); } @@ -151,7 +156,10 @@ pub const Chunk = struct { } pub fn add_constant(self: *Chunk, value: Value) !usize { + _ = try self.vm.push(value); try self.constants.write(value); + _ = self.vm.pop(); + return self.constants.count - 1; } }; diff --git a/src/compile.zig b/src/compile.zig index 9831af1..959ac46 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -41,7 +41,7 @@ const ParserRule = struct { precedence: Precedence, }; -const Parser = struct { +pub const Parser = struct { compiler: *Compiler, current: ?Token, previous: ?Token, @@ -865,7 +865,7 @@ const FunctionType = enum { Script, }; -const Compiler = struct { +pub const Compiler = struct { enclosing: ?*Compiler, function: *Obj.Function, @@ -925,6 +925,8 @@ pub fn compile(vm: *VM, contents: []const u8) !?*Obj.Function { var scanner = Scanner.init(contents); var parser = Parser.new(vm, &compiler, &scanner); + vm.parser = &parser; + parser.advance(); while (!parser.match(TokenType.EOF)) { diff --git a/src/constant.zig b/src/constant.zig index 68a7740..3eedde0 100644 --- a/src/constant.zig +++ b/src/constant.zig @@ -10,7 +10,14 @@ pub const UINT8_COUNT = UINT8_MAX + 1; pub const FRAMES_MAX = 64; pub const STACK_MAX = (FRAMES_MAX * UINT8_MAX); -pub const DEBUG_PRINT_CODE = true; -pub const DEBUG_TRACE_EXECUTION = true; +pub const DEBUG_PRINT_CODE = false; +pub const DEBUG_TRACE_EXECUTION = false; pub const DEBUG_PRINT_INTERNAL_STRINGS = false; pub const DEBUG_PRINT_GLOBALS = false; + +pub const DEBUG_STRESS_GC = true; +pub const DEBUG_LOG_GC = true; + +pub const USE_CUSTON_ALLOCATOR = true; + +pub const GC_HEAP_GROW_FACTOR = 2; diff --git a/src/main.zig b/src/main.zig index 07179c6..fdf0e35 100644 --- a/src/main.zig +++ b/src/main.zig @@ -9,6 +9,8 @@ const OpCode = @import("./opcode.zig").OpCode; const VM = @import("./vm.zig").VM; const InterpretResult = @import("./vm.zig").InterpretResult; +const ZloxAllocator = @import("./memory.zig").ZloxAllocator; + // XXX imported to run tests. const Table = @import("./table.zig"); @@ -54,6 +56,13 @@ pub fn run_file(allocator: Allocator, vm: *VM, filepath: []const u8) !void { } } +pub fn get_z_alloc(allocator: std.mem.Allocator) std.mem.Allocator { + var zlox_allocator_generator = ZloxAllocator.init(allocator); + const z_allocator = zlox_allocator_generator.allocator(); + + return z_allocator; +} + pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true }){}; defer _ = debug.assert(gpa.deinit() == .ok); @@ -62,8 +71,15 @@ pub fn main() !void { const args = try std.process.argsAlloc(allocator); defer std.process.argsFree(allocator, args); - var vm = VM.new(allocator); - vm.init_vm(); + var vm = VM.new(); + if (constants.USE_CUSTON_ALLOCATOR) { + var zlox_allocator_generator = ZloxAllocator.init(allocator, &vm); + const z_allocator = zlox_allocator_generator.allocator(); + vm.init_vm(z_allocator); + } else { + vm.init_vm(allocator); + } + defer vm.destroy(); if (args.len == 1) { diff --git a/src/memory.zig b/src/memory.zig new file mode 100644 index 0000000..b162001 --- /dev/null +++ b/src/memory.zig @@ -0,0 +1,290 @@ +const std = @import("std"); + +const VM = @import("./vm.zig").VM; +const Obj = @import("./object.zig").Obj; +const ObjType = @import("./object.zig").ObjType; +const Table = @import("./table.zig").Table; +const Entry = @import("./table.zig").Entry; +const Value = @import("./values.zig").Value; +const Compiler = @import("./compile.zig").Compiler; +const ValueArray = @import("./values.zig").ValueArray; + +const constants = @import("./constant.zig"); + +pub const ZloxAllocator = struct { + parent_allocator: std.mem.Allocator, + vm: *VM, + bytes_allocated: usize, + next_gc: usize, + + const Self = @This(); + + pub fn init(parent_allocator: std.mem.Allocator, vm: *VM) Self { + return .{ + .parent_allocator = parent_allocator, + .vm = vm, + .bytes_allocated = 0, + .next_gc = 1024 * 1024, + }; + } + + pub fn deinit(self: *Self) void { + _ = self; + } + + pub fn allocator(self: *Self) std.mem.Allocator { + return .{ + .ptr = self, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .free = free, + }, + }; + } + + fn alloc(ctx: *anyopaque, len: usize, ptr_align: u8, ret_addr: usize) ?[*]u8 { + const self: *Self = @ptrCast(@alignCast(ctx)); + + const res = self.parent_allocator.rawAlloc(len, ptr_align, ret_addr); + + if (self.bytes_allocated > self.next_gc) { + self.collect_garbage(); + } + + // if (constants.DEBUG_LOG_GC) { + // if (res == null) { + // std.debug.print("GC: failed allocing buffer of size {d}\n", .{len}); + // } else { + // std.debug.print("GC: allocing buffer {*} of size {d}\n", .{ res.?, len }); + // } + // } + + return res; + } + + fn resize(ctx: *anyopaque, buf: []u8, log2_buf_align: u8, new_len: usize, ret_addr: usize) bool { + const self: *Self = @ptrCast(@alignCast(ctx)); + + if (constants.DEBUG_STRESS_GC) { + self.collect_garbage(); + } + + self.bytes_allocated += new_len - buf.len; + + // if (self.bytes_allocated > self.next_gc) { + // self.collect_garbage(); + // } + + return self.parent_allocator.rawResize(buf, log2_buf_align, new_len, ret_addr); + } + + fn free(ctx: *anyopaque, buf: []u8, log2_buf_align: u8, ret_addr: usize) void { + const self: *Self = @ptrCast(@alignCast(ctx)); + + // if (constants.DEBUG_LOG_GC) { + // std.debug.print("GC: freeing buffer {*} of size {d}\n", .{ &buf, buf.len }); + // } + + return self.parent_allocator.rawFree(buf, log2_buf_align, ret_addr); + } + + pub fn set_vm(self: *Self, vm: *VM) void { + self.vm = vm; + } + + pub fn grow_capacity(capacity: usize) usize { + if (capacity < 8) { + return 8; + } + return capacity * 2; + } + + pub fn collect_garbage(self: *Self) void { + if (comptime constants.DEBUG_LOG_GC == true) { + std.debug.print("\nGC: collect_garbage(): begin\n", .{}); + } + + self.mark_roots(); + self.trace_references(); + self.table_remove_white(&self.vm.strings); + + self.sweep(); + + self.next_gc = self.bytes_allocated * constants.GC_HEAP_GROW_FACTOR; + + if (comptime constants.DEBUG_LOG_GC == true) { + std.debug.print("GC: collect_garbage(): end\n\n", .{}); + } + } + + pub fn mark_roots(self: *Self) void { + for (0..self.vm.stack_top) |stack_idx| { + self.vm.stack[stack_idx].print(); + self.mark_value(&self.vm.stack[stack_idx]); + } + + for (0..self.vm.frame_count) |frame_idx| { + self.mark_object(&self.vm.frames[frame_idx].closure.obj); + } + + var upvalue = self.vm.open_upvalues; + while (upvalue != null) { + self.mark_object(&upvalue.?.obj); + upvalue = upvalue.?.next; + } + + self.mark_table(&self.vm.globals); + + self.mark_compiler_roots(); + } + + pub fn mark_value(self: *Self, value: *Value) void { + if (value.is_obj()) { + self.mark_object(value.as_obj()); + } + } + + pub fn mark_object(self: *Self, obj: *Obj) void { + if (obj.is_marked) { + return; + } + + if (constants.DEBUG_LOG_GC) { + std.debug.print("GC: mark {*} ", .{obj}); + obj.print(); + std.debug.print("\n", .{}); + } + obj.is_marked = true; + + if (self.vm.gray_capacity < self.vm.gray_count + 1) { + self.vm.gray_capacity = grow_capacity(self.vm.gray_capacity); + self.vm.gray_stack = self.allocator().realloc(self.vm.gray_stack.?, self.vm.gray_capacity) catch { + @panic("failed to realloc gray stack"); + }; + } + + // doing a realloc here will likely recall mark_roots and so on. + if (self.vm.gray_stack.?.len > self.vm.gray_count) { + self.vm.gray_stack.?[self.vm.gray_count] = obj; + self.vm.gray_count += 1; + } + } + + pub fn mark_table(self: *Self, table: *Table) void { + for (0..table.capacity) |idx| { + const entry = &table.entries[idx]; + if (entry.key != null) { + self.mark_object(&entry.key.?.obj); + } + + self.mark_value(&entry.value); + } + } + + pub fn mark_compiler_roots(self: *Self) void { + var compiler: ?*Compiler = self.vm.parser.?.compiler; + + while (compiler != null) { + std.debug.print("compiler: {any}\n", .{compiler.?.function.obj}); + compiler.?.function.obj.print(); + self.mark_object(&compiler.?.function.obj); + compiler = compiler.?.enclosing; + } + } + + pub fn trace_references(self: *Self) void { + while (self.vm.gray_count > 0) { + self.vm.gray_count -= 1; + const obj: *Obj = self.vm.gray_stack.?[self.vm.gray_count]; + self.blacken_object(obj); + } + } + + pub fn blacken_object(self: *Self, obj: *Obj) void { + if (constants.DEBUG_LOG_GC) { + std.debug.print("GC: {*} blacken ", .{obj}); + obj.print(); + std.debug.print("\n", .{}); + } + switch (obj.kind) { + ObjType.Native, ObjType.String => {}, + ObjType.Upvalue => self.mark_value(&obj.as_upvalue().closed), + ObjType.Function => { + const function: *Obj.Function = obj.as_function(); + if (function.name != null) { + self.mark_object(&function.name.?.obj); + } + + self.mark_array(&function.chunk.constants); + }, + ObjType.Closure => { + const closure: *Obj.Closure = obj.as_closure(); + self.mark_object(&closure.function.obj); + for (0..closure.upvalue_count) |i| { + if (closure.upvalues[i] != null) { + self.mark_object(&closure.upvalues[i].?.obj); + } + } + }, + } + } + + pub fn mark_array(self: *Self, value_array: *ValueArray) void { + for (0..value_array.count) |i| { + self.mark_value(&value_array.values[i]); + } + } + + pub fn table_remove_white(self: *Self, table: *Table) void { + _ = self; + + for (0..table.capacity) |idx| { + const entry: *Entry = &table.entries[idx]; + if (entry.key != null and !entry.key.?.obj.is_marked) { + + // + // _ = table.del(entry.key.?); + } + } + } + + pub fn sweep(self: *Self) void { + var previous: ?*Obj = null; + var object: ?*Obj = self.vm.objects; + + // std.debug.print(" sweep started...\n", .{}); + + var destroyed: usize = 0; + + while (object != null) { + // std.debug.print(" GC: {*} mark:{any} content:", .{ object, object.?.is_marked }); + // object.?.print(); + // std.debug.print("\n", .{}); + + if (object.?.is_marked) { + object.?.is_marked = false; + previous = object; + object = object.?.next; + } else { + const unreached: *Obj = object.?; + object = object.?.next; + + // if (previous != null) { + // previous.?.next = object; + // } else { + // self.vm.objects = object; + // } + + std.debug.print("GC: sweeping {*}: ", .{unreached}); + unreached.print(); + std.debug.print("\n", .{}); + + // unreached.destroy(); + destroyed += 1; + } + } + + // std.debug.print(" sweep ended with {d}...\n", .{destroyed}); + } +}; diff --git a/src/native.zig b/src/native.zig index ddce08c..52adcb3 100644 --- a/src/native.zig +++ b/src/native.zig @@ -58,7 +58,7 @@ pub fn num2str(vm: *VM, arg_count: usize, args: []Value) Value { return Value.nil_val(); }; - const result = Obj.String.new(vm.allocator, str); + const result = Obj.String.new(vm, str); - return Value.obj_val(result); + return Value.obj_val(&result.obj); } diff --git a/src/object.zig b/src/object.zig index 19a7da5..e711f5a 100644 --- a/src/object.zig +++ b/src/object.zig @@ -22,6 +22,7 @@ pub const Obj = struct { kind: ObjType, allocator: Allocator, next: ?*Obj, + is_marked: bool, fn new(comptime T: type, vm: *VM, kind: ObjType) *T { const created_obj = vm.allocator.create(T) catch unreachable; @@ -30,6 +31,7 @@ pub const Obj = struct { .kind = kind, .allocator = vm.allocator, .next = vm.objects, + .is_marked = false, }; vm.objects = &created_obj.obj; @@ -79,7 +81,7 @@ pub const Obj = struct { function_obj.arity = 0; function_obj.upvalue_count = 0; - function_obj.chunk = Chunk.new(vm.allocator); + function_obj.chunk = Chunk.new(vm.allocator, vm); function_obj.name = null; return function_obj; diff --git a/src/table.zig b/src/table.zig index fc72980..2d62c5d 100644 --- a/src/table.zig +++ b/src/table.zig @@ -6,13 +6,13 @@ const constants = @import("./constant.zig"); const Obj = @import("./object.zig").Obj; const Value = @import("./values.zig").Value; +const ZloxAllocator = @import("./memory.zig").ZloxAllocator; -const grow_capacity = @import("./utils.zig").grow_capacity; const compute_hash = @import("./utils.zig").compute_hash; -const Entry = struct { - key: ?*Obj.String, - value: Value, +pub const Entry = struct { + key: ?*Obj.String = null, + value: Value = Value.nil_val(), }; pub const Table = struct { @@ -26,7 +26,7 @@ pub const Table = struct { .allocator = allocator, .count = 0, .capacity = 0, - .entries = &.{}, + .entries = &[_]Entry{}, }; } @@ -43,7 +43,7 @@ pub const Table = struct { const current_capacity: f32 = @floatFromInt(self.capacity); if (current_count > current_capacity * constants.TABLE_MAX_LOAD) { - const capacity = grow_capacity(self.capacity); + const capacity = ZloxAllocator.grow_capacity(self.capacity); self.adjust_capacity(capacity); } @@ -90,11 +90,10 @@ pub const Table = struct { } pub fn adjust_capacity(self: *Table, capacity: usize) void { - var entries = self.allocator.alloc(Entry, capacity) catch unreachable; + const entries = self.allocator.alloc(Entry, capacity) catch unreachable; - for (0..entries.len) |idx| { - entries[idx].key = null; - entries[idx].value = Value.nil_val(); + for (entries) |*e| { + e.* = Entry{}; } self.count = 0; diff --git a/src/utils.zig b/src/utils.zig index 7a3e01b..91f25ba 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -6,13 +6,6 @@ const Token = @import("./scanner.zig").Token; const print_value = @import("./values.zig").print_value; -pub fn grow_capacity(capacity: usize) usize { - if (capacity < 8) { - return 8; - } - return capacity * 2; -} - pub fn simple_instruction(opcode_name: []const u8, offset: usize) usize { debug.print("{s:<16}\n", .{opcode_name}); diff --git a/src/values.zig b/src/values.zig index 4deae4f..65c5cc0 100644 --- a/src/values.zig +++ b/src/values.zig @@ -4,7 +4,7 @@ const debug = std.debug; const Allocator = std.mem.Allocator; const Obj = @import("./object.zig").Obj; -const utils = @import("./utils.zig"); +const ZloxAllocator = @import("./memory.zig").ZloxAllocator; pub const ValueType = enum { Bool, @@ -158,7 +158,7 @@ pub const ValueArray = struct { pub fn write(self: *ValueArray, value: Value) !void { if (self.capacity < self.count + 1) { const old_capacity = self.capacity; - self.capacity = utils.grow_capacity(old_capacity); + self.capacity = ZloxAllocator.grow_capacity(old_capacity); self.values = try self.allocator.realloc(self.values, self.capacity); } diff --git a/src/vm.zig b/src/vm.zig index a966d38..af35d8e 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -4,11 +4,14 @@ const Allocator = std.mem.Allocator; const constants = @import("./constant.zig"); +const ZloxAllocator = @import("./memory.zig").ZloxAllocator; + const Chunk = @import("./chunk.zig").Chunk; const OpCode = @import("./opcode.zig").OpCode; const Value = @import("./values.zig").Value; const Obj = @import("./object.zig").Obj; const ObjType = @import("./object.zig").ObjType; +const Parser = @import("./compile.zig").Parser; const NativeFn = @import("./object.zig").NativeFn; const Table = @import("./table.zig").Table; @@ -42,25 +45,40 @@ pub const VM = struct { frame_count: usize, open_upvalues: ?*Obj.Upvalue, objects: ?*Obj, + parser: ?*Parser, + gray_count: usize, + gray_capacity: usize, + gray_stack: ?[]*Obj, - pub fn new(allocator: Allocator) VM { - return VM{ - .allocator = allocator, + pub fn new() VM { + const vm = VM{ + .allocator = undefined, .stack = undefined, .stack_top = 0, - .strings = Table.new(allocator), - .globals = Table.new(allocator), + .strings = undefined, + .globals = undefined, .frames = undefined, .frame_count = 0, .open_upvalues = null, .objects = null, + .parser = null, + .gray_capacity = 0, + .gray_count = 0, + .gray_stack = &.{}, }; + + return vm; } - pub fn init_vm(self: *VM) void { + pub fn init_vm(self: *VM, allocator: Allocator) void { + self.allocator = allocator; + self.globals = Table.new(self.allocator); + self.strings = Table.new(self.allocator); + self.define_native("clock", natives.clock); self.define_native("power", natives.power); self.define_native("str2num", natives.str2num); + self.define_native("num2str", natives.num2str); } pub fn destroy(self: *VM) void { @@ -71,6 +89,10 @@ pub const VM = struct { self.strings.destroy(); self.globals.destroy(); self.destroy_objects(); + + if (self.gray_stack != null) { + self.allocator.free(self.gray_stack.?); + } } pub fn destroy_objects(self: *VM) void { @@ -82,6 +104,15 @@ pub const VM = struct { } } + pub fn dump_objects(self: *VM) void { + var obj = self.objects; + while (obj != null) { + const obj_next = obj.?.next; + std.debug.print("OBJ: {*}\n", .{obj.?}); + obj = obj_next; + } + } + inline fn current_chunk(self: *VM) *Chunk { return self.frames[self.frame_count - 1].closure.function.chunk; } @@ -395,7 +426,10 @@ pub const VM = struct { pub fn allocate_string(self: *VM, source: []const u8) *Obj.String { const obj_string = Obj.String.new(self, source); + + _ = try self.push(Value.obj_val(&obj_string.obj)); _ = self.strings.set(obj_string, Value.nil_val()); + _ = self.pop(); return obj_string; }