From b0414bbe7b10a5551a6d15596f21f45138fde2da Mon Sep 17 00:00:00 2001
From: Patrick MARIE <pm@mkz.me>
Date: Sun, 25 Aug 2024 18:21:04 +0200
Subject: [PATCH] implementing strings (ch19)

---
 README.md       |  2 +-
 src/compile.zig | 23 ++++++++++++++---
 src/object.zig  | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/values.zig  | 43 +++++++++++++++++++++++++++++++
 src/vm.zig      | 49 +++++++++++++++++++++++++++++++++---
 5 files changed, 176 insertions(+), 8 deletions(-)
 create mode 100644 src/object.zig

diff --git a/README.md b/README.md
index 66d4871..22f3b85 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ While reading [Crafting Interpreters](https://craftinginterpreters.com/), after
 - [x] 16 - Scanning on Demand
 - [x] 17 - Compiling Expressions
 - [x] 18 - Types of Values
-- [ ] 19 - Strings
+- [x] 19 - Strings
 - [ ] 20 - Hash Tables
 - [ ] 21 - Global Variables
 - [ ] 22 - Local Variables
diff --git a/src/compile.zig b/src/compile.zig
index f59a0dc..9118a42 100644
--- a/src/compile.zig
+++ b/src/compile.zig
@@ -2,12 +2,16 @@ const std = @import("std");
 const debug = std.debug;
 const Allocator = std.mem.Allocator;
 
+const Obj = @import("./object.zig").Obj;
+const ObjType = @import("./object.zig").ObjType;
+
 const OpCode = @import("./opcode.zig").OpCode;
 const Scanner = @import("./scanner.zig").Scanner;
 const Token = @import("./scanner.zig").Token;
 const TokenType = @import("./scanner.zig").TokenType;
 const Chunk = @import("./chunk.zig").Chunk;
 const Value = @import("./values.zig").Value;
+const VM = @import("./vm.zig").VM;
 
 const ParsingError = @import("./errors.zig").ParsingError;
 
@@ -40,8 +44,9 @@ const Parser = struct {
     had_error: bool,
     panic_mode: bool,
     chunk: *Chunk,
+    vm: *VM,
 
-    fn new(scanner: *Scanner, chunk: *Chunk) Parser {
+    fn new(vm: *VM, scanner: *Scanner, chunk: *Chunk) Parser {
         return Parser{
             .current = null,
             .previous = null,
@@ -49,6 +54,7 @@ const Parser = struct {
             .had_error = false,
             .panic_mode = false,
             .chunk = chunk,
+            .vm = vm,
         };
     }
 
@@ -232,7 +238,7 @@ const Parser = struct {
             TokenType.LESS => ParserRule{ .prefix = null, .infix = binary, .precedence = Precedence.Comparison },
             TokenType.LESS_EQUAL => ParserRule{ .prefix = null, .infix = binary, .precedence = Precedence.Comparison },
             TokenType.IDENTIFIER => ParserRule{ .prefix = null, .infix = null, .precedence = Precedence.None },
-            TokenType.STRING => ParserRule{ .prefix = null, .infix = null, .precedence = Precedence.None },
+            TokenType.STRING => ParserRule{ .prefix = string, .infix = null, .precedence = Precedence.None },
             TokenType.NUMBER => ParserRule{ .prefix = number, .infix = null, .precedence = Precedence.None },
             TokenType.AND => ParserRule{ .prefix = null, .infix = null, .precedence = Precedence.None },
             TokenType.CLASS => ParserRule{ .prefix = null, .infix = null, .precedence = Precedence.None },
@@ -281,13 +287,22 @@ const Parser = struct {
             else => unreachable,
         };
     }
+
+    /// Prefix parse rule for STRING tokens: emits the literal's contents as a constant.
+    fn string(self: *Parser) ParsingError!void {
+        const token = self.previous.?;
+        // Drop the lexeme's first and last character (presumably the surrounding quotes).
+        const string_obj = Obj.String.new(self.chunk.allocator, token.start[1 .. token.length - 1]);
+        self.vm.add_reference(&string_obj.obj);
+        try self.emit_constant(Value.obj_val(&string_obj.obj));
+    }
 };
 
-pub fn compile(allocator: Allocator, contents: []const u8, chunk: *Chunk) !bool {
+pub fn compile(allocator: Allocator, vm: *VM, contents: []const u8, chunk: *Chunk) !bool {
     _ = allocator;
 
     var scanner = Scanner.init(contents);
-    var parser = Parser.new(&scanner, chunk);
+    var parser = Parser.new(vm, &scanner, chunk);
 
     parser.advance();
     try parser.expression();
diff --git a/src/object.zig b/src/object.zig
new file mode 100644
index 0000000..760e530
--- /dev/null
+++ b/src/object.zig
@@ -0,0 +1,67 @@
+const std = @import("std");
+const debug = std.debug;
+const Allocator = std.mem.Allocator;
+
+pub const ObjType = enum {
+    String,
+};
+
+pub const Obj = struct {
+    kind: ObjType,
+    allocator: std.mem.Allocator,
+
+    pub const String = struct {
+        chars: []const u8,
+        obj: Obj,
+
+        /// Allocates a String that owns a private copy of `str`; release it with `destroy`.
+        pub fn new(allocator: std.mem.Allocator, str: []const u8) *String {
+            // Allocation failure is a real runtime condition, so panic with a message rather
+            // than `catch unreachable`, which is undefined behavior in ReleaseFast builds.
+            const chars = allocator.dupe(u8, str) catch @panic("out of memory");
+            const str_obj = allocator.create(String) catch @panic("out of memory");
+            str_obj.* = .{
+                .chars = chars,
+                .obj = .{ .kind = ObjType.String, .allocator = allocator },
+            };
+            return str_obj;
+        }
+
+        pub fn destroy(self: *String) void {
+            const allocator = self.obj.allocator;
+            allocator.free(self.chars);
+            allocator.destroy(self);
+        }
+    };
+
+    pub fn is_type(self: *Obj, kind: ObjType) bool {
+        return self.kind == kind;
+    }
+
+    pub fn is_string(self: *Obj) bool {
+        return self.is_type(ObjType.String);
+    }
+
+    pub fn print(self: *Obj) void {
+        switch (self.kind) {
+            ObjType.String => {
+                const obj = self.as_string();
+                debug.print("{s}", .{obj.chars});
+            },
+        }
+    }
+
+    pub fn destroy(self: *Obj) void {
+        switch (self.kind) {
+            ObjType.String => {
+                const obj: *String = @fieldParentPtr("obj", self);
+                obj.destroy();
+            },
+        }
+    }
+
+    pub fn as_string(self: *Obj) *String {
+        std.debug.assert(self.kind == ObjType.String);
+        return @fieldParentPtr("obj", self);
+    }
+};
diff --git a/src/values.zig b/src/values.zig
index 5ab8c3c..e5f40f0 100644
--- a/src/values.zig
+++ b/src/values.zig
@@ -3,12 +3,14 @@ const debug = std.debug;
 
 const Allocator = std.mem.Allocator;
 
+const Obj = @import("./object.zig").Obj;
 const utils = @import("./utils.zig");
 
 pub const ValueType = enum {
     Bool,
     Nil,
     Number,
+    Obj,
 };
 
 pub const Value = struct {
@@ -16,6 +18,7 @@ pub const Value = struct {
     as: union {
         boolean: bool,
         number: f64,
+        obj: *Obj,
     },
 
     pub fn bool_val(value: bool) Value {
@@ -45,6 +48,15 @@ pub const Value = struct {
         };
     }
 
+    pub fn obj_val(obj: *Obj) Value {
+        return Value{
+            .value_type = ValueType.Obj,
+            .as = .{
+                .obj = obj,
+            },
+        };
+    }
+
     pub fn as_bool(self: Value) bool {
         return self.as.boolean;
     }
@@ -53,6 +65,22 @@ pub const Value = struct {
         return self.as.number;
     }
 
+    pub fn as_obj(self: Value) *Obj {
+        return self.as.obj;
+    }
+
+    /// Returns the String wrapping this value's object (Obj.as_string asserts the kind).
+    pub fn as_string(self: Value) *Obj.String {
+        // A *Obj does not coerce to *Obj.String; recover the parent struct via Obj.as_string.
+        return self.as_obj().as_string();
+    }
+
+    pub fn as_cstring(self: Value) []const u8 {
+        const obj: *Obj.String = self.as_obj().as_string();
+
+        return obj.chars;
+    }
+
     pub fn is_bool(self: Value) bool {
         return self.value_type == ValueType.Bool;
     }
@@ -65,6 +93,14 @@ pub const Value = struct {
         return self.value_type == ValueType.Nil;
     }
 
+    pub fn is_obj(self: Value) bool {
+        return self.value_type == ValueType.Obj;
+    }
+
+    pub fn is_string(self: Value) bool {
+        return self.is_obj() and self.as_obj().is_string();
+    }
+
     pub fn is_falsey(self: Value) bool {
         return self.is_nil() or (self.is_bool() and !self.as_bool());
     }
@@ -78,6 +114,12 @@ pub const Value = struct {
             ValueType.Nil => true,
             ValueType.Bool => self.as_bool() == other.as_bool(),
             ValueType.Number => self.as_number() == other.as_number(),
+            ValueType.Obj => {
+                const obj_string0 = self.as_cstring();
+                const obj_string1 = other.as_cstring();
+
+                return std.mem.eql(u8, obj_string0, obj_string1);
+            },
         };
     }
 };
@@ -118,5 +160,6 @@ pub fn print_value(value: Value) void {
         ValueType.Nil => debug.print("nil", .{}),
         ValueType.Bool => debug.print("{any}", .{value.as_bool()}),
         ValueType.Number => debug.print("{d}", .{value.as_number()}),
+        ValueType.Obj => value.as_obj().print(),
     }
 }
diff --git a/src/vm.zig b/src/vm.zig
index a5943a7..835455d 100644
--- a/src/vm.zig
+++ b/src/vm.zig
@@ -5,6 +5,7 @@ const Allocator = std.mem.Allocator;
 const Chunk = @import("./chunk.zig").Chunk;
 const OpCode = @import("./opcode.zig").OpCode;
 const Value = @import("./values.zig").Value;
+const Obj = @import("./object.zig").Obj;
 
 const compile = @import("./compile.zig").compile;
 
@@ -24,24 +25,30 @@ pub const VM = struct {
     chunk: ?*Chunk,
     ip: ?usize,
     stack: std.ArrayList(Value),
+    // Every created object is recorded in `references` so it can be destroyed on cleanup.
+    // In the book, a linked list threaded through the objects is used to handle this.
+    references: std.ArrayList(*Obj),
 
     pub fn new(allocator: Allocator) VM {
         return VM{
             .chunk = null,
             .ip = null,
             .stack = std.ArrayList(Value).init(allocator),
+            .references = std.ArrayList(*Obj).init(allocator),
         };
     }
 
     pub fn free(self: *VM) void {
         self.stack.deinit();
+        self.clean_references();
+        self.references.deinit();
     }
 
     pub fn interpret(self: *VM, allocator: Allocator, content: []const u8) !InterpretResult {
         var chunk = Chunk.new(allocator);
         defer chunk.deinit();
 
-        const res = try compile(allocator, content, &chunk);
+        const res = try compile(allocator, self, content, &chunk);
         if (!res) {
             return InterpretResult.COMPILE_ERROR;
         }
@@ -77,7 +84,13 @@ pub const VM = struct {
                 @intFromEnum(OpCode.OP_NIL) => try self.push(Value.nil_val()),
                 @intFromEnum(OpCode.OP_FALSE) => try self.push(Value.bool_val(false)),
                 @intFromEnum(OpCode.OP_TRUE) => try self.push(Value.bool_val(true)),
-                @intFromEnum(OpCode.OP_ADD), @intFromEnum(OpCode.OP_SUBSTRACT), @intFromEnum(OpCode.OP_MULTIPLY), @intFromEnum(OpCode.OP_DIVIDE), @intFromEnum(OpCode.OP_LESS), @intFromEnum(OpCode.OP_GREATER) => {
+                @intFromEnum(OpCode.OP_ADD),
+                @intFromEnum(OpCode.OP_SUBSTRACT),
+                @intFromEnum(OpCode.OP_MULTIPLY),
+                @intFromEnum(OpCode.OP_DIVIDE),
+                @intFromEnum(OpCode.OP_LESS),
+                @intFromEnum(OpCode.OP_GREATER),
+                => {
                     const res = try self.binary_op(@enumFromInt(instruction));
                     if (res != InterpretResult.OK) {
                         return res;
@@ -133,8 +146,13 @@ pub const VM = struct {
     }
 
     pub fn binary_op(self: *VM, op: OpCode) !InterpretResult {
+        if (op == OpCode.OP_ADD and self.peek(0).is_string() and self.peek(1).is_string()) {
+            try self.concatenate();
+            return InterpretResult.OK;
+        }
+
         if (!self.peek(0).is_number() or !self.peek(0).is_number()) {
-            self.runtime_error("Operands must be numbers");
+            self.runtime_error("Operands must be two numbers or two strings");
             return InterpretResult.RUNTIME_ERROR;
         }
 
@@ -156,6 +174,20 @@ pub const VM = struct {
         return InterpretResult.OK;
     }
 
+    /// Pops two string operands and pushes a new string object holding their concatenation.
+    pub fn concatenate(self: *VM) !void {
+        const rhs = self.pop().as_cstring();
+        const lhs = self.pop().as_cstring();
+        const allocator = self.chunk.?.allocator;
+        // Obj.String.new duplicates its input, so this temporary buffer is freed on exit.
+        const joined = try std.mem.concat(allocator, u8, &.{ lhs, rhs });
+        defer allocator.free(joined);
+
+        const string_obj = Obj.String.new(allocator, joined);
+        self.add_reference(&string_obj.obj);
+        try self.push(Value.obj_val(&string_obj.obj));
+    }
+
     pub fn peek(self: *VM, distance: usize) Value {
         return self.stack.items[self.stack.items.len - 1 - distance];
     }
@@ -167,4 +199,15 @@ pub const VM = struct {
         debug.print("err: {s}\n", .{err_msg});
         debug.print("[line {d}] in script\n", .{line});
     }
+
+    pub fn add_reference(self: *VM, obj: *Obj) void {
+        // TODO: return the error; until then panic rather than `unreachable` (UB in ReleaseFast).
+        self.references.append(obj) catch @panic("out of memory while tracking object");
+    }
+
+    /// Destroys every tracked object, then empties the list so a repeat call cannot double-free.
+    pub fn clean_references(self: *VM) void {
+        for (self.references.items) |tracked| tracked.destroy();
+        self.references.clearRetainingCapacity();
+    }
 };