implementing scanning on demand (ch16)

This commit is contained in:
Patrick MARIE 2024-08-24 21:52:38 +02:00
parent e619f7016f
commit 4d04208b2a
5 changed files with 472 additions and 23 deletions

1
samples/addition.lox Normal file
View File

@ -0,0 +1 @@
true+false

29
src/compile.zig Normal file
View File

@ -0,0 +1,29 @@
const std = @import("std");
const debug = std.debug;
const Allocator = std.mem.Allocator;
const Scanner = @import("./scanner.zig").Scanner;
const Token = @import("./scanner.zig").Token;
const TokenType = @import("./scanner.zig").TokenType;
/// Scans `contents` token by token and dumps every token to stderr.
///
/// Each output row shows the source line number (or a `|` marker when the
/// token sits on the same line as the previous one), the token type name,
/// the lexeme length and the lexeme text. Scanning stops after the first
/// `EOF` token has been printed.
///
/// `allocator` is currently unused; it is accepted so the signature does not
/// have to change once compilation starts allocating.
pub fn compile(allocator: Allocator, contents: []const u8) !void {
    _ = allocator;

    var scanner = Scanner.init(contents);
    // Line of the previously printed token; null until the first token.
    var last_line: ?usize = null;
    var done = false;

    while (!done) {
        const tok = scanner.scan_token();

        const same_line = if (last_line) |prev| prev == tok.line else false;
        if (same_line) {
            debug.print(" | ", .{});
        } else {
            debug.print("{d:4} ", .{tok.line});
            last_line = tok.line;
        }

        const lexeme = tok.start[0..tok.length];
        debug.print("{s:12} len:{d:2} '{s}'\n", .{ tok.token_type.string(), tok.length, lexeme });

        // The EOF token is printed too, then the loop ends.
        done = tok.token_type == TokenType.EOF;
    }
}

View File

@ -5,42 +5,102 @@ const Allocator = std.mem.Allocator;
const Chunk = @import("./chunk.zig").Chunk; const Chunk = @import("./chunk.zig").Chunk;
const OpCode = @import("./opcode.zig").OpCode; const OpCode = @import("./opcode.zig").OpCode;
const VM = @import("./vm.zig").VM; const VM = @import("./vm.zig").VM;
const InterpretResult = @import("./vm.zig").InterpretResult;
const compile = @import("./compile.zig").compile;
pub const DEBUG_TRACE_EXECUTION = true; pub const DEBUG_TRACE_EXECUTION = true;
/// Runs the interactive prompt: prints `> `, reads a chunk of input from
/// stdin and hands it to `interpret`, until stdin reports end of input.
///
/// At most 1024 bytes are consumed per prompt. The value returned by
/// `interpret` is discarded; errors from it, or from stdin/stdout, are
/// propagated to the caller.
pub fn repl(allocator: Allocator) !void {
    // Filled by `read` below; only the first `bytes_read` bytes are ever used.
    var line: [1024]u8 = undefined;

    const stdin = std.io.getStdIn().reader();
    const stdout = std.io.getStdOut().writer();

    while (true) {
        try stdout.print("> ", .{});

        const bytes_read = try stdin.read(&line);
        if (bytes_read == 0) {
            // End of input: finish the prompt line and leave the loop.
            try stdout.print("\n", .{});
            break;
        }

        // Pass only what was actually read. Handing over the whole buffer
        // would feed the unused tail of the buffer to the scanner as source.
        _ = try interpret(allocator, line[0..bytes_read]);
    }
}
/// Loads the file at `filepath` (resolved against the current working
/// directory), interprets its contents and exits the process with a
/// dedicated status code when interpretation reports a failure:
/// 65 for `COMPILE_ERROR`, 70 for `RUNTIME_ERROR`.
///
/// Files larger than 1 MiB are rejected by `readToEndAlloc`. The file handle
/// and the content buffer are released when the function returns normally or
/// with an error.
pub fn run_file(allocator: Allocator, filepath: []const u8) !void {
    const source_file = try std.fs.cwd().openFile(filepath, .{});
    defer source_file.close();

    const max_source_bytes = 1024 * 1024;
    const source = try source_file.readToEndAlloc(allocator, max_source_bytes);
    defer allocator.free(source);

    const outcome = try interpret(allocator, source);
    if (outcome == InterpretResult.COMPILE_ERROR) std.process.exit(65);
    if (outcome == InterpretResult.RUNTIME_ERROR) std.process.exit(70);
    // Any other outcome: return normally.
}
/// Runs `compile` over `content` and reports `InterpretResult.OK` when it
/// returns without error. Any error raised by `compile` is propagated as-is.
pub fn interpret(allocator: Allocator, content: []const u8) !InterpretResult {
    // TODO: decide whether errors from `compile` should be caught here and
    // turned into InterpretResult.COMPILE_ERROR instead of being propagated.
    try compile(allocator, content);

    const outcome: InterpretResult = .OK;
    return outcome;
}
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true }){}; var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
defer _ = debug.assert(gpa.deinit() == .ok); defer _ = debug.assert(gpa.deinit() == .ok);
const allocator = gpa.allocator(); const allocator = gpa.allocator();
var vm = VM.new(allocator); const args = try std.process.argsAlloc(allocator);
defer std.process.argsFree(allocator, args);
var chunk = Chunk.new(); if (args.len == 1) {
try chunk.init(allocator); try repl(allocator);
} else if (args.len == 2) {
try run_file(allocator, args[1]);
} else {
const stdout = std.io.getStdOut().writer();
try stdout.print("Usage: clox [path]\n", .{});
std.process.exit(64);
}
var constant = try chunk.add_constant(allocator, 1.2); // var vm = VM.new(allocator);
try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123);
try chunk.write(allocator, @intCast(constant), 123);
constant = try chunk.add_constant(allocator, 3.4); // var chunk = Chunk.new();
try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123); // try chunk.init(allocator);
try chunk.write(allocator, @intCast(constant), 123);
try chunk.write(allocator, @intFromEnum(OpCode.OP_ADD), 123); // var constant = try chunk.add_constant(allocator, 1.2);
// try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123);
// try chunk.write(allocator, @intCast(constant), 123);
constant = try chunk.add_constant(allocator, 5.6); // constant = try chunk.add_constant(allocator, 3.4);
try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123); // try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123);
try chunk.write(allocator, @intCast(constant), 123); // try chunk.write(allocator, @intCast(constant), 123);
try chunk.write(allocator, @intFromEnum(OpCode.OP_DIVIDE), 123); // try chunk.write(allocator, @intFromEnum(OpCode.OP_ADD), 123);
try chunk.write(allocator, @intFromEnum(OpCode.OP_NEGATE), 123); // constant = try chunk.add_constant(allocator, 5.6);
try chunk.write(allocator, @intFromEnum(OpCode.OP_RETURN), 123); // try chunk.write(allocator, @intFromEnum(OpCode.OP_CONSTANT), 123);
// try chunk.write(allocator, @intCast(constant), 123);
chunk.dissassemble("test chunk"); // try chunk.write(allocator, @intFromEnum(OpCode.OP_DIVIDE), 123);
_ = try vm.interpret(&chunk); // try chunk.write(allocator, @intFromEnum(OpCode.OP_NEGATE), 123);
vm.free(); // try chunk.write(allocator, @intFromEnum(OpCode.OP_RETURN), 123);
chunk.deinit(allocator); // chunk.dissassemble("test chunk");
// _ = try vm.interpret(&chunk);
// vm.free();
// chunk.deinit(allocator);
} }

359
src/scanner.zig Normal file
View File

@ -0,0 +1,359 @@
const std = @import("std");
/// Every kind of token the scanner can produce.
pub const TokenType = enum {
    // Single-character tokens.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // One or two character tokens.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Keywords.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FOR,
    FUN,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    // Special tokens.
    ERROR,
    EOF,

    /// Returns the name of the token type, spelled exactly like its enum tag
    /// (for example `TokenType.BANG_EQUAL.string()` is `"BANG_EQUAL"`).
    ///
    /// The name comes from `@tagName`, so it cannot drift from the tag list
    /// above when a tag is added or renamed.
    pub fn string(self: TokenType) []const u8 {
        return @tagName(self);
    }
};
/// A single token handed out by `Scanner.scan_token`.
pub const Token = struct {
/// Kind of token.
token_type: TokenType,
/// Slice whose first `length` bytes are the token text. For tokens built by
/// `Scanner.make_token` it begins at the token's first byte and runs to the
/// end of the scanned source; for tokens built by `Scanner.error_token` it
/// is the error message.
start: []const u8,
/// Number of bytes of `start` that belong to this token.
length: usize,
/// Value of the scanner's line counter when the token was created.
line: usize,
};
/// On-demand scanner: every call to `scan_token` produces the next token of
/// `source`. Nothing is allocated; tokens reference `source` directly, so the
/// source buffer must outlive the tokens.
pub const Scanner = struct {
    /// Text being scanned.
    source: []const u8,
    /// Index of the first byte of the token currently being scanned.
    start: usize,
    /// Index of the next byte to consume.
    current: usize,
    /// Current line number, starting at 1.
    line: usize,

    /// Creates a scanner positioned at the beginning of `content`, on line 1.
    pub fn init(content: []const u8) Scanner {
        return Scanner{
            .start = 0,
            .current = 0,
            .line = 1,
            .source = content,
        };
    }

    /// Skips whitespace and `//` comments, then scans and returns the next
    /// token. Returns an `EOF` token once the input is exhausted and an
    /// `ERROR` token (carrying a message) for invalid input.
    pub fn scan_token(self: *Scanner) Token {
        self.skip_whitespace();
        self.start = self.current;

        if (self.is_at_end()) {
            return self.make_token(TokenType.EOF);
        }

        const c = self.advance();
        if (self.is_alpha(c)) return self.identifier();
        if (self.is_digit(c)) return self.number();

        return switch (c) {
            '(' => self.make_token(TokenType.LEFT_PAREN),
            ')' => self.make_token(TokenType.RIGHT_PAREN),
            '{' => self.make_token(TokenType.LEFT_BRACE),
            '}' => self.make_token(TokenType.RIGHT_BRACE),
            ';' => self.make_token(TokenType.SEMICOLON),
            ',' => self.make_token(TokenType.COMMA),
            '.' => self.make_token(TokenType.DOT),
            '-' => self.make_token(TokenType.MINUS),
            '+' => self.make_token(TokenType.PLUS),
            '/' => self.make_token(TokenType.SLASH),
            '*' => self.make_token(TokenType.STAR),
            '!' => self.one_or_two('=', TokenType.BANG_EQUAL, TokenType.BANG),
            '=' => self.one_or_two('=', TokenType.EQUAL_EQUAL, TokenType.EQUAL),
            '<' => self.one_or_two('=', TokenType.LESS_EQUAL, TokenType.LESS),
            '>' => self.one_or_two('=', TokenType.GREATER_EQUAL, TokenType.GREATER),
            '"' => self.string(),
            else => self.error_token("Unexpected character."),
        };
    }

    /// Builds `if_matched` when the next byte is `second` (consuming it),
    /// `otherwise` if not. The match is resolved before the token is built so
    /// that the token length includes the consumed byte.
    fn one_or_two(self: *Scanner, second: u8, if_matched: TokenType, otherwise: TokenType) Token {
        const token_type = if (self.match(second)) if_matched else otherwise;
        return self.make_token(token_type);
    }

    /// Reports whether the scanner has consumed all of its input.
    ///
    /// A NUL byte also counts as end of input, so a zero-padded buffer scans
    /// like a NUL-terminated string instead of producing one error token per
    /// padding byte.
    pub fn is_at_end(self: Scanner) bool {
        return self.current >= self.source.len or self.source[self.current] == 0;
    }

    /// Builds a token of `token_type` covering `source[start..current]`.
    pub fn make_token(self: Scanner, token_type: TokenType) Token {
        return Token{
            .token_type = token_type,
            .start = self.source[self.start..],
            .length = self.current - self.start,
            .line = self.line,
        };
    }

    /// Builds an `ERROR` token whose text is `error_message` rather than a
    /// piece of the source.
    pub fn error_token(self: Scanner, error_message: []const u8) Token {
        return Token{
            // Must be ERROR: an EOF here would make callers stop scanning at
            // the first invalid character and hide the error.
            .token_type = TokenType.ERROR,
            .start = error_message,
            .length = error_message.len,
            .line = self.line,
        };
    }

    /// Consumes and returns the current byte. The caller must have checked
    /// that the scanner is not at the end of the input.
    pub fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// Consumes the current byte only if it equals `expected`; reports
    /// whether it did.
    pub fn match(self: *Scanner, expected: u8) bool {
        if (self.is_at_end()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Skips spaces, tabs, carriage returns, newlines (counting lines) and
    /// `//` comments up to, but not including, the terminating newline.
    pub fn skip_whitespace(self: *Scanner) void {
        while (!self.is_at_end()) {
            switch (self.peek()) {
                ' ', '\r', '\t' => {
                    _ = self.advance();
                },
                '\n' => {
                    self.line += 1;
                    _ = self.advance();
                },
                '/' => {
                    // A lone '/' is a token, not whitespace.
                    if (self.peek_next() != '/') return;

                    while (self.peek() != '\n' and !self.is_at_end()) {
                        _ = self.advance();
                    }
                },
                else => return,
            }
        }
    }

    /// Returns the current byte without consuming it, or 0 at end of input.
    pub fn peek(self: *Scanner) u8 {
        if (self.is_at_end()) return 0;
        return self.source[self.current];
    }

    /// Returns the byte after the current one without consuming anything, or
    /// 0 when there is no such byte.
    pub fn peek_next(self: *Scanner) u8 {
        // Checking only `is_at_end` is not enough: when `current` is the last
        // valid index, `current + 1` is already out of bounds.
        if (self.is_at_end() or self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Scans the rest of a string literal; the opening quote has already been
    /// consumed. Newlines inside the literal are allowed and counted. Returns
    /// an error token when the closing quote is missing.
    pub fn string(self: *Scanner) Token {
        while (self.peek() != '"' and !self.is_at_end()) {
            if (self.peek() == '\n') {
                self.line += 1;
            }
            _ = self.advance();
        }

        if (self.is_at_end()) {
            return self.error_token("Unterminated string.");
        }

        // Consume the closing quote.
        _ = self.advance();
        return self.make_token(TokenType.STRING);
    }

    /// Reports whether `c` is an ASCII decimal digit.
    pub fn is_digit(self: Scanner, c: u8) bool {
        _ = self;
        return c >= '0' and c <= '9';
    }

    /// Scans the rest of a number literal: digits, optionally followed by a
    /// '.' and more digits. A '.' that is not followed by a digit is left for
    /// the next token.
    pub fn number(self: *Scanner) Token {
        while (self.is_digit(self.peek())) {
            _ = self.advance();
        }

        if (self.peek() == '.' and self.is_digit(self.peek_next())) {
            // Consume the '.', then the fractional digits.
            _ = self.advance();
            while (self.is_digit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.make_token(TokenType.NUMBER);
    }

    /// Reports whether `c` is an ASCII letter or an underscore.
    pub fn is_alpha(self: Scanner, c: u8) bool {
        _ = self;
        return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_';
    }

    /// Scans the rest of an identifier or keyword.
    pub fn identifier(self: *Scanner) Token {
        while (self.is_alpha(self.peek()) or self.is_digit(self.peek())) {
            _ = self.advance();
        }
        return self.make_token(self.identifier_type());
    }

    /// Classifies the lexeme `source[start..current]` as a keyword or a plain
    /// identifier by branching on its first one or two bytes and comparing
    /// the remainder.
    pub fn identifier_type(self: *Scanner) TokenType {
        return switch (self.source[self.start]) {
            'a' => self.check_keyword(1, 2, "nd", TokenType.AND),
            'c' => self.check_keyword(1, 4, "lass", TokenType.CLASS),
            'e' => self.check_keyword(1, 3, "lse", TokenType.ELSE),
            'f' => if (self.current - self.start > 1) {
                return switch (self.source[self.start + 1]) {
                    'a' => self.check_keyword(2, 3, "lse", TokenType.FALSE),
                    'o' => self.check_keyword(2, 1, "r", TokenType.FOR),
                    'u' => self.check_keyword(2, 1, "n", TokenType.FUN),
                    else => TokenType.IDENTIFIER,
                };
            } else {
                return TokenType.IDENTIFIER;
            },
            'i' => self.check_keyword(1, 1, "f", TokenType.IF),
            'n' => self.check_keyword(1, 2, "il", TokenType.NIL),
            'o' => self.check_keyword(1, 1, "r", TokenType.OR),
            'p' => self.check_keyword(1, 4, "rint", TokenType.PRINT),
            'r' => self.check_keyword(1, 5, "eturn", TokenType.RETURN),
            's' => self.check_keyword(1, 4, "uper", TokenType.SUPER),
            't' => if (self.current - self.start > 1) {
                return switch (self.source[self.start + 1]) {
                    'h' => self.check_keyword(2, 2, "is", TokenType.THIS),
                    'r' => self.check_keyword(2, 2, "ue", TokenType.TRUE),
                    else => TokenType.IDENTIFIER,
                };
            } else {
                return TokenType.IDENTIFIER;
            },
            'v' => self.check_keyword(1, 2, "ar", TokenType.VAR),
            'w' => self.check_keyword(1, 4, "hile", TokenType.WHILE),
            else => TokenType.IDENTIFIER,
        };
    }

    /// Returns `token_type` when the current lexeme is exactly
    /// `start + length` bytes long and its bytes from offset `start` equal
    /// `rest`; returns `IDENTIFIER` otherwise.
    ///
    /// `rest` must be exactly `length` bytes long.
    pub fn check_keyword(self: *Scanner, start: usize, length: usize, rest: []const u8, token_type: TokenType) TokenType {
        // A mismatch here means the keyword table is wrong and the keyword
        // could never be recognised.
        std.debug.assert(rest.len == length);

        const lexeme = self.source[self.start..self.current];
        if (lexeme.len == start + length and std.mem.eql(u8, rest, lexeme[start..])) {
            return token_type;
        }
        return TokenType.IDENTIFIER;
    }
};

View File

@ -12,7 +12,7 @@ const print_value = @import("./values.zig").print_value;
const STACK_MAX = 256; const STACK_MAX = 256;
const InterpretResult = enum { pub const InterpretResult = enum {
OK, OK,
COMPILE_ERROR, COMPILE_ERROR,
RUNTIME_ERROR, RUNTIME_ERROR,
@ -66,8 +66,8 @@ pub const VM = struct {
}, },
@intFromEnum(OpCode.OP_ADD) => self.binary_op(OpCode.OP_ADD), @intFromEnum(OpCode.OP_ADD) => self.binary_op(OpCode.OP_ADD),
@intFromEnum(OpCode.OP_SUBSTRACT) => self.binary_op(OpCode.OP_SUBSTRACT), @intFromEnum(OpCode.OP_SUBSTRACT) => self.binary_op(OpCode.OP_SUBSTRACT),
@intFromEnum(OpCode.OP_MULTIPLY) => self.binary_op(OpCode.OP_SUBSTRACT), @intFromEnum(OpCode.OP_MULTIPLY) => self.binary_op(OpCode.OP_MULTIPLY),
@intFromEnum(OpCode.OP_DIVIDE) => self.binary_op(OpCode.OP_SUBSTRACT), @intFromEnum(OpCode.OP_DIVIDE) => self.binary_op(OpCode.OP_DIVIDE),
@intFromEnum(OpCode.OP_NEGATE) => { @intFromEnum(OpCode.OP_NEGATE) => {
try self.push(-self.pop()); try self.push(-self.pop());
}, },