Skip to content

Commit

Permalink
Implement the Google flat hashtable
Browse files Browse the repository at this point in the history
  • Loading branch information
devraymondsh committed Jan 26, 2024
1 parent aa9a9ad commit ecf1fa3
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"args": ["src/drivers/js/tests/kivi.js"]
},
{
"type": "node",
"type": "lldb",
"program": "node",
"request": "launch",
"cwd": "${workspaceFolder}",
Expand Down
14 changes: 5 additions & 9 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var target: std.Target = undefined;
var optimize: std.builtin.OptimizeMode = undefined;
var resolved_target: std.Build.ResolvedTarget = undefined;

var global_deps: [3]Dependency = undefined;
var global_deps: [2]Dependency = undefined;
const Dependency = struct {
name: []const u8,
module: *std.Build.Module,
Expand All @@ -28,16 +28,12 @@ const Dependency = struct {
b: *std.Build,
comptime name: []const u8,
comptime source: []const u8,
comptime link_module_to: ?comptime_int,
comptime n: comptime_int,
) void {
global_deps[n] = .{
.name = name,
.module = b.createModule(.{ .root_source_file = .{ .path = source } }),
};
if (link_module_to) |link_module| {
global_deps[n].module.addImport(global_deps[link_module].name, global_deps[link_module].module);
}
}
};

Expand Down Expand Up @@ -146,9 +142,8 @@ pub fn build(b: *std.Build) !void {
const tag = @tagName(target.os.tag);

// Declares dependencies
Dependency.addExternal(b, "memsimd", 0);
Dependency.addInternal(b, "Kivi", "src/core/Kivi.zig", 0, 1);
Dependency.addInternal(b, "core", "src/core/main.zig", 0, 2);
Dependency.addInternal(b, "Kivi", "src/core/Kivi.zig", 0);
Dependency.addInternal(b, "core", "src/core/main.zig", 1);

// Executes codegens
const codegen_step = b.step("codegen", "Generates bindings");
Expand Down Expand Up @@ -226,7 +221,8 @@ pub fn build(b: *std.Build) !void {
// "deno-test",
// "bun-test",
},
.{ core_build_step, drivers_build_step, ffi_tests_step },
// ffi_tests_step
.{ core_build_step, drivers_build_step },
test_step,
);

Expand Down
7 changes: 1 addition & 6 deletions build.zig.zon
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,5 @@
.version = "0.0.0",
.minimum_zig_version = "0.11.0",
.paths = .{ "src", "build.zig", "build.zig.zon" },
.dependencies = .{
.memsimd = .{
.url = "https://github.com/devraymondsh/memsimd/archive/refs/tags/v0.2.0.tar.gz",
.hash = "1220f41ad9de27c2aef3ec82aea399d66217163fe7f05af1a25f6e7d11cd2d8621ba",
},
},
.dependencies = .{},
}
217 changes: 143 additions & 74 deletions src/core/ByteMap.zig
Original file line number Diff line number Diff line change
@@ -1,141 +1,210 @@
/// This is a byte (u8) hashmap implementation that relies on the caller to handle allocations and lifetimes.
const memsimd = @import("memsimd");
const builtin = @import("builtin");
const Wyhash = @import("./Wyhash.zig");
const Math = @import("Math.zig");
const Mmap = @import("./Mmap.zig");

// Key == null and value == null : empty slot
// Key == null and value != null : tombstone
const Entry = struct {
key: ?[]u8 = null,
value: ?[]u8 = null,
key: []u8,
value: []u8,
};
/// A bucket of 16 key/value entries. The table is an array of these, and each
/// group has a GroupMetadata at the same index holding one state byte per slot.
const Group = struct {
// Slot storage. Lookups only read a slot's key after its metadata byte says the slot is full.
elements: [16]Entry,
};
/// Per-group control bytes: elements[i] records the state of slot i of the
/// Group stored at the same table index.
const GroupMetadata = struct {
// Slot states used by this file:
// Full: 0xFF or 0b11111111 (written by put)
// Empty: 0x0 or 0b00000000 (written by init for every slot)
// Tombstone: 0x80 or 0b10000000 (written by del)
elements: [16]u8,
};

// pub const CrtlEmpty: u8 = 0b10000000;
// pub const CrtlDeleted: u8 = 0b11111110;
// pub const CrtlSentinel: u8 = 0b11111111;
// const Crtl = packed struct(u8) {
// Empty: u8 = CrtlEmpty,
// };
// fn h1(hash: usize) usize {
// return hash >> 7;
// }
// fn h2(hash: usize) Crtl {
// return hash & 0x7f;
// }

fn nosimd_eql_byte(a: []const u8, b: []const u8) bool {
return memsimd.nosimd.eql(u8, a, b);
@setRuntimeSafety(false);

if (a.len != b.len) return false;

for (a, b) |a_elem, b_elem| {
if (a_elem != b_elem) return false;
}
return true;
}
var eql_bye_nocheck: *const fn ([]const u8, []const u8) bool = nosimd_eql_byte;
fn eql_byte(a: []const u8, b: []const u8) bool {
fn simd_eql_byte(a: []const u8, b: []const u8) bool {
@setRuntimeSafety(false);

if (a.len != b.len) return false;
if (a.ptr == b.ptr) return true;
if (a.len == 0) return true;
if (a[0] != b[0]) {
return false;
}

return eql_bye_nocheck(a, b);
const rem: usize = a.len & 0xf;
const len: usize = a.len -% rem;
const ptra = a.ptr;
const ptrb = b.ptr;

var off: usize = 0;
while (off < len) : (off +%= 16) {
const xmm0: @Vector(16, u8) = ptra[off..][0..16].*;
const xmm1: @Vector(16, u8) = ptrb[off..][0..16].*;
if (!@reduce(.And, xmm0 == xmm1)) {
return false;
}
}
if (rem != 0) {
if (!nosimd_eql_byte(a[off..a.len], b[off..b.len])) {
return false;
}
}

return true;
}
var eql_byte: *const fn ([]const u8, []const u8) bool = simd_eql_byte;

/// Empty function marked cold via @setCold. It is called from inside a branch,
/// presumably as a hint to the optimizer that the branch is rarely taken.
fn unlikely() void {
@setCold(true);
}

const ByteMap = @This();

table: []Entry,
table: []Group,
table_metadata: []GroupMetadata,
table_size: usize,

var hasher = Wyhash.init(0);

pub fn init(self: *ByteMap, allocator: *Mmap, size_arg: usize) !void {
self.table_size = @intCast(Math.ceilPowerOfTwo(@intCast(size_arg)));
self.table = try allocator.alloc(Entry, self.table_size);
pub fn init(self: *ByteMap, allocator: *Mmap, size: usize) !void {
self.table_size = @intCast(Math.ceilPowerOfTwo(@intCast(size)));
self.table_metadata = try allocator.alloc(GroupMetadata, self.table_size);
self.table = try allocator.alloc(Group, self.table_size);

for (0..self.table_size) |idx| {
self.table[idx].key = null;
self.table[idx].value = null;
inline for (0..16) |inner_idx| {
self.table_metadata[idx].elements[inner_idx] = 0x0;
}
}
}

if (memsimd.is_x86_64) {
eql_bye_nocheck = memsimd.avx.eql_byte_nocheck;
} else if (memsimd.is_aarch64) {
eql_bye_nocheck = memsimd.sve.eql_byte_nocheck;
}
/// Hashes `key` with the file-level `hasher` (a Wyhash created with `Wyhash.init(0)`).
/// Passes the key length and the key bytes to `reset_hash` and returns its result.
/// NOTE(review): `hasher` is a shared mutable global; presumably `reset_hash`
/// resets its state on every call — confirm in Wyhash.zig.
fn hash(key: []const u8) usize {
return hasher.reset_hash(@intCast(key.len), key);
}
/// Maps a hash to the index of the group where probing starts.
/// Drops the low 7 bits of `hashed`, then masks the remainder with
/// `table_size - 1` (the mask is only a valid modulo when `table_size`
/// is a power of two, which `init` arranges via `ceilPowerOfTwo`).
fn hash_to_groupidx(self: *ByteMap, hashed: usize) usize {
    const upper_bits = hashed >> 7;
    const group_mask = self.table_size - 1;
    return upper_bits & group_mask;
}
/// Picks the slot inside a 16-slot group where probing starts.
///
/// Returns the low 4 bits of `hashed`, i.e. a value in 0..15. These bits lie
/// within the low 7 bits that `hash_to_groupidx` shifts away, so the slot
/// choice stays independent of the group choice.
///
/// The previous `(hashed & 0x7F) % 7` could only return 0..6, so slots 7..15
/// were never a starting position, and 128 % 7 != 0 made the result
/// non-uniform. Any value in 0..15 is a valid start because the probe loop
/// scans `elemidx..16` and then `0..elemidx`.
fn hash_to_elemidx(hashed: usize) usize {
    return hashed & 0xF;
}

fn get_index(self: *ByteMap, key: []const u8) usize {
return hasher.reset_hash(@intCast(key.len), key) & (self.table_size - 1);
/// Compares every control byte in `metadata` against `mask`.
/// Lane i of the result is true exactly when `metadata[i] == mask`.
fn mask_vec(metadata: @Vector(16, u8), mask: comptime_int) @Vector(16, bool) {
    // Broadcast the byte to all 16 lanes so the comparison is lane-wise.
    const needle: @Vector(16, u8) = @splat(mask);
    return metadata == needle;
}

fn find_entry(self: *ByteMap, key: []const u8, comptime insertion: bool) ?*Entry {
var index = self.get_index(key);
var searching_second_time = false;
const FoundEntity = struct { group_idx: usize, elem_idx: u4 };
fn find_index(self: *ByteMap, key: []const u8, comptime lookup: bool) ?FoundEntity {
const hashed = hash(key);
var groupidx = self.hash_to_groupidx(hashed);
const elemidx = hash_to_elemidx(hashed);

while (true) {
const entry = &self.table[index];
const null_key = entry.key == null;
const null_value = entry.value == null;

if (!insertion) {
if (null_key) {
if (null_value) {
return null;
} else index += 1;
} else {
if (eql_byte(key, entry.key.?)) {
return entry;
} else index += 1;
var searching_second_time = false;
while (true) : (groupidx += 1) {
const group = &self.table[groupidx];
const metadata = self.table_metadata[groupidx];
const metadata_vec: @Vector(16, u8) = metadata.elements;
const isempty_vec: @Vector(16, bool) = mask_vec(metadata_vec, 0x0);
const isfull_vec: @Vector(16, bool) = mask_vec(metadata_vec, 0xFF);

if (lookup) {
for (elemidx..16) |idx| {
if (isfull_vec[idx]) {
if (eql_byte(group.elements[idx].key, key)) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
}
} else if (isempty_vec[idx]) return null;
}
for (0..elemidx) |idx| {
if (isfull_vec[idx]) {
if (eql_byte(group.elements[idx].key, key)) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
}
} else if (isempty_vec[idx]) return null;
}
} else {
if (null_key) {
return entry;
for (elemidx..16) |idx| {
if (!isfull_vec[idx]) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
} else {
if (eql_byte(group.elements[idx].key, key)) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
}
}
}
if (eql_byte(key, entry.key.?)) {
return entry;
} else {
index += 1;
for (0..elemidx) |idx| {
if (!isfull_vec[idx]) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
} else {
if (eql_byte(group.elements[idx].key, key)) {
return FoundEntity{
.group_idx = groupidx,
.elem_idx = @intCast(idx),
};
}
}
}
}

if (index >= (self.table_size - 1)) {
if (groupidx >= (self.table_size - 1)) {
unlikely();
if (!searching_second_time) {
index = 0;
groupidx = 0;
searching_second_time = true;
} else break;
}
}

return null;
}

pub fn get(self: *ByteMap, key: []const u8) ?[]u8 {
const found_entry = self.find_entry(key, false);
if (found_entry) |entry| return entry.value.?;
const found_entity = self.find_index(key, true);
if (found_entity) |entity| {
return self.table[entity.group_idx].elements[entity.elem_idx].value;
}
return null;
}
pub fn del(self: *ByteMap, allocator: *Mmap, key: []const u8) ?[]u8 {
const found_entry = self.find_entry(key, false);
if (found_entry) |entry| {
allocator.free(entry.key.?);
entry.*.key = null;
return entry.value.?;
const found_entity = self.find_index(key, true);
if (found_entity) |entity| {
const entry = &self.table[entity.group_idx].elements[entity.elem_idx];
allocator.free(entry.key);

self.table_metadata[entity.group_idx].elements[entity.elem_idx] = 0x80;

return entry.value;
}
return null;
}
pub fn put(self: *ByteMap, key: []u8, value: []u8) !void {
const found_entry = self.find_entry(key, true);
if (found_entry) |entry| {
const found_entity = self.find_index(key, false);
if (found_entity) |entity| {
const entry = &self.table[entity.group_idx].elements[entity.elem_idx];
entry.key = key;
entry.value = value;
return;
}
return error.OutOfMemory;

self.table_metadata[entity.group_idx].elements[entity.elem_idx] = 0xFF;
} else return error.OutOfMemory;
}

pub fn deinit(self: *ByteMap) void {
Expand Down
6 changes: 3 additions & 3 deletions src/core/Kivi.zig
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
const MMap = @import("Mmap.zig");
const memsimd = @import("memsimd");
const ByteMap = @import("ByteMap.zig");

pub const Config = extern struct {
maximum_elements: usize = 4_100_000,
// (2 ** 18) * 16 = 4194304
group_size: usize = 262144,
mem_size: usize = 2 * 1024 * 1024 * 1024,
page_size: usize = 100 * 1024 * 1024,
};
Expand All @@ -23,7 +23,7 @@ fn stringcpy(dest: []u8, src: []const u8) !void {
pub fn init(self: *Kivi, config: *const Config) !usize {
self.mem = try MMap.init(config.mem_size, config.page_size);

try self.map.init(&self.mem, config.maximum_elements);
try self.map.init(&self.mem, config.group_size);

return @sizeOf(Kivi);
}
Expand Down
4 changes: 2 additions & 2 deletions src/core/include/kivi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#include <stddef.h>

struct __attribute__((aligned(8))) Kivi {
char __opaque[72];
char __opaque[88];
};

struct Config {
size_t maximum_elements;
size_t group_size;
size_t mem_size;
size_t page_size;
};
Expand Down
2 changes: 1 addition & 1 deletion src/drivers/js/codegen-generated.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
// This is generated by the codegen. Please don't touch this file.
export const kiviInstanceSize = 72;
export const kiviInstanceSize = 88;

0 comments on commit ecf1fa3

Please sign in to comment.