From 364cbb8bc650ff6e31df76502dab7498070269e8 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Wed, 25 Jun 2025 21:17:09 +0800
Subject: [PATCH 01/18] Add spinlock implementation using RV32A atomics

Introduce a simple spinlock implementation based on test-and-set using
RV32A atomic instructions. The spinlock API includes basic locking,
IRQ-safe variants, and versions that save and restore interrupt state.

To support atomic instructions, the Makefile is updated to enable the
'A' extension by changing the -march flag.

This is the first step toward enabling multi-core task scheduling
support on RISC-V SMP systems.
---
 arch/riscv/build.mk   |  4 +--
 arch/riscv/spinlock.h | 74 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/spinlock.h

diff --git a/arch/riscv/build.mk b/arch/riscv/build.mk
index 7e51bf8..952a06d 100644
--- a/arch/riscv/build.mk
+++ b/arch/riscv/build.mk
@@ -16,10 +16,10 @@ DEFINES := -DF_CPU=$(F_CLK) \
 	-DUSART_BAUD=$(SERIAL_BAUDRATE) \
 	-DF_TIMER=$(F_TICK)
 
-ASFLAGS = -march=rv32imzicsr -mabi=ilp32
+ASFLAGS = -march=rv32imazicsr -mabi=ilp32
 CFLAGS += -Wall -Wextra -Wshadow -Wno-unused-parameter -Werror
 CFLAGS += -O2 -std=gnu99
-CFLAGS += -march=rv32imzicsr -mabi=ilp32
+CFLAGS += -march=rv32imazicsr -mabi=ilp32
 CFLAGS += -mstrict-align -ffreestanding -nostdlib -fomit-frame-pointer
 CFLAGS += $(INC_DIRS) $(DEFINES) -fdata-sections -ffunction-sections
 ARFLAGS = r
diff --git a/arch/riscv/spinlock.h b/arch/riscv/spinlock.h
new file mode 100644
index 0000000..e05ca5a
--- /dev/null
+++ b/arch/riscv/spinlock.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include
+
+/* Spinlock structure */
+typedef struct {
+    volatile uint32_t lock;
+} spinlock_t;
+
+#define SPINLOCK_INITIALIZER { 0 }
+
+/* Save and restore interrupt state */
+static inline uint32_t intr_save(void)
+{
+    uint32_t mstatus_val = read_csr(mstatus);
+    _di();
+    return mstatus_val;
+}
+
+static inline void intr_restore(uint32_t mstatus_val)
+{
+    write_csr(mstatus, mstatus_val);
+}
+
+/* CPU relax */
+static inline void cpu_relax(void)
+{
+    asm volatile("nop");
+}
+
+/* Basic spinlock API */
+static inline void spin_lock(spinlock_t *lock)
+{
+    while (__sync_lock_test_and_set(&lock->lock, 1)) {
+        while (lock->lock)
+            cpu_relax();
+    }
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+    __sync_lock_release(&lock->lock);
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return (__sync_lock_test_and_set(&lock->lock, 1) == 0);
+}
+
+/* IRQ-safe spinlock (no state saving) */
+static inline void spin_lock_irq(spinlock_t *lock)
+{
+    _di();
+    spin_lock(lock);
+}
+
+static inline void spin_unlock_irq(spinlock_t *lock)
+{
+    spin_unlock(lock);
+    _ei();
+}
+
+/* IRQ-safe spinlock (with state saving) */
+static inline void spin_lock_irqsave(spinlock_t *lock, uint32_t *flags)
+{
+    *flags = intr_save();
+    spin_lock(lock);
+}
+
+static inline void spin_unlock_irqrestore(spinlock_t *lock, uint32_t flags)
+{
+    spin_unlock(lock);
+    intr_restore(flags);
+}

From 0f7eb021d095cc74e3f4eac60dc213f4b67266a1 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Wed, 25 Jun 2025 22:45:11 +0800
Subject: [PATCH 02/18] Replace interrupt masking with spinlock in malloc for SMP support

The original malloc/free implementation used CRITICAL_ENTER() and
CRITICAL_LEAVE() to protect critical sections by simply disabling
interrupts, which is sufficient on single-core systems. To support SMP,
we replace these with a proper spinlock that uses RV32A atomic
instructions.
This ensures correctness when multiple harts access the allocator concurrently. This change allows future task scheduling across multiple harts without risking race conditions in the memory allocator. --- lib/malloc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/malloc.c b/lib/malloc.c index 3f1f26f..7a0537e 100644 --- a/lib/malloc.c +++ b/lib/malloc.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "private/utils.h" @@ -24,6 +25,8 @@ typedef struct __memblock { static memblock_t *first_free; static void *heap_start, *heap_end; static uint32_t free_blocks_count; /* track fragmentation */ +static spinlock_t malloc_lock = SPINLOCK_INITIALIZER; +static uint32_t malloc_flags = 0; /* Block manipulation macros */ #define IS_USED(b) ((b)->size & 1L) @@ -62,13 +65,13 @@ void free(void *ptr) if (!ptr) return; - CRITICAL_ENTER(); + spin_lock_irqsave(&malloc_lock, &malloc_flags); memblock_t *p = ((memblock_t *) ptr) - 1; /* Validate the block being freed */ if (!validate_block(p) || !IS_USED(p)) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&malloc_lock, malloc_flags); return; /* Invalid or double-free */ } @@ -100,7 +103,7 @@ void free(void *ptr) free_blocks_count--; } - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&malloc_lock, malloc_flags); } /* Selective coalescing: only when fragmentation becomes significant */ @@ -136,7 +139,7 @@ void *malloc(uint32_t size) if (size < MALLOC_MIN_SIZE) size = MALLOC_MIN_SIZE; - CRITICAL_ENTER(); + spin_lock_irqsave(&malloc_lock, &malloc_flags); /* Trigger coalescing only when fragmentation is high */ if (free_blocks_count > COALESCE_THRESHOLD) @@ -145,7 +148,7 @@ void *malloc(uint32_t size) memblock_t *p = first_free; while (p) { if (!validate_block(p)) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&malloc_lock, malloc_flags); return NULL; /* Heap corruption detected */ } @@ -168,13 +171,13 @@ void *malloc(uint32_t size) if (free_blocks_count > 0) free_blocks_count--; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&malloc_lock, malloc_flags); return (void *) (p + 1); } p = p->next; } - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&malloc_lock, malloc_flags); return NULL; /* allocation failed */ } From 075a90c37a0b1041c608830917f982e7be81b0b0 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 22:49:29 +0800 Subject: [PATCH 03/18] Replace interrupt masking with spinlock in mqueue for SMP support The original message queue implementation used CRITICAL_ENTER() and CRITICAL_LEAVE() to protect critical sections by disabling interrupts. This was sufficient for single-core systems, where only one hart could execute tasks. To support SMP, we replace these macros with a proper spinlock using RV32A atomic instructions. This ensures safe access to the internal queue structures when multiple harts concurrently interact with message queues. This change eliminates potential race conditions in message queue operations as we move toward multi-hart scheduling. 
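
To make the conversion concrete, the shape of every converted mqueue
operation is sketched below (illustrative only, not part of the diff;
queue_op() is a hypothetical stand-in for queue_enqueue/queue_dequeue/
queue_peek):

    static spinlock_t queue_lock = SPINLOCK_INITIALIZER;
    static uint32_t queue_flags = 0;

    static int32_t locked_queue_op(mq_t *mq)
    {
        int32_t rc;

        /* The saved-mstatus interrupt disable keeps this hart's own ISRs
         * out of the critical section; the test-and-set lock keeps other
         * harts out. Both are needed on SMP. */
        spin_lock_irqsave(&queue_lock, &queue_flags);
        rc = queue_op(mq->q);   /* hypothetical shared-state operation */
        spin_unlock_irqrestore(&queue_lock, queue_flags);
        return rc;
    }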
--- kernel/mqueue.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/mqueue.c b/kernel/mqueue.c index 88190b9..2d521a3 100644 --- a/kernel/mqueue.c +++ b/kernel/mqueue.c @@ -6,8 +6,13 @@ #include #include +#include + #include "private/error.h" +static spinlock_t queue_lock = SPINLOCK_INITIALIZER; +static uint32_t queue_flags = 0; + mq_t *mo_mq_create(uint16_t max_items) { mq_t *mq = malloc(sizeof *mq); @@ -24,17 +29,17 @@ mq_t *mo_mq_create(uint16_t max_items) int32_t mo_mq_destroy(mq_t *mq) { - CRITICAL_ENTER(); + spin_lock_irqsave(&queue_lock, &queue_flags); if (queue_count(mq->q) != 0) { /* refuse to destroy non-empty q */ - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&queue_lock, queue_flags); return ERR_MQ_NOTEMPTY; } queue_destroy(mq->q); free(mq); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&queue_lock, queue_flags); return ERR_OK; } @@ -42,9 +47,9 @@ int32_t mo_mq_enqueue(mq_t *mq, message_t *msg) { int32_t rc; - CRITICAL_ENTER(); + spin_lock_irqsave(&queue_lock, &queue_flags); rc = queue_enqueue(mq->q, msg); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&queue_lock, queue_flags); return rc; /* 0 on success, −1 on full */ } @@ -54,9 +59,9 @@ message_t *mo_mq_dequeue(mq_t *mq) { message_t *msg; - CRITICAL_ENTER(); + spin_lock_irqsave(&queue_lock, &queue_flags); msg = queue_dequeue(mq->q); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&queue_lock, queue_flags); return msg; /* NULL when queue is empty */ } @@ -66,9 +71,9 @@ message_t *mo_mq_peek(mq_t *mq) { message_t *msg; - CRITICAL_ENTER(); + spin_lock_irqsave(&queue_lock, &queue_flags); msg = queue_peek(mq->q); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&queue_lock, queue_flags); return msg; /* NULL when queue is empty */ } From c746d0ba11a89afd94ebf61789b9bb2d3510491b Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 22:55:09 +0800 Subject: [PATCH 04/18] Replace interrupt masking with spinlock in task management for SMP support The original task management code used CRITICAL_ENTER() / CRITICAL_LEAVE() and NOSCHED_ENTER() / NOSCHED_LEAVE() to protect critical sections by disabling interrupts, which was sufficient for single-core systems. To support SMP, these macros are replaced with a spinlock based on RV32A atomic instructions. This ensures that multiple harts can safely access and modify shared task data such as ready queues, priority values, and task control blocks. This change is essential for enabling multi-hart task scheduling without introducing race conditions in the kernel task subsystem. 
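
One ordering rule is worth spelling out: the lock is always released
before anything that can switch context. A condensed sketch of the
pattern (it mirrors the mo_task_delay() change in the diff below; it is
not additional code):

    void task_block_sketch(uint16_t ticks)
    {
        spin_lock_irqsave(&task_lock, &task_flags);
        tcb_t *self = kcb->task_current->data;
        self->delay = ticks;     /* shared TCB state only touched under the lock */
        self->state = TASK_BLOCKED;
        spin_unlock_irqrestore(&task_lock, task_flags);

        /* Yield only after unlocking: yielding with the spinlock held would
         * leave other harts spinning across the context switch, or deadlock
         * if the next task needs the same lock. */
        mo_task_yield();
    }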
--- kernel/task.c | 62 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/kernel/task.c b/kernel/task.c index e3b99bb..2e17a10 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -6,6 +6,7 @@ */ #include +#include #include #include @@ -47,6 +48,9 @@ static struct { } task_cache[TASK_CACHE_SIZE]; static uint8_t cache_index = 0; +static spinlock_t task_lock = SPINLOCK_INITIALIZER; +static uint32_t task_flags = 0; + static inline bool is_valid_task(tcb_t *task) { return (task && task->stack && task->stack_sz >= MIN_TASK_STACK_SIZE && @@ -383,12 +387,12 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) } /* Minimize critical section duration */ - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); if (!kcb->tasks) { kcb->tasks = list_create(); if (!kcb->tasks) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); free(tcb->stack); free(tcb); panic(ERR_KCB_ALLOC); @@ -397,7 +401,7 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) list_node_t *node = list_pushback(kcb->tasks, tcb); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); free(tcb->stack); free(tcb); panic(ERR_TCB_ALLOC); @@ -410,7 +414,7 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) if (!kcb->task_current) kcb->task_current = node; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); /* Initialize execution context outside critical section */ hal_context_init(&tcb->context, (size_t) tcb->stack, new_stack_size, @@ -430,16 +434,16 @@ int32_t mo_task_cancel(uint16_t id) if (id == 0 || id == mo_task_id()) return ERR_TASK_CANT_REMOVE; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *tcb = node->data; if (!tcb || tcb->state == TASK_RUNNING) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_CANT_REMOVE; } @@ -459,7 +463,7 @@ int32_t mo_task_cancel(uint16_t id) if (kcb->last_ready_hint == node) kcb->last_ready_hint = NULL; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); /* Free memory outside critical section */ free(tcb->stack); @@ -478,16 +482,16 @@ void mo_task_delay(uint16_t ticks) if (!ticks) return; - NOSCHED_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return; } tcb_t *self = kcb->task_current->data; self->delay = ticks; self->state = TASK_BLOCKED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); mo_task_yield(); } @@ -497,17 +501,17 @@ int32_t mo_task_suspend(uint16_t id) if (id == 0) return ERR_TASK_NOT_FOUND; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task || (task->state != TASK_READY && task->state != TASK_RUNNING && task->state != TASK_BLOCKED)) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_CANT_SUSPEND; } @@ -518,7 +522,7 @@ int32_t mo_task_suspend(uint16_t id) if (kcb->last_ready_hint == node) kcb->last_ready_hint = NULL; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, 
task_flags); if (is_current) mo_task_yield(); @@ -531,21 +535,21 @@ int32_t mo_task_resume(uint16_t id) if (id == 0) return ERR_TASK_NOT_FOUND; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task || task->state != TASK_SUSPENDED) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_CANT_RESUME; } task->state = TASK_READY; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_OK; } @@ -554,22 +558,22 @@ int32_t mo_task_priority(uint16_t id, uint16_t priority) if (id == 0 || !is_valid_priority(priority)) return ERR_TASK_INVALID_PRIO; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } uint8_t base = (uint8_t) (priority >> 8); task->prio = ((uint16_t) base << 8) | base; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_OK; } @@ -579,21 +583,21 @@ int32_t mo_task_rt_priority(uint16_t id, void *priority) if (id == 0) return ERR_TASK_NOT_FOUND; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_TASK_NOT_FOUND; } task->rt_prio = priority; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return ERR_OK; } @@ -609,9 +613,9 @@ int32_t mo_task_idref(void *task_entry) if (!task_entry || !kcb->tasks) return ERR_TASK_NOT_FOUND; - CRITICAL_ENTER(); + spin_lock_irqsave(&task_lock, &task_flags); list_node_t *node = list_foreach(kcb->tasks, refcmp, task_entry); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&task_lock, task_flags); return node ? ((tcb_t *) node->data)->id : ERR_TASK_NOT_FOUND; } From 3f9d1bb500d05b555fce0b4655f020f7e1e964b8 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 22:58:12 +0800 Subject: [PATCH 05/18] Replace interrupt masking with spinlock in pipe for SMP support The original pipe implementation used CRITICAL_ENTER() and CRITICAL_LEAVE() to protect critical sections by disabling interrupts, which was acceptable for single-core systems. To support SMP, these macros are replaced with a proper spinlock based on RV32A atomic instructions. This ensures safe concurrent access to the circular buffer used by the pipe, even when multiple harts are performing read or write operations simultaneously. This change is necessary to avoid race conditions and ensure correct pipe behavior under multi-hart task scheduling. 
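
As a usage sketch (illustrative only; the pipe object, its creation, and
the consume() sink are assumptions, not part of this patch), a
non-blocking reader on one hart can drain whatever is available while a
writer on another hart fills the buffer:

    void poll_reader(pipe_t *p)
    {
        char buf[16];

        for (;;) {
            /* mo_pipe_nbread() copies at most sizeof(buf) bytes and returns
             * how many were copied; the copy runs entirely under pipe_lock,
             * so concurrent writers on other harts cannot interleave bytes. */
            int32_t n = mo_pipe_nbread(p, buf, sizeof(buf));
            if (n > 0)
                consume(buf, n);   /* hypothetical sink for the data */
            else
                mo_task_wfi();     /* nothing buffered; let this hart idle */
        }
    }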
--- kernel/pipe.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/kernel/pipe.c b/kernel/pipe.c index 03a57f2..a463058 100644 --- a/kernel/pipe.c +++ b/kernel/pipe.c @@ -1,11 +1,16 @@ #include +#include +#include #include #include #include "private/error.h" #include "private/utils.h" -static inline bool pipe_is_empty(const pipe_t *p) +static spinlock_t pipe_lock = SPINLOCK_INITIALIZER; +static uint32_t pipe_flags = 0; + +static inline int pipe_is_empty(const pipe_t *p) { return p->used == 0; } @@ -64,9 +69,9 @@ int32_t mo_pipe_destroy(pipe_t *p) void mo_pipe_flush(pipe_t *p) { - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); p->head = p->tail = p->used = 0; - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); } int32_t mo_pipe_size(pipe_t *p) @@ -78,13 +83,13 @@ int32_t mo_pipe_size(pipe_t *p) static void pipe_wait_until_readable(pipe_t *p) { while (1) { - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); if (!pipe_is_empty(p)) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); return; } /* nothing to read – drop critical section and yield CPU */ - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); mo_task_wfi(); } } @@ -92,13 +97,13 @@ static void pipe_wait_until_readable(pipe_t *p) static void pipe_wait_until_writable(pipe_t *p) { while (1) { - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); if (!pipe_is_full(p)) { - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); return; } /* buffer full – yield until space is available */ - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); mo_task_wfi(); } } @@ -113,9 +118,9 @@ int32_t mo_pipe_read(pipe_t *p, char *dst, uint16_t len) while (i < len) { pipe_wait_until_readable(p); - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); dst[i++] = pipe_get_byte(p); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); } return i; } @@ -129,9 +134,9 @@ int32_t mo_pipe_write(pipe_t *p, const char *src, uint16_t len) while (i < len) { pipe_wait_until_writable(p); - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); pipe_put_byte(p, src[i++]); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); } return i; } @@ -143,10 +148,10 @@ int32_t mo_pipe_nbread(pipe_t *p, char *dst, uint16_t len) return ERR_FAIL; uint16_t i = 0; - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); while (i < len && !pipe_is_empty(p)) dst[i++] = pipe_get_byte(p); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); return i; } @@ -157,10 +162,10 @@ int32_t mo_pipe_nbwrite(pipe_t *p, const char *src, uint16_t len) return ERR_FAIL; uint16_t i = 0; - CRITICAL_ENTER(); + spin_lock_irqsave(&pipe_lock, &pipe_flags); while (i < len && !pipe_is_full(p)) pipe_put_byte(p, src[i++]); - CRITICAL_LEAVE(); + spin_unlock_irqrestore(&pipe_lock, pipe_flags); return i; } From 9e4bdb76fb1a8a627ac94f4f2fb6a9af9aa33060 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 23:06:19 +0800 Subject: [PATCH 06/18] Replace interrupt masking with spinlock in semaphore for SMP support The original semaphore implementation used NOSCHED_ENTER() and NOSCHED_LEAVE() to protect critical sections by disabling interrupts, which was sufficient in single-core environments. To support SMP, we replace these macros with a spinlock based on RV32A atomic instructions. 
This ensures safe access to shared semaphore state, including the count and wait queue, when multiple harts operate concurrently. This change is necessary to avoid race conditions during mo_sem_wait(), mo_sem_signal(), and other semaphore operations under multi-hart scheduling. --- kernel/semaphore.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 6e99b49..4e6329a 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -8,6 +8,7 @@ */ #include +#include #include #include @@ -24,6 +25,9 @@ struct sem_t { /* Magic number for semaphore validation */ #define SEM_MAGIC 0x53454D00 /* "SEM\0" */ +static spinlock_t semaphore_lock = SPINLOCK_INITIALIZER; +static uint32_t semaphore_flags = 0; + static inline bool sem_is_valid(const sem_t *s) { return (s && s->magic == SEM_MAGIC && s->wait_q); @@ -68,11 +72,11 @@ int32_t mo_sem_destroy(sem_t *s) if (!sem_is_valid(s)) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&semaphore_lock, &semaphore_flags); /* Check if any tasks are waiting - unsafe to destroy if so */ if (queue_count(s->wait_q) > 0) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); return ERR_TASK_BUSY; } @@ -81,7 +85,7 @@ int32_t mo_sem_destroy(sem_t *s) queue_t *wait_q = s->wait_q; s->wait_q = NULL; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); /* Clean up resources outside critical section */ queue_destroy(wait_q); @@ -96,19 +100,19 @@ void mo_sem_wait(sem_t *s) panic(ERR_SEM_OPERATION); } - NOSCHED_ENTER(); + spin_lock_irqsave(&semaphore_lock, &semaphore_flags); /* Fast path: resource available and no waiters (preserves FIFO) */ if (s->count > 0 && queue_count(s->wait_q) == 0) { s->count--; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); return; } /* Slow path: must wait for resource */ /* Verify wait queue has capacity (should never fail for valid semaphore) */ if (queue_count(s->wait_q) >= s->max_waiters) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); panic(ERR_SEM_OPERATION); /* Queue overflow - system error */ } @@ -133,7 +137,7 @@ int32_t mo_sem_trywait(sem_t *s) int32_t result = ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&semaphore_lock, &semaphore_flags); /* Only succeed if resource is available AND no waiters (preserves FIFO) */ if (s->count > 0 && queue_count(s->wait_q) == 0) { @@ -141,7 +145,7 @@ int32_t mo_sem_trywait(sem_t *s) result = ERR_OK; } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); return result; } @@ -155,7 +159,7 @@ void mo_sem_signal(sem_t *s) bool should_yield = false; tcb_t *awakened_task = NULL; - NOSCHED_ENTER(); + spin_lock_irqsave(&semaphore_lock, &semaphore_flags); /* Check if any tasks are waiting */ if (queue_count(s->wait_q) > 0) { @@ -181,7 +185,7 @@ void mo_sem_signal(sem_t *s) /* Silently ignore overflow - semaphore remains at max count */ } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); /* Yield outside critical section to allow awakened task to run. * This improves responsiveness if the awakened task has higher priority. 
@@ -209,9 +213,9 @@ int32_t mo_sem_waiting_count(sem_t *s) int32_t count; - NOSCHED_ENTER(); + spin_lock_irqsave(&semaphore_lock, &semaphore_flags); count = queue_count(s->wait_q); - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&semaphore_lock, semaphore_flags); return count; } From b42d2f15b3d93de7de448ac89925832573ab572e Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 23:10:57 +0800 Subject: [PATCH 07/18] Replace interrupt masking with spinlock in timer for SMP support The timer subsystem originally used NOSCHED_ENTER() and NOSCHED_LEAVE() to disable interrupts when accessing shared timer state, which sufficed on single-core systems. To support SMP, we now replace these macros with a spinlock based on RV32A atomic instructions. This ensures safe concurrent access to global timer state such as timer_initialized, the timer list, and ID management. This change prepares the timer subsystem for correct operation when multiple harts simultaneously create, start, or cancel timers. --- kernel/timer.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/kernel/timer.c b/kernel/timer.c index 63abe69..99394f8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -31,6 +32,9 @@ static struct { } timer_cache[4]; static uint8_t timer_cache_index = 0; +static spinlock_t timer_lock = SPINLOCK_INITIALIZER; +static uint32_t timer_flags = 0; + /* Get a node from the pool, fall back to malloc if pool is empty */ static list_node_t *get_timer_node(void) { @@ -82,9 +86,9 @@ static int32_t timer_subsystem_init(void) if (timer_initialized) return ERR_OK; - NOSCHED_ENTER(); + spin_lock_irqsave(&timer_lock, &timer_flags); if (timer_initialized) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_OK; } @@ -95,7 +99,7 @@ static int32_t timer_subsystem_init(void) free(all_timers_list); free(kcb->timer_list); kcb->timer_list = NULL; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_FAIL; } @@ -106,7 +110,7 @@ static int32_t timer_subsystem_init(void) } timer_initialized = true; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_OK; } @@ -284,7 +288,7 @@ int32_t mo_timer_create(void *(*callback)(void *arg), if (!t) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&timer_lock, &timer_flags); /* Initialize timer */ t->id = next_id++; @@ -296,7 +300,7 @@ int32_t mo_timer_create(void *(*callback)(void *arg), /* Insert into sorted all_timers_list */ if (timer_insert_sorted_by_id(t) != ERR_OK) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); free(t); return ERR_FAIL; } @@ -304,7 +308,7 @@ int32_t mo_timer_create(void *(*callback)(void *arg), /* Add to cache */ cache_timer(t->id, t); - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return t->id; } @@ -313,11 +317,11 @@ int32_t mo_timer_destroy(uint16_t id) if (!timer_initialized) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&timer_lock, &timer_flags); list_node_t *node = timer_find_node_by_id(id); if (!node) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_FAIL; } @@ -340,7 +344,7 @@ int32_t mo_timer_destroy(uint16_t id) free(t); return_timer_node(node); - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_OK; } @@ -351,11 +355,11 @@ int32_t mo_timer_start(uint16_t id, uint8_t mode) if (!timer_initialized) return ERR_FAIL; - 
NOSCHED_ENTER(); + spin_lock_irqsave(&timer_lock, &timer_flags); timer_t *t = timer_find_by_id_fast(id); if (!t) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_FAIL; } @@ -369,11 +373,11 @@ int32_t mo_timer_start(uint16_t id, uint8_t mode) if (timer_sorted_insert(t) != ERR_OK) { t->mode = TIMER_DISABLED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_FAIL; } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_OK; } @@ -382,17 +386,17 @@ int32_t mo_timer_cancel(uint16_t id) if (!timer_initialized) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&timer_lock, &timer_flags); timer_t *t = timer_find_by_id_fast(id); if (!t || t->mode == TIMER_DISABLED) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_FAIL; } timer_remove_item_by_data(kcb->timer_list, t); t->mode = TIMER_DISABLED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&timer_lock, timer_flags); return ERR_OK; } From 8863bcbb0cafd9d9ab826f0cf731ee2957b1add7 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 23:15:39 +0800 Subject: [PATCH 08/18] Replace interrupt masking with spinlock in mutex for SMP support The mutex and condition variable implementation previously relied on NOSCHED_ENTER() and NOSCHED_LEAVE() to protect critical sections by disabling interrupts. This works in single-core environments but breaks down under SMP due to race conditions between harts. This patch replaces those macros with a spinlock built using RV32A atomic instructions. The spinlock protects access to shared state including mutex ownership, waiter lists, and condition wait queues. This change ensures correct mutual exclusion and atomicity when multiple harts concurrently lock/unlock mutexes or signal condition variables. --- kernel/mutex.c | 88 ++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/kernel/mutex.c b/kernel/mutex.c index 4319911..05a9c3d 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -4,12 +4,16 @@ * that are independent of the semaphore module. 
*/ +#include #include #include #include #include "private/error.h" +static spinlock_t mutex_lock = SPINLOCK_INITIALIZER; +static uint32_t mutex_flags = 0; + /* Validate mutex pointer and structure integrity */ static inline bool mutex_is_valid(const mutex_t *m) { @@ -87,17 +91,17 @@ int32_t mo_mutex_destroy(mutex_t *m) if (!mutex_is_valid(m)) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); /* Check if any tasks are waiting */ if (!list_is_empty(m->waiters)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_TASK_BUSY; } /* Check if mutex is still owned */ if (m->owner_tid != 0) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_TASK_BUSY; } @@ -107,7 +111,7 @@ int32_t mo_mutex_destroy(mutex_t *m) m->waiters = NULL; m->owner_tid = 0; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); list_destroy(waiters); return ERR_OK; @@ -120,18 +124,18 @@ int32_t mo_mutex_lock(mutex_t *m) uint16_t self_tid = mo_task_id(); - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); /* Non-recursive: reject if caller already owns it */ if (m->owner_tid == self_tid) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_TASK_BUSY; } /* Fast path: mutex is free, acquire immediately */ if (m->owner_tid == 0) { m->owner_tid = self_tid; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_OK; } @@ -151,7 +155,7 @@ int32_t mo_mutex_trylock(mutex_t *m) uint16_t self_tid = mo_task_id(); int32_t result = ERR_TASK_BUSY; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); if (m->owner_tid == self_tid) { /* Already owned by caller (non-recursive) */ @@ -163,7 +167,7 @@ int32_t mo_mutex_trylock(mutex_t *m) } /* else: owned by someone else, return ERR_TASK_BUSY */ - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return result; } @@ -178,37 +182,37 @@ int32_t mo_mutex_timedlock(mutex_t *m, uint32_t ticks) uint16_t self_tid = mo_task_id(); uint32_t deadline = mo_ticks() + ticks; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); /* Non-recursive check */ if (m->owner_tid == self_tid) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_TASK_BUSY; } /* Fast path: mutex is free */ if (m->owner_tid == 0) { m->owner_tid = self_tid; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_OK; } /* Must block with timeout */ tcb_t *self = kcb->task_current->data; if (!list_pushback(m->waiters, self)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); } self->state = TASK_BLOCKED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); /* Wait loop with timeout check */ while (self->state == TASK_BLOCKED && mo_ticks() < deadline) mo_task_yield(); int32_t status; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); if (self->state == TASK_BLOCKED) { /* Timeout occurred - remove ourselves from wait list */ @@ -224,7 +228,7 @@ int32_t mo_mutex_timedlock(mutex_t *m, uint32_t ticks) status = ERR_OK; } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return status; } @@ -235,11 +239,11 @@ int32_t mo_mutex_unlock(mutex_t *m) uint16_t self_tid = mo_task_id(); - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); /* Verify caller owns the mutex */ if (m->owner_tid != self_tid) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, 
mutex_flags); return ERR_NOT_OWNER; } @@ -265,7 +269,7 @@ int32_t mo_mutex_unlock(mutex_t *m) } } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_OK; } @@ -283,9 +287,9 @@ int32_t mo_mutex_waiting_count(mutex_t *m) return -1; int32_t count; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); count = m->waiters ? (int32_t) m->waiters->length : 0; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return count; } @@ -317,11 +321,11 @@ int32_t mo_cond_destroy(cond_t *c) if (!cond_is_valid(c)) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); /* Check if any tasks are waiting */ if (!list_is_empty(c->waiters)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_TASK_BUSY; } @@ -330,7 +334,7 @@ int32_t mo_cond_destroy(cond_t *c) list_t *waiters = c->waiters; c->waiters = NULL; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); list_destroy(waiters); return ERR_OK; @@ -348,27 +352,27 @@ int32_t mo_cond_wait(cond_t *c, mutex_t *m) return ERR_NOT_OWNER; /* Atomically add to wait list and block */ - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); tcb_t *self = kcb->task_current->data; if (!list_pushback(c->waiters, self)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); } self->state = TASK_BLOCKED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); /* Release mutex and yield */ int32_t unlock_result = mo_mutex_unlock(m); if (unlock_result != ERR_OK) { /* Failed to unlock - remove from wait list */ - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); list_node_t *self_node = find_node_by_data(c->waiters, self); if (self_node) { list_remove(c->waiters, self_node); free(self_node); } self->state = TASK_READY; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return unlock_result; } @@ -394,27 +398,27 @@ int32_t mo_cond_timedwait(cond_t *c, mutex_t *m, uint32_t ticks) uint32_t deadline = mo_ticks() + ticks; /* Atomically add to wait list */ - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); tcb_t *self = kcb->task_current->data; if (!list_pushback(c->waiters, self)) { - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); } self->state = TASK_BLOCKED; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); /* Release mutex */ int32_t unlock_result = mo_mutex_unlock(m); if (unlock_result != ERR_OK) { /* Failed to unlock - cleanup */ - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); list_node_t *self_node = find_node_by_data(c->waiters, self); if (self_node) { list_remove(c->waiters, self_node); free(self_node); } self->state = TASK_READY; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return unlock_result; } @@ -424,7 +428,7 @@ int32_t mo_cond_timedwait(cond_t *c, mutex_t *m, uint32_t ticks) } int32_t wait_status; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); if (self->state == TASK_BLOCKED) { /* Timeout - remove from wait list */ @@ -440,7 +444,7 @@ int32_t mo_cond_timedwait(cond_t *c, mutex_t *m, uint32_t ticks) wait_status = ERR_OK; } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); /* Re-acquire mutex regardless of timeout status */ int32_t lock_result = mo_mutex_lock(m); @@ -454,7 +458,7 @@ int32_t mo_cond_signal(cond_t *c) if (!cond_is_valid(c)) return ERR_FAIL; - 
NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); if (!list_is_empty(c->waiters)) { tcb_t *waiter = (tcb_t *) list_pop(c->waiters); @@ -469,7 +473,7 @@ int32_t mo_cond_signal(cond_t *c) } } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_OK; } @@ -478,7 +482,7 @@ int32_t mo_cond_broadcast(cond_t *c) if (!cond_is_valid(c)) return ERR_FAIL; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); while (!list_is_empty(c->waiters)) { tcb_t *waiter = (tcb_t *) list_pop(c->waiters); @@ -493,7 +497,7 @@ int32_t mo_cond_broadcast(cond_t *c) } } - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return ERR_OK; } @@ -503,9 +507,9 @@ int32_t mo_cond_waiting_count(cond_t *c) return -1; int32_t count; - NOSCHED_ENTER(); + spin_lock_irqsave(&mutex_lock, &mutex_flags); count = c->waiters ? (int32_t) c->waiters->length : 0; - NOSCHED_LEAVE(); + spin_unlock_irqrestore(&mutex_lock, mutex_flags); return count; } From 34fdc562a3c3df13cd3725d31a868fa87015e868 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 22:02:02 +0800 Subject: [PATCH 09/18] Protect printf with spinlock to prevent interleaved output on SMP On SMP systems, concurrent calls to printf() from multiple harts can cause interleaved and unreadable output due to racing writes to the shared output buffer. Add a spinlock to serialize access to printf(), ensuring that only one hart writes at a time. This change improves the readability of debug messages and prevents garbled output when multiple harts are active. --- lib/libc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/libc.c b/lib/libc.c index de0bd35..d0a693b 100644 --- a/lib/libc.c +++ b/lib/libc.c @@ -1,5 +1,6 @@ #include #include +#include #include "private/stdio.h" #include "private/utils.h" @@ -896,15 +897,20 @@ static int vsprintf(char **buf, const char *fmt, va_list args) return len; } + +static spinlock_t printf_lock = SPINLOCK_INITIALIZER; +static uint32_t printf_flags = 0; /* Formatted output to stdout. */ int32_t printf(const char *fmt, ...) { va_list args; int32_t v; + spin_lock_irqsave(&printf_lock, &printf_flags); va_start(args, fmt); v = vsprintf(0, fmt, args); va_end(args); + spin_unlock_irqrestore(&printf_lock, printf_flags); return v; } From bd65251d97b82f9469cdc49ee98fae39a275bbbc Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 23:18:02 +0800 Subject: [PATCH 10/18] Remove obsolete NOSCHED_ENTER/LEAVE and CRITICAL_ENTER/LEAVE macros All calls to NOSCHED_ENTER(), NOSCHED_LEAVE(), CRITICAL_ENTER(), and CRITICAL_LEAVE() have been replaced with spinlock-based synchronization primitives throughout the kernel. As a result, these macros are no longer used and have been removed from include/sys/task.h to clean up the codebase and avoid confusion. --- include/sys/task.h | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/include/sys/task.h b/include/sys/task.h index f6385ea..2443438 100644 --- a/include/sys/task.h +++ b/include/sys/task.h @@ -108,39 +108,6 @@ extern kcb_t *kcb; /* Task lookup cache size for frequently accessed tasks */ #define TASK_CACHE_SIZE 4 -/* Disables/enables ALL maskable interrupts globally. - * This provides the strongest protection against concurrency from both other - * tasks and all ISRs. Use this when modifying data shared with any ISR. - * WARNING: This increases interrupt latency. Use NOSCHED macros if protection - * is only needed against task preemption. 
- */ -#define CRITICAL_ENTER() \ - do { \ - if (kcb->preemptive) \ - _di(); \ - } while (0) -#define CRITICAL_LEAVE() \ - do { \ - if (kcb->preemptive) \ - _ei(); \ - } while (0) - -/* Disables/enables ONLY the scheduler timer interrupt. - * This is a lighter-weight critical section that prevents task preemption but - * allows other hardware interrupts (e.g., UART) to be serviced, minimizing - * latency. Use this when protecting data shared between tasks. - */ -#define NOSCHED_ENTER() \ - do { \ - if (kcb->preemptive) \ - hal_timer_disable(); \ - } while (0) -#define NOSCHED_LEAVE() \ - do { \ - if (kcb->preemptive) \ - hal_timer_enable(); \ - } while (0) - /* Core Kernel and Task Management API */ /* Prints a fatal error message and halts the system. */ From bec9d50e9ceec240dc6fcca3b10c5a83a5cfbce3 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 21:46:15 +0800 Subject: [PATCH 11/18] Add per-hart stack allocation in RISC-V boot for SMP support To support SMP, allocate separate stack memory regions for each hart during boot. This patch modifies the assembly entry code in arch/riscv/boot.c to compute the initial stack pointer based on the hart ID, ensuring each hart uses a distinct stack area of fixed size (STACK_SIZE_PER_HART). This enables multiple harts to safely run concurrently without stack collisions during early boot stages. --- arch/riscv/boot.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/riscv/boot.c b/arch/riscv/boot.c index 0d8f876..852df4f 100644 --- a/arch/riscv/boot.c +++ b/arch/riscv/boot.c @@ -14,6 +14,8 @@ extern uint32_t _gp, _stack, _end; extern uint32_t _sbss, _ebss; +#define STACK_SIZE_PER_HART 524288 + /* C entry points */ void main(void); void do_trap(uint32_t cause, uint32_t epc); @@ -29,6 +31,12 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void) /* Initialize Global Pointer (gp) and Stack Pointer (sp). */ "la gp, _gp\n" "la sp, _stack\n" + /* Set up stack for each hart */ + "csrr t0, mhartid\n" /* t0 = hartid */ + "la t1, _stack_top\n" /* t1 = base address of full stack region (top) */ + "li t2, %2\n" /* t2 = per-hart stack size */ + "mul t0, t0, t2\n" /* t0 = hartid * STACK_SIZE_PER_HART */ + "sub sp, t1, t0\n" /* sp = _stack_top - hartid * stack_size */ /* Initialize Thread Pointer (tp). The ABI requires tp to point to * a 64-byte aligned memory region for thread-local storage. Here, we @@ -89,7 +97,7 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void) "j .Lpark_hart\n" : /* no outputs */ - : "i"(MSTATUS_MPP_MACH), "i"(MIE_MEIE) + : "i"(MSTATUS_MPP_MACH), "i"(MIE_MEIE), "i"(STACK_SIZE_PER_HART) : "memory"); } From d7658228888a676e6aa0d7888bc4218fa017092b Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 27 Jun 2025 22:25:18 +0800 Subject: [PATCH 12/18] Remove hart parking and add spinlock synchronization during boot for SMP Remove the old logic that parks all secondary harts in WFI, which caused them to hang indefinitely. Instead, all harts proceed with boot. To ensure proper initialization sequence, hart 0 performs hardware setup, heap initialization, and task creation. Other harts spin-wait on a spinlock-protected flag until hart 0 finishes initialization before starting task dispatch. 
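
The rendezvous reduces to a publish/poll pattern on a single flag; a
condensed sketch of the same logic as the kernel/main.c change below
(error handling and the later task-launch steps are omitted):

    static volatile bool finish = false;
    static spinlock_t finish_lock = SPINLOCK_INITIALIZER;

    static void boot_rendezvous(int32_t hartid)
    {
        if (hartid == 0) {
            /* Hart 0 performs the one-time hardware, heap, and application
             * initialization, then publishes completion under the lock. */
            spin_lock(&finish_lock);
            finish = true;
            spin_unlock(&finish_lock);
            return;
        }

        /* Secondary harts poll the flag under the same lock; the acquire on
         * lock and release on unlock order hart 0's initialization before
         * the flag is observed as true. */
        for (;;) {
            spin_lock(&finish_lock);
            bool done = finish;
            spin_unlock(&finish_lock);
            if (done)
                break;
        }
    }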
--- arch/riscv/boot.c | 9 +-------- kernel/main.c | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/riscv/boot.c b/arch/riscv/boot.c index 852df4f..5259981 100644 --- a/arch/riscv/boot.c +++ b/arch/riscv/boot.c @@ -70,10 +70,6 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void) "csrw mideleg, zero\n" /* No interrupt delegation to S-mode */ "csrw medeleg, zero\n" /* No exception delegation to S-mode */ - /* Park secondary harts (cores). */ - "csrr t0, mhartid\n" - "bnez t0, .Lpark_hart\n" - /* Set the machine trap vector (mtvec) to point to our ISR. */ "la t0, _isr\n" "csrw mtvec, t0\n" @@ -87,15 +83,12 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void) "csrw mie, t0\n" /* Jump to the C-level main function. */ + "csrr a0, mhartid\n" "call main\n" /* If main() ever returns, it is a fatal error. */ "call hal_panic\n" - ".Lpark_hart:\n" - "wfi\n" - "j .Lpark_hart\n" - : /* no outputs */ : "i"(MSTATUS_MPP_MACH), "i"(MIE_MEIE), "i"(STACK_SIZE_PER_HART) : "memory"); diff --git a/kernel/main.c b/kernel/main.c index efa46ff..e39a69a 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -1,9 +1,13 @@ #include +#include #include #include #include "private/error.h" +static volatile bool finish = false; +static spinlock_t finish_lock = SPINLOCK_INITIALIZER; + /* C-level entry point for the kernel. * * This function is called from the boot code ('_entry'). It is responsible for @@ -12,21 +16,36 @@ * * Under normal operation, this function never returns. */ -int32_t main(void) +int32_t main(int32_t hartid) { /* Initialize hardware abstraction layer and memory heap. */ hal_hardware_init(); - printf("Linmo kernel is starting...\n"); + if (hartid == 0) { + printf("Linmo kernel is starting...\n"); + + mo_heap_init((void *) &_heap_start, (size_t) &_heap_size); + printf("Heap initialized, %u bytes available\n", + (unsigned int) (size_t) &_heap_size); + + /* Call the application's main entry point to create initial tasks. */ + kcb->preemptive = (bool) app_main(); + printf("Scheduler mode: %s\n", + kcb->preemptive ? "Preemptive" : "Cooperative"); - mo_heap_init((void *) &_heap_start, (size_t) &_heap_size); - printf("Heap initialized, %u bytes available\n", - (unsigned int) (size_t) &_heap_size); + spin_lock(&finish_lock); + finish = true; + spin_unlock(&finish_lock); + } - /* Call the application's main entry point to create initial tasks. */ - kcb->preemptive = (bool) app_main(); - printf("Scheduler mode: %s\n", - kcb->preemptive ? "Preemptive" : "Cooperative"); + /* Make sure hardware initialize before running the first task. */ + while (1) { + spin_lock(&finish_lock); + if (finish) + break; + spin_unlock(&finish_lock); + } + spin_unlock(&finish_lock); /* Verify that the application created at least one task. * If 'kcb->task_current' is still NULL, it means mo_task_spawn was never @@ -40,6 +59,8 @@ int32_t main(void) */ setjmp(kcb->context); + spin_lock(&finish_lock); + /* Launch the first task. * 'kcb->task_current' was set by the first call to mo_task_spawn. * This function transfers control and does not return. @@ -48,6 +69,8 @@ int32_t main(void) if (!first_task) panic(ERR_NO_TASKS); + spin_unlock(&finish_lock); + hal_dispatch_init(first_task->context); /* This line should be unreachable. 
*/ From c87ebe6fb65421e11b37e28627fdbc815f672a00 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 23:14:07 +0800 Subject: [PATCH 13/18] Move task_lock spinlock into kcb struct The task_lock spinlock was primarily used to protect access to the Kernel Control Block (kcb) and its internal data structures. Move the spinlock into the kcb_t struct as kcb_lock, consolidating related state and synchronization primitives together. All uses of the standalone task_lock spinlock are replaced by kcb->kcb_lock accesses, improving code clarity and encapsulation of the kernel's core control block. --- include/sys/task.h | 3 +++ kernel/task.c | 60 +++++++++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/include/sys/task.h b/include/sys/task.h index 2443438..6ff67b4 100644 --- a/include/sys/task.h +++ b/include/sys/task.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -87,6 +88,8 @@ typedef struct { list_t *timer_list; /* List of active software timers. */ /* Global system tick counter, incremented by the timer ISR. */ volatile uint32_t ticks; + + spinlock_t kcb_lock; } kcb_t; /* Global pointer to the singleton Kernel Control Block. */ diff --git a/kernel/task.c b/kernel/task.c index 2e17a10..bfe6502 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -31,6 +31,7 @@ static kcb_t kernel_state = { .ticks = 0, .preemptive = true, /* Default to preemptive mode */ .last_ready_hint = NULL, + .kcb_lock = SPINLOCK_INITIALIZER, }; kcb_t *kcb = &kernel_state; @@ -48,7 +49,6 @@ static struct { } task_cache[TASK_CACHE_SIZE]; static uint8_t cache_index = 0; -static spinlock_t task_lock = SPINLOCK_INITIALIZER; static uint32_t task_flags = 0; static inline bool is_valid_task(tcb_t *task) @@ -387,12 +387,12 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) } /* Minimize critical section duration */ - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); if (!kcb->tasks) { kcb->tasks = list_create(); if (!kcb->tasks) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); free(tcb->stack); free(tcb); panic(ERR_KCB_ALLOC); @@ -401,7 +401,7 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) list_node_t *node = list_pushback(kcb->tasks, tcb); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); free(tcb->stack); free(tcb); panic(ERR_TCB_ALLOC); @@ -414,7 +414,7 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) if (!kcb->task_current) kcb->task_current = node; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); /* Initialize execution context outside critical section */ hal_context_init(&tcb->context, (size_t) tcb->stack, new_stack_size, @@ -434,16 +434,16 @@ int32_t mo_task_cancel(uint16_t id) if (id == 0 || id == mo_task_id()) return ERR_TASK_CANT_REMOVE; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *tcb = node->data; if (!tcb || tcb->state == TASK_RUNNING) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_CANT_REMOVE; } @@ -463,7 +463,7 @@ int32_t 
mo_task_cancel(uint16_t id) if (kcb->last_ready_hint == node) kcb->last_ready_hint = NULL; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); /* Free memory outside critical section */ free(tcb->stack); @@ -482,16 +482,16 @@ void mo_task_delay(uint16_t ticks) if (!ticks) return; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return; } tcb_t *self = kcb->task_current->data; self->delay = ticks; self->state = TASK_BLOCKED; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); mo_task_yield(); } @@ -501,17 +501,17 @@ int32_t mo_task_suspend(uint16_t id) if (id == 0) return ERR_TASK_NOT_FOUND; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task || (task->state != TASK_READY && task->state != TASK_RUNNING && task->state != TASK_BLOCKED)) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_CANT_SUSPEND; } @@ -522,7 +522,7 @@ int32_t mo_task_suspend(uint16_t id) if (kcb->last_ready_hint == node) kcb->last_ready_hint = NULL; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); if (is_current) mo_task_yield(); @@ -535,21 +535,21 @@ int32_t mo_task_resume(uint16_t id) if (id == 0) return ERR_TASK_NOT_FOUND; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task || task->state != TASK_SUSPENDED) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_CANT_RESUME; } task->state = TASK_READY; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_OK; } @@ -558,22 +558,22 @@ int32_t mo_task_priority(uint16_t id, uint16_t priority) if (id == 0 || !is_valid_priority(priority)) return ERR_TASK_INVALID_PRIO; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = find_task_node_by_id(id); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } uint8_t base = (uint8_t) (priority >> 8); task->prio = ((uint16_t) base << 8) | base; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_OK; } @@ -583,21 +583,21 @@ int32_t mo_task_rt_priority(uint16_t id, void *priority) if (id == 0) return ERR_TASK_NOT_FOUND; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = 
find_task_node_by_id(id); if (!node) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } tcb_t *task = node->data; if (!task) { - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_TASK_NOT_FOUND; } task->rt_prio = priority; - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return ERR_OK; } @@ -613,9 +613,9 @@ int32_t mo_task_idref(void *task_entry) if (!task_entry || !kcb->tasks) return ERR_TASK_NOT_FOUND; - spin_lock_irqsave(&task_lock, &task_flags); + spin_lock_irqsave(&kcb->kcb_lock, &task_flags); list_node_t *node = list_foreach(kcb->tasks, refcmp, task_entry); - spin_unlock_irqrestore(&task_lock, task_flags); + spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return node ? ((tcb_t *) node->data)->id : ERR_TASK_NOT_FOUND; } From 5248c14ec28c7c0c1e0ac11636c48344de42e8ad Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 23:19:01 +0800 Subject: [PATCH 14/18] Add idle task per hart at boot to prevent panic on no runnable tasks To prevent kernel panic during startup when some harts may not have any runnable tasks assigned, add an idle task for each hart. The idle task runs an infinite loop calling mo_task_wfi(), ensuring the hart remains in a low-power wait state instead of causing a panic due to lack of tasks. This guarantees that every hart has at least one task to execute immediately after boot, improving system robustness and stability on SMP setups. --- kernel/main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/main.c b/kernel/main.c index e39a69a..0946931 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -5,6 +5,12 @@ #include "private/error.h" +static void idle_task(void) +{ + while (1) + mo_task_wfi(); +} + static volatile bool finish = false; static spinlock_t finish_lock = SPINLOCK_INITIALIZER; @@ -47,6 +53,8 @@ int32_t main(int32_t hartid) } spin_unlock(&finish_lock); + mo_task_spawn(idle_task, DEFAULT_STACK_SIZE); + /* Verify that the application created at least one task. * If 'kcb->task_current' is still NULL, it means mo_task_spawn was never * successfully called. From 2c924316bf1975bcb580a3037e5cacaeff36ae5b Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 23:22:42 +0800 Subject: [PATCH 15/18] Use per-hart current task pointer in KCB Previously, only a single global pointer tracked the current running task, which worked for single-core systems. To support SMP, change the Kernel Control Block (KCB) to maintain an array of current task pointers, one per hart. Added get_task_current() and set_task_current() helper functions to retrieve and update the current task for the executing hart. Modify kernel and HAL code to use these new functions instead of the single global current task pointer, ensuring correct task tracking on each hart. 
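
The lookup itself is just an array index keyed by the mhartid CSR; a
minimal sketch of the idea behind the new helpers (the bounds check here
is extra illustration and is not in the patch):

    /* One current-task slot per hart, indexed by mhartid. */
    static inline tcb_t *current_tcb_sketch(kcb_t *k)
    {
        uint32_t hart = read_csr(mhartid);   /* hart executing this code */

        if (hart >= MAX_HARTS)               /* defensive bound, illustrative only */
            return NULL;

        list_node_t *node = k->task_current[hart];
        return node ? (tcb_t *) node->data : NULL;
    }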
--- arch/riscv/hal.c | 2 +- include/sys/task.h | 18 ++++++++++++++++- kernel/main.c | 9 +++++---- kernel/mutex.c | 10 +++++----- kernel/task.c | 48 +++++++++++++++++++++++----------------------- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c index 940433d..5b5ea47 100644 --- a/arch/riscv/hal.c +++ b/arch/riscv/hal.c @@ -325,7 +325,7 @@ void hal_timer_disable(void) */ void hal_interrupt_tick(void) { - tcb_t *task = kcb->task_current->data; + tcb_t *task = get_task_current(kcb)->data; if (unlikely(!task)) hal_panic(); /* Fatal error - invalid task state */ diff --git a/include/sys/task.h b/include/sys/task.h index 6ff67b4..ff58a84 100644 --- a/include/sys/task.h +++ b/include/sys/task.h @@ -61,6 +61,8 @@ typedef struct { void *rt_prio; /* Opaque pointer for a custom real-time scheduler hook. */ } tcb_t; +#define MAX_HARTS 8 + /* Kernel Control Block (KCB) * * A singleton structure that holds the global state of the kernel, including @@ -69,7 +71,7 @@ typedef struct { typedef struct { /* Task Management */ list_t *tasks; /* The master list of all tasks (nodes contain tcb_t). */ - list_node_t *task_current; /* Node of the currently running task. */ + list_node_t *task_current[MAX_HARTS]; /* Node of the currently running task. */ /* Saved context of the main kernel thread before scheduling starts. */ jmp_buf context; uint16_t next_tid; /* Monotonically increasing ID for the next new task. */ @@ -113,6 +115,20 @@ extern kcb_t *kcb; /* Core Kernel and Task Management API */ +static inline list_node_t *get_task_current() +{ + const uint32_t mhartid = read_csr(mhartid); + + return kcb->task_current[mhartid]; +} + +static inline void set_task_current(list_node_t *task) +{ + const uint32_t mhartid = read_csr(mhartid); + + kcb->task_current[mhartid] = task; +} + /* Prints a fatal error message and halts the system. */ void panic(int32_t ecode); diff --git a/kernel/main.c b/kernel/main.c index 0946931..63f3a39 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -56,10 +56,10 @@ int32_t main(int32_t hartid) mo_task_spawn(idle_task, DEFAULT_STACK_SIZE); /* Verify that the application created at least one task. - * If 'kcb->task_current' is still NULL, it means mo_task_spawn was never + * If 'get_task_current()' is still NULL, it means mo_task_spawn was never * successfully called. */ - if (!kcb->task_current) + if (!get_task_current()) panic(ERR_NO_TASKS); /* Save the kernel's context. This is a formality to establish a base @@ -70,10 +70,11 @@ int32_t main(int32_t hartid) spin_lock(&finish_lock); /* Launch the first task. - * 'kcb->task_current' was set by the first call to mo_task_spawn. + * 'get_task_current()' was set by the first call to mo_task_spawn. * This function transfers control and does not return. 
*/ - tcb_t *first_task = kcb->task_current->data; + + tcb_t *first_task = get_task_current()->data; if (!first_task) panic(ERR_NO_TASKS); diff --git a/kernel/mutex.c b/kernel/mutex.c index 05a9c3d..86e7016 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -47,10 +47,10 @@ static list_node_t *find_node_by_data(list_t *list, void *data) */ static void mutex_block_atomic(list_t *waiters) { - if (!waiters || !kcb || !kcb->task_current || !kcb->task_current->data) + if (!waiters || !kcb || !get_task_current() || !get_task_current()->data) panic(ERR_SEM_OPERATION); - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; /* Add to waiters list */ if (!list_pushback(waiters, self)) @@ -198,7 +198,7 @@ int32_t mo_mutex_timedlock(mutex_t *m, uint32_t ticks) } /* Must block with timeout */ - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; if (!list_pushback(m->waiters, self)) { spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); @@ -353,7 +353,7 @@ int32_t mo_cond_wait(cond_t *c, mutex_t *m) /* Atomically add to wait list and block */ spin_lock_irqsave(&mutex_lock, &mutex_flags); - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; if (!list_pushback(c->waiters, self)) { spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); @@ -399,7 +399,7 @@ int32_t mo_cond_timedwait(cond_t *c, mutex_t *m, uint32_t ticks) /* Atomically add to wait list */ spin_lock_irqsave(&mutex_lock, &mutex_flags); - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; if (!list_pushback(c->waiters, self)) { spin_unlock_irqrestore(&mutex_lock, mutex_flags); panic(ERR_SEM_OPERATION); diff --git a/kernel/task.c b/kernel/task.c index bfe6502..0424ad0 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -23,7 +23,7 @@ void _timer_tick_handler(void); */ static kcb_t kernel_state = { .tasks = NULL, - .task_current = NULL, + .task_current = {}, .rt_sched = noop_rtsched, .timer_list = NULL, /* Managed by timer.c, but stored here. */ .next_tid = 1, /* Start from 1 to avoid confusion with invalid ID 0 */ @@ -85,10 +85,10 @@ static void task_stack_check(void) if (!should_check) return; - if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) + if (unlikely(!kcb || !get_task_current() || !get_task_current()->data)) panic(ERR_STACK_CHECK); - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; if (unlikely(!is_valid_task(self))) panic(ERR_STACK_CHECK); @@ -205,7 +205,7 @@ void _yield(void) __attribute__((weak, alias("yield"))); /* Scheduler with hint-based ready task search */ static list_node_t *find_next_ready_task(void) { - if (unlikely(!kcb->task_current)) + if (unlikely(!get_task_current())) return NULL; list_node_t *node; @@ -225,7 +225,7 @@ static list_node_t *find_next_ready_task(void) } } - node = kcb->task_current; + node = get_task_current(); while (itcnt++ < SCHED_IMAX) { node = list_cnext(kcb->tasks, node); if (unlikely(!node || !node->data)) @@ -258,11 +258,11 @@ static list_node_t *find_next_ready_task(void) /* Scheduler with reduced overhead */ static uint16_t schedule_next_task(void) { - if (unlikely(!kcb->task_current || !kcb->task_current->data)) + if (unlikely(!get_task_current() || !get_task_current()->data)) panic(ERR_NO_TASKS); /* Mark the previously running task as ready for the next cycle. 
*/ - tcb_t *current_task = kcb->task_current->data; + tcb_t *current_task = get_task_current()->data; if (current_task->state == TASK_RUNNING) current_task->state = TASK_READY; @@ -273,7 +273,7 @@ static uint16_t schedule_next_task(void) } /* Update scheduler state */ - kcb->task_current = next_node; + set_task_current(next_node); tcb_t *next_task = next_node->data; next_task->state = TASK_RUNNING; @@ -297,11 +297,11 @@ void dispatcher(void) /* Top-level context-switch for preemptive scheduling. */ void dispatch(void) { - if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) + if (unlikely(!kcb || !get_task_current() || !get_task_current()->data)) panic(ERR_NO_TASKS); /* Return from longjmp: context is restored, continue task execution. */ - if (setjmp(((tcb_t *) kcb->task_current->data)->context) != 0) + if (setjmp(((tcb_t *) get_task_current()->data)->context) != 0) return; task_stack_check(); @@ -313,16 +313,16 @@ void dispatch(void) } hal_interrupt_tick(); - longjmp(((tcb_t *) kcb->task_current->data)->context, 1); + longjmp(((tcb_t *) get_task_current()->data)->context, 1); } /* Cooperative context switch */ void yield(void) { - if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) + if (unlikely(!kcb || !get_task_current() || !get_task_current()->data)) return; - if (setjmp(((tcb_t *) kcb->task_current->data)->context) != 0) + if (setjmp(((tcb_t *) get_task_current()->data)->context) != 0) return; task_stack_check(); @@ -332,7 +332,7 @@ void yield(void) list_foreach(kcb->tasks, delay_update, NULL); schedule_next_task(); - longjmp(((tcb_t *) kcb->task_current->data)->context, 1); + longjmp(((tcb_t *) get_task_current()->data)->context, 1); } /* Stack initialization with minimal overhead */ @@ -411,8 +411,8 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) tcb->id = kcb->next_tid++; kcb->task_count++; - if (!kcb->task_current) - kcb->task_current = node; + if (!get_task_current()) + set_task_current(node); spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); @@ -483,12 +483,12 @@ void mo_task_delay(uint16_t ticks) return; spin_lock_irqsave(&kcb->kcb_lock, &task_flags); - if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) { + if (unlikely(!kcb || !get_task_current() || !get_task_current()->data)) { spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); return; } - tcb_t *self = kcb->task_current->data; + tcb_t *self = get_task_current()->data; self->delay = ticks; self->state = TASK_BLOCKED; spin_unlock_irqrestore(&kcb->kcb_lock, task_flags); @@ -516,7 +516,7 @@ int32_t mo_task_suspend(uint16_t id) } task->state = TASK_SUSPENDED; - bool is_current = (kcb->task_current == node); + bool is_current = (get_task_current() == node); /* Clear ready hint if suspending that task */ if (kcb->last_ready_hint == node) @@ -603,9 +603,9 @@ int32_t mo_task_rt_priority(uint16_t id, void *priority) uint16_t mo_task_id(void) { - if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) + if (unlikely(!kcb || !get_task_current() || !get_task_current()->data)) return 0; - return ((tcb_t *) kcb->task_current->data)->id; + return ((tcb_t *) get_task_current()->data)->id; } int32_t mo_task_idref(void *task_entry) @@ -647,11 +647,11 @@ uint64_t mo_uptime(void) void _sched_block(queue_t *wait_q) { - if (unlikely(!wait_q || !kcb || !kcb->task_current || - !kcb->task_current->data)) + if (unlikely(!wait_q || !kcb || !get_task_current() || + !get_task_current()->data)) panic(ERR_SEM_OPERATION); - tcb_t *self = kcb->task_current->data; + 
tcb_t *self = get_task_current()->data; if (queue_enqueue(wait_q, self) != 0) { panic(ERR_SEM_OPERATION); From 967b5958249c3609231c0f0414d50ed39786ab83 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 21:45:24 +0800 Subject: [PATCH 16/18] Protect shared kcb->ticks with spinlock Since kcb->ticks is shared and updated by all cores, add a spinlock to protect its increment operation in the dispatcher, ensuring atomicity and preventing race conditions in SMP environments. --- kernel/task.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/task.c b/kernel/task.c index 0424ad0..d99e323 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -289,7 +289,11 @@ static int32_t noop_rtsched(void) /* The main entry point from the system tick interrupt. */ void dispatcher(void) { + uint32_t flag = 0; + + spin_lock_irqsave(&kcb->kcb_lock, &flag); kcb->ticks++; + spin_unlock_irqrestore(&kcb->kcb_lock, flag); _timer_tick_handler(); _dispatch(); } From ceb40078dfc374ac13c26050c39258e25832b7d1 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 29 Jun 2025 21:48:25 +0800 Subject: [PATCH 17/18] Adapt mtimecmp read/write for per-hart registers Previously, mtimecmp was accessed at a fixed MMIO address assuming a single core. Each hart has its own mtimecmp register at distinct offsets, so update mtimecmp read and write functions to index based on the current hart ID, ensuring correct timer compare handling in SMP systems. --- arch/riscv/hal.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c index 5b5ea47..6acbbb2 100644 --- a/arch/riscv/hal.c +++ b/arch/riscv/hal.c @@ -131,11 +131,14 @@ static inline uint64_t mtime_r(void) /* Safely read the 64-bit 'mtimecmp' register. */ static inline uint64_t mtimecmp_r(void) { - uint32_t hi, lo; + uint32_t hi, lo, hartid; + + hartid = read_csr(mhartid); + do { - hi = MTIMECMP_H; - lo = MTIMECMP_L; - } while (hi != MTIMECMP_H); + hi = * (volatile uint32_t *) (CLINT_BASE + 0x4004u + 8 * hartid); + lo = * (volatile uint32_t *) (CLINT_BASE + 0x4000u + 8 * hartid); + } while (hi != *(volatile uint32_t *) (CLINT_BASE + 0x4004u + 8 * hartid)); return CT64(hi, lo); } @@ -152,10 +155,11 @@ static inline void mtimecmp_w(uint64_t val) /* Disable interrupts during the critical section to ensure atomicity */ uint32_t old_mie = read_csr(mie); write_csr(mie, old_mie & ~MIE_MTIE); + uint32_t hartid = read_csr(mhartid); - MTIMECMP_L = 0xFFFFFFFF; /* Set to maximum to prevent spurious interrupt */ - MTIMECMP_H = (uint32_t) (val >> 32); /* Set high word */ - MTIMECMP_L = (uint32_t) val; /* Set low word to final value */ + * (volatile uint32_t *) (CLINT_BASE + 0x4000u + 8 * hartid) = 0xFFFFFFFF; /* Set to maximum to prevent spurious interrupt */ + * (volatile uint32_t *) (CLINT_BASE + 0x4004u + 8 * hartid) = (uint32_t) (val >> 32); /* Set high word */ + * (volatile uint32_t *) (CLINT_BASE + 0x4000u + 8 * hartid) = (uint32_t) val; /* Set low word to final value */ /* Re-enable timer interrupts if they were previously enabled */ write_csr(mie, old_mie); From 933c254815505fe8c96386b4d92a1e17b6f31bbe Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Wed, 25 Jun 2025 23:22:38 +0800 Subject: [PATCH 18/18] Add -smp 4 to QEMU run command for multi-core simulation Enable running the kernel on 4 simulated cores by passing the -smp 4 parameter to qemu-system-riscv32, facilitating SMP testing and development. 
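For reference, the resulting invocation is roughly the following, assuming
BUILD_DIR resolves to build/ (the exact rule is in the diff below):

    qemu-system-riscv32 -smp 4 -machine virt -nographic -bios none \
        -kernel build/image.elf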
--- arch/riscv/build.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/build.mk b/arch/riscv/build.mk index 952a06d..caf4403 100644 --- a/arch/riscv/build.mk +++ b/arch/riscv/build.mk @@ -48,4 +48,4 @@ $(BUILD_KERNEL_DIR)/%.o: $(ARCH_DIR)/%.c | $(BUILD_DIR) run: @$(call notice, Ready to launch Linmo kernel + application.) - $(Q)qemu-system-riscv32 -machine virt -nographic -bios none -kernel $(BUILD_DIR)/image.elf -nographic + $(Q)qemu-system-riscv32 -smp 4 -machine virt -nographic -bios none -kernel $(BUILD_DIR)/image.elf -nographic