Skip to content

Commit

Permalink
macos: implement host->guest time sync
Browse files Browse the repository at this point in the history
macOS can be quite aggressive when it comes to force apps to take long
naps for power saving reasons. In addition to this, CNTVCT_EL0 doesn't
increase during these naps, meaning the guest is not aware that's
missing ticks.

Both things combined mean that guests that run for long enough will
suffer from clock skews.

To address this, we introduce here a simple and lightweight host to
guest time sync. From the host side, the VMM will send DGRAM packets
via vsock with the current time at regular intervals (60 seconds) and
when an oversleep (host thread sleeping 3 times more than expected) is
detected. On the guest side, "init" spawns a process that listens for
such packets and sets the system clock if a delta bigger than 100ms is
detected.

Fixes: #80
Signed-off-by: Sergio Lopez <[email protected]>
  • Loading branch information
slp committed Mar 2, 2023
1 parent ece96a8 commit b9930a7
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 1 deletion.
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ ifeq ($(SEV),1)
INIT_DEFS := -DSEV=1
endif

INIT_DEFS =
ifeq ($(ROSETTA),1)
INIT_DEFS := -D__ROSETTA__
INIT_DEFS += -D__ROSETTA__
endif
ifeq ($(TIMESYNC),1)
INIT_DEFS += -D__TIMESYNC__
endif

OS = $(shell uname -s)
Expand Down
70 changes: 70 additions & 0 deletions init/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
Expand All @@ -15,6 +17,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <linux/vm_sockets.h>

#include "jsmn.h"

Expand Down Expand Up @@ -480,6 +483,67 @@ static void enable_rosetta()
}
#endif

#ifdef __TIMESYNC__

#define TSYNC_PORT 123
#define BUFSIZE 8
#define NANOS_IN_SECOND 1000000000
/* Set clock if delta is bigger than 100ms */
#define DELTA_SYNC 100000000

void clock_worker()
{
int sockfd, n;
struct sockaddr_vm serveraddr;
char buf[BUFSIZE];
struct timespec gtime;
struct timespec htime;
uint64_t gtime_ns;
uint64_t htime_ns;

sockfd = socket(AF_VSOCK, SOCK_DGRAM, 0);
if (sockfd < 0) {
perror("Couldn't create timesync socket\n");
return;
}

bzero((char *) &serveraddr, sizeof(serveraddr));
serveraddr.svm_family = AF_VSOCK;
serveraddr.svm_port = TSYNC_PORT;
serveraddr.svm_cid = 3;

bzero(buf, BUFSIZE);

n = bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr));
if (n < 0) {
printf("Couldn't bind timesync socket\n");
return;
}

while (1) {
n = recv(sockfd, buf, BUFSIZE, 0);
if (n < 0) {
perror("Error in timesync recv\n");
return;
} else if (n != 8) {
printf("Ignoring bogus timesync packet\n");
continue;
}

htime_ns = *(uint64_t *) &buf[0];
clock_gettime(CLOCK_REALTIME, &gtime);
gtime_ns = gtime.tv_sec * NANOS_IN_SECOND;
gtime_ns += gtime.tv_nsec;

if (llabs(htime_ns - gtime_ns) > DELTA_SYNC) {
htime.tv_sec = htime_ns / NANOS_IN_SECOND;
htime.tv_nsec = htime_ns % NANOS_IN_SECOND;
clock_settime(CLOCK_REALTIME, &htime);
}
}
}
#endif

int main(int argc, char **argv)
{
struct ifreq ifr;
Expand Down Expand Up @@ -566,6 +630,12 @@ int main(int argc, char **argv)
exec_argv[0] = &DEFAULT_KRUN_INIT[0];
}

#ifdef __TIMESYNC__
if (fork() == 0) {
clock_worker();
}
#endif

execvp(exec_argv[0], exec_argv);

return 0;
Expand Down
2 changes: 2 additions & 0 deletions src/devices/src/virtio/vsock/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ mod packet;
mod proxy;
mod reaper;
mod tcp;
#[cfg(target_os = "macos")]
mod timesync;
mod udp;

pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK;
Expand Down
16 changes: 16 additions & 0 deletions src/devices/src/virtio/vsock/muxer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ use super::packet::{TsiGetnameRsp, VsockPacket};
use super::proxy::{Proxy, ProxyRemoval, ProxyUpdate};
use super::reaper::ReaperThread;
use super::tcp::TcpProxy;
#[cfg(target_os = "macos")]
use super::timesync::TimesyncThread;
use super::udp::UdpProxy;
use super::VsockError;
use crossbeam_channel::{unbounded, Sender};
Expand Down Expand Up @@ -146,6 +148,20 @@ impl VsockMuxer {
self.intc = intc.clone();
self.irq_line = irq_line;

#[cfg(target_os = "macos")]
{
let timesync = TimesyncThread::new(
self.cid,
mem.clone(),
queue_dgram.clone(),
self.interrupt_evt.try_clone().unwrap(),
self.interrupt_status.clone(),
intc.clone(),
irq_line,
);
timesync.run();
}

let (sender, receiver) = unbounded();

let thread = MuxerThread::new(
Expand Down
8 changes: 8 additions & 0 deletions src/devices/src/virtio/vsock/packet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,4 +610,12 @@ impl VsockPacket {
None
}
}

pub fn write_time_sync(&mut self, time: u64) {
if self.buf_size >= 8 {
if let Some(buf) = self.buf_mut() {
byte_order::write_le_u64(&mut buf[0..], time);
}
}
}
}
99 changes: 99 additions & 0 deletions src/devices/src/virtio/vsock/timesync.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::thread;
use std::time;

use super::super::super::legacy::Gic;
use super::super::Queue as VirtQueue;
use super::super::VIRTIO_MMIO_INT_VRING;
use super::defs::uapi;
use super::packet::VsockPacket;

use utils::eventfd::EventFd;
use vm_memory::GuestMemoryMmap;

const UPDATE_INTERVAL: u64 = 60 * 1000 * 1000 * 1000;
const SLEEP_NSECS: u64 = 2 * 1000 * 1000 * 1000;
const TSYNC_PORT: u32 = 123;

pub struct TimesyncThread {
cid: u64,
mem: GuestMemoryMmap,
queue_mutex: Arc<Mutex<VirtQueue>>,
interrupt_evt: EventFd,
interrupt_status: Arc<AtomicUsize>,
intc: Option<Arc<Mutex<Gic>>>,
irq_line: Option<u32>,
}

impl TimesyncThread {
pub fn new(
cid: u64,
mem: GuestMemoryMmap,
queue_mutex: Arc<Mutex<VirtQueue>>,
interrupt_evt: EventFd,
interrupt_status: Arc<AtomicUsize>,
intc: Option<Arc<Mutex<Gic>>>,
irq_line: Option<u32>,
) -> Self {
Self {
cid,
mem,
queue_mutex,
interrupt_evt,
interrupt_status,
intc,
irq_line,
}
}

fn send_time(&self, time: u64) {
let mut queue = self.queue_mutex.lock().unwrap();
if let Some(head) = queue.pop(&self.mem) {
if let Ok(mut pkt) = VsockPacket::from_rx_virtq_head(&head) {
pkt.set_op(uapi::VSOCK_OP_RW)
.set_src_cid(uapi::VSOCK_HOST_CID)
.set_dst_cid(self.cid)
.set_src_port(TSYNC_PORT)
.set_dst_port(TSYNC_PORT)
.set_type(uapi::VSOCK_TYPE_DGRAM);

pkt.write_time_sync(time);
pkt.set_len(pkt.buf().unwrap().len() as u32);
queue.add_used(&self.mem, head.index, pkt.hdr().len() as u32 + pkt.len());
self.interrupt_status
.fetch_or(VIRTIO_MMIO_INT_VRING as usize, Ordering::SeqCst);
if let Some(intc) = &self.intc {
intc.lock().unwrap().set_irq(self.irq_line.unwrap());
} else if let Err(e) = self.interrupt_evt.write(1) {
warn!("failed to signal used queue: {:?}", e);
}
}
}
}

fn work(&mut self) {
let mut last_update = 0u64;
let mut last_awake = utils::time::get_time(utils::time::ClockType::Real);
loop {
let now = utils::time::get_time(utils::time::ClockType::Real);
/*
* We send a time sync packet if we slept for 3 times more
* nanoseconds than expected (which is an indication the
* system forced us to take a long nap), or if UPDATE_INTERVAL
* has been reached.
*/
if (now - last_awake) >= (SLEEP_NSECS * 3) || (now - last_update) >= UPDATE_INTERVAL {
self.send_time(now);
last_update = now;
}

last_awake = utils::time::get_time(utils::time::ClockType::Real);
thread::sleep(time::Duration::from_nanos(SLEEP_NSECS));
}
}

pub fn run(mut self) {
thread::spawn(move || self.work());
}
}

0 comments on commit b9930a7

Please sign in to comment.