Skip to content

Commit

Permalink
Add back uuid
Browse files Browse the repository at this point in the history
  • Loading branch information
H-Huang committed Mar 6, 2025
1 parent 8ffa75f commit 7839e2e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
19 changes: 12 additions & 7 deletions src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,19 @@ use log::{info, warn};
#[cfg(test)]
use std::{println as info, println as warn};

// Logs an info-level message prefixed with `[Replica <label>]`.
//
// The replica_id string is of the form {replica_name}:{uuid} or just {uuid}
// (see torchft/manager.py). When a `:` is present the label is the text before
// the first `:` (the replica name); otherwise the whole id (the bare uuid) is
// used.
//
// Arguments:
//   $replica_id - any expression that can be borrowed as a `&str`
//                 (e.g. a `String` field or a `&str`); it is only borrowed,
//                 never moved.
//   $($arg)*    - a format string plus arguments, forwarded to `format!`.
macro_rules! info_with_replica {
    ($replica_id:expr, $($arg:tt)*) => {{
        // Borrow once so the id expression is evaluated a single time.
        let replica_id: &str = &$replica_id;
        // Split only on the first ':' so the uuid part is dropped untouched.
        let label = match replica_id.split_once(':') {
            Some((name, _uuid)) => name,
            None => replica_id,
        };
        info!("[Replica {}] {}", label, format!($($arg)*));
    }};
}

struct ManagerState {
Expand Down
11 changes: 8 additions & 3 deletions torchft/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import logging
import os
import socket
import uuid
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from enum import Enum
Expand Down Expand Up @@ -188,9 +189,13 @@ def __init__(
bind = f"[::]:{port}"
lighthouse_addr = lighthouse_addr or os.environ["TORCHFT_LIGHTHOUSE"]

if replica_id is None:
replica_id = ""
replica_id = replica_id
# We need a unique identifier in the case that a worker restarts quickly and
# replaces the previous worker with the same ID.
new_uuid = str(uuid.uuid4())
if replica_id is None or replica_id == "":
replica_id = new_uuid
else:
replica_id = f"{replica_id}:{new_uuid}"
self._manager = ManagerServer(
replica_id=replica_id,
lighthouse_addr=lighthouse_addr,
Expand Down

0 comments on commit 7839e2e

Please sign in to comment.