Unverified commit e3a63b6f, authored by OlivierDehaene and committed by GitHub
Browse files

fix(launcher): revert change on shard errors (#173)

parent 880a76ee
...@@ -349,8 +349,8 @@ fn main() -> ExitCode { ...@@ -349,8 +349,8 @@ fn main() -> ExitCode {
Err(TryRecvError::Empty) => { Err(TryRecvError::Empty) => {
sleep(Duration::from_millis(100)); sleep(Duration::from_millis(100));
} }
Ok(ShardStatus::Failed(rank)) => { Ok(ShardStatus::Failed((rank, err))) => {
tracing::error!("Shard {} failed to start.", rank); tracing::error!("Shard {} failed to start:\n{}", rank, err);
shutdown_shards(shutdown, &shutdown_receiver); shutdown_shards(shutdown, &shutdown_receiver);
return ExitCode::FAILURE; return ExitCode::FAILURE;
} }
...@@ -457,8 +457,8 @@ fn main() -> ExitCode { ...@@ -457,8 +457,8 @@ fn main() -> ExitCode {
let mut exit_code = ExitCode::SUCCESS; let mut exit_code = ExitCode::SUCCESS;
while running.load(Ordering::SeqCst) { while running.load(Ordering::SeqCst) {
if let Ok(ShardStatus::Failed(rank)) = status_receiver.try_recv() { if let Ok(ShardStatus::Failed((rank, err))) = status_receiver.try_recv() {
tracing::error!("Shard {rank} failed."); tracing::error!("Shard {rank} failed:\n{err}");
exit_code = ExitCode::FAILURE; exit_code = ExitCode::FAILURE;
break; break;
}; };
...@@ -488,7 +488,7 @@ fn main() -> ExitCode { ...@@ -488,7 +488,7 @@ fn main() -> ExitCode {
#[derive(Debug)] #[derive(Debug)]
enum ShardStatus { enum ShardStatus {
Ready, Ready,
Failed(usize), Failed((usize, String)),
} }
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
...@@ -627,7 +627,9 @@ fn shard_manager( ...@@ -627,7 +627,9 @@ fn shard_manager(
tracing::error!("Please install it with `make install-server`") tracing::error!("Please install it with `make install-server`")
} }
} }
status_sender.send(ShardStatus::Failed(rank)).unwrap(); status_sender
.send(ShardStatus::Failed((rank, err.to_string())))
.unwrap();
return; return;
} }
}; };
...@@ -656,7 +658,11 @@ fn shard_manager( ...@@ -656,7 +658,11 @@ fn shard_manager(
loop { loop {
// Process exited // Process exited
if p.poll().is_some() { if p.poll().is_some() {
status_sender.send(ShardStatus::Failed(rank)).unwrap(); let mut err = String::new();
p.stderr.take().unwrap().read_to_string(&mut err).unwrap();
status_sender
.send(ShardStatus::Failed((rank, err)))
.unwrap();
return; return;
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment