small fix
This commit is contained in:
parent
340a344e91
commit
f70658eaed
@ -85,7 +85,7 @@ def setup_common_handlers(trainer: Engine, config, stop_on_nan=True, clear_cuda_
|
|||||||
if not checkpoint_path.exists():
|
if not checkpoint_path.exists():
|
||||||
raise FileNotFoundError(f"Checkpoint '{checkpoint_path}' is not found")
|
raise FileNotFoundError(f"Checkpoint '{checkpoint_path}' is not found")
|
||||||
ckp = torch.load(checkpoint_path.as_posix(), map_location="cpu")
|
ckp = torch.load(checkpoint_path.as_posix(), map_location="cpu")
|
||||||
trainer.logger.info(f"load state_dict for {ckp.keys()}")
|
trainer.logger.info(f"load state_dict for {to_save.keys()}")
|
||||||
Checkpoint.load_objects(to_load=to_save, checkpoint=ckp)
|
Checkpoint.load_objects(to_load=to_save, checkpoint=ckp)
|
||||||
engine.logger.info(f"resume from a checkpoint {checkpoint_path}")
|
engine.logger.info(f"resume from a checkpoint {checkpoint_path}")
|
||||||
trainer.add_event_handler(
|
trainer.add_event_handler(
|
||||||
|
|||||||
3
main.py
3
main.py
@ -32,8 +32,7 @@ def running(local_rank, config, task, backup_config=False, setup_output_dir=Fals
|
|||||||
|
|
||||||
if setup_output_dir and config.resume_from is None:
|
if setup_output_dir and config.resume_from is None:
|
||||||
if output_dir.exists():
|
if output_dir.exists():
|
||||||
assert len(list(output_dir.glob("events*"))) == 0
|
assert len(list(output_dir.glob("events*"))) == 0, f"{output_dir} containers tensorboard event"
|
||||||
assert len(list(output_dir.glob("*.pt"))) == 0
|
|
||||||
if (output_dir / "train.log").exists() and idist.get_rank() == 0:
|
if (output_dir / "train.log").exists() and idist.get_rank() == 0:
|
||||||
(output_dir / "train.log").unlink()
|
(output_dir / "train.log").unlink()
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user