You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "/data/bart/temp_workspace/examples/distributed/FSDP/T5_training.py", line 215, in <module>
fsdp_main(args)
File "/data/bart/temp_workspace/examples/distributed/FSDP/T5_training.py", line 148, in fsdp_main
train_accuracy = train(args, model, rank, world_size, train_loader, optimizer, epoch, sampler=sampler1)
File "/data/bart/temp_workspace/examples/distributed/FSDP/utils/train_utils.py", line 50, in train
output = model(input_ids=batch["source_ids"],attention_mask=batch["source_mask"],labels=batch["target_ids"] )
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 839, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1706, in forward
encoder_outputs = self.encoder(
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1110, in forward
layer_outputs = layer_module(
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 839, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/distributed/algorithms/_checkpoint/checkpoint_wrapper.py", line 164, in forward
return self.checkpoint_fn( # type: ignore[misc]
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 328, in _fn
return fn(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 458, in checkpoint
ret = function(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/bart/temp_workspace/pytorch-trainer/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
TypeError: T5Block.forward() got an unexpected keyword argument 'offload_to_cpu'
The text was updated successfully, but these errors were encountered:
Context
Your Environment
Expected Behavior
Training runs to completion without errors.
Current Behavior
An error is raised and training stops.
Possible Solution
Steps to Reproduce
TypeError: T5Block.forward() got an unexpected keyword argument 'offload_to_cpu'
...
Failure Logs [if any]
The text was updated successfully, but these errors were encountered: