You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "summarization_pipeline.py", line 1382, in <module>
main()
File "summarization_pipeline.py", line 1376, in main
train_ds(configs)
File "summarization_pipeline.py", line 1040, in train_ds
trainer.run(model=model, loss_fct=loss_fct,
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 628, in _run_impl
return self._strategy.launcher.launch(run_method, *args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/strategies/launchers/subprocess_script.py", line 90, in launch
return function(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 638, in _run_with_setup
return run_function(*args, **kwargs)
File "summarization_pipeline.py", line 888, in run
run_epoch('train')
File "summarization_pipeline.py", line 838, in run_epoch
self.backward(loss)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 359, in backward
self._precision.backward(tensor, module, *args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/plugins/precision/precision.py", line 73, in backward
tensor.backward(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/torch/_tensor.py", line 488, in backward
torch.autograd.backward(
File "/home/ubuntu/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 800, in reduce_partition_and_remove_grads
self.reduce_ready_partitions_and_remove_grads(param, i)
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 1271, in reduce_ready_partitions_and_remove_grads
self.reduce_independent_p_g_buckets_and_remove_grads(param, i)
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 845, in reduce_independent_p_g_buckets_and_remove_grads
new_grad_tensor = self.ipg_buffer[self.ipg_index].narrow(
AttributeError: 'DeepSpeedZeroOptimizer' object has no attribute 'ipg_index'
The text was updated successfully, but these errors were encountered:
deepspeed==0.7.0 pytorch-lightning==1.9.2 torch 1.13.1+cu117
一样的版本;
Traceback (most recent call last):
File "summarization_pipeline.py", line 1382, in <module>
main()
File "summarization_pipeline.py", line 1376, in main
train_ds(configs)
File "summarization_pipeline.py", line 1040, in train_ds
trainer.run(model=model, loss_fct=loss_fct,
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 628, in _run_impl
return self._strategy.launcher.launch(run_method, *args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/strategies/launchers/subprocess_script.py", line 90, in launch
return function(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 638, in _run_with_setup
return run_function(*args, **kwargs)
File "summarization_pipeline.py", line 888, in run
run_epoch('train')
File "summarization_pipeline.py", line 838, in run_epoch
self.backward(loss)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/fabric.py", line 359, in backward
self._precision.backward(tensor, module, *args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/lightning_fabric/plugins/precision/precision.py", line 73, in backward
tensor.backward(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/torch/_tensor.py", line 488, in backward
torch.autograd.backward(
File "/home/ubuntu/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 800, in reduce_partition_and_remove_grads
self.reduce_ready_partitions_and_remove_grads(param, i)
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 1271, in reduce_ready_partitions_and_remove_grads
self.reduce_independent_p_g_buckets_and_remove_grads(param, i)
File "/home/ubuntu/.local/lib/python3.8/site-packages/deepspeed/runtime/zero/stage_1_and_2.py", line 845, in reduce_independent_p_g_buckets_and_remove_grads
new_grad_tensor = self.ipg_buffer[self.ipg_index].narrow(
AttributeError: 'DeepSpeedZeroOptimizer' object has no attribute 'ipg_index'
The text was updated successfully, but these errors were encountered: