RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead #23

mann1 · 2024-05-04T15:01:53Z

File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1809, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2654, in training_step
loss = self.compute_loss(model, inputs)
File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1855, in forward
loss = self.module(*inputs, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_llava.py", line 364, in forward
) = self.prepare_inputs_labels_for_multimodal(
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_llava.py", line 147, in prepare_inputs_labels_for_multimodal
image_features = self.encode_images(images,origin_image_widths,origin_image_heights).to(self.device)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_llava.py", line 91, in encode_images
image_features = self.get_model().get_vision_tower()(images,origin_image_widths,origin_image_heights)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_clip.py", line 349, in forward
image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype),
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_clip.py", line 265, in forward
return self.vision_model(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_clip.py", line 155, in forward
hidden_states = self.embeddings(pixel_values = pixel_values,
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/mnt/azureml/cr/j/bb9ab5e5f804413b90f100da8d318d85/exe/wd/llava_uhd/train/llava-uhd/adapt_clip.py", line 97, in forward
patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid]
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1568, in _call_impl
result = forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 460, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead #23

RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead #23

mann1 commented May 4, 2024

RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead #23

RuntimeError: Given groups=1, weight of size [1024, 3, 14, 14], expected input[12, 15, 336, 336] to have 3 channels, but got 15 channels instead #23

Comments

mann1 commented May 4, 2024