We tried to test DNABERT-2 on an AWS EC2 p2.xlarge instance with Ubuntu, CUDA 11.5, and gcc 9 (we also tried gcc 11).
Every attempt failed.
We set up the environment using the requirements.txt posted on GitHub, but it still did not work.
The trouble comes from the command `hidden_states = model(inputs)[0]  # [1, sequence_length, 768]`.
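For context, this is roughly what we ran, adapted from the usage example in the DNABERT-2 README (the DNA string below is an arbitrary placeholder; we moved the model and inputs to the GPU, which is where the Triton flash-attention kernel gets compiled):

```python
import torch
from transformers import AutoTokenizer, AutoModel

# Load tokenizer and model as in the DNABERT-2 README (custom code on the Hub)
tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True).cuda()

# Arbitrary DNA sequence, only used to reproduce the crash
dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
inputs = tokenizer(dna, return_tensors="pt")["input_ids"].cuda()

hidden_states = model(inputs)[0]  # [1, sequence_length, 768]  <- fails here
```

Running the last line produces the following output and traceback: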
>>> hidden_states = model(inputs)[0] # [1, sequence_length, 768]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Traceback (most recent call last):
File "<string>", line 21, in _fwd_kernel
KeyError: ('2-.-0-.-0--d6252949da17ceb5f3a278a70250af13-3b85c7bef5f0a641282f3b73af50f599-14de7de5c4da5794c8ca14e7e41a122d-3498c340fd4b6ee7805fd54b882a04f5-e1f133f98d04093da2078dfc51c36b72-b26258bf01f839199e39d64851821f26-d7c06e3b46e708006c15224aac7a1378-f585402118c8a136948ce0a49cfe122c', (torch.float16, torch.float16, torch.float16, torch.float16, torch.float16, torch.float32, torch.float32, 'fp32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32'), ('matrix', False, 64, False, False, True, 128, 128), (True, True, True, True, True, True, True, (False,), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (False, False), (False, False), (False, False), (True, False), (True, False), (True, False), (False, False), (False, False), (False, False), (True, False), (True, False), (True, False), (True, False)))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 937, in build_triton_ir
generator.visit(fn.parse())
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 183, in visit_Module
ast.NodeVisitor.generic_visit(self, node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 379, in generic_visit
self.visit(item)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 252, in visit_FunctionDef
has_ret = self.visit_compound_statement(node.body)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 177, in visit_compound_statement
self.last_ret_type = self.visit(stmt)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 678, in visit_For
self.visit_compound_statement(node.body)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 177, in visit_compound_statement
self.last_ret_type = self.visit(stmt)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 319, in visit_AugAssign
self.visit(assign)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 301, in visit_Assign
values = self.visit(node.value)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 339, in visit_BinOp
rhs = self.visit(node.right)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 855, in visit
return super().visit(node)
File "/root/miniconda3/envs/dna/lib/python3.8/ast.py", line 371, in visit
return visitor(node)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 797, in visit_Call
return fn(*args, _builder=self.builder, **kws)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/impl/base.py", line 22, in wrapper
return fn(*args, **kwargs)
TypeError: dot() got an unexpected keyword argument 'trans_b'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/bert_layers.py", line 608, in forward
encoder_outputs = self.encoder(
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/bert_layers.py", line 446, in forward
hidden_states = layer_module(hidden_states,
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/bert_layers.py", line 327, in forward
attention_output = self.attention(hidden_states, cu_seqlens, seqlen,
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/bert_layers.py", line 240, in forward
self_output = self.self(input_tensor, cu_seqlens, max_s, indices,
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/bert_layers.py", line 181, in forward
attention = flash_attn_qkvpacked_func(qkv, bias)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/flash_attn_triton.py", line1021, in forward
o, lse, ctx.softmax_scale = _flash_attn_forward(
File "/root/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/81ac6a98387cf94bc283553260f3fa6b88cef2fa/flash_attn_triton.py", line826, in _flash_attn_forward
_fwd_kernel[grid]( # type: ignore
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/runtime/autotuner.py", line 90, in run
return self.fn.run(*args, num_warps=config.num_warps, num_stages=config.num_stages, **kwargs, **config.kwargs)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/runtime/autotuner.py", line 199, in run
return self.fn.run(*args, **kwargs)
File "<string>", line 41, in _fwd_kernel
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 1621, in compile
next_module = compile(module)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 1550, in <lambda>
lambda src: ast_to_ttir(src, signature, configs[0], constants)),
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 962, in ast_to_ttir
mod, _ = build_triton_ir(fn, signature, specialization, constants)
File "/root/miniconda3/envs/dna/lib/python3.8/site-packages/triton/compiler.py", line 942, in build_triton_ir
raise CompilationError(fn.src, node) from e
triton.compiler.CompilationError: at 114:24:
def _fwd_kernel(
Q,
K,
V,
Bias,
Out,
Lse,
TMP, # NOTE: TMP is a scratchpad buffer to workaround a compiler bug
softmax_scale,
stride_qb,
stride_qh,
stride_qm,
stride_kb,
stride_kh,
stride_kn,
stride_vb,
stride_vh,
stride_vn,
stride_bb,
stride_bh,
stride_bm,
stride_ob,
stride_oh,
stride_om,
nheads,
seqlen_q,
seqlen_k,
seqlen_q_rounded,
headdim,
CACHE_KEY_SEQLEN_Q,
CACHE_KEY_SEQLEN_K,
BIAS_TYPE: tl.constexpr,
IS_CAUSAL: tl.constexpr,
BLOCK_HEADDIM: tl.constexpr,
EVEN_M: tl.constexpr,
EVEN_N: tl.constexpr,
EVEN_HEADDIM: tl.constexpr,
BLOCK_M: tl.constexpr,
BLOCK_N: tl.constexpr,
):
start_m = tl.program_id(0)
off_hb = tl.program_id(1)
off_b = off_hb // nheads
off_h = off_hb % nheads
# off_b = tl.program_id(1)
# off_h = tl.program_id(2)
# off_hb = off_b * nheads + off_h
# initialize offsets
offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
offs_n = tl.arange(0, BLOCK_N)
offs_d = tl.arange(0, BLOCK_HEADDIM)
# Initialize pointers to Q, K, V
# Adding parenthesis around indexing might use int32 math instead of int64 math?
# https://github.com/openai/triton/issues/741
# I'm seeing a tiny bit of difference (5-7us)
q_ptrs = Q + off_b * stride_qb + off_h * stride_qh + (
offs_m[:, None] * stride_qm + offs_d[None, :])
k_ptrs = K + off_b * stride_kb + off_h * stride_kh + (
offs_n[:, None] * stride_kn + offs_d[None, :])
v_ptrs = V + off_b * stride_vb + off_h * stride_vh + (
offs_n[:, None] * stride_vn + offs_d[None, :])
if BIAS_TYPE == 'vector':
b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + offs_n
elif BIAS_TYPE == 'matrix':
b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + (
offs_m[:, None] * stride_bm + offs_n[None, :])
else:
raise ValueError("BIAS_TYPE must be one of {'vector', 'matrix'}")
# initialize pointer to m and l
t_ptrs = TMP + off_hb * seqlen_q_rounded + offs_m
lse_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float('inf')
m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float('inf')
acc_o = tl.zeros([BLOCK_M, BLOCK_HEADDIM], dtype=tl.float32)
# load q: it will stay in SRAM throughout
# [2022-10-30] TD: Triton bug - in the case of EVEN_M=True and EVEN_N=False, if we just call
# tl.load(q_ptrs), we get the wrong output!
if EVEN_M & EVEN_N:
if EVEN_HEADDIM:
q = tl.load(q_ptrs)
else:
q = tl.load(q_ptrs, mask=offs_d[None, :] < headdim, other=0.0)
else:
if EVEN_HEADDIM:
q = tl.load(q_ptrs, mask=offs_m[:, None] < seqlen_q, other=0.0)
else:
q = tl.load(q_ptrs,
mask=(offs_m[:, None] < seqlen_q) &
(offs_d[None, :] < headdim),
other=0.0)
# loop over k, v and update accumulator
end_n = seqlen_k if not IS_CAUSAL else tl.minimum(
(start_m + 1) * BLOCK_M, seqlen_k)
for start_n in range(0, end_n, BLOCK_N):
start_n = tl.multiple_of(start_n, BLOCK_N)
# -- compute qk ----
if EVEN_N & EVEN_M: # If we just do "if EVEN_N", there seems to be some race condition
if EVEN_HEADDIM:
k = tl.load(k_ptrs + start_n * stride_kn)
else:
k = tl.load(k_ptrs + start_n * stride_kn,
mask=offs_d[None, :] < headdim,
other=0.0)
else:
if EVEN_HEADDIM:
k = tl.load(k_ptrs + start_n * stride_kn,
mask=(start_n + offs_n)[:, None] < seqlen_k,
other=0.0)
else:
k = tl.load(k_ptrs + start_n * stride_kn,
mask=((start_n + offs_n)[:, None] < seqlen_k) &
(offs_d[None, :] < headdim),
other=0.0)
qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
qk += tl.dot(q, k, trans_b=True)
^
>>>
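As a side note, unrelated to the crash: the huggingface/tokenizers fork warning at the top of the log can be silenced by setting the environment variable it mentions before any tokenization, e.g.:

```python
import os

# Disable tokenizers parallelism to silence the fork warning shown above
os.environ["TOKENIZERS_PARALLELISM"] = "false"
```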