You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I tried out attention sink with the example command in the FAQ, and it works fine:
python generate.py --base_model=mistralai/Mistral-7B-Instruct-v0.2 --score_model=None --attention_sinks=True --max_new_tokens=100000 --max_max_new_tokens=100000 --top_k_docs=-1 --use_gpu_id=False --max_seq_len=4096 --sink_dict="{'num_sink_tokens': 4, 'window_length': 4096}"
However, if I change the base model to h2oai/h2ogpt-4096-llama2-13b-chat, I get the error below:
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/queueing.py", line 501, in call_prediction
output = await route_utils.call_process_api(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/route_utils.py", line 252, in call_process_api
output = await app.get_blocks().process_api(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/blocks.py", line 1664, in process_api
result = await self.call_function(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/blocks.py", line 1217, in call_function
prediction = await utils.async_iteration(iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 526, in async_iteration
return await iterator.__anext__()
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 519, in __anext__
return await anyio.to_thread.run_sync(
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, *args)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 502, in run_sync_iterator_async
return next(iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 685, in gen_wrapper
response = next(iterator)
File "/workspace/src/gradio_runner.py", line 4600, in bot
for res in get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1,
File "/workspace/src/gradio_runner.py", line 4495, in get_response
for output_fun in fun1():
File "/workspace/src/gen.py", line 4096, in evaluate
for r in run_qa_db(
File "/workspace/src/gpt_langchain.py", line 5676, in _run_qa_db
answer = yield from run_target_func(query=query,
File "/workspace/src/gpt_langchain.py", line 5849, in run_target
raise thread.exc
File "/workspace/src/utils.py", line 476, in run
self._return = self._target(*self._args, **self._kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 316, in call
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 310, in call
self._call(inputs, run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/combine_documents/base.py", line 136, in _call
output, extra_return_dict = self.combine_docs(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/combine_documents/stuff.py", line 244, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 293, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 316, in call
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 310, in call
self._call(inputs, run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 103, in _call
response = self.generate([inputs], run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 115, in generate
return self.llm.generate_prompt(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 521, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 671, in generate
output = self._generate_helper(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 558, in _generate_helper
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 545, in _generate_helper
self._generate(
File "/workspace/src/gpt_langchain.py", line 1769, in _generate
rets = super()._generate(prompts, stop=stop, run_manager=run_manager, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_community/llms/huggingface_pipeline.py", line 203, in _generate
responses = self.pipeline(batch_prompts)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 241, in __call__
return super().__call__(text_inputs, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1177, in call
outputs = list(final_iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py", line 125, in __next__
processed = self.infer(item, **self.params)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1102, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "/workspace/src/h2oai_pipeline.py", line 260, in _forward
return self.__forward(model_inputs, **generate_kwargs)
File "/workspace/src/h2oai_pipeline.py", line 296, in __forward
generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 1544, in generate
return self.greedy_search(
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 2404, in greedy_search
outputs = self(
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1177, in forward
outputs = self.model(
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 983, in forward
past_key_values = DynamicCache.from_legacy_cache(past_key_values)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/cache_utils.py", line 166, in from_legacy_cache
for layer_idx in range(len(past_key_values)):
Could you help with this issue? Thank you very much!
The text was updated successfully, but these errors were encountered:
Hello,
I tried out attention sink with the example command in the FAQ, and it works fine:
python generate.py --base_model=mistralai/Mistral-7B-Instruct-v0.2 --score_model=None --attention_sinks=True --max_new_tokens=100000 --max_max_new_tokens=100000 --top_k_docs=-1 --use_gpu_id=False --max_seq_len=4096 --sink_dict="{'num_sink_tokens': 4, 'window_length': 4096}"
However, if I change the base model to h2oai/h2ogpt-4096-llama2-13b-chat, I get the error below:
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/queueing.py", line 501, in call_prediction
output = await route_utils.call_process_api(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/route_utils.py", line 252, in call_process_api
output = await app.get_blocks().process_api(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/blocks.py", line 1664, in process_api
result = await self.call_function(
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/blocks.py", line 1217, in call_function
prediction = await utils.async_iteration(iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 526, in async_iteration
return await iterator.__anext__()
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 519, in __anext__
return await anyio.to_thread.run_sync(
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "/h2ogpt_conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, *args)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 502, in run_sync_iterator_async
return next(iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/gradio/utils.py", line 685, in gen_wrapper
response = next(iterator)
File "/workspace/src/gradio_runner.py", line 4600, in bot
for res in get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1,
File "/workspace/src/gradio_runner.py", line 4495, in get_response
for output_fun in fun1():
File "/workspace/src/gen.py", line 4096, in evaluate
for r in run_qa_db(
File "/workspace/src/gpt_langchain.py", line 5676, in _run_qa_db
answer = yield from run_target_func(query=query,
File "/workspace/src/gpt_langchain.py", line 5849, in run_target
raise thread.exc
File "/workspace/src/utils.py", line 476, in run
self._return = self._target(*self._args, **self._kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 316, in call
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 310, in call
self._call(inputs, run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/combine_documents/base.py", line 136, in _call
output, extra_return_dict = self.combine_docs(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/combine_documents/stuff.py", line 244, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 293, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 316, in call
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/base.py", line 310, in call
self._call(inputs, run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 103, in _call
response = self.generate([inputs], run_manager=run_manager)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain/chains/llm.py", line 115, in generate
return self.llm.generate_prompt(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 521, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 671, in generate
output = self._generate_helper(
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 558, in _generate_helper
raise e
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_core/language_models/llms.py", line 545, in _generate_helper
self._generate(
File "/workspace/src/gpt_langchain.py", line 1769, in _generate
rets = super()._generate(prompts, stop=stop, run_manager=run_manager, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/langchain_community/llms/huggingface_pipeline.py", line 203, in _generate
responses = self.pipeline(batch_prompts)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 241, in __call__
return super().__call__(text_inputs, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1177, in call
outputs = list(final_iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/pt_utils.py", line 125, in __next__
processed = self.infer(item, **self.params)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1102, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "/workspace/src/h2oai_pipeline.py", line 260, in _forward
return self.__forward(model_inputs, **generate_kwargs)
File "/workspace/src/h2oai_pipeline.py", line 296, in __forward
generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 1544, in generate
return self.greedy_search(
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 2404, in greedy_search
outputs = self(
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1177, in forward
outputs = self.model(
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 983, in forward
past_key_values = DynamicCache.from_legacy_cache(past_key_values)
File "/h2ogpt_conda/lib/python3.10/site-packages/transformers/cache_utils.py", line 166, in from_legacy_cache
for layer_idx in range(len(past_key_values)):
Could you help with this issue? Thank you very much!
The text was updated successfully, but these errors were encountered: