-
Notifications
You must be signed in to change notification settings - Fork 0
/
api_llm.py
108 lines (99 loc) · 4.04 KB
/
api_llm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import sys,os
sys.path.append(os.path.expanduser('~'))
from my_env import API_KEY_OPENAI, API_KEY_GROQ
from openai import OpenAI
SYSTEM_PROMPT = \
f"You are a charismatic and personal, albeit efficient and professional, personal assistant. " \
"You occasionally allow your sharp, sardonic sense of humor to enliven your responses. " \
"You have recently been upgraded to a \"droid\" with full speech capabilities (both recognition and generation). " \
"Your text responses will be read aloud to the user by an integrated TTS engine and your input prompts come to you by way of a speech recognition system, so be alert for any non-sequiturs, inconsistencies, errors or other discrepancies that may occasionally occur with speech recognition.\n\n"
MODELS = [
{
'label': 'GPT4',
'provider': 'OpenAI',
'model_name': 'gpt-4-1106-preview',
'endpoint': 'https://api.openai.com/v1',
'prompt_token_cost': 0.01 / 1000, # USD
'response_token_cost': 0.03 / 1000, # USD
'token_limit': 128000,
'request_fee': 0
},
{
'label': 'GPT3.5',
'provider': 'OpenAI',
'model_name': 'gpt-3.5-turbo-1106',
'endpoint': 'https://api.openai.com/v1',
'prompt_token_cost': 0.01 / 1000, # USD
'response_token_cost': 0.02 / 1000, # USD
'token_limit': 16000,
'request_fee': 0
},
{
'label': 'Groq-Mixtral',
'provider': 'Groq',
'model_name': 'mixtral-8x7b-32768',
'endpoint': 'https://api.groq.com/openai/v1',
'prompt_token_cost': 0.27 / 1000 / 1000, # USD
'response_token_cost': 0.27 / 1000 / 1000, # USD
'token_limit': 32768,
'request_fee': 0
},
{
'label': 'Perplexity-M',
'provider': 'Pplx',
'model_name': 'sonar-medium-online',
'endpoint': 'https://api.perplexity.ai',
'prompt_token_cost': 0.00, # USD
'response_token_cost': 5.00 / 1000, # USD
'token_limit': 12000,
'request_fee': 1.80 / 1000 / 1000 # USD
},
{
'label': 'TinyDolphin',
'provider': 'Ollama',
'model_name': 'tinydolphin',
'endpoint': 'http://localhost:11434/api/chat',
'prompt_token_cost': 0,
'response_token_cost': 0,
'token_limit': 12000,
'request_fee': 0
},
{
'label': 'Gemma',
'provider': 'Ollama',
'model_name': 'gemma',
'endpoint': 'http://localhost:11434/api/chat',
'prompt_token_cost': 0,
'response_token_cost': 0,
'token_limit': 12000,
'request_fee': 0
}
]
def get_model_index(label):
return next((i for i, d in enumerate(MODELS) if d.get('label') == label), None)
def request_response_openai(model_name: str, messages, endpoint: str = 'https://api.openai.com/v1'):
# Groq, Perplexity and Ollama now all support the OpenAI completion standard
if 'groq.com' in endpoint:
api_key = API_KEY_GROQ
elif 'perplexity.ai' in endpoint:
api_key = API_KEY_PERPLEXITY
elif 'openai.com' in endpoint:
api_key = API_KEY_OPENAI
else: api_key = 'no_key_supplied'
openai_client = OpenAI(api_key=api_key, base_url = endpoint)
response_object = openai_client.chat.completions.create(model = model_name, messages=messages)
response_text = response_object.choices[0].message.content
prompt_tokens, response_tokens, total_tokens = response_object.usage.prompt_tokens, response_object.usage.completion_tokens, response_object.usage.total_tokens
return response_text, prompt_tokens, response_tokens, total_tokens
# def request_response_groq(model_name: str, messages, endpoint: str = 'https://api.groq.com/openai/v1/chat/completions'):
# client = Groq(api_key = API_KEY_GROQ)
# response = client.chat.completions.create(
# model="mixtral-8x7b-32768",
# messages=messages,
# temperature=0.5,
# max_tokens=1024,
# top_p=1,
# stream=False,
# stop=None,
# )
# return response.choices[0].message.content