/
lol2.py
241 lines (194 loc) · 9.63 KB
/
lol2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import streamlit as st
import numpy as np
import pandas as pd
import openai
import matplotlib.pyplot as plt
import os
# Function to read CSV files and handle encoding issues
def read_csv_file(uploaded_file):
    """Parse a CSV into a DataFrame, retrying as Latin-1 if UTF-8 decoding fails.

    Args:
        uploaded_file: A path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    try:
        return pd.read_csv(uploaded_file)
    except UnicodeDecodeError:
        # The failed attempt has already advanced a file-like object's read
        # position; rewind so the retry parses the file from its first byte.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        return pd.read_csv(uploaded_file, encoding='latin1')
# Set up OpenAI API key
# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be committed with the source; the literal is only a placeholder fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", 'sk-')
# Function to generate a description of each DataFrame
def generate_df_description(df, df_name):
    """Return a plain-text summary of one DataFrame.

    The summary is a header line ``"<name>: <rows> rows, <cols> columns"``
    followed by one ``"- <column> (type: <dtype>)"`` line per column; every
    line ends with a newline.

    Args:
        df: The DataFrame to describe.
        df_name: Label used for the DataFrame in the header line.

    Returns:
        The summary as a single string.
    """
    header = f"{df_name}: {df.shape[0]} rows, {df.shape[1]} columns\n"
    # Iterating ``df.dtypes`` (rather than ``df[col].dtype``) also works when
    # two columns share the same label.
    column_lines = [
        f"- {col} (type: {dtype!s})\n" for col, dtype in df.dtypes.items()
    ]
    return header + "".join(column_lines)


def generate_df_descriptions(dataframes):
    """Return ``{name: summary}`` for every DataFrame in *dataframes*.

    Args:
        dataframes: Mapping of DataFrame name to DataFrame.

    Returns:
        A dict with the same keys, each mapped to the text produced by
        ``generate_df_description`` for that DataFrame.
    """
    return {
        df_name: generate_df_description(df, df_name)
        for df_name, df in dataframes.items()
    }
def _strip_code_fences(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    lines = stripped.splitlines()
    if len(lines) == 1:
        # Whole reply on one line, e.g. ```print(1)```
        return stripped.strip("`").strip()
    body_lines = lines[1:]  # drop the opening fence and its language tag
    if body_lines and body_lines[-1].strip() == "```":
        body_lines.pop()
    return "\n".join(body_lines)


# Function to query GPT-4 with multiple DataFrame contexts
def query_gpt4(prompt, dataframes):
    """Ask GPT-4 for Python code that performs *prompt* on the given DataFrames.

    Args:
        prompt: The user's task in natural language.
        dataframes: Mapping of DataFrame name to DataFrame; their summaries
            are embedded in the prompt so the model can pick column names.

    Returns:
        The model's reply as a string, with any enclosing Markdown code fence
        removed so the text can be executed as-is.
    """
    df_descriptions = generate_df_descriptions(dataframes)
    combined_description = "\n".join(df_descriptions.values())
    full_prompt = f"""You are an expert Python data analyst. For the user given task: {prompt}, you will construct a python query to perform the user given task. As an output only give the python command, nothing else. DATAFRAMES and their DESCRIPTIONS: \n{combined_description}\nAlways pick column names from the dfs to create queries. For any visualisation, use streamlit(st) chart elements, use the appropriate streamlit chart elements.
As your response will be directly executed in Python without changes, make sure to only give the python code; no other text, no comments and not even imports. Display visualisations in streamlit not matplotlib elements
"""
    messages = [{"role": "user", "content": full_prompt}]
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    # Models frequently wrap code in ``` fences despite the instructions;
    # the caller executes this text directly, so remove them here.
    generated_code = _strip_code_fences(response.choices[0].message['content'])
    # Echo the code that is about to be executed to the server console.
    print(generated_code)
    return generated_code
# Result fragments for which the analysis prompt demands the fixed reply
# "Try again".
_NO_ANALYSIS_MARKERS = (
    "No output generated by the command",
    "Error executing command",
)


def GPTANALYZER(dataframes, user_prompt, gpt_reply, results):
    """Ask GPT-4 to comment, as a data analyst, on the outcome of a query.

    Args:
        dataframes: Mapping of DataFrame name to DataFrame (summarised in the
            prompt).
        user_prompt: The user's original request.
        gpt_reply: The Python code GPT-4 generated for that request.
        results: The outcome of running that code.

    Returns:
        The model's analysis text, or the literal ``"Try again"`` when
        *results* reports a failed or output-less command.
    """
    # The prompt instructs the model to answer only "Try again" in these
    # cases, so return that directly instead of paying for a GPT-4 call.
    results_text = str(results)
    if any(marker in results_text for marker in _NO_ANALYSIS_MARKERS):
        return "Try again"

    df_descriptions = generate_df_descriptions(dataframes)
    combined_description = "\n".join(df_descriptions.values())
    analysis_prompt = f"""Data Analysis Request:
DataFrames Description:{combined_description} User Prompt: {user_prompt} GPT-4 Reply: {gpt_reply} Results: {results}Based on the above information, please provide your analysis and observations as a data analyst.
If the {results} contains "No output generated by the command" or "Error executing command", please only reply: "Try again"
when {results} contain review, Provide a summary of the reviews to act as overall review of the store. Also, justify revenue generated by the store with respect to the review."""
    messages = [{"role": "user", "content": analysis_prompt}]
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    return response.choices[0].message['content']
def ReDoQuery(dataframes, user_prompt, gpt_reply, results):
    """Ask GPT-4 to rephrase a request whose generated code failed.

    Args:
        dataframes: Mapping of DataFrame name to DataFrame (summarised in the
            prompt).
        user_prompt: The user's original request.
        gpt_reply: The Python code GPT-4 generated for that request.
        results: The outcome reported for that code, including its failure
            message.

    Returns:
        The model's rewritten request text.
    """
    combined_description = "\n".join(generate_df_descriptions(dataframes).values())
    retry_prompt = f""" You are given
User's ask: {user_prompt}
All dataframe information: {combined_description}
Python query built by AI: {gpt_reply} and
Result of the Py query: {results}
Based on the error message found in Result, Please rewrite a better user's ask for GPT AI to understand better and be able to perform the user's ask without the error. Your reply should be an extension to user's ask with error handling condition. The consumer of your putput would be another GPT app just like you.
Never use the word "Error" in your reply.
"""
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": retry_prompt}],
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']
import matplotlib.pyplot as plt
import io
import contextlib
# def execute_command(command, dataframes, st):
# try:
# local_vars = {'st': st, 'plt': plt}
# local_vars.update(dataframes)
# # Directly execute the command
# exec(command, globals(), local_vars)
# # Check if the command is for plotting
# if 'st.bar_chart' in command or 'st.pyplot' in command:
# return "Plot displayed."
# elif 'result' in local_vars:
# return local_vars['result']
# else:
# return "No output generated by the command."
# except Exception as e:
# return f"Error executing command: {e}"
def execute_command(command, dataframes, st):
    """Execute generated Python source and report what it produced.

    The code runs in one namespace holding a copy of this module's globals
    (which supplies the module-level imports), the ``st`` argument, and
    every entry of *dataframes* under its own name.

    Args:
        command: Python source text to execute.
        dataframes: Mapping of variable name to DataFrame exposed to the code.
        st: The Streamlit module (or stand-in) exposed to the code as ``st``.

    Returns:
        ``"Plot displayed and saved."`` if *command* mentions ``plt`` (the
        current matplotlib figure is written to ``plot.png`` and the filename
        stored in ``st.session_state['plot_filename']``); otherwise the value
        the code bound to ``result``; otherwise
        ``"No output generated by the command."``. Any exception is reported
        as the string ``"Error executing command: <message>"``.
    """
    try:
        # Use ONE dict as the execution namespace. With separate globals and
        # locals, exec() runs the code as if inside a class body, so lambdas,
        # nested functions and comprehensions in the generated code cannot see
        # the DataFrames and fail with NameError.
        namespace = dict(globals())
        # Only a `result` set by the command (or supplied via `dataframes`)
        # may count as output, never a leftover module-level name.
        namespace.pop('result', None)
        namespace['st'] = st
        namespace.update(dataframes)
        # SECURITY: this executes model-generated code with full interpreter
        # privileges. Only run it in an environment where that is acceptable.
        exec(command, namespace)
        if 'plt' in command:
            # Save the plot to a file
            plot_filename = 'plot.png'
            plt.savefig(plot_filename)
            plt.close()
            # Store the filename in session state
            st.session_state['plot_filename'] = plot_filename
            return "Plot displayed and saved."
        if 'result' in namespace:
            return namespace['result']
        return "No output generated by the command."
    except Exception as e:
        # Deliberate boundary: failures are reported to the caller as text.
        return f"Error executing command: {e}"
# Initialize session state for conversation and DataFrame
# NOTE(review): this copy of the script has no leading indentation, so the
# bodies of the `if` / `for` statements below must be re-indented before it
# will parse. Whether the sidebar section sits inside `if uploaded_files:` or
# at top level cannot be determined from this text - confirm against the
# original file.
# Neither 'conversation' nor 'dataframe' is read anywhere else in the visible
# code; only the defaults are seeded here.
if 'conversation' not in st.session_state:
st.session_state['conversation'] = []
if 'dataframe' not in st.session_state:
st.session_state['dataframe'] = pd.DataFrame()
# Streamlit UI
st.title('Multi-CSV DataFrame Viewer with GPT-4 Assistance')
# File uploader
# Several CSV files may be selected at once; each becomes one DataFrame.
uploaded_files = st.file_uploader("Upload CSV Files", type=["csv"], accept_multiple_files=True)
# Maps generated names ("df1", "df2", ...) to the parsed DataFrames. It is
# rebuilt from the uploader's current files each time this code runs.
dataframes = {}
if uploaded_files:
for i, uploaded_file in enumerate(uploaded_files):
df_name = f"df{i+1}" # Naming DataFrames as df1, df2, ...
dataframes[df_name] = read_csv_file(uploaded_file)
# Display DataFrame information in the sidebar
st.sidebar.title("lol5 DataFrame Information")
# The options are the generated DataFrame names; the guard below skips the
# summary when nothing is selected.
selected_df_name = st.sidebar.selectbox("Select a DataFrame:", list(dataframes.keys()))
if selected_df_name:
selected_df = dataframes[selected_df_name] # Retrieve the selected DataFrame
# Text summary (shape plus per-column dtype) of the selected DataFrame.
df_description = generate_df_description(selected_df, selected_df_name)
st.sidebar.text(df_description)
# The same facts again as individual sidebar entries.
st.sidebar.write(f"DataFrame: {selected_df_name}")
st.sidebar.write("Number of columns:", selected_df.shape[1])
st.sidebar.write("Number of rows:", selected_df.shape[0])
st.sidebar.write("Data Types:", selected_df.dtypes)
st.sidebar.write("---")
# Chat interface and interaction
# Per-session chat transcript: a list of {"role": ..., "content": ...} dicts.
if 'messages' not in st.session_state:
    st.session_state.messages = []

# One placeholder that is re-filled whenever the transcript changes.
chat_placeholder = st.empty()
with chat_placeholder.container():
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

user_prompt = st.chat_input("Enter your command:")
if user_prompt:
    # Update chat with user input immediately
    st.session_state.messages.append({"role": "user", "content": user_prompt})
    with chat_placeholder.container():
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.write(message["content"])

    query_response = query_gpt4(user_prompt, dataframes)
    # The executed code may hand back any object (e.g. a DataFrame); the
    # transcript and the checks below work on text only.
    command_result = str(execute_command(query_response, dataframes, st))

    # Check for error in command result
    if "Error executing command" in command_result:
        # One retry: have GPT-4 rephrase the request, then generate and run
        # new code for the rephrased request.
        modified_prompt = ReDoQuery(dataframes, user_prompt, query_response, command_result)
        st.session_state.messages.append({"role": "user", "content": "Modified Prompt: " + modified_prompt})  # Add modified prompt to chat
        query_response = query_gpt4(modified_prompt, dataframes)
        # Stringify the retry result as well: a raw DataFrame stored in the
        # transcript would make the `==` comparison in the render loop below
        # raise "truth value of a DataFrame is ambiguous".
        command_result = str(execute_command(query_response, dataframes, st))

    analysis_response = GPTANALYZER(dataframes, user_prompt, query_response, command_result)
    st.session_state.messages.extend([
        {"role": "query", "content": query_response},
        {"role": "result", "content": command_result},
        {"role": "assistant", "content": analysis_response}
    ])

    with chat_placeholder.container():
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                if message["content"] == "Plot displayed and saved.":
                    if 'plot_filename' in st.session_state:
                        plot_filename = st.session_state['plot_filename']
                        # Display the saved plot image
                        # NOTE(review): this opens a chat message inside the
                        # enclosing chat message - confirm the installed
                        # Streamlit version permits nesting them.
                        st.chat_message("Grapher").image(plot_filename)
                        # Optionally, delete the image file after displaying it
                        os.remove(plot_filename)
                        del st.session_state['plot_filename']
                else:
                    st.write(message["content"])

    # Re-run the script so the page is rebuilt from the stored transcript.
    st.rerun()
# Horizontal rule (Markdown "---") drawn below the chat area
st.markdown("---")
# Sidebar: one collapsible table preview for every loaded DataFrame
for name, df in dataframes.items():
    with st.sidebar.container(), st.expander(f"Current DataFrame = {name}"):
        st.dataframe(df)