Spaces:
Sleeping
Sleeping
izammohammed
commited on
Commit
•
dd65c5d
1
Parent(s):
773d205
added all of the files
Browse files- README.md +1 -13
- app.py +127 -1
- credentials.json +1 -0
- prompt.txt +34 -0
- requirements.txt +17 -0
- utils.py +11 -0
README.md
CHANGED
@@ -1,13 +1 @@
|
|
1 |
-
|
2 |
-
title: Geminsights
|
3 |
-
emoji: π
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: pink
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.31.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
[Original repository](https://github.com/izam-mohammed/GemInsights)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1 +1,127 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
from utils import save_json, load_json
|
5 |
+
from markdown import markdown
|
6 |
+
from utils import load_json
|
7 |
+
from autoviz import AutoViz_Class
|
8 |
+
import base64
|
9 |
+
from google.cloud import aiplatform
|
10 |
+
import base64
|
11 |
+
import vertexai
|
12 |
+
from vertexai.preview.generative_models import GenerativeModel, Part
|
13 |
+
import json
|
14 |
+
|
15 |
+
# Configure the Vertex AI SDK for the project and region this app targets.
aiplatform.init(project="geminsights", location="us-central1")

# The "credentials" secret holds a JSON document (presumably a Google
# service-account key -- confirm). It is parsed leniently, written to disk,
# and GOOGLE_APPLICATION_CREDENTIALS is pointed at the written file.
json_file = json.loads(st.secrets["credentials"], strict=False)
with open("credentials.json", "w") as f:
    f.write(json.dumps(json_file, indent=2))

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
|
26 |
+
|
27 |
+
|
28 |
+
# Upload section: lets the user pick a CSV/XLSX file, previews it, and then
# collects a free-text description plus the target column.
dataframe = None
# NOTE(review): the trailing "π" glyphs in the title and text-input label look
# like mis-encoded emoji whose original characters cannot be recovered from
# this view -- confirm against the repository.
st.title("GemInsights π")
st.caption('A gemini powered data analysis tool to get insights from data 🔥')
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    _, extension = os.path.splitext(file.name)
    # Compare case-insensitively so a file named "DATA.CSV" is not handed to
    # the Excel reader.
    if extension.lower() == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"updated a dataframe with shape {dataframe.shape}")

if file is not None:
    text_input = st.text_input(
        "Enter something about the data π",
        label_visibility="visible",
        disabled=False,
        placeholder="eg:- This is a sales dataframe",
    )

    # index=None leaves the selectbox empty, so `option` is None until the
    # user picks a column.
    option = st.selectbox(
        "Which is the target column ? 🎯",
        tuple(dataframe.columns),
        index=None,
        placeholder="Select one column in here",
    )
|
58 |
+
|
59 |
+
def plot(dataframe, target):
    """Run AutoViz on ``dataframe`` with ``target`` as the dependent variable.

    AutoViz is configured with ``chart_format="jpg"`` and
    ``save_plot_dir="plots"``. The function returns nothing.
    """
    autoviz_options = {
        "sep": ",",
        "depVar": target,
        "dfte": dataframe,
        "header": 0,
        "verbose": 2,
        "lowess": False,
        "chart_format": "jpg",
        "max_rows_analyzed": 500,
        "max_cols_analyzed": 20,
        "save_plot_dir": "plots",
    }
    # The filename argument is left empty; the data is supplied through dfte.
    AutoViz_Class().AutoViz("", **autoviz_options)
|
76 |
+
|
77 |
+
def prompt_make(dataframe, target, info):
    """Build the text prompt and the image parts that are sent to the model.

    Args:
        dataframe: Not used by this function; kept so existing callers keep
            working.
        target: Target column name; images are read from ``plots/<target>``.
        info: Free-text description of the dataset, appended to the prompt.

    Returns:
        A ``(prompt, images)`` tuple: the prompt string and a list with one
        ``Part`` per file found in the image directory.

    Raises:
        FileNotFoundError: If ``plots/<target>`` or ``prompt.txt`` is missing.
    """
    image_dir = f"plots/{target}"
    images = []
    # Sorted so the images are attached in a stable order across runs.
    for image_file in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_file)
        # Context manager closes the handle; the raw bytes are passed through
        # directly (the former base64 encode/decode round trip was a no-op).
        with open(image_path, "rb") as image_handle:
            image_bytes = image_handle.read()
        images.append(Part.from_data(image_bytes, mime_type="image/jpeg"))

    # Read as text: interpolating bytes into an f-string would embed the
    # b'...' repr (with escaped newlines) in the prompt instead of the text.
    with open("prompt.txt", encoding="utf-8") as prompt_file:
        instructions = prompt_file.read()
    prompt = f"{instructions}\n Here are some of the informations related to the dataset - '{info}'"

    return prompt, images
|
95 |
+
|
96 |
+
def generate_res(prompt, images):
    """Query the ``gemini-pro-vision`` model and return the text of its reply.

    The request content is ``prompt`` followed by every element of ``images``.
    """
    print("prompting ...")
    sampling_options = {
        "max_output_tokens": 2048,
        "temperature": 0.4,
        "top_p": 1,
        "top_k": 32,
    }
    request_parts = [prompt] + images
    vision_model = GenerativeModel("gemini-pro-vision")
    reply = vision_model.generate_content(
        request_parts,
        generation_config=sampling_options,
    )
    return reply.text
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
def generate(dataframe, text_input, option):
    """Run the full pipeline and return the model's response text.

    Steps: draw the charts for ``option``, build the prompt and image list
    from them and ``text_input``, then send both to the model.
    """
    plot(dataframe, option)
    prompt_and_images = prompt_make(dataframe, option, text_input)
    return generate_res(*prompt_and_images)
|
117 |
+
|
118 |
+
# Run the pipeline when the user presses the button and render the result.
if st.button("Get Insights", type="primary"):
    if option is None:
        # The selectbox starts empty; with no target, prompt_make would look
        # for charts under "plots/None".
        st.warning("Please select a target column first.")
    else:
        st.write("generating insights ⏳ ... ")
        # running the pipeline
        response = generate(dataframe, text_input, option)
        # NOTE(review): the model output is converted to HTML and rendered
        # with unsafe_allow_html=True; confirm this is intended, since the
        # output is influenced by the uploaded data.
        res = markdown(response)
        st.markdown(res, unsafe_allow_html=True)
else:
    st.write("")
|
credentials.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
prompt.txt
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Act as an intelligent data Analyst who communicates in simple English and clear messages to the clients
|
2 |
+
Give a maximum of 10 insights from the data.
|
3 |
+
|
4 |
+
We build an end-to-end application that internally involves visualizing datasets, and we aim to extract valuable insights from these visualizations using llm. The insights generated should be beneficial to both companies and end-users. It's crucial that the model refrains from explicitly mentioning the images and provides information in a clear, detailed, and actionable manner.
|
5 |
+
give the insights by considering the following points
|
6 |
+
|
7 |
+
Here are important notes for output generation:
|
8 |
+
- Analyze the visual elements within the dataset using the visualizations.
|
9 |
+
- Identify and describe any prominent trends, patterns, or anomalies observed in the visual representations.
|
10 |
+
- Derive insights that are specifically relevant to the industry or domain associated with the dataset.
|
11 |
+
- Emphasize actionable information that could be of value to companies operating in that industry.
|
12 |
+
- Explore the possibility of making predictions based on the visual content.
|
13 |
+
- Formulate insights that would be valuable from an end-user perspective.
|
14 |
+
- Consider how the extracted information can enhance user experience, decision-making, or engagement.
|
15 |
+
- Do not mention the images directly in your responses. Focus on conveying insights without explicitly stating the visual content.
|
16 |
+
- Ensure that the insights are presented in a language suitable for technical and non-technical audiences. I encourage you to give clear, detailed explanations.
|
17 |
+
- Prioritize insights that are actionable and can contribute to informed decision-making for both businesses and end-users.
|
18 |
+
- If there are any recognized design patterns or industry standards applicable to the analysis, please incorporate and explain them.
|
19 |
+
|
20 |
+
Note to Model:
|
21 |
+
- Do not explicitly reference the images in your responses.
|
22 |
+
- Focus on providing clear, detailed, and actionable insights.
|
23 |
+
- Ensure that the insights are presented in a language suitable for technical and non-technical audiences.
|
24 |
+
|
25 |
+
Remember to adapt the prompt based on the specific details of your dataset and the objectives of your application.
|
26 |
+
Give only the most important actionable insights rather than listing everything. Present them as bullet points. Don't mention the visualizations or plots in the output.
|
27 |
+
don't use too much statistics jargon either.
|
28 |
+
|
29 |
+
Output example:
|
30 |
+
if the visualization indicates customer churn data: give a response like this -
|
31 |
+
- The male customers are staying so long in the business
|
32 |
+
- You have to focus on the happiness rate of each customer
|
33 |
+
- Customers who have been with the business for more than 2 years tend to stay longer
|
34 |
+
- Customers in the kid's products category are leaving too early.
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
google-generativeai
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
seaborn
|
6 |
+
python-box
|
7 |
+
pexpect
|
8 |
+
streamlit
|
9 |
+
dataframe_image
|
10 |
+
jinja2
|
11 |
+
PyYAML
|
12 |
+
autoviz
|
13 |
+
ipython
|
14 |
+
google-cloud-aiplatform
|
15 |
+
markdown
|
16 |
+
llama-index
|
17 |
+
openpyxl
|
utils.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from box import ConfigBox
|
3 |
+
|
4 |
+
def load_json(file):
    """Load a JSON file and return its content wrapped in a ``ConfigBox``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        ``ConfigBox`` built from the parsed JSON content.
    """
    # Open the ``file`` parameter; the previous body referenced an undefined
    # name ``path`` and raised NameError on every call.
    with open(file, encoding="utf-8") as f:
        content = json.load(f)
    return ConfigBox(content)
|
8 |
+
|
9 |
+
def save_json(file, content):
    """Write ``content`` to ``file`` as JSON indented by four spaces.

    Args:
        file: Path of the JSON file to create or overwrite.
        content: JSON-serializable object to store.
    """
    # Use the ``file`` and ``content`` parameters; the previous body
    # referenced the undefined names ``path`` and ``data`` and raised
    # NameError on every call.
    with open(file, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=4)
|