Skip to content

Commit

Permalink
Merge pull request #68 from kshitijrajsharma/feature/streamlit
Browse files Browse the repository at this point in the history
Feature : Streamlit
  • Loading branch information
kshitijrajsharma authored Dec 31, 2023
2 parents 27a35b0 + 9f2772d commit 777f010
Show file tree
Hide file tree
Showing 2 changed files with 256 additions and 0 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ Follow [Version Control Docs](./docs/Version_control.md)

### Request Stats Tracking on Github
Follow [stats_request_docs](./docs/Request_Stats.md)

## Visualize stats with streamlit

```bash
pip install streamlit
streamlit run streamlit_app.py
```
249 changes: 249 additions & 0 deletions streamlit_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
import json
from datetime import datetime
from io import StringIO

import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st
from matplotlib.ticker import FuncFormatter


def extract_json_value(df, column, selected_key):
def extract_value(row):
try:
json_data = json.loads(row)
return json_data[selected_key] if selected_key in json_data else None
except (json.JSONDecodeError, KeyError):
return None

return df[column].apply(extract_value)


def get_data_from_url(url):
try:
if "github.com" in url:
# If the URL is from GitHub, map it to the raw URL
url = url.replace("github.com", "raw.githubusercontent.com").replace(
"/blob/", "/"
)
response = requests.get(url)
response.raise_for_status()
st.sidebar.success("Remote URl Added Successfully")
return pd.read_csv(StringIO(response.text))
except requests.exceptions.RequestException as e:
st.sidebar.error(f"Error retrieving data from URL: {e}")
return pd.DataFrame()


def dynamic_json_extraction(df):
st.text("Extract item(s) from JSON Stats")

selected_column = st.selectbox("Select column to extract from", df.columns)

# Extract unique keys from the selected column
unique_keys = set()
for row in df[selected_column]:
try:
json_data = json.loads(row)
unique_keys.update(json_data.keys())
except Exception as ex:
break

# Allow the user to select multiple keys
selected_keys = st.multiselect(
"Select JSON Key(s) to extract", sorted(list(unique_keys))
)
if not selected_keys:
st.warning("No keys selected. Please choose one or more keys for extract")
return df

# Extract the selected JSON keys and create new columns
if selected_keys:
if st.button("Extract"):
# Extract the selected JSON keys and create new columns
for key in selected_keys:
new_column_name = f"{selected_column}_{key}"
df[new_column_name] = extract_json_value(df, selected_column, key)
st.success(f"Successfully extracted {key} from {selected_column}.")

return df


# Function to plot the chart based on user selections
def plot_chart(df, x_column, y_columns, plot_type, title, chart_properties):
fig, ax = plt.subplots(figsize=(12, 6))

if plot_type == "line":
for i, column in enumerate(y_columns):
ax.plot(
df[x_column],
df[column],
label=column,
marker=chart_properties["marker"],
linestyle=chart_properties["linestyle"],
)
elif plot_type == "bar":
df.plot(
x=x_column,
y=y_columns,
kind="bar",
stacked=True,
width=chart_properties["bar_width"],
ax=ax,
)
elif plot_type == "scatter":
for i, column in enumerate(y_columns):
ax.scatter(
df[x_column],
df[column],
label=column,
marker=chart_properties["marker"],
)

ax.set_title(title)
ax.set_xlabel(x_column)
ax.set_ylabel("Values")
ax.legend()

return fig, ax


# Main Streamlit app
def main():
# Placeholder for df

df = pd.DataFrame()
st.sidebar.title("OSMSG Stats Visualizer")
st.sidebar.subheader("Choose Data")
data_source = st.sidebar.radio(
"Select data source", ("Upload CSV", "Use Remote URL")
)

if data_source == "Upload CSV":
uploaded_file = st.sidebar.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file is not None:
df = pd.read_csv(uploaded_file)
else:
# Use remote data
remote_url = st.sidebar.text_input("Enter Remote CSV URL", "")
if remote_url:
df = get_data_from_url(remote_url)

st.sidebar.title("Choose Chart Options")
selected_columns = st.sidebar.multiselect("Select columns for chart", df.columns)

if not selected_columns:
st.warning("Please select at least one column for the chart.")
st.stop()

plot_type = st.sidebar.selectbox("Select chart type", ["line", "bar", "scatter"])

# Ask user for chart title
chart_title = st.text_input("Enter Chart Title", "My Customized Chart")
show_table = st.sidebar.checkbox("Show Table", value=True)
if "tags_create" in selected_columns or "tags_modify" in selected_columns:
selected_key = st.sidebar.text_input("Enter JSON Key for Tags", "name")

if "tags_create" in selected_columns:
df["tags_create"] = extract_json_value(df, "tags_create", selected_key)
selected_columns.remove("tags_create")
selected_columns.append("tags_create")

if "tags_modify" in selected_columns:
df["tags_modify"] = extract_json_value(df, "tags_modify", selected_key)
selected_columns.remove("tags_modify")
selected_columns.append("tags_modify")

chart_properties = {}
if plot_type == "line":
chart_properties["marker"] = st.sidebar.selectbox(
"Select Marker Type", options=["o", "s", "^"], index=0, key="marker"
)
chart_properties["linestyle"] = st.sidebar.selectbox(
"Select Line Style", options=["-", "--", "-."], index=0, key="linestyle"
)
elif plot_type == "bar":
chart_properties["bar_width"] = st.sidebar.slider(
"Select Bar Width",
min_value=0.1,
max_value=1.0,
value=0.8,
step=0.1,
key="bar_width",
)
elif plot_type == "scatter":
chart_properties["marker"] = st.sidebar.selectbox(
"Select Marker Type", options=["o", "s", "^"], index=0, key="marker"
)

# Allow users to select x-axis interval based on data type
x_column = st.selectbox("Select X-axis column", df.columns)
# st.title(chart_title)
y_columns = st.multiselect("Select Y-axis column(s)", selected_columns)

if pd.api.types.is_numeric_dtype(df[x_column]):
x_min = st.slider(
"Select X-axis Interval (Min)",
min_value=df[x_column].min(),
max_value=df[x_column].max(),
value=df[x_column].min(),
)

x_max = st.slider(
"Select X-axis Interval (Max)",
min_value=df[x_column].min(),
max_value=df[x_column].max(),
value=df[x_column].max(),
)
else:
x_min = st.slider(
"Select X-axis Interval (Min)",
min_value=datetime.strptime(df[x_column].min(), "%Y-%m-%d").date(),
max_value=datetime.strptime(df[x_column].max(), "%Y-%m-%d").date(),
value=datetime.strptime(df[x_column].min(), "%Y-%m-%d").date(),
format="MMM DD, YYYY",
)

x_max = st.slider(
"Select X-axis Interval (Max)",
min_value=datetime.strptime(df[x_column].min(), "%Y-%m-%d").date(),
max_value=datetime.strptime(df[x_column].max(), "%Y-%m-%d").date(),
value=datetime.strptime(df[x_column].max(), "%Y-%m-%d").date(),
format="MMM DD, YYYY",
)
df[x_column] = pd.to_datetime(df[x_column]).dt.date
# Filter the DataFrame based on the selected x-axis interval
df_filtered = df[(df[x_column] >= x_min) & (df[x_column] <= x_max)]

fig, ax = plot_chart(
df_filtered, x_column, y_columns, plot_type, chart_title, chart_properties
)
st.pyplot(fig)

if show_table:
# Display the table with interactive features
st.subheader("Data Table:")
displayed_df = dynamic_json_extraction(df)

# Additional options for the displayed table
if st.checkbox("Show Selected Columns"):
selected_columns = st.multiselect("Select columns", df.columns)
displayed_df = df[selected_columns]

if st.checkbox("Sort Data"):
selected_column = st.selectbox("Select column to sort by", df.columns)
ascending = st.checkbox("Sort in Ascending Order", True)
displayed_df = df.sort_values(by=selected_column, ascending=ascending)

if st.checkbox("Filter Data"):
column_name = st.selectbox("Select column to filter", df.columns)
filter_value = st.text_input("Enter filter value", "")
displayed_df = df[df[column_name] == filter_value]

# Display the modified DataFrame based on user selections
st.write(displayed_df)


if __name__ == "__main__":
main()

0 comments on commit 777f010

Please sign in to comment.