You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
e2c-api/python/app2.py

143 lines
4.6 KiB

import streamlit as st
st.set_page_config(page_title="Graduation Dashboard", layout="wide")
import pandas as pd
from sodapy import Socrata
from fpdf import FPDF
from io import BytesIO
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN")
# Socrata Client
client = Socrata("educationtocareer.data.mass.gov", SOCRATA_APP_TOKEN)
# --------------------------
# 🔧 Get Filter Options
# --------------------------
@st.cache_data
def get_filter_options():
sample = client.get("n2xa-p822", limit=1000)
df = pd.DataFrame.from_records(sample)
return {
"org_types": sorted(df["org_type"].dropna().unique()),
"school_years": sorted(df["sy"].dropna().unique(), reverse=True),
"rate_types": sorted(df["grad_rate_type"].dropna().unique()),
"student_groups": sorted(df["stu_grp"].dropna().unique())
}
filter_options = get_filter_options()
# --------------------------
# 📥 Load Filtered Data
# --------------------------
@st.cache_data(show_spinner="Loading filtered data...")
def fetch_filtered_data(org_type, sy, grad_rate_type, stu_grp, max_rows=50000):
where_clause = (
f"org_type='{org_type}' AND sy='{sy}' AND "
f"grad_rate_type='{grad_rate_type}' AND stu_grp='{stu_grp}'"
)
all_data = []
offset = 0
while True:
rows = client.get("n2xa-p822", where=where_clause, limit=50000, offset=offset)
if not rows:
break
all_data.extend(rows)
if len(rows) < 50000 or len(all_data) >= max_rows:
break
offset += 50000
df = pd.DataFrame.from_records(all_data)
numeric_cols = ['cohort_cnt', 'grad_pct', 'in_sch_pct', 'non_grad_pct', 'ged_pct', 'drpout_pct', 'exclud_pct']
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
else:
df[col] = pd.NA
return df
# --------------------------
# 🌐 Streamlit UI
# --------------------------
st.title("🎓 Massachusetts Graduation Statistics Dashboard")
# Filters
st.sidebar.header("🔍 Filters")
org_type = st.sidebar.selectbox("Report Type", filter_options["org_types"], index=filter_options["org_types"].index("District"))
sy = st.sidebar.selectbox("School Year", filter_options["school_years"], index=0)
rate_type = st.sidebar.selectbox("Graduation Rate Type", filter_options["rate_types"], index=0)
stu_grp = st.sidebar.selectbox("Student Group", filter_options["student_groups"], index=0)
# Load filtered data
df = fetch_filtered_data(org_type, sy, rate_type, stu_grp)
if df.empty:
st.warning("⚠️ No data found for the selected filter combination.")
st.stop()
# Dynamic column name logic
if org_type == "District":
name_col = 'dist_name'
code_col = 'dist_code'
name_label = 'District Name'
code_label = 'District Code'
elif org_type == "School":
name_col = 'org_name'
code_col = 'org_code'
name_label = 'School Name'
code_label = 'School Code'
else:
name_col = 'org_name'
code_col = 'org_code'
name_label = 'State Name'
code_label = 'State Code'
# Rename and filter relevant columns
df_display = df.rename(columns={
name_col: name_label,
code_col: code_label,
'cohort_cnt': '# in Cohort',
'grad_pct': '% Graduated',
'in_sch_pct': '% Still in School',
'non_grad_pct': '% Non-Grad Completers',
'ged_pct': '% H.S. Equiv.',
'drpout_pct': '% Dropped Out',
'exclud_pct': '% Permanently Excluded'
})[[name_label, code_label, '# in Cohort', '% Graduated', '% Still in School',
'% Non-Grad Completers', '% H.S. Equiv.', '% Dropped Out', '% Permanently Excluded']]
# Add serial number
df_display.insert(0, "S. No.", range(1, len(df_display) + 1))
# Show table
st.subheader(f"Filtered Results ({len(df_display)} rows)")
st.dataframe(df_display, use_container_width=True, hide_index=True)
# --------------------------
# 📤 Export Buttons
# --------------------------
csv_data = df_display.to_csv(index=False).encode("utf-8")
st.download_button("⬇️ Download CSV", csv_data, file_name="graduation_stats.csv", mime="text/csv")
def generate_pdf(df):
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=10)
pdf.cell(200, 10, txt="Graduation Stats Report", ln=True, align="C")
pdf.ln(5)
for _, row in df.iterrows():
line = ", ".join(str(x) for x in row.values)
pdf.multi_cell(0, 8, line)
return pdf.output(dest='S').encode('latin-1')
pdf_bytes = generate_pdf(df_display)
st.download_button("📄 Download PDF", data=pdf_bytes, file_name="graduation_stats.pdf", mime="application/pdf")