add api implementation

2026-04-22 10:41:50 -07:00 · 2025-05-23 12:27:58 -04:00 · 2025-05-23 12:27:58 -04:00 · 5606f7fa69
commit 5606f7fa69
parent e3dd39b5df
114 changed files with 3202 additions and 0 deletions
--- a/python/.env
+++ b/python/.env
@ -0,0 +1 @@
+SOCRATA_APP_TOKEN=cRHtiRLtqRwUC3Zr1TVNNVuxT
--- a/python/app.py
+++ b/python/app.py
@ -0,0 +1,70 @@
+import streamlit as st
+import pandas as pd
+import requests
+from fpdf import FPDF
+
+# Load data
+@st.cache_data
+def load_data():
+    url = "https://educationtocareer.data.mass.gov/resource/n2xa-p822.json"
+    response = requests.get(url)
+    df = pd.DataFrame(response.json())
+    num_cols = ['cohort_cnt', 'grad_pct', 'in_sch_pct', 'non_grad_pct', 'ged_pct', 'drpout_pct', 'exclud_pct']
+    for col in num_cols:
+        df[col] = pd.to_numeric(df[col], errors='coerce')
+    return df
+
+df = load_data()
+
+st.title("📊 Massachusetts Graduation Statistics Dashboard")
+
+# Sidebar filters
+with st.sidebar:
+    st.header("Filter Options")
+    org_type = st.selectbox("Report Type", [""] + sorted(df['org_type'].dropna().unique().tolist()))
+    sy = st.selectbox("School Year", [""] + sorted(df['sy'].dropna().unique().tolist(), reverse=True))
+    grad_rate_type = st.selectbox("Rate Type", [""] + sorted(df['grad_rate_type'].dropna().unique().tolist()))
+    stu_grp = st.selectbox("Student Group", [""] + sorted(df['stu_grp'].dropna().unique().tolist()))
+
+# Apply filters
+filtered = df.copy()
+if org_type: filtered = filtered[filtered['org_type'] == org_type]
+if sy: filtered = filtered[filtered['sy'] == sy]
+if grad_rate_type: filtered = filtered[filtered['grad_rate_type'] == grad_rate_type]
+if stu_grp: filtered = filtered[filtered['stu_grp'] == stu_grp]
+
+# Select columns
+cols = {
+    'dist_name': 'District Name',
+    'dist_code': 'District Code',
+    'cohort_cnt': '# in Cohort',
+    'grad_pct': '% Graduated',
+    'in_sch_pct': '% Still in School',
+    'non_grad_pct': '% Non-Grad Completers',
+    'ged_pct': '% H.S. Equiv.',
+    'drpout_pct': '% Dropped Out',
+    'exclud_pct': '% Permanently Excluded'
+}
+display_df = filtered[list(cols.keys())].rename(columns=cols)
+
+# Display
+st.dataframe(display_df, use_container_width=True)
+
+# Export options
+st.download_button("📥 Download CSV", display_df.to_csv(index=False), "report.csv", "text/csv")
+
+# PDF export
+def export_pdf(df):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=10)
+    pdf.cell(200, 10, txt="Graduation Report", ln=True, align='C')
+    for _, row in df.iterrows():
+        row_text = ', '.join(str(val) for val in row.values)
+        pdf.multi_cell(0, 8, row_text)
+    pdf.output("report.pdf")
+
+if st.button("📄 Export PDF"):
+    export_pdf(display_df)
+    with open("report.pdf", "rb") as f:
+        st.download_button("Download PDF", f, "report.pdf", mime="application/pdf")
--- a/python/app2.py
+++ b/python/app2.py
@ -0,0 +1,142 @@
+import streamlit as st
+# NOTE(review): the page config is set before the remaining imports and before
+# any other Streamlit call - presumably because Streamlit requires
+# set_page_config to run first; confirm before reordering.
+st.set_page_config(page_title="Graduation Dashboard", layout="wide")
+
+import pandas as pd
+from sodapy import Socrata
+from fpdf import FPDF
+from io import BytesIO
+import os
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+# os.getenv returns None when SOCRATA_APP_TOKEN is not set.
+SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN")
+
+# Socrata Client - module-level, used by get_filter_options and
+# fetch_filtered_data to query dataset "n2xa-p822".
+client = Socrata("educationtocareer.data.mass.gov", SOCRATA_APP_TOKEN)
+
+# --------------------------
+# 🔧 Get Filter Options
+# --------------------------
+@st.cache_data
+def get_filter_options():
+    sample = client.get("n2xa-p822", limit=1000)
+    df = pd.DataFrame.from_records(sample)
+    return {
+        "org_types": sorted(df["org_type"].dropna().unique()),
+        "school_years": sorted(df["sy"].dropna().unique(), reverse=True),
+        "rate_types": sorted(df["grad_rate_type"].dropna().unique()),
+        "student_groups": sorted(df["stu_grp"].dropna().unique())
+    }
+
+filter_options = get_filter_options()
+
+# --------------------------
+# 📥 Load Filtered Data
+# --------------------------
+@st.cache_data(show_spinner="Loading filtered data...")
+def fetch_filtered_data(org_type, sy, grad_rate_type, stu_grp, max_rows=50000):
+    where_clause = (
+        f"org_type='{org_type}' AND sy='{sy}' AND "
+        f"grad_rate_type='{grad_rate_type}' AND stu_grp='{stu_grp}'"
+    )
+
+    all_data = []
+    offset = 0
+
+    while True:
+        rows = client.get("n2xa-p822", where=where_clause, limit=50000, offset=offset)
+        if not rows:
+            break
+        all_data.extend(rows)
+        if len(rows) < 50000 or len(all_data) >= max_rows:
+            break
+        offset += 50000
+
+    df = pd.DataFrame.from_records(all_data)
+
+    numeric_cols = ['cohort_cnt', 'grad_pct', 'in_sch_pct', 'non_grad_pct', 'ged_pct', 'drpout_pct', 'exclud_pct']
+    for col in numeric_cols:
+        if col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+        else:
+            df[col] = pd.NA
+
+    return df
+
+# --------------------------
+# 🌐 Streamlit UI
+# --------------------------
+st.title("🎓 Massachusetts Graduation Statistics Dashboard")
+
+# Filters
+st.sidebar.header("🔍 Filters")
+org_type = st.sidebar.selectbox("Report Type", filter_options["org_types"], index=filter_options["org_types"].index("District"))
+sy = st.sidebar.selectbox("School Year", filter_options["school_years"], index=0)
+rate_type = st.sidebar.selectbox("Graduation Rate Type", filter_options["rate_types"], index=0)
+stu_grp = st.sidebar.selectbox("Student Group", filter_options["student_groups"], index=0)
+
+# Load filtered data
+df = fetch_filtered_data(org_type, sy, rate_type, stu_grp)
+
+if df.empty:
+    st.warning("⚠️ No data found for the selected filter combination.")
+    st.stop()
+
+# Dynamic column name logic
+if org_type == "District":
+    name_col = 'dist_name'
+    code_col = 'dist_code'
+    name_label = 'District Name'
+    code_label = 'District Code'
+elif org_type == "School":
+    name_col = 'org_name'
+    code_col = 'org_code'
+    name_label = 'School Name'
+    code_label = 'School Code'
+else:
+    name_col = 'org_name'
+    code_col = 'org_code'
+    name_label = 'State Name'
+    code_label = 'State Code'
+
+# Rename and filter relevant columns
+df_display = df.rename(columns={
+    name_col: name_label,
+    code_col: code_label,
+    'cohort_cnt': '# in Cohort',
+    'grad_pct': '% Graduated',
+    'in_sch_pct': '% Still in School',
+    'non_grad_pct': '% Non-Grad Completers',
+    'ged_pct': '% H.S. Equiv.',
+    'drpout_pct': '% Dropped Out',
+    'exclud_pct': '% Permanently Excluded'
+})[[name_label, code_label, '# in Cohort', '% Graduated', '% Still in School',
+    '% Non-Grad Completers', '% H.S. Equiv.', '% Dropped Out', '% Permanently Excluded']]
+
+# Add serial number
+df_display.insert(0, "S. No.", range(1, len(df_display) + 1))
+
+# Show table
+st.subheader(f"Filtered Results ({len(df_display)} rows)")
+st.dataframe(df_display, use_container_width=True, hide_index=True)
+
+# --------------------------
+# 📤 Export Buttons
+# --------------------------
+csv_data = df_display.to_csv(index=False).encode("utf-8")
+st.download_button("⬇️ Download CSV", csv_data, file_name="graduation_stats.csv", mime="text/csv")
+
+def generate_pdf(df):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=10)
+    pdf.cell(200, 10, txt="Graduation Stats Report", ln=True, align="C")
+    pdf.ln(5)
+    for _, row in df.iterrows():
+        line = ", ".join(str(x) for x in row.values)
+        pdf.multi_cell(0, 8, line)
+    return pdf.output(dest='S').encode('latin-1')
+
+pdf_bytes = generate_pdf(df_display)
+st.download_button("📄 Download PDF", data=pdf_bytes, file_name="graduation_stats.pdf", mime="application/pdf")
--- a/python/report.pdf
+++ b/python/report.pdf