"""Download real FRED datasets for the time-series visualization app. Uses only ``urllib.request`` (stdlib) — no extra dependencies. Series ------ * OHUR — Ohio Unemployment Rate (%, Monthly, 1976–present) * OHMFG — Ohio Manufacturing Employment (1000s, SA, Monthly) * INMFG — Indiana Manufacturing Employment * MIMFG — Michigan Manufacturing Employment * TXMFG — Texas Manufacturing Employment * CAMFG — California Manufacturing Employment Run once locally, then commit the resulting CSVs:: python scripts/download_fred_data.py """ from __future__ import annotations import io import urllib.request from pathlib import Path import pandas as pd # Resolve paths relative to the project root (parent of scripts/) PROJECT_ROOT = Path(__file__).resolve().parent.parent DATA_DIR = PROJECT_ROOT / "data" DATA_DIR.mkdir(parents=True, exist_ok=True) _FRED_CSV_URL = "https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}" def _fetch_fred(series_id: str) -> pd.DataFrame: """Download a single FRED series and return a two-column DataFrame.""" url = _FRED_CSV_URL.format(series_id=series_id) print(f" Downloading {series_id} …") with urllib.request.urlopen(url, timeout=30) as resp: # noqa: S310 raw = resp.read().decode("utf-8") df = pd.read_csv(io.StringIO(raw)) # FRED uses "." for missing values — coerce to NaN df.columns = ["date", series_id] df[series_id] = pd.to_numeric(df[series_id], errors="coerce") df["date"] = pd.to_datetime(df["date"]) df = df.dropna() return df # --------------------------------------------------------------------------- # 1. Single series: Ohio Unemployment Rate # --------------------------------------------------------------------------- def build_ohio_unemployment() -> pd.DataFrame: df = _fetch_fred("OHUR") df = df.rename(columns={"OHUR": "unemployment_rate"}) return df # --------------------------------------------------------------------------- # 2. 
# Multi-series wide: Manufacturing Employment by State
# ---------------------------------------------------------------------------

_MFG_SERIES = {
    "OHMFG": "Ohio",
    "INMFG": "Indiana",
    "MIMFG": "Michigan",
    "TXMFG": "Texas",
    "CAMFG": "California",
}


def build_manufacturing_wide() -> pd.DataFrame:
    """Return one row per date with one manufacturing column per state.

    Each series in ``_MFG_SERIES`` is downloaded with ``_fetch_fred`` and its
    value column is renamed to the state name. The result is sorted by
    ``date`` and carries a fresh 0..n-1 index.
    """
    per_state = [
        _fetch_fred(series_id).rename(columns={series_id: state})
        for series_id, state in _MFG_SERIES.items()
    ]

    # Inner-join on date so all states share the same date range
    merged = per_state[0]
    for frame in per_state[1:]:
        merged = merged.merge(frame, on="date", how="inner")

    return merged.sort_values("date").reset_index(drop=True)


# ---------------------------------------------------------------------------
# 3. Multi-series long: same data melted
# ---------------------------------------------------------------------------


def build_manufacturing_long(wide: pd.DataFrame) -> pd.DataFrame:
    """Melt the wide frame into ``date`` / ``state`` / ``manufacturing_employment``.

    Args:
        wide: Frame with a ``date`` column; every other column is treated as
            one state's values.

    Returns:
        The melted frame sorted by ``date`` then ``state``, with a fresh
        0..n-1 index.
    """
    melted = wide.melt(
        id_vars="date",
        var_name="state",
        value_name="manufacturing_employment",
    )
    ordered = melted.sort_values(["date", "state"])
    return ordered.reset_index(drop=True)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def main() -> None:
    """Download every dataset and write the three demo CSVs into ``DATA_DIR``."""

    def _export(frame: pd.DataFrame, filename: str) -> None:
        # Write one frame without its index and report row count and path.
        out = DATA_DIR / filename
        frame.to_csv(out, index=False)
        print(f" -> {len(frame)} rows => {out}\n")

    print("=== Downloading FRED data ===\n")

    # 1. Ohio Unemployment
    _export(build_ohio_unemployment(), "demo_ohio_unemployment.csv")

    # 2. Manufacturing wide
    wide = build_manufacturing_wide()
    _export(wide, "demo_manufacturing_wide.csv")

    # 3. Manufacturing long
    _export(build_manufacturing_long(wide), "demo_manufacturing_long.csv")

    print("Done.")


if __name__ == "__main__":
    main()