"""Download real FRED datasets for the time-series visualization app.

Downloads use only ``urllib.request`` (stdlib); pandas, which parses and
reshapes the data, is the only third-party dependency.

Series
------
* OHUR  — Ohio Unemployment Rate (%, Monthly, 1976–present)
* OHMFG — Ohio Manufacturing Employment (1000s, SA, Monthly)
* INMFG — Indiana Manufacturing Employment
* MIMFG — Michigan Manufacturing Employment
* TXMFG — Texas Manufacturing Employment
* CAMFG — California Manufacturing Employment

Run once locally, then commit the resulting CSVs::

    python scripts/download_fred_data.py
"""
| |
|
| | from __future__ import annotations |
| |
|
| | import io |
| | import urllib.request |
| | from pathlib import Path |
| |
|
| | import pandas as pd |
| |
|
| | |
# Two levels up from this file: the directory that contains the script's folder.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Output directory for the generated CSVs; created at import time if missing.
DATA_DIR = PROJECT_ROOT.joinpath("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

# URL template for a FRED series CSV; ``{series_id}`` is filled in per request.
_FRED_CSV_URL = "https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}"
| |
|
| |
|
def _fetch_fred(series_id: str) -> pd.DataFrame:
    """Fetch one FRED series as CSV and return it as a cleaned DataFrame.

    Parameters
    ----------
    series_id:
        FRED series identifier substituted into ``_FRED_CSV_URL``; it also
        becomes the name of the value column.

    Returns
    -------
    pd.DataFrame
        Two columns: ``date`` (parsed to datetime) and ``<series_id>``
        (numeric). Rows with a missing date or a value that cannot be
        parsed as a number are dropped.
    """
    request_url = _FRED_CSV_URL.format(series_id=series_id)
    print(f" Downloading {series_id} …")

    with urllib.request.urlopen(request_url, timeout=30) as response:
        csv_text = response.read().decode("utf-8")

    frame = pd.read_csv(io.StringIO(csv_text))
    # Normalise the header so callers can rely on fixed column names.
    frame.columns = ["date", series_id]

    # Non-numeric placeholders become NaN here and are removed by dropna().
    values = pd.to_numeric(frame[series_id], errors="coerce")
    frame[series_id] = values
    frame["date"] = pd.to_datetime(frame["date"])

    return frame.dropna()
| |
|
| |
|
| | |
| | |
| | |
def build_ohio_unemployment() -> pd.DataFrame:
    """Return the FRED ``OHUR`` series with a descriptive value column.

    The result has a ``date`` column and an ``unemployment_rate`` column
    (the ``OHUR`` column renamed).
    """
    return _fetch_fred("OHUR").rename(columns={"OHUR": "unemployment_rate"})
| |
|
| |
|
| | |
| | |
| | |
# FRED series id -> column name used for that state in the wide table.
# Insertion order determines the column order of the merged result.
_MFG_SERIES = {
    "OHMFG": "Ohio",
    "INMFG": "Indiana",
    "MIMFG": "Michigan",
    "TXMFG": "Texas",
    "CAMFG": "California",
}


def build_manufacturing_wide() -> pd.DataFrame:
    """Download every series in ``_MFG_SERIES`` and join them on ``date``.

    Returns
    -------
    pd.DataFrame
        One ``date`` column plus one column per state. Only dates present
        in every series are kept (inner join); rows are sorted by date and
        the index is reset.
    """
    per_state = [
        _fetch_fred(series_id).rename(columns={series_id: state})
        for series_id, state in _MFG_SERIES.items()
    ]

    # Fold the per-state frames together, left to right.
    merged = per_state[0]
    for state_frame in per_state[1:]:
        merged = pd.merge(merged, state_frame, on="date", how="inner")

    ordered = merged.sort_values("date")
    return ordered.reset_index(drop=True)
| |
|
| |
|
| | |
| | |
| | |
def build_manufacturing_long(wide: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide per-state table into long (tidy) form.

    Parameters
    ----------
    wide:
        Frame with a ``date`` column and one value column per state.

    Returns
    -------
    pd.DataFrame
        Columns ``date``, ``state`` and ``manufacturing_employment``, one
        row per (date, state) pair, sorted by date then state, with a
        fresh 0..n-1 index.
    """
    melted = pd.melt(
        wide,
        id_vars=["date"],
        var_name="state",
        value_name="manufacturing_employment",
    )
    ordered = melted.sort_values(by=["date", "state"])
    return ordered.reset_index(drop=True)
| |
|
| |
|
| | |
| | |
| | |
def main() -> None:
    """Download all series and write the three demo CSVs into ``DATA_DIR``.

    Files written, in order: ``demo_ohio_unemployment.csv``,
    ``demo_manufacturing_wide.csv`` and ``demo_manufacturing_long.csv``.
    A one-line row-count summary is printed after each file.
    """

    def _save(frame: pd.DataFrame, filename: str) -> None:
        # Write one frame without its index and report where it went.
        target = DATA_DIR / filename
        frame.to_csv(target, index=False)
        print(f" -> {len(frame)} rows => {target}\n")

    print("=== Downloading FRED data ===\n")

    _save(build_ohio_unemployment(), "demo_ohio_unemployment.csv")

    # The wide table is kept because the long table is derived from it.
    manufacturing_wide = build_manufacturing_wide()
    _save(manufacturing_wide, "demo_manufacturing_wide.csv")

    _save(
        build_manufacturing_long(manufacturing_wide),
        "demo_manufacturing_long.csv",
    )

    print("Done.")


# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| |
|