| | """ |
| | QueryChat initialization and filtered DataFrame helpers. |
| | |
| | Provides convenience wrappers around the ``querychat`` library for |
| | natural-language filtering of time-series DataFrames inside a Gradio |
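| | app. The availability check and the DataFrame helper degrade gracefully |
| | when the package or an API key is unavailable; ``create_querychat`` |
| | raises ``RuntimeError`` if the package is not installed. |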
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | import os |
| | from typing import List, Optional |
| |
|
| | import pandas as pd |
| |
|
| | try: |
| | from querychat.gradio import QueryChat as _QueryChat |
| |
|
| | _QUERYCHAT_AVAILABLE = True |
| | except ImportError: |
| | _QUERYCHAT_AVAILABLE = False |
| |
|
| |
|
| | |
| | |
| | |
| |
|
def check_querychat_available() -> bool:
    """Report whether QueryChat can be used in this process.

    Two conditions must hold: the optional *querychat* import succeeded
    and the ``OPENAI_API_KEY`` environment variable holds a non-empty
    value. Callers can use the result to show or hide chat UI elements.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    # An unset or empty key counts as "not configured".
    return bool(_QUERYCHAT_AVAILABLE and api_key)
| |
|
| |
|
| | |
| | |
| | |
| |
|
def _safe_table_name(name: str) -> str:
    """Turn *name* into a plain SQL identifier (letter first, then ``[A-Za-z0-9_]``)."""
    cleaned = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch == "_" else "_"
        for ch in name
    )
    if not cleaned:
        return "dataset"
    # Every remaining character is ASCII, so ``isalpha`` means A-Z / a-z here.
    if not cleaned[0].isalpha():
        cleaned = f"t_{cleaned}"
    return cleaned


def create_querychat(
    df: pd.DataFrame,
    name: str = "dataset",
    date_col: str = "date",
    y_cols: Optional[List[str]] = None,
    freq_label: str = "",
):
    """Create and return a QueryChat instance bound to *df*.

    Parameters
    ----------
    df:
        The pandas DataFrame to expose to the chat interface.
    name:
        A human-readable name for the dataset. It is shown verbatim in
        the description and greeting; the SQL table name is derived from
        it by replacing every character outside ``[A-Za-z0-9_]`` with an
        underscore (and prefixing ``t_`` when the result would not start
        with a letter).
    date_col:
        Name of the date/time column.
    y_cols:
        Names of the value (numeric) columns. If ``None`` or empty, the
        description says ``"none specified"``.
    freq_label:
        Optional frequency label (e.g. ``"Monthly"``, ``"Daily"``).

    Returns
    -------
    QueryChat instance
        The object returned by ``QueryChat()``.

    Raises
    ------
    RuntimeError
        If querychat is not installed.
    """
    if not _QUERYCHAT_AVAILABLE:
        raise RuntimeError(
            "The 'querychat' package is not installed. "
            "Install it with: pip install 'querychat[gradio]'"
        )

    y_cols = list(y_cols) if y_cols else []

    # Column labels are stringified so non-string labels (e.g. ints) do not
    # break the join.
    value_cols_str = ", ".join(map(str, y_cols)) if y_cols else "none specified"
    freq_part = f" Frequency: {freq_label}." if freq_label else ""

    data_description = (
        f"This dataset is named '{name}'. "
        f"It contains {len(df):,} rows. "
        f"The date column is '{date_col}'. "
        f"Value columns: {value_cols_str}."
        f"{freq_part}"
    )

    # Tailor the filter example to the first value column when one is known.
    if y_cols:
        filter_example = f'- "Filter where {y_cols[0]} > median"'
    else:
        filter_example = '- "Filter where value > 100"'

    greeting = (
        f"Hi! I can help you filter and explore the **{name}** dataset. "
        "Try asking me something like:\n"
        '- "Show only the last 5 years"\n'
        f"{filter_example}\n"
        '- "Show rows from January to March"'
    )

    return _QueryChat(
        data_source=df,
        # Spaces are not the only characters that are invalid in a table
        # name; hyphens, dots, parentheses or a leading digit are too.
        table_name=_safe_table_name(name),
        client="openai/gpt-5.2-2025-12-11",
        data_description=data_description,
        greeting=greeting,
    )
| |
|
| |
|
| | |
| | |
| | |
| |
|
def get_filtered_pandas_df(qc, state_dict=None) -> pd.DataFrame:
    """Extract the currently filtered DataFrame from a QueryChat instance.

    ``qc.df()`` may return something other than a pandas DataFrame (for
    example a *narwhals* frame). The result is converted to pandas when
    needed. The frame is fetched exactly once; any failure to fetch or
    convert it is logged and an empty DataFrame is returned instead of
    raising.

    Parameters
    ----------
    qc:
        A QueryChat instance previously created via :func:`create_querychat`.
    state_dict:
        The Gradio state dictionary from ``qc.ui()``. When not ``None``
        it is passed to ``qc.df``; otherwise ``qc.df()`` is called with
        no arguments.

    Returns
    -------
    pd.DataFrame
        The filtered data as a pandas DataFrame, or an empty DataFrame
        if the data could not be fetched or converted.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        result = qc.df() if state_dict is None else qc.df(state_dict)
    except Exception:
        # Best-effort helper: the UI should keep working without a frame.
        log.warning("Could not fetch the filtered frame from QueryChat", exc_info=True)
        return pd.DataFrame()

    # Check for pandas first: attribute probing on a pandas frame can match
    # column names (a column called "to_pandas" would satisfy hasattr).
    if isinstance(result, pd.DataFrame):
        return result

    try:
        if hasattr(result, "to_pandas"):
            return result.to_pandas()

        if hasattr(result, "to_native"):
            native = result.to_native()
            if isinstance(native, pd.DataFrame):
                return native
            return pd.DataFrame(native)

        # Last resort: let pandas try to build a frame from the raw object.
        return pd.DataFrame(result)
    except Exception:
        log.warning(
            "Could not convert QueryChat result of type %s to pandas",
            type(result).__name__,
            exc_info=True,
        )
        return pd.DataFrame()
| |
|