Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,14 @@ from langchain.agents import create_agent
|
|
| 14 |
from langchain.tools import tool
|
| 15 |
from langgraph.checkpoint.memory import InMemorySaver
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# ------------------------------------------------------------
|
| 19 |
# 1. Environment configuration
|
|
@@ -27,46 +35,225 @@ from langgraph.checkpoint.memory import InMemorySaver
|
|
| 27 |
|
| 28 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 29 |
MODEL_NAME = os.getenv("MODEL_NAME", "openai:gpt-5.4")
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
# ------------------------------------------------------------
|
| 34 |
-
# 2. Database helpers
|
| 35 |
# ------------------------------------------------------------
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
-
Resolve the SQLite database path.
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
"""
|
| 47 |
|
| 48 |
-
if
|
| 49 |
-
return
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
if path.exists():
|
| 61 |
-
return path
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
)
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def get_database_schema(db_path: Path) -> str:
|
|
@@ -355,6 +542,25 @@ def normalize_history_to_messages(history):
|
|
| 355 |
# 7. Gradio chat function
|
| 356 |
# ------------------------------------------------------------
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
def chat_with_sql_agent(message, history, thread_id):
|
| 359 |
"""
|
| 360 |
Handles one user message from Gradio.
|
|
@@ -481,6 +687,7 @@ The agent can generate SQL, execute read-only queries, and remember follow-up qu
|
|
| 481 |
|
| 482 |
**Model:** `{MODEL_NAME}`
|
| 483 |
**Database:** `{DB_PATH}`
|
|
|
|
| 484 |
"""
|
| 485 |
)
|
| 486 |
|
|
|
|
| 14 |
from langchain.tools import tool
|
| 15 |
from langgraph.checkpoint.memory import InMemorySaver
|
| 16 |
|
| 17 |
+
# Optional Hugging Face ZeroGPU support.
|
| 18 |
+
# This is useful only if you select ZeroGPU hardware in Space settings.
|
| 19 |
+
# For this OpenAI API app, CPU Basic is recommended.
|
| 20 |
+
try:
|
| 21 |
+
import spaces
|
| 22 |
+
except Exception:
|
| 23 |
+
spaces = None
|
| 24 |
+
|
| 25 |
|
| 26 |
# ------------------------------------------------------------
|
| 27 |
# 1. Environment configuration
|
|
|
|
| 35 |
|
| 36 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 37 |
MODEL_NAME = os.getenv("MODEL_NAME", "openai:gpt-5.4")
|
| 38 |
+
|
| 39 |
+
# Download the real Chinook SQLite DB directly from GitHub.
|
| 40 |
+
# This avoids manually uploading the DB file to Hugging Face Spaces.
|
| 41 |
+
CHINOOK_URL = os.getenv(
|
| 42 |
+
"CHINOOK_URL",
|
| 43 |
+
"https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite",
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
# Local runtime path inside the Space.
|
| 47 |
+
# You can override this with DATABASE_PATH if needed.
|
| 48 |
+
DB_PATH = Path(os.getenv("DATABASE_PATH", "Chinook.db"))
|
| 49 |
+
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
# Optional ZeroGPU mode
|
| 52 |
+
# ------------------------------------------------------------
|
| 53 |
+
# Recommended for this app: CPU Basic or CPU Upgrade.
|
| 54 |
+
#
|
| 55 |
+
# Why?
|
| 56 |
+
# - The LLM is called through the OpenAI API.
|
| 57 |
+
# - The model is not loaded locally on Hugging Face.
|
| 58 |
+
# - SQLite and Gradio do not require GPU.
|
| 59 |
+
#
|
| 60 |
+
# If you selected ZeroGPU hardware and see "no GPU function",
|
| 61 |
+
# keep USE_ZEROGPU=true. The @spaces.GPU decorator tells HF
|
| 62 |
+
# that this function is allowed to request ZeroGPU.
|
| 63 |
+
#
|
| 64 |
+
# If you run on CPU hardware, this can stay true; HF says the
|
| 65 |
+
# decorator is effect-free in non-ZeroGPU environments.
|
| 66 |
+
# Any of "1", "true", "yes" or "y" (case-insensitive, surrounding whitespace
# ignored) switches the flag on; it defaults to on when the variable is unset.
USE_ZEROGPU = (
    os.getenv("USE_ZEROGPU", "true").strip().lower()
    in {"1", "true", "yes", "y"}
)
|
| 72 |
|
| 73 |
|
| 74 |
# ------------------------------------------------------------
|
| 75 |
+
# 2. Database download + validation helpers
|
| 76 |
# ------------------------------------------------------------
|
| 77 |
|
| 78 |
+
APP_DIR = Path(__file__).resolve().parent
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def is_sqlite_database_file(path: Path) -> bool:
    """
    Tell whether ``path`` is a regular file carrying the SQLite header.

    A SQLite database begins with the 16 bytes ``SQLite format 3``
    followed by a single NUL byte. Only that header is checked; the
    rest of the file is not read.

    Returns False when the path is missing, is not a regular file,
    cannot be read, or starts with anything else.
    """
    sqlite_magic = b"SQLite format 3\x00"

    if not (path.exists() and path.is_file()):
        return False

    try:
        with path.open("rb") as handle:
            # The magic string is exactly 16 bytes long.
            return handle.read(len(sqlite_magic)) == sqlite_magic
    except Exception:
        return False
|
| 98 |
+
|
| 99 |
|
| 100 |
+
def inspect_file_type(path: Path) -> str:
    """
    Classify what is stored at ``path`` for diagnostic messages.

    Only the first 4096 bytes are examined. The result is one of:
    "missing", "directory", "sqlite", "zip_file", "git_lfs_pointer",
    "html_file", "sql_script", "unknown", or "unreadable" (when reading
    the file raises).
    """
    if not path.exists():
        return "missing"
    if path.is_dir():
        return "directory"

    try:
        with open(path, "rb") as handle:
            head = handle.read(4096)

        # Binary signatures are matched case-sensitively at offset 0.
        if head.startswith(b"SQLite format 3\x00"):
            return "sqlite"
        if head.startswith(b"PK"):
            return "zip_file"

        # Text-like content is matched case-insensitively anywhere in the sample.
        head_lower = head.lower()
        if b"version https://git-lfs.github.com/spec" in head_lower:
            return "git_lfs_pointer"
        if any(tag in head_lower for tag in (b"<html", b"<!doctype html")):
            return "html_file"

        decoded = head.decode("utf-8", errors="ignore").lower()
        sql_keywords = (
            "create table",
            "insert into",
            "begin transaction",
            "pragma foreign_keys",
            "drop table",
        )
        for keyword in sql_keywords:
            if keyword in decoded:
                return "sql_script"

        return "unknown"
    except Exception:
        return "unreadable"
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def resolve_runtime_db_path(path: Path) -> Path:
    """
    Turn the configured database path into the path used at runtime.

    Absolute paths are returned unchanged. Relative paths are anchored
    at APP_DIR (the directory containing this file), so for example
    DATABASE_PATH=Chinook.db becomes <APP_DIR>/Chinook.db.
    """
    return path if path.is_absolute() else APP_DIR / path
|
|
|
|
|
|
|
| 163 |
|
| 164 |
+
|
| 165 |
+
def download_file(url: str, output_path: Path) -> None:
    """
    Download ``url`` to ``output_path`` using only the standard library.

    requests is intentionally avoided to keep requirements simpler.

    The response body is streamed into a sibling ``<name>.part`` file and
    then moved over ``output_path`` in a single step. A failed or
    interrupted transfer therefore never leaves a truncated file at
    ``output_path``, and the payload is never held in memory as a whole.

    Args:
        url: Location to fetch; anything urllib.request.urlopen accepts.
        output_path: Destination file. Missing parent directories are
            created. An existing file is replaced only on success.

    Raises:
        urllib.error.URLError: If the request fails (includes HTTP errors).
        OSError: If the destination cannot be written or replaced.
    """

    import os
    import shutil
    import urllib.request

    output_path.parent.mkdir(parents=True, exist_ok=True)

    request = urllib.request.Request(
        url,
        headers={
            "User-Agent": "Mozilla/5.0 HuggingFaceSpace SQLite Downloader",
        },
    )

    # Same directory as the target so os.replace stays on one filesystem.
    partial_path = output_path.with_name(output_path.name + ".part")

    try:
        with urllib.request.urlopen(request, timeout=60) as response, open(
            partial_path, "wb"
        ) as partial_file:
            shutil.copyfileobj(response, partial_file)

        os.replace(partial_path, output_path)
    except BaseException:
        # Best-effort cleanup of the partial file; the original error is
        # what the caller needs to see, so cleanup failures are ignored.
        try:
            partial_path.unlink()
        except OSError:
            pass
        raise
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def download_chinook_database_if_needed() -> Path:
    """
    Return the path of a valid Chinook SQLite database, downloading it if needed.

    The target is DB_PATH resolved through resolve_runtime_db_path(). If a
    file with a valid SQLite header already exists there, it is reused.
    Otherwise the file is (re-)downloaded from CHINOOK_URL, which covers:
    - missing DB files
    - corrupted files
    - HTML files saved as DB
    - Git LFS pointer files
    - SQL scripts renamed as .db/.sqlite

    Returns:
        The resolved runtime path of the validated database file.

    Raises:
        sqlite3.DatabaseError: If the downloaded file does not start with
            the SQLite header.
        Exception: Whatever download_file() raises is propagated unchanged.
    """

    runtime_db_path = resolve_runtime_db_path(DB_PATH)

    if is_sqlite_database_file(runtime_db_path):
        print(f"Using existing valid SQLite database: {runtime_db_path}")
        return runtime_db_path

    if runtime_db_path.exists():
        existing_type = inspect_file_type(runtime_db_path)
        print(
            f"Existing database path is not valid SQLite: {runtime_db_path}. "
            f"Detected type: {existing_type}. Re-downloading..."
        )

        try:
            runtime_db_path.unlink()
        except OSError as error:
            # Still best effort: the download below may overwrite the file
            # anyway, but report why removal failed instead of hiding it.
            print(
                f"Could not remove invalid database file {runtime_db_path}: {error}"
            )
    else:
        print(f"Database not found at {runtime_db_path}. Downloading...")

    print(f"Downloading Chinook database from: {CHINOOK_URL}")
    download_file(CHINOOK_URL, runtime_db_path)

    if not is_sqlite_database_file(runtime_db_path):
        detected_type = inspect_file_type(runtime_db_path)

        raise sqlite3.DatabaseError(
            f"""
Downloaded file is not a valid SQLite database.

Download URL:
{CHINOOK_URL}

Saved path:
{runtime_db_path}

Detected file type:
{detected_type}

Possible fixes:
1. Check that CHINOOK_URL points to a raw SQLite file.
2. Use this default URL:
https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite

3. Do not use a normal GitHub webpage URL.
4. Do not use a .sql dump URL unless you add SQL conversion logic.
"""
        )

    print(f"Successfully downloaded valid SQLite database: {runtime_db_path}")
    print(f"Database size: {runtime_db_path.stat().st_size:,} bytes")

    return runtime_db_path
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
# Rebind DB_PATH to the path returned by download_chinook_database_if_needed():
# the resolved runtime location of a validated SQLite file. This call may
# perform a network download when it runs.
DB_PATH = download_chinook_database_if_needed()
|
| 257 |
|
| 258 |
|
| 259 |
def get_database_schema(db_path: Path) -> str:
|
|
|
|
| 542 |
# 7. Gradio chat function
|
| 543 |
# ------------------------------------------------------------
|
| 544 |
|
| 545 |
+
def zerogpu_compatible(fn):
    """
    Decorator that optionally wraps ``fn`` with ``spaces.GPU``.

    On ZeroGPU hardware, Hugging Face expects at least one function to be
    decorated with @spaces.GPU. This app does not need a GPU because the
    LLM runs through the OpenAI API (CPU Basic is recommended), so the
    wrapper exists only for compatibility when ZeroGPU is selected.

    ``fn`` is returned unchanged when USE_ZEROGPU is false or the
    ``spaces`` package could not be imported.
    """
    zerogpu_enabled = USE_ZEROGPU and spaces is not None
    if not zerogpu_enabled:
        return fn

    gpu_decorator = spaces.GPU(duration=120)
    return gpu_decorator(fn)
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
@zerogpu_compatible
|
| 564 |
def chat_with_sql_agent(message, history, thread_id):
|
| 565 |
"""
|
| 566 |
Handles one user message from Gradio.
|
|
|
|
| 687 |
|
| 688 |
**Model:** `{MODEL_NAME}`
|
| 689 |
**Database:** `{DB_PATH}`
|
| 690 |
+
**Hardware note:** CPU Basic is recommended. ZeroGPU compatibility is enabled for Spaces that require it.
|
| 691 |
"""
|
| 692 |
)
|
| 693 |
|