decodingdatascience committed on
Commit
b001e8e
·
verified ·
1 Parent(s): e57fb6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -24
app.py CHANGED
@@ -14,6 +14,14 @@ from langchain.agents import create_agent
14
  from langchain.tools import tool
15
  from langgraph.checkpoint.memory import InMemorySaver
16
 
 
 
 
 
 
 
 
 
17
 
18
  # ------------------------------------------------------------
19
  # 1. Environment configuration
@@ -27,46 +35,225 @@ from langgraph.checkpoint.memory import InMemorySaver
27
 
28
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
29
  MODEL_NAME = os.getenv("MODEL_NAME", "openai:gpt-5.4")
30
- DATABASE_PATH = Path(os.getenv("DATABASE_PATH", "data/Chinook_Db2.sql"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
33
  # ------------------------------------------------------------
34
- # 2. Database helpers
35
  # ------------------------------------------------------------
36
 
37
- def resolve_database_path() -> Path:
 
 
 
 
 
 
38
  """
39
- Resolve the SQLite database path.
40
 
41
- Default:
42
- - data/Chinook_Sqlite.sqlite
 
 
 
 
 
 
 
 
 
 
43
 
44
- You can override it in Hugging Face Spaces with:
45
- DATABASE_PATH=/path/to/your/database.sqlite
 
46
  """
47
 
48
- if DATABASE_PATH.exists():
49
- return DATABASE_PATH
50
 
51
- common_paths = [
52
- Path("Chinook_Sqlite.sqlite"),
53
- Path("chinook.db"),
54
- Path("Chinook.db"),
55
- Path("data/chinook.db"),
56
- Path("data/Chinook.db"),
57
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- for path in common_paths:
60
- if path.exists():
61
- return path
62
 
63
- raise FileNotFoundError(
64
- "SQLite database file was not found. "
65
- "Upload your database file or set DATABASE_PATH in Hugging Face Variables."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  )
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- DB_PATH = resolve_database_path()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  def get_database_schema(db_path: Path) -> str:
@@ -355,6 +542,25 @@ def normalize_history_to_messages(history):
355
  # 7. Gradio chat function
356
  # ------------------------------------------------------------
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  def chat_with_sql_agent(message, history, thread_id):
359
  """
360
  Handles one user message from Gradio.
@@ -481,6 +687,7 @@ The agent can generate SQL, execute read-only queries, and remember follow-up qu
481
 
482
  **Model:** `{MODEL_NAME}`
483
  **Database:** `{DB_PATH}`
 
484
  """
485
  )
486
 
 
14
  from langchain.tools import tool
15
  from langgraph.checkpoint.memory import InMemorySaver
16
 
17
+ # Optional Hugging Face ZeroGPU support.
18
+ # This is useful only if you select ZeroGPU hardware in Space settings.
19
+ # For this OpenAI API app, CPU Basic is recommended.
20
+ try:
21
+ import spaces
22
+ except Exception:
23
+ spaces = None
24
+
25
 
26
  # ------------------------------------------------------------
27
  # 1. Environment configuration
 
35
 
36
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME", "openai:gpt-5.4")

# Source of the Chinook SQLite database. It is fetched from GitHub at
# startup so the DB file does not have to be uploaded to the Space by hand.
# Override with the CHINOOK_URL environment variable.
CHINOOK_URL = os.getenv(
    "CHINOOK_URL",
    "https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite",
)

# Where the database lives at runtime inside the Space.
# Override with the DATABASE_PATH environment variable.
DB_PATH = Path(os.getenv("DATABASE_PATH", "Chinook.db"))

# ------------------------------------------------------------
# Optional ZeroGPU mode
# ------------------------------------------------------------
# CPU Basic or CPU Upgrade hardware is the recommended choice here:
# - the LLM is reached through the OpenAI API,
# - no model is loaded locally on Hugging Face,
# - SQLite and Gradio do not need a GPU.
#
# If ZeroGPU hardware is selected and the Space reports
# "no GPU function", leave USE_ZEROGPU=true: the @spaces.GPU decorator
# marks a function as allowed to request ZeroGPU.
#
# On CPU hardware the flag can stay true as well; HF says the decorator
# is effect-free in non-ZeroGPU environments.
_TRUTHY_FLAGS = frozenset({"1", "true", "yes", "y"})
_zerogpu_flag = os.getenv("USE_ZEROGPU", "true")
USE_ZEROGPU = _zerogpu_flag.strip().lower() in _TRUTHY_FLAGS
72
 
73
 
74
  # ------------------------------------------------------------
75
+ # 2. Database download + validation helpers
76
  # ------------------------------------------------------------
77
 
78
+ APP_DIR = Path(__file__).resolve().parent
79
+
80
+
81
def is_sqlite_database_file(path: Path) -> bool:
    """
    Report whether ``path`` is a regular file that begins with the SQLite 3 magic.

    A SQLite 3 database starts with the 16-byte header
    "SQLite format 3" followed by a single NUL byte. A missing path,
    a directory, a file that cannot be read, or a file with any other
    leading bytes all yield False.
    """

    sqlite_magic = b"SQLite format 3\x00"

    if not (path.exists() and path.is_file()):
        return False

    try:
        with open(path, "rb") as handle:
            # Only the header is needed; never read the whole database.
            return handle.read(len(sqlite_magic)) == sqlite_magic
    except Exception:
        # Any read problem means the file cannot be used as a database.
        return False
98
+
99
 
100
def inspect_file_type(path: Path) -> str:
    """
    Classify what is stored at ``path`` to help diagnose a bad database file.

    Only the first 4096 bytes are examined. The result is one of:
    "missing", "directory", "sqlite", "zip_file", "git_lfs_pointer",
    "html_file", "sql_script", "unknown", or "unreadable" (when reading
    or classifying the file raises).
    """

    if not path.exists():
        return "missing"
    if path.is_dir():
        return "directory"

    try:
        with open(path, "rb") as handle:
            head = handle.read(4096)

        # Binary signatures are checked on the raw bytes first.
        if head.startswith(b"SQLite format 3\x00"):
            return "sqlite"
        if head.startswith(b"PK"):
            return "zip_file"

        # Text-like signatures are matched case-insensitively.
        head_lower = head.lower()
        if b"version https://git-lfs.github.com/spec" in head_lower:
            return "git_lfs_pointer"
        if any(tag in head_lower for tag in (b"<html", b"<!doctype html")):
            return "html_file"

        decoded = head.decode("utf-8", errors="ignore").lower()
        for marker in (
            "create table",
            "insert into",
            "begin transaction",
            "pragma foreign_keys",
            "drop table",
        ):
            if marker in decoded:
                return "sql_script"

        return "unknown"

    except Exception:
        return "unreadable"
146
+
147
+
148
def resolve_runtime_db_path(path: Path) -> Path:
    """
    Turn the configured database path into the path used at runtime.

    Absolute paths are returned untouched. Relative paths are anchored at
    the application directory (APP_DIR), e.g. DATABASE_PATH=Chinook.db
    becomes /home/user/app/Chinook.db when the app lives in /home/user/app.
    """

    return path if path.is_absolute() else APP_DIR / path
 
 
163
 
164
+
165
def download_file(url: str, output_path: Path) -> None:
    """
    Download ``url`` to ``output_path`` using only the standard library.

    requests is intentionally avoided to keep requirements simpler.

    The response is streamed to a sibling "<name>.part" file and moved
    into place with os.replace only after the transfer finished, so a
    failed or interrupted download never leaves a truncated file at
    ``output_path`` and never clobbers an existing one.

    Args:
        url: Location to fetch.
        output_path: Destination file; missing parent directories are created.

    Raises:
        urllib.error.URLError / OSError: if the request or the file
            write fails. The partial file is removed before re-raising.
    """

    import contextlib
    import os
    import shutil
    import urllib.request

    output_path.parent.mkdir(parents=True, exist_ok=True)

    request = urllib.request.Request(
        url,
        headers={
            "User-Agent": "Mozilla/5.0 HuggingFaceSpace SQLite Downloader",
        },
    )

    partial_path = output_path.with_name(output_path.name + ".part")

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            with open(partial_path, "wb") as partial_file:
                # Stream in chunks instead of holding the whole body in memory.
                shutil.copyfileobj(response, partial_file)

        # Same-directory rename: the destination only ever sees a complete file.
        os.replace(partial_path, output_path)
    except BaseException:
        # Do not leave a half-written ".part" file behind on any failure.
        with contextlib.suppress(OSError):
            partial_path.unlink()
        raise
187
+
188
+
189
def download_chinook_database_if_needed() -> Path:
    """
    Make sure a valid Chinook SQLite database exists and return its path.

    The configured DB_PATH is resolved with resolve_runtime_db_path. If a
    valid SQLite file is already there it is used as-is. Otherwise the
    database is downloaded from CHINOOK_URL, which recovers from:
    - missing DB files
    - corrupted files
    - HTML files saved as DB
    - Git LFS pointer files
    - SQL scripts renamed as .db/.sqlite

    Returns:
        The runtime path of the validated SQLite database.

    Raises:
        sqlite3.DatabaseError: if the downloaded file is not a valid
            SQLite database. Errors raised by download_file propagate
            unchanged.
    """

    runtime_db_path = resolve_runtime_db_path(DB_PATH)

    if is_sqlite_database_file(runtime_db_path):
        print(f"Using existing valid SQLite database: {runtime_db_path}")
        return runtime_db_path

    if runtime_db_path.exists():
        existing_type = inspect_file_type(runtime_db_path)
        print(
            f"Existing database path is not valid SQLite: {runtime_db_path}. "
            f"Detected type: {existing_type}. Re-downloading..."
        )

        try:
            runtime_db_path.unlink()
        except OSError as error:
            # Removal is best-effort (the download writes to this path anyway),
            # but report the reason instead of hiding it: e.g. the path may be
            # a directory or not writable, which explains a later failure.
            print(
                f"Warning: could not remove invalid database file "
                f"{runtime_db_path}: {error}"
            )
    else:
        print(f"Database not found at {runtime_db_path}. Downloading...")

    print(f"Downloading Chinook database from: {CHINOOK_URL}")
    download_file(CHINOOK_URL, runtime_db_path)

    # Validate what was actually saved; the URL may have returned HTML,
    # a Git LFS pointer, or a SQL dump instead of a binary database.
    if not is_sqlite_database_file(runtime_db_path):
        detected_type = inspect_file_type(runtime_db_path)

        raise sqlite3.DatabaseError(
            f"""
Downloaded file is not a valid SQLite database.

Download URL:
{CHINOOK_URL}

Saved path:
{runtime_db_path}

Detected file type:
{detected_type}

Possible fixes:
1. Check that CHINOOK_URL points to a raw SQLite file.
2. Use this default URL:
https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite

3. Do not use a normal GitHub webpage URL.
4. Do not use a .sql dump URL unless you add SQL conversion logic.
"""
        )

    print(f"Successfully downloaded valid SQLite database: {runtime_db_path}")
    print(f"Database size: {runtime_db_path.stat().st_size:,} bytes")

    return runtime_db_path
254
+
255
+
256
+ DB_PATH = download_chinook_database_if_needed()
257
 
258
 
259
  def get_database_schema(db_path: Path) -> str:
 
542
  # 7. Gradio chat function
543
  # ------------------------------------------------------------
544
 
545
def zerogpu_compatible(fn):
    """
    Conditionally wrap ``fn`` with the Hugging Face ``spaces.GPU`` decorator.

    When ZeroGPU hardware is selected, Hugging Face expects at least one
    function decorated with @spaces.GPU. This app does not technically
    need a GPU because the LLM runs through the OpenAI API (CPU Basic is
    recommended); the wrapper exists only so the Space stays compatible
    with ZeroGPU if it is selected.

    Returns ``fn`` unchanged when USE_ZEROGPU is off or the ``spaces``
    package could not be imported; otherwise returns ``fn`` wrapped with
    ``spaces.GPU(duration=120)``.
    """

    zerogpu_enabled = USE_ZEROGPU and spaces is not None
    if not zerogpu_enabled:
        return fn

    gpu_decorator = spaces.GPU(duration=120)
    return gpu_decorator(fn)
561
+
562
+
563
+ @zerogpu_compatible
564
  def chat_with_sql_agent(message, history, thread_id):
565
  """
566
  Handles one user message from Gradio.
 
687
 
688
  **Model:** `{MODEL_NAME}`
689
  **Database:** `{DB_PATH}`
690
+ **Hardware note:** CPU Basic is recommended. ZeroGPU compatibility is enabled for Spaces that require it.
691
  """
692
  )
693