prithivMLmods committed on
Commit
b34685a
·
verified ·
1 Parent(s): eb6d2ca

update app

Browse files
Files changed (1) hide show
  1. app.py +327 -275
app.py CHANGED
@@ -77,7 +77,6 @@ MODEL_MAP = {
77
  "coreOCR-7B-050325-preview": (processor_k, model_k),
78
  "SpaceOm-3B": (processor_y, model_y),
79
  }
80
-
81
  MODEL_CHOICES = list(MODEL_MAP.keys())
82
 
83
  image_examples = [
@@ -89,6 +88,12 @@ image_examples = [
89
  ]
90
 
91
 
 
 
 
 
 
 
92
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
93
  buf = BytesIO()
94
  img.save(buf, format=fmt)
@@ -147,22 +152,25 @@ EXAMPLE_CARDS_HTML = build_example_cards_html()
147
 
148
  def load_example_data(idx_str):
149
  try:
150
- idx = int(float(idx_str))
151
  except Exception:
152
- return json.dumps({"status": "error", "message": "Invalid example index"})
 
153
  if idx < 0 or idx >= len(image_examples):
154
- return json.dumps({"status": "error", "message": "Example index out of range"})
 
155
  ex = image_examples[idx]
156
  media_b64 = file_to_data_url(ex["media"])
157
  if not media_b64:
158
- return json.dumps({"status": "error", "message": "Could not load example image"})
159
- return json.dumps({
 
160
  "status": "ok",
161
  "query": ex["query"],
162
  "media": media_b64,
163
  "model": ex["model"],
164
  "name": os.path.basename(ex["media"]),
165
- })
166
 
167
 
168
  def b64_to_pil(b64_str):
@@ -179,88 +187,135 @@ def b64_to_pil(b64_str):
179
  return None
180
 
181
 
182
def calc_timeout_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty, gpu_timeout):
    """Return the GPU duration, in seconds, derived from ``gpu_timeout``.

    The signature mirrors ``generate_image`` because this function is passed
    as the ``duration`` callable of ``spaces.GPU``; only ``gpu_timeout`` is
    actually read.

    Args:
        gpu_timeout: Requested duration. May be an int, a float, or a numeric
            string (including fractional strings such as ``"45.7"``, which are
            truncated toward zero).

    Returns:
        The requested duration as a positive ``int``, or ``60`` when the value
        is missing, not numeric, not finite, or not strictly positive.
    """
    default_seconds = 60
    try:
        seconds = int(gpu_timeout)
    except (TypeError, ValueError, OverflowError):
        # int() rejects strings like "45.7"; retry through float() so a
        # fractional value from the UI is not silently replaced by the default.
        try:
            seconds = int(float(gpu_timeout))
        except (TypeError, ValueError, OverflowError):
            return default_seconds
    # A zero or negative duration is never a meaningful GPU reservation.
    return seconds if seconds > 0 else default_seconds
187
 
188
 
189
@spaces.GPU(duration=calc_timeout_image)
def generate_image(model_name, text, image, max_new_tokens=1024, temperature=0.6, top_p=0.9, top_k=50, repetition_penalty=1.2, gpu_timeout=60):
    """Stream a model response for one image plus one text instruction.

    Args:
        model_name: Key into ``MODEL_MAP`` selecting the (processor, model) pair.
        text: The user's instruction; must be non-blank.
        image: The image handed to the processor; must not be ``None``.
        max_new_tokens: Upper bound on generated tokens (cast to ``int``).
        temperature: Sampling temperature (cast to ``float``).
        top_p: Nucleus sampling threshold (cast to ``float``).
        top_k: Top-k sampling cutoff (cast to ``int``).
        repetition_penalty: Repetition penalty (cast to ``float``).
        gpu_timeout: Not used in the body; it is read by the
            ``calc_timeout_image`` duration callable in the decorator.

    Yields:
        The accumulated response text after each streamed chunk, with any
        ``<|im_end|>`` marker removed.

    Raises:
        gr.Error: If the model is unknown, the image is missing, the
            instruction is blank, or the instruction is longer than
            ``MAX_INPUT_TOKEN_LENGTH * 8`` characters.
    """
    if not model_name or model_name not in MODEL_MAP:
        raise gr.Error("Please select a valid model.")
    if image is None:
        raise gr.Error("Please upload an image.")
    if not text or not str(text).strip():
        raise gr.Error("Please enter your instruction.")
    if len(str(text)) > MAX_INPUT_TOKEN_LENGTH * 8:
        raise gr.Error("Query is too long. Please shorten your input.")

    processor, model = MODEL_MAP[model_name]

    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": text},
        ]
    }]

    prompt_full = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = processor(
        text=[prompt_full],
        images=[image],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_INPUT_TOKEN_LENGTH
    ).to(device)

    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        **inputs,
        "streamer": streamer,
        "max_new_tokens": int(max_new_tokens),
        "do_sample": True,
        "temperature": float(temperature),
        "top_p": float(top_p),
        "top_k": int(top_k),
        "repetition_penalty": float(repetition_penalty),
    }

    # Generation runs on a worker thread so this generator can forward text
    # from the streamer while the model is still producing tokens.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    try:
        buffer = ""
        for new_text in streamer:
            buffer += new_text.replace("<|im_end|>", "")
            # NOTE(review): presumably paces UI updates; confirm before removing.
            time.sleep(0.01)
            yield buffer
        # The stream has ended, so the worker is finishing; wait for it so its
        # tensors are released before the cache is emptied below.
        thread.join()
    finally:
        # Runs on normal completion AND when the consumer closes the generator
        # early or the loop raises, so memory is always reclaimed.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
249
-
250
-
251
def run_inference(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
    """Decode the base64 image and delegate to ``generate_image``.

    Each ``*_v`` argument is forwarded unchanged under the matching keyword of
    ``generate_image``. Everything that generator yields is re-yielded as-is.
    """
    decoded_image = b64_to_pil(image_b64)
    generation_options = {
        "max_new_tokens": max_new_tokens_v,
        "temperature": temperature_v,
        "top_p": top_p_v,
        "top_k": top_k_v,
        "repetition_penalty": repetition_penalty_v,
        "gpu_timeout": gpu_timeout_v,
    }
    yield from generate_image(
        model_name=model_name,
        text=text,
        image=decoded_image,
        **generation_options,
    )
264
 
265
 
266
  def noop():
@@ -288,7 +343,7 @@ footer{display:none!important}
288
 
289
  .app-shell{
290
  background:#18181b;border:1px solid #27272a;border-radius:16px;
291
- margin:12px auto;max-width:1400px;overflow:hidden;
292
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
293
  }
294
  .app-header{
@@ -297,9 +352,9 @@ footer{display:none!important}
297
  }
298
  .app-header-left{display:flex;align-items:center;gap:12px}
299
  .app-logo{
300
- width:38px;height:38px;background:linear-gradient(135deg,#0000FF,#335CFF,#6680FF);
301
  border-radius:10px;display:flex;align-items:center;justify-content:center;
302
- box-shadow:0 4px 12px rgba(0,0,255,.35);
303
  }
304
  .app-logo svg{width:22px;height:22px;fill:#fff;flex-shrink:0}
305
  .app-title{
@@ -308,9 +363,9 @@ footer{display:none!important}
308
  }
309
  .app-badge{
310
  font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
311
- background:rgba(0,0,255,.12);color:#8aa2ff;border:1px solid rgba(0,0,255,.25);letter-spacing:.3px;
312
  }
313
- .app-badge.fast{background:rgba(51,92,255,.10);color:#7f9cff;border:1px solid rgba(51,92,255,.22)}
314
 
315
  .model-tabs-bar{
316
  background:#18181b;border-bottom:1px solid #27272a;padding:10px 16px;
@@ -322,74 +377,64 @@ footer{display:none!important}
322
  border-radius:999px;cursor:pointer;font-size:12px;font-weight:600;padding:0 12px;
323
  color:#ffffff!important;transition:all .15s ease;
324
  }
325
- .model-tab:hover{background:rgba(0,0,255,.12);border-color:rgba(0,0,255,.35)}
326
- .model-tab.active{background:rgba(0,0,255,.22);border-color:#0000FF;color:#fff!important;box-shadow:0 0 0 2px rgba(0,0,255,.10)}
327
  .model-tab-label{font-size:12px;color:#ffffff!important;font-weight:600}
328
 
329
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
330
  .app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
331
- .app-main-right{width:470px;display:flex;flex-direction:column;flex-shrink:0;background:#18181b}
332
 
333
  #media-drop-zone{
334
- position:relative;background:#09090b;height:440px;min-height:440px;max-height:440px;
335
- overflow:hidden;
336
  }
337
- #media-drop-zone.drag-over{outline:2px solid #0000FF;outline-offset:-2px;background:rgba(0,0,255,.04)}
338
  .upload-prompt-modern{
339
- position:absolute;inset:0;display:flex;align-items:center;justify-content:center;
340
- padding:20px;z-index:20;overflow:hidden;
341
  }
342
  .upload-click-area{
343
- display:flex;flex-direction:column;align-items:center;justify-content:center;
344
- cursor:pointer;padding:28px 36px;max-width:92%;max-height:92%;
345
- border:2px dashed #3f3f46;border-radius:16px;
346
- background:rgba(0,0,255,.03);transition:all .2s ease;gap:8px;text-align:center;
347
- overflow:hidden;
348
  }
349
- .upload-click-area:hover{background:rgba(0,0,255,.08);border-color:#0000FF;transform:scale(1.02)}
350
- .upload-click-area:active{background:rgba(0,0,255,.12);transform:scale(.99)}
351
  .upload-click-area svg{width:86px;height:86px;max-width:100%;flex-shrink:0}
352
  .upload-main-text{color:#a1a1aa;font-size:14px;font-weight:600;margin-top:4px}
353
  .upload-sub-text{color:#71717a;font-size:12px}
354
 
355
  .single-preview-wrap{
356
- width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;
357
- overflow:hidden;
358
  }
359
  .single-preview-card{
360
- width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;
361
- overflow:hidden;border:1px solid #27272a;background:#111114;
362
  display:flex;align-items:center;justify-content:center;position:relative;
363
  }
364
  .single-preview-card img{
365
- width:100%;height:100%;max-width:100%;max-height:100%;
366
- object-fit:contain;display:block;background:#000;
367
  }
368
  .preview-overlay-actions{
369
  position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5;
370
  }
371
  .preview-action-btn{
372
- display:inline-flex;align-items:center;justify-content:center;
373
- min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
374
- border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;
375
- color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
376
  }
377
- .preview-action-btn:hover{background:#0000FF;border-color:#0000FF}
378
 
379
  .hint-bar{
380
- background:rgba(0,0,255,.06);border-top:1px solid #27272a;border-bottom:1px solid #27272a;
381
  padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7;
382
  }
383
- .hint-bar b{color:#8aa2ff;font-weight:600}
384
  .hint-bar kbd{
385
- display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;
386
- border-radius:4px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
387
  }
388
 
389
  .examples-section{border-top:1px solid #27272a;padding:12px 16px}
390
  .examples-title{
391
- font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;
392
- letter-spacing:.8px;margin-bottom:10px;
393
  }
394
  .examples-scroll{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}
395
  .examples-scroll::-webkit-scrollbar{height:6px}
@@ -397,46 +442,39 @@ footer{display:none!important}
397
  .examples-scroll::-webkit-scrollbar-thumb{background:#27272a;border-radius:3px}
398
  .examples-scroll::-webkit-scrollbar-thumb:hover{background:#3f3f46}
399
  .example-card{
400
- position:relative;
401
- flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;
402
- border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
403
  }
404
- .example-card:hover{border-color:#0000FF;transform:translateY(-2px);box-shadow:0 4px 12px rgba(0,0,255,.15)}
405
  .example-card.loading{opacity:.5;pointer-events:none}
406
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b;position:relative}
407
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
408
  .example-media-chip{
409
- position:absolute;top:8px;left:8px;
410
- display:inline-flex;padding:3px 7px;background:rgba(0,0,0,.7);border:1px solid rgba(255,255,255,.12);
411
  border-radius:999px;font-size:10px;font-weight:700;color:#fff;letter-spacing:.5px;
412
  }
413
  .example-thumb-placeholder{
414
- width:100%;height:100%;display:flex;align-items:center;justify-content:center;
415
- background:#18181b;color:#3f3f46;font-size:11px;
416
  }
417
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
418
  .example-badge{
419
- display:inline-flex;padding:2px 7px;background:rgba(0,0,255,.12);border-radius:4px;
420
- font-size:10px;font-weight:600;color:#8aa2ff;font-family:'JetBrains Mono',monospace;white-space:nowrap;
421
  }
422
  .example-prompt-text{
423
- padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;
424
- display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden;
425
  }
426
 
427
  .panel-card{border-bottom:1px solid #27272a}
428
  .panel-card-title{
429
- padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;
430
- text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
431
  }
432
  .panel-card-body{padding:16px 20px;display:flex;flex-direction:column;gap:8px}
433
  .modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
434
  .modern-textarea{
435
- width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;
436
- padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
437
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
438
  }
439
- .modern-textarea:focus{border-color:#0000FF;box-shadow:0 0 0 3px rgba(0,0,255,.15)}
440
  .modern-textarea::placeholder{color:#3f3f46}
441
  .modern-textarea.error-flash{
442
  border-color:#ef4444!important;box-shadow:0 0 0 3px rgba(239,68,68,.2)!important;animation:shake .4s ease;
@@ -444,132 +482,103 @@ footer{display:none!important}
444
  @keyframes shake{0%,100%{transform:translateX(0)}20%,60%{transform:translateX(-4px)}40%,80%{transform:translateX(4px)}}
445
 
446
  .toast-notification{
447
- position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);
448
- z-index:9999;padding:10px 24px;border-radius:10px;font-family:'Inter',sans-serif;
449
- font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;
450
- box-shadow:0 8px 24px rgba(0,0,0,.5);
451
  transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
452
  }
453
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
454
  .toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}
455
  .toast-notification.warning{background:linear-gradient(135deg,#d97706,#b45309);color:#fff;border:1px solid rgba(255,255,255,.15)}
456
- .toast-notification.info{background:linear-gradient(135deg,#1d4ed8,#1e40af);color:#fff;border:1px solid rgba(255,255,255,.15)}
457
  .toast-notification .toast-icon{font-size:16px;line-height:1}
458
  .toast-notification .toast-text{line-height:1.3}
459
 
460
  .btn-run{
461
- display:flex;align-items:center;justify-content:center;gap:8px;width:100%;
462
- background:linear-gradient(135deg,#0000FF,#003DCC);border:none;border-radius:10px;
463
- padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'Inter',sans-serif;
464
- color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
465
- transition:all .2s ease;letter-spacing:-.2px;
466
- box-shadow:0 4px 16px rgba(0,0,255,.3),inset 0 1px 0 rgba(255,255,255,.1);
467
  }
468
  .btn-run:hover{
469
- background:linear-gradient(135deg,#335CFF,#0000FF);transform:translateY(-1px);
470
- box-shadow:0 6px 24px rgba(0,0,255,.45),inset 0 1px 0 rgba(255,255,255,.15);
471
  }
472
- .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(0,0,255,.3)}
473
  #custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run,.btn-run *{
474
  color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;fill:#ffffff!important;
475
  }
476
 
477
  .output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}
478
- .output-frame .out-title,
479
- .output-frame .out-title *,
480
- #output-title-label{
481
- color:#ffffff!important;
482
- -webkit-text-fill-color:#ffffff!important;
483
  }
484
  .output-frame .out-title{
485
- padding:10px 20px;font-size:13px;font-weight:700;
486
- text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
487
  display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
488
  }
489
  .out-title-right{display:flex;gap:8px;align-items:center}
490
  .out-action-btn{
491
- display:inline-flex;align-items:center;justify-content:center;background:rgba(0,0,255,.1);
492
- border:1px solid rgba(0,0,255,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
493
- font-size:11px;font-weight:500;color:#8aa2ff!important;gap:4px;height:24px;transition:all .15s;
494
  }
495
- .out-action-btn:hover{background:rgba(0,0,255,.2);border-color:rgba(0,0,255,.35);color:#ffffff!important}
496
- .out-action-btn svg{width:12px;height:12px;fill:#8aa2ff}
497
  .output-frame .out-body{
498
- flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;
499
- overflow:hidden;min-height:320px;position:relative;
500
- }
501
- .output-scroll-wrap{
502
- width:100%;height:100%;padding:0;overflow:hidden;
503
  }
 
504
  .output-textarea{
505
- width:100%;height:320px;min-height:320px;max-height:320px;background:#09090b;color:#e4e4e7;
506
- border:none;outline:none;padding:16px 18px;font-size:13px;line-height:1.6;
507
  font-family:'JetBrains Mono',monospace;overflow:auto;resize:none;white-space:pre-wrap;
508
  }
509
  .output-textarea::placeholder{color:#52525b}
510
- .output-textarea.error-flash{
511
- box-shadow:inset 0 0 0 2px rgba(239,68,68,.6);
512
- }
513
  .modern-loader{
514
- display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);
515
- z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
516
  }
517
  .modern-loader.active{display:flex}
518
  .modern-loader .loader-spinner{
519
- width:36px;height:36px;border:3px solid #27272a;border-top-color:#0000FF;
520
- border-radius:50%;animation:spin .8s linear infinite;
521
  }
522
  @keyframes spin{to{transform:rotate(360deg)}}
523
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
524
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
525
  .loader-bar-fill{
526
- height:100%;background:linear-gradient(90deg,#0000FF,#6680FF,#0000FF);
527
- background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
528
  }
529
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
530
 
531
  .settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}
532
  .settings-group-title{
533
- font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;
534
- padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5);
535
  }
536
  .settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
537
  .slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
538
  .slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}
539
  .slider-row input[type="range"]{
540
- flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;
541
- border-radius:3px;outline:none;min-width:0;
542
  }
543
  .slider-row input[type="range"]::-webkit-slider-thumb{
544
- -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#0000FF,#003DCC);
545
- border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(0,0,255,.4);transition:transform .15s;
546
  }
547
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
548
  .slider-row input[type="range"]::-moz-range-thumb{
549
- width:16px;height:16px;background:linear-gradient(135deg,#0000FF,#003DCC);
550
- border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(0,0,255,.4);
551
  }
552
  .slider-row .slider-val{
553
- min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;
554
- font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;
555
- border-radius:6px;color:#a1a1aa;flex-shrink:0;
556
  }
557
 
558
  .app-statusbar{
559
- background:#18181b;border-top:1px solid #27272a;padding:6px 20px;
560
- display:flex;gap:12px;height:34px;align-items:center;font-size:12px;
561
  }
562
  .app-statusbar .sb-section{
563
- padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;
564
- font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap;
565
  }
566
  .app-statusbar .sb-section.sb-fixed{
567
- flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;
568
- padding:3px 12px;background:rgba(0,0,255,.08);border-radius:6px;color:#8aa2ff;font-weight:500;
569
  }
570
 
571
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
572
- .exp-note a{color:#8aa2ff;text-decoration:none}
573
  .exp-note a:hover{text-decoration:underline}
574
 
575
  ::-webkit-scrollbar{width:8px;height:8px}
@@ -587,7 +596,7 @@ footer{display:none!important}
587
  gallery_js = r"""
588
  () => {
589
  function init() {
590
- if (window.__visionScopeInitDone) return;
591
 
592
  const dropZone = document.getElementById('media-drop-zone');
593
  const uploadPrompt = document.getElementById('upload-prompt');
@@ -607,7 +616,7 @@ function init() {
607
  return;
608
  }
609
 
610
- window.__visionScopeInitDone = true;
611
  let mediaState = null;
612
  let toastTimer = null;
613
  let examplePoller = null;
@@ -634,7 +643,6 @@ function init() {
634
  toast.classList.add('visible');
635
  toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
636
  }
637
- window.__showToast = showToast;
638
 
639
  function showLoader() {
640
  const l = document.getElementById('output-loader');
@@ -648,8 +656,16 @@ function init() {
648
  const sb = document.getElementById('sb-run-state');
649
  if (sb) sb.textContent = 'Done';
650
  }
651
- window.__showLoader = showLoader;
 
 
 
 
 
 
652
  window.__hideLoader = hideLoader;
 
 
653
 
654
  function flashPromptError() {
655
  promptInput.classList.add('error-flash');
@@ -672,23 +688,23 @@ function init() {
672
 
673
  function setGradioValue(containerId, value) {
674
  const container = document.getElementById(containerId);
675
- if (!container) return;
676
- container.querySelectorAll('input, textarea').forEach(el => {
677
- if (el.type === 'file' || el.type === 'range' || el.type === 'checkbox') return;
678
- const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
679
- const ns = Object.getOwnPropertyDescriptor(proto, 'value');
680
- if (ns && ns.set) {
681
- ns.set.call(el, value);
682
- el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
683
- el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
684
- }
685
- });
 
686
  }
687
 
688
  function syncImageToGradio() {
689
  setGradioValue('hidden-image-b64', mediaState ? mediaState.b64 : '');
690
- const txt = mediaState ? '1 image uploaded' : 'No image uploaded';
691
- if (mediaStatus) mediaStatus.textContent = txt;
692
  }
693
 
694
  function syncPromptToGradio() {
@@ -709,18 +725,17 @@ function init() {
709
  return;
710
  }
711
 
712
- previewImg.src = mediaState.b64;
713
- previewImg.style.display = 'block';
714
  previewWrap.style.display = 'flex';
715
  if (uploadPrompt) uploadPrompt.style.display = 'none';
 
 
716
  syncImageToGradio();
717
  }
718
 
719
- function setPreview(b64, name) {
720
- mediaState = {b64, name: name || 'file'};
721
  renderPreview();
722
  }
723
- window.__setPreview = setPreview;
724
 
725
  function clearPreview() {
726
  mediaState = null;
@@ -735,7 +750,7 @@ function init() {
735
  return;
736
  }
737
  const reader = new FileReader();
738
- reader.onload = (e) => setPreview(e.target.result, file.name);
739
  reader.readAsDataURL(file);
740
  }
741
 
@@ -771,6 +786,7 @@ function init() {
771
  });
772
  syncModelToGradio(name);
773
  }
 
774
  window.__activateModelTab = activateModelTab;
775
 
776
  document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
@@ -807,8 +823,8 @@ function init() {
807
 
808
  function validateBeforeRun() {
809
  const promptVal = promptInput.value.trim();
810
- if (!mediaState && !promptVal) {
811
- showToast('Please upload an image and enter your instruction', 'error');
812
  flashPromptError();
813
  return false;
814
  }
@@ -816,11 +832,6 @@ function init() {
816
  showToast('Please upload an image', 'error');
817
  return false;
818
  }
819
- if (!promptVal) {
820
- showToast('Please enter your instruction', 'warning');
821
- flashPromptError();
822
- return false;
823
- }
824
  const currentModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
825
  if (!currentModel) {
826
  showToast('Please select a model', 'error');
@@ -839,7 +850,12 @@ function init() {
839
  showLoader();
840
  setTimeout(() => {
841
  const gradioBtn = document.getElementById('gradio-run-btn');
842
- if (!gradioBtn) return;
 
 
 
 
 
843
  const btn = gradioBtn.querySelector('button');
844
  if (btn) btn.click(); else gradioBtn.click();
845
  }, 180);
@@ -891,22 +907,26 @@ function init() {
891
  function applyExamplePayload(raw) {
892
  try {
893
  const data = JSON.parse(raw);
894
- if (data.status === 'ok') {
895
- if (data.media) setPreview(data.media, data.name || 'example_file');
896
- if (data.query) {
897
- promptInput.value = data.query;
898
- syncPromptToGradio();
899
- }
900
- if (data.model) activateModelTab(data.model);
901
- document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
902
- showToast('Example loaded', 'info');
903
- } else if (data.status === 'error') {
904
- document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
905
- showToast(data.message || 'Failed to load example', 'error');
906
  }
 
 
 
 
 
 
 
 
 
 
 
907
  } catch (e) {
908
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
909
- showToast('Failed to parse example data', 'error');
910
  }
911
  }
912
 
@@ -923,33 +943,54 @@ function init() {
923
  applyExamplePayload(current);
924
  return;
925
  }
926
- if (attempts >= 80) {
927
  clearInterval(examplePoller);
928
  examplePoller = null;
929
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
930
  showToast('Example load timed out', 'error');
931
  }
932
- }, 150);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933
  }
934
 
935
  document.querySelectorAll('.example-card[data-idx]').forEach(card => {
936
  card.addEventListener('click', () => {
937
  const idx = card.getAttribute('data-idx');
 
938
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
939
  card.classList.add('loading');
940
  showToast('Loading example...', 'info');
941
-
942
- setGradioValue('example-result-data', '');
943
- setGradioValue('example-idx-input', idx);
944
-
945
- setTimeout(() => {
946
- const btn = document.getElementById('example-load-btn');
947
- if (btn) {
948
- const b = btn.querySelector('button');
949
- if (b) b.click(); else btn.click();
950
- }
951
- startExamplePolling();
952
- }, 220);
953
  });
954
  });
955
 
@@ -957,14 +998,13 @@ function init() {
957
  if (observerTarget) {
958
  const obs = new MutationObserver(() => {
959
  const current = getValueFromContainer('example-result-data');
960
- if (current && current !== lastSeenExamplePayload) {
961
- lastSeenExamplePayload = current;
962
- if (examplePoller) {
963
- clearInterval(examplePoller);
964
- examplePoller = null;
965
- }
966
- applyExamplePayload(current);
967
  }
 
968
  });
969
  obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
970
  }
@@ -987,6 +1027,10 @@ function watchOutputs() {
987
 
988
  let lastText = '';
989
 
 
 
 
 
990
  function syncOutput() {
991
  const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
992
  if (!el) return;
@@ -995,7 +1039,15 @@ function watchOutputs() {
995
  lastText = val;
996
  outArea.value = val;
997
  outArea.scrollTop = outArea.scrollHeight;
998
- if (window.__hideLoader && val.trim()) window.__hideLoader();
 
 
 
 
 
 
 
 
999
  }
1000
  }
1001
 
@@ -1007,17 +1059,17 @@ watchOutputs();
1007
  }
1008
  """
1009
 
1010
- VISION_LOGO_SVG = """
1011
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
1012
- <path d="M12 5C6.5 5 2.1 8.4 1 12c1.1 3.6 5.5 7 11 7s9.9-3.4 11-7c-1.1-3.6-5.5-7-11-7Zm0 11a4 4 0 1 1 0-8 4 4 0 0 1 0 8Zm0-2.2A1.8 1.8 0 1 0 12 10a1.8 1.8 0 0 0 0 3.6Z" fill="white"/>
1013
  </svg>
1014
  """
1015
 
1016
  UPLOAD_PREVIEW_SVG = """
1017
  <svg viewBox="0 0 80 80" fill="none" xmlns="http://www.w3.org/2000/svg">
1018
- <rect x="8" y="14" width="64" height="52" rx="6" fill="none" stroke="#0000FF" stroke-width="2" stroke-dasharray="4 3"/>
1019
- <polygon points="12,62 30,40 42,50 54,34 68,62" fill="rgba(0,0,255,0.15)" stroke="#0000FF" stroke-width="1.5"/>
1020
- <circle cx="28" cy="30" r="6" fill="rgba(0,0,255,0.2)" stroke="#0000FF" stroke-width="1.5"/>
1021
  </svg>
1022
  """
1023
 
@@ -1051,7 +1103,7 @@ with gr.Blocks() as demo:
1051
  <div class="app-shell">
1052
  <div class="app-header">
1053
  <div class="app-header-left">
1054
- <div class="app-logo">{VISION_LOGO_SVG}</div>
1055
  <span class="app-title">VisionScope R2</span>
1056
  <span class="app-badge">vision enabled</span>
1057
  <span class="app-badge fast">Image Inference</span>
@@ -1069,7 +1121,7 @@ with gr.Blocks() as demo:
1069
  <div id="upload-click-area" class="upload-click-area">
1070
  {UPLOAD_PREVIEW_SVG}
1071
  <span id="upload-main-text" class="upload-main-text">Click or drag an image here</span>
1072
- <span id="upload-sub-text" class="upload-sub-text">Upload one document, page, receipt, screenshot, or scene image for vision tasks</span>
1073
  </div>
1074
  </div>
1075
 
@@ -1087,8 +1139,8 @@ with gr.Blocks() as demo:
1087
  </div>
1088
 
1089
  <div class="hint-bar">
1090
- <b>Upload:</b> Click or drag an image into the panel &nbsp;&middot;&nbsp;
1091
- <b>Model:</b> Change models from the header &nbsp;&middot;&nbsp;
1092
  <kbd>Clear</kbd> removes the current image
1093
  </div>
1094
 
@@ -1102,10 +1154,10 @@ with gr.Blocks() as demo:
1102
 
1103
  <div class="app-main-right">
1104
  <div class="panel-card">
1105
- <div class="panel-card-title">Vision Instruction</div>
1106
  <div class="panel-card-body">
1107
- <label class="modern-label" for="custom-query-input">Query Input</label>
1108
- <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., describe the scene, read the handwriting, extract visible text, estimate distance..."></textarea>
1109
  </div>
1110
  </div>
1111
 
@@ -1174,7 +1226,7 @@ with gr.Blocks() as demo:
1174
  </div>
1175
 
1176
  <div class="exp-note">
1177
- Experimental vision suite &middot; Open on <a href="https://github.com/PRITHIVSAKTHIUR/VisionScope-R2" target="_blank">GitHub</a>
1178
  </div>
1179
 
1180
  <div class="app-statusbar">
@@ -1190,7 +1242,7 @@ with gr.Blocks() as demo:
1190
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1191
 
1192
  run_btn.click(
1193
- fn=run_inference,
1194
  inputs=[
1195
  hidden_model_name,
1196
  prompt,
 
77
  "coreOCR-7B-050325-preview": (processor_k, model_k),
78
  "SpaceOm-3B": (processor_y, model_y),
79
  }
 
80
  MODEL_CHOICES = list(MODEL_MAP.keys())
81
 
82
  image_examples = [
 
88
  ]
89
 
90
 
91
def select_model(model_name: str):
    """Return the ``(processor, model)`` pair registered under *model_name*.

    Args:
        model_name: A key of the module-level ``MODEL_MAP``.

    Raises:
        ValueError: If *model_name* is not a key of ``MODEL_MAP``.
    """
    try:
        return MODEL_MAP[model_name]
    except KeyError:
        # Callers get a ValueError with a user-facing message rather than
        # the raw KeyError from the dictionary lookup.
        raise ValueError("Invalid model selected.") from None
95
+
96
+
97
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
98
  buf = BytesIO()
99
  img.save(buf, format=fmt)
 
152
 
153
  def load_example_data(idx_str):
154
  try:
155
+ idx = int(str(idx_str).strip())
156
  except Exception:
157
+ return gr.update(value="")
158
+
159
  if idx < 0 or idx >= len(image_examples):
160
+ return gr.update(value="")
161
+
162
  ex = image_examples[idx]
163
  media_b64 = file_to_data_url(ex["media"])
164
  if not media_b64:
165
+ return gr.update(value=json.dumps({"status": "error", "message": "Could not load example image"}))
166
+
167
+ return gr.update(value=json.dumps({
168
  "status": "ok",
169
  "query": ex["query"],
170
  "media": media_b64,
171
  "model": ex["model"],
172
  "name": os.path.basename(ex["media"]),
173
+ }))
174
 
175
 
176
  def b64_to_pil(b64_str):
 
187
  return None
188
 
189
 
190
def calc_timeout_generic(*args, **kwargs):
    """Work out the GPU duration to request for a decorated call.

    The function is passed as ``duration=`` to ``spaces.GPU`` and is written
    to accept whatever arguments the decorated function receives.

    Lookup order for the requested value:

    1. the ``gpu_timeout`` keyword argument, if present and not ``None``;
    2. otherwise the *last* positional argument. This relies on the
       decorated function declaring ``gpu_timeout`` as its final parameter
       and on callers passing every positional argument -- NOTE(review):
       a shorter positional call would make an unrelated argument be read
       as the timeout.

    Returns:
        The requested value truncated to ``int``. Falls back to ``60`` when
        no value is available, when it cannot be converted, or when the
        result is not a positive number (a zero or negative duration is not
        a meaningful reservation).
    """
    default_duration = 60

    gpu_timeout = kwargs.get("gpu_timeout")
    if gpu_timeout is None and args:
        gpu_timeout = args[-1]

    try:
        duration = int(gpu_timeout)
    except Exception:
        # Deliberately broad: this runs before the real work starts and must
        # never fail the request just because the timeout was malformed.
        return default_duration

    if duration <= 0:
        return default_duration
    return duration
198
 
199
 
200
@spaces.GPU(duration=calc_timeout_generic)
def generate_image(model_name: str, text: str, image: Image.Image,
                   max_new_tokens: int = 1024, temperature: float = 0.6,
                   top_p: float = 0.9, top_k: int = 50,
                   repetition_penalty: float = 1.2, gpu_timeout: int = 60):
    """Stream a model answer for one image and one text instruction.

    This is a generator. Every item it yields is the *complete* text
    produced so far, so a consumer only needs to show the latest item.
    Failures are not raised: they are yielded as a string beginning with
    ``[ERROR]``.

    Args:
        model_name: Key of ``MODEL_MAP`` selecting the processor/model pair.
        text: The user's instruction.
        image: The image to run inference on.
        max_new_tokens: Forwarded to ``model.generate`` after ``int()``.
        temperature: Forwarded to ``model.generate`` after ``float()``.
        top_p: Forwarded to ``model.generate`` after ``float()``.
        top_k: Forwarded to ``model.generate`` after ``int()``.
        repetition_penalty: Forwarded to ``model.generate`` after ``float()``.
        gpu_timeout: Not read in this body; presumably consumed by
            ``calc_timeout_generic`` via the ``spaces.GPU`` decorator.

    Yields:
        The accumulated output text, or an ``[ERROR] ...`` message.
    """

    def _input_problem():
        # The order of these checks decides which message the user sees
        # when several inputs are wrong at once.
        if not model_name or model_name not in MODEL_MAP:
            return "[ERROR] Please select a valid model."
        if image is None:
            return "[ERROR] Please upload an image."
        if not text or not str(text).strip():
            return "[ERROR] Please enter your instruction."
        if len(str(text)) > MAX_INPUT_TOKEN_LENGTH * 8:
            return "[ERROR] Query is too long. Please shorten your input."
        return None

    try:
        rejection = _input_problem()
        if rejection is not None:
            yield rejection
            return

        processor, model = select_model(model_name)

        user_turn = {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": text},
            ],
        }
        prompt_full = processor.apply_chat_template(
            [user_turn],
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = processor(
            text=[prompt_full],
            images=[image],
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=MAX_INPUT_TOKEN_LENGTH,
        ).to(device)

        # Use the processor's tokenizer when it has one, else the processor.
        streamer = TextIteratorStreamer(
            getattr(processor, "tokenizer", processor),
            skip_prompt=True,
            skip_special_tokens=True,
        )

        sampling = {
            "max_new_tokens": int(max_new_tokens),
            "do_sample": True,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "top_k": int(top_k),
            "repetition_penalty": float(repetition_penalty),
        }
        generation_kwargs = {**inputs, "streamer": streamer, **sampling}

        # Filled by the worker thread if model.generate raises.
        failures = []

        def _generate_in_background():
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                failures.append(exc)
                # Close the stream so the consuming loop below stops
                # waiting; a failure to close is ignored on purpose.
                try:
                    streamer.end()
                except Exception:
                    pass

        worker = Thread(target=_generate_in_background, daemon=True)
        worker.start()

        buffer = ""
        for piece in streamer:
            buffer += piece.replace("<|im_end|>", "")
            time.sleep(0.01)
            yield buffer

        worker.join(timeout=1.0)

        if failures:
            err_msg = f"[ERROR] Inference failed: {failures[0]}"
            # Keep any partial answer visible above the error message.
            yield (buffer + "\n\n" + err_msg) if buffer.strip() else err_msg
            return

        if not buffer.strip():
            yield "[ERROR] No output was generated."

    except Exception as exc:
        yield f"[ERROR] {exc}"
    finally:
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
301
+
302
+
303
def run_router(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
    """Decode the uploaded image and stream ``generate_image`` output.

    Generator used as the run-button handler. ``image_b64`` is converted
    with ``b64_to_pil`` and everything else is forwarded unchanged. All
    arguments are passed by keyword, which is how ``calc_timeout_generic``
    finds ``gpu_timeout``.

    Yields:
        Whatever ``generate_image`` yields, or a single ``[ERROR] ...``
        string if decoding or the delegated call raises.
    """
    try:
        request = dict(
            model_name=model_name,
            text=text,
            image=b64_to_pil(image_b64),
            max_new_tokens=max_new_tokens_v,
            temperature=temperature_v,
            top_p=top_p_v,
            top_k=top_k_v,
            repetition_penalty=repetition_penalty_v,
            gpu_timeout=gpu_timeout_v,
        )
        yield from generate_image(**request)
    except Exception as exc:
        yield f"[ERROR] {exc}"
 
 
 
319
 
320
 
321
  def noop():
 
343
 
344
  .app-shell{
345
  background:#18181b;border:1px solid #27272a;border-radius:16px;
346
+ margin:12px auto;max-width:1450px;overflow:hidden;
347
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
348
  }
349
  .app-header{
 
352
  }
353
  .app-header-left{display:flex;align-items:center;gap:12px}
354
  .app-logo{
355
+ width:38px;height:38px;background:linear-gradient(135deg,#0000CD,#2645ff,#5876ff);
356
  border-radius:10px;display:flex;align-items:center;justify-content:center;
357
+ box-shadow:0 4px 12px rgba(0,0,205,.35);
358
  }
359
  .app-logo svg{width:22px;height:22px;fill:#fff;flex-shrink:0}
360
  .app-title{
 
363
  }
364
  .app-badge{
365
  font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
366
+ background:rgba(0,0,205,.12);color:#8da1ff;border:1px solid rgba(0,0,205,.25);letter-spacing:.3px;
367
  }
368
+ .app-badge.fast{background:rgba(38,69,255,.10);color:#90a3ff;border:1px solid rgba(38,69,255,.22)}
369
 
370
  .model-tabs-bar{
371
  background:#18181b;border-bottom:1px solid #27272a;padding:10px 16px;
 
377
  border-radius:999px;cursor:pointer;font-size:12px;font-weight:600;padding:0 12px;
378
  color:#ffffff!important;transition:all .15s ease;
379
  }
380
+ .model-tab:hover{background:rgba(0,0,205,.12);border-color:rgba(0,0,205,.35)}
381
+ .model-tab.active{background:rgba(0,0,205,.22);border-color:#0000CD;color:#fff!important;box-shadow:0 0 0 2px rgba(0,0,205,.10)}
382
  .model-tab-label{font-size:12px;color:#ffffff!important;font-weight:600}
383
 
384
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
385
  .app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
386
+ .app-main-right{width:500px;display:flex;flex-direction:column;flex-shrink:0;background:#18181b}
387
 
388
  #media-drop-zone{
389
+ position:relative;background:#09090b;height:440px;min-height:440px;max-height:440px;overflow:hidden;
 
390
  }
391
+ #media-drop-zone.drag-over{outline:2px solid #0000CD;outline-offset:-2px;background:rgba(0,0,205,.04)}
392
  .upload-prompt-modern{
393
+ position:absolute;inset:0;display:flex;align-items:center;justify-content:center;padding:20px;z-index:20;overflow:hidden;
 
394
  }
395
  .upload-click-area{
396
+ display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;
397
+ padding:28px 36px;max-width:92%;max-height:92%;border:2px dashed #3f3f46;border-radius:16px;
398
+ background:rgba(0,0,205,.03);transition:all .2s ease;gap:8px;text-align:center;overflow:hidden;
 
 
399
  }
400
+ .upload-click-area:hover{background:rgba(0,0,205,.08);border-color:#0000CD;transform:scale(1.02)}
401
+ .upload-click-area:active{background:rgba(0,0,205,.12);transform:scale(.99)}
402
  .upload-click-area svg{width:86px;height:86px;max-width:100%;flex-shrink:0}
403
  .upload-main-text{color:#a1a1aa;font-size:14px;font-weight:600;margin-top:4px}
404
  .upload-sub-text{color:#71717a;font-size:12px}
405
 
406
  .single-preview-wrap{
407
+ width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;overflow:hidden;
 
408
  }
409
  .single-preview-card{
410
+ width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;overflow:hidden;border:1px solid #27272a;background:#111114;
 
411
  display:flex;align-items:center;justify-content:center;position:relative;
412
  }
413
  .single-preview-card img{
414
+ width:100%;height:100%;max-width:100%;max-height:100%;object-fit:contain;display:block;background:#000;border:none;
 
415
  }
416
  .preview-overlay-actions{
417
  position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5;
418
  }
419
  .preview-action-btn{
420
+ display:inline-flex;align-items:center;justify-content:center;min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
421
+ border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
 
 
422
  }
423
+ .preview-action-btn:hover{background:#0000CD;border-color:#0000CD}
424
 
425
  .hint-bar{
426
+ background:rgba(0,0,205,.06);border-top:1px solid #27272a;border-bottom:1px solid #27272a;
427
  padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7;
428
  }
429
+ .hint-bar b{color:#8da1ff;font-weight:600}
430
  .hint-bar kbd{
431
+ display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;border-radius:4px;
432
+ font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
433
  }
434
 
435
  .examples-section{border-top:1px solid #27272a;padding:12px 16px}
436
  .examples-title{
437
+ font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;margin-bottom:10px;
 
438
  }
439
  .examples-scroll{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}
440
  .examples-scroll::-webkit-scrollbar{height:6px}
 
442
  .examples-scroll::-webkit-scrollbar-thumb{background:#27272a;border-radius:3px}
443
  .examples-scroll::-webkit-scrollbar-thumb:hover{background:#3f3f46}
444
  .example-card{
445
+ position:relative;flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
 
 
446
  }
447
+ .example-card:hover{border-color:#0000CD;transform:translateY(-2px);box-shadow:0 4px 12px rgba(0,0,205,.15)}
448
  .example-card.loading{opacity:.5;pointer-events:none}
449
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b;position:relative}
450
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
451
  .example-media-chip{
452
+ position:absolute;top:8px;left:8px;display:inline-flex;padding:3px 7px;background:rgba(0,0,0,.7);border:1px solid rgba(255,255,255,.12);
 
453
  border-radius:999px;font-size:10px;font-weight:700;color:#fff;letter-spacing:.5px;
454
  }
455
  .example-thumb-placeholder{
456
+ width:100%;height:100%;display:flex;align-items:center;justify-content:center;background:#18181b;color:#3f3f46;font-size:11px;
 
457
  }
458
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
459
  .example-badge{
460
+ display:inline-flex;padding:2px 7px;background:rgba(0,0,205,.12);border-radius:4px;font-size:10px;font-weight:600;color:#8da1ff;
461
+ font-family:'JetBrains Mono',monospace;white-space:nowrap;
462
  }
463
  .example-prompt-text{
464
+ padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden;
 
465
  }
466
 
467
  .panel-card{border-bottom:1px solid #27272a}
468
  .panel-card-title{
469
+ padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
 
470
  }
471
  .panel-card-body{padding:16px 20px;display:flex;flex-direction:column;gap:8px}
472
  .modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
473
  .modern-textarea{
474
+ width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
 
475
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
476
  }
477
+ .modern-textarea:focus{border-color:#0000CD;box-shadow:0 0 0 3px rgba(0,0,205,.15)}
478
  .modern-textarea::placeholder{color:#3f3f46}
479
  .modern-textarea.error-flash{
480
  border-color:#ef4444!important;box-shadow:0 0 0 3px rgba(239,68,68,.2)!important;animation:shake .4s ease;
 
482
  @keyframes shake{0%,100%{transform:translateX(0)}20%,60%{transform:translateX(-4px)}40%,80%{transform:translateX(4px)}}
483
 
484
  .toast-notification{
485
+ position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);z-index:9999;padding:10px 24px;border-radius:10px;
486
+ font-family:'Inter',sans-serif;font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;box-shadow:0 8px 24px rgba(0,0,0,.5);
 
 
487
  transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
488
  }
489
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
490
  .toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}
491
  .toast-notification.warning{background:linear-gradient(135deg,#d97706,#b45309);color:#fff;border:1px solid rgba(255,255,255,.15)}
492
+ .toast-notification.info{background:linear-gradient(135deg,#1e40af,#1d4ed8);color:#fff;border:1px solid rgba(255,255,255,.15)}
493
  .toast-notification .toast-icon{font-size:16px;line-height:1}
494
  .toast-notification .toast-text{line-height:1.3}
495
 
496
  .btn-run{
497
+ display:flex;align-items:center;justify-content:center;gap:8px;width:100%;background:linear-gradient(135deg,#0000CD,#1638b7);border:none;border-radius:10px;
498
+ padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'Inter',sans-serif;color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
499
+ transition:all .2s ease;letter-spacing:-.2px;box-shadow:0 4px 16px rgba(0,0,205,.3),inset 0 1px 0 rgba(255,255,255,.1);
 
 
 
500
  }
501
  .btn-run:hover{
502
+ background:linear-gradient(135deg,#2645ff,#0000CD);transform:translateY(-1px);box-shadow:0 6px 24px rgba(0,0,205,.45),inset 0 1px 0 rgba(255,255,255,.15);
 
503
  }
504
+ .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(0,0,205,.3)}
505
  #custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run,.btn-run *{
506
  color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;fill:#ffffff!important;
507
  }
508
 
509
  .output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}
510
+ .output-frame .out-title,.output-frame .out-title *,#output-title-label{
511
+ color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
 
 
 
512
  }
513
  .output-frame .out-title{
514
+ padding:10px 20px;font-size:13px;font-weight:700;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
 
515
  display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
516
  }
517
  .out-title-right{display:flex;gap:8px;align-items:center}
518
  .out-action-btn{
519
+ display:inline-flex;align-items:center;justify-content:center;background:rgba(0,0,205,.1);border:1px solid rgba(0,0,205,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
520
+ font-size:11px;font-weight:500;color:#8da1ff!important;gap:4px;height:24px;transition:all .15s;
 
521
  }
522
+ .out-action-btn:hover{background:rgba(0,0,205,.2);border-color:rgba(0,0,205,.35);color:#ffffff!important}
523
+ .out-action-btn svg{width:12px;height:12px;fill:#8da1ff}
524
  .output-frame .out-body{
525
+ flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;overflow:hidden;min-height:320px;position:relative;
 
 
 
 
526
  }
527
+ .output-scroll-wrap{width:100%;height:100%;padding:0;overflow:hidden}
528
  .output-textarea{
529
+ width:100%;height:320px;min-height:320px;max-height:320px;background:#09090b;color:#e4e4e7;border:none;outline:none;padding:16px 18px;font-size:13px;line-height:1.6;
 
530
  font-family:'JetBrains Mono',monospace;overflow:auto;resize:none;white-space:pre-wrap;
531
  }
532
  .output-textarea::placeholder{color:#52525b}
533
+ .output-textarea.error-flash{box-shadow:inset 0 0 0 2px rgba(239,68,68,.6)}
 
 
534
  .modern-loader{
535
+ display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
 
536
  }
537
  .modern-loader.active{display:flex}
538
  .modern-loader .loader-spinner{
539
+ width:36px;height:36px;border:3px solid #27272a;border-top-color:#0000CD;border-radius:50%;animation:spin .8s linear infinite;
 
540
  }
541
  @keyframes spin{to{transform:rotate(360deg)}}
542
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
543
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
544
  .loader-bar-fill{
545
+ height:100%;background:linear-gradient(90deg,#0000CD,#4d6dff,#0000CD);background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
 
546
  }
547
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
548
 
549
  .settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}
550
  .settings-group-title{
551
+ font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5);
 
552
  }
553
  .settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
554
  .slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
555
  .slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}
556
  .slider-row input[type="range"]{
557
+ flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;border-radius:3px;outline:none;min-width:0;
 
558
  }
559
  .slider-row input[type="range"]::-webkit-slider-thumb{
560
+ -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#0000CD,#1638b7);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(0,0,205,.4);transition:transform .15s;
 
561
  }
562
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
563
  .slider-row input[type="range"]::-moz-range-thumb{
564
+ width:16px;height:16px;background:linear-gradient(135deg,#0000CD,#1638b7);border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(0,0,205,.4);
 
565
  }
566
  .slider-row .slider-val{
567
+ min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa;flex-shrink:0;
 
 
568
  }
569
 
570
  .app-statusbar{
571
+ background:#18181b;border-top:1px solid #27272a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center;font-size:12px;
 
572
  }
573
  .app-statusbar .sb-section{
574
+ padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap;
 
575
  }
576
  .app-statusbar .sb-section.sb-fixed{
577
+ flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;padding:3px 12px;background:rgba(0,0,205,.08);border-radius:6px;color:#8da1ff;font-weight:500;
 
578
  }
579
 
580
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
581
+ .exp-note a{color:#8da1ff;text-decoration:none}
582
  .exp-note a:hover{text-decoration:underline}
583
 
584
  ::-webkit-scrollbar{width:8px;height:8px}
 
596
  gallery_js = r"""
597
  () => {
598
  function init() {
599
+ if (window.__outpostInitDone) return;
600
 
601
  const dropZone = document.getElementById('media-drop-zone');
602
  const uploadPrompt = document.getElementById('upload-prompt');
 
616
  return;
617
  }
618
 
619
+ window.__outpostInitDone = true;
620
  let mediaState = null;
621
  let toastTimer = null;
622
  let examplePoller = null;
 
643
  toast.classList.add('visible');
644
  toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
645
  }
 
646
 
647
  function showLoader() {
648
  const l = document.getElementById('output-loader');
 
656
  const sb = document.getElementById('sb-run-state');
657
  if (sb) sb.textContent = 'Done';
658
  }
659
+ function setRunErrorState() {
660
+ const l = document.getElementById('output-loader');
661
+ if (l) l.classList.remove('active');
662
+ const sb = document.getElementById('sb-run-state');
663
+ if (sb) sb.textContent = 'Error';
664
+ }
665
+
666
  window.__hideLoader = hideLoader;
667
+ window.__setRunErrorState = setRunErrorState;
668
+ window.__showToast = showToast;
669
 
670
  function flashPromptError() {
671
  promptInput.classList.add('error-flash');
 
688
 
689
  function setGradioValue(containerId, value) {
690
  const container = document.getElementById(containerId);
691
+ if (!container) return false;
692
+ const el = container.querySelector('textarea, input');
693
+ if (!el) return false;
694
+ const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
695
+ const ns = Object.getOwnPropertyDescriptor(proto, 'value');
696
+ if (ns && ns.set) {
697
+ ns.set.call(el, value);
698
+ el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
699
+ el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
700
+ return true;
701
+ }
702
+ return false;
703
  }
704
 
705
  function syncImageToGradio() {
706
  setGradioValue('hidden-image-b64', mediaState ? mediaState.b64 : '');
707
+ if (mediaStatus) mediaStatus.textContent = mediaState ? '1 image uploaded' : 'No image uploaded';
 
708
  }
709
 
710
  function syncPromptToGradio() {
 
725
  return;
726
  }
727
 
 
 
728
  previewWrap.style.display = 'flex';
729
  if (uploadPrompt) uploadPrompt.style.display = 'none';
730
+ previewImg.src = mediaState.preview || mediaState.b64;
731
+ previewImg.style.display = 'block';
732
  syncImageToGradio();
733
  }
734
 
735
+ function setPreviewFromFileReader(b64, name) {
736
+ mediaState = {b64, name: name || 'file', mode: 'image'};
737
  renderPreview();
738
  }
 
739
 
740
  function clearPreview() {
741
  mediaState = null;
 
750
  return;
751
  }
752
  const reader = new FileReader();
753
+ reader.onload = (e) => setPreviewFromFileReader(e.target.result, file.name);
754
  reader.readAsDataURL(file);
755
  }
756
 
 
786
  });
787
  syncModelToGradio(name);
788
  }
789
+
790
  window.__activateModelTab = activateModelTab;
791
 
792
  document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
 
823
 
824
  function validateBeforeRun() {
825
  const promptVal = promptInput.value.trim();
826
+ if (!promptVal) {
827
+ showToast('Please enter your instruction', 'warning');
828
  flashPromptError();
829
  return false;
830
  }
 
832
  showToast('Please upload an image', 'error');
833
  return false;
834
  }
 
 
 
 
 
835
  const currentModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
836
  if (!currentModel) {
837
  showToast('Please select a model', 'error');
 
850
  showLoader();
851
  setTimeout(() => {
852
  const gradioBtn = document.getElementById('gradio-run-btn');
853
+ if (!gradioBtn) {
854
+ setRunErrorState();
855
+ if (outputArea) outputArea.value = '[ERROR] Run button not found.';
856
+ showToast('Run button not found', 'error');
857
+ return;
858
+ }
859
  const btn = gradioBtn.querySelector('button');
860
  if (btn) btn.click(); else gradioBtn.click();
861
  }, 180);
 
907
  function applyExamplePayload(raw) {
908
  try {
909
  const data = JSON.parse(raw);
910
+ if (data.status !== 'ok') return;
911
+
912
+ if (data.model) activateModelTab(data.model);
913
+ if (data.query) {
914
+ promptInput.value = data.query;
915
+ syncPromptToGradio();
 
 
 
 
 
 
916
  }
917
+
918
+ mediaState = {
919
+ b64: data.media || '',
920
+ preview: data.media || '',
921
+ name: data.name || 'example_file',
922
+ mode: 'image'
923
+ };
924
+ renderPreview();
925
+
926
+ document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
927
+ showToast('Example loaded', 'info');
928
  } catch (e) {
929
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
 
930
  }
931
  }
932
 
 
943
  applyExamplePayload(current);
944
  return;
945
  }
946
+ if (attempts >= 100) {
947
  clearInterval(examplePoller);
948
  examplePoller = null;
949
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
950
  showToast('Example load timed out', 'error');
951
  }
952
+ }, 120);
953
+ }
954
+
955
+ function triggerExampleLoad(idx) {
956
+ const btnWrap = document.getElementById('example-load-btn');
957
+ const btn = btnWrap ? (btnWrap.querySelector('button') || btnWrap) : null;
958
+ if (!btn) return;
959
+
960
+ let attempts = 0;
961
+
962
+ function writeIdxAndClick() {
963
+ attempts += 1;
964
+
965
+ const ok1 = setGradioValue('example-idx-input', String(idx));
966
+ setGradioValue('example-result-data', '');
967
+ const currentVal = getValueFromContainer('example-idx-input');
968
+
969
+ if (ok1 && currentVal === String(idx)) {
970
+ btn.click();
971
+ startExamplePolling();
972
+ return;
973
+ }
974
+
975
+ if (attempts < 30) {
976
+ setTimeout(writeIdxAndClick, 100);
977
+ } else {
978
+ document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
979
+ showToast('Failed to initialize example loader', 'error');
980
+ }
981
+ }
982
+
983
+ writeIdxAndClick();
984
  }
985
 
986
  document.querySelectorAll('.example-card[data-idx]').forEach(card => {
987
  card.addEventListener('click', () => {
988
  const idx = card.getAttribute('data-idx');
989
+ if (idx === null || idx === undefined || idx === '') return;
990
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
991
  card.classList.add('loading');
992
  showToast('Loading example...', 'info');
993
+ triggerExampleLoad(idx);
 
 
 
 
 
 
 
 
 
 
 
994
  });
995
  });
996
 
 
998
  if (observerTarget) {
999
  const obs = new MutationObserver(() => {
1000
  const current = getValueFromContainer('example-result-data');
1001
+ if (!current || current === lastSeenExamplePayload) return;
1002
+ lastSeenExamplePayload = current;
1003
+ if (examplePoller) {
1004
+ clearInterval(examplePoller);
1005
+ examplePoller = null;
 
 
1006
  }
1007
+ applyExamplePayload(current);
1008
  });
1009
  obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
1010
  }
 
1027
 
1028
  let lastText = '';
1029
 
1030
+ function isErrorText(val) {
1031
+ return typeof val === 'string' && val.trim().startsWith('[ERROR]');
1032
+ }
1033
+
1034
  function syncOutput() {
1035
  const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
1036
  if (!el) return;
 
1039
  lastText = val;
1040
  outArea.value = val;
1041
  outArea.scrollTop = outArea.scrollHeight;
1042
+
1043
+ if (val.trim()) {
1044
+ if (isErrorText(val)) {
1045
+ if (window.__setRunErrorState) window.__setRunErrorState();
1046
+ if (window.__showToast) window.__showToast('Inference failed', 'error');
1047
+ } else {
1048
+ if (window.__hideLoader) window.__hideLoader();
1049
+ }
1050
+ }
1051
  }
1052
  }
1053
 
 
1059
  }
1060
  """
1061
 
1062
+ FIRE_LOGO_SVG = """
1063
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
1064
+ <path d="M13.5 2.5c.4 2.3-.4 4-1.7 5.6-1.3 1.6-2.6 3-2.6 5 0 1.8 1.2 3.2 2.8 3.2 1.8 0 3.1-1.4 3.1-3.5 0-1.1-.4-2.1-1.2-3.3 2.7 1.2 5.1 4.1 5.1 7.4 0 4-3.1 7.1-7.2 7.1-4.3 0-7.8-3.3-7.8-7.8 0-3.1 1.7-5.5 4-7.8 1.7-1.7 3.9-3.6 4.7-6z" fill="white"/>
1065
  </svg>
1066
  """
1067
 
1068
  UPLOAD_PREVIEW_SVG = """
1069
  <svg viewBox="0 0 80 80" fill="none" xmlns="http://www.w3.org/2000/svg">
1070
+ <rect x="8" y="14" width="64" height="52" rx="6" fill="none" stroke="#0000CD" stroke-width="2" stroke-dasharray="4 3"/>
1071
+ <polygon points="12,62 30,40 42,50 54,34 68,62" fill="rgba(0,0,205,0.15)" stroke="#0000CD" stroke-width="1.5"/>
1072
+ <circle cx="28" cy="30" r="6" fill="rgba(0,0,205,0.2)" stroke="#0000CD" stroke-width="1.5"/>
1073
  </svg>
1074
  """
1075
 
 
1103
  <div class="app-shell">
1104
  <div class="app-header">
1105
  <div class="app-header-left">
1106
+ <div class="app-logo">{FIRE_LOGO_SVG}</div>
1107
  <span class="app-title">VisionScope R2</span>
1108
  <span class="app-badge">vision enabled</span>
1109
  <span class="app-badge fast">Image Inference</span>
 
1121
  <div id="upload-click-area" class="upload-click-area">
1122
  {UPLOAD_PREVIEW_SVG}
1123
  <span id="upload-main-text" class="upload-main-text">Click or drag an image here</span>
1124
+ <span id="upload-sub-text" class="upload-sub-text">Upload one image for multimodal inference</span>
1125
  </div>
1126
  </div>
1127
 
 
1139
  </div>
1140
 
1141
  <div class="hint-bar">
1142
+ <b>Mode:</b> Image inference only &nbsp;&middot;&nbsp;
1143
+ <b>Model:</b> Switch between caption, OCR, and reasoning variants &nbsp;&middot;&nbsp;
1144
  <kbd>Clear</kbd> removes the current image
1145
  </div>
1146
 
 
1154
 
1155
  <div class="app-main-right">
1156
  <div class="panel-card">
1157
+ <div id="instruction-title" class="panel-card-title">Vision Instruction</div>
1158
  <div class="panel-card-body">
1159
+ <label id="query-label" class="modern-label" for="custom-query-input">Query Input</label>
1160
+ <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., perform OCR, estimate distance, describe the scene, count objects, extract visible text..."></textarea>
1161
  </div>
1162
  </div>
1163
 
 
1226
  </div>
1227
 
1228
  <div class="exp-note">
1229
+ Experimental vision workspace
1230
  </div>
1231
 
1232
  <div class="app-statusbar">
 
1242
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1243
 
1244
  run_btn.click(
1245
+ fn=run_router,
1246
  inputs=[
1247
  hidden_model_name,
1248
  prompt,