Spaces:

InternScience
/

ResearchHarness

Running

App Files Community

CoCoOne committed 9 days ago

Commit

353ee9f

1 Parent(s): 6798401

Slim Space deployment mirror

Browse files

Files changed (30) hide show

.dockerignore +10 -9
.env.example +0 -39
.gitignore +16 -215
README.md +107 -10
agent_base/console_utils.py +0 -223
agent_base/react_agent.py +9 -154
agent_base/tools/README.md +0 -457
agent_base/tools/tool_web.py +34 -8
agent_base/utils.py +0 -15
api/__init__.py +0 -1
api/openai_server.py +0 -518
api_runs/.gitkeep +0 -1
app.py +0 -10
benchmarks/QA/README.md +0 -102
benchmarks/QA/role_prompt.md +0 -31
benchmarks/README.md +0 -18
benchmarks/ResearchClawBench/README.md +0 -44
benchmarks/ResearchClawBench/adapter.py +0 -93
benchmarks/ResearchClawBench/role_prompt.md +0 -195
docs/tutorial_en.md +0 -531
docs/tutorial_zh.md +0 -511
frontend/local_server.py +36 -157
frontend/static/app.css +17 -188
frontend/static/app.js +5 -126
frontend/static/index.html +4 -23
run_agent.py +0 -7
run_frontend.py +0 -48
run_server.py +0 -61
traces/.gitkeep +0 -1
workspace/.gitkeep +0 -1

.dockerignore CHANGED Viewed

@@ -1,23 +1,24 @@
-.git
 .gitignore
 __pycache__/
 *.py[cod]
 .pytest_cache/
 .mypy_cache/
 .ruff_cache/
 .env
 .envrc
 .venv/
 venv/
-workspace/*
-!workspace/.gitkeep
-traces/*
-!traces/.gitkeep
-api_runs/*
-!api_runs/.gitkeep
-runtime/
-tests/
 .codex/
 .idea/
 .vscode/
 .DS_Store

+.git/
 .gitignore
+AGENTS.md
+runtime/
+data/
+inputs/
 __pycache__/
 *.py[cod]
 .pytest_cache/
 .mypy_cache/
 .ruff_cache/
 .env
 .envrc
 .venv/
 venv/
+env/
 .codex/
+.agents/
 .idea/
 .vscode/
 .DS_Store

.env.example DELETED Viewed

@@ -1,39 +0,0 @@
-# Required
-API_KEY="your_openai_compatible_key"                      # API key for your OpenAI-compatible LLM provider.
-API_BASE="https://your-openai-compatible-endpoint/v1"     # Base URL for the OpenAI-compatible chat-completions endpoint.
-MODEL_NAME="gpt-5.5"                                      # Main model used by the agent and WebFetch summarization.
-SERPER_KEY="your_serper_key"                              # https://serper.dev/
-JINA_KEY="your_jina_key"                                  # https://jina.ai/
-MINERU_TOKEN="your_mineru_token"                          # https://mineru.net/
-HF_TOKEN="your_huggingface_token"                         # Hugging Face token with dataset write access when collection is enabled.
-# Optional
-WORKSPACE_ROOT="./workspace"                              # Default local workspace root when --workspace-root is not provided.
-MAX_LLM_CALL_PER_RUN=100                                  # Maximum chat-completions calls allowed in one agent run.
-MAX_AGENT_ROUNDS=100                                      # Maximum ReAct loop rounds before forced termination.
-MAX_AGENT_RUNTIME_SECONDS=9000                            # Maximum wall-clock runtime per agent run.
-LLM_TIMEOUT_SECONDS=600                                   # Timeout for each chat-completions request.
-LLM_MAX_OUTPUT_TOKENS=10000                               # Maximum output tokens requested from the main model.
-MAX_INPUT_TOKENS=320000                                   # Maximum input-token budget used for runtime token accounting.
-LLM_MAX_RETRIES=10                                        # Maximum retries for transient LLM API failures.
-TEMPERATURE=0.6                                           # Main model sampling temperature.
-TOP_P=0.95                                                # Main model nucleus-sampling top_p.
-PRESENCE_PENALTY=1.1                                      # Main model presence penalty when supported by the provider.
-AUTO_COMPACT_TRIGGER_TOKENS="128k"                        # Context size threshold that triggers automatic memory compaction.
-IMAGE_PART_TOKEN_ESTIMATE=1536                            # Token estimate used for each runtime image_url content part.
-LLM_IMAGE_MAX_EDGE=1568                                   # Maximum image edge length sent to multimodal LLMs.
-LLM_IMAGE_MAX_BYTES=524288                                # Maximum compressed image payload size sent to multimodal LLMs.
-LLM_IMAGE_JPEG_QUALITY=85                                 # Initial JPEG quality for runtime image compression.
-DEBUG_AGENT=false                                         # Print verbose agent-loop debug logs.
-DEBUG_SEARCH=false                                        # Print verbose WebSearch debug logs.
-DEBUG_SCHOLAR=false                                       # Print verbose ScholarSearch debug logs.
-DEBUG_VISIT=false                                         # Print verbose WebFetch debug logs.
-RH_SPACE_RUNS_DIR="/tmp/researchharness_space/runs"       # Parent directory for temporary per-chat runs in hosted mode.
-RH_SPACE_RETENTION_SECONDS=21600                          # Delete inactive hosted runs older than this many seconds.
-RH_SPACE_MAX_RUNS=40                                      # Keep at most this many inactive hosted runs.
-RH_SPACE_CLEANUP_INTERVAL_SECONDS=900                     # Background cleanup interval for hosted runs.
-RH_COLLECTION_ENABLED=true                                # Automatically collect hosted run traces after each completed run.
-RH_COLLECTION_DATASET_REPO="CoCoOne/ResearchHarness-Data" # Hugging Face dataset repo receiving trace PRs.
-RH_COLLECTION_BATCH_SIZE=5                                # Create one dataset PR after this many collected runs.
-RH_COLLECTION_MAX_BUNDLE_BYTES=20971520                   # Drop any single trace bundle larger than this many bytes.
-RH_ROLE_PROMPT_FILES=""                                   # Optional role prompt files separated by os.pathsep.

.gitignore CHANGED Viewed

@@ -1,230 +1,31 @@
-runtime/
-# Local agent artifacts
 AGENTS.md
-workspace/*
-!workspace/.gitkeep
-api_runs/*
-!api_runs/.gitkeep
-traces/*
-!traces/.gitkeep
-/inputs/
 data/
-benchmarks/**/local_*.py
-.idea/
-.vscode/
-.DS_Store
-tests/example_files/pdfs/dummy_document
-.codex
-# Byte-compiled / optimized / DLL files
 __pycache__/
-*.py[codz]
 *$py.class
-# C extensions
 *.so
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py.cover
-.hypothesis/
-.pytest_cache/
-cover/
-# Translations
-*.mo
-*.pot
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask stuff:
-instance/
-.webassets-cache
-# Scrapy stuff:
-.scrapy
-# Sphinx documentation
-docs/_build/
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-# UV
-#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#uv.lock
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-#poetry.toml
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
-#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
-#pdm.lock
-#pdm.toml
-.pdm-python
-.pdm-build/
-# pixi
-#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
-#pixi.lock
-#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
-#   in the .venv directory. It is recommended not to include this directory in version control.
-.pixi
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-# SageMath parsed files
-*.sage.py
-# Environments
 .env
 .envrc
-.venv
-env/
 venv/
-ENV/
-env.bak/
-venv.bak/
-# Spyder project settings
-.spyderproject
-.spyproject
-# Rope project settings
-.ropeproject
-# mkdocs documentation
-/site
-# mypy
 .mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre type checker
-.pyre/
-# pytype static type analyzer
-.pytype/
-# Cython debug symbols
-cython_debug/
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-# Abstra
-# Abstra is an AI-powered process automation framework.
-# Ignore directories containing user credentials, local state, and settings.
-# Learn more at https://abstra.io/docs
-.abstra/
-# Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
-#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer,
-#  you could uncomment the following to ignore the entire vscode folder
-# .vscode/
-# Ruff stuff:
 .ruff_cache/
-# PyPI configuration file
-.pypirc
-# Cursor
-#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
-#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
-#  refer to https://docs.cursor.com/context/ignore-files
-.cursorignore
-.cursorindexingignore
-# Marimo
-marimo/_static/
-marimo/_lsp/
-__marimo__/
-# Hugging Face Space runtime artifacts
-runtime/
-/tmp/

 AGENTS.md
+runtime/
 data/
+inputs/
 __pycache__/
+*.py[cod]
 *$py.class
 *.so
 .env
 .envrc
+.venv/
 venv/
+env/
+.pytest_cache/
 .mypy_cache/
 .ruff_cache/
+.coverage
+htmlcov/
+build/
+dist/
+*.egg-info/
+.codex/
+.agents/
+.idea/
+.vscode/
+.DS_Store

README.md CHANGED Viewed

@@ -10,17 +10,87 @@ license: mit
 short_description: Lightweight harness for tool-using LLM agents.
 ---
-# ResearchHarness Space
-This Space runs the ResearchHarness browser frontend as a lightweight hosted agent UI.
-It reuses the ResearchHarness tool-calling runtime and keeps the hosted mode intentionally simple:
-- Users do not choose a local workspace.
-- Each new chat gets an isolated temporary runtime directory.
-- Uploaded images are saved under that chat workspace and also passed to the model when supported.
-- Agent traces and session state are stored beside the temporary workspace.
-- Completed runs are automatically packaged for trajectory collection.
-- Old workspaces and traces are cleaned periodically so the Space does not grow without bound.
 ## Required Secrets
@@ -48,7 +118,6 @@ Configure these as Hugging Face Space secrets before starting the app:
 | `RH_COLLECTION_DATASET_REPO` | `CoCoOne/ResearchHarness-Data` | Dataset repo that receives trajectory PRs. |
 | `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
 | `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
-| `RH_ROLE_PROMPT_FILES` | empty | Optional `os.pathsep`-separated role prompt files inside the Space image. |
 | `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
 ## Runtime Layout
@@ -82,3 +151,31 @@ python app.py
 ```
 Then open `http://127.0.0.1:7860`.

 short_description: Lightweight harness for tool-using LLM agents.
 ---
+# ResearchHarness Space Maintenance Notes
+This repository is the Hugging Face Docker Space deployment for
+[`ResearchHarness`](https://github.com/black-yt/ResearchHarness). It is an online
+app mirror, not the public open-source documentation and not a full source mirror.
+The public project README, tutorials, benchmark notes, API server documentation,
+and local CLI documentation belong in the main GitHub repository. This Space
+README should stay focused on long-term deployment maintenance: what is copied
+from the main repo, what is intentionally changed for hosted use, and what is
+new in the Space.
+## Repository Relationship
+| Repository | Role |
+| --- | --- |
+| `black-yt/ResearchHarness` | Main open-source runtime, CLI, API server, frontend, docs, tests, and benchmark adapters. |
+| `CoCoOne/ResearchHarness` | Hugging Face Space app that hosts the browser frontend with managed temporary workspaces. |
+| `CoCoOne/ResearchHarness-Data` | Hugging Face dataset receiving collected hosted-run trajectory PRs. |
+Maintenance rule:
+- Copy only the runtime/frontend pieces needed by the hosted app.
+- Do not blindly sync the whole main repository into this Space.
+- Space-only deployment logic must not be copied back into the main repo unless
+  it is genuinely general-purpose.
+- Public documentation should be updated in the main repo, not duplicated here.
+## Copied From The Main Repository
+These files/directories are copied from the main repo and should be refreshed
+when their corresponding upstream implementation changes:
+- `agent_base/`: core ReAct runtime, prompts, tool registry, provider
+  compatibility, trace/session state, image handling, and compaction logic.
+- `agent_base/tools/`: hosted-safe tool implementations used by the frontend.
+- `frontend/static/`: shared browser UI assets, styles, and client logic.
+- `frontend/local_server.py`: WebSocket streaming frontend server base, with
+  Space-specific managed-workspace behavior preserved.
+- `requirements.txt`: Python runtime dependencies needed by the hosted app.
+When updating these files from the main repo, inspect the diff and preserve the
+Space-specific changes listed below.
+## Space-Specific Changes
+These behaviors are intentional Space-only deltas:
+- `app.py` is the Hugging Face entrypoint and owns Space startup, cleanup, and
+  trajectory collection configuration.
+- Users cannot select arbitrary server folders. Each new chat gets an isolated
+  managed run directory under `RH_SPACE_RUNS_DIR`.
+- The runtime layout is always:
+  `run_.../agent_workspace/` for agent-visible files and
+  `run_.../agent_trace/` for traces and `_session_state.json`.
+- Uploaded images are saved under `agent_workspace/inputs/images/` and are also
+  passed to the model as image inputs when supported.
+- The frontend exposes a per-run model dropdown. Current options are `gpt-5.5`
+  and `claude-opus-4-7`; the selection must stay local to that run and must not
+  mutate global process environment variables.
+- Completed runs are packaged for trajectory collection and submitted as pull
+  requests to the configured Hugging Face dataset after the batch threshold is
+  reached.
+- Old inactive runs are cleaned periodically so the Space does not grow without
+  bound.
+## Intentionally Removed From The Space
+The Space intentionally does not keep the full main-repo surface area:
+- `run_agent.py`, `run_server.py`, `run_frontend.py`
+- OpenAI-compatible API server code under `api/`
+- benchmark adapters and benchmark documentation under `benchmarks/`
+- long-form tutorials under `docs/`
+- local placeholder directories such as `workspace/`, `api_runs/`, and `traces/`
+- CLI-only console formatting helpers
+- test fixtures and local test suites
+- `.env.example`
+Removing these files keeps the deployed app small and avoids stale code or
+misleading documentation drifting away from the main repository.
 ## Required Secrets
 | `RH_COLLECTION_DATASET_REPO` | `CoCoOne/ResearchHarness-Data` | Dataset repo that receives trajectory PRs. |
 | `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
 | `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
 | `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
 ## Runtime Layout
 ```
 Then open `http://127.0.0.1:7860`.
+Before pushing Space changes, run at least:
+```bash
+python3 -B - <<'PY'
+from pathlib import Path
+import py_compile
+for path in Path(".").rglob("*.py"):
+    if ".git" not in path.parts:
+        py_compile.compile(str(path), doraise=True)
+print("syntax ok")
+PY
+RH_COLLECTION_ENABLED=false python3 -B - <<'PY'
+from fastapi.testclient import TestClient
+import app
+client = TestClient(app.app)
+response = client.get("/")
+assert response.status_code == 200
+assert "ResearchHarness" in response.text
+print("app ok")
+PY
+node --check frontend/static/app.js
+git diff --check
+```

agent_base/console_utils.py DELETED Viewed

@@ -1,223 +0,0 @@
-import argparse
-import json
-import os
-from pathlib import Path
-import shutil
-import sys
-import unicodedata
-from typing import Any, Optional
-ANSI_RESET = "\033[0m"
-ANSI_COLORS = {
-    "header": "\033[36m",
-    "assistant": "\033[32m",
-    "tool": "\033[33m",
-    "runtime": "\033[34m",
-    "user": "\033[35m",
-    "error": "\033[31m",
-}
-def _char_display_width(char: str) -> int:
-    if unicodedata.combining(char):
-        return 0
-    if unicodedata.category(char) in {"Cc", "Cf"}:
-        return 0
-    return 2 if unicodedata.east_asian_width(char) in {"F", "W"} else 1
-def _display_width(text: str) -> int:
-    return sum(_char_display_width(char) for char in str(text))
-def _truncate_display(text: str, width: int) -> str:
-    if _display_width(text) <= width:
-        return text
-    suffix = "..."
-    target = max(0, width - _display_width(suffix))
-    out = []
-    used = 0
-    for char in text:
-        char_width = _char_display_width(char)
-        if used + char_width > target:
-            break
-        out.append(char)
-        used += char_width
-    return "".join(out) + suffix
-def _pad_display(text: str, width: int) -> str:
-    return text + " " * max(0, width - _display_width(text))
-def _last_soft_break(chars: list[str]) -> int:
-    for index in range(len(chars) - 1, 0, -1):
-        if chars[index].isspace() and "".join(chars[:index]).strip():
-            return index
-    return -1
-class ConsoleEventPrinter:
-    def __init__(self, *, model_name: str, workspace_root: Path, prompt: str):
-        self.model_name = model_name
-        self.workspace_root = workspace_root
-        self.prompt = prompt.strip()
-        self._printed_any = False
-        self._use_color = (
-            "NO_COLOR" not in os.environ
-            and os.environ.get("TERM") != "dumb"
-            and (sys.stdout.isatty() or bool(os.environ.get("FORCE_COLOR") or os.environ.get("CLICOLOR_FORCE")))
-        )
-    def print_header(self) -> None:
-        self._print_box(
-            "ResearchHarness CLI",
-            f"Model: {self.model_name}\nWorkspace Root: {self.workspace_root}\n\nPrompt:\n{self.prompt}",
-            "header",
-        )
-    def reset_rounds(self) -> None:
-        self._printed_any = False
-    def _paint(self, text: str, color_key: str) -> str:
-        if not self._use_color:
-            return text
-        return f"{ANSI_COLORS.get(color_key, '')}{text}{ANSI_RESET}"
-    def _terminal_width(self) -> int:
-        return max(60, min(110, shutil.get_terminal_size((100, 20)).columns))
-    def _wrap_line(self, line: str, width: int) -> list[str]:
-        expanded = line.expandtabs(2)
-        if expanded == "":
-            return [""]
-        chunks: list[str] = []
-        current: list[str] = []
-        current_width = 0
-        for char in expanded:
-            char_width = _char_display_width(char)
-            if current and current_width + char_width > width:
-                break_at = _last_soft_break(current)
-                if break_at > 0:
-                    chunks.append("".join(current[:break_at]).rstrip())
-                    current = list("".join(current[break_at + 1 :]).lstrip())
-                    current_width = _display_width("".join(current))
-                else:
-                    chunks.append("".join(current))
-                    current = []
-                    current_width = 0
-            current.append(char)
-            current_width += char_width
-        if current:
-            chunks.append("".join(current))
-        return chunks or [""]
-    def _print_box(self, title: str, body: str, color_key: str = "runtime") -> None:
-        width = self._terminal_width()
-        inner_width = width - 4
-        title_text = f" {_truncate_display(title.strip(), width - 6)} "
-        top = "+" + title_text + "-" * max(0, width - 2 - _display_width(title_text)) + "+"
-        bottom = "+" + "-" * (width - 2) + "+"
-        if self._printed_any:
-            print()
-        print(self._paint(top, color_key))
-        for raw_line in str(body or "").splitlines() or [""]:
-            for line in self._wrap_line(raw_line, inner_width):
-                padded = _pad_display(line, inner_width)
-                print(f"{self._paint('|', color_key)} {padded} {self._paint('|', color_key)}")
-        print(self._paint(bottom, color_key))
-        self._printed_any = True
-    def _title(self, label: str, turn_index: int) -> str:
-        return f"{label} | round {turn_index}" if turn_index > 0 else label
-    def _format_tool_call(self, tool_name: str, tool_args: Any) -> str:
-        try:
-            tool_args_text = json.dumps(tool_args, ensure_ascii=False, indent=2)
-        except TypeError:
-            tool_args_text = str(tool_args)
-        return f"- {tool_name}\n{tool_args_text}"
-    def handle_event(self, row: dict[str, Any]) -> None:
-        role = str(row.get("role", ""))
-        turn_index = int(row.get("turn_index", 0) or 0)
-        text = str(row.get("text", ""))
-        capture_type = str(row.get("capture_type", ""))
-        tool_names = row.get("tool_names") if isinstance(row.get("tool_names"), list) else []
-        tool_arguments = row.get("tool_arguments") if isinstance(row.get("tool_arguments"), list) else []
-        finish_reason = str(row.get("finish_reason", ""))
-        error = str(row.get("error", ""))
-        if capture_type and not text.strip():
-            return
-        if role == "system":
-            return
-        if role == "user":
-            if turn_index == 0:
-                return
-            self._print_box(self._title("Runtime Message", turn_index), text, "user")
-            return
-        if role == "assistant":
-            lines: list[str] = []
-            if tool_names:
-                if text.strip():
-                    lines.append(text)
-                else:
-                    suffix = f" finish_reason={finish_reason}" if finish_reason else ""
-                    lines.append(f"(no text; native tool-calls only.{suffix})")
-                lines.append("")
-                lines.append("Assistant Tool Calls:")
-                for idx, tool_name in enumerate(tool_names):
-                    tool_args = tool_arguments[idx] if idx < len(tool_arguments) else {}
-                    lines.append(self._format_tool_call(str(tool_name), tool_args))
-            elif text.strip():
-                lines.append(text)
-            else:
-                suffix = f" finish_reason={finish_reason}" if finish_reason else ""
-                lines.append(f"(empty assistant output.{suffix})")
-            if error:
-                lines.append("")
-                lines.append(f"Assistant Error: {error}")
-            self._print_box(self._title("Assistant", turn_index), "\n".join(lines), "error" if error else "assistant")
-            return
-        if role == "tool":
-            tool_name = str(tool_names[0]) if tool_names else "Tool"
-            lines = [text]
-            if error:
-                lines.extend(["", f"{tool_name} Error: {error}"])
-            self._print_box(self._title(f"{tool_name} Result", turn_index), "\n".join(lines), "error" if error else "tool")
-            return
-        if role == "runtime":
-            lines = [text]
-            if error:
-                lines.extend(["", f"Runtime Error: {error}"])
-            self._print_box(self._title("Runtime", turn_index), "\n".join(lines), "error" if error else "runtime")
-def main(argv: Optional[list[str]] = None) -> int:
-    parser = argparse.ArgumentParser(description="Show a minimal example of the CLI console event formatter.")
-    parser.parse_args(argv)
-    printer = ConsoleEventPrinter(model_name="demo-model", workspace_root=Path("."), prompt="demo question")
-    printer.print_header()
-    printer.handle_event(
-        {
-            "role": "assistant",
-            "turn_index": 1,
-            "text": "",
-            "tool_names": ["Read"],
-            "tool_arguments": [{"path": "demo.txt"}],
-            "termination": "",
-            "error": "",
-        }
-    )
-    return 0
-if __name__ == "__main__":
-    raise SystemExit(main())

agent_base/react_agent.py CHANGED Viewed

@@ -1,18 +1,15 @@
-import argparse
 from contextlib import contextmanager
 import json
 import os
 import re
 import signal
-import sys
 import threading
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Sequence, Type
 from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
 import tiktoken
 from agent_base.base import BaseAgent
-from agent_base.console_utils import ConsoleEventPrinter
 from agent_base.context_compact import compact_messages, should_compact_messages
 from agent_base.model_profiles import resolve_model_profile
 from agent_base.provider_compat import apply_sampling_params
@@ -25,16 +22,8 @@ from agent_base.tools.tool_runtime import Bash, TerminalInterrupt, TerminalKill,
 from agent_base.tools.tool_user import AskUser
 from agent_base.tools.tool_web import ScholarSearch, WebFetch, WebSearch
 from agent_base.utils import (
-    PROJECT_ROOT,
-    MissingRequiredEnvError,
-    append_saved_image_paths_to_prompt,
     env_flag,
-    image_input_content_parts,
-    load_dotenv,
-    read_role_prompt_files,
-    require_required_env,
     safe_jsonable,
-    stage_image_file_for_input,
 )
 import datetime
@@ -75,6 +64,10 @@ DEFAULT_PRESENCE_PENALTY = 1.1
 DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
 class LLMHardTimeoutError(TimeoutError):
     pass
@@ -551,10 +544,10 @@ def image_context_trace_text(result: Any) -> str:
     return text
-def default_llm_config() -> dict:
-    model_name = os.environ.get("MODEL_NAME", DEFAULT_MODEL_NAME)
     return {
-        "model": model_name,
         "api_key": os.environ.get("API_KEY", "EMPTY"),
         "api_base": os.environ.get("API_BASE"),
         "timeout_seconds": float(os.environ.get("LLM_TIMEOUT_SECONDS", str(DEFAULT_LLM_TIMEOUT_SECONDS))),
@@ -1195,6 +1188,7 @@ class MultiTurnReactAgent(BaseAgent):
                             tool_arguments,
                             workspace_root=resolved_workspace_root,
                             runtime_deadline=runtime_deadline,
                         )
                     except KeyboardInterrupt:
                         messages = messages[:tool_turn_message_start]
@@ -1312,142 +1306,3 @@ class MultiTurnReactAgent(BaseAgent):
     def custom_call_tool(self, tool_name: str, tool_args: Any, **kwargs):
         return execute_tool_by_name(self.tool_map, tool_name, tool_args, **kwargs)
-def _path_has_suffix(path: Path, suffix_parts: Sequence[str]) -> bool:
-    normalized_parts = tuple(part.casefold() for part in path.parts)
-    normalized_suffix = tuple(part.casefold() for part in suffix_parts)
-    if len(normalized_parts) < len(normalized_suffix):
-        return False
-    return normalized_parts[-len(normalized_suffix) :] == normalized_suffix
-def resolve_agent_class_for_role_prompt_files(role_prompt_files: Sequence[str]) -> Type[MultiTurnReactAgent]:
-    for raw_path in role_prompt_files:
-        path_text = str(raw_path).strip()
-        if not path_text:
-            continue
-        path = Path(path_text).expanduser().resolve(strict=False)
-        if _path_has_suffix(path, ("benchmarks", "ResearchClawBench", "role_prompt.md")):
-            from benchmarks.ResearchClawBench.adapter import ResearchClawBenchAgent
-            return ResearchClawBenchAgent
-    return MultiTurnReactAgent
-def _parse_cli_args(argv: list[str]) -> tuple[str, Optional[str], Optional[str], str, list[str], list[str], Optional[bool]]:
-    parser = argparse.ArgumentParser(description="Run the local agent directly from agent_base.react_agent.")
-    parser.add_argument("prompt", nargs="*", help="Prompt text.")
-    parser.add_argument("--prompt-file", help="Optional UTF-8 text file containing the prompt.")
-    parser.add_argument("--trace-dir", help="Optional directory where the run trace JSONL should be created.")
-    parser.add_argument(
-        "--workspace-root",
-        help="Optional workspace root for local file tools, Bash, and TerminalStart.",
-    )
-    parser.add_argument(
-        "--role-prompt-file",
-        action="append",
-        default=[],
-        dest="role_prompt_files",
-        metavar="PATH",
-        help="Append one role-specific prompt file to the base system prompt. May be passed multiple times.",
-    )
-    parser.add_argument(
-        "--images",
-        action="append",
-        nargs="+",
-        default=[],
-        dest="image_paths",
-        metavar="PATH",
-        help="Attach one or more local image paths to the initial user message.",
-    )
-    parser.add_argument(
-        "--chat",
-        action=argparse.BooleanOptionalAction,
-        default=None,
-        help="Continue asking for follow-up user messages after each final answer. Defaults to on only in an interactive terminal.",
-    )
-    args = parser.parse_args(argv)
-    prompt_text = ""
-    if args.prompt_file:
-        prompt_text = Path(args.prompt_file).read_text(encoding="utf-8").strip()
-    elif args.prompt:
-        prompt_text = " ".join(args.prompt).strip()
-    if not prompt_text:
-        raise ValueError("A non-empty prompt is required via positional args or --prompt-file.")
-    role_prompt = read_role_prompt_files(args.role_prompt_files)
-    return (
-        prompt_text,
-        args.trace_dir,
-        args.workspace_root,
-        role_prompt,
-        list(args.role_prompt_files),
-        [path for group in args.image_paths for path in group],
-        args.chat,
-    )
-def main(argv: Optional[list[str]] = None) -> int:
-    load_dotenv(PROJECT_ROOT / ".env")
-    try:
-        require_required_env("ResearchHarness agent")
-        prompt_text, trace_dir, workspace_root, role_prompt, role_prompt_files, image_paths, chat_arg = _parse_cli_args(argv or sys.argv[1:])
-        agent_cls = resolve_agent_class_for_role_prompt_files(role_prompt_files)
-        agent = agent_cls(
-            llm=default_llm_config(),
-            trace_dir=trace_dir,
-            role_prompt=role_prompt or None,
-        )
-        resolved_workspace_root = normalize_workspace_root(workspace_root)
-        initial_content_parts: list[dict[str, Any]] = []
-        saved_image_paths: list[str] = []
-        for image_index, image_path in enumerate(image_paths):
-            saved_path, data_url = stage_image_file_for_input(
-                image_path,
-                workspace_root=resolved_workspace_root,
-                image_index=image_index,
-            )
-            saved_image_paths.append(saved_path)
-            initial_content_parts.extend(image_input_content_parts(data_url, saved_path))
-        run_prompt = append_saved_image_paths_to_prompt(prompt_text, saved_image_paths)
-        printer = ConsoleEventPrinter(
-            model_name=agent.model,
-            workspace_root=resolved_workspace_root,
-            prompt=run_prompt,
-        )
-        printer.print_header()
-        session = agent._run_session(
-            run_prompt,
-            workspace_root=str(resolved_workspace_root),
-            event_callback=printer.handle_event,
-            initial_content_parts=initial_content_parts or None,
-        )
-        chat_enabled = chat_arg if chat_arg is not None else (sys.stdin.isatty() and sys.stdout.isatty())
-        messages = session.get("messages", [])
-        while chat_enabled:
-            try:
-                followup = input("\n[ResearchHarness] Follow-up (Ctrl+C to exit): ").strip()
-            except (KeyboardInterrupt, EOFError):
-                print("\n[ResearchHarness] Chat ended.")
-                break
-            if not followup:
-                continue
-            print(f"\n[ResearchHarness] Continuing conversation: {followup}")
-            printer.reset_rounds()
-            session = agent._run_session(
-                followup,
-                workspace_root=str(resolved_workspace_root),
-                event_callback=printer.handle_event,
-                prior_messages=messages,
-            )
-            messages = session.get("messages", messages)
-        return 0
-    except (MissingRequiredEnvError, ValueError) as exc:
-        print(str(exc), file=sys.stderr)
-        return 1
-if __name__ == "__main__":
-    raise SystemExit(main())

 from contextlib import contextmanager
 import json
 import os
 import re
 import signal
 import threading
 from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Sequence
 from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
 import tiktoken
 from agent_base.base import BaseAgent
 from agent_base.context_compact import compact_messages, should_compact_messages
 from agent_base.model_profiles import resolve_model_profile
 from agent_base.provider_compat import apply_sampling_params
 from agent_base.tools.tool_user import AskUser
 from agent_base.tools.tool_web import ScholarSearch, WebFetch, WebSearch
 from agent_base.utils import (
     env_flag,
     safe_jsonable,
 )
 import datetime
 DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
+def default_model_name() -> str:
+    return os.environ.get("MODEL_NAME", DEFAULT_MODEL_NAME).strip() or DEFAULT_MODEL_NAME
 class LLMHardTimeoutError(TimeoutError):
     pass
     return text
+def default_llm_config(model_name: Optional[str] = None) -> dict:
+    selected_model = str(model_name or "").strip() or default_model_name()
     return {
+        "model": selected_model,
         "api_key": os.environ.get("API_KEY", "EMPTY"),
         "api_base": os.environ.get("API_BASE"),
         "timeout_seconds": float(os.environ.get("LLM_TIMEOUT_SECONDS", str(DEFAULT_LLM_TIMEOUT_SECONDS))),
                             tool_arguments,
                             workspace_root=resolved_workspace_root,
                             runtime_deadline=runtime_deadline,
+                            model_name=self.model,
                         )
                     except KeyboardInterrupt:
                         messages = messages[:tool_turn_message_start]
     def custom_call_tool(self, tool_name: str, tool_args: Any, **kwargs):
         return execute_tool_by_name(self.tool_map, tool_name, tool_args, **kwargs)

agent_base/tools/README.md DELETED Viewed

@@ -1,457 +0,0 @@
-# Tools
-This document describes the tool surface exposed to the model. Tool names use PascalCase consistently.
-The current implementation is grouped by category:
-- `agent_base/tools/tool_file.py`
-- `agent_base/tools/tool_runtime.py`
-- `agent_base/tools/tool_user.py`
-- `agent_base/tools/tool_web.py`
-## Overview
-The current tool set is:
-- `Glob`
-- `Grep`
-- `Read`
-- `ReadPDF`
-- `ReadImage`
-- `Write`
-- `Edit`
-- `Bash`
-- `WebSearch`
-- `ScholarSearch`
-- `WebFetch`
-- `AskUser`
-- `TerminalStart`
-- `TerminalWrite`
-- `TerminalRead`
-- `TerminalInterrupt`
-- `TerminalKill`
-## Tool Matrix
-| Tool | Category | Arguments | Description | Return Shape / Notes |
-| --- | --- | --- | --- | --- |
-| `Glob` | Local files | `pattern`, `path?`, `include_dirs?`, `max_results?` | Discover files or directories by pathname pattern inside the workspace. | Returns `root`, `match_count`, `truncated`, and `results`. Best for pathname discovery rather than reading content. |
-| `Grep` | Local files | `pattern`, `path?`, `glob?`, `case_sensitive?`, `max_results?`, `max_chars?` | Search local text files by content and return matching lines. | Returns search metadata plus matched file paths, line numbers, and line text. Skips obvious binary files, images, and PDFs. |
-| `Read` | Local files | `path`, `start_line?`, `end_line?`, `max_chars?` | Read a local text file, optionally by line range. | Returns normalized path, line metadata, truncation status, and `content`. Redirects PDF/image tasks toward `ReadPDF` or `ReadImage`. |
-| `ReadPDF` | Local files | `path`, `max_chars?`, `max_image_paths?` | Read a local PDF, extract text, and expose extracted image paths when available. | Returns text content plus `image_paths` and image-count metadata. Depends on [`structai`](https://github.com/black-yt/structai) and `MINERU_TOKEN`. |
-| `ReadImage` | Local files | `path` | Read a local image and expose image metadata for runtime multimodal use. | Returns image metadata only. During agent runs, the runtime sends a compressed attachment to the LLM API as an `image_url` content part. |
-| `Write` | Local files | `path`, `content`, `overwrite?` | Create a text file or overwrite one when explicitly allowed. | Creates parent directories automatically. Returns an error if the file exists and `overwrite=false`. |
-| `Edit` | Local files | `path`, `patch` | Apply a targeted patch to a local text file. | Expects unified-diff / hunk-style input. Context-based matching, not a full `patch(1)` implementation. |
-| `Bash` | Runtime | `command`, `timeout?`, `workdir?` | Run one-shot shell commands for deterministic local execution, parsing, and validation. | Returns `stdout` and `stderr`. Primary local execution tool for short Python, `rg`, `find`, `git`, and structured local processing. |
-| `WebSearch` | Web | `query` | Perform general web search over one or more complementary queries. | Returns a text summary headed by `## Web Results` with title, link, snippet, and date/source when available. Uses Serper. |
-| `ScholarSearch` | Web | `query` | Search academic results such as papers, year, abstract, and citations. | Returns a text summary headed by `## Scholar Results` with title, PDF link, publication info, year, citation count, and abstract. Uses Serper Scholar. |
-| `WebFetch` | Web | `url`, `goal` | Fetch a page, extract evidence relevant to a concrete goal, and summarize it. | Uses Jina Reader plus the configured summary model. Returns evidence-focused text rather than raw HTML. |
-| `AskUser` | Human interaction | `question`, `context?` | Ask the human user one concise clarification question when essential information cannot be determined from tools or existing instructions. | Writes the question to the interactive terminal and returns the user's answer. If no interactive terminal is available, returns an explicit unavailable message. |
-| `TerminalStart` | Runtime | `cwd?`, `shell?`, `rows?`, `cols?` | Start a persistent terminal session. | Returns session metadata such as `session_id`, `pid`, `cwd`, `shell`, `alive`, and `returncode`. |
-| `TerminalWrite` | Runtime | `session_id`, `input`, `append_newline?`, `yield_time_ms?`, `max_output_chars?` | Send input to a persistent terminal session and read incremental output. | Best for stateful shells, REPLs, and long-running foreground processes. |
-| `TerminalRead` | Runtime | `session_id`, `yield_time_ms?`, `max_output_chars?` | Read unread output from an existing persistent terminal session. | Useful when a process is still running and output arrives over time. |
-| `TerminalInterrupt` | Runtime | `session_id`, `max_output_chars?` | Send `Ctrl-C` to the foreground process in a terminal session without destroying the session. | Use when a long-running process must be interrupted but the shell should remain alive. |
-| `TerminalKill` | Runtime | `session_id`, `force?` | Terminate a persistent terminal session and release resources. | Final cleanup step for terminal sessions that are no longer needed. |
-## Glob
-Purpose:
-- Discover local files or directories by glob pattern.
-- Good for pathname discovery, not for reading file contents.
-Arguments:
-- `pattern`: string, a `pathlib`-style glob such as `**/*.py`
-- `path`: optional string, search root, defaults to the current workspace
-- `include_dirs`: optional boolean, defaults to `false`
-- `max_results`: optional integer, defaults to `200`
-Returns:
-- `root`
-- `pattern`
-- `include_dirs`
-- `match_count`
-- `truncated`
-- `results`
-## Grep
-Purpose:
-- Search local text files by content.
-- Return matched file paths, line numbers, and line text.
-Arguments:
-- `pattern`: string, regular expression
-- `path`: optional string, file or directory path, defaults to the current workspace
-- `glob`: optional string, file filter when scanning a directory, defaults to `**/*`
-- `case_sensitive`: optional boolean, defaults to `false`
-- `max_results`: optional integer, defaults to `100`
-- `max_chars`: optional integer, defaults to `20000`
-Behavior:
-- If `path` is a file, only that file is searched.
-- If `path` is a directory, matching text files are searched recursively.
-- Images, PDFs, and obviously binary files are skipped.
-Returns:
-- `root`
-- `pattern`
-- `glob`
-- `case_sensitive`
-- `files_scanned`
-- `match_count`
-- `truncated`
-- `results`
-## Read
-Purpose:
-- Read a local text file.
-- Support partial line ranges.
-- Support long-text truncation.
-Arguments:
-- `path`: string, file path
-- `start_line`: optional integer, 1-based start line
-- `end_line`: optional integer, 1-based end line
-- `max_chars`: optional integer, maximum returned characters, defaults to `20000`
-Behavior:
-- Only text files are handled directly.
-- If the input is a PDF, the tool tells the model to use `ReadPDF`.
-- If the input is an image, the tool tells the model to use `ReadImage`.
-Returns:
-- `path`
-- `source_type: text`
-- `start_line`
-- `end_line`
-- `total_lines`
-- `truncated`
-- `content`
-## ReadPDF
-Purpose:
-- Read a local PDF.
-- Return extracted text.
-- Return extracted local image paths when the PDF parser produces image assets.
-Arguments:
-- `path`: string, PDF path
-- `max_chars`: optional integer, maximum returned characters, defaults to `20000`
-- `max_image_paths`: optional integer, maximum listed extracted image paths, defaults to `20`
-Behavior:
-- Calls `structai.read_pdf(...)` from [`structai`](https://github.com/black-yt/structai) underneath.
-- Uses the returned `text` and `img_paths`.
-- Depends on `MINERU_TOKEN`.
-- If [`structai`](https://github.com/black-yt/structai) is missing, returns a clear dependency error instead of breaking unrelated file tools.
-- For PDF figure tasks, prefer `ReadPDF` first to discover extracted text and extracted image paths, then use `ReadImage` on the actual extracted image file.
-Returns:
-- `path`
-- `source_type: pdf`
-- `total_lines`
-- `truncated`
-- `image_count`
-- `image_paths_listed`
-- `image_paths_truncated`
-- `image_paths`
-- `content`
-## ReadImage
-Purpose:
-- Read a local image.
-- Return image metadata.
-- During a main agent run, pass a compressed image to the LLM API as an `image_url` content part instead of stuffing raw base64 text into ordinary message text.
-Arguments:
-- `path`: string, image path
-Behavior:
-- Uses `PIL.Image.open(...)` underneath.
-- The runtime creates a compressed JPEG attachment for the LLM request and sends it as an inline `data:` URL in an `image_url` content part.
-- Trace records and direct tool output keep image metadata only, not the full binary payload.
-Returns:
-- `path`
-- `source_type`
-- `format`
-- `mime_type`
-- `mode`
-- `width`
-- `height`
-- `byte_count`
-- `llm_attachment_format`
-- `llm_attachment_width`
-- `llm_attachment_height`
-- `llm_attachment_byte_count`
-## Write
-Purpose:
-- Create a text file.
-- Overwrite an existing file when explicitly requested.
-Arguments:
-- `path`: string, destination file path
-- `content`: string, complete file content
-- `overwrite`: optional boolean, defaults to `false`
-Behavior:
-- Parent directories are created automatically.
-- If `overwrite=false` and the file already exists, the tool returns an error.
-## Edit
-Purpose:
-- Make partial, in-place edits to a local text file.
-- Best for targeted patches, not full-file rewrites.
-Arguments:
-- `path`: string, destination file path
-- `patch`: string, unified-diff / hunk-style patch
-Behavior:
-- Requires explicit hunks such as `@@ -1,2 +1,2 @@`.
-- The current implementation matches by surrounding context blocks rather than implementing full `patch(1)` line-number semantics.
-Returns:
-- updated file path on success
-- applied hunk count
-## Bash
-Purpose:
-- Execute one-shot shell commands.
-- Handle paths, search, git, conda, and local script orchestration.
-- Serve as the primary local execution tool for temporary Python, deterministic computation, validation, formatting, and parsing.
-Arguments:
-- `command`: string, shell command to execute
-- `timeout`: optional integer, seconds, defaults to `30`
-- `workdir`: optional string, working directory
-Behavior:
-- Uses local `bash`.
-- Returns both `stdout` and `stderr`.
-- Timeout produces an explicit error.
-- Short scripts are well suited to a heredoc such as `python3 - <<'PY'`.
-Recommended use cases:
-- pathname and file discovery
-- `rg`, `find`, `git`
-- local Python or other CLI programs
-- deterministic CSV / JSON / text processing
-- local computation and validation against absolute paths returned by file tools
-## WebSearch
-Purpose:
-- General web search.
-- Supports passing multiple complementary queries in one call.
-Arguments:
-- `query`: array of strings, at least one query
-Behavior:
-- Calls Serper's Google Search endpoint.
-- Reads `SERPER_KEY` at runtime.
-Returns:
-- query summary text
-- `## Web Results`
-- title, link, snippet, and date/source when available
-## ScholarSearch
-Purpose:
-- Academic search.
-- Return paper title, year, abstract, citation count, and related metadata.
-Arguments:
-- `query`: array of strings, at least one query
-Behavior:
-- Calls Serper's Google Scholar endpoint.
-- Reads `SERPER_KEY` at runtime.
-Returns:
-- query summary text
-- `## Scholar Results`
-- title, PDF link, `publicationInfo`, year, citation count, and abstract
-## WebFetch
-Purpose:
-- Visit a webpage.
-- Extract evidence relevant to a concrete goal.
-- Produce a goal-oriented summary.
-Arguments:
-- `url`: string or array of strings, page URL or URLs
-- `goal`: string, the specific goal to extract from the page
-Behavior:
-- Fetches page text through Jina Reader first.
-- Then calls the configured summary-model endpoint for evidence extraction and summarization.
-- Returns a fetch-and-extract result, not raw HTML.
-Dependencies:
-- `JINA_KEY`
-- `API_KEY`
-- `API_BASE`
-- `MODEL_NAME`
-Returns:
-- `The useful information in ...`
-- `Evidence in page:`
-- `Summary:`
-## TerminalStart
-Purpose:
-- Start a persistent terminal session.
-Arguments:
-- `cwd`: optional string, working directory
-- `shell`: optional string, shell path
-- `rows`: optional integer, terminal rows, defaults to `30`
-- `cols`: optional integer, terminal columns, defaults to `120`
-Returns:
-- `session_id`
-- `pid`
-- `cwd`
-- `shell`
-- `alive`
-- `returncode`
-## TerminalWrite
-Purpose:
-- Send input to an existing terminal session and read output.
-Arguments:
-- `session_id`: string, session id
-- `input`: string, text to send
-- `append_newline`: optional boolean, defaults to `true`
-- `yield_time_ms`: optional integer, defaults to `200`
-- `max_output_chars`: optional integer, defaults to `20000`
-## TerminalRead
-Purpose:
-- Read unread output from an existing terminal session.
-Arguments:
-- `session_id`: string, session id
-- `yield_time_ms`: optional integer, defaults to `200`
-- `max_output_chars`: optional integer, defaults to `20000`
-## TerminalInterrupt
-Purpose:
-- Send `Ctrl-C` to the foreground process in a terminal session.
-- Keep the session alive.
-Arguments:
-- `session_id`: string, session id
-- `max_output_chars`: optional integer, defaults to `20000`
-## TerminalKill
-Purpose:
-- Terminate a terminal session.
-- Release related resources.
-Arguments:
-- `session_id`: string, session id
-- `force`: optional boolean, defaults to `false`
-## AskUser
-Purpose:
-- Ask the human user for essential missing information, preference, or approval.
-- Use only when the answer cannot be determined from workspace files, available tools, or existing instructions.
-Arguments:
-- `question`: string, concise question to ask.
-- `context`: optional string, brief explanation of why the question is necessary.
-Behavior:
-- Writes the question to the interactive terminal and waits for one user answer.
-- Returns an explicit unavailable message instead of blocking when no interactive terminal exists.
-- Not available in ResearchClawBench runs.
-## Suggested Usage
-- Use `Glob` first for pathname discovery.
-- Use `Grep` first for local text search.
-- Use `Read` for local text files.
-- Use `ReadPDF` for local PDFs.
-- Use `ReadImage` for local images.
-- Use `Edit` for targeted file changes.
-- Use `Write` for full-file writes.
-- Use `Bash` for one-shot system commands.
-- Use `AskUser` only when a human answer is genuinely necessary.
-- Use `Terminal*` only when persistent interactive shell state is actually needed.
-- Route pure Python analysis through `Bash` rather than introducing a separate Python tool.

agent_base/tools/tool_web.py CHANGED Viewed

@@ -373,11 +373,12 @@ class WebFetch(ToolBase):
         except ValueError as exc:
             return f"[WebFetch] {exc}"
         runtime_deadline = kwargs.get("runtime_deadline")
         start_time = time.time()
         if isinstance(url, str):
-            response = self.readpage_jina(url, goal, runtime_deadline=runtime_deadline)
         elif isinstance(url, list):
             response = []
             start_time = time.time()
@@ -396,7 +397,12 @@ class WebFetch(ToolBase):
                     cur_response += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                     cur_response += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
                 else:
-                    cur_response = self.readpage_jina(one_url, goal, runtime_deadline=runtime_deadline)
                 response.append(cur_response)
             response = "\n=======\n".join(response)
         else:
@@ -406,11 +412,18 @@ class WebFetch(ToolBase):
             print(f"Summary Length {len(response)}")
         return response.strip()
-    def call_server(self, msgs, max_retries=2, runtime_deadline: Optional[float] = None):
         client = self._ensure_summary_client()
         if client is None or not self._summary_api_base:
             return "[WebFetch] Summary model error: API_BASE is not set."
-        if not self._summary_model_name:
             return "[WebFetch] Summary model error: MODEL_NAME is not set."
         last_error = "unknown summary-model error"
         for attempt in range(max_retries):
@@ -424,12 +437,12 @@ class WebFetch(ToolBase):
                     else client
                 )
                 request_kwargs = {
-                    "model": self._summary_model_name,
                     "messages": msgs,
                 }
                 apply_sampling_params(
                     request_kwargs,
-                    model_name=self._summary_model_name,
                     temperature=self._summary_temperature,
                     top_p=self._summary_top_p,
                     presence_penalty=self._summary_presence_penalty,
@@ -494,8 +507,21 @@ class WebFetch(ToolBase):
                 return content
         return "[WebFetch] Failed to read page: exhausted retries"
-    def readpage_jina(self, url: str, goal: str, runtime_deadline: Optional[float] = None) -> str:
-        summary_page_func = self.call_server
         max_retries = int(os.getenv("LLM_MAX_RETRIES", str(DEFAULT_LLM_MAX_RETRIES)))
         content = self.html_readpage_jina(url, runtime_deadline=runtime_deadline)

         except ValueError as exc:
             return f"[WebFetch] {exc}"
         runtime_deadline = kwargs.get("runtime_deadline")
+        summary_model_name = str(kwargs.get("model_name") or "").strip()
         start_time = time.time()
         if isinstance(url, str):
+            response = self.readpage_jina(url, goal, runtime_deadline=runtime_deadline, summary_model_name=summary_model_name)
         elif isinstance(url, list):
             response = []
             start_time = time.time()
                     cur_response += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
                     cur_response += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
                 else:
+                    cur_response = self.readpage_jina(
+                        one_url,
+                        goal,
+                        runtime_deadline=runtime_deadline,
+                        summary_model_name=summary_model_name,
+                    )
                 response.append(cur_response)
             response = "\n=======\n".join(response)
         else:
             print(f"Summary Length {len(response)}")
         return response.strip()
+    def call_server(
+        self,
+        msgs,
+        max_retries=2,
+        runtime_deadline: Optional[float] = None,
+        model_name: str = "",
+    ):
         client = self._ensure_summary_client()
         if client is None or not self._summary_api_base:
             return "[WebFetch] Summary model error: API_BASE is not set."
+        summary_model_name = str(model_name or self._summary_model_name or os.environ.get("MODEL_NAME", "")).strip()
+        if not summary_model_name:
             return "[WebFetch] Summary model error: MODEL_NAME is not set."
         last_error = "unknown summary-model error"
         for attempt in range(max_retries):
                     else client
                 )
                 request_kwargs = {
+                    "model": summary_model_name,
                     "messages": msgs,
                 }
                 apply_sampling_params(
                     request_kwargs,
+                    model_name=summary_model_name,
                     temperature=self._summary_temperature,
                     top_p=self._summary_top_p,
                     presence_penalty=self._summary_presence_penalty,
                 return content
         return "[WebFetch] Failed to read page: exhausted retries"
+    def readpage_jina(
+        self,
+        url: str,
+        goal: str,
+        runtime_deadline: Optional[float] = None,
+        summary_model_name: str = "",
+    ) -> str:
+        def summary_page_func(messages, max_retries=2, runtime_deadline: Optional[float] = None):
+            return self.call_server(
+                messages,
+                max_retries=max_retries,
+                runtime_deadline=runtime_deadline,
+                model_name=summary_model_name,
+            )
         max_retries = int(os.getenv("LLM_MAX_RETRIES", str(DEFAULT_LLM_MAX_RETRIES)))
         content = self.html_readpage_jina(url, runtime_deadline=runtime_deadline)

agent_base/utils.py CHANGED Viewed

@@ -87,21 +87,6 @@ def require_required_env(context: str = "ResearchHarness") -> None:
     )
-def read_role_prompt_files(paths: Iterable[str]) -> str:
-    blocks: list[str] = []
-    for raw_path in paths:
-        path_text = str(raw_path).strip()
-        if not path_text:
-            continue
-        path = Path(path_text).expanduser()
-        if not path.exists():
-            raise ValueError(f"Role prompt file does not exist: {path}")
-        if not path.is_file():
-            raise ValueError(f"Role prompt path is not a file: {path}")
-        blocks.append(path.read_text(encoding="utf-8").strip())
-    return "\n\n".join(block for block in blocks if block.strip())
 def _safe_image_stem(name: str, fallback: str) -> str:
     stem = re.sub(r"[^A-Za-z0-9_.-]+", "_", Path(name).stem).strip("._")
     return stem or fallback

     )
 def _safe_image_stem(name: str, fallback: str) -> str:
     stem = re.sub(r"[^A-Za-z0-9_.-]+", "_", Path(name).stem).strip("._")
     return stem or fallback

api/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-"""OpenAI-compatible API helpers for ResearchHarness."""

api/openai_server.py DELETED Viewed

@@ -1,518 +0,0 @@
-from __future__ import annotations
-import base64
-import binascii
-import datetime
-import json
-import re
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Optional
-from uuid import uuid4
-import uvicorn
-from fastapi import Body, FastAPI, Request
-from fastapi.responses import JSONResponse
-from agent_base.react_agent import (
-    AVAILABLE_TOOL_MAP,
-    MultiTurnReactAgent,
-    assistant_text_content,
-    default_llm_config,
-    model_supports_runtime_image_parts,
-)
-from agent_base.tools.tooling import normalize_workspace_root
-from agent_base.utils import append_jsonl, image_input_content_parts, read_role_prompt_files, safe_jsonable
-DATA_IMAGE_RE = re.compile(r"^data:(image/[A-Za-z0-9.+-]+);base64,(.*)$", re.DOTALL)
-IMAGE_EXTENSIONS = {
-    "image/png": ".png",
-    "image/jpeg": ".jpg",
-    "image/jpg": ".jpg",
-    "image/webp": ".webp",
-    "image/gif": ".gif",
-}
-DEFAULT_MAX_IMAGE_BYTES = 25 * 1024 * 1024
-INPUT_WRAPPER_SYSTEM_PROMPT = """You are the ResearchHarness input wrapper.
-Convert the user's OpenAI-compatible chat request into a stable task for a
-tool-using ResearchHarness agent.
-Return only a JSON object with these string fields:
-- agent_instruction: the task the agent should solve, including all substantive question details.
-- output_contract: the final output format or schema requested by the user. If no strict format is requested, say "plain text".
-- wrapper_notes: brief notes about images, constraints, or benchmark-specific requirements.
-Rules:
-- Do not answer the task.
-- Do not remove substantive constraints.
-- Keep strict final formatting requirements out of agent_instruction when possible.
-- If images are listed, mention their saved paths in agent_instruction.
-"""
-OUTPUT_WRAPPER_SYSTEM_PROMPT = """You are the ResearchHarness output wrapper.
-Format the ResearchHarness agent result so it satisfies the user's requested
-final output contract.
-Rules:
-- Return only the final answer requested by the user.
-- Do not add markdown fences unless the user explicitly required them.
-- Do not solve the task again.
-- Do not introduce facts not present in the agent result.
-- Make the answer complete and self-contained for a remote user or evaluator.
-- The answer may mention workspace files when useful, but it must not depend on
-  local files as the only carrier of the answer.
-- Include the actual answer and any necessary evidence or solution steps in the
-  returned text.
-- If reasoning or evidence is required, summarize it directly in the final
-  answer according to the requested format.
-- If the requested format is JSON, return valid JSON only.
-- If the agent result does not contain enough information, produce the best
-  contract-compliant failure answer instead of inventing evidence.
-"""
-class OpenAICompatError(Exception):
-    def __init__(self, status_code: int, message: str, error_type: str = "invalid_request_error"):
-        super().__init__(message)
-        self.status_code = status_code
-        self.message = message
-        self.error_type = error_type
-@dataclass
-class ServerConfig:
-    api_runs_dir: Path
-    role_prompt: str = ""
-    host: str = "127.0.0.1"
-    port: int = 8686
-    input_wrapper: bool = True
-    output_wrapper: bool = True
-@dataclass
-class PreparedInput:
-    wrapper_messages: list[dict[str, str]]
-    initial_content_parts: list[dict[str, Any]]
-    image_paths: list[str]
-def openai_error_response(exc: OpenAICompatError) -> JSONResponse:
-    return JSONResponse(
-        status_code=exc.status_code,
-        content={"error": {"message": exc.message, "type": exc.error_type}},
-    )
-def make_chat_completion_response(*, request_id: str, model: str, content: str) -> dict[str, Any]:
-    return {
-        "id": request_id,
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {"role": "assistant", "content": content},
-                "finish_reason": "stop",
-            }
-        ],
-    }
-def validate_chat_payload(payload: Any) -> dict[str, Any]:
-    if not isinstance(payload, dict):
-        raise OpenAICompatError(400, "Request body must be a JSON object.")
-    if payload.get("stream") is True:
-        raise OpenAICompatError(400, "Streaming is not supported by this synchronous endpoint.")
-    try:
-        n_value = int(payload.get("n", 1) or 1)
-    except (TypeError, ValueError) as exc:
-        raise OpenAICompatError(400, "n must be an integer.") from exc
-    if n_value != 1:
-        raise OpenAICompatError(400, "Only n=1 is supported.")
-    model = str(payload.get("model", "")).strip()
-    if not model:
-        raise OpenAICompatError(400, "model is required.")
-    messages = payload.get("messages")
-    if not isinstance(messages, list) or not messages:
-        raise OpenAICompatError(400, "messages must be a non-empty list.")
-    return payload
-def prepare_openai_input(messages: list[Any], workspace_root: Path) -> PreparedInput:
-    wrapper_messages: list[dict[str, str]] = []
-    initial_content_parts: list[dict[str, Any]] = []
-    image_paths: list[str] = []
-    image_dir = workspace_root / "inputs" / "images"
-    image_index = 0
-    for message in messages:
-        if not isinstance(message, dict):
-            raise OpenAICompatError(400, "Each message must be an object.")
-        role = str(message.get("role", "")).strip()
-        if role not in {"system", "user", "assistant"}:
-            raise OpenAICompatError(400, f"Unsupported message role: {role!r}.")
-        content = message.get("content", "")
-        text_parts: list[str] = []
-        if isinstance(content, str):
-            text_parts.append(content)
-        elif isinstance(content, list):
-            for part in content:
-                if not isinstance(part, dict):
-                    raise OpenAICompatError(400, "Multimodal content parts must be objects.")
-                part_type = str(part.get("type", "")).strip()
-                if part_type == "text":
-                    text_parts.append(str(part.get("text", "")))
-                elif part_type == "image_url":
-                    image_url = part.get("image_url")
-                    if not isinstance(image_url, dict):
-                        raise OpenAICompatError(400, "image_url content must contain an image_url object.")
-                    url = str(image_url.get("url", "")).strip()
-                    detail = str(image_url.get("detail", "auto") or "auto")
-                    rel_path = save_data_image(
-                        url,
-                        workspace_root=workspace_root,
-                        image_dir=image_dir,
-                        image_index=image_index,
-                    )
-                    image_index += 1
-                    image_paths.append(rel_path)
-                    text_parts.append(f"[image saved at {rel_path}]")
-                    initial_content_parts.extend(image_input_content_parts(url, rel_path, detail=detail))
-                else:
-                    raise OpenAICompatError(400, f"Unsupported content part type: {part_type!r}.")
-        else:
-            raise OpenAICompatError(400, "message content must be a string or a list of content parts.")
-        wrapper_messages.append({"role": role, "content": "\n".join(part for part in text_parts if part)})
-    return PreparedInput(
-        wrapper_messages=wrapper_messages,
-        initial_content_parts=initial_content_parts,
-        image_paths=image_paths,
-    )
-def save_data_image(url: str, *, workspace_root: Path, image_dir: Path, image_index: int) -> str:
-    match = DATA_IMAGE_RE.match(url)
-    if not match:
-        raise OpenAICompatError(
-            400,
-            "Only data:image/...;base64,... image_url inputs are supported in the first API version.",
-        )
-    mime_type = match.group(1).lower()
-    extension = IMAGE_EXTENSIONS.get(mime_type)
-    if extension is None:
-        raise OpenAICompatError(400, f"Unsupported image MIME type: {mime_type}.")
-    try:
-        image_bytes = base64.b64decode(match.group(2), validate=True)
-    except (binascii.Error, ValueError) as exc:
-        raise OpenAICompatError(400, "Invalid base64 image data.") from exc
-    if len(image_bytes) > DEFAULT_MAX_IMAGE_BYTES:
-        raise OpenAICompatError(400, f"Image exceeds the {DEFAULT_MAX_IMAGE_BYTES} byte limit.")
-    image_dir.mkdir(parents=True, exist_ok=True)
-    filename = f"image_{image_index:03d}{extension}"
-    path = image_dir / filename
-    path.write_bytes(image_bytes)
-    return path.relative_to(workspace_root).as_posix()
-def wrapper_request_payload(*, prepared: PreparedInput, payload: dict[str, Any]) -> dict[str, Any]:
-    return {
-        "messages": prepared.wrapper_messages,
-        "saved_image_paths": prepared.image_paths,
-        "response_format": safe_jsonable(payload.get("response_format")),
-        "requested_model_label": str(payload.get("model", "")),
-    }
-def build_input_wrapper_messages(*, prepared: PreparedInput, payload: dict[str, Any]) -> list[dict[str, str]]:
-    return [
-        {"role": "system", "content": INPUT_WRAPPER_SYSTEM_PROMPT},
-        {
-            "role": "user",
-            "content": json.dumps(wrapper_request_payload(prepared=prepared, payload=payload), ensure_ascii=False, indent=2),
-        },
-    ]
-def build_passthrough_input_plan(*, prepared: PreparedInput, payload: dict[str, Any]) -> dict[str, str]:
-    conversation = "\n\n".join(
-        f"{message['role'].upper()}:\n{message['content']}" for message in prepared.wrapper_messages
-    ).strip()
-    response_format = payload.get("response_format")
-    output_contract = "Follow the final answer requirements in the original request."
-    if response_format is not None:
-        output_contract += "\nOpenAI response_format request:\n" + json.dumps(
-            safe_jsonable(response_format),
-            ensure_ascii=False,
-            indent=2,
-        )
-    return {
-        "agent_instruction": conversation or "Answer the user's request.",
-        "output_contract": output_contract,
-        "wrapper_notes": "Input wrapper disabled; the original normalized conversation was passed through directly.",
-    }
-def build_agent_prompt(input_plan: dict[str, Any], prepared: PreparedInput) -> str:
-    image_block = "\n".join(f"- {path}" for path in prepared.image_paths) if prepared.image_paths else "- none"
-    return (
-        "You are solving a user request through ResearchHarness.\n\n"
-        "Task for the agent:\n"
-        f"{str(input_plan.get('agent_instruction', '')).strip()}\n\n"
-        "User-provided images saved in this workspace:\n"
-        f"{image_block}\n\n"
-        "The original image content is attached to the initial user message when the backend model supports image parts. "
-        "The same images are also saved at the paths above so you may call ReadImage when visual inspection is needed.\n\n"
-        "Do not optimize your tool-use loop for the final output schema. Solve the task completely, then finish with a complete, "
-        "self-contained internal final text that includes the actual answer, the evidence used, and any concise reasoning needed to understand it. "
-        "You may mention files you created or inspected, but the internal final text must not depend on local files as the only carrier of the answer.\n\n"
-        "Final output contract that will be enforced by a formatter after your run:\n"
-        f"{str(input_plan.get('output_contract', 'plain text')).strip()}\n\n"
-        "Wrapper notes:\n"
-        f"{str(input_plan.get('wrapper_notes', '')).strip()}"
-    )
-def build_output_wrapper_messages(
-    *,
-    prepared: PreparedInput,
-    payload: dict[str, Any],
-    input_plan: dict[str, Any],
-    agent_result_text: str,
-) -> list[dict[str, str]]:
-    output_payload = {
-        "original_messages": prepared.wrapper_messages,
-        "saved_image_paths": prepared.image_paths,
-        "output_contract": str(input_plan.get("output_contract", "plain text")),
-        "response_format": safe_jsonable(payload.get("response_format")),
-        "agent_result_text": agent_result_text,
-    }
-    return [
-        {"role": "system", "content": OUTPUT_WRAPPER_SYSTEM_PROMPT},
-        {"role": "user", "content": json.dumps(output_payload, ensure_ascii=False, indent=2)},
-    ]
-def extract_json_object(text: str) -> dict[str, Any]:
-    stripped = text.strip()
-    if stripped.startswith("```"):
-        stripped = re.sub(r"^```(?:json)?\s*", "", stripped, flags=re.IGNORECASE)
-        stripped = re.sub(r"\s*```$", "", stripped)
-    try:
-        parsed = json.loads(stripped)
-    except json.JSONDecodeError:
-        start = stripped.find("{")
-        end = stripped.rfind("}")
-        if start < 0 or end <= start:
-            raise OpenAICompatError(500, "Input wrapper did not return a JSON object.", "server_error") from None
-        try:
-            parsed = json.loads(stripped[start : end + 1])
-        except json.JSONDecodeError as exc:
-            raise OpenAICompatError(500, f"Input wrapper returned invalid JSON: {exc}", "server_error") from exc
-    if not isinstance(parsed, dict):
-        raise OpenAICompatError(500, "Input wrapper JSON must be an object.", "server_error")
-    if not str(parsed.get("agent_instruction", "")).strip():
-        raise OpenAICompatError(500, "Input wrapper JSON missing agent_instruction.", "server_error")
-    if not str(parsed.get("output_contract", "")).strip():
-        parsed["output_contract"] = "plain text"
-    parsed.setdefault("wrapper_notes", "")
-    return parsed
-def call_wrapper_text(
-    agent: MultiTurnReactAgent,
-    messages: list[dict[str, str]],
-    *,
-    max_output_tokens: Optional[int] = None,
-) -> str:
-    response = agent.call_compaction_api(messages, max_output_tokens=max_output_tokens)
-    if not isinstance(response, dict) or response.get("status") == "error":
-        error_text = response.get("error", "unknown wrapper error") if isinstance(response, dict) else str(response)
-        raise OpenAICompatError(500, error_text, "server_error")
-    text = assistant_text_content(response.get("content")).strip()
-    if not text:
-        raise OpenAICompatError(500, "Wrapper returned empty content.", "server_error")
-    return text
-def final_max_tokens(payload: dict[str, Any]) -> Optional[int]:
-    raw_value = payload.get("max_tokens", payload.get("max_completion_tokens"))
-    if raw_value is None:
-        return None
-    try:
-        value = int(raw_value)
-    except (TypeError, ValueError) as exc:
-        raise OpenAICompatError(400, "max_tokens must be an integer.") from exc
-    if value <= 0:
-        raise OpenAICompatError(400, "max_tokens must be positive.")
-    return value
-def append_api_event(trace_dir: Path, event: str, payload: dict[str, Any]) -> None:
-    append_jsonl(
-        trace_dir / "api_trace.jsonl",
-        {
-            "timestamp": int(time.time()),
-            "event": event,
-            "payload": safe_jsonable(payload),
-        },
-    )
-def run_chat_completion(payload: dict[str, Any], config: ServerConfig) -> dict[str, Any]:
-    payload = validate_chat_payload(payload)
-    request_id = "chatcmpl_" + uuid4().hex
-    run_id = "run_" + datetime.datetime.now().astimezone().strftime("%Y%m%d_%H%M%S") + "_" + uuid4().hex[:8]
-    run_root = config.api_runs_dir / run_id
-    agent_workspace = run_root / "agent_workspace"
-    trace_dir = run_root / "agent_trace"
-    agent_workspace.mkdir(parents=True, exist_ok=False)
-    trace_dir.mkdir(parents=True, exist_ok=False)
-    prepared = prepare_openai_input(payload["messages"], agent_workspace)
-    llm_config = default_llm_config()
-    backend_model = str(llm_config.get("model", ""))
-    if prepared.initial_content_parts and not model_supports_runtime_image_parts(backend_model):
-        raise OpenAICompatError(
-            400,
-            f"Backend model {backend_model!r} does not support image content parts.",
-        )
-    tool_names = [name for name in AVAILABLE_TOOL_MAP if name != "AskUser"]
-    agent = MultiTurnReactAgent(
-        function_list=tool_names,
-        llm=llm_config,
-        trace_dir=str(trace_dir),
-        role_prompt=config.role_prompt or None,
-    )
-    if config.input_wrapper:
-        input_wrapper_messages = build_input_wrapper_messages(prepared=prepared, payload=payload)
-        input_wrapper_text = call_wrapper_text(agent, input_wrapper_messages, max_output_tokens=1200)
-        input_plan = extract_json_object(input_wrapper_text)
-        append_api_event(
-            trace_dir,
-            "input_wrapper",
-            {
-                "enabled": True,
-                "request": input_wrapper_messages,
-                "response_text": input_wrapper_text,
-                "input_plan": input_plan,
-            },
-        )
-    else:
-        input_plan = build_passthrough_input_plan(prepared=prepared, payload=payload)
-        append_api_event(
-            trace_dir,
-            "input_wrapper",
-            {
-                "enabled": False,
-                "input_plan": input_plan,
-            },
-        )
-    agent_prompt = build_agent_prompt(input_plan, prepared)
-    session = agent._run_session(
-        agent_prompt,
-        workspace_root=str(agent_workspace),
-        initial_content_parts=prepared.initial_content_parts or None,
-    )
-    agent_result_text = str(session.get("result_text", "")).strip()
-    append_api_event(
-        trace_dir,
-        "agent_result",
-        {
-            "termination": session.get("termination", ""),
-            "result_text": agent_result_text,
-            "trace_path": session.get("trace_path", ""),
-        },
-    )
-    if config.output_wrapper:
-        output_wrapper_messages = build_output_wrapper_messages(
-            prepared=prepared,
-            payload=payload,
-            input_plan=input_plan,
-            agent_result_text=agent_result_text,
-        )
-        final_text = call_wrapper_text(agent, output_wrapper_messages, max_output_tokens=final_max_tokens(payload))
-        append_api_event(
-            trace_dir,
-            "output_wrapper",
-            {
-                "enabled": True,
-                "request": output_wrapper_messages,
-                "response_text": final_text,
-            },
-        )
-    else:
-        final_text = agent_result_text
-        append_api_event(
-            trace_dir,
-            "output_wrapper",
-            {
-                "enabled": False,
-                "response_text": final_text,
-            },
-        )
-    return make_chat_completion_response(
-        request_id=request_id,
-        model=str(payload.get("model", "researchharness")),
-        content=final_text,
-    )
-def create_app(config: ServerConfig) -> FastAPI:
-    app = FastAPI(title="ResearchHarness OpenAI-Compatible API", version="1.0")
-    @app.exception_handler(OpenAICompatError)
-    async def _handle_openai_compat_error(request: Request, exc: OpenAICompatError) -> JSONResponse:
-        return openai_error_response(exc)
-    @app.get("/v1/health")
-    async def health() -> dict[str, Any]:
-        return {
-            "status": "ok",
-            "api_runs_dir": str(config.api_runs_dir),
-            "input_wrapper": config.input_wrapper,
-            "output_wrapper": config.output_wrapper,
-        }
-    @app.post("/v1/chat/completions")
-    async def chat_completions(payload: dict[str, Any] = Body(...)) -> dict[str, Any]:
-        try:
-            return run_chat_completion(payload, config)
-        except OpenAICompatError:
-            raise
-        except Exception as exc:
-            raise OpenAICompatError(500, f"ResearchHarness API error: {exc}", "server_error") from exc
-    return app
-def serve(
-    *,
-    api_runs_dir: str,
-    host: str = "127.0.0.1",
-    port: int = 8686,
-    role_prompt_files: Optional[list[str]] = None,
-    input_wrapper: bool = True,
-    output_wrapper: bool = True,
-) -> None:
-    root = normalize_workspace_root(api_runs_dir)
-    role_prompt = read_role_prompt_files(role_prompt_files or [])
-    config = ServerConfig(
-        api_runs_dir=root,
-        role_prompt=role_prompt,
-        host=host,
-        port=port,
-        input_wrapper=input_wrapper,
-        output_wrapper=output_wrapper,
-    )
-    app = create_app(config)
-    uvicorn.run(app, host=host, port=port)

api_runs/.gitkeep DELETED Viewed

	@@ -1 +0,0 @@
1	-

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from pathlib import Path
 import uvicorn
-from agent_base.utils import read_role_prompt_files
 from frontend.local_server import app, configure_frontend
@@ -32,18 +31,9 @@ def _bool_env(name: str, default: bool) -> bool:
     raise ValueError(f"{name} must be a boolean, got {raw!r}")
-def _role_prompt_files() -> list[str]:
-    raw = os.getenv("RH_ROLE_PROMPT_FILES", "").strip()
-    if not raw:
-        return []
-    return [item for item in raw.split(os.pathsep) if item]
 def configure_space() -> None:
     runs_dir = Path(os.getenv("RH_SPACE_RUNS_DIR", "/tmp/researchharness_space/runs")).expanduser()
-    role_prompt = read_role_prompt_files(_role_prompt_files())
     configure_frontend(
-        role_prompt=role_prompt,
         managed_runs_dir=str(runs_dir),
         cleanup_retention_seconds=_int_env("RH_SPACE_RETENTION_SECONDS", 6 * 60 * 60),
         cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),

 import uvicorn
 from frontend.local_server import app, configure_frontend
     raise ValueError(f"{name} must be a boolean, got {raw!r}")
 def configure_space() -> None:
     runs_dir = Path(os.getenv("RH_SPACE_RUNS_DIR", "/tmp/researchharness_space/runs")).expanduser()
     configure_frontend(
         managed_runs_dir=str(runs_dir),
         cleanup_retention_seconds=_int_env("RH_SPACE_RETENTION_SECONDS", 6 * 60 * 60),
         cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),

benchmarks/QA/README.md DELETED Viewed

@@ -1,102 +0,0 @@
-# QA / VQA Benchmarks
-This directory documents the lightweight ResearchHarness contract for
-question-answering benchmarks, including plain-text QA and multimodal VQA-style
-tasks.
-The recommended integration is the OpenAI-compatible synchronous API server:
-```bash
-python3 /abs/path/to/ResearchHarness/run_server.py \
-  --api-runs-dir ./api_runs
-```
-For QA/VQA benchmark runs, optionally add this benchmark role overlay:
-```bash
-python3 /abs/path/to/ResearchHarness/run_server.py \
-  --api-runs-dir ./api_runs \
-  --role-prompt-file /abs/path/to/ResearchHarness/benchmarks/QA/role_prompt.md
-```
-Each request creates a fresh run directory:
-```text
-./api_runs/
-`-- run_YYYYMMDD_HHMMSS_<random>/
-    |-- agent_workspace/          # visible to the agent
-    |   `-- inputs/
-    |       `-- images/           # user-provided images, when present
-    `-- agent_trace/              # server-side trace and session state
-        |-- api_trace.jsonl
-        |-- trace_*.jsonl
-        `-- _session_state.json
-```
-The input and output LLM wrappers are enabled by default:
-- `--input-wrapper` / `--no-input-wrapper` controls the input normalization pass.
-- `--output-wrapper` / `--no-output-wrapper` controls the final answer formatting pass.
-Strict-format benchmarks should usually keep both wrappers enabled. To return
-the agent's direct final text instead, run:
-```bash
-python3 /abs/path/to/ResearchHarness/run_server.py \
-  --api-runs-dir ./api_runs \
-  --no-input-wrapper \
-  --no-output-wrapper
-```
-External benchmark runners can then use the regular OpenAI SDK with:
-```python
-from openai import OpenAI
-client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[{"role": "user", "content": "Answer the question."}],
-)
-answer = response.choices[0].message.content
-```
-## Multimodal Input
-For image benchmarks, send OpenAI-style content parts. The first API version
-supports one or more `data:image/...;base64,...` URLs in the same request.
-```python
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "What is shown? Return JSON with key answer."},
-                {"type": "image_url", "image_url": {"url": data_url}},
-            ],
-        }
-    ],
-)
-```
-The API saves each submitted image under `agent_workspace/inputs/images/`,
-passes the image content to the first ResearchHarness model call when the
-backend model supports image parts, and includes each saved path in the
-agent-visible text.
-The returned answer should be self-contained for a remote evaluator. Workspace
-files may support the run, but the response should not only say to consult
-`answer.md`, `report.md`, an image file, or another local artifact.
-## Scope
-- The endpoint is synchronous and returns one final text answer.
-- Each request gets a separate workspace subdirectory.
-- The API uses an input wrapper, the ResearchHarness agent, and an output
-  wrapper so strict benchmark output formats do not destabilize the agent loop.
-- Streaming, async run status, artifact download, and remote image fetching are
-  intentionally out of scope for this minimal QA contract.

benchmarks/QA/role_prompt.md DELETED Viewed

@@ -1,31 +0,0 @@
-# Benchmark Role Overlay
-You are running inside ResearchHarness for a QA or VQA benchmark.
-Behavior:
-- Solve the user's task directly and carefully.
-- Use tools only when they materially improve answer quality.
-- If the request includes saved image paths, inspect the image evidence when it
-  is needed for the answer.
-- Do not ask the user follow-up questions.
-- Do not stop with a plan. Produce the answer once enough evidence has been
-  gathered.
-- It is acceptable to explain what evidence was used in the agent's internal
-  final text; a downstream formatter will enforce the benchmark's exact output
-  contract.
-- Assume the remote evaluator only sees the returned text, not your workspace.
-- Your final text must be a complete, independent plain-text answer.
-- Include the actual answer to the original question.
-- Include supporting evidence, calculations, or reasoning steps when they are
-  needed to make the answer understandable.
-- In this benchmark role, do not rely on local workspace files as the answer.
-  Files such as `answer.md`, `report.md`, images, or other artifacts may support
-  your work, but the returned text itself must contain the answer a remote
-  evaluator needs.
-For visual tasks:
-- Prefer the attached image content when it is available in the model input.
-- Use `ReadImage` on saved image paths when additional visual inspection is
-  needed or when the prompt explicitly asks you to inspect local image files.
-- Do not invent visual details that are not supported by the image or tool
-  output.

benchmarks/README.md DELETED Viewed

@@ -1,18 +0,0 @@
-# Benchmarks
-This folder records benchmark-specific integration contracts that live
-**outside** `agent_base` so the core harness stays generic, lightweight, and
-fair across different evaluations.
-| Benchmark | Directory | Tracked contract |
-| --- | --- | --- |
-| ResearchClawBench | `benchmarks/ResearchClawBench/` | `README.md` + `role_prompt.md` + `adapter.py` |
-| QA / VQA-style benchmarks | `benchmarks/QA/` | `README.md` + `role_prompt.md` |
-## Notes
-- `agent_base/` stays focused on the reusable harness runtime.
-- Benchmark-specific prompts, adapters, and integration notes should live under
-  their own benchmark subdirectory.
-- Local benchmark helpers may exist for private experimentation, but they do
-  not define the formal external integration contract.

benchmarks/ResearchClawBench/README.md DELETED Viewed

@@ -1,44 +0,0 @@
-# ResearchClawBench
-This directory contains the tracked files needed to document how `ResearchHarness`
-should be integrated into `ResearchClawBench`.
-ResearchHarness is intended to serve here as a **general and fair execution
-substrate** for tool-using LLM evaluation, while `ResearchClawBench` remains in
-charge of task construction, hidden-answer isolation, and scoring.
-## Recommended `agents.json` Entry
-Use a single direct command that launches the thin top-level ResearchHarness
-entrypoint.
-```json
-{
-  "researchharness": {
-    "label": "ResearchHarness",
-    "icon": "H",
-    "logo": "/static/logos/rh.svg",
-    "cmd": "python3 /abs/path/to/ResearchHarness/run_agent.py <PROMPT> --workspace-root <WORKSPACE> --role-prompt-file /abs/path/to/ResearchHarness/benchmarks/ResearchClawBench/role_prompt.md --trace-dir <WORKSPACE>"
-  }
-}
-```
-## Why This Shape
-- `ResearchClawBench` already prepares the workspace, writes `INSTRUCTIONS.md`,
-  and isolates hidden checklist data.
-- `ResearchHarness` should only execute the agent through a stable harness
-  interface.
-- The command stays unchanged. The entrypoint automatically selects the
-  lightweight adapter in `benchmarks/ResearchClawBench/adapter.py` when this
-  benchmark role prompt is used.
-## Notes
-- Replace `/abs/path/to/ResearchHarness/` with the real local checkout path.
-- The command should stay one-line and non-interactive.
-- The adapter prevents premature termination on long tasks by refusing to accept
-  plain-text completion before `report/report.md` exists in the workspace.
-- The adapter excludes `AskUser`; RCB runs must remain fully non-interactive.
-- Any local batch helpers or ad hoc benchmark scripts should remain untracked
-  and live outside the formal integration contract.

benchmarks/ResearchClawBench/adapter.py DELETED Viewed

@@ -1,93 +0,0 @@
-from __future__ import annotations
-from pathlib import Path
-from typing import Any, Optional, Sequence
-from agent_base.react_agent import AVAILABLE_TOOL_MAP, MultiTurnReactAgent
-from agent_base.tools.tooling import normalize_workspace_root
-class ResearchClawBenchAgent(MultiTurnReactAgent):
-    """
-    Lightweight benchmark adapter for ResearchClawBench.
-    The benchmark task is not complete until the run workspace contains the
-    canonical final report at report/report.md. Pure planning text without that
-    artifact should not terminate the agent loop.
-    """
-    required_report_relpath = Path("report") / "report.md"
-    forbidden_tool_names = {"AskUser"}
-    def __init__(self, function_list: Optional[Sequence[str]] = None, *args: Any, **kwargs: Any):
-        if function_list is None:
-            function_list = [
-                tool_name
-                for tool_name in AVAILABLE_TOOL_MAP
-                if tool_name not in self.forbidden_tool_names
-            ]
-        else:
-            function_list = [str(tool_name).strip() for tool_name in function_list if str(tool_name).strip()]
-            forbidden = sorted(set(function_list) & self.forbidden_tool_names)
-            if forbidden:
-                raise ValueError(f"Tools are not allowed in ResearchClawBench runs: {forbidden}")
-        super().__init__(function_list=list(function_list), *args, **kwargs)
-    def _required_report_path(self, workspace_root: Optional[str]) -> Path:
-        workspace = Path(normalize_workspace_root(workspace_root))
-        return workspace / self.required_report_relpath
-    def should_accept_plaintext_result(
-        self,
-        *,
-        result_text: str,
-        workspace_root: Optional[str],
-        messages: Sequence[dict[str, Any]],
-    ) -> bool:
-        if not self._required_report_path(workspace_root).exists():
-            return False
-        return super().should_accept_plaintext_result(
-            result_text=result_text,
-            workspace_root=workspace_root,
-            messages=messages,
-        )
-    def rejected_plaintext_result_message(
-        self,
-        *,
-        result_text: str,
-        workspace_root: Optional[str],
-        messages: Sequence[dict[str, Any]],
-    ) -> str:
-        if not self._required_report_path(workspace_root).exists():
-            return (
-                "The previous assistant turn was not accepted as the final result because "
-                "ResearchClawBench requires report/report.md and that file is still missing. "
-                "Continue working and use tool calls to produce or verify report/report.md before finishing."
-            )
-        return super().rejected_plaintext_result_message(
-            result_text=result_text,
-            workspace_root=workspace_root,
-            messages=messages,
-        )
-    def should_accept_terminal_error(
-        self,
-        *,
-        error_text: str,
-        workspace_root: Optional[str],
-        messages: Sequence[dict[str, Any]],
-    ) -> bool:
-        return self._required_report_path(workspace_root).exists()
-    def accepted_terminal_error_result_text(
-        self,
-        *,
-        error_text: str,
-        workspace_root: Optional[str],
-        messages: Sequence[dict[str, Any]],
-    ) -> str:
-        return (
-            "ResearchClawBench completion recovered after a terminal LLM/runtime error because "
-            "report/report.md already exists and the required final artifact has been produced."
-        )

benchmarks/ResearchClawBench/role_prompt.md DELETED Viewed

@@ -1,195 +0,0 @@
-# Benchmark Role Overlay
-## Purpose
-You are running inside a benchmark-style scientific evaluation.
-Your job is not just to produce a plausible report. Your job is to produce a
-report whose claims are traceable to concrete artifacts in the workspace and
-whose methods match the task's named scientific commitments as closely as the
-environment allows.
-This benchmark is non-interactive. Do not use `AskUser` or attempt to ask the
-human for clarification. Resolve ambiguity from `INSTRUCTIONS.md`, workspace
-files, related work, and available local or web tools.
-## Method Contract
-- Parse the task into explicit methodological commitments early.
-- Before broad exploration, infer the likely target artifact families required by
-  the task, including:
-  - primary quantitative answers
-  - required comparison tables
-  - expected figure families
-  - interpretability artifacts
-  - subgroup or condition-specific outputs
-- If the task names a framework, protocol, comparison structure,
-  interpretability method, simulator, ablation, posterior treatment,
-  reconciliation step, or validation design, treat that as part of the
-  contract.
-- Do not silently replace an explicitly named method with a looser descriptive
-  analysis.
-- Save a concise contract summary to `outputs/method_contract.json`.
-- Save the inferred target artifact inventory to
-  `outputs/target_artifact_inventory.json`.
-- After reading the most relevant related-work papers, refresh both files if the
-  papers reveal additional named baselines, architectures, figure families,
-  comparison strata, or interpretability artifacts central to the task.
-- Save a concise related-work extraction to `outputs/related_work_contract.json`
-  whenever related work materially changes the contract or artifact inventory.
-## Capability Check
-- Before approximating or skipping a named method, check whether the needed
-  dependency, library, or runtime capability is available.
-- Save the result to `outputs/dependency_check.json`.
-- If a named method cannot be implemented exactly, state the exact limitation
-  and the fallback.
-- If the task centers on a named model family, simulator, architecture, or
-  analysis stack, do not quietly swap to a different family just because it is
-  easier. Either implement a minimally faithful version of the named approach
-  or make the deviation explicit before proceeding.
-## Evidence Discipline
-- Every major scientific claim should have at least one explicit supporting
-  artifact in `outputs/` or `report/images/`.
-- Export the exact tables, matrices, or JSON objects used to create each main
-  figure.
-- Add a dedicated validation subsection to the report that separates:
-  - what was verified directly from workspace data
-  - what came from related work
-  - what remains an assumption or limitation
-- Answer claim-recovery questions claim-by-claim rather than only with a broad
-  narrative.
-- Save a concise claim recovery table before finalizing the report.
-- When the task asks for quantitative constraints, limits, posterior summaries,
-  calibration values, or uncertainty summaries, save those values explicitly in
-  the requested variables and units rather than only through a proxy
-  transformation.
-- If the task ultimately asks for a direct constraint on a named target
-  quantity, prefer deriving and reporting that named quantity itself instead of
-  stopping at an intermediate proxy axis, surrogate scale, or nearby latent
-  variable whenever a defensible derivation is possible from workspace data and
-  related work.
-- If posterior samples are a primary input, report canonical distribution
-  summaries for each primary source, including mean and standard deviation,
-  unless those statistics are mathematically invalid for the variable.
-- If the task names a primary source, cohort, benchmark, or experimental arm,
-  produce at least one source-specific artifact for it before emphasizing only
-  combined or aggregated results.
-- If the task names a direct target quantity, threshold, or decision criterion,
-  export a compact result table that answers it directly before presenting
-  broader supporting analyses.
-## Related Work Use
-- Read `related_work/` early, but bounded.
-- Start with concise or bounded reads when papers are long.
-- Extract only task-relevant facts into notes or structured outputs.
-- If related work contains validation metrics, methodological caveats,
-  baselines, or target comparison axes that matter for the task, incorporate
-  them explicitly.
-- Prefer extracting from related work:
-  - named methods or architectures to reproduce or compare against
-  - target comparison axes and subgroup splits
-  - likely main figure families or panel structures
-  - explicit quantitative targets, thresholds, or calibration outputs
-## Figure And Comparison Fidelity
-- Prefer claim-driven figures over generic exploratory plots.
-- Infer likely figure families and comparison structures from the task and
-  related work.
-- If the task is about projections, calibration, method agreement, subgroup
-  trends, rankings, level-wise comparisons, or ablations, produce figures that
-  directly encode those structures.
-- Keep the main figure set compact: each main figure should support a specific
-  target claim.
-- If the task's core claim is source-specific, dataset-specific, or
-  benchmark-specific, include at least one main figure at that same
-  granularity rather than only a pooled or combined summary figure.
-- If the task implies a named figure family such as ablation curves, PR/ROC
-  curves, parity plots, subgroup heatmaps, saliency maps, architecture
-  diagrams, or level-wise comparisons, prioritize that family over a generic
-  substitute.
-## Group And Condition Preservation
-- If the task names groups, conditions, labs, sexes, environments, shells,
-  depth levels, or other comparison strata, preserve them in at least one
-  exported table or figure.
-- Do not silently collapse mixed categories if the scientific question depends
-  on them.
-- When subgroup structure matters over time, prefer a subgroup-by-time matrix
-  and save it.
-- If the task is a benchmark or model-comparison study across datasets,
-  baselines, cohorts, or conditions, export a compact comparison table with the
-  main metric reported as mean ± standard deviation whenever repeated runs,
-  folds, or stochastic training are part of the setup.
-- For multi-condition or multi-cohort tasks, save at least one artifact at the
-  per-condition granularity before merging across conditions.
-## Named Method Fidelity
-- If the task or related work defines a named mechanism, algorithm, or
-  protocol central to the scientific claim, save a fidelity checklist to
-  `outputs/method_fidelity_checklist.json`.
-- That checklist should capture:
-  - the exact definition
-  - assumptions
-  - invariants
-  - non-negotiable structural steps
-- Use it to verify whether the implemented method actually matches the named
-  mechanism.
-- If you deviate, explain exactly how and why in the report.
-- If the task revolves around a named architecture or protocol, capture the key
-  structural ingredients that distinguish it from nearby alternatives and check
-  them explicitly.
-## Small Sweeps And Ablations
-- If the named mechanism exposes a small discrete design variable, such as
-  levels, layers, stages, shells, bins, or ablation settings, run at least a
-  small sweep unless it is genuinely impossible from the available workspace.
-- If the task names a specific interpretability method such as SHAP,
-  permutation importance, saliency, or similar, produce at least one artifact
-  using that named method.
-- If the task claims improved interpretability, do not stop at aggregate metric
-  gains alone; produce at least one explicit interpretability artifact and tie
-  it back to domain-relevant entities, groups, or substructures named in the
-  task or related work.
-- If the task names multiple groups, labs, cohorts, or environments, prefer an
-  interpretability artifact that compares them directly instead of a single
-  pooled explanation.
-- If interpretability is central and the chosen model family supports a common
-  post hoc explanation method, do not stop at native coefficient or impurity
-  magnitudes alone. Add at least one post hoc explanation artifact such as
-  SHAP, permutation importance, saliency, attention attribution, or a similarly
-  standard method for that model family.
-## Finalization
-- Start `report/report.md` as soon as at least two core result families already
-  have concrete supporting artifacts in `outputs/` or `report/images/`.
-- Prefer an evidence-backed report draft over one more optional script, one
-  more polish pass, or one more non-essential figure.
-- Once the primary quantitative outputs, the main comparison figures, and the
-  core validation artifacts exist, write `report/report.md` immediately.
-- Do not postpone the report in order to chase optional supplementary figures,
-  extra exploratory analyses, or additional polish that is not required to
-  support the task's main claims.
-- Treat optional supplementary work as lower priority than a complete,
-  evidence-backed report. If the report can already answer the task directly,
-  finish the report first and only then consider extras if there is clear
-  remaining need.
-- The final report should be tightly traceable.
-- Important numbers should be reproducible from saved artifacts in the
-  workspace.
-- Do not claim exact reproduction if only a rough approximation was achieved.
-- Before finalizing, check that the report contains direct answers to the main
-  requested outputs in the named variables, units, and confidence language of
-  the task, not only nearby surrogate quantities.
-- Before finalizing, verify that every primary entry in
-  `outputs/target_artifact_inventory.json` is either satisfied by a concrete
-  saved artifact or explicitly marked as unsatisfied with a reason.

docs/tutorial_en.md DELETED Viewed

@@ -1,531 +0,0 @@
-# ResearchHarness Tutorial
-This tutorial explains how to use ResearchHarness from the command line and as
-an OpenAI-compatible API service.
-ResearchHarness is a lightweight, general-purpose harness for tool-using LLM
-agents. It can be used as:
-- a command-line local agent,
-- a fair execution substrate for agent benchmarks,
-- an OpenAI-compatible synchronous API backend,
-- a personal assistant runtime for files, code, reports, PDFs, images, and web tasks.
-## 1. Install
-Clone the repository and install dependencies:
-```bash
-python3 -m pip install -r requirements.txt
-```
-Python 3.10+ is recommended.
-## 2. Configure Environment Variables
-Copy `.env.example` to `.env` and fill in the required values.
-Required variables:
-| Variable | Meaning |
-| --- | --- |
-| `API_KEY` | API key for your OpenAI-compatible LLM provider. |
-| `API_BASE` | Base URL for the OpenAI-compatible chat-completions endpoint. |
-| `MODEL_NAME` | Main model used by ResearchHarness. |
-| `SERPER_KEY` | Serper key for `WebSearch` and `ScholarSearch`: https://serper.dev/ |
-| `JINA_KEY` | Jina key for `WebFetch`: https://jina.ai/ |
-| `MINERU_TOKEN` | MinerU token for `ReadPDF`: https://mineru.net/ |
-Optional variables:
-| Variable | Default | Meaning |
-| --- | --- | --- |
-| `WORKSPACE_ROOT` | `./workspace` | Default workspace root when no explicit workspace is passed. |
-| `MAX_LLM_CALL_PER_RUN` | `100` | Maximum LLM calls in one agent run. |
-| `MAX_AGENT_ROUNDS` | `100` | Maximum ReAct loop rounds. |
-| `MAX_AGENT_RUNTIME_SECONDS` | `9000` | Maximum wall-clock runtime for one agent run. |
-| `LLM_TIMEOUT_SECONDS` | `600` | Timeout for each LLM API request. |
-| `LLM_MAX_OUTPUT_TOKENS` | `10000` | Requested maximum output tokens. |
-| `MAX_INPUT_TOKENS` | `320000` | Input-token budget used by runtime accounting. |
-| `LLM_MAX_RETRIES` | `10` | Maximum retries for transient LLM API errors. |
-| `TEMPERATURE` | `0.6` | Main model temperature. |
-| `TOP_P` | `0.95` | Main model top-p. |
-| `PRESENCE_PENALTY` | `1.1` | Main model presence penalty when supported. |
-| `AUTO_COMPACT_TRIGGER_TOKENS` | `128k` | Context length threshold for automatic compaction. |
-| `IMAGE_PART_TOKEN_ESTIMATE` | `1536` | Token estimate for each image content part. |
-| `LLM_IMAGE_MAX_EDGE` | `1568` | Maximum image edge sent to multimodal models. |
-| `LLM_IMAGE_MAX_BYTES` | `524288` | Maximum compressed image payload size. |
-| `LLM_IMAGE_JPEG_QUALITY` | `85` | Initial JPEG quality for image compression. |
-| `DEBUG_AGENT` | `false` | Verbose agent-loop logs. |
-| `DEBUG_SEARCH` | `false` | Verbose WebSearch logs. |
-| `DEBUG_SCHOLAR` | `false` | Verbose ScholarSearch logs. |
-| `DEBUG_VISIT` | `false` | Verbose WebFetch logs. |
-Before real use, run:
-```bash
-python3 tests/test_tool_availability.py
-```
-All tools should pass. Missing service keys, missing dependencies, exhausted
-credits, or unavailable external tools should be treated as failures.
-If `WebSearch`, `ScholarSearch`, `WebFetch`, or `ReadPDF` fails with network,
-TLS, upload, download, or parsing errors, try disabling VPN/proxy and rerun the
-test.
-## 3. Command-Line Usage
-Run a simple prompt:
-```bash
-python3 run_agent.py "Who proposed the transformer architecture, and in what year was the paper published?"
-```
-Use an explicit workspace:
-```bash
-python3 run_agent.py "Summarize this project." \
-  --workspace-root ./workspace
-```
-You can replace `./workspace` with any other workspace directory.
-Save traces to a directory:
-```bash
-python3 run_agent.py "Summarize this project." \
-  --workspace-root ./workspace \
-  --trace-dir ./traces
-```
-You can replace `./traces` with any other trace directory.
-Without `--trace-dir`, CLI runs do not write a trace file.
-Append a role prompt:
-```bash
-python3 run_agent.py "Answer this QA task." \
-  --workspace-root ./workspace \
-  --role-prompt-file benchmarks/QA/role_prompt.md
-```
-Attach a local image:
-```bash
-python3 run_agent.py "Read the image and return JSON." \
-  --workspace-root ./workspace \
-  --images /path/to/image.png /path/to/second-image.png
-```
-Each image path must exist. ResearchHarness copies images into `./workspace/inputs/images/`,
-sends them as initial `image_url` content parts, and adds each saved relative
-path to the user text so later rounds can call `ReadImage` on the same files.
-In an interactive terminal, CLI runs continue after a final answer and prompt
-for a follow-up. The follow-up run keeps the prior messages, tool results, and
-saved image path hints. During a running step, `Ctrl+C` interrupts the current
-run at the next safe point and returns to follow-up mode with context preserved.
-Press `Ctrl+C` at the follow-up prompt or send EOF to exit. Use `--no-chat` for
-strict one-shot behavior, or `--chat` to force follow-up mode.
-For browser-based local use, run `python3 run_frontend.py`. The frontend uses an
-existing workspace selected in the page, streams tool steps live, accepts one or
-more image attachments, and continues the current conversation after each final
-answer until you click **New chat**. While running, the send button becomes
-**Stop**; it interrupts at the next safe point and keeps the conversation
-context for the next message.
-### CLI Parameters
-| Parameter | Required | Meaning |
-| --- | --- | --- |
-| positional `prompt` | yes, unless `--prompt-file` is used | Prompt text. |
-| `--prompt-file PATH` | no | Read prompt text from a UTF-8 file. |
-| `--workspace-root PATH` | no | Workspace root for local file tools, Bash, and terminal sessions. Created if missing. |
-| `--trace-dir PATH` | no | Directory where `trace_*.jsonl` is written. |
-| `--role-prompt-file PATH` | no, repeatable | Append role-specific prompt text to the base system prompt. |
-| `--images PATH [PATH ...]` | no | Copy one or more local images into `inputs/images/` and attach them to the initial user message. |
-| `--chat` / `--no-chat` | no | Enable or disable CLI follow-up mode. Default: enabled only when stdin and stdout are interactive terminals. |
-## 4. OpenAI-Compatible API Server
-ResearchHarness can serve a synchronous OpenAI-compatible endpoint:
-```http
-POST /v1/chat/completions
-```
-This allows existing OpenAI SDK clients to call ResearchHarness by changing only
-`base_url`.
-### Start the Server
-Default deployment:
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --host 127.0.0.1 \
-  --port 8686
-```
-QA/VQA benchmark deployment with a benchmark role overlay:
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --host 127.0.0.1 \
-  --port 8686 \
-  --role-prompt-file benchmarks/QA/role_prompt.md
-```
-### API Server Parameters
-| Parameter | Required | Default | Meaning |
-| --- | --- | --- | --- |
-| `--api-runs-dir PATH` | yes | none | Parent directory for API runs. Each request gets one subdirectory. |
-| `--host HOST` | no | `127.0.0.1` | Host to bind. |
-| `--port PORT` | no | `8686` | Port to bind. |
-| `--role-prompt-file PATH` | no, repeatable | none | Append role prompt text to the base ResearchHarness prompt. |
-| `--input-wrapper` / `--no-input-wrapper` | no | enabled | Enable or disable the input LLM wrapper. |
-| `--output-wrapper` / `--no-output-wrapper` | no | enabled | Enable or disable the output LLM wrapper. |
-### Wrapper Modes
-Both wrappers are enabled by default.
-Strict-format benchmark mode:
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --role-prompt-file benchmarks/QA/role_prompt.md \
-  --input-wrapper \
-  --output-wrapper
-```
-Direct agent mode:
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --no-input-wrapper \
-  --no-output-wrapper
-```
-Simple input plus strict final formatting:
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --no-input-wrapper \
-  --output-wrapper
-```
-The input wrapper rewrites the original user request into a stable task for the
-agent. The output wrapper formats the agent result to match the user's requested
-answer contract. Wrappers must not invent new facts; they only normalize input
-and format output.
-The API server is intentionally one request -> one answer. It does not keep a
-server-side conversation between HTTP requests. If an application needs API
-multi-turn behavior, keep that state in the client and send the needed prior
-context in later requests.
-```mermaid
-flowchart LR
-    U[User Input] --> IW[Input Wrapper LLM]
-    IW --> A[ResearchHarness Agent]
-    A --> OW[Output Wrapper LLM]
-    OW --> O[Output]
-```
-## 5. API Workspace Layout
-Each API request creates one run directory:
-```text
-./api_runs/
-`-- run_YYYYMMDD_HHMMSS_<random>/
-    |-- agent_workspace/
-    |   `-- inputs/
-    |       `-- images/
-    `-- agent_trace/
-        |-- api_trace.jsonl
-        |-- trace_*.jsonl
-        `-- _session_state.json
-```
-Meaning:
-| Path | Meaning |
-| --- | --- |
-| `run_YYYYMMDD_HHMMSS_<random>/` | Per-request run root. |
-| `agent_workspace/` | The only workspace visible to the agent. File tools, Bash, `ls`, and `cat` start here. |
-| `agent_workspace/inputs/images/` | User-provided images saved from API requests. |
-| `agent_trace/` | API trace, agent trace, and runtime records. |
-For multimodal requests, image inputs are handled in two ways at the same time:
-the image content is passed to the backend model as initial multimodal input
-when the selected model supports it, and each image is saved under
-`agent_workspace/inputs/images/`. Each saved relative path is also included in
-the agent-visible text, so later rounds can call `ReadImage` on a stable local
-path without repeatedly resending image bytes.
-This layout keeps the agent-visible tool work separate from server-side trace files.
-In API deployment mode, traces are saved by default: every request writes
-`api_trace.jsonl`, `trace_*.jsonl`, and `_session_state.json` under that run's `agent_trace/`
-directory.
-## 6. Text Request with OpenAI SDK
-```python
-from openai import OpenAI
-client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[
-        {"role": "user", "content": "Answer in one sentence: what is 2 + 2?"}
-    ],
-)
-print(response.choices[0].message.content)
-```
-## 7. Multimodal Request with OpenAI SDK
-The first API version supports one or more `data:image/...;base64,...` image
-URLs in the same request. Remote image URLs and local file paths are
-intentionally not supported by the API server.
-The example below generates an image in memory and asks for JSON output.
-```python
-import base64
-from io import BytesIO
-from PIL import Image, ImageDraw
-from openai import OpenAI
-image = Image.new("RGB", (320, 120), "white")
-draw = ImageDraw.Draw(image)
-draw.text((40, 45), "7 + 5 = ?", fill="black")
-buffer = BytesIO()
-image.save(buffer, format="PNG")
-data_url = "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode("ascii")
-client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": (
-                        "The image contains a simple arithmetic expression. "
-                        "Return JSON with exactly two keys: expression and answer."
-                    ),
-                },
-                {"type": "image_url", "image_url": {"url": data_url}},
-            ],
-        }
-    ],
-)
-print(response.choices[0].message.content)
-```
-Expected answer shape:
-```json
-{"expression":"7 + 5","answer":12}
-```
-## 8. API Request and Response Contract
-### `POST /v1/chat/completions`
-Supported request fields:
-| Field | Required | Meaning |
-| --- | --- | --- |
-| `model` | yes | Client-visible model label. It does not override `MODEL_NAME`; the backend model comes from `.env`. |
-| `messages` | yes | OpenAI-style chat messages. |
-| `stream` | no | Must be absent or `false`; streaming is not supported. |
-| `n` | no | Must be absent or `1`. |
-| `max_tokens` | no | Maximum output tokens for the output wrapper. |
-| `max_completion_tokens` | no | Alias accepted for output-wrapper max tokens. |
-| `response_format` | no | Passed to the wrappers as an output-format hint. |
-Supported message roles:
-| Role | Supported |
-| --- | --- |
-| `system` | yes |
-| `user` | yes |
-| `assistant` | yes |
-| `tool` | no |
-Supported content forms:
-```json
-{"role": "user", "content": "plain text"}
-```
-```json
-{
-  "role": "user",
-  "content": [
-    {"type": "text", "text": "question"},
-    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
-  ]
-}
-```
-Response shape:
-```json
-{
-  "id": "chatcmpl_...",
-  "object": "chat.completion",
-  "created": 1770000000,
-  "model": "researchharness",
-  "choices": [
-    {
-      "index": 0,
-      "message": {
-        "role": "assistant",
-        "content": "final answer"
-      },
-      "finish_reason": "stop"
-    }
-  ]
-}
-```
-Callers usually only need:
-```python
-response.choices[0].message.content
-```
-### `GET /v1/health`
-Returns:
-```json
-{
-  "status": "ok",
-  "api_runs_dir": "./api_runs",
-  "input_wrapper": true,
-  "output_wrapper": true
-}
-```
-## 9. Tool Surface
-ResearchHarness currently includes:
-| Tool | Purpose |
-| --- | --- |
-| `Glob` | Discover files by pattern. |
-| `Grep` | Search text in files. |
-| `Read` | Read text files with bounds. |
-| `ReadPDF` | Parse PDFs with MinerU/structai. |
-| `ReadImage` | Inspect local image files and forward image content to vision-capable models. |
-| `Write` | Write files inside the workspace. |
-| `Edit` | Patch files inside the workspace. |
-| `Bash` | Run shell commands inside the workspace. |
-| `WebSearch` | Web search through Serper. |
-| `ScholarSearch` | Scholar-style search through Serper. |
-| `WebFetch` | Fetch and summarize webpages through Jina and the configured model. |
-| `AskUser` | Ask a human for clarification in interactive runs. Disabled by some benchmark adapters. |
-| `TerminalStart` / `TerminalWrite` / `TerminalRead` / `TerminalInterrupt` / `TerminalKill` | Persistent terminal sessions. |
-## 10. Traces and Records
-CLI runs write traces only when `--trace-dir` is provided. Without
-`--trace-dir`, CLI runs do not write a trace file.
-API runs write traces under:
-```text
-./api_runs/run_.../agent_trace/
-```
-Important files:
-| File | Meaning |
-| --- | --- |
-| `api_trace.jsonl` | Input wrapper, agent result, and output wrapper records. |
-| `trace_*.jsonl` | Flat agent runtime trace. |
-| `_session_state.json` | Current session state, written next to `trace_*.jsonl` when tracing is enabled. |
-The trace stores tool calls, tool results, LLM call capture payloads, compaction
-events, errors, and final termination state.
-## 11. Benchmark Adapters
-Tracked benchmark contracts live under `benchmarks/`.
-Current tracked adapters:
-| Benchmark | Directory | Notes |
-| --- | --- | --- |
-| ResearchClawBench | `benchmarks/ResearchClawBench/` | CLI integration with role prompt and adapter. |
-| QA / VQA | `benchmarks/QA/` | OpenAI-compatible API integration for text and multimodal QA. |
-Benchmark-specific behavior should stay outside `agent_base/`.
-## 12. Testing
-Recommended checks:
-```bash
-python3 tests/test_tool_availability.py
-python3 tests/test_openai_api_checks.py
-python3 tests/test_agent_extension_checks.py
-python3 tests/test_edge_case_checks.py
-python3 tests/test_toolchain_validation.py
-```
-If using conda (replace `agent` with the name of your environment):
-```bash
-conda run -n agent python3 tests/test_openai_api_checks.py
-```
-## 13. Troubleshooting
-Common issues:
-| Symptom | Likely cause | Action |
-| --- | --- | --- |
-| Missing required env error | `.env` is incomplete | Fill required variables. |
-| Web/PDF tools fail | VPN/proxy/TLS/service issue | Disable VPN/proxy and rerun tool availability tests. |
-| Image request returns 400 | Image URL is not a `data:image/...;base64,...` URL | Convert the image to a base64 data URL. |
-| Backend model rejects images | Model endpoint is not vision-capable | Use a vision-capable model or send text-only tasks. |
-| API request fails with streaming error | `stream=true` was sent | Use synchronous requests only. |
-| Unexpected output format | Output wrapper disabled or prompt under-specified | Enable `--output-wrapper` and state the desired format clearly. |
-## 14. Current Boundaries
-The first API version intentionally does not include:
-- streaming,
-- async run status,
-- cancellation,
-- artifact download endpoints,
-- remote image URL downloading,
-- user authentication,
-- multi-tenant access control.
-These can be added later as separate layers without changing the core harness
-loop.

docs/tutorial_zh.md DELETED Viewed

@@ -1,511 +0,0 @@
-# ResearchHarness 教程
-本文介绍如何通过命令行和 OpenAI-compatible API 使用 ResearchHarness。
-ResearchHarness 是一个轻量、通用的 tool-using LLM agent harness。它可以作为：
-- 命令行本地 agent，
-- agent benchmark 的公平执行底座，
-- OpenAI-compatible 同步 API 后端，
-- 面向代码、文件、报告、PDF、图片、网页任务的个人助手运行时。
-## 1. 安装
-安装依赖：
-```bash
-python3 -m pip install -r requirements.txt
-```
-推荐使用 Python 3.10+。
-## 2. 配置环境变量
-复制 `.env.example` 为 `.env`，并填写必需变量。
-必需变量：
-| 变量 | 含义 |
-| --- | --- |
-| `API_KEY` | OpenAI-compatible LLM 服务的 API key。 |
-| `API_BASE` | OpenAI-compatible chat-completions endpoint 的 base URL。 |
-| `MODEL_NAME` | ResearchHarness 使用的主模型。 |
-| `SERPER_KEY` | `WebSearch` 和 `ScholarSearch` 使用的 Serper key：https://serper.dev/ |
-| `JINA_KEY` | `WebFetch` 使用的 Jina key：https://jina.ai/ |
-| `MINERU_TOKEN` | `ReadPDF` 使用的 MinerU token：https://mineru.net/ |
-可选变量：
-| 变量 | 默认值 | 含义 |
-| --- | --- | --- |
-| `WORKSPACE_ROOT` | `./workspace` | 未显式传入 workspace 时使用的默认 workspace root。 |
-| `MAX_LLM_CALL_PER_RUN` | `100` | 单次 agent run 最多允许的 LLM 调用次数。 |
-| `MAX_AGENT_ROUNDS` | `100` | ReAct loop 最大轮次。 |
-| `MAX_AGENT_RUNTIME_SECONDS` | `9000` | 单次 agent run 的最大运行秒数。 |
-| `LLM_TIMEOUT_SECONDS` | `600` | 单次 LLM API 请求超时时间。 |
-| `LLM_MAX_OUTPUT_TOKENS` | `10000` | 请求模型输出的最大 token 数。 |
-| `MAX_INPUT_TOKENS` | `320000` | runtime token accounting 使用的输入 token 预算。 |
-| `LLM_MAX_RETRIES` | `10` | 瞬时 LLM API 错误最大重试次数。 |
-| `TEMPERATURE` | `0.6` | 主模型 temperature。 |
-| `TOP_P` | `0.95` | 主模型 top-p。 |
-| `PRESENCE_PENALTY` | `1.1` | provider 支持时使用的 presence penalty。 |
-| `AUTO_COMPACT_TRIGGER_TOKENS` | `128k` | 自动上下文压缩触发阈值。 |
-| `IMAGE_PART_TOKEN_ESTIMATE` | `1536` | 每个 image content part 的 token 估计。 |
-| `LLM_IMAGE_MAX_EDGE` | `1568` | 发送给多模态模型的图片最大边长。 |
-| `LLM_IMAGE_MAX_BYTES` | `524288` | 发送给多模态模型的压缩图片最大字节数。 |
-| `LLM_IMAGE_JPEG_QUALITY` | `85` | 图片压缩时的初始 JPEG 质量。 |
-| `DEBUG_AGENT` | `false` | 打印 agent loop 详细调试日志。 |
-| `DEBUG_SEARCH` | `false` | 打印 WebSearch 调试日志。 |
-| `DEBUG_SCHOLAR` | `false` | 打印 ScholarSearch 调试日志。 |
-| `DEBUG_VISIT` | `false` | 打印 WebFetch 调试日志。 |
-正式使用前，先运行：
-```bash
-python3 tests/test_tool_availability.py
-```
-预期结果是全部工具通过。缺 key、缺依赖、服务额度耗尽、外部工具不可用都应该视为失败，不应 skip。
-如果 `WebSearch`、`ScholarSearch`、`WebFetch` 或 `ReadPDF` 出现 network、TLS、upload、download、PDF parsing 相关错误，优先尝试关闭 VPN / proxy 后重跑测试。
-## 3. 命令行使用
-直接运行一个 prompt：
-```bash
-python3 run_agent.py "Who proposed the transformer architecture, and in what year was the paper published?"
-```
-指定 workspace：
-```bash
-python3 run_agent.py "Summarize this project." \
-  --workspace-root ./workspace
-```
-`./workspace` 可以替换为任何其他 workspace 目录。
-保存 trace：
-```bash
-python3 run_agent.py "Summarize this project." \
-  --workspace-root ./workspace \
-  --trace-dir ./traces
-```
-`./traces` 可以替换为任何其他 trace 目录。
-如果不传 `--trace-dir`，CLI 运行不会写 trace 文件。
-追加 role prompt：
-```bash
-python3 run_agent.py "Answer this QA task." \
-  --workspace-root ./workspace \
-  --role-prompt-file benchmarks/QA/role_prompt.md
-```
-附加本地图片：
-```bash
-python3 run_agent.py "Read the image and return JSON." \
-  --workspace-root ./workspace \
-  --images /path/to/image.png /path/to/second-image.png
-```
-每个图片路径都必须存在。RH 会把图片复制到 `./workspace/inputs/images/`，
-作为初始 `image_url` content part 传给模型，同时把每个保存后的相对路径写进
-用户文本，让后续轮次可以用 `ReadImage` 重新读取这些图片。
-在交互式终端中，CLI 会在最终回答后继续等待 follow-up。下一轮会保留之前的
-messages、工具结果和图片保存路径提示。运行过程中按 `Ctrl+C` 会在下一个安全点
-中断当前 run，并带着上下文回到 follow-up 模式。在 follow-up 输入处按 `Ctrl+C`
-或发送 EOF 可退出。脚本或 benchmark 如果需要严格的一问一答行为，使用
-`--no-chat`；需要强制开启时使用 `--chat`。
-如果需要浏览器本地界面，运行 `python3 run_frontend.py`。前端使用页面中选择的
-已有 workspace，实时显示工具步骤，支持一张或多张图片附件，并在每次最终回答后
-继续当前对话，直到点击 **New chat**。运行中发送按钮会变成 **Stop**；它会在下一个
-安全点中断，并保留上下文用于下一条消息。
-### CLI 参数
-| 参数 | 是否必需 | 含义 |
-| --- | --- | --- |
-| 位置参数 `prompt` | 是，除非使用 `--prompt-file` | prompt 文本。 |
-| `--prompt-file PATH` | 否 | 从 UTF-8 文件读取 prompt。 |
-| `--workspace-root PATH` | 否 | 本地文件工具、Bash、Terminal 使用的 workspace root；不存在会自动创建。 |
-| `--trace-dir PATH` | 否 | 写入 `trace_*.jsonl` 的目录。 |
-| `--role-prompt-file PATH` | 否，可重复 | 追加 role-specific prompt 到 base system prompt。 |
-| `--images PATH [PATH ...]` | 否 | 把一张或多张本地图片复制到 `inputs/images/` 并附加到初始用户消息。 |
-| `--chat` / `--no-chat` | 否 | 开启或关闭 CLI follow-up 模式。默认只在 stdin 和 stdout 都是交互式终端时开启。 |
-## 4. OpenAI-Compatible API Server
-ResearchHarness 可以部署为同步 OpenAI-compatible endpoint：
-```http
-POST /v1/chat/completions
-```
-这样，现有 OpenAI SDK 客户端只需要修改 `base_url` 就可以调用 ResearchHarness。
-### 启动服务
-默认部署：
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --host 127.0.0.1 \
-  --port 8686
-```
-QA/VQA benchmark 部署，可以额外加 benchmark role overlay：
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --host 127.0.0.1 \
-  --port 8686 \
-  --role-prompt-file benchmarks/QA/role_prompt.md
-```
-### API Server 参数
-| 参数 | 是否必需 | 默认值 | 含义 |
-| --- | --- | --- | --- |
-| `--api-runs-dir PATH` | 是 | 无 | API runs 的父目录；每个请求会创建一个子目录。 |
-| `--host HOST` | 否 | `127.0.0.1` | 服务监听 host。 |
-| `--port PORT` | 否 | `8686` | 服务监听端口。 |
-| `--role-prompt-file PATH` | 否，可重复 | 无 | 追加 role prompt 到 base ResearchHarness prompt。 |
-| `--input-wrapper` / `--no-input-wrapper` | 否 | 开启 | 开启或关闭输入 LLM wrapper。 |
-| `--output-wrapper` / `--no-output-wrapper` | 否 | 开启 | 开启或关闭输出 LLM wrapper。 |
-### Wrapper 模式
-默认两个 wrapper 都开启。
-严格格式 benchmark 模式：
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --role-prompt-file benchmarks/QA/role_prompt.md \
-  --input-wrapper \
-  --output-wrapper
-```
-直接 agent 模式：
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --no-input-wrapper \
-  --no-output-wrapper
-```
-输入简单但最终答案需要严格格式：
-```bash
-python3 run_server.py \
-  --api-runs-dir ./api_runs \
-  --no-input-wrapper \
-  --output-wrapper
-```
-input wrapper 的作用是把原始用户请求整理为适合 agent 稳定执行的任务。output wrapper 的作用是把 agent 的最终结果整理为用户要求的答案格式。wrapper 不应该引入新事实，只做输入规范化和输出格式化。
-API server 有意保持一问一答：每个 HTTP 请求创建一次隔离 run，并返回一个最终
-assistant message。服务端不会跨请求保存 conversation state。如果应用需要 API
-多轮对话，应由客户端保存状态，并在后续请求中传入需要的上下文。
-```mermaid
-flowchart LR
-    U[User Input] --> IW[Input Wrapper LLM]
-    IW --> A[ResearchHarness Agent]
-    A --> OW[Output Wrapper LLM]
-    OW --> O[Output]
-```
-## 5. API Workspace 结构
-每个 API 请求会创建一个 run 目录：
-```text
-./api_runs/
-`-- run_YYYYMMDD_HHMMSS_<random>/
-    |-- agent_workspace/
-    |   `-- inputs/
-    |       `-- images/
-    `-- agent_trace/
-        |-- api_trace.jsonl
-        |-- trace_*.jsonl
-        `-- _session_state.json
-```
-含义：
-| 路径 | 含义 |
-| --- | --- |
-| `run_YYYYMMDD_HHMMSS_<random>/` | 单个请求对应的 run 根目录。 |
-| `agent_workspace/` | agent 唯一可见的 workspace；文件工具、Bash、`ls`、`cat` 都从这里开始。 |
-| `agent_workspace/inputs/images/` | API 请求中用户提交的图片。 |
-| `agent_trace/` | API trace、agent trace 和 runtime 记录。 |
-对于多模态请求，每张图片会同时走两条路径：当底层模型支持多模态输入时，
-图片内容会作为初始多模态输入直接传给模型；每张图片也会保存到
-`agent_workspace/inputs/images/`。每个保存后的相对路径也会写进 agent 可见文本，
-让后续轮次可以用 `ReadImage` 读取稳定的本地路径，而不是反复依赖内联图片字节。
-这个结构把 agent 可见工作目录和服务端记录目录隔离开。
-在 API 部署模式下，trace 默认保存：每个请求都会在自己的 `agent_trace/`
-目录下写入 `api_trace.jsonl`、`trace_*.jsonl` 和 `_session_state.json`。
-## 6. 纯文本 OpenAI SDK 请求
-```python
-from openai import OpenAI
-client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[
-        {"role": "user", "content": "Answer in one sentence: what is 2 + 2?"}
-    ],
-)
-print(response.choices[0].message.content)
-```
-## 7. 多模态 OpenAI SDK 请求
-第一版 API 支持同一个请求中包含一张或多张 `data:image/...;base64,...` 形式的图片 URL。API server 不支持远程图片 URL，也不支持让外部请求直接传本地文件路径。
-下面的示例在代码中生成一张图片，并要求返回 JSON。
-```python
-import base64
-from io import BytesIO
-from PIL import Image, ImageDraw
-from openai import OpenAI
-image = Image.new("RGB", (320, 120), "white")
-draw = ImageDraw.Draw(image)
-draw.text((40, 45), "7 + 5 = ?", fill="black")
-buffer = BytesIO()
-image.save(buffer, format="PNG")
-data_url = "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode("ascii")
-client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
-response = client.chat.completions.create(
-    model="researchharness",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": (
-                        "The image contains a simple arithmetic expression. "
-                        "Return JSON with exactly two keys: expression and answer."
-                    ),
-                },
-                {"type": "image_url", "image_url": {"url": data_url}},
-            ],
-        }
-    ],
-)
-print(response.choices[0].message.content)
-```
-预期答案形状：
-```json
-{"expression":"7 + 5","answer":12}
-```
-## 8. API 请求与返回协议
-### `POST /v1/chat/completions`
-支持的请求字段：
-| 字段 | 是否必需 | 含义 |
-| --- | --- | --- |
-| `model` | 是 | 客户端看到的 model label；不会覆盖 `.env` 中的 `MODEL_NAME`。 |
-| `messages` | 是 | OpenAI-style chat messages。 |
-| `stream` | 否 | 必须不存在或为 `false`；当前不支持 streaming。 |
-| `n` | 否 | 必须不存在或为 `1`。 |
-| `max_tokens` | 否 | output wrapper 最大输出 token。 |
-| `max_completion_tokens` | 否 | output wrapper 最大输出 token 的兼容别名。 |
-| `response_format` | 否 | 作为输出格式提示传给 wrapper。 |
-支持的 message role：
-| Role | 是否支持 |
-| --- | --- |
-| `system` | 支持 |
-| `user` | 支持 |
-| `assistant` | 支持 |
-| `tool` | 不支持 |
-支持的 content 形式：
-```json
-{"role": "user", "content": "plain text"}
-```
-```json
-{
-  "role": "user",
-  "content": [
-    {"type": "text", "text": "question"},
-    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
-  ]
-}
-```
-返回结构：
-```json
-{
-  "id": "chatcmpl_...",
-  "object": "chat.completion",
-  "created": 1770000000,
-  "model": "researchharness",
-  "choices": [
-    {
-      "index": 0,
-      "message": {
-        "role": "assistant",
-        "content": "final answer"
-      },
-      "finish_reason": "stop"
-    }
-  ]
-}
-```
-调用方通常只需要读取：
-```python
-response.choices[0].message.content
-```
-### `GET /v1/health`
-返回：
-```json
-{
-  "status": "ok",
-  "api_runs_dir": "./api_runs",
-  "input_wrapper": true,
-  "output_wrapper": true
-}
-```
-## 9. 工具能力
-ResearchHarness 当前包含：
-| 工具 | 用途 |
-| --- | --- |
-| `Glob` | 按模式发现文件。 |
-| `Grep` | 在文件中搜索文本。 |
-| `Read` | 有边界地读取文本文件。 |
-| `ReadPDF` | 通过 MinerU/structai 解析 PDF。 |
-| `ReadImage` | 读取本地图片，并把图片内容传给支持 vision 的模型。 |
-| `Write` | 在 workspace 内写文件。 |
-| `Edit` | 在 workspace 内 patch 文件。 |
-| `Bash` | 在 workspace 内执行 shell 命令。 |
-| `WebSearch` | 通过 Serper 进行网页搜索。 |
-| `ScholarSearch` | 通过 Serper 进行学术搜索。 |
-| `WebFetch` | 通过 Jina 和配置模型抓取、总结网页。 |
-| `AskUser` | 交互式运行中向用户提问；某些 benchmark adapter 会禁用。 |
-| `TerminalStart` / `TerminalWrite` / `TerminalRead` / `TerminalInterrupt` / `TerminalKill` | 持久终端会话。 |
-## 10. Trace 与记录
-CLI 运行只有在传入 `--trace-dir` 时才会写 trace。如果不传
-`--trace-dir`，CLI 运行不会写 trace 文件。
-API 运行时，记录在：
-```text
-./api_runs/run_.../agent_trace/
-```
-重要文件：
-| 文件 | 含义 |
-| --- | --- |
-| `api_trace.jsonl` | input wrapper、agent result、output wrapper 记录。 |
-| `trace_*.jsonl` | agent runtime 的 flat trace。 |
-| `_session_state.json` | 当前 session state；启用 trace 时和 `trace_*.jsonl` 写在同一目录。 |
-trace 会记录工具调用、工具结果、LLM call capture payload、context compaction、错误和终止状态。
-## 11. Benchmark Adapter
-tracked benchmark contract 放在 `benchmarks/` 下。
-当前 tracked adapter：
-| Benchmark | 目录 | 说明 |
-| --- | --- | --- |
-| ResearchClawBench | `benchmarks/ResearchClawBench/` | CLI 方式接入，包含 role prompt 和 adapter。 |
-| QA / VQA | `benchmarks/QA/` | OpenAI-compatible API 方式接入，支持纯文本和多模态 QA。 |
-benchmark-specific 行为应放在 `benchmarks/`，不要塞进 `agent_base/`。
-## 12. 测试
-推荐检查：
-```bash
-python3 tests/test_tool_availability.py
-python3 tests/test_openai_api_checks.py
-python3 tests/test_agent_extension_checks.py
-python3 tests/test_edge_case_checks.py
-python3 tests/test_toolchain_validation.py
-```
-如果使用 conda：
-```bash
-conda run -n agent python3 tests/test_openai_api_checks.py
-```
-## 13. 排障
-常见问题：
-| 现象 | 可能原因 | 处理 |
-| --- | --- | --- |
-| 缺少 required env | `.env` 不完整 | 填写所有必需变量。 |
-| Web/PDF 工具失败 | VPN/proxy/TLS/服务问题 | 关闭 VPN/proxy 后重跑工具可用性测试。 |
-| 图片请求返回 400 | 图片不是 `data:image/...;base64,...` | 把图片转成 base64 data URL。 |
-| 后端模型拒绝图片 | 当前模型 endpoint 不支持 vision | 换用支持 vision 的模型，或改为纯文本任务。 |
-| API 报 streaming 错误 | 请求里传了 `stream=true` | 当前只支持同步请求。 |
-| 输出格式不符合预期 | output wrapper 关闭，或用户格式要求不明确 | 开启 `--output-wrapper`，并清楚说明输出格式。 |
-## 14. 当前边界
-第一版 API 暂不包括：
-- streaming，
-- async run status，
-- cancellation，
-- artifact download endpoint，
-- 远程图片 URL 下载，
-- 用户认证，
-- 多租户访问控制。
-这些能力以后可以作为外层服务继续扩展，不需要破坏核心 harness loop。

frontend/local_server.py CHANGED Viewed

@@ -4,7 +4,6 @@ import asyncio
 import base64
 import datetime as _dt
 import json
-import os
 import re
 import shutil
 import threading
@@ -16,7 +15,7 @@ from typing import Any
 from uuid import uuid4
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from agent_base.react_agent import MultiTurnReactAgent, default_llm_config
@@ -35,9 +34,6 @@ from agent_base.utils import (
 STATIC_DIR = Path(__file__).resolve().parent / "static"
 MAX_UPLOAD_IMAGES = 12
 MAX_IMAGE_BYTES = 12 * 1024 * 1024
-MAX_DIRECTORY_ENTRIES = 800
-FRONTEND_ROLE_PROMPT = ""
-FRONTEND_TRACE_DIR: str | None = None
 FRONTEND_MANAGED_RUNS_DIR: str | None = None
 FRONTEND_CLEANUP_RETENTION_SECONDS = 6 * 60 * 60
 FRONTEND_CLEANUP_MAX_RUNS = 40
@@ -52,14 +48,12 @@ _ACTIVE_MANAGED_RUNS_LOCK = threading.Lock()
 _COLLECTION_LOCK = threading.Lock()
 _COLLECTION_CONFIG_WARNED: set[str] = set()
-app = FastAPI(title="ResearchHarness Local UI")
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="frontend-static")
 def configure_frontend(
     *,
-    role_prompt: str = "",
-    trace_dir: str | None = None,
     managed_runs_dir: str | None = None,
     cleanup_retention_seconds: int | None = None,
     cleanup_max_runs: int | None = None,
@@ -69,11 +63,10 @@ def configure_frontend(
     collection_batch_size: int | None = None,
     collection_max_bundle_bytes: int | None = None,
 ) -> None:
-    global FRONTEND_ROLE_PROMPT, FRONTEND_TRACE_DIR, FRONTEND_MANAGED_RUNS_DIR
     global FRONTEND_CLEANUP_RETENTION_SECONDS, FRONTEND_CLEANUP_MAX_RUNS, FRONTEND_CLEANUP_INTERVAL_SECONDS
     global FRONTEND_COLLECTION_ENABLED, FRONTEND_COLLECTION_DATASET_REPO
     global FRONTEND_COLLECTION_BATCH_SIZE, FRONTEND_COLLECTION_MAX_BUNDLE_BYTES
-    FRONTEND_ROLE_PROMPT = str(role_prompt or "").strip()
     if collection_enabled is not None:
         FRONTEND_COLLECTION_ENABLED = bool(collection_enabled)
     if collection_dataset_repo is not None:
@@ -82,32 +75,22 @@ def configure_frontend(
         FRONTEND_COLLECTION_BATCH_SIZE = max(1, int(collection_batch_size))
     if collection_max_bundle_bytes is not None:
         FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = max(1, int(collection_max_bundle_bytes))
-    if trace_dir:
-        path = Path(trace_dir).expanduser()
-        if path.exists() and not path.is_dir():
-            raise ValueError(f"trace-dir is not a directory: {path}")
-        path.mkdir(parents=True, exist_ok=True)
-        FRONTEND_TRACE_DIR = str(path)
-    else:
-        FRONTEND_TRACE_DIR = None
-    if managed_runs_dir:
-        path = Path(managed_runs_dir).expanduser()
-        if path.exists() and not path.is_dir():
-            raise ValueError(f"managed-runs-dir is not a directory: {path}")
-        path.mkdir(parents=True, exist_ok=True)
-        FRONTEND_MANAGED_RUNS_DIR = str(path)
-        if cleanup_retention_seconds is not None:
-            FRONTEND_CLEANUP_RETENTION_SECONDS = max(60, int(cleanup_retention_seconds))
-        if cleanup_max_runs is not None:
-            FRONTEND_CLEANUP_MAX_RUNS = max(1, int(cleanup_max_runs))
-        if cleanup_interval_seconds is not None:
-            FRONTEND_CLEANUP_INTERVAL_SECONDS = max(60, int(cleanup_interval_seconds))
-        _collection_root()
-        cleanup_managed_runs_once()
-        _start_managed_cleanup_thread()
-    else:
-        FRONTEND_MANAGED_RUNS_DIR = None
 class FrontendRunBridge:
@@ -543,26 +526,25 @@ def _run_agent_thread(
     prompt: str,
     workspace_root: Path,
     initial_content_parts: list[dict[str, Any]],
-    trace_dir: str | None = None,
     prior_messages: list[dict[str, Any]] | None = None,
     managed_run_root: str = "",
 ) -> None:
     try:
         load_dotenv(PROJECT_ROOT / ".env")
         require_required_env("ResearchHarness frontend")
-        effective_trace_dir = trace_dir if trace_dir is not None else FRONTEND_TRACE_DIR
         agent = FrontendInteractiveAgent(
             bridge=bridge,
-            llm=default_llm_config(),
-            trace_dir=effective_trace_dir,
-            role_prompt=FRONTEND_ROLE_PROMPT or None,
         )
         bridge.send(
             {
                 "type": "run_started",
                 "model": agent.model,
                 "workspace_root": str(workspace_root),
-                "trace_dir": effective_trace_dir or "",
             }
         )
         result = agent._run_session(
@@ -590,98 +572,6 @@ def _run_agent_thread(
         bridge.send({"type": "run_error", "error": str(exc), "traceback": traceback.format_exc()})
-def _resolve_existing_workspace(raw_path: str) -> Path:
-    if not str(raw_path or "").strip():
-        raise ValueError("workspace path is required")
-    path = Path(raw_path).expanduser()
-    if not path.is_absolute():
-        path = (Path.cwd() / path).resolve()
-    else:
-        path = path.resolve()
-    if not path.exists() or not path.is_dir():
-        raise ValueError(f"workspace must be an existing directory: {path}")
-    return path
-def _resolve_directory_browser_path(raw_path: str = "") -> Path:
-    text = str(raw_path or "").strip()
-    if text:
-        path = Path(text).expanduser()
-    else:
-        path = Path.home() if Path.home().exists() else PROJECT_ROOT
-    if not path.is_absolute():
-        path = (Path.cwd() / path).resolve()
-    else:
-        path = path.resolve()
-    if not path.exists() or not path.is_dir():
-        raise ValueError(f"directory does not exist: {path}")
-    return path
-def _directory_root_choices() -> list[dict[str, str]]:
-    candidates = [Path.home(), PROJECT_ROOT, PROJECT_ROOT / "workspace", Path.cwd(), Path("/mnt"), Path("/")]
-    if os.name == "nt":
-        for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
-            candidates.append(Path(f"{letter}:\\"))
-    seen: set[str] = set()
-    roots: list[dict[str, str]] = []
-    for candidate in candidates:
-        try:
-            resolved = candidate.expanduser().resolve()
-        except (OSError, RuntimeError):
-            continue
-        if not resolved.exists() or not resolved.is_dir():
-            continue
-        key = str(resolved)
-        if key in seen:
-            continue
-        seen.add(key)
-        label = "Home" if resolved == Path.home().resolve() else (resolved.name or key)
-        roots.append({"label": label, "path": key})
-    return roots
-def _workspace_directory_payload(raw_path: str = "") -> dict[str, Any]:
-    directory = _resolve_directory_browser_path(raw_path)
-    entries: list[dict[str, str]] = []
-    truncated = False
-    try:
-        children = sorted(directory.iterdir(), key=lambda item: item.name.casefold())
-    except PermissionError as exc:
-        raise ValueError(f"permission denied: {directory}") from exc
-    except OSError as exc:
-        raise ValueError(f"cannot read directory {directory}: {exc}") from exc
-    for child in children:
-        if len(entries) >= MAX_DIRECTORY_ENTRIES:
-            truncated = True
-            break
-        try:
-            if not child.is_dir():
-                continue
-        except OSError:
-            continue
-        entries.append({"name": child.name or str(child), "path": str(child)})
-    parent = directory.parent if directory.parent != directory else None
-    return {
-        "path": str(directory),
-        "parent": str(parent) if parent else "",
-        "entries": entries,
-        "truncated": truncated,
-        "roots": _directory_root_choices(),
-    }
-@app.get("/api/workspace-directories")
-def workspace_directories(path: str = "") -> JSONResponse:
-    try:
-        return JSONResponse(_workspace_directory_payload(path))
-    except ValueError as exc:
-        return JSONResponse({"error": str(exc)}, status_code=400)
 @app.get("/")
 def index() -> FileResponse:
     return FileResponse(STATIC_DIR / "index.html")
@@ -705,7 +595,7 @@ async def websocket_endpoint(websocket: WebSocket) -> None:
     sender_task = asyncio.create_task(sender())
     try:
-        await websocket.send_json({"type": "ready", "managed_workspace": bool(FRONTEND_MANAGED_RUNS_DIR)})
         while True:
             message = await websocket.receive_json()
             message_type = str(message.get("type", "")).strip()
@@ -719,30 +609,18 @@ async def websocket_endpoint(websocket: WebSocket) -> None:
                     continue
                 try:
                     continue_conversation = bool(message.get("continue_conversation"))
                     prior_messages = None
-                    effective_trace_dir = FRONTEND_TRACE_DIR
-                    if FRONTEND_MANAGED_RUNS_DIR:
-                        if continue_conversation:
-                            if not bridge.conversation_messages or not bridge.managed_workspace_root:
-                                bridge.send({"type": "run_error", "error": "No active conversation is available on the server. Click New chat and start again."})
-                                continue
-                            workspace_root = Path(bridge.managed_workspace_root)
-                            effective_trace_dir = bridge.managed_trace_dir or FRONTEND_TRACE_DIR
-                            prior_messages = bridge.conversation_messages
-                        else:
-                            _release_managed_run(bridge)
-                            workspace_root, effective_trace_dir = _create_managed_run(bridge)
                     else:
-                        workspace_root = _resolve_existing_workspace(str(message.get("workspace_root", "")))
-                        if continue_conversation:
-                            if not bridge.conversation_messages:
-                                bridge.send({"type": "run_error", "error": "No active conversation is available on the server. Click New chat and start again."})
-                                continue
-                            elif bridge.conversation_workspace_root and bridge.conversation_workspace_root != str(workspace_root):
-                                bridge.send({"type": "run_error", "error": "Workspace changed. Start a new chat before using a different workspace."})
-                                continue
-                            else:
-                                prior_messages = bridge.conversation_messages
                     image_parts, saved_paths = save_uploaded_images(
                         workspace_root,
                         message.get("images", []) if isinstance(message.get("images", []), list) else [],
@@ -768,6 +646,7 @@ async def websocket_endpoint(websocket: WebSocket) -> None:
                         "trace_dir": effective_trace_dir,
                         "prior_messages": prior_messages,
                         "managed_run_root": bridge.managed_run_root,
                     },
                     daemon=True,
                 )

 import base64
 import datetime as _dt
 import json
 import re
 import shutil
 import threading
 from uuid import uuid4
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 from agent_base.react_agent import MultiTurnReactAgent, default_llm_config
 STATIC_DIR = Path(__file__).resolve().parent / "static"
 MAX_UPLOAD_IMAGES = 12
 MAX_IMAGE_BYTES = 12 * 1024 * 1024
 FRONTEND_MANAGED_RUNS_DIR: str | None = None
 FRONTEND_CLEANUP_RETENTION_SECONDS = 6 * 60 * 60
 FRONTEND_CLEANUP_MAX_RUNS = 40
 _COLLECTION_LOCK = threading.Lock()
 _COLLECTION_CONFIG_WARNED: set[str] = set()
+app = FastAPI(title="ResearchHarness Space UI")
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="frontend-static")
 def configure_frontend(
     *,
     managed_runs_dir: str | None = None,
     cleanup_retention_seconds: int | None = None,
     cleanup_max_runs: int | None = None,
     collection_batch_size: int | None = None,
     collection_max_bundle_bytes: int | None = None,
 ) -> None:
+    global FRONTEND_MANAGED_RUNS_DIR
     global FRONTEND_CLEANUP_RETENTION_SECONDS, FRONTEND_CLEANUP_MAX_RUNS, FRONTEND_CLEANUP_INTERVAL_SECONDS
     global FRONTEND_COLLECTION_ENABLED, FRONTEND_COLLECTION_DATASET_REPO
     global FRONTEND_COLLECTION_BATCH_SIZE, FRONTEND_COLLECTION_MAX_BUNDLE_BYTES
     if collection_enabled is not None:
         FRONTEND_COLLECTION_ENABLED = bool(collection_enabled)
     if collection_dataset_repo is not None:
         FRONTEND_COLLECTION_BATCH_SIZE = max(1, int(collection_batch_size))
     if collection_max_bundle_bytes is not None:
         FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = max(1, int(collection_max_bundle_bytes))
+    if not managed_runs_dir:
+        raise ValueError("managed_runs_dir is required for the Space frontend")
+    path = Path(managed_runs_dir).expanduser()
+    if path.exists() and not path.is_dir():
+        raise ValueError(f"managed-runs-dir is not a directory: {path}")
+    path.mkdir(parents=True, exist_ok=True)
+    FRONTEND_MANAGED_RUNS_DIR = str(path)
+    if cleanup_retention_seconds is not None:
+        FRONTEND_CLEANUP_RETENTION_SECONDS = max(60, int(cleanup_retention_seconds))
+    if cleanup_max_runs is not None:
+        FRONTEND_CLEANUP_MAX_RUNS = max(1, int(cleanup_max_runs))
+    if cleanup_interval_seconds is not None:
+        FRONTEND_CLEANUP_INTERVAL_SECONDS = max(60, int(cleanup_interval_seconds))
+    _collection_root()
+    cleanup_managed_runs_once()
+    _start_managed_cleanup_thread()
 class FrontendRunBridge:
     prompt: str,
     workspace_root: Path,
     initial_content_parts: list[dict[str, Any]],
+    trace_dir: str,
     prior_messages: list[dict[str, Any]] | None = None,
     managed_run_root: str = "",
+    model_name: str = "",
 ) -> None:
     try:
         load_dotenv(PROJECT_ROOT / ".env")
         require_required_env("ResearchHarness frontend")
         agent = FrontendInteractiveAgent(
             bridge=bridge,
+            llm=default_llm_config(model_name=model_name or None),
+            trace_dir=trace_dir,
         )
         bridge.send(
             {
                 "type": "run_started",
                 "model": agent.model,
                 "workspace_root": str(workspace_root),
+                "trace_dir": trace_dir,
             }
         )
         result = agent._run_session(
         bridge.send({"type": "run_error", "error": str(exc), "traceback": traceback.format_exc()})
 @app.get("/")
 def index() -> FileResponse:
     return FileResponse(STATIC_DIR / "index.html")
     sender_task = asyncio.create_task(sender())
     try:
+        await websocket.send_json({"type": "ready", "managed_workspace": True})
         while True:
             message = await websocket.receive_json()
             message_type = str(message.get("type", "")).strip()
                     continue
                 try:
                     continue_conversation = bool(message.get("continue_conversation"))
+                    model_name = str(message.get("model_name", "") or "").strip()
                     prior_messages = None
+                    if continue_conversation:
+                        if not bridge.conversation_messages or not bridge.managed_workspace_root:
+                            bridge.send({"type": "run_error", "error": "No active conversation is available on the server. Click New chat and start again."})
+                            continue
+                        workspace_root = Path(bridge.managed_workspace_root)
+                        effective_trace_dir = bridge.managed_trace_dir
+                        prior_messages = bridge.conversation_messages
                     else:
+                        _release_managed_run(bridge)
+                        workspace_root, effective_trace_dir = _create_managed_run(bridge)
                     image_parts, saved_paths = save_uploaded_images(
                         workspace_root,
                         message.get("images", []) if isinstance(message.get("images", []), list) else [],
                         "trace_dir": effective_trace_dir,
                         "prior_messages": prior_messages,
                         "managed_run_root": bridge.managed_run_root,
+                        "model_name": model_name,
                     },
                     daemon=True,
                 )

frontend/static/app.css CHANGED Viewed

@@ -201,7 +201,8 @@ button {
 .plain,
 .send-button,
-.icon-button {
   border: 1px solid var(--border);
   border-radius: 999px;
   background: var(--panel-strong);
@@ -214,12 +215,24 @@ button {
   padding: 8px 12px;
 }
 .plain:hover,
-.icon-button:hover {
   border-color: rgba(var(--glow-rgb), 0.38);
   transform: translateY(-1px);
 }
 .workspace-strip {
   position: sticky;
   top: 66px;
@@ -266,13 +279,11 @@ button {
   -webkit-overflow-scrolling: touch;
 }
-.messages::-webkit-scrollbar,
-.workspace-list::-webkit-scrollbar {
   width: 10px;
 }
-.messages::-webkit-scrollbar-thumb,
-.workspace-list::-webkit-scrollbar-thumb {
   border: 3px solid transparent;
   border-radius: 999px;
   background: rgba(var(--glow-rgb), 0.24);
@@ -696,172 +707,6 @@ button:disabled {
   text-align: center;
 }
-.modal {
-  position: fixed;
-  inset: 0;
-  z-index: 30;
-  display: grid;
-  place-items: center;
-  padding: 18px;
-  background: rgba(0, 0, 0, 0.24);
-  backdrop-filter: blur(14px);
-}
-.modal.hidden {
-  display: none;
-}
-.modal-card {
-  display: grid;
-  grid-template-rows: auto auto auto minmax(0, 1fr) auto;
-  gap: 12px;
-  width: min(780px, 100%);
-  max-height: min(760px, 82vh);
-  border: 1px solid var(--border);
-  border-radius: 28px;
-  background: var(--panel-strong);
-  box-shadow: 0 24px 88px rgba(0, 0, 0, 0.22);
-  padding: 18px;
-}
-.modal-head,
-.modal-path-row,
-.modal-actions {
-  display: flex;
-  align-items: center;
-  gap: 12px;
-}
-.modal-head {
-  justify-content: space-between;
-}
-.modal-head h2,
-.modal-head p {
-  margin: 0;
-}
-.modal-head h2 {
-  font-size: 1.18rem;
-  letter-spacing: -0.025em;
-}
-.modal-head p,
-.modal-actions span {
-  color: var(--muted);
-  font-size: 0.86rem;
-}
-.modal-path-row {
-  border: 1px solid var(--border);
-  border-radius: 18px;
-  background: var(--hover);
-  padding: 8px;
-}
-.modal-path-row input {
-  min-width: 0;
-  flex: 1;
-  border: 0;
-  outline: 0;
-  background: transparent;
-  color: var(--text);
-}
-.workspace-roots {
-  display: flex;
-  flex-wrap: wrap;
-  gap: 8px;
-}
-.root-chip {
-  max-width: 190px;
-  overflow: hidden;
-  border: 1px solid var(--border);
-  border-radius: 999px;
-  background: var(--panel);
-  color: var(--text);
-  font-weight: 800;
-  padding: 7px 11px;
-  text-overflow: ellipsis;
-  white-space: nowrap;
-}
-.workspace-list {
-  display: grid;
-  align-content: start;
-  gap: 7px;
-  min-height: 0;
-  overflow: auto;
-  padding-right: 4px;
-}
-.dir-row {
-  display: grid;
-  grid-template-columns: auto minmax(0, 1fr) auto;
-  align-items: center;
-  gap: 10px;
-  width: 100%;
-  border: 1px solid var(--border);
-  border-radius: 18px;
-  background: var(--panel);
-  color: var(--text);
-  padding: 10px 12px;
-  text-align: left;
-}
-.dir-row:hover,
-.root-chip:hover {
-  border-color: rgba(var(--glow-rgb), 0.38);
-  background: var(--hover);
-}
-.dir-icon {
-  display: grid;
-  place-items: center;
-  width: 24px;
-  height: 24px;
-  border-radius: 50%;
-  background: rgba(var(--glow-rgb), 0.1);
-  font-weight: 900;
-}
-.dir-main {
-  min-width: 0;
-}
-.dir-main strong,
-.dir-main small {
-  display: block;
-  overflow: hidden;
-  text-overflow: ellipsis;
-  white-space: nowrap;
-}
-.dir-main small {
-  margin-top: 2px;
-  color: var(--muted);
-  font-size: 0.78rem;
-}
-.dir-action {
-  color: var(--muted);
-  font-size: 0.76rem;
-  font-weight: 850;
-}
-.dir-empty {
-  border: 1px dashed var(--border);
-  border-radius: 18px;
-  padding: 18px;
-  color: var(--muted);
-  text-align: center;
-}
-.modal-actions {
-  justify-content: space-between;
-}
 #theme-switcher {
   position: fixed;
   right: 22px;
@@ -984,22 +829,6 @@ button:disabled {
     max-width: none;
   }
-  .modal-card {
-    max-height: 88vh;
-    padding: 14px;
-  }
-  .modal-head,
-  .modal-actions {
-    align-items: stretch;
-    flex-direction: column;
-  }
-  .modal-path-row {
-    align-items: stretch;
-    flex-direction: column;
-  }
   .message,
   .event {
     max-width: 96%;

 .plain,
 .send-button,
+.icon-button,
+.model-select {
   border: 1px solid var(--border);
   border-radius: 999px;
   background: var(--panel-strong);
   padding: 8px 12px;
 }
+.model-select {
+  min-width: 150px;
+  padding: 8px 34px 8px 12px;
+}
 .plain:hover,
+.icon-button:hover,
+.model-select:hover:not(:disabled),
+.model-select:focus-visible {
   border-color: rgba(var(--glow-rgb), 0.38);
   transform: translateY(-1px);
 }
+.model-select:disabled {
+  cursor: not-allowed;
+  opacity: 0.58;
+}
 .workspace-strip {
   position: sticky;
   top: 66px;
   -webkit-overflow-scrolling: touch;
 }
+.messages::-webkit-scrollbar {
   width: 10px;
 }
+.messages::-webkit-scrollbar-thumb {
   border: 3px solid transparent;
   border-radius: 999px;
   background: rgba(var(--glow-rgb), 0.24);
   text-align: center;
 }
 #theme-switcher {
   position: fixed;
   right: 22px;
     max-width: none;
   }
   .message,
   .event {
     max-width: 96%;

frontend/static/app.js CHANGED Viewed

@@ -131,28 +131,16 @@
   var images = [];
   var COLLAPSED_STEP_HEIGHT = 220;
-  var workspaceInput = document.getElementById("workspaceInput");
-  var workspaceStrip = document.getElementById("workspaceStrip");
   var promptInput = document.getElementById("promptInput");
   var runBtn = document.getElementById("runBtn");
   var newBtn = document.getElementById("newBtn");
-  var pickWorkspaceBtn = document.getElementById("pickWorkspaceBtn");
   var attachBtn = document.getElementById("attachBtn");
   var imageInput = document.getElementById("imageInput");
   var imagePreview = document.getElementById("imagePreview");
   var dropZone = document.getElementById("dropZone");
   var timeline = document.getElementById("timeline");
   var statusPill = document.getElementById("statusPill");
-  var workspaceMeta = document.getElementById("workspaceMeta");
-  var workspaceModal = document.getElementById("workspaceModal");
-  var workspaceCloseBtn = document.getElementById("workspaceCloseBtn");
-  var workspacePathInput = document.getElementById("workspacePathInput");
-  var workspaceGoBtn = document.getElementById("workspaceGoBtn");
-  var workspaceRoots = document.getElementById("workspaceRoots");
-  var workspaceList = document.getElementById("workspaceList");
-  var workspaceUseBtn = document.getElementById("workspaceUseBtn");
-  var workspacePickerHint = document.getElementById("workspacePickerHint");
-  var currentWorkspacePath = "";
   var defaultPromptPlaceholder = promptInput.getAttribute("placeholder") || "Message ResearchHarness";
   function escapeHtml(value) {
@@ -223,23 +211,20 @@
     statusPill.className = "status " + (kind || "idle");
   }
-  function setWorkspaceSelected(path) {
-    workspaceInput.value = path;
-    workspaceMeta.textContent = "Workspace selected: " + path;
-  }
   function updateComposerMode() {
     if (pendingAskId) {
       runBtn.disabled = false;
       runBtn.classList.remove("is-running");
       runBtn.textContent = "Reply";
       promptInput.placeholder = defaultPromptPlaceholder;
       return;
     }
     runBtn.disabled = running && interrupting;
     runBtn.classList.toggle("is-running", running);
     runBtn.textContent = running ? (interrupting ? "Stopping" : "Stop") : "Run";
     promptInput.placeholder = defaultPromptPlaceholder;
   }
   function setRunning(active, statusText) {
@@ -254,7 +239,7 @@
     timeline.innerHTML = ''
       + '<div class="welcome">'
       + '<h1>What should the agent do?</h1>'
-      + '<p>Ask a question, attach images, choose a local workspace, and watch tool calls stream here.</p>'
       + '</div>';
   }
@@ -551,7 +536,7 @@
     ws.send(JSON.stringify({
       type: "start",
       prompt: prompt,
-      workspace_root: workspaceInput.value,
       images: sentImages,
       continue_conversation: continueConversation
     }));
@@ -611,89 +596,6 @@
     });
   }
-  function openWorkspaceModal() {
-    workspaceModal.classList.remove("hidden");
-    loadWorkspaceDirectory(workspaceInput.value.trim());
-  }
-  function closeWorkspaceModal() {
-    workspaceModal.classList.add("hidden");
-  }
-  function setWorkspacePickerBusy(text) {
-    workspaceList.innerHTML = '<div class="dir-empty">' + escapeHtml(text || "Loading...") + "</div>";
-    workspacePickerHint.textContent = text || "Loading...";
-  }
-  function renderWorkspaceError(message) {
-    workspaceList.innerHTML = '<div class="dir-empty error-text">' + escapeHtml(message) + "</div>";
-    workspacePickerHint.textContent = "Paste a valid existing folder path, then press Go.";
-  }
-  function directoryRow(label, path, actionLabel, onClick) {
-    var row = document.createElement("button");
-    row.type = "button";
-    row.className = "dir-row";
-    row.innerHTML = ''
-      + '<span class="dir-icon">&rsaquo;</span>'
-      + '<span class="dir-main"><strong>' + escapeHtml(label) + '</strong><small>' + escapeHtml(path) + '</small></span>'
-      + '<span class="dir-action">' + escapeHtml(actionLabel || "Open") + '</span>';
-    row.addEventListener("click", onClick);
-    return row;
-  }
-  function renderWorkspacePicker(payload) {
-    currentWorkspacePath = payload.path || "";
-    workspacePathInput.value = currentWorkspacePath;
-    workspaceRoots.innerHTML = "";
-    (payload.roots || []).forEach(function (root) {
-      var chip = document.createElement("button");
-      chip.type = "button";
-      chip.className = "root-chip";
-      chip.textContent = root.label || root.path;
-      chip.title = root.path || "";
-      chip.addEventListener("click", function () {
-        loadWorkspaceDirectory(root.path || "");
-      });
-      workspaceRoots.appendChild(chip);
-    });
-    workspaceList.innerHTML = "";
-    if (payload.parent) {
-      workspaceList.appendChild(directoryRow("..", payload.parent, "Parent", function () {
-        loadWorkspaceDirectory(payload.parent);
-      }));
-    }
-    (payload.entries || []).forEach(function (entry) {
-      workspaceList.appendChild(directoryRow(entry.name, entry.path, "Open", function () {
-        loadWorkspaceDirectory(entry.path);
-      }));
-    });
-    if (!payload.parent && !(payload.entries || []).length) {
-      workspaceList.innerHTML = '<div class="dir-empty">No readable child folders.</div>';
-    }
-    workspacePickerHint.textContent = payload.truncated
-      ? "Directory list was truncated. Paste a deeper path if needed."
-      : "Current folder will be used when you click Use this folder.";
-  }
-  async function loadWorkspaceDirectory(path) {
-    setWorkspacePickerBusy("Loading folders...");
-    try {
-      var url = "/api/workspace-directories";
-      if (path) url += "?path=" + encodeURIComponent(path);
-      var response = await fetch(url);
-      var payload = await response.json();
-      if (!response.ok || payload.error) {
-        renderWorkspaceError(payload.error || "Cannot open this folder.");
-        return;
-      }
-      renderWorkspacePicker(payload);
-    } catch (error) {
-      renderWorkspaceError(String(error));
-    }
-  }
   runBtn.addEventListener("click", sendStart);
   timeline.addEventListener("scroll", syncTimelineFollowMode);
   timeline.addEventListener("wheel", function (event) {
@@ -730,29 +632,6 @@
   });
   imageInput.addEventListener("change", function (event) { addImageFiles(event.target.files); });
-  pickWorkspaceBtn.addEventListener("click", function () {
-    openWorkspaceModal();
-  });
-  workspaceCloseBtn.addEventListener("click", closeWorkspaceModal);
-  workspaceModal.addEventListener("click", function (event) {
-    if (event.target === workspaceModal) closeWorkspaceModal();
-  });
-  workspaceGoBtn.addEventListener("click", function () {
-    loadWorkspaceDirectory(workspacePathInput.value.trim());
-  });
-  workspacePathInput.addEventListener("keydown", function (event) {
-    if (event.key === "Enter") {
-      event.preventDefault();
-      loadWorkspaceDirectory(workspacePathInput.value.trim());
-    }
-  });
-  workspaceUseBtn.addEventListener("click", function () {
-    if (!currentWorkspacePath) return;
-    setWorkspaceSelected(currentWorkspacePath);
-    closeWorkspaceModal();
-  });
   ["dragenter", "dragover"].forEach(function (name) {
     dropZone.addEventListener(name, function (event) {
       event.preventDefault();

   var images = [];
   var COLLAPSED_STEP_HEIGHT = 220;
   var promptInput = document.getElementById("promptInput");
   var runBtn = document.getElementById("runBtn");
   var newBtn = document.getElementById("newBtn");
+  var modelSelect = document.getElementById("modelSelect");
   var attachBtn = document.getElementById("attachBtn");
   var imageInput = document.getElementById("imageInput");
   var imagePreview = document.getElementById("imagePreview");
   var dropZone = document.getElementById("dropZone");
   var timeline = document.getElementById("timeline");
   var statusPill = document.getElementById("statusPill");
   var defaultPromptPlaceholder = promptInput.getAttribute("placeholder") || "Message ResearchHarness";
   function escapeHtml(value) {
     statusPill.className = "status " + (kind || "idle");
   }
   function updateComposerMode() {
     if (pendingAskId) {
       runBtn.disabled = false;
       runBtn.classList.remove("is-running");
       runBtn.textContent = "Reply";
       promptInput.placeholder = defaultPromptPlaceholder;
+      if (modelSelect) modelSelect.disabled = true;
       return;
     }
     runBtn.disabled = running && interrupting;
     runBtn.classList.toggle("is-running", running);
     runBtn.textContent = running ? (interrupting ? "Stopping" : "Stop") : "Run";
     promptInput.placeholder = defaultPromptPlaceholder;
+    if (modelSelect) modelSelect.disabled = running;
   }
   function setRunning(active, statusText) {
     timeline.innerHTML = ''
       + '<div class="welcome">'
       + '<h1>What should the agent do?</h1>'
+      + '<p>Ask a question, attach images, and watch tool calls stream from an isolated temporary workspace.</p>'
       + '</div>';
   }
     ws.send(JSON.stringify({
       type: "start",
       prompt: prompt,
+      model_name: modelSelect ? modelSelect.value : "",
       images: sentImages,
       continue_conversation: continueConversation
     }));
     });
   }
   runBtn.addEventListener("click", sendStart);
   timeline.addEventListener("scroll", syncTimelineFollowMode);
   timeline.addEventListener("wheel", function (event) {
   });
   imageInput.addEventListener("change", function (event) { addImageFiles(event.target.files); });
   ["dragenter", "dragover"].forEach(function (name) {
     dropZone.addEventListener(name, function (event) {
       event.preventDefault();

frontend/static/index.html CHANGED Viewed

@@ -19,13 +19,15 @@
           </div>
         </div>
         <div class="top-actions">
-          <button id="pickWorkspaceBtn" class="plain" type="button" hidden>Open workspace</button>
           <button id="newBtn" class="plain" type="button">New chat</button>
         </div>
       </header>
       <section id="workspaceStrip" class="workspace-strip">
-        <input id="workspaceInput" type="hidden" value="" />
         <span id="workspaceMeta">Managed temporary workspace. Each chat uses an isolated runtime directory.</span>
       </section>
@@ -48,27 +50,6 @@
       </footer>
     </main>
-    <section id="workspaceModal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="workspaceModalTitle">
-      <div class="modal-card">
-        <header class="modal-head">
-          <div>
-            <h2 id="workspaceModalTitle">Open workspace</h2>
-            <p>Choose an existing local folder. Unicode paths are supported.</p>
-          </div>
-          <button id="workspaceCloseBtn" class="plain" type="button" aria-label="Close workspace picker">Close</button>
-        </header>
-        <div class="modal-path-row">
-          <input id="workspacePathInput" type="text" autocomplete="off" placeholder="Paste a folder path..." />
-          <button id="workspaceGoBtn" class="plain" type="button">Go</button>
-        </div>
-        <div id="workspaceRoots" class="workspace-roots"></div>
-        <div id="workspaceList" class="workspace-list"></div>
-        <footer class="modal-actions">
-          <span id="workspacePickerHint">Select a folder to use as the agent workspace.</span>
-          <button id="workspaceUseBtn" class="send-button" type="button">Use this folder</button>
-        </footer>
-      </div>
-    </section>
     <nav class="space-links" aria-label="Project links">
       <a href="https://github.com/black-yt/ResearchHarness" target="_blank" rel="noopener noreferrer" title="GitHub">
         <svg viewBox="0 0 16 16" aria-hidden="true"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>

           </div>
         </div>
         <div class="top-actions">
+          <select id="modelSelect" class="model-select" aria-label="Model">
+            <option value="gpt-5.5">gpt-5.5</option>
+            <option value="claude-opus-4-7">claude-opus-4-7</option>
+          </select>
           <button id="newBtn" class="plain" type="button">New chat</button>
         </div>
       </header>
       <section id="workspaceStrip" class="workspace-strip">
         <span id="workspaceMeta">Managed temporary workspace. Each chat uses an isolated runtime directory.</span>
       </section>
       </footer>
     </main>
     <nav class="space-links" aria-label="Project links">
       <a href="https://github.com/black-yt/ResearchHarness" target="_blank" rel="noopener noreferrer" title="GitHub">
         <svg viewBox="0 0 16 16" aria-hidden="true"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>

run_agent.py DELETED Viewed

@@ -1,7 +0,0 @@
-"""Thin top-level CLI entrypoint for the ResearchHarness agent."""
-from agent_base.react_agent import main
-if __name__ == "__main__":
-    raise SystemExit(main())

run_frontend.py DELETED Viewed

@@ -1,48 +0,0 @@
-"""Launch the local ResearchHarness browser UI."""
-from __future__ import annotations
-import argparse
-import sys
-import threading
-import webbrowser
-import uvicorn
-from agent_base.utils import read_role_prompt_files
-from frontend.local_server import app, configure_frontend
-def main(argv: list[str] | None = None) -> int:
-    parser = argparse.ArgumentParser(description="Run the local ResearchHarness frontend.")
-    parser.add_argument("--host", default="127.0.0.1", help="Host to bind. Default: 127.0.0.1")
-    parser.add_argument("--port", type=int, default=8765, help="Port to bind. Default: 8765")
-    parser.add_argument("--no-browser", action="store_true", help="Do not open the browser automatically.")
-    parser.add_argument("--trace-dir", help="Optional directory where frontend agent traces are written.")
-    parser.add_argument(
-        "--role-prompt-file",
-        action="append",
-        default=[],
-        dest="role_prompt_files",
-        metavar="PATH",
-        help="Append one role-specific prompt file to the frontend agent. May be passed multiple times.",
-    )
-    args = parser.parse_args(argv)
-    try:
-        role_prompt = read_role_prompt_files(args.role_prompt_files)
-        configure_frontend(role_prompt=role_prompt, trace_dir=args.trace_dir)
-    except (OSError, ValueError) as exc:
-        print(str(exc), file=sys.stderr)
-        return 1
-    url = f"http://{args.host}:{args.port}"
-    if not args.no_browser:
-        threading.Timer(0.8, lambda: webbrowser.open(url)).start()
-    print(f"ResearchHarness frontend: {url}")
-    uvicorn.run(app, host=args.host, port=args.port, reload=False)
-    return 0
-if __name__ == "__main__":
-    raise SystemExit(main())

run_server.py DELETED Viewed

@@ -1,61 +0,0 @@
-"""Run ResearchHarness as a minimal OpenAI-compatible API server."""
-from __future__ import annotations
-import argparse
-import sys
-from agent_base.utils import PROJECT_ROOT, MissingRequiredEnvError, load_dotenv, require_required_env
-from api.openai_server import serve
-def main(argv: list[str] | None = None) -> int:
-    parser = argparse.ArgumentParser(description="Serve ResearchHarness through /v1/chat/completions.")
-    parser.add_argument(
-        "--api-runs-dir",
-        required=True,
-        dest="api_runs_dir",
-        help="Directory where the server creates one isolated subdirectory per request.",
-    )
-    parser.add_argument("--host", default="127.0.0.1", help="Host to bind. Defaults to 127.0.0.1.")
-    parser.add_argument("--port", type=int, default=8686, help="Port to bind. Defaults to 8686.")
-    parser.add_argument(
-        "--role-prompt-file",
-        action="append",
-        default=[],
-        dest="role_prompt_files",
-        help="Optional role prompt file appended to the base ResearchHarness prompt.",
-    )
-    parser.add_argument(
-        "--input-wrapper",
-        action=argparse.BooleanOptionalAction,
-        default=True,
-        help="Enable or disable the input LLM wrapper. Enabled by default.",
-    )
-    parser.add_argument(
-        "--output-wrapper",
-        action=argparse.BooleanOptionalAction,
-        default=True,
-        help="Enable or disable the output LLM wrapper. Enabled by default.",
-    )
-    args = parser.parse_args(argv)
-    load_dotenv(PROJECT_ROOT / ".env")
-    try:
-        require_required_env("ResearchHarness API server")
-        serve(
-            api_runs_dir=args.api_runs_dir,
-            host=args.host,
-            port=args.port,
-            role_prompt_files=list(args.role_prompt_files),
-            input_wrapper=args.input_wrapper,
-            output_wrapper=args.output_wrapper,
-        )
-    except (MissingRequiredEnvError, ValueError) as exc:
-        print(str(exc), file=sys.stderr)
-        return 1
-    return 0
-if __name__ == "__main__":
-    raise SystemExit(main())

traces/.gitkeep DELETED Viewed

	@@ -1 +0,0 @@
1	-

workspace/.gitkeep DELETED Viewed

	@@ -1 +0,0 @@
1	-