Reinforcement Learning
Transformers
English
post-training
distillation
agentic-coding
composer-2.5
cursor
kimi-k2
grpo
dapo
diloco
openenv
trl
verl
research
methodology
Instructions to use Codeseys/composer-replication-framework with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Codeseys/composer-replication-framework with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Codeseys/composer-replication-framework", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Baladithya Balamurugan
Wave 21: Stage-0 dataset pipeline — swesmith engine, rollout harness, gates, contract
9a2ce20 | [build-system] | |
| # Build backend declaration: hatchling builds the wheel and sdist configured | |
| # under [tool.hatch.build.targets.*]; 1.21 is the minimum accepted release. | |
| requires = ["hatchling>=1.21"] | |
| build-backend = "hatchling.build" | |
| [project] | |
| name = "composer-replication" | |
| version = "0.1.0" | |
| description = "Open replication framework for Cursor Composer 2.5: GRPO + SDPO + multi-teacher trace-replay DPO with optional DiLoCo outer loop." | |
| readme = "README.md" | |
| # License text is read from the LICENSE file; keep the MIT classifier below in sync with it. | |
| license = { file = "LICENSE" } | |
| authors = [ | |
| { name = "Codeseys", email = "bbaladithyab@gmail.com" } | |
| ] | |
| keywords = [ | |
| "rl-training", | |
| "rlvr", | |
| "grpo", | |
| "sdpo", | |
| "simpo", | |
| "taid", | |
| "dpo", | |
| "diloco", | |
| "decoupled-diloco", | |
| "agentic", | |
| "coding-agents", | |
| "composer-2-5", | |
| "cursor", | |
| "trl", | |
| "verl", | |
| "prime-rl", | |
| "openenv", | |
| "torchft", | |
| "modal", | |
| "huggingface-jobs", | |
| ] | |
| classifiers = [ | |
| "Development Status :: 3 - Alpha", | |
| "Intended Audience :: Science/Research", | |
| "License :: OSI Approved :: MIT License", | |
| "Programming Language :: Python :: 3.10", | |
| "Programming Language :: Python :: 3.11", | |
| "Programming Language :: Python :: 3.12", | |
| "Topic :: Scientific/Engineering :: Artificial Intelligence", | |
| ] | |
| # Floor matches the 3.10-3.12 classifiers above and ruff's target-version (py310). | |
| requires-python = ">=3.10" | |
| # Core runtime requirements only; every other integration is opt-in through | |
| # [project.optional-dependencies]. | |
| dependencies = [ | |
| "torch>=2.0", | |
| "transformers>=4.46", | |
| ] | |
| [project.optional-dependencies] | |
| # Real teacher-replay over OpenRouter | |
| replay = [ | |
| "httpx>=0.27", | |
| ] | |
| # DiLoCo outer-loop optimizer (single-process) | |
| diloco = [ | |
| "torchft-nightly", | |
| ] | |
| # Decoupled DiLoCo over serverless executors (per ADR-005) | |
| # fsspec gives the object-store rendezvous one code path (s3://, gs://, hf://, | |
| # file://); s3fs is the concrete S3 backend (the AWS default per the EKS design); | |
| # boto3 + kubernetes are needed by the AWS leaf adapters (SageMakerExecutor uses | |
| # boto3.create_training_job; EKSExecutor uses the kubernetes BatchV1 client). | |
| # NOTE: the boto3 and kubernetes floors here mirror the `aws` and `eks` extras | |
| # defined further down — bump them together. | |
| serverless = [ | |
| "fsspec>=2024.6", | |
| "huggingface_hub>=0.27", # for hf:// fsspec backend + HF Jobs | |
| "s3fs>=2024.6", # concrete S3 backend for ObjectStoreAllReduce (AWS default) | |
| "boto3>=1.34", # SageMakerExecutor (create_training_job) + S3 IAM | |
| "kubernetes>=29.0", # EKSExecutor (indexed k8s Jobs via BatchV1Api) | |
| ] | |
| # Amazon EKS / Kubernetes Indexed-Job executor (EKSExecutor, per ADR-005). | |
| # kubernetes is lazy-imported at adapter-init/method time (not at package import). | |
| eks = [ | |
| "kubernetes>=29", | |
| ] | |
| # Amazon SageMaker training-job executor (SageMakerExecutor, per ADR-005). | |
| # boto3: the executor uses raw create_training_job. sagemaker (<3): the v2 | |
| # Estimator API the GSM8K smoke launcher (examples/gsm8k_grpo/ | |
| # run_sagemaker_launch.py) uses — pinned <3 because SDK v3 is an API rewrite | |
| # that dropped sagemaker.estimator.Estimator (F3 §1, verified live 2026-06-09). | |
| aws = [ | |
| "boto3>=1.34", | |
| "sagemaker>=2.200,<3", | |
| ] | |
| # SWE-smith task-synthesis engine (deepread finding V4 buy-vs-build verdict): | |
| # the swesmith toolkit builds env images from arbitrary GitHub repos and | |
| # synthesizes bugs (PR Mirror = this repo's gold-patch-reversion mechanic). | |
| # LIVE synthesis needs Docker on Linux (the toolkit does not support macOS/ | |
| # Windows officially); the SwesmithAdapter itself needs nothing beyond core. | |
| swesmith = [ | |
| "swesmith>=0.1", | |
| ] | |
| # Replaysim dataset normalization (per ADR-004) | |
| # | |
| # NOTE: data-juicer is intentionally NOT pinned as an extra. The package | |
| # named "data-juicer" does not exist on PyPI (the closest match, | |
| # "py-data-juicer==1.0.0", has broken transitive deps; later py-data-juicer | |
| # releases work but install ~150 transitive packages). Users who want the | |
| # DJNormalizer adapter should install data-juicer from source themselves — | |
| # see docs/TROUBLESHOOTING.md ("monarch / data-juicer install"). The | |
| # replaysim Python module imports data_juicer lazily, so the framework | |
| # package imports cleanly without it; only DJNormalizer use-time fails. | |
| replaysim = [ | |
| "composer-replication[replay]", # replaysim builds on the replay channel | |
| ] | |
| # Production training (TRL GRPOTrainer subclass — Recipe A) | |
| train = [ | |
| "trl>=0.12", | |
| "peft>=0.13", | |
| "accelerate>=1.0", | |
| "datasets>=3.0", | |
| ] | |
| # Feature-Deletion synthetic-data generation (ADR-010) | |
| # Inverts OSS SWE substrates into reimplement-to-pass tasks. `datasets` loads | |
| # the substrate instances; `docker` runs tests in the substrate's frozen image. | |
| # Pure-Python core (schema/env/monitor/curriculum/validator/substrate-adapter) | |
| # needs only `datasets`; `docker` is for the real LocalSubprocessSandbox / | |
| # substrate-inversion path. | |
| datagen = [ | |
| "datasets>=3.0", | |
| "docker>=7.0", | |
| ] | |
| # PRIME-RL recipe (Recipe C — per ADR-006) — intentionally has NO extra; see the NOTE that follows. | |
| # NOTE: a `prime-rl` extra used to be advertised here pinning | |
| # `prime-rl>=0.5`. That pin is unsatisfiable: the `prime-rl` PyPI name is | |
| # not registered. Prime Intellect publishes prime-rl from source only | |
| # (https://github.com/PrimeIntellect-ai/prime-rl). The framework's | |
| # composer_replication.recipes.prime_rl adapter handles its absence | |
| # gracefully (the upstream parity test is skip-marked when prime-rl is | |
| # not importable) and the in-file shadow-parity test still verifies the | |
| # loss formula independently. The extra is dropped — see | |
| # docs/TROUBLESHOOTING.md ("prime-rl install") for installation guidance. | |
| # Monarch recipe — likewise has NO extra. | |
| # NOTE: a `monarch` extra used to be advertised here pinning | |
| # `monarch>=0.4.1`. That pin is unsatisfiable: PyPI's `monarch` package | |
| # is unrelated to Meta's actor framework and tops out at 0.1.11. The real | |
| # Meta Monarch is published as `torchmonarch-nightly` and ships only as | |
| # nightly wheels with platform constraints. Per ADR-006, full Monarch | |
| # integration is a v0.2+ bet and the `composer_replication.recipes.monarch` | |
| # module is a documentation skeleton (importing it does NOT require | |
| # monarch installed). The extra is dropped — see docs/TROUBLESHOOTING.md | |
| # ("monarch / data-juicer install") for installation guidance. | |
| # Development — the BASE dev set installs on every platform (macOS arm64 incl.). | |
| # NOTE: `diloco` (torchft-nightly) is deliberately NOT in base `dev`: torchft-nightly | |
| # ships Linux-x86_64 wheels only, so including it made `pip install -e '.[dev]'` fail | |
| # outright on Apple Silicon / any non-Linux-x86_64 host. The torchft-dependent tests | |
| # skipif-gate cleanly when it is absent, so the base dev set runs the full suite minus | |
| # the torchft integration tests on any platform. | |
| dev = [ | |
| "pytest>=8.0", | |
| "ruff>=0.6", | |
| "composer-replication[replay,train]", | |
| ] | |
| # Full development: `dev` plus the DiLoCo outer-loop dep (Linux-x86_64 only — torchft-nightly), | |
| # the `serverless` executor stack, and `datagen`. The `eks`, `aws`, and `swesmith` extras are NOT | |
| # pulled in here (though `serverless` already brings boto3 and kubernetes). | |
| # Use on a Linux GPU/CI host to also exercise the torchft integration tests. | |
| dev-full = [ | |
| "composer-replication[dev,diloco,serverless,datagen]", | |
| ] | |
| [project.urls] | |
| # Every link resolves to the Hugging Face repo; Issues points at its Discussions tab. | |
| Homepage = "https://huggingface.co/Codeseys/composer-replication-framework" | |
| Documentation = "https://huggingface.co/Codeseys/composer-replication-framework/blob/main/docs/INTEGRATION_ARCHITECTURE.md" | |
| Repository = "https://huggingface.co/Codeseys/composer-replication-framework" | |
| Issues = "https://huggingface.co/Codeseys/composer-replication-framework/discussions" | |
| [tool.hatch.build.targets.wheel] | |
| # The wheel ships the composer_replication import package and nothing else listed here. | |
| packages = ["composer_replication"] | |
| [tool.hatch.build.targets.sdist] | |
| # The sdist carries the package plus README, LICENSE, and both citation files; | |
| # paths are anchored at the project root by the leading "/". | |
| include = [ | |
| "/composer_replication", | |
| "/README.md", | |
| "/LICENSE", | |
| "/CITATION.cff", | |
| "/CITATION.bib", | |
| ] | |
| [tool.ruff] | |
| # E501 (line-too-long) is ignored in [tool.ruff.lint], so this width is not enforced as a lint error. | |
| line-length = 100 | |
| # Matches requires-python = ">=3.10" in [project]. | |
| target-version = "py310" | |
| [tool.ruff.lint] | |
| # Rule families: E/W = pycodestyle, F = Pyflakes, I = isort, N = pep8-naming, | |
| # UP = pyupgrade, B = flake8-bugbear. | |
| select = ["E", "F", "W", "I", "N", "UP", "B"] | |
| # E501 = line too long; E741 = ambiguous variable name. | |
| ignore = ["E501", "E741"] | |