Create debug
root@dlc1phd8ui9n87ro-master-0:~# ps -fp 11782,11783,11784,11785,19150,19151,19152,19153 -o pid,etimes,cmd
PID ELAPSED CMD
19153 1516 ray::SGLangEngine
19152 1516 ray::SGLangEngine
19151 1516 ray::SGLangEngine
19150 1516 ray::SGLangEngine
11785 1976 ray::SGLangEngine
11784 1976 ray::SGLangEngine
11783 1976 ray::SGLangEngine
11782 1976 ray::SGLangEngine
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:10] Prefill batch, #new-seq: 1, #new-token: 42, #cached-token: 8880, token usage: 0.26, #running-req: 8, #queue-req: 0, input throughput (token/s): 559.93, cuda graph: False [repeated 193x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:13] acot_rollout.py:94 - [acot_rollout] active=120 inflight=88 reqs=31177 ctrl=15401 reas=15776 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:14] INFO: 10.29.240.3:37174 - "POST /generate HTTP/1.1" 200 OK [repeated 252x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:14] Decode batch, #running-req: 13, #token: 89655, token usage: 0.09, cuda graph: True, gen throughput (token/s): 402.36, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:15] Prefill batch, #new-seq: 2, #new-token: 148, #cached-token: 21580, token usage: 0.28, #running-req: 14, #queue-req: 0, input throughput (token/s): 272.81, cuda graph: False [repeated 191x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:19] INFO: 10.29.240.3:42702 - "POST /generate HTTP/1.1" 200 OK [repeated 248x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:19] Decode batch, #running-req: 5, #token: 44735, token usage: 0.11, cuda graph: True, gen throughput (token/s): 87.03, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:20] Prefill batch, #new-seq: 1, #new-token: 161, #cached-token: 16378, token usage: 0.21, #running-req: 8, #queue-req: 0, input throughput (token/s): 541.56, cuda graph: False [repeated 192x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:23] acot_rollout.py:94 - [acot_rollout] active=120 inflight=87 reqs=31654 ctrl=15637 reas=16017 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:24] INFO: 10.29.240.3:47496 - "POST /generate HTTP/1.1" 200 OK [repeated 239x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11784) [2026-05-07 08:29:25] Decode batch, #running-req: 9, #token: 86905, token usage: 0.20, cuda graph: True, gen throughput (token/s): 36.39, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:25] Prefill batch, #new-seq: 3, #new-token: 475, #cached-token: 31058, token usage: 0.26, #running-req: 3, #queue-req: 0, input throughput (token/s): 600.19, cuda graph: False [repeated 175x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:30] INFO: 10.29.240.3:39510 - "POST /generate HTTP/1.1" 200 OK [repeated 212x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:30] Decode batch, #running-req: 6, #token: 59467, token usage: 0.14, cuda graph: True, gen throughput (token/s): 67.83, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:30] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 6398, token usage: 0.13, #running-req: 20, #queue-req: 0, input throughput (token/s): 92.51, cuda graph: False [repeated 183x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:33] acot_rollout.py:94 - [acot_rollout] active=120 inflight=80 reqs=32074 ctrl=15840 reas=16234 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:35] INFO: 10.29.240.3:39544 - "POST /generate HTTP/1.1" 200 OK [repeated 222x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:35] Decode batch, #running-req: 17, #token: 104993, token usage: 0.11, cuda graph: True, gen throughput (token/s): 370.41, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:35] Prefill batch, #new-seq: 1, #new-token: 45, #cached-token: 18283, token usage: 0.32, #running-req: 10, #queue-req: 0, input throughput (token/s): 2443.95, cuda graph: False [repeated 178x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:40] INFO: 10.29.240.3:40058 - "POST /generate HTTP/1.1" 200 OK [repeated 200x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:40] Decode batch, #running-req: 26, #token: 159431, token usage: 0.16, cuda graph: True, gen throughput (token/s): 449.31, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:29:40] Prefill batch, #new-seq: 1, #new-token: 2, #cached-token: 7027, token usage: 0.11, #running-req: 18, #queue-req: 0, input throughput (token/s): 21.36, cuda graph: False [repeated 170x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:43] acot_rollout.py:94 - [acot_rollout] active=120 inflight=77 reqs=32478 ctrl=16041 reas=16437 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:45] INFO: 10.29.240.3:60716 - "POST /generate HTTP/1.1" 200 OK [repeated 205x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:45] Decode batch, #running-req: 17, #token: 103432, token usage: 0.11, cuda graph: True, gen throughput (token/s): 395.47, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:45] Prefill batch, #new-seq: 1, #new-token: 37, #cached-token: 16734, token usage: 0.10, #running-req: 4, #queue-req: 0, input throughput (token/s): 3871.08, cuda graph: False [repeated 165x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:50] INFO: 10.29.240.3:44488 - "POST /generate HTTP/1.1" 200 OK [repeated 203x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:50] Decode batch, #running-req: 1, #token: 0, token usage: 0.00, cuda graph: True, gen throughput (token/s): 66.46, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:50] Prefill batch, #new-seq: 1, #new-token: 81, #cached-token: 10670, token usage: 0.18, #running-req: 4, #queue-req: 0, input throughput (token/s): 886.59, cuda graph: False [repeated 175x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:53] acot_rollout.py:94 - [acot_rollout] active=120 inflight=73 reqs=32883 ctrl=16240 reas=16643 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:55] INFO: 10.29.240.3:47366 - "POST /generate HTTP/1.1" 200 OK [repeated 198x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:56] Decode batch, #running-req: 10, #token: 66144, token usage: 0.07, cuda graph: True, gen throughput (token/s): 350.35, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:55] Prefill batch, #new-seq: 1, #new-token: 82, #cached-token: 10637, token usage: 0.09, #running-req: 2, #queue-req: 0, input throughput (token/s): 3268.92, cuda graph: False [repeated 149x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:00] INFO: 10.29.240.3:60972 - "POST /generate HTTP/1.1" 200 OK [repeated 52x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:30:01] Decode batch, #running-req: 2, #token: 15961, token usage: 0.02, cuda graph: True, gen throughput (token/s): 60.58, #queue-req: 0 [repeated 13x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 56%|ββββββ | 144/256 [12:16<07:46, 4.17s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:03] Prefill batch, #new-seq: 1, #new-token: 36, #cached-token: 12234, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 3.65, cuda graph: False
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:30:05] INFO: 10.29.240.3:44488 - "POST /generate HTTP/1.1" 200 OK [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:06] Decode batch, #running-req: 4, #token: 23451, token usage: 0.02, cuda graph: True, gen throughput (token/s): 145.41, #queue-req: 0 [repeated 12x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 59%|ββββββ | 152/256 [12:20<05:19, 3.07s/it][2026-05-07 08:30:07] acot_rollout.py:94 - [acot_rollout] active=104 inflight=62 reqs=32990 ctrl=16301 reas=16689 done=152
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:07] Prefill batch, #new-seq: 1, #new-token: 30, #cached-token: 10166, token usage: 0.02, #running-req: 2, #queue-req: 0, input throughput (token/s): 38.35, cuda graph: False [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 62%|βββββββ | 160/256 [12:24<03:39, 2.29s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:11] INFO: 10.29.240.3:33668 - "POST /generate HTTP/1.1" 200 OK [repeated 11x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:11] Decode batch, #running-req: 2, #token: 14667, token usage: 0.02, cuda graph: True, gen throughput (token/s): 22.35, #queue-req: 0 [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:11] Prefill batch, #new-seq: 1, #new-token: 3, #cached-token: 6672, token usage: 0.02, #running-req: 1, #queue-req: 0, input throughput (token/s): 53.46, cuda graph: False [repeated 5x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 66%|βββββββ | 168/256 [12:28<02:35, 1.77s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:16] INFO: 10.29.240.3:49426 - "POST /generate HTTP/1.1" 200 OK [repeated 30x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19151) [2026-05-07 08:30:16] Decode batch, #running-req: 6, #token: 56377, token usage: 0.06, cuda graph: True, gen throughput (token/s): 11.02, #queue-req: 0 [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:17] acot_rollout.py:94 - [acot_rollout] active=88 inflight=54 reqs=33063 ctrl=16320 reas=16743 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:19] Prefill batch, #new-seq: 2, #new-token: 84, #cached-token: 27998, token usage: 0.13, #running-req: 3, #queue-req: 0, input throughput (token/s): 3540.53, cuda graph: False [repeated 133x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:21] INFO: 10.29.240.3:37460 - "POST /generate HTTP/1.1" 200 OK [repeated 188x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:21] Decode batch, #running-req: 12, #token: 77736, token usage: 0.08, cuda graph: True, gen throughput (token/s): 276.18, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:24] Prefill batch, #new-seq: 1, #new-token: 55, #cached-token: 19289, token usage: 0.23, #running-req: 7, #queue-req: 0, input throughput (token/s): 317.62, cuda graph: False [repeated 154x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:26] INFO: 10.29.240.3:37448 - "POST /generate HTTP/1.1" 200 OK [repeated 185x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:26] Decode batch, #running-req: 1, #token: 17119, token usage: 0.04, cuda graph: True, gen throughput (token/s): 63.53, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:27] acot_rollout.py:94 - [acot_rollout] active=88 inflight=52 reqs=33421 ctrl=16495 reas=16926 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:30] Prefill batch, #new-seq: 1, #new-token: 59, #cached-token: 12833, token usage: 0.09, #running-req: 3, #queue-req: 0, input throughput (token/s): 909.46, cuda graph: False [repeated 162x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:31] INFO: 10.29.240.3:37472 - "POST /generate HTTP/1.1" 200 OK [repeated 173x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:31] Decode batch, #running-req: 2, #token: 26910, token usage: 0.06, cuda graph: True, gen throughput (token/s): 52.52, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:35] Prefill batch, #new-seq: 1, #new-token: 49, #cached-token: 10514, token usage: 0.15, #running-req: 6, #queue-req: 0, input throughput (token/s): 14935.95, cuda graph: False [repeated 143x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:36] INFO: 10.29.240.3:33056 - "POST /generate HTTP/1.1" 200 OK [repeated 154x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:37] Decode batch, #running-req: 14, #token: 91534, token usage: 0.09, cuda graph: True, gen throughput (token/s): 311.02, #queue-req: 0 [repeated 23x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:37] acot_rollout.py:94 - [acot_rollout] active=88 inflight=48 reqs=33741 ctrl=16654 reas=17087 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11784) [2026-05-07 08:30:40] Prefill batch, #new-seq: 1, #new-token: 27, #cached-token: 11347, token usage: 0.08, #running-req: 2, #queue-req: 0, input throughput (token/s): 5042.37, cuda graph: False [repeated 131x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:42] INFO: 10.29.240.3:59366 - "POST /generate HTTP/1.1" 200 OK [repeated 160x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:41] Decode batch, #running-req: 5, #token: 47460, token usage: 0.11, cuda graph: True, gen throughput (token/s): 56.23, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:45] Prefill batch, #new-seq: 1, #new-token: 68, #cached-token: 10716, token usage: 0.05, #running-req: 1, #queue-req: 0, input throughput (token/s): 218.29, cuda graph: False [repeated 132x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:47] INFO: 10.29.240.3:37456 - "POST /generate HTTP/1.1" 200 OK [repeated 148x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:47] Decode batch, #running-req: 9, #token: 63678, token usage: 0.07, cuda graph: True, gen throughput (token/s): 249.90, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:47] acot_rollout.py:94 - [acot_rollout] active=88 inflight=41 reqs=34036 ctrl=16795 reas=17241 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:50] Prefill batch, #new-seq: 1, #new-token: 47, #cached-token: 17131, token usage: 0.04, #running-req: 0, #queue-req: 0, input throughput (token/s): 152.36, cuda graph: False [repeated 122x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:52] INFO: 10.29.240.3:49426 - "POST /generate HTTP/1.1" 200 OK [repeated 106x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19151) [2026-05-07 08:30:52] Decode batch, #running-req: 3, #token: 29771, token usage: 0.03, cuda graph: True, gen throughput (token/s): 89.19, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:55] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 7015, token usage: 0.10, #running-req: 13, #queue-req: 0, input throughput (token/s): 44.94, cuda graph: False [repeated 79x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:57] INFO: 10.29.240.3:42232 - "POST /generate HTTP/1.1" 200 OK [repeated 96x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:57] Decode batch, #running-req: 9, #token: 69421, token usage: 0.07, cuda graph: True, gen throughput (token/s): 214.56, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:57] acot_rollout.py:94 - [acot_rollout] active=88 inflight=36 reqs=34226 ctrl=16882 reas=17344 done=168
ps -fp 11782,11783,11784,11785,19150,19151,19152,19153 -o pid,etimes,cmd
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18721) [2026-04-28 09:22:55] Decode batch, #running-req: 1, #token: 0, token usage: 0.00, cuda graph: True, gen throughput (token/s): 33.48, #queue-req: 0 [repeated 15x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:22:55] Prefill batch, #new-seq: 1, #new-token: 38, #cached-token: 12117, token usage: 0.17, #running-req: 5, #queue-req: 0, input throughput (token/s): 173.15, cuda graph: False [repeated 116x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:22:56] INFO: 10.29.240.42:56064 - "POST /generate HTTP/1.1" 200 OK [repeated 155x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11794) [2026-04-28 09:23:00] Decode batch, #running-req: 3, #token: 44593, token usage: 0.10, cuda graph: True, gen throughput (token/s): 36.66, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:00] Prefill batch, #new-seq: 1, #new-token: 56, #cached-token: 12282, token usage: 0.09, #running-req: 2, #queue-req: 0, input throughput (token/s): 4402.72, cuda graph: False [repeated 144x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:01] INFO: 10.29.240.42:47380 - "POST /generate HTTP/1.1" 200 OK [repeated 147x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:04] acot_rollout.py:93 - [acot_rollout] active=18 inflight=31 reqs=168990 ctrl=83014 reas=85976 done=366
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11794) [2026-04-28 09:23:05] Decode batch, #running-req: 1, #token: 14079, token usage: 0.03, cuda graph: True, gen throughput (token/s): 23.74, #queue-req: 0 [repeated 19x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:05] Prefill batch, #new-seq: 1, #new-token: 34, #cached-token: 14495, token usage: 0.07, #running-req: 1, #queue-req: 0, input throughput (token/s): 61.16, cuda graph: False [repeated 136x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:06] INFO: 10.29.240.42:47384 - "POST /generate HTTP/1.1" 200 OK [repeated 149x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:10] Decode batch, #running-req: 12, #token: 87327, token usage: 0.09, cuda graph: True, gen throughput (token/s): 248.22, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:11] Prefill batch, #new-seq: 1, #new-token: 6, #cached-token: 7847, token usage: 0.11, #running-req: 12, #queue-req: 0, input throughput (token/s): 20.89, cuda graph: False [repeated 119x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:11] INFO: 10.29.240.42:47720 - "POST /generate HTTP/1.1" 200 OK [repeated 142x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:14] acot_rollout.py:93 - [acot_rollout] active=18 inflight=30 reqs=169256 ctrl=83144 reas=86112 done=366
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:16] Decode batch, #running-req: 7, #token: 39238, token usage: 0.04, cuda graph: True, gen throughput (token/s): 330.77, #queue-req: 0 [repeated 20x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:15] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 7112, token usage: 0.05, #running-req: 6, #queue-req: 0, input throughput (token/s): 33.14, cuda graph: False [repeated 101x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:16] INFO: 10.29.240.42:36666 - "POST /generate HTTP/1.1" 200 OK [repeated 106x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:21] INFO: 10.29.240.42:42394 - "POST /generate HTTP/1.1" 200 OK [repeated 8x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:21] Decode batch, #running-req: 1, #token: 8586, token usage: 0.01, cuda graph: True, gen throughput (token/s): 53.95, #queue-req: 0 [repeated 10x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:23:21] Prefill batch, #new-seq: 1, #new-token: 52, #cached-token: 12823, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 7.71, cuda graph: False
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:25] acot_rollout.py:93 - [acot_rollout] active=15 inflight=25 reqs=169302 ctrl=83171 reas=86131 done=369
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:27] Decode batch, #running-req: 1, #token: 7711, token usage: 0.01, cuda graph: True, gen throughput (token/s): 33.73, #queue-req: 0 [repeated 6x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105)
[dlczq3cay47mtefu-master-0] Rollout generation: 75%|ββββββββ | 96/128 [23:05<06:36, 12.40s/it]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:29] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 8054, token usage: 0.01, #running-req: 0, #queue-req: 0, input throughput (token/s): 0.08, cuda graph: False
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18720) [2026-04-28 09:23:29] INFO: 10.29.240.42:45660 - "POST /generate HTTP/1.1" 200 OK [repeated 4x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:32] Decode batch, #running-req: 2, #token: 26469, token usage: 0.06, cuda graph: True, gen throughput (token/s): 64.14, #queue-req: 0 [repeated 13x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:34] Prefill batch, #new-seq: 1, #new-token: 4, #cached-token: 6863, token usage: 0.08, #running-req: 10, #queue-req: 0, input throughput (token/s): 19.21, cuda graph: False [repeated 102x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:34] INFO: 10.29.240.42:49204 - "POST /generate HTTP/1.1" 200 OK [repeated 109x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:35] acot_rollout.py:93 - [acot_rollout] active=15 inflight=22 reqs=169441 ctrl=83231 reas=86210 done=369
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:37] Decode batch, #running-req: 3, #token: 23573, token usage: 0.02, cuda graph: True, gen throughput (token/s): 87.20, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:39] Prefill batch, #new-seq: 1, #new-token: 26, #cached-token: 16064, token usage: 0.04, #running-req: 0, #queue-req: 0, input throughput (token/s): 47.34, cuda graph: False [repeated 104x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:23:39] INFO: 10.29.240.42:46792 - "POST /generate HTTP/1.1" 200 OK [repeated 112x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:42] Decode batch, #running-req: 10, #token: 64645, token usage: 0.07, cuda graph: True, gen throughput (token/s): 185.35, #queue-req: 0 [repeated 20x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:44] Prefill batch, #new-seq: 1, #new-token: 51, #cached-token: 13549, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 64.94, cuda graph: False [repeated 98x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:44] INFO: 10.29.240.42:49220 - "POST /generate HTTP/1.1" 200 OK [repeated 108x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:45] acot_rollout.py:93 - [acot_rollout] active=15 inflight=21 reqs=169658 ctrl=83338 reas=86320 done=369