Create debug

by Jennny - opened 17 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+39

-0

Jennny

17 days ago

•

edited 1 day ago

root@dlc1phd8ui9n87ro-master-0:~# ps -fp 11782,11783,11784,11785,19150,19151,19152,19153 -o pid,etimes,cmd
PID ELAPSED CMD
19153 1516 ray::SGLangEngine
19152 1516 ray::SGLangEngine
19151 1516 ray::SGLangEngine
19150 1516 ray::SGLangEngine
11785 1976 ray::SGLangEngine
11784 1976 ray::SGLangEngine
11783 1976 ray::SGLangEngine
11782 1976 ray::SGLangEngine

[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:10] Prefill batch, #new-seq: 1, #new-token: 42, #cached-token: 8880, token usage: 0.26, #running-req: 8, #queue-req: 0, input throughput (token/s): 559.93, cuda graph: False [repeated 193x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:13] acot_rollout.py:94 - [acot_rollout] active=120 inflight=88 reqs=31177 ctrl=15401 reas=15776 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:14] INFO: 10.29.240.3:37174 - "POST /generate HTTP/1.1" 200 OK [repeated 252x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:14] Decode batch, #running-req: 13, #token: 89655, token usage: 0.09, cuda graph: True, gen throughput (token/s): 402.36, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:15] Prefill batch, #new-seq: 2, #new-token: 148, #cached-token: 21580, token usage: 0.28, #running-req: 14, #queue-req: 0, input throughput (token/s): 272.81, cuda graph: False [repeated 191x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:19] INFO: 10.29.240.3:42702 - "POST /generate HTTP/1.1" 200 OK [repeated 248x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:19] Decode batch, #running-req: 5, #token: 44735, token usage: 0.11, cuda graph: True, gen throughput (token/s): 87.03, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:20] Prefill batch, #new-seq: 1, #new-token: 161, #cached-token: 16378, token usage: 0.21, #running-req: 8, #queue-req: 0, input throughput (token/s): 541.56, cuda graph: False [repeated 192x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:23] acot_rollout.py:94 - [acot_rollout] active=120 inflight=87 reqs=31654 ctrl=15637 reas=16017 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:24] INFO: 10.29.240.3:47496 - "POST /generate HTTP/1.1" 200 OK [repeated 239x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11784) [2026-05-07 08:29:25] Decode batch, #running-req: 9, #token: 86905, token usage: 0.20, cuda graph: True, gen throughput (token/s): 36.39, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:25] Prefill batch, #new-seq: 3, #new-token: 475, #cached-token: 31058, token usage: 0.26, #running-req: 3, #queue-req: 0, input throughput (token/s): 600.19, cuda graph: False [repeated 175x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:30] INFO: 10.29.240.3:39510 - "POST /generate HTTP/1.1" 200 OK [repeated 212x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:30] Decode batch, #running-req: 6, #token: 59467, token usage: 0.14, cuda graph: True, gen throughput (token/s): 67.83, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:30] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 6398, token usage: 0.13, #running-req: 20, #queue-req: 0, input throughput (token/s): 92.51, cuda graph: False [repeated 183x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:33] acot_rollout.py:94 - [acot_rollout] active=120 inflight=80 reqs=32074 ctrl=15840 reas=16234 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:35] INFO: 10.29.240.3:39544 - "POST /generate HTTP/1.1" 200 OK [repeated 222x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:35] Decode batch, #running-req: 17, #token: 104993, token usage: 0.11, cuda graph: True, gen throughput (token/s): 370.41, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:35] Prefill batch, #new-seq: 1, #new-token: 45, #cached-token: 18283, token usage: 0.32, #running-req: 10, #queue-req: 0, input throughput (token/s): 2443.95, cuda graph: False [repeated 178x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:40] INFO: 10.29.240.3:40058 - "POST /generate HTTP/1.1" 200 OK [repeated 200x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:40] Decode batch, #running-req: 26, #token: 159431, token usage: 0.16, cuda graph: True, gen throughput (token/s): 449.31, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:29:40] Prefill batch, #new-seq: 1, #new-token: 2, #cached-token: 7027, token usage: 0.11, #running-req: 18, #queue-req: 0, input throughput (token/s): 21.36, cuda graph: False [repeated 170x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:43] acot_rollout.py:94 - [acot_rollout] active=120 inflight=77 reqs=32478 ctrl=16041 reas=16437 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:45] INFO: 10.29.240.3:60716 - "POST /generate HTTP/1.1" 200 OK [repeated 205x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:45] Decode batch, #running-req: 17, #token: 103432, token usage: 0.11, cuda graph: True, gen throughput (token/s): 395.47, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:45] Prefill batch, #new-seq: 1, #new-token: 37, #cached-token: 16734, token usage: 0.10, #running-req: 4, #queue-req: 0, input throughput (token/s): 3871.08, cuda graph: False [repeated 165x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:50] INFO: 10.29.240.3:44488 - "POST /generate HTTP/1.1" 200 OK [repeated 203x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:29:50] Decode batch, #running-req: 1, #token: 0, token usage: 0.00, cuda graph: True, gen throughput (token/s): 66.46, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:29:50] Prefill batch, #new-seq: 1, #new-token: 81, #cached-token: 10670, token usage: 0.18, #running-req: 4, #queue-req: 0, input throughput (token/s): 886.59, cuda graph: False [repeated 175x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:29:53] acot_rollout.py:94 - [acot_rollout] active=120 inflight=73 reqs=32883 ctrl=16240 reas=16643 done=136
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:29:55] INFO: 10.29.240.3:47366 - "POST /generate HTTP/1.1" 200 OK [repeated 198x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:29:56] Decode batch, #running-req: 10, #token: 66144, token usage: 0.07, cuda graph: True, gen throughput (token/s): 350.35, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:29:55] Prefill batch, #new-seq: 1, #new-token: 82, #cached-token: 10637, token usage: 0.09, #running-req: 2, #queue-req: 0, input throughput (token/s): 3268.92, cuda graph: False [repeated 149x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:00] INFO: 10.29.240.3:60972 - "POST /generate HTTP/1.1" 200 OK [repeated 52x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:30:01] Decode batch, #running-req: 2, #token: 15961, token usage: 0.02, cuda graph: True, gen throughput (token/s): 60.58, #queue-req: 0 [repeated 13x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 56%|█████▋ | 144/256 [12:16<07:46, 4.17s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:03] Prefill batch, #new-seq: 1, #new-token: 36, #cached-token: 12234, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 3.65, cuda graph: False
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19153) [2026-05-07 08:30:05] INFO: 10.29.240.3:44488 - "POST /generate HTTP/1.1" 200 OK [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:06] Decode batch, #running-req: 4, #token: 23451, token usage: 0.02, cuda graph: True, gen throughput (token/s): 145.41, #queue-req: 0 [repeated 12x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 59%|█████▉ | 152/256 [12:20<05:19, 3.07s/it][2026-05-07 08:30:07] acot_rollout.py:94 - [acot_rollout] active=104 inflight=62 reqs=32990 ctrl=16301 reas=16689 done=152
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:07] Prefill batch, #new-seq: 1, #new-token: 30, #cached-token: 10166, token usage: 0.02, #running-req: 2, #queue-req: 0, input throughput (token/s): 38.35, cuda graph: False [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 62%|██████▎ | 160/256 [12:24<03:39, 2.29s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:11] INFO: 10.29.240.3:33668 - "POST /generate HTTP/1.1" 200 OK [repeated 11x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:11] Decode batch, #running-req: 2, #token: 14667, token usage: 0.02, cuda graph: True, gen throughput (token/s): 22.35, #queue-req: 0 [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:11] Prefill batch, #new-seq: 1, #new-token: 3, #cached-token: 6672, token usage: 0.02, #running-req: 1, #queue-req: 0, input throughput (token/s): 53.46, cuda graph: False [repeated 5x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091)
[dlc1phd8ui9n87ro-master-0] Rollout generation: 66%|██████▌ | 168/256 [12:28<02:35, 1.77s/it]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:16] INFO: 10.29.240.3:49426 - "POST /generate HTTP/1.1" 200 OK [repeated 30x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19151) [2026-05-07 08:30:16] Decode batch, #running-req: 6, #token: 56377, token usage: 0.06, cuda graph: True, gen throughput (token/s): 11.02, #queue-req: 0 [repeated 10x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:17] acot_rollout.py:94 - [acot_rollout] active=88 inflight=54 reqs=33063 ctrl=16320 reas=16743 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:19] Prefill batch, #new-seq: 2, #new-token: 84, #cached-token: 27998, token usage: 0.13, #running-req: 3, #queue-req: 0, input throughput (token/s): 3540.53, cuda graph: False [repeated 133x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:21] INFO: 10.29.240.3:37460 - "POST /generate HTTP/1.1" 200 OK [repeated 188x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:21] Decode batch, #running-req: 12, #token: 77736, token usage: 0.08, cuda graph: True, gen throughput (token/s): 276.18, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:24] Prefill batch, #new-seq: 1, #new-token: 55, #cached-token: 19289, token usage: 0.23, #running-req: 7, #queue-req: 0, input throughput (token/s): 317.62, cuda graph: False [repeated 154x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:26] INFO: 10.29.240.3:37448 - "POST /generate HTTP/1.1" 200 OK [repeated 185x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:26] Decode batch, #running-req: 1, #token: 17119, token usage: 0.04, cuda graph: True, gen throughput (token/s): 63.53, #queue-req: 0 [repeated 18x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:27] acot_rollout.py:94 - [acot_rollout] active=88 inflight=52 reqs=33421 ctrl=16495 reas=16926 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:30] Prefill batch, #new-seq: 1, #new-token: 59, #cached-token: 12833, token usage: 0.09, #running-req: 3, #queue-req: 0, input throughput (token/s): 909.46, cuda graph: False [repeated 162x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:31] INFO: 10.29.240.3:37472 - "POST /generate HTTP/1.1" 200 OK [repeated 173x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:31] Decode batch, #running-req: 2, #token: 26910, token usage: 0.06, cuda graph: True, gen throughput (token/s): 52.52, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11785) [2026-05-07 08:30:35] Prefill batch, #new-seq: 1, #new-token: 49, #cached-token: 10514, token usage: 0.15, #running-req: 6, #queue-req: 0, input throughput (token/s): 14935.95, cuda graph: False [repeated 143x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:36] INFO: 10.29.240.3:33056 - "POST /generate HTTP/1.1" 200 OK [repeated 154x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:37] Decode batch, #running-req: 14, #token: 91534, token usage: 0.09, cuda graph: True, gen throughput (token/s): 311.02, #queue-req: 0 [repeated 23x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:37] acot_rollout.py:94 - [acot_rollout] active=88 inflight=48 reqs=33741 ctrl=16654 reas=17087 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11784) [2026-05-07 08:30:40] Prefill batch, #new-seq: 1, #new-token: 27, #cached-token: 11347, token usage: 0.08, #running-req: 2, #queue-req: 0, input throughput (token/s): 5042.37, cuda graph: False [repeated 131x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:42] INFO: 10.29.240.3:59366 - "POST /generate HTTP/1.1" 200 OK [repeated 160x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:41] Decode batch, #running-req: 5, #token: 47460, token usage: 0.11, cuda graph: True, gen throughput (token/s): 56.23, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:45] Prefill batch, #new-seq: 1, #new-token: 68, #cached-token: 10716, token usage: 0.05, #running-req: 1, #queue-req: 0, input throughput (token/s): 218.29, cuda graph: False [repeated 132x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:47] INFO: 10.29.240.3:37456 - "POST /generate HTTP/1.1" 200 OK [repeated 148x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:47] Decode batch, #running-req: 9, #token: 63678, token usage: 0.07, cuda graph: True, gen throughput (token/s): 249.90, #queue-req: 0 [repeated 21x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:47] acot_rollout.py:94 - [acot_rollout] active=88 inflight=41 reqs=34036 ctrl=16795 reas=17241 done=168
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11782) [2026-05-07 08:30:50] Prefill batch, #new-seq: 1, #new-token: 47, #cached-token: 17131, token usage: 0.04, #running-req: 0, #queue-req: 0, input throughput (token/s): 152.36, cuda graph: False [repeated 122x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=11783) [2026-05-07 08:30:52] INFO: 10.29.240.3:49426 - "POST /generate HTTP/1.1" 200 OK [repeated 106x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19151) [2026-05-07 08:30:52] Decode batch, #running-req: 3, #token: 29771, token usage: 0.03, cuda graph: True, gen throughput (token/s): 89.19, #queue-req: 0 [repeated 19x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:55] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 7015, token usage: 0.10, #running-req: 13, #queue-req: 0, input throughput (token/s): 44.94, cuda graph: False [repeated 79x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19150) [2026-05-07 08:30:57] INFO: 10.29.240.3:42232 - "POST /generate HTTP/1.1" 200 OK [repeated 96x across cluster]
[dlc1phd8ui9n87ro-master-0] (SGLangEngine pid=19152) [2026-05-07 08:30:57] Decode batch, #running-req: 9, #token: 69421, token usage: 0.07, cuda graph: True, gen throughput (token/s): 214.56, #queue-req: 0 [repeated 20x across cluster]
[dlc1phd8ui9n87ro-master-0] (RolloutManager pid=11091) [2026-05-07 08:30:57] acot_rollout.py:94 - [acot_rollout] active=88 inflight=36 reqs=34226 ctrl=16882 reas=17344 done=168

Create debug 93e76f37

Jennny

17 days ago

This comment has been hidden (marked as Resolved)

andre930

Owner 17 days ago

•

edited 1 day ago

ps -fp 11782,11783,11784,11785,19150,19151,19152,19153 -o pid,etimes,cmd

Jennny

11 days ago

•

edited 11 days ago

[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18721) [2026-04-28 09:22:55] Decode batch, #running-req: 1, #token: 0, token usage: 0.00, cuda graph: True, gen throughput (token/s): 33.48, #queue-req: 0 [repeated 15x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:22:55] Prefill batch, #new-seq: 1, #new-token: 38, #cached-token: 12117, token usage: 0.17, #running-req: 5, #queue-req: 0, input throughput (token/s): 173.15, cuda graph: False [repeated 116x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:22:56] INFO: 10.29.240.42:56064 - "POST /generate HTTP/1.1" 200 OK [repeated 155x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11794) [2026-04-28 09:23:00] Decode batch, #running-req: 3, #token: 44593, token usage: 0.10, cuda graph: True, gen throughput (token/s): 36.66, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:00] Prefill batch, #new-seq: 1, #new-token: 56, #cached-token: 12282, token usage: 0.09, #running-req: 2, #queue-req: 0, input throughput (token/s): 4402.72, cuda graph: False [repeated 144x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:01] INFO: 10.29.240.42:47380 - "POST /generate HTTP/1.1" 200 OK [repeated 147x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:04] acot_rollout.py:93 - [acot_rollout] active=18 inflight=31 reqs=168990 ctrl=83014 reas=85976 done=366
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11794) [2026-04-28 09:23:05] Decode batch, #running-req: 1, #token: 14079, token usage: 0.03, cuda graph: True, gen throughput (token/s): 23.74, #queue-req: 0 [repeated 19x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:05] Prefill batch, #new-seq: 1, #new-token: 34, #cached-token: 14495, token usage: 0.07, #running-req: 1, #queue-req: 0, input throughput (token/s): 61.16, cuda graph: False [repeated 136x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:06] INFO: 10.29.240.42:47384 - "POST /generate HTTP/1.1" 200 OK [repeated 149x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:10] Decode batch, #running-req: 12, #token: 87327, token usage: 0.09, cuda graph: True, gen throughput (token/s): 248.22, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:11] Prefill batch, #new-seq: 1, #new-token: 6, #cached-token: 7847, token usage: 0.11, #running-req: 12, #queue-req: 0, input throughput (token/s): 20.89, cuda graph: False [repeated 119x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:11] INFO: 10.29.240.42:47720 - "POST /generate HTTP/1.1" 200 OK [repeated 142x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:14] acot_rollout.py:93 - [acot_rollout] active=18 inflight=30 reqs=169256 ctrl=83144 reas=86112 done=366
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:16] Decode batch, #running-req: 7, #token: 39238, token usage: 0.04, cuda graph: True, gen throughput (token/s): 330.77, #queue-req: 0 [repeated 20x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:15] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 7112, token usage: 0.05, #running-req: 6, #queue-req: 0, input throughput (token/s): 33.14, cuda graph: False [repeated 101x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:16] INFO: 10.29.240.42:36666 - "POST /generate HTTP/1.1" 200 OK [repeated 106x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:21] INFO: 10.29.240.42:42394 - "POST /generate HTTP/1.1" 200 OK [repeated 8x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:21] Decode batch, #running-req: 1, #token: 8586, token usage: 0.01, cuda graph: True, gen throughput (token/s): 53.95, #queue-req: 0 [repeated 10x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:23:21] Prefill batch, #new-seq: 1, #new-token: 52, #cached-token: 12823, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 7.71, cuda graph: False
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:25] acot_rollout.py:93 - [acot_rollout] active=15 inflight=25 reqs=169302 ctrl=83171 reas=86131 done=369
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:27] Decode batch, #running-req: 1, #token: 7711, token usage: 0.01, cuda graph: True, gen throughput (token/s): 33.73, #queue-req: 0 [repeated 6x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105)
[dlczq3cay47mtefu-master-0] Rollout generation: 75%|███████▌ | 96/128 [23:05<06:36, 12.40s/it]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:29] Prefill batch, #new-seq: 1, #new-token: 1, #cached-token: 8054, token usage: 0.01, #running-req: 0, #queue-req: 0, input throughput (token/s): 0.08, cuda graph: False
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18720) [2026-04-28 09:23:29] INFO: 10.29.240.42:45660 - "POST /generate HTTP/1.1" 200 OK [repeated 4x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11795) [2026-04-28 09:23:32] Decode batch, #running-req: 2, #token: 26469, token usage: 0.06, cuda graph: True, gen throughput (token/s): 64.14, #queue-req: 0 [repeated 13x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:34] Prefill batch, #new-seq: 1, #new-token: 4, #cached-token: 6863, token usage: 0.08, #running-req: 10, #queue-req: 0, input throughput (token/s): 19.21, cuda graph: False [repeated 102x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:34] INFO: 10.29.240.42:49204 - "POST /generate HTTP/1.1" 200 OK [repeated 109x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:35] acot_rollout.py:93 - [acot_rollout] active=15 inflight=22 reqs=169441 ctrl=83231 reas=86210 done=369
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18722) [2026-04-28 09:23:37] Decode batch, #running-req: 3, #token: 23573, token usage: 0.02, cuda graph: True, gen throughput (token/s): 87.20, #queue-req: 0 [repeated 21x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:39] Prefill batch, #new-seq: 1, #new-token: 26, #cached-token: 16064, token usage: 0.04, #running-req: 0, #queue-req: 0, input throughput (token/s): 47.34, cuda graph: False [repeated 104x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11793) [2026-04-28 09:23:39] INFO: 10.29.240.42:46792 - "POST /generate HTTP/1.1" 200 OK [repeated 112x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=18719) [2026-04-28 09:23:42] Decode batch, #running-req: 10, #token: 64645, token usage: 0.07, cuda graph: True, gen throughput (token/s): 185.35, #queue-req: 0 [repeated 20x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:44] Prefill batch, #new-seq: 1, #new-token: 51, #cached-token: 13549, token usage: 0.03, #running-req: 0, #queue-req: 0, input throughput (token/s): 64.94, cuda graph: False [repeated 98x across cluster]
[dlczq3cay47mtefu-master-0] (SGLangEngine pid=11792) [2026-04-28 09:23:44] INFO: 10.29.240.42:49220 - "POST /generate HTTP/1.1" 200 OK [repeated 108x across cluster]
[dlczq3cay47mtefu-master-0] (RolloutManager pid=11105) [2026-04-28 09:23:45] acot_rollout.py:93 - [acot_rollout] active=15 inflight=21 reqs=169658 ctrl=83338 reas=86320 done=369

Jennny

11 days ago

This comment has been hidden (marked as Resolved)

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

Ready to merge

This branch is ready to get merged automatically.

· Sign up or log in to comment