| { |
| "_class_name": "AutoencoderKLWan", |
| "_diffusers_version": "0.37.0.dev0", |
| "attn_scales": [], |
| "base_dim": 3, |
| "decoder_base_dim": null, |
| "dim_mult": [ |
| 1, |
| 1, |
| 1, |
| 1 |
| ], |
| "dropout": 0.0, |
| "in_channels": 3, |
| "is_residual": false, |
| "latents_mean": [ |
| -0.7571, |
| -0.7089, |
| -0.9113, |
| 0.1075, |
| -0.1745, |
| 0.9653, |
| -0.1517, |
| 1.5508, |
| 0.4134, |
| -0.0715, |
| 0.5517, |
| -0.3632, |
| -0.1922, |
| -0.9497, |
| 0.2503, |
| -0.2921 |
| ], |
| "latents_std": [ |
| 2.8184, |
| 1.4541, |
| 2.3275, |
| 2.6558, |
| 1.2196, |
| 1.7708, |
| 2.6052, |
| 2.0743, |
| 3.2687, |
| 2.1526, |
| 2.8652, |
| 1.5579, |
| 1.6382, |
| 1.1253, |
| 2.8251, |
| 1.916 |
| ], |
| "num_res_blocks": 1, |
| "out_channels": 3, |
| "patch_size": null, |
| "scale_factor_spatial": 8, |
| "scale_factor_temporal": 4, |
| "temperal_downsample": [ |
| false, |
| true, |
| true |
| ], |
| "z_dim": 16 |
| } |
|
|