{ "batcher": null, "cacher": null, "compiler": "torch_compile", "factorizer": null, "kernel": null, "pruner": null, "quantizer": "hqq", "hqq_backend": "torchao_int4", "hqq_compute_dtype": "torch.bfloat16", "hqq_force_hf_implementation": false, "hqq_group_size": 128, "hqq_use_torchao_kernels": true, "hqq_weight_bits": 4, "torch_compile_backend": "inductor", "torch_compile_dynamic": false, "torch_compile_fullgraph": true, "torch_compile_make_portable": false, "torch_compile_max_kv_cache_size": 800, "torch_compile_mode": "default", "torch_compile_seqlen_manual_cuda_graph": 400, "torch_compile_target": "module_list", "batch_size": 1, "device": "cuda:0", "device_map": null, "save_fns": [ "hqq", "save_before_apply" ], "load_fns": [ "hqq" ], "reapply_after_load": { "factorizer": null, "pruner": null, "quantizer": null, "kernel": null, "cacher": null, "compiler": "torch_compile", "batcher": null } }