{ "architectures": [ "MixtureOfRecursions" ], "model_type": "mixture_of_recursions", "vocab_size": 31985, "d_model": 384, "hidden_size": 384, "n_layers": 12, "num_hidden_layers": 12, "n_heads": 6, "num_attention_heads": 6, "dim_feedforward": 2048, "intermediate_size": 2048, "max_seq_len": 128, "max_position_embeddings": 128, "max_steps": 4, "dropout": 0.1, "router_type": "adaptive", "padding_idx": 0, "pos_encoding": "learned", "torch_dtype": "float32", "transformers_version": "4.57.0", "auto_map": { "AutoConfig": "model_slm.MixtureOfRecursionsConfig", "AutoModel": "model_slm.MixtureOfRecursions", "AutoModelForCausalLM": "model_slm.MixtureOfRecursions" } }