{ "aggregator_dropout": 0.1, "aggregator_type": "multibranch", "calibrator_method": "spline", "d_feedforward": 512, "d_model": 256, "encoder_dropout": 0.1, "encoder_pooling": "cls", "encoder_type": "transformer", "head_dropout": 0.1, "head_hidden_dim": 128, "max_seq_len": 128, "n_attention_branches": 3, "n_difficulty_classes": 5, "n_encoder_layers": 4, "n_heads": 4, "star_ranges": { "0": [ 1, 5 ], "1": [ 1, 7 ], "2": [ 1, 8 ], "3": [ 1, 10 ], "4": [ 1, 10 ] }, "stochastic_mask_prob": 0.3, "top_k_ratio": 0.1 }