{ "activation": "swiglu", "bias": false, "dim_feedforward": 4096, "dpt_features": 256, "dpt_out_channels": [ 256, 512, 1024, 1024 ], "dpt_out_layers": [ 2, 5, 8, 11 ], "dropout": 0.0, "include_alpha": false, "latent_dim": 1024, "norm_first": true, "norm_type": "rms_norm", "num_heads": 8, "num_layers": 12, "num_register_tokens": 32, "patch_size": 8, "pe_type": "rope", "qk_norm": true, "rope_double_max_freq": true, "rope_type": "triangle", "texture_channels": 13, "texture_encode_patch_size": 1, "texture_encoder_norm_type": "rms_norm", "turn_to_cam_coord": true, "use_dpt_decoder": true, "use_ldr": false, "use_vn_encoder": true, "vdir_num_freqs": 0, "vdir_pe_type": "nerf", "vertex_pe_num_freqs": 12, "view_indep_qk_norm": true, "view_transformer_ffn_hidden_dim": 4096, "view_transformer_include_self_attn": true, "view_transformer_latent_dim": 1024, "view_transformer_n_heads": 8, "view_transformer_n_layers": 12, "view_transformer_use_swin_attn": true, "vn_encoder_norm_type": "rms_norm", "vn_pe_num_freqs": 6 }