| { |
| "architectures": [ |
| "MiniCPMForCausalLM" |
| ], |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "auto_map": { |
| "AutoConfig": "configuration_minicpm.MiniCPMConfig", |
| "AutoModel": "modeling_minicpm.MiniCPMForCausalLM", |
| "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM", |
| "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM", |
| "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification" |
| }, |
| "bos_token_id": 1, |
| "dim_model_base": 256, |
| "eos_token_id": 73440, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.1, |
| "intermediate_size": 16384, |
| "max_position_embeddings": 65536, |
| "model_type": "minicpm", |
| "mup_denominator": 32, |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 2, |
| "pad_token_id": 73440, |
| "pretraining_tp": 1, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": { |
| "long_factor": [ |
| 0.9982316082870437, |
| 1.033048153422584, |
| 1.0749920956484724, |
| 1.1255096879436193, |
| 1.1863348602111476, |
| 1.259543828902579, |
| 1.3476188888731149, |
| 1.4535223827776373, |
| 1.5807816745852985, |
| 1.7335856049489526, |
| 1.9168922912975785, |
| 2.1365471404135326, |
| 2.3994084200118646, |
| 2.713475511863602, |
| 3.0880118452194134, |
| 3.533650295140154, |
| 4.062463396503134, |
| 4.687974098908333, |
| 5.425075306704039, |
| 6.289818967956352, |
| 7.29902962722721, |
| 8.469695779093664, |
| 9.81809877306655, |
| 11.358657902065282, |
| 13.102505860712087, |
| 15.055862949967128, |
| 17.218348131364184, |
| 19.581439255386453, |
| 22.127353314656723, |
| 24.828633849376587, |
| 27.6486820771775, |
| 30.54334096108829, |
| 33.46345345363812, |
| 36.358112337548896, |
| 39.17816056534983, |
| 41.879441100069684, |
| 44.425355159339965, |
| 46.78844628336223, |
| 48.95093146475928, |
| 50.90428855401433, |
| 52.648136512661125, |
| 54.18869564165987, |
| 55.537098635632745, |
| 56.7077647874992, |
| 57.71697544677006, |
| 58.58171910802236, |
| 59.31882031581807, |
| 59.94433101822328, |
| 60.47314411958625, |
| 60.918782569507, |
| 61.29331890286281, |
| 61.60738599471455, |
| 61.87024727431288, |
| 62.089902123428836, |
| 62.27320880977746, |
| 62.42601274014111, |
| 62.55327203194878, |
| 62.65917552585329, |
| 62.74725058582382, |
| 62.82045955451526, |
| 62.88128472678279, |
| 62.931802319077946, |
| 62.97374626130382, |
| 63.008562806439365 |
| ], |
| "original_max_position_embeddings": 65536, |
| "rope_type": "longrope", |
| "short_factor": [ |
| 0.9982316082870437, |
| 1.033048153422584, |
| 1.0749920956484724, |
| 1.1255096879436193, |
| 1.1863348602111476, |
| 1.259543828902579, |
| 1.3476188888731149, |
| 1.4535223827776373, |
| 1.5807816745852985, |
| 1.7335856049489526, |
| 1.9168922912975785, |
| 2.1365471404135326, |
| 2.3994084200118646, |
| 2.713475511863602, |
| 3.0880118452194134, |
| 3.533650295140154, |
| 4.062463396503134, |
| 4.687974098908333, |
| 5.425075306704039, |
| 6.289818967956352, |
| 7.29902962722721, |
| 8.469695779093664, |
| 9.81809877306655, |
| 11.358657902065282, |
| 13.102505860712087, |
| 15.055862949967128, |
| 17.218348131364184, |
| 19.581439255386453, |
| 22.127353314656723, |
| 24.828633849376587, |
| 27.6486820771775, |
| 30.54334096108829, |
| 33.46345345363812, |
| 36.358112337548896, |
| 39.17816056534983, |
| 41.879441100069684, |
| 44.425355159339965, |
| 46.78844628336223, |
| 48.95093146475928, |
| 50.90428855401433, |
| 52.648136512661125, |
| 54.18869564165987, |
| 55.537098635632745, |
| 56.7077647874992, |
| 57.71697544677006, |
| 58.58171910802236, |
| 59.31882031581807, |
| 59.94433101822328, |
| 60.47314411958625, |
| 60.918782569507, |
| 61.29331890286281, |
| 61.60738599471455, |
| 61.87024727431288, |
| 62.089902123428836, |
| 62.27320880977746, |
| 62.42601274014111, |
| 62.55327203194878, |
| 62.65917552585329, |
| 62.74725058582382, |
| 62.82045955451526, |
| 62.88128472678279, |
| 62.931802319077946, |
| 62.97374626130382, |
| 63.008562806439365 |
| ] |
| }, |
| "rope_theta": 10000.0, |
| "scale_depth": 1.4, |
| "scale_emb": 12, |
| "sparse_config": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.52.4", |
| "use_cache": false, |
| "vocab_size": 73448 |
| } |
|
|