See axolotl config

axolotl version: 0.9.2

# ============= Production SFT run (4M ShareGPT) =============
# Reproducibility and config-validation switches.
seed: 42
strict: false
# Base model to fine-tune (local path).
# Alternative noted by the author: giux78/zagreus-test-202000
base_model: '/leonardo_work/EUHPC_A04_045/training/ale_outputs/opendata-zagreus-sft-final'
# Output location for this run (relative path).
output_dir: './ale_outputs/zagreus-350M-sft-all-union-fixed'

# ---- Dataset ----
datasets:
  - type: chat_template
    # Local dataset location.
    # Alternative noted by the author: /leonardo_work/EUHPC_A04_045/.data
    path: '/leonardo_work/EUHPC_A04_045/training/.data_is_zagreus'
    # Turns are read from the `conversations` field; within a turn the role
    # comes from `from` and the text from `value`.
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
    # Source role labels mapped onto each chat role.
    roles:
      user:
        - human
        - user
      assistant:
        - gpt
        - assistant
      system:
        - system
      tool:
        - tool
    # Loss is computed on assistant turns only.
    roles_to_train:
      - assistant
    # Also predict the end-of-turn token that closes an assistant reply
    # (the EOS configured in this file is <|im_end|>).
    train_on_eos: turn

# Optional but recommended: keep the pre-tokenized dataset cached between runs.
dataset_prepared_path: './ale_outputs/dataset_cache/zagreus_all_data_sft_fixed_is_zagreus'

# default_system_message: "Sei un assistente utile."

# ---- Chat template (ChatML-style: <|im_start|> / <|im_end|> turn markers) ----
# How the template renders a conversation:
#   * `enable_thinking` defaults to true when the caller does not pass it and
#     selects the reasoning mode ("/think" or "/no_think"). A "/think" or
#     "/no_think" flag inside the first system message overrides that choice
#     and is removed from the instructions text.
#   * A system turn is always emitted first. If the system message contains
#     "/system_override", only the user's instructions (minus that marker) are
#     written. Otherwise the turn contains "## Metadata" (knowledge cutoff,
#     today's date, reasoning mode), "## Custom Instructions" (the user's
#     instructions, or a built-in Italian default that depends on the
#     reasoning mode) and "## Tools" (XML tool signatures built from `tools`,
#     plus `python_tools`, each falling back to "None").
#   * In the message loop, user turns are emitted as-is; assistant turns get
#     an empty "<think>\n\n</think>\n" prefix in "/no_think" mode; tool
#     messages are rendered as user turns; any other role (including the
#     system message already consumed above) produces no output. Non-string
#     `content` is rendered as an empty string.
#   * With `add_generation_prompt`, an assistant header is appended (again
#     with the empty think block in "/no_think" mode).
# `system_message` and `custom_instructions` are initialised to "" up front so
# the later checks do not depend on Jinja's lenient handling of undefined
# names when the conversation has no system message, and the first-message
# check is guarded so an empty `messages` list does not raise.
# NOTE(review): `strftime_now` is not defined in this template; presumably it
# is provided by the rendering environment — confirm. The rendered date is
# whatever "now" is at render time.
chat_template_jinja: |
        {%- if enable_thinking is not defined -%}
          {%- set enable_thinking = true -%}
        {%- endif -%}

        {%- if enable_thinking -%}
          {%- set reasoning_mode = "/think" -%}
        {%- else -%}
          {%- set reasoning_mode = "/no_think" -%}
        {%- endif -%}

        {%- set system_message = "" -%}
        {%- set custom_instructions = "" -%}

        {{- "<|im_start|>system\n" -}}

        {%- if messages and messages[0].role == "system" -%}
          {%- set system_message = messages[0].content -%}
          {%- if "/no_think" in system_message -%}
            {%- set reasoning_mode = "/no_think" -%}
          {%- elif "/think" in system_message -%}
            {%- set reasoning_mode = "/think" -%}
          {%- endif -%}
          {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
        {%- endif -%}

        {%- if "/system_override" in system_message -%}
          {{- custom_instructions.replace("/system_override", "").rstrip() -}}
          {{- "<|im_end|>\n" -}}
        {%- else -%}
          {{- "## Metadata\n\n" -}}
          {{- "Knowledge Cutoff Date: June 2025\n" -}}
          {%- set today = strftime_now("%d %B %Y") -%}
          {{- "Today Date: " ~ today ~ "\n" -}}
          {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}

          {{- "## Custom Instructions\n\n" -}}
          {%- if custom_instructions -%}
            {{- custom_instructions + "\n\n" -}}
          {%- elif reasoning_mode == "/think" -%}
            {{- "Sei un assistente IA disponibile di nome Zagreus, addestrata da mii-llm. Il tuo ruolo come assistente comporta l'esplorazione approfondita delle domande mediante un processo di pensiero sistematico prima di fornire soluzioni finali precise e accurate. Ciò richiede di impegnarti in un ciclo completo di analisi, sintesi, esplorazione, rivalutazione, riflessione, ripercorrimento dei passi e iterazione per sviluppare un processo di pensiero ben ponderato. Struttura la tua risposta in due sezioni principali: Pensiero e Soluzione, usando il formato specificato: <think> Sezione Pensiero </think> Sezione Soluzione. Nella sezione Pensiero, dettaglia il tuo processo di ragionamento per passi. Ogni passo dovrebbe includere considerazioni dettagliate come l'analisi delle domande, la sintesi dei risultati pertinenti, la generazione di nuove idee, la verifica dell'accuratezza dei passaggi correnti, il perfezionamento di eventuali errori e la revisione dei passi precedenti. Nella sezione Soluzione, sulla base dei vari tentativi, delle esplorazioni e delle riflessioni della sezione Pensiero, presenta in modo sistematico la soluzione finale che ritieni corretta. La sezione Soluzione dovrebbe essere logica, accurata e concisa e descrivere i passaggi necessari per arrivare alla conclusione.\n\n" -}}
          {%- else -%}
            {{- "Sei un assistente utile di nome Zagreus allenato da mii-llm.\n\n" -}}
          {%- endif -%}

          {{- "## Tools\n\n" -}}
          {{- "### XML Tools\n\n" -}}
          {%- if tools -%}
            {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
            {%- for tool in tools -%}
              {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | tojson) ~ "\n" -%}
            {%- endfor -%}
            {%- set xml_tools = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags." -%}
          {%- endif -%}
          {%- if xml_tools -%}
            {{- xml_tools -}}
          {%- else -%}
            {{- "None"  -}}
          {%- endif -%}
          {{- "\n\n" -}}
          {{- "### Python Tools\n\n" -}}
          {%- if python_tools -%}
            {{- python_tools -}}
          {%- else -%}
            {{- "None"  -}}
          {%- endif -%}
          {{- "\n\n" -}}
          {{- "<|im_end|>\n" -}}
        {%- endif -%}

        {%- for message in messages -%}
            {%- set content = message.content if message.content is string else "" -%}
            {%- if message.role == "user" -%}
                {{ "<|im_start|>" + message.role + "\n"  + content + "<|im_end|>\n" }}
            {%- elif message.role == "assistant" -%}
                {%- if reasoning_mode == "/think" -%}
                    {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
                {%- else -%}
                    {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
                {%- endif -%}
            {%- elif message.role == "tool" -%}
                {{ "<|im_start|>" + "user\n"  + content + "<|im_end|>\n" }}
            {%- endif -%}
        {%- endfor -%}

        {%- if add_generation_prompt -%}
            {%- if reasoning_mode == "/think" -%}
                {{ "<|im_start|>assistant\n" }}
            {%- else -%}
                {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n"  }}
            {%- endif -%}
        {%- endif -%}



# ---- Training ----
# Batching: 32 GPUs in total -> effective batch = 1 * 8 * 32 = 256.
micro_batch_size: 1
gradient_accumulation_steps: 8

# Sequence handling; packing is switched on for efficiency.
sequence_len: 4096
pad_to_sequence_len: true
sample_packing: true
eval_sample_packing: true

# One full epoch over the 4M conversations.
# (Alternative: set max_steps to stop earlier.)
num_epochs: 1.0

# Optimisation.
optimizer: adamw_torch_fused
learning_rate: 1.0e-3
lr_scheduler: constant
max_grad_norm: 1.0
# Left disabled by the author:
# warmup_ratio: 0.03   # ~3% of the total steps
# weight_decay: 0.01
# (alternativa: usa max_steps se vuoi fermarti prima)

# ---- Precision & memory ----
gradient_checkpointing: true
flash_attention: true
bf16: auto

# ---- Logging / evaluation / checkpointing ----
logging_steps: 20

# Checkpoints every 500 steps, keeping at most 4 on disk.
save_strategy: steps
save_steps: 500                 # author's estimate: ~3 checkpoints/epoch
save_total_limit: 4

# Evaluation every 300 steps.
eval_strategy: steps
eval_steps: 300                 # author's estimate: ~7-8 evals/epoch
# NOTE(review): the two per-epoch estimates above imply different epoch
# lengths — re-derive them from the real step count.
# NOTE(review): no eval split is configured in the visible config (the
# val_set_size line below is commented out) — confirm evaluation actually runs.
# (optional) val_set_size: 10000   # automatic split from the dataset

# ---- FSDP multi-node ----
# The top-level `fsdp` list is what turns FSDP on; `fsdp_config` only tunes it.
# NOTE(review): in axolotl 0.9.x the `fsdp_config` mapping appears to be
# forwarded to the trainer only when `fsdp` is set, so without this list the
# run would fall back to plain data-parallel training — confirm against the
# axolotl docs for the installed version.
fsdp:
  - full_shard
  - auto_wrap
fsdp_config:
  fsdp_sharding_strategy: FULL_SHARD
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  # Assumes the base model is built from LlamaDecoderLayer blocks — TODO confirm.
  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
  # Spelled `fsdp_backward_prefetch` (not the legacy `..._policy` suffix) so
  # that it maps onto the Trainer's `backward_prefetch` option.
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_state_dict_type: FULL_STATE_DICT

# ---- Special tokens (meant to be consistent with the base_model tokenizer) ----
# Padding and end-of-sequence share the same token.
special_tokens:
  eos_token: '<|im_end|>'
  pad_token: '<|im_end|>'

# Additional tokens listed for the tokenizer: chat turn markers plus
# tool-call / tool-response / code delimiters.
tokens:
  - '<|im_start|>'
  - '<|im_end|>'
  - '<tool_response>'
  - '</tool_response>'
  - '<tool_call>'
  - '</tool_call>'
  - '<code>'
  - '</code>'

ale_outputs/zagreus-350M-sft-all-union-fixed

This model was trained from scratch on the None dataset.

Model description

More information needed

Intended uses & limitations

More information needed

Training and evaluation data

More information needed

Training procedure

Training hyperparameters

The following hyperparameters were used during training:

learning_rate: 0.001
train_batch_size: 1
eval_batch_size: 1
seed: 42
distributed_type: multi-GPU
num_devices: 32
gradient_accumulation_steps: 8
total_train_batch_size: 256
total_eval_batch_size: 32
optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
lr_scheduler_type: constant
lr_scheduler_warmup_steps: 100
num_epochs: 1.0

Training results

Framework versions

Transformers 4.56.2
Pytorch 2.5.1+cu121
Datasets 3.5.1
Tokenizers 0.22.1

Downloads last month: 21

Safetensors

Model size

0.4B params

Tensor type

BF16

giux78
/

zagreus-350M-sft-all-union-fixed

ale_outputs/zagreus-350M-sft-all-union-fixed

Model description

Intended uses & limitations

Training and evaluation data

Training procedure

Training hyperparameters

Training results

Framework versions

Evaluation results