| | import os |
| | import math |
| | import torch |
| | from torch import nn |
| | from functools import partial |
| | import torch.nn.functional as F |
| |
|
| |
|
class Adapter_Template(nn.Module):
    """Common scaffolding for adapter modules.

    ``forward`` delegates the actual feature transformation to
    ``self.forward_adapter_modules``, which is not defined on this class and
    therefore has to be supplied by a subclass.
    """

    def __init__(self, config):
        # ``config`` is not read by this constructor.
        super().__init__()
        self.gradient_checkpointing = False

    def freeze_module(self, module):
        """Turn off gradient tracking for every parameter of ``module``."""
        for param in module.parameters():
            param.requires_grad = False

    def forward(self, inputs, add_start_end=True):
        """Run the adapter on the hidden states carried inside ``inputs``.

        Args:
            inputs: A 5-element sequence
                ``(input_ids, hidden_states, targets, attn_mask, loss_mask)``.
            add_start_end: Accepted but not used by this method.

        Returns:
            A 5-tuple identical to ``inputs`` except that the second element
            is replaced by ``self.forward_adapter_modules(hidden_states)``.
        """
        ids, vision_hidden, labels, attention, loss_weights = inputs
        adapted = self.forward_adapter_modules(vision_hidden)
        return (ids, adapted, labels, attention, loss_weights)
| | |
| |
|
class Adapter_AIM(Adapter_Template):
    """Adapter that crops a central window of a square feature grid and
    projects it through a small MLP (``p0``) followed by a linear layer
    (``proj``).
    """

    def __init__(self, config):
        """Build the projection layers and derive the crop geometry.

        Reads ``config.intermediate_size``, ``config.retained_image_size`` and,
        from ``config.vision_config``, ``hidden_size``, ``proj_output_dim``,
        ``patch_size`` and ``image_size``.
        """
        super().__init__(config)

        vision_cfg = config.vision_config
        in_dim = vision_cfg.hidden_size
        width = config.intermediate_size

        # Layers are created in the same order as before so that parameter
        # initialisation draws from the RNG identically.
        mlp_layers = [
            nn.LayerNorm(in_dim),
            nn.Linear(in_dim, width),
            nn.GELU(),
            nn.Linear(width, width),
            nn.GELU(),
        ]
        self.p0 = nn.Sequential(*mlp_layers)
        self.proj = nn.Linear(width, vision_cfg.proj_output_dim)

        # Crop geometry, expressed in patches rather than pixels.
        patch = vision_cfg.patch_size
        kept_pixels = config.retained_image_size
        self.retained_feature_size = int(kept_pixels / patch)
        self.retained_border_size = int(
            (vision_cfg.image_size - kept_pixels) / 2 / patch
        )

    def freeze(self):
        """Disable gradients for both ``p0`` and ``proj``."""
        for submodule in (self.p0, self.proj):
            self.freeze_module(submodule)

    def pixel_shuffle(self, x, scale_factor=0.5):
        """Trade spatial resolution for channel depth on a 4-D tensor.

        Args:
            x: Tensor whose size unpacks as ``(n, w, h, c)``.
            scale_factor: Spatial scaling; with the default ``0.5`` both
                spatial dimensions are halved and channels are quadrupled.

        Returns:
            Tensor of shape ``(n, int(h * scale_factor), int(w * scale_factor),
            int(c / (scale_factor * scale_factor)))``.
        """
        batch, width, height, channels = x.size()
        scaled_h = int(height * scale_factor)
        scaled_w = int(width * scale_factor)

        # Fold part of the third axis into the channel axis.
        folded = x.reshape(batch, width, scaled_h, int(channels / scale_factor))
        # Swap the two spatial axes; ``view`` below needs contiguous memory.
        swapped = folded.permute(0, 2, 1, 3).contiguous()
        # Fold part of the (now third) axis into the channel axis as well.
        return swapped.view(
            batch,
            scaled_h,
            scaled_w,
            int(channels / (scale_factor * scale_factor)),
        )

    def forward_adapter_modules(self, hidden_states):
        """Crop the retained window from the feature grid and project it.

        Dimension 1 of ``hidden_states`` is treated as a flattened square
        grid (side ``int(shape[1] ** 0.5)``); presumably these are per-patch
        vision features -- confirm against the caller.

        Returns:
            ``proj(p0(cropped))`` where ``cropped`` is the retained window
            flattened back to ``(batch, positions, features)``.
        """
        batch = hidden_states.shape[0]
        side = int(hidden_states.shape[1] ** 0.5)
        grid = hidden_states.reshape(batch, side, side, -1)

        # The same [start, stop) window is applied to both spatial axes.
        start = self.retained_border_size
        stop = start + self.retained_feature_size
        window = grid[:, start:stop, start:stop, :]

        tokens = window.reshape(window.shape[0], -1, window.shape[-1])
        return self.proj(self.p0(tokens))