| from typing import List |
|
|
| import pandas as pd |
| from sentence_transformers.util import cos_sim |
|
|
| from utils.models import ModelWithPooling |
|
|
|
|
def p0_originality(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
    """
    Row-wise originality: for every row, 1 minus the cosine similarity between
    the model vector of the row's prompt and the model vector of its response.

    The score is written into a new ``originality`` column on ``df`` itself
    (the frame is modified in place) and that same frame is returned.

    :param df: frame that must contain the columns ``prompt`` and ``response``
    :param model_name: name handed to ``ModelWithPooling`` to build the model
    :param pooling: pooling option forwarded to every model call
    :return: ``df`` with the added ``originality`` column
    """
    # Column checks run before the model is built, so a malformed frame fails fast.
    assert 'prompt' in df.columns
    assert 'response' in df.columns
    encoder = ModelWithPooling(model_name)

    def embed(text: str):
        # One model call per text, always with the caller-selected pooling.
        return encoder(text=text, pooling=pooling)

    def row_originality(row) -> float:
        # The prompt is encoded first, then the response.
        similarity = cos_sim(embed(row['prompt']), embed(row['response'])).item()
        return 1 - similarity

    df['originality'] = df.apply(row_originality, axis=1)
    return df
|
|
|
|
def p1_flexibility(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
    """
    Group-wise flexibility: rows are grouped by (``id``, ``prompt``) and, inside
    each group, the score is the sum over consecutive response pairs of
    1 minus the cosine similarity of their model vectors.

    A group holding a single response has no pairs and therefore scores 0.

    :param df: frame that must contain the columns ``prompt``, ``response`` and ``id``
    :param model_name: name handed to ``ModelWithPooling`` to build the model
    :param pooling: pooling option forwarded to every model call
    :return: new frame with one row per group and the columns ``id``,
        ``prompt`` and ``flexibility``, on a fresh default index
    """
    # Column checks run before the model is built, so a malformed frame fails fast.
    assert 'prompt' in df.columns
    assert 'response' in df.columns
    assert 'id' in df.columns
    encoder = ModelWithPooling(model_name)

    def adjacent_distance_sum(responses: List[str]) -> float:
        # Encode each response once, in the order the group yields them.
        vectors = [encoder(text=item, pooling=pooling) for item in responses]
        total = 0
        # Pair every vector with its successor: (v0, v1), (v1, v2), ...
        for current, following in zip(vectors, vectors[1:]):
            total += 1 - cos_sim(current, following).item()
        return total

    grouped = df.groupby(by=['id', 'prompt'])
    aggregated = grouped.agg(
        {'id': 'first', 'prompt': 'first', 'response': adjacent_distance_sum}
    )
    scored = aggregated.rename(columns={'response': 'flexibility'})
    # The group keys are already present as regular columns, so the index is dropped.
    return scored.reset_index(drop=True)
|
|
|
|
if __name__ == '__main__':
    # Manual smoke run: score the example CSV with both metrics.
    _model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    # Both scoring functions take `pooling` as a required third argument; the
    # previous two-argument calls raised TypeError before doing any work.
    # NOTE(review): 'mean' is assumed to be a pooling option that
    # ModelWithPooling accepts -- confirm against utils.models.
    _pooling = 'mean'
    _df_input = pd.read_csv('data/tmp/example_3.csv')
    # p0_originality adds its 'originality' column to _df_input in place.
    _df_0 = p0_originality(_df_input, _model_name, _pooling)
    _df_1 = p1_flexibility(_df_input, _model_name, _pooling)
|
|