Generated Robustness Evaluation Sample Scripts #117

Draft: wants to merge 2 commits into main
78 changes: 78 additions & 0 deletions prompttools/utils/robustness_score.py
@@ -0,0 +1,78 @@
import promptbench as pb
from promptbench.attack import Attack
from promptbench.models import LLMModel
import textattack
import tensorflow as tf
import tensorflow_hub as tf_hub
import numpy as np
from sklearn.metrics import accuracy_score
# Note: assumes promptbench exposes these metric helpers under promptbench.metrics.
from promptbench.metrics import bleu_score, math_score

# Label constants referenced below; the values mirror promptbench's label sets (assumption).
MNLI_LABEL = ['entailment', 'neutral', 'contradiction', 'entailment\'', 'neutral\'', 'contradiction\'']
EQ_LABEL = ['equivalent', 'not_equivalent', 'equivalent\'', 'not_equivalent\'']
ENTAIL_LABEL = ['entailment', 'not_entailment', 'entailment\'', 'not_entailment\'']

LABEL_SET = {
    # 'positive\'' and 'negative\'' are included as label constraints due to a bug in the TextAttack repo.
    'sst2': ['positive', 'negative', 'positive\'', 'negative\'', '0', '1', '0\'', '1\''],
    'mnli': MNLI_LABEL,
    'mnli_mismatched': MNLI_LABEL,
    'mnli_matched': MNLI_LABEL,
    'qqp': EQ_LABEL,
    'qnli': ENTAIL_LABEL,
    'rte': ENTAIL_LABEL,
    'cola': ['unacceptable', 'acceptable', 'unacceptable\'', 'acceptable\''],
    'mrpc': EQ_LABEL,
    'wnli': ENTAIL_LABEL,
    'mmlu': ['A', 'B', 'C', 'D', 'A\'', 'B\'', 'C\'', 'D\'', 'a', 'b', 'c', 'd', 'a\'', 'b\'', 'c\'', 'd\''],
    # do not change the word 'nothing' in prompts.
    'squad_v2': ['unanswerable', 'unanswerable\''],
    'iwslt': ['translate', 'translate\''],
    'un_multi': ['translate', 'translate\''],
    'math': ['math', 'math\''],
    'bool_logic': ['True', 'False', 'True\'', 'False\'', "bool", "boolean", "bool\'", "boolean\'"],
    'valid_parentheses': ['Valid', 'Invalid', 'Valid\'', 'Invalid\'', 'matched', 'matched\'', 'valid', 'invalid', 'valid\'', 'invalid\''],
}

tasks = ["classification","translation","math"]
def eval_func(prompts, dataset, model, gts, task):
preds = []
scores = {}
for prompt in prompts:
for d in dataset:
input_text = pb.InputProcess.basic_format(prompt, d)
raw_output = model(input_text)
preds.append(output)
if task == "classification":
scores[prompt] = accuracy_score(gts,preds)
elif task == "translation":
scores[prompt] = bleu_score(gts,preds)
elif task == "math":
scores[prompt] = math_score(dataset,gts,preds)
return scores

def calculate_robustness_score(model, prompts, ground_truth_labels, dataset,
                               task=None, is_attack=False, attack='stresstest',
                               percentage_dataset=1.0):

    if task is None or task not in tasks:
        raise ValueError('Please enter one of the following tasks: '
                         'classification, translation, math.')

    try:
        llm = LLMModel(model)
    except Exception:
        raise ValueError(f"Unsupported model. Please input one of the following models: {pb.SUPPORTED_MODELS}")

    try:
        model_dataset = pb.DatasetLoader.load_dataset(dataset)
    except Exception:
        raise ValueError(f"Unsupported dataset. Please input one of the following datasets: {pb.SUPPORTED_DATASETS}")

    if is_attack:
        # Attack only the requested fraction of the dataset.
        model_dataset = model_dataset[:int(percentage_dataset * len(model_dataset))]
        # Task labels must stay untouched by the attack.
        unmodifiable_words = LABEL_SET[dataset]
        # Attack works on a single prompt, so the first prompt is attacked here.
        attack = Attack(llm, attack, model_dataset, prompts[0], eval_func,
                        unmodifiable_words, verbose=True)

        print(attack.attack())
        return

    return eval_func(prompts, model_dataset, llm, ground_truth_labels, task)
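
For context, a purely illustrative call into the new helper. The model and dataset names mirror the sample scripts below; mapping sst2 gold labels onto the words "positive"/"negative" is an assumption, since eval_func compares raw model outputs against ground_truth_labels.

import promptbench as pb
from prompttools.utils.robustness_score import calculate_robustness_score

# Illustrative only: clean (no-attack) scoring of one prompt on sst2.
data = pb.DatasetLoader.load_dataset("sst2")
labels = ["positive" if d["label"] == 1 else "negative" for d in data]

scores = calculate_robustness_score(
    model="gpt-3.5-turbo",
    prompts=["Classify the sentence as positive or negative. Answer with one word: {content}"],
    ground_truth_labels=labels,
    dataset="sst2",
    task="classification",
)
print(scores)  # {prompt: accuracy}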



4 changes: 2 additions & 2 deletions prompttools/version.py
@@ -1,2 +1,2 @@
__version__ = '0.0.44a0+7d36bb0'
git_version = '7d36bb0922948f299ace033d0de58590ae4254f3'
__version__ = '0.0.44a0+3e52fab'
git_version = '3e52fabcf1e2c3fabf7cf95ce256ca890be22ee1'
36 changes: 36 additions & 0 deletions robustness_evaluation_part_1.py
@@ -0,0 +1,36 @@
import promptbench as pb
from promptbench.models import LLMModel
from promptbench.attack import Attack
import numpy as np
import textattack
import tensorflow as tf
import tensorflow_hub as tf_hub

model = LLMModel(name="gpt-3.5-turbo")
dataset = pb.DatasetLoader.load_dataset("sst2")

dataset = dataset[:10]

def proj_func(pred):
    mapping = {
        "positive": 1,
        "negative": 0
    }
    return mapping.get(pred, -1)

def eval_func(prompt, dataset, model):
    preds = []
    labels = []
    for d in dataset:
        input_text = pb.InputProcess.basic_format(prompt, d)
        raw_output = model(input_text)

        output = pb.OutputProcess.cls(raw_output, proj_func)
        preds.append(output)

        labels.append(d["label"])
    return pb.Eval.compute_cls_accuracy(preds, labels)

# Example prompt to attack (any template with a {content} placeholder works).
prompt = "Classify the sentence as positive or negative: {content}"
unmodifiable_words = ["positive\'", "negative\'", "content"]
attack = Attack(model, "stresstest", dataset, prompt, eval_func, unmodifiable_words, verbose=True)
print(attack.attack())
35 changes: 35 additions & 0 deletions robustness_evaluation_part_2.py
@@ -0,0 +1,35 @@
import promptbench as pb
from promptbench.models import LLMModel
from tqdm import tqdm

model = LLMModel(name="gpt-3.5-turbo", max_new_tokens=10, temperature=0.0001, device='cuda')
dataset = pb.DatasetLoader.load_dataset("sst2")
prompts = pb.Prompt([
    "Classify the sentence as positive or negative: {content}",
    "Determine the emotion of the following sentence as positive or negative: {content}",
])
dataset = dataset[:5]

def proj_func(pred):
    mapping = {
        "positive": 1,
        "negative": 0
    }
    return mapping.get(pred, -1)

def eval_func(prompts, dataset, model):
    scores = {}
    for prompt in prompts:
        preds = []
        labels = []
        for data in tqdm(dataset):
            input_text = pb.InputProcess.basic_format(prompt, data)
            label = data['label']
            raw_pred = model(input_text)

            pred = pb.OutputProcess.cls(raw_pred, proj_func)
            preds.append(pred)
            labels.append(label)

        # Record one accuracy per prompt instead of overwriting a single score.
        scores[prompt] = pb.Eval.compute_cls_accuracy(preds, labels)
    return scores

print(eval_func(prompts, dataset, model))
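
A hypothetical follow-up on the per-prompt dictionary returned by eval_func above: rather than printing the raw dict, keep whichever prompt holds up better on the clean data before handing it to the attack flow from part 1.

# Hypothetical: rank prompts by clean accuracy instead of printing the raw dict.
scores = eval_func(prompts, dataset, model)
best_prompt = max(scores, key=scores.get)
print(f"Most accurate prompt: {best_prompt!r} ({scores[best_prompt]:.3f})")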