Generated Robustness Evaluation Sample Scripts #117

Draft: wants to merge 2 commits into main
78 changes: 78 additions & 0 deletions prompttools/utils/robustness_score.py
@@ -0,0 +1,78 @@
import promptbench as pb
from promptbench.attack import Attack
from promptbench.models import LLMModel
import textattack
import tensorflow as tf
import tensorflow_hub as tf_hub
import numpy as np
from sklearn.metrics import accuracy_score
# Note: assumes promptbench exposes these metric helpers under promptbench.metrics.
from promptbench.metrics import bleu_score, math_score

# Label constants referenced below; the values mirror promptbench's label sets (assumption).
MNLI_LABEL = ['entailment', 'neutral', 'contradiction', 'entailment\'', 'neutral\'', 'contradiction\'']
EQ_LABEL = ['equivalent', 'not_equivalent', 'equivalent\'', 'not_equivalent\'']
ENTAIL_LABEL = ['entailment', 'not_entailment', 'entailment\'', 'not_entailment\'']

LABEL_SET = {
    # 'positive\'' and 'negative\'' are included as label constraints due to a bug in the TextAttack repo.
    'sst2': ['positive', 'negative', 'positive\'', 'negative\'', '0', '1', '0\'', '1\''],
    'mnli': MNLI_LABEL,
    'mnli_mismatched': MNLI_LABEL,
    'mnli_matched': MNLI_LABEL,
    'qqp': EQ_LABEL,
    'qnli': ENTAIL_LABEL,
    'rte': ENTAIL_LABEL,
    'cola': ['unacceptable', 'acceptable', 'unacceptable\'', 'acceptable\''],
    'mrpc': EQ_LABEL,
    'wnli': ENTAIL_LABEL,
    'mmlu': ['A', 'B', 'C', 'D', 'A\'', 'B\'', 'C\'', 'D\'', 'a', 'b', 'c', 'd', 'a\'', 'b\'', 'c\'', 'd\''],
    # do not change the word 'nothing' in prompts.
    'squad_v2': ['unanswerable', 'unanswerable\''],
    'iwslt': ['translate', 'translate\''],
    'un_multi': ['translate', 'translate\''],
    'math': ['math', 'math\''],
    'bool_logic': ['True', 'False', 'True\'', 'False\'', "bool", "boolean", "bool\'", "boolean\'"],
    'valid_parentheses': ['Valid', 'Invalid', 'Valid\'', 'Invalid\'', 'matched', 'matched\'', 'valid', 'invalid', 'valid\'', 'invalid\''],
}

tasks = ["classification","translation","math"]
def eval_func(prompts, dataset, model, gts, task):
preds = []
scores = {}
for prompt in prompts:
for d in dataset:
input_text = pb.InputProcess.basic_format(prompt, d)
raw_output = model(input_text)
preds.append(output)
if task == "classification":
scores[prompt] = accuracy_score(gts,preds)
elif task == "translation":
scores[prompt] = bleu_score(gts,preds)
elif task == "math":
scores[prompt] = math_score(dataset,gts,preds)
return scores

def calculate_robustness_score(model, prompts, ground_truth_labels, dataset,
                               task=None, is_attack=False, attack='stresstest',
                               percentage_dataset=1.0):

    if task is None or task not in tasks:
        raise ValueError('Please enter one of the following tasks: '
                         'classification, translation, math.')

    try:
        llm = LLMModel(model)
    except Exception:
        raise ValueError(f"Unsupported model. Please input one of the following models: {pb.SUPPORTED_MODELS}")

    try:
        model_dataset = pb.DatasetLoader.load_dataset(dataset)
    except Exception:
        raise ValueError(f"Unsupported dataset. Please input one of the following datasets: {pb.SUPPORTED_DATASETS}")

    if is_attack:
        # Attack only the requested fraction of the dataset.
        model_dataset = model_dataset[:int(percentage_dataset * len(model_dataset))]
        # Task labels must stay untouched by the attack.
        unmodifiable_words = LABEL_SET[dataset]
        # Attack works on a single prompt, so the first prompt is attacked here.
        attack = Attack(llm, attack, model_dataset, prompts[0], eval_func,
                        unmodifiable_words, verbose=True)

        print(attack.attack())
        return

    return eval_func(prompts, model_dataset, llm, ground_truth_labels, task)
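
For context, a purely illustrative call into the new helper. The model and dataset names mirror the sample scripts below; mapping sst2 gold labels onto the words "positive"/"negative" is an assumption, since eval_func compares raw model outputs against ground_truth_labels.

import promptbench as pb
from prompttools.utils.robustness_score import calculate_robustness_score

# Illustrative only: clean (no-attack) scoring of one prompt on sst2.
data = pb.DatasetLoader.load_dataset("sst2")
labels = ["positive" if d["label"] == 1 else "negative" for d in data]

scores = calculate_robustness_score(
    model="gpt-3.5-turbo",
    prompts=["Classify the sentence as positive or negative. Answer with one word: {content}"],
    ground_truth_labels=labels,
    dataset="sst2",
    task="classification",
)
print(scores)  # {prompt: accuracy}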



4 changes: 2 additions & 2 deletions prompttools/version.py
@@ -1,2 +1,2 @@
__version__ = '0.0.44a0+7d36bb0'
git_version = '7d36bb0922948f299ace033d0de58590ae4254f3'
__version__ = '0.0.44a0+3e52fab'
git_version = '3e52fabcf1e2c3fabf7cf95ce256ca890be22ee1'
36 changes: 36 additions & 0 deletions robustness_evaluation_part_1.py
@@ -0,0 +1,36 @@
import promptbench as pb
from promptbench.models import LLMModel
from promptbench.attack import Attack
import numpy as np
import textattack
import tensorflow as tf
import tensorflow_hub as tf_hub

model = LLMModel(name="gpt-3.5-turbo")
dataset = pb.DatasetLoader.load_dataset("sst2")

dataset = dataset[:10]

def proj_func(pred):
    mapping = {
        "positive": 1,
        "negative": 0
    }
    return mapping.get(pred, -1)

def eval_func(prompt, dataset, model):
    preds = []
    labels = []
    for d in dataset:
        input_text = pb.InputProcess.basic_format(prompt, d)
        raw_output = model(input_text)

        output = pb.OutputProcess.cls(raw_output, proj_func)
        preds.append(output)

        labels.append(d["label"])
    return pb.Eval.compute_cls_accuracy(preds, labels)

# Example prompt to attack (any template with a {content} placeholder works).
prompt = "Classify the sentence as positive or negative: {content}"
unmodifiable_words = ["positive\'", "negative\'", "content"]
attack = Attack(model, "stresstest", dataset, prompt, eval_func, unmodifiable_words, verbose=True)
print(attack.attack())
35 changes: 35 additions & 0 deletions robustness_evaluation_part_2.py
@@ -0,0 +1,35 @@
import promptbench as pb
from promptbench.models import LLMModel
from tqdm import tqdm

model = LLMModel(name="gpt-3.5-turbo", max_new_tokens=10, temperature=0.0001, device='cuda')
dataset = pb.DatasetLoader.load_dataset("sst2")
prompts = pb.Prompt([
    "Classify the sentence as positive or negative: {content}",
    "Determine the emotion of the following sentence as positive or negative: {content}",
])
dataset = dataset[:5]

def proj_func(pred):
    mapping = {
        "positive": 1,
        "negative": 0
    }
    return mapping.get(pred, -1)

def eval_func(prompts, dataset, model):
    scores = {}
    for prompt in prompts:
        preds = []
        labels = []
        for data in tqdm(dataset):
            input_text = pb.InputProcess.basic_format(prompt, data)
            label = data['label']
            raw_pred = model(input_text)

            pred = pb.OutputProcess.cls(raw_pred, proj_func)
            preds.append(pred)
            labels.append(label)

        # Record one accuracy per prompt instead of overwriting a single score.
        scores[prompt] = pb.Eval.compute_cls_accuracy(preds, labels)
    return scores

print(eval_func(prompts, dataset, model))
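
A hypothetical follow-up on the per-prompt dictionary returned by eval_func above: rather than printing the raw dict, keep whichever prompt holds up better on the clean data before handing it to the attack flow from part 1.

# Hypothetical: rank prompts by clean accuracy instead of printing the raw dict.
scores = eval_func(prompts, dataset, model)
best_prompt = max(scores, key=scores.get)
print(f"Most accurate prompt: {best_prompt!r} ({scores[best_prompt]:.3f})")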