/
1025random16.py
88 lines (77 loc) · 3.13 KB
/
1025random16.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# %%
import json
from src.trainer import train_and_eval
from src.utils import select_lora_layers, select_train_datasets, random_log_scale_int, random_log_scale_float, flush
import random
from datetime import datetime
import random
import time
import copy
# Seed the global RNG from the wall clock (a tick count reduced modulo
# 1234567) so that separate launches draw different configurations.
random.seed(int(time.time()*10000000 % 1234567))

# Number of search iterations and a running counter of started runs.
n_loop = 1000
count = 0

# Model identifier and the data files shared by every run.
model_name = "meta-llama/Llama-2-7b-chat-hf"
test_dataset_path = "../smallDB/1018ig/qa.json"
context_path = "../smallDB/1018ig/context_ig_paraphrase_plus_oa.json"

# Log path prefix, stamped once per launch (second resolution). The search
# loop appends a shard index and the ".json" suffix.
log_filepath = (
    'results/1025random16/'
    + datetime.now().strftime('%Y%m%d%H%M%S')
)

# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's locale default encoding.
with open(test_dataset_path, "r", encoding="utf-8") as f:
    test_dataset = json.load(f)

# context
with open(context_path, 'r', encoding="utf-8") as f:
    context_list = json.load(f)
# randomly select parameters
#
# Random search: every iteration draws a fresh configuration (which context
# groups to train on, which layer names to enable, and the numeric
# hyperparameters), prints a summary, and hands the configuration to
# train_and_eval().

# Names offered as candidates; each is switched on or off independently.
lora_layer_names = (
    "embed_tokens",
    "lm_head",
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
)
# Keys of train_dict echoed to stdout before each run.
show_keys = ["r", "lr", "lora_alpha", "total_epochs", "n_irrelevant_texts",
             "train_context_dict", "lola_layer_dict", "target_layers"]
# Number of consecutive runs that share one log file.
runs_per_log_file = 100

for i in range(n_loop):
    # Re-seed from the wall clock on every iteration.
    # NOTE(review): presumably needed because the training call may reset
    # the global `random` state -- confirm against train_and_eval.
    random.seed(int(time.time()*10000000 % 1234567))
    count += 1
    flush()

    # One independent coin flip per context key "1".."12", drawn in key order.
    train_context_dict = {
        str(group): random.choice([True, False]) for group in range(1, 13)
    }
    # One independent coin flip per candidate layer name, drawn in tuple order.
    lora_layer_dict = {
        layer: random.choice([True, False]) for layer in lora_layer_names
    }

    # The draws below stay in this order so the RNG is consumed the same way
    # as before: irrelevant-text count, then the two select_* helpers.
    n_irrelevant_texts = random_log_scale_int(min_val=1, max_val=5000)
    target_layers = select_lora_layers(lora_layer_dict)
    use_context_list = select_train_datasets(
        train_context_dict, context_list, n_irrelevant_texts)

    # count is 1-based here, so subtract one before dividing: runs 1..100 go
    # to shard 0, 101..200 to shard 1, and so on. Dividing count directly
    # would put only 99 runs in the first file and leave the last run alone
    # in an extra file.
    shard = (count - 1) // runs_per_log_file

    # Key insertion order is kept as-is in case a consumer serialises the
    # dict. The "lola_layer_dict" spelling is a runtime key and is left
    # unchanged on purpose.
    train_dict = {
        "bit": 16,
        "n_irrelevant_texts": n_irrelevant_texts,
        "train_text_list": use_context_list,
        "lola_layer_dict": lora_layer_dict,
        "test_dataset": test_dataset,
        "model_name": model_name,
        "target_layers": target_layers,
        "train_context_dict": train_context_dict,
        "log_filepath": f"{log_filepath}_{shard}.json",
        "per_device_train_batch_size": 1,
        "r": random_log_scale_int(min_val=1, max_val=1024),
        "lr": random_log_scale_float(min_val=10**-5, max_val=10**-2),
        "lora_alpha": random_log_scale_int(min_val=1, max_val=1024),
        "total_epochs": random_log_scale_int(min_val=1, max_val=30),
    }

    for k in show_keys:
        print(k, train_dict[k])

    # The return value is kept in a module-level name; nothing in this file
    # reads it afterwards.
    score = train_and_eval(train_dict)