From ccf64c0cbedeff2d3e8ceec33ea0238e1262b98a Mon Sep 17 00:00:00 2001 From: Bouzid MEDJDOUB Date: Sat, 27 Jan 2024 22:48:16 +0100 Subject: [PATCH 01/10] Added latest gpt models, mistral local and mistral api --- README.md | 4 ++ demo.py | 134 ++++++++++++++++++++++++++++++++++++++ install.sh | 1 + models/gpt.py | 4 +- models/llms.py | 25 +++++++ models/mistral.py | 148 ++++++++++++++++++++++++++++++++++++++++++ models/mistral_api.py | 108 ++++++++++++++++++++++++++++++ 7 files changed, 422 insertions(+), 2 deletions(-) create mode 100644 demo.py create mode 100644 models/mistral.py create mode 100644 models/mistral_api.py diff --git a/README.md b/README.md index 7986048..db406b3 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,10 @@ Add your openai api key to `~/.bashrc` with ``` echo "export OPENAI_API_KEY=" >> ~/.bashrc ``` +Add your mistral api key to `~/.bashrc` with +``` +echo "export MISTRAL_KEY=" >> ~/.bashrc +``` Finally, if you have a diffferent OpenAI API base, change it in your `~/.bashrc` with ``` diff --git a/demo.py b/demo.py new file mode 100644 index 0000000..5526ca2 --- /dev/null +++ b/demo.py @@ -0,0 +1,134 @@ +import os +import torch +os.environ['OMP_NUM_THREADS'] = '4' +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import openai +openai.api_key = os.environ['OPENAI_API_KEY'] +openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1") +from data.serialize import SerializerSettings +from models.utils import grid_iter +from models.promptcast import get_promptcast_predictions_data +from models.darts import get_arima_predictions_data +from models.llmtime import get_llmtime_predictions_data +from data.small_context import get_datasets +from models.validation_likelihood_tuning import get_autotuned_predictions_data + +def plot_preds(train, test, pred_dict, model_name, show_samples=False): + pred = pred_dict['median'] + pred = pd.Series(pred, index=test.index) + plt.figure(figsize=(8, 6), dpi=100) + plt.plot(train) + plt.plot(test, label='Truth', color='black') + plt.plot(pred, label=model_name, color='purple') + # shade 90% confidence interval + samples = pred_dict['samples'] + lower = np.quantile(samples, 0.05, axis=0) + upper = np.quantile(samples, 0.95, axis=0) + plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple') + if show_samples: + samples = pred_dict['samples'] + # convert df to numpy array + samples = samples.values if isinstance(samples, pd.DataFrame) else samples + for i in range(min(10, samples.shape[0])): + plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1) + plt.legend(loc='upper left') + if 'NLL/D' in pred_dict: + nll = pred_dict['NLL/D'] + if nll is not None: + plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5)) + plt.show() + + + +print(torch.cuda.max_memory_allocated()) +print() + +gpt4_hypers = dict( + alpha=0.3, + basic=True, + temp=1.0, + top_p=0.8, + settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-') +) + +mistral_api_hypers = dict( + alpha=0.3, + basic=True, + temp=1.0, + top_p=0.8, + settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-') +) + +gpt3_hypers = dict( + temp=0.7, + alpha=0.95, + beta=0.3, + basic=False, + settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True) +) + + +llma2_hypers = dict( + temp=0.7, + alpha=0.95, + beta=0.3, + basic=False, + 
settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True) +) + + +promptcast_hypers = dict( + temp=0.7, + settings=SerializerSettings(base=10, prec=0, signed=True, + time_sep=', ', + bit_sep='', + plus_sign='', + minus_sign='-', + half_bin_correction=False, + decimal_point='') +) + +arima_hypers = dict(p=[12,30], d=[1,2], q=[0]) + +model_hypers = { + 'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers}, + 'LLMTime GPT-4': {'model': 'gpt-4', **gpt4_hypers}, + 'LLMTime GPT-3': {'model': 'text-davinci-003', **gpt3_hypers}, + 'PromptCast GPT-3': {'model': 'text-davinci-003', **promptcast_hypers}, + 'LLMA2': {'model': 'llama-7b', **llma2_hypers}, + 'mistral': {'model': 'mistral', **llma2_hypers}, + 'mistral-api-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers}, + 'mistral-api-small': {'model': 'mistral-api-tiny', **mistral_api_hypers}, + 'mistral-api-medium': {'model': 'mistral-api-tiny', **mistral_api_hypers}, + 'ARIMA': arima_hypers, + + } + + +model_predict_fns = { + #'LLMA2': get_llmtime_predictions_data, + #'mistral': get_llmtime_predictions_data, + #'LLMTime GPT-4': get_llmtime_predictions_data, + 'mistral-api-tiny': get_llmtime_predictions_data +} + + +model_names = list(model_predict_fns.keys()) + +datasets = get_datasets() +ds_name = 'AirPassengersDataset' + + +data = datasets[ds_name] +train, test = data # or change to your own data +out = {} + +for model in model_names: # GPT-4 takes a about a minute to run + model_hypers[model].update({'dataset_name': ds_name}) # for promptcast + hypers = list(grid_iter(model_hypers[model])) + num_samples = 10 + pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False) + out[model] = pred_dict + plot_preds(train, test, pred_dict, model, show_samples=True) \ No newline at end of file diff --git a/install.sh b/install.sh index 5506f60..e3002a8 100644 --- a/install.sh +++ b/install.sh @@ -16,4 +16,5 @@ pip install multiprocess pip install SentencePiece pip install accelerate pip install gdown +pip install mistralai #for mistral models conda deactivate diff --git a/models/gpt.py b/models/gpt.py index 95c1dd8..886ddba 100644 --- a/models/gpt.py +++ b/models/gpt.py @@ -58,9 +58,9 @@ def gpt_completion_fn(model, input_str, steps, settings, num_samples, temp): allowed_tokens = [settings.bit_sep + str(i) for i in range(settings.base)] allowed_tokens += [settings.time_sep, settings.plus_sign, settings.minus_sign] allowed_tokens = [t for t in allowed_tokens if len(t) > 0] # remove empty tokens like an implicit plus sign - if (model not in ['gpt-3.5-turbo','gpt-4']): # logit bias not supported for chat models + if (model not in ['gpt-3.5-turbo','gpt-4','gpt-4-1106-preview']): # logit bias not supported for chat models logit_bias = {id: 30 for id in get_allowed_ids(allowed_tokens, model)} - if model in ['gpt-3.5-turbo','gpt-4']: + if model in ['gpt-3.5-turbo','gpt-4','gpt-4-1106-preview']: chatgpt_sys_message = "You are a helpful assistant that performs time series predictions. The user will provide a sequence and you will predict the remaining sequence. The sequence is represented by decimal strings separated by commas." extra_input = "Please continue the following sequence without producing any additional text. Do not say anything like 'the next terms in the sequence are', just return the numbers. 
Sequence:\n" response = openai.ChatCompletion.create( diff --git a/models/llms.py b/models/llms.py index 3e263d1..310c592 100644 --- a/models/llms.py +++ b/models/llms.py @@ -4,6 +4,13 @@ from models.llama import llama_completion_fn, llama_nll_fn from models.llama import tokenize_fn as llama_tokenize_fn +from models.mistral import mistral_completion_fn, mistral_nll_fn +from models.mistral import tokenize_fn as mistral_tokenize_fn + +from models.mistral_api import mistral_api_completion_fn, mistral_api_nll_fn +from models.mistral_api import tokenize_fn as mistral_api_tokenize_fn + + # Required: Text completion function for each model # ----------------------------------------------- # Each model is mapped to a function that samples text completions. @@ -21,7 +28,12 @@ completion_fns = { 'text-davinci-003': partial(gpt_completion_fn, model='text-davinci-003'), 'gpt-4': partial(gpt_completion_fn, model='gpt-4'), + 'gpt-4-1106-preview':partial(gpt_completion_fn, model='gpt-4-1106-preview'), 'gpt-3.5-turbo-instruct': partial(gpt_completion_fn, model='gpt-3.5-turbo-instruct'), + 'mistral': partial(mistral_completion_fn, model='mistral'), + 'mistral-api-tiny': partial(mistral_api_completion_fn, model='mistral-tiny'), + 'mistral-api-small': partial(mistral_api_completion_fn, model='mistral-small'), + 'mistral-api-medium': partial(mistral_api_completion_fn, model='mistral-medium'), 'llama-7b': partial(llama_completion_fn, model='7b'), 'llama-13b': partial(llama_completion_fn, model='13b'), 'llama-70b': partial(llama_completion_fn, model='70b'), @@ -49,6 +61,11 @@ # - float: Computed NLL per dimension for p(target_arr | input_arr). nll_fns = { 'text-davinci-003': partial(gpt_nll_fn, model='text-davinci-003'), + 'mistral': partial(mistral_nll_fn, model='mistral'), + 'mistral-api-tiny': partial(mistral_api_nll_fn, model='mistral-tiny'), + 'mistral-api-small': partial(mistral_api_nll_fn, model='mistral-small'), + 'mistral-api-medium': partial(mistral_api_nll_fn, model='mistral-medium'), + 'llama-7b': partial(llama_completion_fn, model='7b'), 'llama-7b': partial(llama_nll_fn, model='7b'), 'llama-13b': partial(llama_nll_fn, model='13b'), 'llama-70b': partial(llama_nll_fn, model='70b'), @@ -67,6 +84,10 @@ tokenization_fns = { 'text-davinci-003': partial(gpt_tokenize_fn, model='text-davinci-003'), 'gpt-3.5-turbo-instruct': partial(gpt_tokenize_fn, model='gpt-3.5-turbo-instruct'), + 'mistral': partial(mistral_tokenize_fn, model='mistral'), + 'mistral-api-tiny': partial(mistral_api_tokenize_fn, model='mistral-tiny'), + 'mistral-api-small': partial(mistral_api_tokenize_fn, model='mistral-small'), + 'mistral-api-medium': partial(mistral_api_tokenize_fn, model='mistral-medium'), 'llama-7b': partial(llama_tokenize_fn, model='7b'), 'llama-13b': partial(llama_tokenize_fn, model='13b'), 'llama-70b': partial(llama_tokenize_fn, model='70b'), @@ -79,6 +100,10 @@ context_lengths = { 'text-davinci-003': 4097, 'gpt-3.5-turbo-instruct': 4097, + 'mistral-api-tiny': 4097, + 'mistral-api-small': 4097, + 'mistral-api-medium': 4097, + 'mistral': 4096, 'llama-7b': 4096, 'llama-13b': 4096, 'llama-70b': 4096, diff --git a/models/mistral.py b/models/mistral.py new file mode 100644 index 0000000..5717207 --- /dev/null +++ b/models/mistral.py @@ -0,0 +1,148 @@ +import torch +import numpy as np +from jax import grad,vmap +from tqdm import tqdm +import argparse +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, +) +from data.serialize import serialize_arr, deserialize_str, SerializerSettings + +DEFAULT_EOS_TOKEN = "" 
+DEFAULT_BOS_TOKEN = "" +DEFAULT_UNK_TOKEN = "" + +loaded = {} + +def get_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") + special_tokens_dict = dict() + if tokenizer.eos_token is None: + special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN + if tokenizer.bos_token is None: + special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN + if tokenizer.unk_token is None: + special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN + tokenizer.add_special_tokens(special_tokens_dict) + tokenizer.pad_token = tokenizer.eos_token + return tokenizer + +def get_model_and_tokenizer(model_name, cache_model=False): + if model_name in loaded: + return loaded[model_name] + tokenizer = get_tokenizer() + + model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1",device_map="cpu") + model.eval() + if cache_model: + loaded[model_name] = model, tokenizer + return model, tokenizer + +def tokenize_fn(str, model): + tokenizer = get_tokenizer() + return tokenizer(str) + +def mistral_nll_fn(model, input_arr, target_arr, settings:SerializerSettings, transform, count_seps=True, temp=1, cache_model=True): + """ Returns the NLL/dimension (log base e) of the target array (continuous) according to the LM + conditioned on the input array. Applies relevant log determinant for transforms and + converts from discrete NLL of the LLM to continuous by assuming uniform within the bins. + inputs: + input_arr: (n,) context array + target_arr: (n,) ground truth array + cache_model: whether to cache the model and tokenizer for faster repeated calls + Returns: NLL/D + """ + model, tokenizer = get_model_and_tokenizer(model, cache_model=cache_model) + + input_str = serialize_arr(vmap(transform)(input_arr), settings) + target_str = serialize_arr(vmap(transform)(target_arr), settings) + full_series = input_str + target_str + + batch = tokenizer( + [full_series], + return_tensors="pt", + add_special_tokens=True + ) + batch = {k: v.cuda() for k, v in batch.items()} + + with torch.no_grad(): + out = model(**batch) + + good_tokens_str = list("0123456789" + settings.time_sep) + good_tokens = [tokenizer.convert_tokens_to_ids(token) for token in good_tokens_str] + bad_tokens = [i for i in range(len(tokenizer)) if i not in good_tokens] + out['logits'][:,:,bad_tokens] = -100 + + input_ids = batch['input_ids'][0][1:] + input_ids = input_ids.to('cpu') + logprobs = torch.nn.functional.log_softmax(out['logits'], dim=-1)[0][:-1] + logprobs = logprobs[torch.arange(len(input_ids)), input_ids].cpu().numpy() + + + tokens = tokenizer.batch_decode( + input_ids, + skip_special_tokens=False, + clean_up_tokenization_spaces=False + ) + + input_len = len(tokenizer([input_str], return_tensors="pt",)['input_ids'][0]) + input_len = input_len - 2 # remove the BOS token + + logprobs = logprobs[input_len:] + tokens = tokens[input_len:] + BPD = -logprobs.sum()/len(target_arr) + + #print("BPD unadjusted:", -logprobs.sum()/len(target_arr), "BPD adjusted:", BPD) + # log p(x) = log p(token) - log bin_width = log p(token) + prec * log base + transformed_nll = BPD - settings.prec*np.log(settings.base) + avg_logdet_dydx = np.log(vmap(grad(transform))(target_arr)).mean() + return transformed_nll-avg_logdet_dydx + +def mistral_completion_fn( + model, + input_str, + steps, + settings, + batch_size=5, + num_samples=20, + temp=0.9, + top_p=0.9, + cache_model=True +): + avg_tokens_per_step = len(tokenize_fn(input_str, model)['input_ids']) / len(input_str.split(settings.time_sep)) + max_tokens = int(avg_tokens_per_step*steps) + + model, 
tokenizer = get_model_and_tokenizer(model, cache_model=cache_model) + + gen_strs = [] + for _ in tqdm(range(num_samples // batch_size)): + batch = tokenizer( + [input_str], + return_tensors="pt", + ) + + batch = {k: v.repeat(batch_size, 1) for k, v in batch.items()} + batch = {k: v.cpu() for k, v in batch.items()} + num_input_ids = batch['input_ids'].shape[1] + + good_tokens_str = list("0123456789" + settings.time_sep) + good_tokens = [tokenizer.convert_tokens_to_ids(token) for token in good_tokens_str] + # good_tokens += [tokenizer.eos_token_id] + bad_tokens = [i for i in range(len(tokenizer)) if i not in good_tokens] + + generate_ids = model.generate( + **batch, + do_sample=True, + max_new_tokens=max_tokens, + temperature=temp, + top_p=top_p, + bad_words_ids=[[t] for t in bad_tokens], + renormalize_logits=True, + ) + gen_strs += tokenizer.batch_decode( + generate_ids[:, num_input_ids:], + skip_special_tokens=True, + clean_up_tokenization_spaces=False + ) + return gen_strs diff --git a/models/mistral_api.py b/models/mistral_api.py new file mode 100644 index 0000000..c677921 --- /dev/null +++ b/models/mistral_api.py @@ -0,0 +1,108 @@ +from data.serialize import serialize_arr, SerializerSettings +from mistralai.client import MistralClient +from mistralai.models.chat_completion import ChatMessage +import tiktoken +import os +import numpy as np +from jax import grad,vmap + +loaded_model='' +mistral_client={} + +def init_mistral_client(model): + """ + Initialize the Mistral client for a specific LLM model. + """ + global loaded_model, mistral_client + if mistral_client == {} or loaded_model != model: + loaded_model = model + mistral_client = MistralClient(os.environ['MISTRAL_KEY']) + return mistral_client + +def tokenize_fn(str, model): + """ + Retrieve the token IDs for a string for a specific GPT model. + + Args: + str (list of str): str to be tokenized. + model (str): Name of the LLM model. + + Returns: + list of int: List of corresponding token IDs. + """ + encoding = tiktoken.encoding_for_model('gpt-3.5-turbo') + #encoding = init_mistral_client(model).embeddings(model="mistral-embed",input=str) + return encoding.encode(str) + +def get_allowed_ids(strs, model): + """ + Retrieve the token IDs for a given list of strings for a specific GPT model. + + Args: + strs (list of str): strs to be converted. + model (str): Name of the LLM model. + + Returns: + list of int: List of corresponding token IDs. + """ + encoding = tiktoken.encoding_for_model('gpt-3.5-turbo') + ids = [] + for s in strs: + id = encoding.encode(s) #init_mistral_client(model).embeddings(model="mistral-embed",input=s) + ids.extend(id) + return ids + +def mistral_api_completion_fn(model, input_str, steps, settings, num_samples, temp): + """ + Generate text completions from GPT using OpenAI's API. + + Args: + model (str): Name of the GPT-3 model to use. + input_str (str): Serialized input time series data. + steps (int): Number of time steps to predict. + settings (SerializerSettings): Serialization settings. + num_samples (int): Number of completions to generate. + temp (float): Temperature for sampling. + + Returns: + list of str: List of generated samples. 
+ """ + avg_tokens_per_step = len(tokenize_fn(input_str, model)) / len(input_str.split(settings.time_sep)) + # define logit bias to prevent GPT-3 from producing unwanted tokens + allowed_tokens = [settings.bit_sep + str(i) for i in range(settings.base)] + allowed_tokens += [settings.time_sep, settings.plus_sign, settings.minus_sign] + allowed_tokens = [t for t in allowed_tokens if len(t) > 0] # remove empty tokens like an implicit plus sign + if model in ['mistral-tiny','mistral-small','mistral-medium']: + mistral_sys_message = "You are a helpful assistant that performs time series predictions. The user will provide a sequence and you will predict the remaining sequence. The sequence is represented by decimal strings separated by commas." + extra_input = "Please continue the following sequence without producing any additional text. Do not say anything like 'the next terms in the sequence are', just return the numbers. Sequence:\n" + response = init_mistral_client(model).chat( + model=model, + messages=[ChatMessage(role="system", content = mistral_sys_message),ChatMessage(role="user", content= (extra_input+input_str+settings.time_sep))], + max_tokens=int(avg_tokens_per_step*steps), + temperature=temp, + ) + return [choice.message.content for choice in response.choices] + +def mistral_api_nll_fn(model, input_arr, target_arr, settings:SerializerSettings, transform, count_seps=True, temp=1): + """ + Calculate the Negative Log-Likelihood (NLL) per dimension of the target array according to the LLM. + + Args: + model (str): Name of the LLM model to use. + input_arr (array-like): Input array (history). + target_arr (array-like): Ground target array (future). + settings (SerializerSettings): Serialization settings. + transform (callable): Transformation applied to the numerical values before serialization. + count_seps (bool, optional): Whether to account for separators in the calculation. Should be true for models that generate a variable number of digits. Defaults to True. + temp (float, optional): Temperature for sampling. Defaults to 1. + + Returns: + float: Calculated NLL per dimension. 
+ """ + input_str = serialize_arr(vmap(transform)(input_arr), settings) + target_str = serialize_arr(vmap(transform)(target_arr), settings) + assert input_str.endswith(settings.time_sep), f'Input string must end with {settings.time_sep}, got {input_str}' + full_series = input_str + target_str + response = init_mistral_client(model).chat_stream(model=model, messages=[ChatMessage(role="user",content=full_series)], max_tokens=0, temperature=temp,) + #print(response['choices'][0]) + return -1 From 0dc4c83a0867f852793ef8adcc42ea8a44e4cab0 Mon Sep 17 00:00:00 2001 From: Nate Gruver Date: Sun, 4 Feb 2024 22:38:32 -0500 Subject: [PATCH 02/10] Update LICENSE --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 6a454af..564b92f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Nate Gruver +Copyright (c) 2023 Nate Gruver, Marc Finzi, Shikai Qiu Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 9e9cf27973d94f5a23342d03a38b7cb2413063a6 Mon Sep 17 00:00:00 2001 From: Nate Gruver Date: Sat, 2 Mar 2024 15:12:18 -0500 Subject: [PATCH 03/10] Update README.md --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index db406b3..b06c093 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,10 @@ conda activate llmtime ``` If you prefer not using conda, you can also install the dependencies listed in `install.sh` manually. -Add your openai api key to `~/.bashrc` with +If you want to run OpenAI models through their API (doesn't require access to a GPU), add your openai api key to `~/.bashrc` with ``` echo "export OPENAI_API_KEY=" >> ~/.bashrc ``` -Add your mistral api key to `~/.bashrc` with -``` -echo "export MISTRAL_KEY=" >> ~/.bashrc -``` Finally, if you have a diffferent OpenAI API base, change it in your `~/.bashrc` with ``` @@ -38,7 +34,12 @@ echo "export OPENAI_API_BASE=" >> ~/.bashrc Want a quick taste of the power of LLMTime? Run the quick demo in the `demo.ipynb` notebook. No GPUs required! ## 🤖 Plugging in other LLMs -We currently support GPT-3, GPT-3.5, GPT-4, and LLaMA 2. It's easy to plug in other LLMs by simply specifying how to generate text completions from them in `models/llms.py`. +We currently support GPT-3, GPT-3.5, GPT-4, Mistral, and LLaMA 2. It's easy to plug in other LLMs by simply specifying how to generate text completions from them in `models/llms.py`. + +To run Mistral models, add your mistral api key to `~/.bashrc` with +``` +echo "export MISTRAL_KEY=" >> ~/.bashrc +``` ## 💡 Tips Here are some tips for using LLMTime: From f14e06abcfd5666476a10453d55dadf53d15132a Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 14:45:17 -0400 Subject: [PATCH 04/10] Use quotes when pip installing jax[cpu] In some shells (e.g., zsh), the bracket syntax throws an error without the quotes. 
--- install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.sh b/install.sh index e3002a8..c011fb5 100644 --- a/install.sh +++ b/install.sh @@ -1,7 +1,7 @@ conda create -n llmtime python=3.9 conda activate llmtime pip install numpy -pip install -U jax[cpu] # we don't need GPU for jax +pip install -U "jax[cpu]" # we don't need GPU for jax pip install torch --index-url https://download.pytorch.org/whl/cu118 pip install openai==0.28.1 pip install tiktoken From 47d36528705ab2ca1690fc58e69d276ca93b293f Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 14:54:59 -0400 Subject: [PATCH 05/10] add .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73a2b6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.env +.envrc + +**/__pycache__ + From fb35d93e2d5af10ef911540f1cc1584a4641173e Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 14:55:28 -0400 Subject: [PATCH 06/10] Modify the list of models used by the demo Although demo.ipynb seems to be an older version of the example code in demo.py, let's fix up the model list so it won't throw errors when people run it. - text-davinci-003 is deprecated, and OpenAI recommends gpt-3.5-turbo-instruct instead - team's OpenAI account doesn't have access to gpt-4 --- demo.ipynb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/demo.ipynb b/demo.ipynb index ff552b6..6a909f3 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -99,16 +99,13 @@ "\n", "model_hypers = {\n", " 'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers},\n", - " 'LLMTime GPT-4': {'model': 'gpt-4', **gpt4_hypers},\n", - " 'LLMTime GPT-3': {'model': 'text-davinci-003', **gpt3_hypers},\n", - " 'PromptCast GPT-3': {'model': 'text-davinci-003', **promptcast_hypers},\n", + " 'PromptCast GPT-3': {'model': 'gpt-3.5-turbo-instruct', **promptcast_hypers},\n", " 'ARIMA': arima_hypers,\n", " \n", "}\n", "\n", "model_predict_fns = {\n", - " 'LLMTime GPT-3': get_llmtime_predictions_data,\n", - " 'LLMTime GPT-4': get_llmtime_predictions_data,\n", + " 'LLMTime GPT-3.5': get_llmtime_predictions_data,\n", " 'PromptCast GPT-3': get_promptcast_predictions_data,\n", " 'ARIMA': get_arima_predictions_data,\n", "}\n", From 2b9027328d38ee36ce8e18feb99c270b24418c5e Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 15:13:26 -0400 Subject: [PATCH 07/10] Fix keyword argument error when using promptcast model Make sure 'parallel' is always in **kwargs and don't pass it in explicitly when calling generate_predictions. 
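For context, the collision this fixes looks like the following minimal sketch (the function names are illustrative stand-ins, not the actual promptcast code): if a wrapper both forwards `**kwargs` and passes `parallel=True` explicitly, Python raises `TypeError: ... got multiple values for keyword argument 'parallel'` the moment a caller supplies `parallel` itself. Defaulting the key inside `kwargs` avoids passing it twice.
```
# Illustrative sketch of the duplicate-keyword error and the fix (hypothetical names).
def generate_predictions(model, inputs, parallel=True, **kwargs):
    return {"model": model, "parallel": parallel, **kwargs}

def before_fix(model, inputs, **kwargs):
    # Fails if the caller already put 'parallel' into kwargs.
    return generate_predictions(model, inputs, parallel=True, **kwargs)

def after_fix(model, inputs, **kwargs):
    # Ensure 'parallel' is always in kwargs instead of passing it explicitly.
    if kwargs.get('parallel') is None:
        kwargs = {**kwargs, 'parallel': True}
    return generate_predictions(model, inputs, **kwargs)

try:
    before_fix("gpt-3.5-turbo-instruct", [], parallel=False)
except TypeError as err:
    print("before_fix:", err)  # got multiple values for keyword argument 'parallel'

print("after_fix:", after_fix("gpt-3.5-turbo-instruct", [], parallel=False))
```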
--- models/promptcast.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/models/promptcast.py b/models/promptcast.py index ecdcea5..1835198 100644 --- a/models/promptcast.py +++ b/models/promptcast.py @@ -273,11 +273,15 @@ def get_promptcast_predictions_data(train, test, model, settings, num_samples=10 medians = None completions_list = None input_strs = None + + if kwargs.get('parallel') is None: + kwargs = {**kwargs, 'parallel': True} + if num_samples > 0: # Generate predictions preds, completions_list, input_strs = generate_predictions(model, inputs, steps, settings, scalers, num_samples=num_samples, temp=temp, prompts=prompts, post_prompts=post_prompts, - parallel=True, return_input_strs=True, constrain_tokens=False, strict_handling=True, **kwargs) + return_input_strs=True, constrain_tokens=False, strict_handling=True, **kwargs) # skip bad samples samples = [pd.DataFrame(np.array([p for p in preds[i] if p is not None]), columns=test[i].index) for i in range(len(preds))] medians = [sample.median(axis=0) for sample in samples] From f8066d6198fb7a27c5cf2e4a8b7ba67258f2b4f6 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 15:17:30 -0400 Subject: [PATCH 08/10] Remove reference to autoreload extension Referencing this iPython extension can cause errors for people running the Jupyter notebook for the first time (some IDEs are smart enough to prompt for an install, but some are not). Because %autoreload isn't really useful for someone running a demo, let's remove it. --- demo.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/demo.ipynb b/demo.ipynb index 6a909f3..6330619 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -23,8 +23,6 @@ "from data.small_context import get_datasets\n", "from models.validation_likelihood_tuning import get_autotuned_predictions_data\n", "\n", - "%load_ext autoreload\n", - "%autoreload 2\n", "\n", "def plot_preds(train, test, pred_dict, model_name, show_samples=False):\n", " pred = pred_dict['median']\n", From c448d2a79c4dafd88082600211a24c9d3be774b8 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 15:29:25 -0400 Subject: [PATCH 09/10] Add python-dotenv for handling environment variables This is an optional convenience for people who don't want to store secrets in dotfiles such as .zshrc or .bashrc (or are having trouble doint so). --- .env-example | 3 +++ README.md | 1 + demo.ipynb | 9 +++++++++ install.sh | 1 + 4 files changed, 14 insertions(+) create mode 100644 .env-example diff --git a/.env-example b/.env-example new file mode 100644 index 0000000..82966c5 --- /dev/null +++ b/.env-example @@ -0,0 +1,3 @@ +# put your openai key here and rename the file from .env-example to .env +OPENAI_API_KEY=pasteyouropenaikeyhere +MISTRAL_KEY=pasteyourmistralkeyhere \ No newline at end of file diff --git a/README.md b/README.md index b06c093..d4682a3 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ echo "export MISTRAL_KEY=" >> ~/.bashrc ## 💡 Tips Here are some tips for using LLMTime: +- If you don't want to add OpenAI and Mistral keys to `~/.bashrc` or other dotfiles, you can add them to `.env-example` and rename the file to `.env` (which is in `.gitignore`). The demo code will add the contents of `.env` to your session's environment variables. - Performance is not too sensitive to the data scaling hyperparameters `alpha, beta, basic`. A good default is `alpha=0.95, beta=0.3, basic=False`. For data exhibiting symmetry around 0 (e.g. a sine wave), we recommend setting `basic=True` to avoid shifting the data. 
- The recently released `gpt-3.5-turbo-instruct` seems to require a lower temperature (e.g. 0.3) than other models, and tends to not outperform `text-davinci-003` from our limited experiments. - Tuning hyperparameters based on validation likelihoods, as done by `get_autotuned_predictions_data`, will often yield better test likelihoods, but won't necessarily yield better samples. diff --git a/demo.ipynb b/demo.ipynb index 6330619..10aca7e 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -23,6 +23,15 @@ "from data.small_context import get_datasets\n", "from models.validation_likelihood_tuning import get_autotuned_predictions_data\n", "\n", + "# get OPENAI info from environment\n", + "# if python-dotenv is installed, try loading from .env first\n", + "try:\n", + " from dotenv import load_dotenv\n", + " load_dotenv(override=True)\n", + "except ImportError:\n", + " print('python-dotenv not installed, not loading .env file')\n", + "openai.api_key = os.environ['OPENAI_API_KEY']\n", + "openai.api_base = os.environ.get(\"OPENAI_API_BASE\", \"https://api.openai.com/v1\")\n", "\n", "def plot_preds(train, test, pred_dict, model_name, show_samples=False):\n", " pred = pred_dict['median']\n", diff --git a/install.sh b/install.sh index c011fb5..eb9ad23 100644 --- a/install.sh +++ b/install.sh @@ -17,4 +17,5 @@ pip install SentencePiece pip install accelerate pip install gdown pip install mistralai #for mistral models +pip install python-dotenv #optional convenience for handling environment variables conda deactivate From 9ffe8988aea8447e7397a9f1f079921940a40bff Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 15 Mar 2024 16:19:07 -0400 Subject: [PATCH 10/10] Add demo_openai.py demo_openai.py is a copy of demo.ipynb, since it will be easier for lab meeting to collaborate outside of a notebook environment. 
--- demo_openai.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 demo_openai.py diff --git a/demo_openai.py b/demo_openai.py new file mode 100644 index 0000000..883ec47 --- /dev/null +++ b/demo_openai.py @@ -0,0 +1,115 @@ +import os +os.environ['OMP_NUM_THREADS'] = '4' +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import openai +openai.api_key = os.environ['OPENAI_API_KEY'] +openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1") +from data.serialize import SerializerSettings +from models.utils import grid_iter +from models.promptcast import get_promptcast_predictions_data +from models.darts import get_arima_predictions_data +from models.llmtime import get_llmtime_predictions_data +from data.small_context import get_datasets +from models.validation_likelihood_tuning import get_autotuned_predictions_data + +# get OPENAI info from environment +# if python-dotenv is installed, try loading from .env first +try: + from dotenv import load_dotenv + load_dotenv(override=True) +except ImportError: + print('python-dotenv not installed, not loading .env file') +openai.api_key = os.environ['OPENAI_API_KEY'] +openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1") + +def plot_preds(train, test, pred_dict, model_name, show_samples=False): + pred = pred_dict['median'] + pred = pd.Series(pred, index=test.index) + plt.figure(figsize=(8, 6), dpi=100) + plt.plot(train) + plt.plot(test, label='Truth', color='black') + plt.plot(pred, label=model_name, color='purple') + # shade 90% confidence interval + samples = pred_dict['samples'] + lower = np.quantile(samples, 0.05, axis=0) + upper = np.quantile(samples, 0.95, axis=0) + plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple') + if show_samples: + samples = pred_dict['samples'] + # convert df to numpy array + samples = samples.values if isinstance(samples, pd.DataFrame) else samples + for i in range(min(10, samples.shape[0])): + plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1) + plt.legend(loc='upper left') + if 'NLL/D' in pred_dict: + nll = pred_dict['NLL/D'] + if nll is not None: + plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5)) + plt.show(block=False) + + +# DEFINE MODELS +gpt4_hypers = dict( + alpha=0.3, + basic=True, + temp=1.0, + top_p=0.8, + settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-') +) + +gpt3_hypers = dict( + temp=0.7, + alpha=0.95, + beta=0.3, + basic=False, + settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True) +) + + +promptcast_hypers = dict( + temp=0.7, + settings=SerializerSettings(base=10, prec=0, signed=True, + time_sep=', ', + bit_sep='', + plus_sign='', + minus_sign='-', + half_bin_correction=False, + decimal_point='') +) + +arima_hypers = dict(p=[12,30], d=[1,2], q=[0]) + +model_hypers = { + 'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers}, + 'PromptCast GPT-3': {'model': 'gpt-3.5-turbo-instruct', **promptcast_hypers}, + 'ARIMA': arima_hypers, + +} + +model_predict_fns = { + 'LLMTime GPT-3.5': get_llmtime_predictions_data, + 'PromptCast GPT-3': get_promptcast_predictions_data, + 'ARIMA': get_arima_predictions_data, +} + +model_names = list(model_predict_fns.keys()) + +# RUN LLMTIME AND VISUALIZE RESULTS +datasets = get_datasets() +ds_name = 'AirPassengersDataset' + +data = datasets[ds_name] 
+train, test = data # or change to your own data
+out = {}
+for model in model_names: # GPT-4 takes about a minute to run
+    model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
+    hypers = list(grid_iter(model_hypers[model]))
+    num_samples = 10
+    pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
+    out[model] = pred_dict
+    plot_preds(train, test, pred_dict, model, show_samples=True)
+
+# Keep all plot windows open
+plt.show()
\ No newline at end of file
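For reference, here is a minimal end-to-end sketch of exercising the new Mistral API path added in the first patch, mirroring the calling pattern of `demo.py`/`demo_openai.py` above. It assumes the repository layout shown in these patches and that `MISTRAL_KEY` is exported, and it reuses the `mistral_api_hypers` values from `demo.py`:
```
# Minimal sketch, assuming the llmtime package layout and APIs used in the patches above;
# 'mistral-api-tiny' is the completion function registered in models/llms.py by the first patch.
import os
from data.serialize import SerializerSettings
from data.small_context import get_datasets
from models.utils import grid_iter
from models.llmtime import get_llmtime_predictions_data
from models.validation_likelihood_tuning import get_autotuned_predictions_data

assert 'MISTRAL_KEY' in os.environ, "export MISTRAL_KEY first (see README)"

hypers = list(grid_iter({
    'model': 'mistral-api-tiny',
    'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8,
    'settings': SerializerSettings(base=10, prec=3, signed=True,
                                   time_sep=', ', bit_sep='', minus_sign='-'),
}))

train, test = get_datasets()['AirPassengersDataset']
pred_dict = get_autotuned_predictions_data(train, test, hypers, 10,
                                           get_llmtime_predictions_data,
                                           verbose=False, parallel=False)
print(pred_dict['median'])
```
Swapping the model string to `mistral-api-small`, `mistral-api-medium`, or `mistral` (the local Hugging Face model) exercises the other entries registered in `models/llms.py`.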