Bsweger/labmeeting organization #1

Merged
merged 11 commits into from Mar 18, 2024
3 changes: 3 additions & 0 deletions .env-example
@@ -0,0 +1,3 @@
# Put your OpenAI and Mistral API keys here, then rename this file from .env-example to .env
OPENAI_API_KEY=pasteyouropenaikeyhere
MISTRAL_KEY=pasteyourmistralkeyhere
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
.env
.envrc

**/__pycache__

2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2023 Nate Gruver
Copyright (c) 2023 Nate Gruver, Marc Finzi, Shikai Qiu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
10 changes: 8 additions & 2 deletions README.md
@@ -20,7 +20,7 @@ conda activate llmtime
```
If you prefer not to use conda, you can instead install the dependencies listed in `install.sh` manually.

Add your openai api key to `~/.bashrc` with
If you want to run OpenAI models through their API (no GPU required), add your OpenAI API key to `~/.bashrc` with
```
echo "export OPENAI_API_KEY=<your key>" >> ~/.bashrc
```
@@ -34,10 +34,16 @@ echo "export OPENAI_API_BASE=<your base url>" >> ~/.bashrc
Want a quick taste of the power of LLMTime? Run the quick demo in the `demo.ipynb` notebook. No GPUs required!

## 🤖 Plugging in other LLMs
We currently support GPT-3, GPT-3.5, GPT-4, and LLaMA 2. It's easy to plug in other LLMs by simply specifying how to generate text completions from them in `models/llms.py`.
We currently support GPT-3, GPT-3.5, GPT-4, Mistral, and LLaMA 2. It's easy to plug in other LLMs: simply specify how to generate text completions from them in `models/llms.py`.

To run Mistral models, add your Mistral API key to `~/.bashrc` with
```
echo "export MISTRAL_KEY=<your key>" >> ~/.bashrc
```
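
For reference, the OpenAI hook in `models/gpt.py` (`gpt_completion_fn`) takes `(model, input_str, steps, settings, num_samples, temp)` and returns a list of text continuations of the serialized series. Here is a minimal sketch of a custom hook, where `query_my_llm` is a hypothetical stand-in for your model's generation call:
```
# Sketch only: mirrors the gpt_completion_fn signature in models/gpt.py.
# query_my_llm is hypothetical -- replace it with your model's generation call.
def my_llm_completion_fn(model, input_str, steps, settings, num_samples, temp):
    return [query_my_llm(input_str, temperature=temp) for _ in range(num_samples)]
```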

## 💡 Tips
Here are some tips for using LLMTime:
- If you don't want to add your OpenAI and Mistral keys to `~/.bashrc` or other dotfiles, you can add them to `.env-example` and rename that file to `.env` (which is in `.gitignore`). The demo code will add the contents of `.env` to your session's environment variables (see the sketch after this list).
- Performance is not too sensitive to the data scaling hyperparameters `alpha, beta, basic`. A good default is `alpha=0.95, beta=0.3, basic=False`. For data exhibiting symmetry around 0 (e.g. a sine wave), we recommend setting `basic=True` to avoid shifting the data.
- The recently released `gpt-3.5-turbo-instruct` seems to require a lower temperature (e.g. 0.3) than other models, and in our limited experiments it tends not to outperform `text-davinci-003`.
- Tuning hyperparameters based on validation likelihoods, as done by `get_autotuned_predictions_data`, will often yield better test likelihoods, but won't necessarily yield better samples.
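
The key-loading pattern mentioned in the first tip, condensed from `demo.ipynb` and `demo_openai.py`:
```
import os
import openai

# prefer keys from .env when python-dotenv is installed, else fall back to the shell environment
try:
    from dotenv import load_dotenv
    load_dotenv(override=True)
except ImportError:
    print('python-dotenv not installed, not loading .env file')
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
```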
18 changes: 11 additions & 7 deletions demo.ipynb
@@ -23,8 +23,15 @@
"from data.small_context import get_datasets\n",
"from models.validation_likelihood_tuning import get_autotuned_predictions_data\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"# get OPENAI info from environment\n",
"# if python-dotenv is installed, try loading from .env first\n",
"try:\n",
" from dotenv import load_dotenv\n",
" load_dotenv(override=True)\n",
"except ImportError:\n",
" print('python-dotenv not installed, not loading .env file')\n",
"openai.api_key = os.environ['OPENAI_API_KEY']\n",
"openai.api_base = os.environ.get(\"OPENAI_API_BASE\", \"https://api.openai.com/v1\")\n",
"\n",
"def plot_preds(train, test, pred_dict, model_name, show_samples=False):\n",
" pred = pred_dict['median']\n",
@@ -99,16 +106,13 @@
"\n",
"model_hypers = {\n",
" 'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers},\n",
" 'LLMTime GPT-4': {'model': 'gpt-4', **gpt4_hypers},\n",
" 'LLMTime GPT-3': {'model': 'text-davinci-003', **gpt3_hypers},\n",
" 'PromptCast GPT-3': {'model': 'text-davinci-003', **promptcast_hypers},\n",
" 'PromptCast GPT-3': {'model': 'gpt-3.5-turbo-instruct', **promptcast_hypers},\n",
" 'ARIMA': arima_hypers,\n",
" \n",
"}\n",
"\n",
"model_predict_fns = {\n",
" 'LLMTime GPT-3': get_llmtime_predictions_data,\n",
" 'LLMTime GPT-4': get_llmtime_predictions_data,\n",
" 'LLMTime GPT-3.5': get_llmtime_predictions_data,\n",
" 'PromptCast GPT-3': get_promptcast_predictions_data,\n",
" 'ARIMA': get_arima_predictions_data,\n",
"}\n",
134 changes: 134 additions & 0 deletions demo.py
@@ -0,0 +1,134 @@
import os
os.environ['OMP_NUM_THREADS'] = '4'  # set before importing torch so the thread limit takes effect
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
from data.serialize import SerializerSettings
from models.utils import grid_iter
from models.promptcast import get_promptcast_predictions_data
from models.darts import get_arima_predictions_data
from models.llmtime import get_llmtime_predictions_data
from data.small_context import get_datasets
from models.validation_likelihood_tuning import get_autotuned_predictions_data

def plot_preds(train, test, pred_dict, model_name, show_samples=False):
    pred = pred_dict['median']
    pred = pd.Series(pred, index=test.index)
    plt.figure(figsize=(8, 6), dpi=100)
    plt.plot(train)
    plt.plot(test, label='Truth', color='black')
    plt.plot(pred, label=model_name, color='purple')
    # shade 90% confidence interval
    samples = pred_dict['samples']
    lower = np.quantile(samples, 0.05, axis=0)
    upper = np.quantile(samples, 0.95, axis=0)
    plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
    if show_samples:
        samples = pred_dict['samples']
        # convert df to numpy array
        samples = samples.values if isinstance(samples, pd.DataFrame) else samples
        for i in range(min(10, samples.shape[0])):
            plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
    plt.legend(loc='upper left')
    if 'NLL/D' in pred_dict:
        nll = pred_dict['NLL/D']
        if nll is not None:
            plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5))
    plt.show()



print(torch.cuda.max_memory_allocated())  # sanity check: peak CUDA memory allocated so far
print()

gpt4_hypers = dict(
    alpha=0.3,
    basic=True,
    temp=1.0,
    top_p=0.8,
    settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

mistral_api_hypers = dict(
    alpha=0.3,
    basic=True,
    temp=1.0,
    top_p=0.8,
    settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

gpt3_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)


llma2_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)


promptcast_hypers = dict(
    temp=0.7,
    settings=SerializerSettings(base=10, prec=0, signed=True,
                                time_sep=', ',
                                bit_sep='',
                                plus_sign='',
                                minus_sign='-',
                                half_bin_correction=False,
                                decimal_point='')
)

arima_hypers = dict(p=[12,30], d=[1,2], q=[0])

model_hypers = {
    'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers},
    'LLMTime GPT-4': {'model': 'gpt-4', **gpt4_hypers},
    'LLMTime GPT-3': {'model': 'text-davinci-003', **gpt3_hypers},
    'PromptCast GPT-3': {'model': 'text-davinci-003', **promptcast_hypers},
    'LLMA2': {'model': 'llama-7b', **llma2_hypers},
    'mistral': {'model': 'mistral', **llma2_hypers},
    'mistral-api-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers},
    'mistral-api-small': {'model': 'mistral-api-small', **mistral_api_hypers},
    'mistral-api-medium': {'model': 'mistral-api-medium', **mistral_api_hypers},
    'ARIMA': arima_hypers,
}


model_predict_fns = {
    #'LLMA2': get_llmtime_predictions_data,
    #'mistral': get_llmtime_predictions_data,
    #'LLMTime GPT-4': get_llmtime_predictions_data,
    'mistral-api-tiny': get_llmtime_predictions_data,
}


model_names = list(model_predict_fns.keys())

datasets = get_datasets()
ds_name = 'AirPassengersDataset'


data = datasets[ds_name]
train, test = data # or change to your own data
out = {}

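# For each model: grid_iter expands the hyperparameter lists into a grid, and
# get_autotuned_predictions_data picks the best configuration by validation likelihood.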
for model in model_names: # GPT-4 takes about a minute to run
    model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
    hypers = list(grid_iter(model_hypers[model]))
    num_samples = 10
    pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
    out[model] = pred_dict
    plot_preds(train, test, pred_dict, model, show_samples=True)
115 changes: 115 additions & 0 deletions demo_openai.py
@@ -0,0 +1,115 @@
import os
os.environ['OMP_NUM_THREADS'] = '4'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai
from data.serialize import SerializerSettings
from models.utils import grid_iter
from models.promptcast import get_promptcast_predictions_data
from models.darts import get_arima_predictions_data
from models.llmtime import get_llmtime_predictions_data
from data.small_context import get_datasets
from models.validation_likelihood_tuning import get_autotuned_predictions_data

# get OPENAI info from environment
# if python-dotenv is installed, try loading from .env first
try:
    from dotenv import load_dotenv
    load_dotenv(override=True)
except ImportError:
    print('python-dotenv not installed, not loading .env file')
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")

def plot_preds(train, test, pred_dict, model_name, show_samples=False):
    pred = pred_dict['median']
    pred = pd.Series(pred, index=test.index)
    plt.figure(figsize=(8, 6), dpi=100)
    plt.plot(train)
    plt.plot(test, label='Truth', color='black')
    plt.plot(pred, label=model_name, color='purple')
    # shade 90% confidence interval
    samples = pred_dict['samples']
    lower = np.quantile(samples, 0.05, axis=0)
    upper = np.quantile(samples, 0.95, axis=0)
    plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
    if show_samples:
        samples = pred_dict['samples']
        # convert df to numpy array
        samples = samples.values if isinstance(samples, pd.DataFrame) else samples
        for i in range(min(10, samples.shape[0])):
            plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
    plt.legend(loc='upper left')
    if 'NLL/D' in pred_dict:
        nll = pred_dict['NLL/D']
        if nll is not None:
            plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5))
    plt.show(block=False)


# DEFINE MODELS
gpt4_hypers = dict(
    alpha=0.3,
    basic=True,
    temp=1.0,
    top_p=0.8,
    settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

gpt3_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)


promptcast_hypers = dict(
    temp=0.7,
    settings=SerializerSettings(base=10, prec=0, signed=True,
                                time_sep=', ',
                                bit_sep='',
                                plus_sign='',
                                minus_sign='-',
                                half_bin_correction=False,
                                decimal_point='')
)

arima_hypers = dict(p=[12,30], d=[1,2], q=[0])

model_hypers = {
    'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers},
    'PromptCast GPT-3': {'model': 'gpt-3.5-turbo-instruct', **promptcast_hypers},
    'ARIMA': arima_hypers,
}

model_predict_fns = {
    'LLMTime GPT-3.5': get_llmtime_predictions_data,
    'PromptCast GPT-3': get_promptcast_predictions_data,
    'ARIMA': get_arima_predictions_data,
}

model_names = list(model_predict_fns.keys())

# RUN LLMTIME AND VISUALIZE RESULTS
datasets = get_datasets()
ds_name = 'AirPassengersDataset'

data = datasets[ds_name]
train, test = data # or change to your own data
out = {}
for model in model_names: # GPT-4 takes about a minute to run
    model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
    hypers = list(grid_iter(model_hypers[model]))
    num_samples = 10
    pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
    out[model] = pred_dict
    plot_preds(train, test, pred_dict, model, show_samples=True)

# Keep all plot windows open
plt.show()
4 changes: 3 additions & 1 deletion install.sh
@@ -1,7 +1,7 @@
conda create -n llmtime python=3.9
conda activate llmtime
pip install numpy
pip install -U jax[cpu] # we don't need GPU for jax
pip install -U "jax[cpu]" # we don't need GPU for jax
pip install torch --index-url https://download.pytorch.org/whl/cu118
pip install openai==0.28.1
pip install tiktoken
@@ -16,4 +16,6 @@ pip install multiprocess
pip install SentencePiece
pip install accelerate
pip install gdown
pip install mistralai # for Mistral models
pip install python-dotenv # optional convenience for loading environment variables
conda deactivate
4 changes: 2 additions & 2 deletions models/gpt.py
@@ -58,9 +58,9 @@ def gpt_completion_fn(model, input_str, steps, settings, num_samples, temp):
    allowed_tokens = [settings.bit_sep + str(i) for i in range(settings.base)]
    allowed_tokens += [settings.time_sep, settings.plus_sign, settings.minus_sign]
    allowed_tokens = [t for t in allowed_tokens if len(t) > 0] # remove empty tokens like an implicit plus sign
    if (model not in ['gpt-3.5-turbo','gpt-4']): # logit bias not supported for chat models
    if (model not in ['gpt-3.5-turbo','gpt-4','gpt-4-1106-preview']): # logit bias not supported for chat models
        logit_bias = {id: 30 for id in get_allowed_ids(allowed_tokens, model)}
    if model in ['gpt-3.5-turbo','gpt-4']:
    if model in ['gpt-3.5-turbo','gpt-4','gpt-4-1106-preview']:
        chatgpt_sys_message = "You are a helpful assistant that performs time series predictions. The user will provide a sequence and you will predict the remaining sequence. The sequence is represented by decimal strings separated by commas."
        extra_input = "Please continue the following sequence without producing any additional text. Do not say anything like 'the next terms in the sequence are', just return the numbers. Sequence:\n"
        response = openai.ChatCompletion.create(