Skip to content

Commit a22366d

Browse files
committed
disallow unversioned gpt-4 model
1 parent 5c6b426 commit a22366d

File tree

6 files changed

+17
-13
lines changed

6 files changed

+17
-13
lines changed

API.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
`POST /assessment`: Issue a rubric assessment to the AI agent and wait for a response.
1515

16-
* `model`: The model to use. Default: `gpt-4`
16+
* `model`: The model to use. Default: the value of `DEFAULT_MODEL` in `lib/assessment/config.py`
1717
* `api-key`: The API key associated with the model. Default: the configured key
1818
* `code`: The code to assess. Required.
1919
* `prompt`: The system prompt. Required.
@@ -60,7 +60,7 @@
6060

6161
`(GET|POST) /test/assessment`: Issue a test rubric assessment to the AI agent and wait for a response.
6262

63-
* `model`: The model to use. Default: `gpt-4`
63+
* `model`: The model to use. Default: the value of `DEFAULT_MODEL` in `lib/assessment/config.py`
6464
* `api-key`: The API key associated with the model. Default: the configured key
6565
* `remove-comments`: When `1`, attempts to strip comments out of the code before assessment. Default: 0
6666
* `num-responses`: The number of times it should ask the AI model. It votes on the final answer. Default: 1

TESTING.md

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

lib/assessment/assess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
import logging
88

99
# Import our support classes
10-
from lib.assessment.config import SUPPORTED_MODELS, VALID_LABELS
10+
from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS
1111
from lib.assessment.label import Label
1212

1313
class KeyConceptError(Exception):
1414
pass
1515

16-
def label(code, prompt, rubric, examples=[], api_key='', llm_model='gpt-4', num_responses=1, temperature=0.2, remove_comments=False):
16+
def label(code, prompt, rubric, examples=[], api_key='', llm_model=DEFAULT_MODEL, num_responses=1, temperature=0.2, remove_comments=False):
1717
OPENAI_API_KEY = api_key
1818

1919
# Set the key

lib/assessment/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
VALID_LABELS = ["Extensive Evidence", "Convincing Evidence", "Limited Evidence", "No Evidence"]
2-
SUPPORTED_MODELS = ['gpt-4', 'gpt-4-0314', 'gpt-4-32k', 'gpt-4-32k-0314']
2+
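# Do not include the unversioned `gpt-4` alias, so that we always know which version of the model we are using. NOTE(review): `gpt-4-32k` also has no version suffix; confirm whether it should be listed.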
3+
SUPPORTED_MODELS = ['gpt-4-0314', 'gpt-4-32k', 'gpt-4-32k-0314', 'gpt-4-0613', 'gpt-4-32k-0613']
4+
DEFAULT_MODEL = 'gpt-4-0613'
35
LESSONS = {
46
"U3-2022-L10" : "1ROCbvHb3yWGVoQqzKAjwdaF0dSRPUjy_",
57
"U3-2022-L13" : "1kGHeY5LRpFJ9xVRoBEWbyOJyKm4wClqw",

lib/assessment/rubric_tester.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from sklearn.metrics import accuracy_score, confusion_matrix
1616
from collections import defaultdict
1717

18-
from lib.assessment.config import SUPPORTED_MODELS, VALID_LABELS, LESSONS
18+
from lib.assessment.config import SUPPORTED_MODELS, DEFAULT_MODEL, VALID_LABELS, LESSONS
1919
from lib.assessment.label import Label
2020
from lib.assessment.report import Report
2121

@@ -37,8 +37,8 @@ def command_line_options():
3737
help='Output filename within output directory')
3838
parser.add_argument('-c', '--use-cached', action='store_true',
3939
help='Use cached responses from the API.')
40-
parser.add_argument('-l', '--llm-model', type=str, default='gpt-4',
41-
help=f"Which LLM model to use. Supported models: {', '.join(SUPPORTED_MODELS)}. Default: gpt-4")
40+
parser.add_argument('-l', '--llm-model', type=str, default=DEFAULT_MODEL,
41+
help=f"Which LLM model to use. Supported models: {', '.join(SUPPORTED_MODELS)}. Default: {DEFAULT_MODEL}")
4242
parser.add_argument('-n', '--num-responses', type=int, default=1,
4343
help='Number of responses to generate for each student. Defaults to 1.')
4444
parser.add_argument('-p', '--num-passing-labels', type=int,

src/assessment.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import openai
88
import json
99

10+
from lib.assessment.config import DEFAULT_MODEL
11+
1012
# Our assessment code
1113
from lib.assessment import assess
1214
from lib.assessment.assess import KeyConceptError
@@ -37,7 +39,7 @@ def post_assessment():
3739
rubric=request.values.get("rubric", ""),
3840
examples=examples,
3941
api_key=request.values.get("api-key", openai.api_key),
40-
llm_model=request.values.get("model", "gpt-4"),
42+
llm_model=request.values.get("model", DEFAULT_MODEL),
4143
remove_comments=(request.values.get("remove-comments", "0") != "0"),
4244
num_responses=int(request.values.get("num-responses", "1")),
4345
temperature=float(request.values.get("temperature", "0.2")),
@@ -76,7 +78,7 @@ def test_assessment():
7678
prompt=prompt,
7779
rubric=rubric,
7880
api_key=request.values.get("api-key", openai.api_key),
79-
llm_model=request.values.get("model", "gpt-4"),
81+
llm_model=request.values.get("model", DEFAULT_MODEL),
8082
remove_comments=(request.values.get("remove-comments", "0") != "0"),
8183
num_responses=int(request.values.get("num-responses", "1")),
8284
temperature=float(request.values.get("temperature", "0.2")),
@@ -110,7 +112,7 @@ def test_assessment_blank():
110112
prompt=prompt,
111113
rubric=rubric,
112114
api_key=request.values.get("api-key", openai.api_key),
113-
llm_model=request.values.get("model", "gpt-4"),
115+
llm_model=request.values.get("model", DEFAULT_MODEL),
114116
remove_comments=(request.values.get("remove-comments", "0") != "0"),
115117
num_responses=int(request.values.get("num-responses", "1")),
116118
temperature=float(request.values.get("temperature", "0.2")),
@@ -152,7 +154,7 @@ def test_assessment_examples():
152154
rubric=rubric,
153155
examples=[examples],
154156
api_key=request.values.get("api-key", openai.api_key),
155-
llm_model=request.values.get("model", "gpt-4"),
157+
llm_model=request.values.get("model", DEFAULT_MODEL),
156158
remove_comments=(request.values.get("remove-comments", "0") != "0"),
157159
num_responses=int(request.values.get("num-responses", "1")),
158160
temperature=float(request.values.get("temperature", "0.2")),

0 commit comments

Comments
 (0)