This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

CI test failures after BB2 merge (#3823)
* testing with torch 1.8

* reverting the requirements

* moved retriever tests to nightly with GPU

* removed the end line

* skipping transformer imports

* skipping if not transformers

* sorted the requirements list

* removed the extra skips in tests + regen the glue tests

* fairscale 0.3.7

* reordering requirements

* nit fix

* reverted transformer skips

* skip transformer not installed

* long cpu test: reducing bs

* turn on fp16

* mv searchquery retrievers test and bump down gpu mem usage

* fix bsz

* less memory for tests?

* sgd

* update reqs

Co-authored-by: klshuster <kshuster@fb.com>
mojtaba-komeili and klshuster committed Jul 22, 2021
1 parent 17656e6 commit dd16d3f
Showing 7 changed files with 138 additions and 103 deletions.
12 changes: 6 additions & 6 deletions .circleci/config.yml
@@ -231,26 +231,26 @@ commands:
- setupcuda
- fixgit
- restore_cache:
- key: deps-20210426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+ key: deps-20210722-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
- setup
- installdeps
- << parameters.more_installs >>
- save_cache:
- key: deps-20210426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+ key: deps-20210722-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
paths:
- "~/venv/bin"
- "~/venv/lib"
- findtests:
marker: << parameters.marker >>
- restore_cache:
- key: data-20210426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+ key: data-20210722-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
- run:
name: Run tests
no_output_timeout: 60m
command: |
coverage run -m pytest -m << parameters.marker >> << parameters.pytest_flags >> --junitxml=test-results/junit.xml
- save_cache:
- key: data-20210426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+ key: data-20210722-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
paths:
- "~/ParlAI/data"
- codecov
@@ -267,12 +267,12 @@ commands:
- checkout
- fixgit
- restore_cache:
- key: deps-20210426-bw-{{ checksum "requirements.txt" }}
+ key: deps-20210722-bw-{{ checksum "requirements.txt" }}
- setup
- installdeps
- installtorchgpu17
- save_cache:
- key: deps-20210426-bw-{{ checksum "requirements.txt" }}
+ key: deps-20210722-bw-{{ checksum "requirements.txt" }}
paths:
- "~/venv/bin"
- "~/venv/lib"
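Note: the only change in this file is the date stamp in every cache key (20210426 -> 20210722). CircleCI caches are immutable and looked up by exact key, so bumping the stamp forces a clean rebuild of the dependency and data caches even when requirements.txt itself is unchanged. A minimal sketch of the key scheme (the helper and the use of SHA-256 are illustrative, not part of the commit; assumes a requirements.txt in the working directory):

    import hashlib

    def cache_key(date_stamp: str, cachename: str, requirements_path: str) -> str:
        # Mirrors deps-<date>-<cachename>-{{ checksum "requirements.txt" }}:
        # a new date stamp misses the cache even when the checksum matches.
        with open(requirements_path, 'rb') as f:
            checksum = hashlib.sha256(f.read()).hexdigest()
        return f'deps-{date_stamp}-{cachename}-{checksum}'

    assert cache_key('20210426', 'gpu', 'requirements.txt') != \
        cache_key('20210722', 'gpu', 'requirements.txt')
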
4 changes: 2 additions & 2 deletions requirements.txt
@@ -4,6 +4,7 @@ coloredlogs==14.0
datasets>=1.4.1
docutils<0.16,>=0.14
emoji==0.5.4
+ fairscale==0.3.7
docformatter==1.3.0
flake8-bugbear==19.8.0
flake8==3.7.8
@@ -40,11 +41,10 @@ Sphinx~=2.2.0
subword-nmt==0.3.7
tensorboard==2.3.0
tensorboardX==2.1
- transformers==4.6.1
tokenizers>=0.8.0
torchtext>=0.5.0
tornado==6.0.4
- tqdm~=4.38.0
+ tqdm~=4.42.0
typing-extensions==3.7.4.1
Unidecode==1.1.1
urllib3>=1.26.5
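Note: with transformers==4.6.1 dropped from requirements.txt, the package may or may not be present in a given environment, so tests that need it have to probe for it instead of importing unconditionally. A minimal sketch of the guard-and-skip pattern that test_bb2.py below relies on (the test class here is a made-up placeholder):

    import unittest

    try:
        import transformers  # no longer pinned in requirements.txt

        TRANSFORMERS_INSTALLED = True
    except ImportError:
        TRANSFORMERS_INSTALLED = False


    @unittest.skipUnless(TRANSFORMERS_INSTALLED, 'transformers not installed')
    class TestNeedsTransformers(unittest.TestCase):
        def test_import_worked(self):
            self.assertTrue(TRANSFORMERS_INSTALLED)
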
9 changes: 5 additions & 4 deletions tests/nightly/gpu/test_bart.py
@@ -24,7 +24,7 @@ def test_bart(self):
Test out-of-the-box BART on repeat task.
"""
valid, _ = testing_utils.eval_model(
- dict(task='integration_tests', model='bart')
+ dict(task='integration_tests', model='bart', num_examples=10)
)
self.assertAlmostEqual(valid['ppl'].value(), 1.0, places=1)

@@ -75,14 +75,15 @@ def test_bart_ft(self):
task='integration_tests:reverse',
model='bart',
dict_file='zoo:bart/bart_large/model.dict',
- optimizer='adam',
- learningrate=3e-5,
- batchsize=4,
+ optimizer='sgd',
+ learningrate=1,
+ batchsize=2,
num_epochs=1,
short_final_eval=True,
validation_max_exs=12,
model_file=mf,
model_parallel=True,
+ fp16=True,
)
)
self.assertAlmostEqual(valid['ppl'].value(), 1.0, places=1)
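Note: the fine-tuning test swaps Adam for plain SGD, halves the batch size, and turns on fp16, all to cut peak GPU memory. Adam keeps two extra fp32 tensors (exp_avg and exp_avg_sq) per parameter, so dropping it saves roughly two model-sized buffers; SGD without momentum keeps no per-parameter state at all. A small sketch of the difference (illustrative, not from this commit):

    import torch

    model = torch.nn.Linear(1024, 1024)
    model(torch.randn(8, 1024)).sum().backward()

    adam = torch.optim.Adam(model.parameters())
    adam.step()
    print(len(adam.state[model.weight]))  # 3: step, exp_avg, exp_avg_sq

    sgd = torch.optim.SGD(model.parameters(), lr=1.0)
    sgd.step()
    print(len(sgd.state[model.weight]))   # 0: no per-parameter state
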
94 changes: 51 additions & 43 deletions tests/nightly/gpu/test_bb2.py
@@ -8,52 +8,59 @@
import unittest

import parlai.utils.testing as testing_utils
- from projects.blenderbot2.agents.blenderbot2 import (
-     ZOO_MEMORY_DECODER,
-     ZOO_QUERY_GENERATOR,
- )
- from projects.blenderbot2.agents.sub_modules import KnowledgeAccessMethod
+ try:
+     # blenderbot2 imports `transformer` and crashes the CPU tests.
+     # These CPU tests will be skipped anyway with the decorators on each test.
+     from projects.blenderbot2.agents.sub_modules import KnowledgeAccessMethod
+     from projects.blenderbot2.agents.blenderbot2 import (
+         ZOO_MEMORY_DECODER,
+         ZOO_QUERY_GENERATOR,
+     )
+
+     TRANSFORMER_INSTALLED = True
+ except ImportError:
+     TRANSFORMER_INSTALLED = False

LOCAL = True

- SEARCH_QUERY_MODEL = ZOO_MEMORY_DECODER
- PERSONA_SUMMARY_MODEL = ZOO_QUERY_GENERATOR
- ZOO_BB2 = 'zoo:blenderbot2/blenderbot2_400M/model'
- ZOO_BB2_3B = 'zoo:blenderbot2/blenderbot2_3B/model'
- SEARCH_SERVER = '<SERVER_API>'
- common_opt = {
-     'model': 'projects.blenderbot2.agents.blenderbot2:BlenderBot2RagAgent',
-     # rag args
-     'init_opt': 'arch/bart_large',
-     'generation_model': 'bart',
-     'retriever_debug_index': 'compressed',
-     'label_truncate': 128,
-     'text_truncate': 512,
-     'batchsize': 4,
-     'fp16': True,
-     'model_parallel': True,
-     # train args
-     'task': 'convai2,wizard_of_wikipedia',
-     'num_examples': 8,
- }
-
-
- def _test_bb2_rag(retrieval_method: KnowledgeAccessMethod, **kwargs):
-     opt = copy.deepcopy(common_opt)
-     opt['knowledge_access_method'] = retrieval_method.value
-     opt.update(dict(kwargs))
-     print(' '.join([f'--{k} {v}' for k, v in opt.items()]))
-     testing_utils.eval_model(opt, skip_test=True)
-     torch.cuda.empty_cache()
-
-
- def _test_bb2_fid(retrieval_method: KnowledgeAccessMethod, **kwargs):
-     opt = copy.deepcopy(common_opt)
-     opt['model'] = 'projects.blenderbot2.agents.blenderbot2:BlenderBot2FidAgent'
-     opt['knowledge_access_method'] = retrieval_method.value
-     opt.update(dict(kwargs))
-     testing_utils.eval_model(opt, skip_test=True)
-     torch.cuda.empty_cache()
+ if TRANSFORMER_INSTALLED:
+     SEARCH_QUERY_MODEL = ZOO_MEMORY_DECODER
+     PERSONA_SUMMARY_MODEL = ZOO_QUERY_GENERATOR
+     ZOO_BB2 = 'zoo:blenderbot2/blenderbot2_400M/model'
+     ZOO_BB2_3B = 'zoo:blenderbot2/blenderbot2_3B/model'
+     SEARCH_SERVER = '<SERVER_API>'
+     common_opt = {
+         'model': 'projects.blenderbot2.agents.blenderbot2:BlenderBot2RagAgent',
+         # rag args
+         'init_opt': 'arch/bart_large',
+         'generation_model': 'bart',
+         'retriever_debug_index': 'compressed',
+         'label_truncate': 128,
+         'text_truncate': 512,
+         'batchsize': 4,
+         'fp16': True,
+         'model_parallel': True,
+         # train args
+         'task': 'convai2,wizard_of_wikipedia',
+         'num_examples': 8,
+     }
+
+     def _test_bb2_rag(retrieval_method: KnowledgeAccessMethod, **kwargs):
+         opt = copy.deepcopy(common_opt)
+         opt['knowledge_access_method'] = retrieval_method.value
+         opt.update(dict(kwargs))
+         print(' '.join([f'--{k} {v}' for k, v in opt.items()]))
+         testing_utils.eval_model(opt, skip_test=True)
+         torch.cuda.empty_cache()
+
+     def _test_bb2_fid(retrieval_method: KnowledgeAccessMethod, **kwargs):
+         opt = copy.deepcopy(common_opt)
+         opt['model'] = 'projects.blenderbot2.agents.blenderbot2:BlenderBot2FidAgent'
+         opt['knowledge_access_method'] = retrieval_method.value
+         opt.update(dict(kwargs))
+         testing_utils.eval_model(opt, skip_test=True)
+         torch.cuda.empty_cache()


@testing_utils.skipUnlessGPU
@@ -262,6 +269,7 @@ def test_rag(self):
)


+ @unittest.skipUnless(TRANSFORMER_INSTALLED, "Needs transformer, not installed.")
class TestBB2ZooModel(unittest.TestCase):
"""
Test Zoo Model.
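Note: besides the import guard, both helpers end with torch.cuda.empty_cache(), which hands the caching allocator's unused blocks back to the driver so the next large model in the suite starts from a clean slate on the shared CI GPU. A sketch of the effect (assumes a CUDA device; the exact byte counts vary):

    import torch

    if torch.cuda.is_available():
        x = torch.randn(4096, 4096, device='cuda')
        del x  # memory returns to PyTorch's caching allocator, not the driver
        print(torch.cuda.memory_reserved())  # still holds the cached block
        torch.cuda.empty_cache()             # releases unused blocks to the driver
        print(torch.cuda.memory_reserved())  # now lower
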
7 changes: 5 additions & 2 deletions tests/nightly/gpu/test_gpt2.py
@@ -19,7 +19,7 @@ def test_custom_special_tokens(self):
from parlai.core.params import ParlaiParser

parser = ParlaiParser(False, False)
parser.set_defaults(gpt2_size="small", add_special_tokens=True)
parser.set_defaults(gpt2_size="small", add_special_tokens=True, fp16=True)
Gpt2DictionaryAgent.add_cmdline_args(parser, partial_opt=None)
with testing_utils.tempdir() as tmpdir:
opt = parser.parse_kwargs(dict_file=os.path.join(tmpdir, 'dict'))
@@ -118,6 +118,7 @@ def test_nospecialtok(self):
'beam_size': 1,
'batchsize': 1,
'add_special_tokens': False,
+ 'fp16': True,
}
gpt2 = create_agent(opt)
gpt2.observe({'text': 'My name is', 'episode_done': True})
@@ -136,9 +137,10 @@ class TestDistributed(unittest.TestCase):
'beam_min_length': 8,
'inference': 'beam',
'beam_size': 1,
- 'batchsize': 4,
+ 'batchsize': 2,
'add_special_tokens': True,
'validation_metric': 'ppl',
+ 'fp16': True,
}

def setUp(self):
@@ -186,6 +188,7 @@ def test_distributed(self):
config['learningrate'] = 1.0
config['momentum'] = 0.90
config['skip_generation'] = True
+ config['fp16'] = True
valid, test = self._distributed_train_model(config)

self.assertLessEqual(valid['ppl'], 10)
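Note: every change in this file either lowers the batch size or turns on fp16, which roughly halves weight and activation memory. ParlAI consumes the fp16 flag internally; as a generic illustration of the idea, autocast is one common fp16 mechanism, not necessarily the one ParlAI uses (assumes a CUDA device):

    import torch

    model = torch.nn.Linear(1024, 1024).cuda()
    x = torch.randn(8, 1024, device='cuda')
    with torch.cuda.amp.autocast():  # eligible ops run in float16
        y = model(x)
    print(y.dtype)  # torch.float16
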
8 changes: 3 additions & 5 deletions tests/nightly/gpu/test_style_gen.py
@@ -29,7 +29,7 @@ def test_simple(self):
optimizer='adamax',
truncate=8,
learningrate=7e-3,
- batchsize=32,
+ batchsize=16,
num_epochs=5,
n_layers=1,
n_heads=1,
@@ -51,7 +51,7 @@ def test_accuracy(self):
_, test = testing_utils.eval_model(
opt={
'batchsize': 4,
- 'fp16': False,
+ 'fp16': True,
'num_examples': 16,
'model_file': 'zoo:style_gen/prev_curr_classifier/model',
'model': 'projects.style_gen.classifier:ClassifierAgent',
@@ -61,7 +61,6 @@
},
skip_valid=True,
)
- # We turn off FP16 because emulation of this is likely slow on the CI GPUs
self.assertAlmostEqual(test['accuracy'], 1.0, delta=0.0)


@@ -75,7 +74,7 @@ def test_perplexities(self):
_, test = testing_utils.eval_model(
opt={
'batchsize': 4,
- 'fp16': False,
+ 'fp16': True,
'num_examples': 16,
'model_file': f'zoo:style_gen/{model_name}/model',
'model': 'projects.style_gen.style_gen:StyleGenAgent',
@@ -85,7 +84,7 @@
},
skip_valid=True,
)
- # We turn off FP16 because emulation of this is likely slow on the CI GPUs
self.assertAlmostEqual(test['ppl'], desired_ppl, delta=0.005)


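Note: the deleted comments dated from when fp16 was expected to be slowly emulated on the CI GPUs; this commit turns fp16 back on for these evals and drops the stale explanation. The memory argument in miniature (illustrative):

    import torch

    print(torch.zeros(1, dtype=torch.float32).element_size())  # 4 bytes per element
    print(torch.zeros(1, dtype=torch.float16).element_size())  # 2 bytes per element
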
