diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fc3cdae4..4dad4ba1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,19 +17,25 @@ jobs:
     timeout-minutes: 120
     strategy:
       matrix:
-        python-version: [3.6,3.7]
-        torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.0,1.8.1]
+        python-version: [3.6,3.7,3.8]
+        torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.1,1.8.1,1.9.0,1.10.2,1.11.0]
 
-#        exclude:
-#          - python-version: 3.5
-#            tf-version: 1.1.0
+        exclude:
+          - python-version: 3.6
+            torch-version: 1.11.0
+          - python-version: 3.8
+            torch-version: 1.1.0
+          - python-version: 3.8
+            torch-version: 1.2.0
+          - python-version: 3.8
+            torch-version: 1.3.0
 
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v3
 
       - name: Setup python environment
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
         with:
          python-version: ${{ matrix.python-version }}
@@ -47,7 +53,7 @@ jobs:
          pip install -q sklearn
          pytest --cov=deepctr_torch --cov-report=xml
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v1.0.2
+        uses: codecov/codecov-action@v3.1.0
        with:
          token: ${{secrets.CODECOV_TOKEN}}
          file: ./coverage.xml
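Note: the expanded matrix is 3 Python versions by 11 torch versions minus the four excluded pairs, i.e. 29 jobs per run. The exclusions track upstream wheel availability (torch 1.11 dropped Python 3.6, and Python 3.8 wheels only exist from torch 1.4 on). A throwaway script to enumerate what will actually execute; this is an illustrative helper, not part of the repository:

```python
from itertools import product

# Values copied from the workflow above; `excluded` mirrors the `exclude:` entries.
pythons = ['3.6', '3.7', '3.8']
torches = ['1.1.0', '1.2.0', '1.3.0', '1.4.0', '1.5.0', '1.6.0',
           '1.7.1', '1.8.1', '1.9.0', '1.10.2', '1.11.0']
excluded = {('3.6', '1.11.0'), ('3.8', '1.1.0'), ('3.8', '1.2.0'), ('3.8', '1.3.0')}

jobs = [combo for combo in product(pythons, torches) if combo not in excluded]
print(len(jobs))  # 33 combinations minus 4 exclusions = 29 CI jobs
```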
diff --git a/README.md b/README.md
index 7c646933..6d02554e 100644
--- a/README.md
+++ b/README.md
@@ -47,34 +47,19 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
 
 ## DisscussionGroup & Related Projects
 
-[removed: HTML table with QR-code images for 公众号:浅梦学习笔记 and 微信:deepctrbot; markup not recoverable]
+- [Github Discussions](https://github.com/shenweichen/DeepCTR/discussions)
+- Wechat Discussions
+
+|公众号:浅梦学习笔记|微信:deepctrbot|学习小组 [加入](https://t.zsxq.com/026UJEuzv) [主题集合](https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect)|
+|:--:|:--:|:--:|
+| [![公众号](./docs/pics/code.png)](https://github.com/shenweichen/AlgoNotes)| [![微信](./docs/pics/deepctrbot.png)](https://github.com/shenweichen/AlgoNotes)|[![学习小组](./docs/pics/planet_github.png)](https://t.zsxq.com/026UJEuzv)|
+
+- Related Projects
+
+  - [AlgoNotes](https://github.com/shenweichen/AlgoNotes)
+  - [DeepCTR](https://github.com/shenweichen/DeepCTR)
+  - [DeepMatch](https://github.com/shenweichen/DeepMatch)
+  - [GraphEmbedding](https://github.com/shenweichen/GraphEmbedding)
 
 ## Main Contributors([welcome to join us!](./CONTRIBUTING.md))
 
@@ -84,59 +69,58 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
 [contributor table: HTML markup and avatar images condensed to name/affiliation pairs]
 Shen Weichen
-Core Dev
-Zhejiang University
+Alibaba Group
 Zan Shuxun
-Core Dev
-Beijing University of Posts and Telecommunications
+Alibaba Group
 Wang Ze
-Core Dev
-Beihang University
+Meituan
 Zhang Wutong
-Core Dev
-Beijing University of Posts and Telecommunications
+Tencent
 Zhang Yuefeng
-Core Dev
-Peking University
+Peking University
 Huo Junyi
-Core Dev
+University of Southampton
 Zeng Kai
-Dev
+SenseTime
 Chen K
-Dev
+NetEase
 Cheng Weiyu
-Dev
+Shanghai Jiao Tong University
 Tang
-Test
+Tongji University
diff --git a/deepctr_torch/__init__.py b/deepctr_torch/__init__.py
index 88508515..4be7a5bc 100644
--- a/deepctr_torch/__init__.py
+++ b/deepctr_torch/__init__.py
@@ -2,5 +2,5 @@
 from . import models
 from .utils import check_version
 
-__version__ = '0.2.7'
+__version__ = '0.2.8'
 check_version(__version__)
\ No newline at end of file
diff --git a/deepctr_torch/models/basemodel.py b/deepctr_torch/models/basemodel.py
index 4235ad38..17e57b90 100644
--- a/deepctr_torch/models/basemodel.py
+++ b/deepctr_torch/models/basemodel.py
@@ -3,6 +3,7 @@
 Author:
     Weichen Shen,weichenswc@163.com
+    zanshuxun, zanshuxun@aliyun.com
 
 """
 from __future__ import print_function
@@ -75,7 +76,7 @@ def forward(self, X, sparse_feat_refine_weight=None):
 
         sparse_embedding_list += varlen_embedding_list
 
-        linear_logit = torch.zeros([X.shape[0], 1]).to(sparse_embedding_list[0].device)
+        linear_logit = torch.zeros([X.shape[0], 1]).to(self.device)
         if len(sparse_embedding_list) > 0:
             sparse_embedding_cat = torch.cat(sparse_embedding_list, dim=-1)
             if sparse_feat_refine_weight is not None:
@@ -476,6 +477,10 @@ def _log_loss(self, y_true, y_pred, eps=1e-7, normalize=True, sample_weight=None
                          sample_weight,
                          labels)
 
+    @staticmethod
+    def _accuracy_score(y_true, y_pred):
+        return accuracy_score(y_true, np.where(y_pred > 0.5, 1, 0))
+
     def _get_metrics(self, metrics, set_eps=False):
         metrics_ = {}
         if metrics:
@@ -490,8 +495,7 @@ def _get_metrics(self, metrics, set_eps=False):
                 if metric == "mse":
                     metrics_[metric] = mean_squared_error
                 if metric == "accuracy" or metric == "acc":
-                    metrics_[metric] = lambda y_true, y_pred: accuracy_score(
-                        y_true, np.where(y_pred > 0.5, 1, 0))
+                    metrics_[metric] = self._accuracy_score
                 self.metrics_names.append(metric)
         return metrics_
diff --git a/docs/pics/code2.jpg b/docs/pics/code2.jpg
new file mode 100644
index 00000000..e191f297
Binary files /dev/null and b/docs/pics/code2.jpg differ
diff --git a/docs/pics/planet_github.png b/docs/pics/planet_github.png
new file mode 100644
index 00000000..67efe968
Binary files /dev/null and b/docs/pics/planet_github.png differ
diff --git a/docs/requirements.readthedocs.txt b/docs/requirements.readthedocs.txt
index 80bc5e6d..793412bd 100644
--- a/docs/requirements.readthedocs.txt
+++ b/docs/requirements.readthedocs.txt
@@ -1,2 +1,3 @@
 Cython>=0.28.5
-tensorflow==1.15.4
+tensorflow==2.7.2
+scikit-learn==1.0
diff --git a/docs/source/FAQ.md b/docs/source/FAQ.md
index 102e35bc..1006bf42 100644
--- a/docs/source/FAQ.md
+++ b/docs/source/FAQ.md
@@ -6,7 +6,7 @@ To save/load weights:
 
 ```python
 import torch
-model = DeepFM()
+model = DeepFM(...)
 torch.save(model.state_dict(), 'DeepFM_weights.h5')
 model.load_state_dict(torch.load('DeepFM_weights.h5'))
 ```
@@ -15,7 +15,7 @@ To save/load models:
 
 ```python
 import torch
-model = DeepFM()
+model = DeepFM(...)
 torch.save(model, 'DeepFM.h5')
 model = torch.load('DeepFM.h5')
 ```
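Note: `DeepFM(...)` in the FAQ snippets is deliberate shorthand for a fully specified constructor call. For reference, a self-contained round trip might look like the following sketch; the `user_id` column is invented for illustration, and `map_location='cpu'` is an optional addition not shown in the docs:

```python
import torch
from deepctr_torch.inputs import SparseFeat
from deepctr_torch.models import DeepFM

# hypothetical single-feature model; any real feature column list works the same way
feature_columns = [SparseFeat('user_id', vocabulary_size=100, embedding_dim=4)]
model = DeepFM(feature_columns, feature_columns, task='binary', device='cpu')

# weights only: smaller file, tolerant of later changes to the model class
torch.save(model.state_dict(), 'DeepFM_weights.h5')
model.load_state_dict(torch.load('DeepFM_weights.h5', map_location='cpu'))

# whole model: a single call, but the pickle must match the current class code
torch.save(model, 'DeepFM.h5')
model = torch.load('DeepFM.h5', map_location='cpu')
```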
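Whole-model `torch.save` is presumably also why the basemodel.py hunk above replaces the metric lambda with the `_accuracy_score` staticmethod: `torch.save(model, ...)` pickles the module, and a compiled model that stored its accuracy metric as a lambda in `metrics_` could not be pickled, because lambdas have no importable name. The other basemodel.py change, building `linear_logit` on `self.device` instead of `sparse_embedding_list[0].device`, doubles as a crash fix: with no sparse or varlen columns the list is empty and the old indexing raised IndexError. A minimal standalone demonstration of the pickling difference (not DeepCTR-Torch code):

```python
import pickle

class LambdaMetric:
    def __init__(self):
        # a lambda stored on the instance has no importable name, so pickling fails
        self.metric = lambda y_true, y_pred: sum(
            int(t == p) for t, p in zip(y_true, y_pred)) / len(y_true)

class StaticMetric:
    @staticmethod
    def _accuracy(y_true, y_pred):
        return sum(int(t == p) for t, p in zip(y_true, y_pred)) / len(y_true)

    def __init__(self):
        # a staticmethod resolves to a named function, which pickle stores by reference
        self.metric = self._accuracy

try:
    pickle.dumps(LambdaMetric())
except (pickle.PicklingError, AttributeError) as err:
    print('lambda metric cannot be pickled:', err)

pickle.dumps(StaticMetric())  # succeeds
```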
diff --git a/docs/source/History.md b/docs/source/History.md
index ec68a102..4984dfc4 100644
--- a/docs/source/History.md
+++ b/docs/source/History.md
@@ -1,5 +1,6 @@
 # History
-- 06/14/2021 : [v0.2.7](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6) released.Add [AFN](./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions) and fix some bugs.
+- 06/19/2022 : [v0.2.8](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.8) released. Fix some bugs.
+- 06/14/2021 : [v0.2.7](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.7) released. Add [AFN](./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions) and fix some bugs.
 - 04/04/2021 : [v0.2.6](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6) released.Add [IFM](./Features.html#ifm-input-aware-factorization-machine) and [DIFM](./Features.html#difm-dual-input-aware-factorization-machine);Support multi-gpus running([example](./FAQ.html#how-to-run-the-demo-with-multiple-gpus)).
 - 02/12/2021 : [v0.2.5](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.5) released.Fix bug in DCN-M.
 - 12/05/2020 : [v0.2.4](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.4) released.Imporve compatibility & fix issues.Add History callback.([example](https://deepctr-torch.readthedocs.io/en/latest/FAQ.html#set-learning-rate-and-use-earlystopping)).
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e99b48ea..615f48b0 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,7 @@
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = '0.2.7'
+release = '0.2.8'
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1701d403..497d232b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -34,18 +34,21 @@ You can read the latest code at https://github.com/shenweichen/DeepCTR-Torch and
 
 News
 -----
+06/19/2022 : Fix some bugs. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.8>`_
+
 06/14/2021 : Add `AFN <./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions>`_ and fix some bugs. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.7>`_
 
 04/04/2021 : Add `IFM <./Features.html#ifm-input-aware-factorization-machine>`_ and `DIFM <./Features.html#difm-dual-input-aware-factorization-machine>`_ . Support multi-gpus running(`example <./FAQ.html#how-to-run-the-demo-with-multiple-gpus>`_). `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6>`_
 
-02/12/2021 : Fix bug in DCN-M. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.5>`_
 
 DisscussionGroup
 -----------------------
 
-公众号:**浅梦学习笔记**  wechat ID: **deepctrbot**
+  公众号:**浅梦学习笔记**  wechat ID: **deepctrbot**
+
+  `Discussions <https://github.com/shenweichen/DeepCTR/discussions>`_  `学习小组主题集合 <https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect>`_
 
-.. image:: ../pics/code.png
+.. image:: ../pics/code2.jpg
 
 .. toctree::
    :maxdepth: 2
diff --git a/setup.py b/setup.py
index 4e44fe13..705a9905 100644
--- a/setup.py
+++ b/setup.py
@@ -4,12 +4,12 @@
     long_description = fh.read()
 
 REQUIRED_PACKAGES = [
-    'torch>=1.1.0', 'tqdm', 'sklearn', 'tensorflow'
+    'torch>=1.1.0', 'tqdm', 'scikit-learn', 'tensorflow'
 ]
 
 setuptools.setup(
     name="deepctr-torch",
-    version="0.2.7",
+    version="0.2.8",
     author="Weichen Shen",
     author_email="weichenswc@163.com",
     description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with PyTorch",
@@ -37,6 +37,7 @@
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
         'Topic :: Scientific/Engineering',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
         'Topic :: Software Development',
diff --git a/tests/models/DeepFM_test.py b/tests/models/DeepFM_test.py
index b11b5cb4..a11dc3bd 100644
--- a/tests/models/DeepFM_test.py
+++ b/tests/models/DeepFM_test.py
@@ -6,21 +6,33 @@
 
 
 @pytest.mark.parametrize(
-    'use_fm,hidden_size,sparse_feature_num',
-    [(True, (32,), 3),
-     (False, (32,), 3),
-     (False, (32,), 2), (False, (32,), 1), (True, (), 1), (False, (), 2)
+    'use_fm,hidden_size,sparse_feature_num,dense_feature_num',
+    [(True, (32,), 3, 3),
+     (False, (32,), 3, 3),
+     (False, (32,), 2, 2),
+     (False, (32,), 1, 1),
+     (True, (), 1, 1),
+     (False, (), 2, 2),
+     (True, (32,), 0, 3),
+     (True, (32,), 3, 0),
+     (False, (32,), 0, 3),
+     (False, (32,), 3, 0),
      ]
 )
-def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
+def test_DeepFM(use_fm, hidden_size, sparse_feature_num, dense_feature_num):
     model_name = "DeepFM"
     sample_size = SAMPLE_SIZE
     x, y, feature_columns = get_test_data(
-        sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=sparse_feature_num)
+        sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=dense_feature_num)
 
     model = DeepFM(feature_columns, feature_columns, use_fm=use_fm,
                    dnn_hidden_units=hidden_size, dnn_dropout=0.5, device=get_device())
     check_model(model, model_name, x, y)
 
+    # no linear part
+    model = DeepFM([], feature_columns, use_fm=use_fm,
+                   dnn_hidden_units=hidden_size, dnn_dropout=0.5, device=get_device())
+    check_model(model, model_name + '_no_linear', x, y)
+
 
 if __name__ == "__main__":
     pass
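Note: the new parametrizations with a zero count cover sparse-only and dense-only feature sets, which is exactly the path unblocked by the `linear_logit` fix in basemodel.py, and `DeepFM([], ...)` additionally covers a model with no linear feature columns at all. Roughly what a dense-only case boils down to, as a standalone sketch with invented feature names:

```python
import numpy as np
from deepctr_torch.inputs import DenseFeat
from deepctr_torch.models import DeepFM

# dense-only columns: the linear part sees an empty sparse embedding list,
# which used to raise IndexError before the self.device change
feature_columns = [DenseFeat('d0', 1), DenseFeat('d1', 1), DenseFeat('d2', 1)]
model = DeepFM(feature_columns, feature_columns, use_fm=False,
               dnn_hidden_units=(32,), task='binary', device='cpu')

x = {fc.name: np.random.random(64) for fc in feature_columns}
y = np.random.randint(0, 2, 64)
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
model.fit(x, y, batch_size=32, epochs=1, verbose=0)
```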
diff --git a/tests/utils.py b/tests/utils.py
index 10abcecb..28f3010b 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,6 +4,7 @@
 import numpy as np
 import torch as torch
 
+from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint
 from deepctr_torch.inputs import SparseFeat, DenseFeat, VarLenSparseFeat
 
 SAMPLE_SIZE = 64
@@ -17,46 +18,46 @@ def gen_sequence(dim, max_len, sample_size):
 def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                   sequence_feature=['sum', 'mean', 'max'], classification=True, include_length=False,
                   hash_flag=False, prefix=''):
-
     feature_columns = []
     model_input = {}
 
-    if 'weight' in sequence_feature:
-        feature_columns.append(VarLenSparseFeat(SparseFeat(prefix+"weighted_seq",vocabulary_size=2,embedding_dim=embedding_size),maxlen=3,length_name=prefix+"weighted_seq"+"_seq_length",weight_name=prefix+"weight"))
+    if 'weight' in sequence_feature:
+        feature_columns.append(
+            VarLenSparseFeat(SparseFeat(prefix + "weighted_seq", vocabulary_size=2, embedding_dim=embedding_size),
+                             maxlen=3, length_name=prefix + "weighted_seq" + "_seq_length",
+                             weight_name=prefix + "weight"))
         s_input, s_len_input = gen_sequence(
             2, 3, sample_size)
-        model_input[prefix+"weighted_seq"] = s_input
-        model_input[prefix+'weight'] = np.random.randn(sample_size,3,1)
-        model_input[prefix+"weighted_seq"+"_seq_length"] = s_len_input
+        model_input[prefix + "weighted_seq"] = s_input
+        model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
+        model_input[prefix + "weighted_seq" + "_seq_length"] = s_len_input
         sequence_feature.pop(sequence_feature.index('weight'))
 
-
     for i in range(sparse_feature_num):
         dim = np.random.randint(1, 10)
-        feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,dtype=torch.int32))
+        feature_columns.append(SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, dtype=torch.int32))
     for i in range(dense_feature_num):
-        feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=torch.float32))
+        feature_columns.append(DenseFeat(prefix + 'dense_feature_' + str(i), 1, dtype=torch.float32))
     for i, mode in enumerate(sequence_feature):
         dim = np.random.randint(1, 10)
         maxlen = np.random.randint(1, 10)
         feature_columns.append(
-            VarLenSparseFeat(SparseFeat(prefix +'sequence_' + mode,vocabulary_size=dim, embedding_dim=embedding_size), maxlen=maxlen, combiner=mode))
+            VarLenSparseFeat(SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size),
+                             maxlen=maxlen, combiner=mode))
 
     for fc in feature_columns:
-        if isinstance(fc,SparseFeat):
-            model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
-        elif isinstance(fc,DenseFeat):
+        if isinstance(fc, SparseFeat):
+            model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
+        elif isinstance(fc, DenseFeat):
             model_input[fc.name] = np.random.random(sample_size)
         else:
             s_input, s_len_input = gen_sequence(
                 fc.vocabulary_size, fc.maxlen, sample_size)
             model_input[fc.name] = s_input
             if include_length:
-                fc.length_name = prefix+"sequence_"+str(i)+'_seq_length'
-                model_input[prefix+"sequence_"+str(i)+'_seq_length'] = s_len_input
+                fc.length_name = prefix + "sequence_" + str(i) + '_seq_length'
+                model_input[prefix + "sequence_" + str(i) + '_seq_length'] = s_len_input
 
     if classification:
         y = np.random.randint(0, 2, sample_size)
@@ -66,7 +67,7 @@ def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dens
 
     return model_input, y, feature_columns
 
 
-def layer_test(layer_cls, kwargs = {}, input_shape=None,
+def layer_test(layer_cls, kwargs={}, input_shape=None,
                input_dtype=torch.float32, input_data=None, expected_output=None,
                expected_output_shape=None, expected_output_dtype=None, fixed_batch_size=False):
     '''check layer is valid or not
@@ -90,7 +91,7 @@ def layer_test(layer_cls, kwargs={}, input_shape=None,
         for i, e in enumerate(input_data_shape):
             if e is None:
                 input_data_shape[i] = np.random.randint(1, 4)
-
+
         if all(isinstance(e, tuple) for e in input_data_shape):
             input_data = []
             for e in input_data_shape:
@@ -104,37 +105,37 @@ def layer_test(layer_cls, kwargs={}, input_shape=None,
     # use input_data to update other parameters
     if input_shape is None:
         input_shape = input_data.shape
-
+
     if expected_output_dtype is None:
         expected_output_dtype = input_dtype
-
+
     # layer initialization
     layer = layer_cls(**kwargs)
-
+
     if fixed_batch_size:
         inputs = torch.tensor(input_data.unsqueeze(0), dtype=input_dtype)
     else:
         inputs = torch.tensor(input_data, dtype=input_dtype)
-
+
     # calculate layer's output
     output = layer(inputs)
 
     if not output.dtype == expected_output_dtype:
         raise AssertionError("layer output dtype does not match with the expected one")
-
+
     if not expected_output_shape:
-        raise ValueError("expected output shape should not be none")
+        raise ValueError("expected output shape should not be none")
 
     actual_output_shape = output.shape
     for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape):
         if expected_dim is not None:
             if not expected_dim == actual_dim:
                 raise AssertionError(f"expected_dim:{expected_dim}, actual_dim:{actual_dim}")
-
+
     if expected_output is not None:
         # check whether output equals to expected output
         assert_allclose(output, expected_output, rtol=1e-3)
-
+
     return output
@@ -148,10 +149,14 @@ def check_model(model, model_name, x, y, check_model_io=True):
     :param check_model_io:
     :return:
     '''
+    early_stopping = EarlyStopping(monitor='val_acc', min_delta=0, verbose=1, patience=0, mode='max')
+    model_checkpoint = ModelCheckpoint(filepath='model.ckpt', monitor='val_acc', verbose=1,
+                                       save_best_only=True,
+                                       save_weights_only=False, mode='max', period=1)
+
     model.compile('adam', 'binary_crossentropy',
-                  metrics=['binary_crossentropy'])
-    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
+                  metrics=['binary_crossentropy', 'acc'])
+    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5, callbacks=[early_stopping, model_checkpoint])
 
     print(model_name + 'test, train valid pass!')
     torch.save(model.state_dict(), model_name + '_weights.h5')
@@ -165,9 +170,10 @@ def check_model(model, model_name, x, y, check_model_io=True):
     print(model_name + 'test save load model pass!')
     print(model_name + 'test pass!')
 
-def get_device(use_cuda = True):
+
+def get_device(use_cuda=True):
     device = 'cpu'
     if use_cuda and torch.cuda.is_available():
         print('cuda ready...')
         device = 'cuda:0'
-    return device
\ No newline at end of file
+    return device
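Note: `check_model` now trains every model with EarlyStopping and ModelCheckpoint attached and with 'acc' in the metric list, so the callbacks and the new `_accuracy_score` path are exercised by every model test (ModelCheckpoint with `save_weights_only=False` calls whole-model `torch.save`, which the staticmethod change makes possible). The same pattern outside the test suite, as a sketch that reuses `model`, `x`, `y` from the dense-only DeepFM example above and monitors the metric compiled there:

```python
from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint

# stop as soon as the monitored validation metric stops improving
es = EarlyStopping(monitor='val_binary_crossentropy', patience=0, mode='min', verbose=1)
# keep only the best checkpoint seen so far
mc = ModelCheckpoint(filepath='model.ckpt', monitor='val_binary_crossentropy',
                     save_best_only=True, save_weights_only=False, mode='min', verbose=1)

model.fit(x, y, batch_size=32, epochs=3, validation_split=0.5, callbacks=[es, mc])
```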