diff --git a/.gitmodules b/.gitmodules
index d1337b3..31495b9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "spacy-dev-resources"]
 	path = spacy-dev-resources
-	url = git@github.com:howl-anderson/spacy-dev-resources.git
+	url = https://github.com/howl-anderson/spacy-dev-resources.git
 [submodule "third-part/brown-cluster"]
 	path = third-part/brown-cluster
-	url = https://github.com/percyliang/brown-cluster.git
+	url = https://github.com/howl-anderson/brown-cluster.git
diff --git a/all_in_one.bash b/all_in_one.bash
new file mode 100644
index 0000000..5b22596
--- /dev/null
+++ b/all_in_one.bash
@@ -0,0 +1,17 @@
+./create_wikipedia_corpus.bash
+./move_wikipedia_corpus.bash
+./compute_words_freq.bash
+./merge_all_text_files.bash
+./download_and_compile_brown_cluster.bash
+./compute_plain_word_vec.bash
+./create_init_model.bash
+./update_model_meta.py
+./download_UD_Chinese-GSD_corpus.bash
+./extract_UD_Chinese-GSD_corpus.bash
+./convert_UD_Chinese-GSD_corpus.bash
+./format_convertor.bash
+./init_model.bash
+./train_model.bash
+./onto_to_spacy_json.bash
+./train_ner.bash
+./merge_submodel.py
diff --git a/compute_plain_word_vec.bash b/compute_plain_word_vec.bash
index 7483360..63dfcbf 100755
--- a/compute_plain_word_vec.bash
+++ b/compute_plain_word_vec.bash
@@ -3,4 +3,4 @@
 cpu_count=`nproc --all`
 process_count=$(expr $cpu_count - 1)
 
-python ./spacy-dev-resources/training/plain_word_vectors.py -i 200 -n ${process_count} ../chinese-wikipedia-corpus-creator/token_cleaned_plain_files WORDS_VECS.txt
+python ./spacy-dev-resources/training/plain_word_vectors.py -i 200 -n ${process_count} ./WORDS.txt WORDS_VECS.txt
diff --git a/create_init_model.bash b/create_init_model.bash
index 4087d32..ed0ca62 100755
--- a/create_init_model.bash
+++ b/create_init_model.bash
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-python -m spacy init-model -c ./WORDS-c1000-p1.out/paths -v WORDS_VECS.txt zh zh_wiki_core WORDS_FREQ.txt
+python -m spacy init-model zh spacy_models/base_model --jsonl-loc ./spacy_corpus.jsonl --vectors-loc WORDS_VECS.txt --vectors-name zh_core_web_sm.vectors
diff --git a/create_jsonl_corpus.bash b/create_jsonl_corpus.bash
new file mode 100755
index 0000000..d50b4c3
--- /dev/null
+++ b/create_jsonl_corpus.bash
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python ./create_jsonl_vocabulary.py zh spacy_corpus.jsonl WORDS_FREQ.txt ./WORDS-c1000-p1.out/paths
diff --git a/create_jsonl_vocabulary.py b/create_jsonl_vocabulary.py
new file mode 100644
index 0000000..4ad5b9f
--- /dev/null
+++ b/create_jsonl_vocabulary.py
@@ -0,0 +1,269 @@
+import json
+import math
+import string
+from ast import literal_eval
+from pathlib import Path
+
+import ftfy
+import jsonlines
+import plac
+import validators
+from preshed.counter import PreshCounter
+from spacy.lang.en import stop_words as en_stop_words
+from spacy.lang.zh import stop_words as zh_stop_words
+from tqdm import tqdm
+
+
+class Word:
+    counter = -1
+
+    def __init__(self, word_str, cluster, probs):
+        self._word = word_str
+        self._cluster = cluster
+        self._probs = probs
+
+        chinese_punct = "！？｡。＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
+        self._punct_list = list(set(string.punctuation + chinese_punct))
+
+        chinese_whitespace = "　"
+        self._whitespace_list = list(set(string.whitespace + chinese_whitespace))
+
+        english_stopword = en_stop_words.STOP_WORDS
+        chinese_stopword = zh_stop_words.STOP_WORDS
+        self._stopword_list = {*english_stopword, *chinese_stopword}
+
+        chinese_quote = "“”‘’"
+        english_quote = "\"'"
+        self._quote_list = list(set(english_quote + chinese_quote))
+
+        chinese_left_punct = "<([{"
+        english_left_punct = "<([「『【〔〖〘〚{"
+        self._left_punct_list = list(set(english_left_punct + chinese_left_punct))
+
+        chinese_right_punct = ">)]}"
+        english_right_punct = ">)]」』】〕〗〙〛}"
+        self._right_punct_list = list(set(english_right_punct + chinese_right_punct))
+
+    @property
+    def orth(self):
+        return self._word
+
+    @property
+    def id(self):
+        self.__class__.counter += 1
+
+        return self.__class__.counter
+
+    @property
+    def lower(self):
+        return self._word.lower()
+
+    @property
+    def norm(self):
+        return self._word
+
+    @property
+    def shape(self):
+        return "".join(map(lambda x: "X" if x.isupper() else "x", self._word))
+
+    @property
+    def prefix(self):
+        return self._word[0]
+
+    @property
+    def suffix(self):
+        return self._word[-1]
+
+    @property
+    def length(self):
+        return len(self._word)
+
+    @property
+    def cluster(self):
+        return self._cluster
+
+    @property
+    def prob(self):
+        return self._probs.get(self._word, 0)
+
+    @property
+    def is_alpha(self):
+        return self._word.isalpha()
+
+    @property
+    def is_ascii(self):
+        # str.isascii() is only available on Python 3.7+
+        # return self._word.isascii()
+        try:
+            self._word.encode('ascii')
+        except UnicodeEncodeError:
+            return False
+
+        return True
+
+    @property
+    def is_digit(self):
+        return self._word.isdigit()
+
+    @property
+    def is_lower(self):
+        return self._word.islower()
+
+    @property
+    def is_punct(self):
+        return self._word in self._punct_list
+
+    @property
+    def is_space(self):
+        return self._word in self._whitespace_list
+
+    @property
+    def is_title(self):
+        return self._word.istitle()
+
+    @property
+    def is_upper(self):
+        return self._word.isupper()
+
+    @property
+    def like_url(self):
+        return bool(validators.url(self._word))
+
+    @property
+    def like_num(self):
+        # TODO(howl-anderson): fix it later
+        return False
+
+    @property
+    def like_email(self):
+        return bool(validators.email(self._word))
+
+    @property
+    def is_stop(self):
+        return self._word in self._stopword_list
+
+    @property
+    def is_oov(self):
+        return self._word not in self._probs
+
+    @property
+    def is_quote(self):
+        return self._word in self._quote_list
+
+    @property
+    def is_left_punct(self):
+        return self._word in self._left_punct_list
+
+    @property
+    def is_right_punct(self):
+        return self._word in self._right_punct_list
+
+
+def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
+    print("Counting frequencies...")
+    counts = PreshCounter()
+    total = 0
+    with freqs_loc.open() as f:
+        for i, line in enumerate(f):
+            freq, doc_freq, key = line.rstrip().split("\t", 2)
+            freq = int(freq)
+            counts.inc(i + 1, freq)
+            total += freq
+    counts.smooth()
+    log_total = math.log(total)
+    probs = {}
+    with freqs_loc.open() as f:
+        for line in tqdm(f):
+            freq, doc_freq, key = line.rstrip().split("\t", 2)
+            doc_freq = int(doc_freq)
+            freq = int(freq)
+            if doc_freq >= min_doc_freq and freq >= min_freq and len(key) < max_length:
+                word = literal_eval(key)
+                smooth_count = counts.smoother(int(freq))
+                probs[word] = math.log(smooth_count) - log_total
+    oov_prob = math.log(counts.smoother(0)) - log_total
+    return probs, oov_prob
+
+
+def read_clusters(clusters_loc):
+    print("Reading clusters...")
+    clusters = {}
+    with clusters_loc.open() as f:
+        for line in tqdm(f):
+            try:
+                cluster, word, freq = line.split()
+                word = ftfy.fix_text(word)
+            except ValueError:
+                continue
+            # If the clusterer has only seen the word a few times, its
+            # cluster is unreliable.
+            if int(freq) >= 3:
+                clusters[word] = cluster
+            else:
+                clusters[word] = "0"
+    # Expand clusters with re-casing
+    for word, cluster in list(clusters.items()):
+        if word.lower() not in clusters:
+            clusters[word.lower()] = cluster
+        if word.title() not in clusters:
+            clusters[word.title()] = cluster
+        if word.upper() not in clusters:
+            clusters[word.upper()] = cluster
+    return clusters
+
+
+@plac.annotations(
+    lang=("model language", "positional", None, str),
+    output_loc=("model output directory", "positional", None, str),
+    freqs_loc=("location of words frequencies file", "positional", None, Path),
+    clusters_loc=("location of brown clusters data", "positional", None, Path),
+)
+def main(lang, output_loc, freqs_loc, clusters_loc):
+    clusters = read_clusters(clusters_loc)
+    probs, oov_prob = read_freqs(freqs_loc)
+
+    with jsonlines.open(output_loc, mode="w") as writer:
+        header = {"lang": lang, "settings": {"oov_prob": oov_prob}}
+
+        writer.write(header)
+
+        for word_str, cluster in clusters.items():
+
+            if not word_str:
+                continue
+
+            word = Word(word_str, cluster, probs)
+            row = {
+                "orth": word.orth,  # the word text
+                "id": word.id,  # can correspond to row in vectors table
+                "lower": word.lower,
+                "norm": word.norm,
+                "shape": word.shape,
+                "prefix": word.prefix,
+                "suffix": word.suffix,
+                "length": word.length,
+                "cluster": word.cluster,
+                "prob": word.prob,
+                "is_alpha": word.is_alpha,
+                "is_ascii": word.is_ascii,
+                "is_digit": word.is_digit,
+                "is_lower": word.is_lower,
+                "is_punct": word.is_punct,
+                "is_space": word.is_space,
+                "is_title": word.is_title,
+                "is_upper": word.is_upper,
+                "like_url": word.like_url,
+                "like_num": word.like_num,
+                "like_email": word.like_email,
+                "is_stop": word.is_stop,
+                "is_oov": word.is_oov,
+                "is_quote": word.is_quote,
+                "is_left_punct": word.is_left_punct,
+                "is_right_punct": word.is_right_punct,
+            }
+
+            writer.write(row)
+
+
+if __name__ == "__main__":
+    plac.call(main)
diff --git a/create_model_package.bash b/create_model_package.bash
new file mode 100755
index 0000000..6d81fdd
--- /dev/null
+++ b/create_model_package.bash
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+python -m spacy package spacy_models/final_model spacy_models/model_package --force
+
+cd spacy_models/model_package/zh_core_web_sm-0.1.0
+python ./setup.py sdist
+
diff --git a/create_wikipedia_corpus.bash b/create_wikipedia_corpus.bash
new file mode 100644
index 0000000..beaf722
--- /dev/null
+++ b/create_wikipedia_corpus.bash
@@ -0,0 +1,2 @@
+cd chinese-wikipedia-corpus-creator
+bash ./allinone_process.bash
diff --git a/merge_all_text_files.py b/merge_all_text_files.py
index 4943dfa..0e6e24e 100644
--- a/merge_all_text_files.py
+++ b/merge_all_text_files.py
@@ -9,10 +9,10 @@
 
 output_path = pathlib.Path(output_file)
 
-with output_path.open('wt') as outfile:
+with output_path.open("wt") as outfile:
     for fname in input_files:
-        with fname.open('rt') as infile:
+        with fname.open("rt") as infile:
            for line in infile:
-                if not line.endswith('\n'):
-                    line = line + '\n'
+                if not line.endswith("\n"):
+                    line = line + "\n"
                 outfile.write(line)
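Side note (not part of the patch): the JSONL vocabulary written by create_jsonl_vocabulary.py above is what create_init_model.bash feeds to `spacy init-model` via `--jsonl-loc`. A minimal sketch, assuming the file was generated as `spacy_corpus.jsonl` in the working directory, of how the output can be sanity-checked; the keys used here are exactly the ones written by the script:

    # Illustrative check of spacy_corpus.jsonl (file name taken from
    # create_jsonl_corpus.bash / create_init_model.bash above).
    import json

    with open("spacy_corpus.jsonl", encoding="utf-8") as fd:
        header = json.loads(fd.readline())     # {"lang": "zh", "settings": {"oov_prob": ...}}
        first_row = json.loads(fd.readline())  # one record per vocabulary entry

    print(header["lang"], header["settings"]["oov_prob"])
    print(first_row["orth"], first_row["cluster"], first_row["prob"], first_row["is_oov"])
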
diff --git a/merge_submodel.py b/merge_submodel.py
new file mode 100644
index 0000000..01053ed
--- /dev/null
+++ b/merge_submodel.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python3
+
+import shutil
+import json
+from pathlib import Path
+
+
+def read_pipeline(meta_file):
+    with open(meta_file) as fd:
+        data = json.load(fd)
+    return data["pipeline"]
+
+
+def update_pipeline(meta_file, pipeline):
+    with open(meta_file) as fd:
+        data = json.load(fd)
+
+    data["pipeline"] = pipeline
+
+    with open(meta_file, "w") as fd:
+        json.dump(data, fd)
+
+
+def copy_tree(src: Path, dst: Path, folder: str):
+    shutil.copytree(src / folder, dst / folder)
+
+
+def main():
+    target_dir = Path("./spacy_models/final_model")
+    target_dir.mkdir(exist_ok=True)
+
+    pipeline = []
+
+    source_dir = Path("./spacy_models/dependency_model/model-best")
+    copy_tree(source_dir, target_dir, "parser")
+    copy_tree(source_dir, target_dir, "tagger")
+    copy_tree(source_dir, target_dir, "vocab")
+
+    pipeline.extend(read_pipeline(source_dir / "meta.json"))
+
+    source_dir = Path("./spacy_models/ner_model/model-best")
+    copy_tree(source_dir, target_dir, "ner")
+    shutil.copy(source_dir / "meta.json", target_dir / "meta.json")
+
+    pipeline.extend(read_pipeline(source_dir / "meta.json"))
+
+    update_pipeline(target_dir / "meta.json", pipeline)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/meta.json b/meta.json
index 7c175e5..d7765fb 100644
--- a/meta.json
+++ b/meta.json
@@ -1,40 +1,14 @@
-{
-    "lang":"zh",
-    "pipeline":[
-        "tagger",
-        "parser",
-        "ner"
-    ],
-    "accuracy":{
-        "token_acc":99.8698372794,
-        "ents_p":84.9664503965,
-        "ents_r":85.6312524451,
-        "uas":91.7237657538,
-        "tags_acc":97.0403350292,
-        "ents_f":85.2975560875,
-        "las":89.800872413
-    },
-    "name":"core_web_sm",
-    "license":"CC BY-SA 3.0",
-    "author":"Xiaoquan Kong",
-    "url":"https://xiaoquankong.ai",
-    "vectors":{
-        "keys":0,
-        "width":0,
-        "vectors":0
-    },
-    "sources":[
-        "OntoNotes 5",
-        "Common Crawl"
-    ],
-    "version":"2.0.0",
-    "spacy_version":">=2.0.0a18",
-    "parent_package":"spacy",
-    "speed":{
-        "gpu":null,
-        "nwords":291344,
-        "cpu":5122.3040471407
-    },
-    "email":"u1mail2me@gmail.com",
-    "description":"Chinese multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities."
-}
+ {
+    "name": "core_web_sm",
+    "version": "0.1.0",
+    "license": "CC BY-SA 3.0",
+    "author": "Xiaoquan Kong",
+    "url": "https://xiaoquankong.ai",
+    "sources": [
+        "OntoNotes 5",
+        "Common Crawl",
+        "Universal Dependencies"
+    ],
+    "email": "u1mail2me@gmail.com",
+    "description": "Chinese multi-task CNN trained on OntoNotes, with GloVe vectors trained on Common Crawl. Assigns word vectors, context-specific token vectors, POS tags, dependency parse and named entities."
+ }
diff --git a/move_wikipedia_corpus.bash b/move_wikipedia_corpus.bash
new file mode 100644
index 0000000..84d4db4
--- /dev/null
+++ b/move_wikipedia_corpus.bash
@@ -0,0 +1 @@
+cp -r chinese-wikipedia-corpus-creator/token_cleaned_plain_files token_cleaned_plain_files
diff --git a/onto_to_spacy_json.bash b/onto_to_spacy_json.bash
index 7777ff9..9c84bfa 100755
--- a/onto_to_spacy_json.bash
+++ b/onto_to_spacy_json.bash
@@ -1,3 +1,3 @@
 #!/usr/bin/env bash
 
-python onto_to_spacy_json.py -i "/home/howl/Datasets/ontonotes-release-5.0/data/files/data/chinese/annotations/" -t "china_ner_train.json" -e "china_ner_eval.json" -v 0.1
+python onto_to_spacy_json.py -i "./ontonotes-release-5.0/data/files/data/chinese/annotations/" -t "china_ner_train.json" -e "china_ner_eval.json" -v 0.05
diff --git a/onto_to_spacy_json.py b/onto_to_spacy_json.py
index 0a4adfa..914a349 100644
--- a/onto_to_spacy_json.py
+++ b/onto_to_spacy_json.py
@@ -22,12 +22,12 @@ def get_root_filename(onto_dir):
 
 
 def split_sentence(text):
-    text = text.strip().split('\n')[1:-1]
+    text = text.strip().split("\n")[1:-1]
     return text
 
 
 def split_doc(text):
-    text_list = text.strip().split('\s\s', t)[0] for t in text_list] text_list = [re.sub('', "", t).strip() for t in text_list] return ids, text_list
@@ -35,7 +35,7 @@ def split_doc(text):
 
 
 def clean_ent(ent):
     tag = re.findall('TYPE="(.+?)"', ent)[0]
-    text = re.findall('>(.+)', ent)[0]
+    text = re.findall(">(.+)", ent)[0]
     text = re.sub("\$", "\$", text)
     return (text, tag)
@@ -78,10 +78,11 @@ def onf_to_raw(onf_file):
     """
     with open(onf_file, "r") as f:
         onf = f.read()
-    sentences = re.findall("Plain sentence\:\n\-+?\n(.+?)Treebanked sentence",
-                           onf, re.DOTALL)
+    sentences = re.findall(
+        "Plain sentence\:\n\-+?\n(.+?)Treebanked sentence", onf, re.DOTALL
+    )
     sentences = [re.sub("\n+?\s*", " ", i).strip() for i in sentences]
-    paragraph = ' '.join(sentences)
+    paragraph = " ".join(sentences)
     return paragraph
@@ -98,16 +99,18 @@ def name_to_sentences(ner_filename):
     for sent in onto_sents:
         offsets = text_to_spacy(sent)
         doc = nlp(offsets[0])
-        tags = biluo_tags_from_offsets(doc, offsets[1]['entities'])
+        tags = biluo_tags_from_offsets(doc, offsets[1]["entities"])
         ner_info = list(zip(doc, tags))
         tokens = []
         for n, i in enumerate(ner_info):
-            token = {"head": 0,
-                     "dep": "",
-                     "tag": "",
-                     "orth": i[0].string,
-                     "ner": i[1],
-                     "id": n}
+            token = {
+                "head": 0,
+                "dep": "",
+                "tag": "",
+                "orth": i[0].string,
+                "ner": i[1],
+                "id": n,
+            }
             tokens.append(token)
         sentences.append({"tokens": tokens})
     return sentences
@@ -124,10 +127,7 @@ def dir_to_annotation(onto_dir):
         try:
             raw = onf_to_raw(onf_filename)
             sentences = name_to_sentences(ner_filename)
-            final = {"id": "fake",
-                     "paragraphs": [
-                         {"raw": raw,
-                          "sentences": sentences}]}
+            final = {"id": "fake", "paragraphs": [{"raw": raw, "sentences": sentences}]}
             all_annotations.append(final)
         except Exception as e:
             print("Error formatting ", fn, e)
@@ -138,7 +138,8 @@
     onto_dir=("Directory of OntoNotes data to traverse", "option", "i", str),
     train_file=("File to write training spaCy JSON out to", "option", "t", str),
     val_file=("File to write validation spaCy JSON out to", "option", "e", str),
-    val_split=("Percentage to use for evaluation", "option", "v", float))
+    val_split=("Percentage to use for evaluation", "option", "v", float),
+)
 def main(onto_dir, train_file, val_file, val_split=0.75):
     print("Reading and formatting annotations")
     all_annotations = dir_to_annotation(onto_dir)
@@ -147,8 +148,11 @@ def main(onto_dir, train_file, val_file, val_split=0.75):
     val = all_annotations[:cutpoint]
     train = all_annotations[cutpoint:]
-    print("Saving {0} training examples and {1} validation examples".format(
-        len(train), len(val)))
+    print(
+        "Saving {0} training examples and {1} validation examples".format(
+            len(train), len(val)
+        )
+    )
     with open(train_file, "w") as f:
         json.dump(train, f, ensure_ascii=False, indent=4)
     with open(val_file, "w") as f:
diff --git a/plain_word_vectors.py b/plain_word_vectors.py
new file mode 100644
index 0000000..1f9759f
--- /dev/null
+++ b/plain_word_vectors.py
@@ -0,0 +1,51 @@
+import plac
+import gensim
+from gensim import utils
+
+
+class Corpus:
+    def __init__(self, corpus_file):
+        self.corpus_file = corpus_file
+
+    def __iter__(self):
+        with open(self.corpus_file) as fd:
+            for line in fd:
+                yield utils.simple_preprocess(line)
+
+
+@plac.annotations(
+    in_dir=("Location of input directory"),
+    out_loc=("Location of output file"),
+    n_workers=("Number of workers", "option", "n", int),
+    size=("Dimension of the word vectors", "option", "d", int),
+    window=("Context window size", "option", "w", int),
+    min_count=("Min count", "option", "m", int),
+    negative=("Number of negative samples", "option", "g", int),
+    nr_iter=("Number of iterations", "option", "i", int),
+)
+def main(
+    in_dir,
+    out_loc,
+    negative=5,
+    n_workers=4,
+    window=5,
+    size=128,
+    min_count=10,
+    nr_iter=2,
+):
+    sentences = Corpus(in_dir)
+    model = gensim.models.Word2Vec(
+        sentences=sentences,
+        size=size,
+        window=window,
+        min_count=min_count,
+        workers=n_workers,
+        sample=1e-5,
+        negative=negative,
+        iter=nr_iter,
+    )
+    model.wv.save_word2vec_format(out_loc, binary=False)
+
+
+if __name__ == "__main__":
+    plac.call(main)
diff --git a/requirements.txt b/requirements.txt
index 3f23f45..d155217 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ plac
 spacy
 pandas
 jieba
+ftfy
+validators
diff --git a/spacy-dev-resources b/spacy-dev-resources
deleted file mode 160000
index 4ba4bea..0000000
--- a/spacy-dev-resources
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 4ba4bea947c4bb6779066f4ef6a30decaad304e4
diff --git a/spacy-dev-resources b/spacy-dev-resources
new file mode 120000
index 0000000..89e29cd
--- /dev/null
+++ b/spacy-dev-resources
@@ -0,0 +1 @@
+../spacy-dev-resources
\ No newline at end of file
diff --git a/test_as_model_dir.py b/test_as_model_dir.py
new file mode 100755
index 0000000..5b3a5d6
--- /dev/null
+++ b/test_as_model_dir.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+from spacy import displacy
+import spacy
+
+nlp = spacy.load("./spacy_models/final_model")
+
+
+def main():
+    doc = nlp("王小明在北京的清华大学读书")
+    for token in doc:
+        print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
+              token.shape_, token.is_alpha, token.is_stop, token.has_vector,
+              token.ent_iob_, token.ent_type_,
+              token.vector_norm, token.is_oov)
+
+    # displacy.serve(doc)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/train_model.bash b/train_model.bash
index 5f6f38e..4ccf4de 100755
--- a/train_model.bash
+++ b/train_model.bash
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-python -m spacy train zh depedency_model corpus/spacy/zh-simplified-ud-train.json corpus/spacy/zh-simplified-ud-dev.json -v zh_model -m ./meta.json --no-entities
+python -m spacy train zh spacy_models/dependency_model corpus/spacy/zh-simplified-ud-train.json corpus/spacy/zh-simplified-ud-dev.json --pipeline tagger,parser -v spacy_models/base_model -m meta.json -V 0.1.0 -n 1
diff --git a/train_ner.bash b/train_ner.bash
index f034f39..630857e 100755
--- a/train_ner.bash
+++ b/train_ner.bash
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-python -m spacy train zh ner_model china_ner_train.json china_ner_eval.json --no-tagger --no-parser -verbose True -g 0 --vectors ./zh_model
+python -m spacy train zh spacy_models/ner_model ./china_ner_train.json ./china_ner_eval.json --pipeline ner -m meta.json -v ./spacy_models/dependency_model/model-best -n 1
diff --git a/update_model_meta.py b/update_model_meta.py
new file mode 100644
index 0000000..b43e701
--- /dev/null
+++ b/update_model_meta.py
@@ -0,0 +1,15 @@
+import json
+
+
+def main():
+    with open("./spacy_models/base_model/meta.json") as fd:
+        data = json.load(fd)
+
+    data["name"] = "core_web_sm"
+
+    with open("./spacy_models/base_model/meta.json", "wt") as fd:
+        json.dump(data, fd)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/workflow.md b/workflow.md
index a005de1..17dc154 100644
--- a/workflow.md
+++ b/workflow.md
@@ -2,6 +2,16 @@
 
 ## get preprocessed Chinese Wikipedia corpus
 see project [chinese-wikipedia-corpus-creator](https://github.com/howl-anderson/chinese-wikipedia-corpus-creator) for more details.
+
+### produce Wikipedia corpus
+ * input: -
+ * output: `token_cleaned_plain_files/`
+ * script: `create_wikipedia_corpus.bash`
+
+### copy corpus to workspace
+ * input: `chinese-wikipedia-corpus-creator/token_cleaned_plain_files/`
+ * output: `token_cleaned_plain_files/`
+ * script: `move_wikipedia_corpus.bash`
 
 ## computing word frequency
  * input: `token_cleaned_plain_files/*`
@@ -32,10 +42,18 @@
  * output: `WORDS_VECS.txt`
  * script: `compute_plain_word_vec.bash`
 
-## initial SpaCy model [TODO: may be removed]
+## initial SpaCy model
+
+### build base model
  * input: `./WORDS-c1000-p1.out/paths WORDS_VECS.txt WORDS_FREQ.txt`
- * output: `zh_wiki_core/**/*`
+ * output: `spacy_models/base_model/**/*`
  * script: `create_init_model.bash`
+
+### modify model name
+ * input: `spacy_models/base_model/meta.json`
+ * output: `spacy_models/base_model/meta.json`
+ * script: `update_model_meta.py`
+
 
 ## getting UD_Chinese-GSD corpus
@@ -78,3 +96,8 @@
  * input: `zh_model china_ner_train.json china_ner_eval.json`
  * output: `ner_model`
  * script: `train_ner.bash`
+
+## merge sub-models
+ * input: `spacy_models/dependency_model`, `spacy_models/ner_model`
+ * output: `spacy_models/final_model`
+ * script: `merge_submodel.py`
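End-to-end smoke test (a sketch, not part of the patch): assuming the sdist built by create_model_package.bash has been installed with pip, the merged pipeline can be loaded by its package name. The name `zh_core_web_sm` comes from meta.json ("zh" + "core_web_sm"), and the tagger/parser/ner order follows from merge_submodel.py; the exact dist path below is an assumption based on standard setuptools layout.

    # pip install spacy_models/model_package/zh_core_web_sm-0.1.0/dist/zh_core_web_sm-0.1.0.tar.gz
    import spacy

    nlp = spacy.load("zh_core_web_sm")
    print(nlp.pipe_names)  # expected: ['tagger', 'parser', 'ner'] after merge_submodel.py

    doc = nlp("王小明在北京的清华大学读书")
    print([(ent.text, ent.label_) for ent in doc.ents])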