# CodeMixed-Text-Generator/config.ini

[GENERAL]
# verbose output
; verbose = 
# source language; default = HINDI
language_1 =
# target language; default = ENGLISH
language_2 = 
# choose which stages of the pipeline are going to be run; default: pregcm, gcm
stages_to_run = 
# whether to run the pregcm and gcm stages in parallel; default: 0; set to 1 to run in parallel
; parallel_run = 
# path of input directory where input files are present; default: data
input_loc =
# path for output directory; default: data
output_loc = 

[ALIGNER]
# name of input file with non-english data; default: hi-to-en-input_lang1
source_inp_file = 
# name of file with english data; default: hi-to-en-input_lang2
target_inp_file = 
# name of output file with non-english data; default: hi-to-en-input_lang1
source_op_file =
# name of output file with english data; default: hi-to-en-input_lang2
target_op_file =
# name of the PFMS file, if present; if not given, a PFMS file will be generated with a default name using the initials of the two languages, e.g. for hindi and english: hi-to-en_pfms.txt
pfms_file = 
# name of output file for saving parallel alignments; default: hi-to-en-input_parallel_alignments
align_op_file = 

[PREGCM]
# path of the directory to store the output of the pregcm stage; this directory will be saved as a sub-directory of output_loc. If not given, a directory named using the initials of the two languages will be used, e.g. for hindi and english: hi-to-en/ inside the output_loc directory.
pregcm_output_loc =
# cut-off value for PFMS score
max_pfms = 
# select the parser to be used from available parsers - stanford and benepar; default: benepar
parser = 

[GCM]
# directory that contains output of the pregcm; default: data/hi-to-en
gcm_input_loc = 
# path of the directory to store the output of the gcm stage; this directory will be saved as a sub-directory of output_loc. If not given, a directory named using the initials of the two languages will be used, e.g. for hindi and english: hi-to-en-gcm/ inside the output_loc directory.
gcm_output_loc = 
# max number of code-mixed sentences to generate per input sentence; set -1 to get all the generations; default: 5
k = -1
# which theory to use for generating cm sentences: 'ec' (ec theory) or 'ml' (ml theory); default: ec
linguistic_theory =

[OUTPUT]
# language tag at word level in each output code-mixed sentence; default: 0
lid_output = 
# visualize DFAs that were used to make generations; default: 0
; dfa_output = 
# sampling technique to use - random or spf; default: random
sampling = 
# file name of the input rcm file, which is needed for spf sampling; default: rcm_lang_tagged.txt
rcm_file = 

[WEB]
# your Azure subscription key, used to generate translations via the Azure API; keep this value secret and do not commit it to version control
azure_subscription_key =