Skip to content

Commit

Permalink
Merge branch 'master' into master-dl
Browse files Browse the repository at this point in the history
  • Loading branch information
parkjh80 committed Jul 20, 2020
2 parents 87362fd + 6f78021 commit 7db4a5b
Show file tree
Hide file tree
Showing 358 changed files with 289,633 additions and 21,632 deletions.
3 changes: 3 additions & 0 deletions README.md
Expand Up @@ -70,6 +70,9 @@ This section explain how to install some python packages to Brightics Studio pyt
/brightics-studio/lib/python/python get-pip.py : This step is only needed once.
/brightics-studio/lib/python/Scripts/pip install <ANY_PACKAGES>

### Notes
The Tokenizer (Korean) function does not work properly if the installation path contains Korean characters. To use this function, install Brightics Studio in a folder whose full path contains no Korean characters.

## Development
### Prerequisite
* JDK 1.8, 1.9 (64bit)
Expand Down
1 change: 1 addition & 0 deletions api-server/brightics-server/bin/start-server.cmd
Expand Up @@ -21,6 +21,7 @@ if "%USER_ID%"=="" (
EXIT /b 1
)

SET _JAVA_OPTIONS=
SET GC_OPTS=-XX:+UseConcMarkSweepGC -verbose:gc --Xloggc:"%BRIGHTICS_SERVER_HOME%gc.out" -XX:+CMSClassUnloadingEnabled
SET JAVA_OPTS=-Xms1g -Xmx2g -XX:MaxMetaspaceSize=512m -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Djava.io.tmpdir="%BRIGHTICS_SERVER_HOME%tmp" -Dbrightics.local.token=%ACCESS_TOKEN% -Dbrightics.local.user=%USER_ID% -Dfile.encoding=utf-8
SET MAIN=org.springframework.boot.loader.JarLauncher -Dspring.config.location="%BRIGHTICS_SERVER_HOME%BOOT-INF\classes\"
Expand Down
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion common/network/pom.xml
Expand Up @@ -88,7 +88,7 @@
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.1.17.Final</version>
<version>4.1.42.Final</version>
</dependency>
</dependencies>
</project>
2 changes: 1 addition & 1 deletion docker/Dockerfile
Expand Up @@ -28,8 +28,8 @@ WORKDIR /brightics-studio
RUN mv /git/studio/build/target/dist/brightics-studio /
RUN sed -i "s/\"127.0.0.1\",/\"0.0.0.0\",/g" /brightics-studio/visual-analytics/conf.json
RUN sed -i "s/\%\*\ //g" /brightics-studio/setup.sh
RUN sed -i '24d' /brightics-studio/start-brightics.sh
RUN ./setup.sh
RUN mv /brightics-studio/lib/nltk_data /brightics-studio/lib/brightics_python_env/
RUN rm -rf /brightics-studio/lib/etc /brightics-studio/lib/graphviz /brightics-studio/lib/hadoop /brightics-studio/lib/shortcut /brightics-studio/lib/node/node_modules/npm/changelogs /brightics-studio/lib/node/node_modules/npm/doc /brightics-studio/lib/node/node_modules/npm/html /brightics-studio/lib/node/node_modules/npm/man /brightics-studio/lib/node/node_modules/npm/scripts /brightics-studio/lib/node/node_modules/npm/*.md /brightics-studio/lib/node/node_modules/npm/AUTHORS /brightics-studio/lib/node/node_modules/npm/TODO.org /brightics-studio/lib/node/node_modules/npm/.github /opt/zulu8.46.0.19-ca-jre8.0.252-linux_x64/man


Expand Down
5 changes: 3 additions & 2 deletions function/python/brightics/brightics_data_api.py
Expand Up @@ -30,7 +30,7 @@

import brightics.common.data.table_data_reader as table_reader
import brightics.common.data.utils as data_util
import brightics.common.json as data_json
import brightics.common.datajson as data_json
from brightics.brightics_java_gateway import brtc_java_gateway as gateway
from brightics.brightics_kv_store_client import KVStoreClient

Expand Down Expand Up @@ -126,7 +126,8 @@ def col_dtype(col):

def ensure_none(df):
val = df.values
val[isna(df.values)] = None
if isna(val).any():
val[isna(df.values)] = None
return val

return data_json.to_json({
Expand Down
2 changes: 1 addition & 1 deletion function/python/brightics/brightics_kv_store_client.py
Expand Up @@ -14,7 +14,7 @@
limitations under the License.
"""

import brightics.common.json as data_json
import brightics.common.datajson as data_json
from brightics.brightics_java_gateway import brtc_java_gateway as gateway


Expand Down
4 changes: 3 additions & 1 deletion function/python/brightics/brightics_python_runner.py
Expand Up @@ -27,7 +27,8 @@
import json
import matplotlib
matplotlib.use("agg")

import multiprocessing
multiprocessing.set_start_method('spawn', True)
try:
from StringIO import StringIO
except ImportError:
Expand Down Expand Up @@ -125,6 +126,7 @@ def _executer(self):
interactive_code_object = compile(ast.Interactive(single_code), '<string>', 'single')

with redirect_stderr():
# exec("print(' ')")
exec(def_object, globals())
try:
exec(exec_code_object)
Expand Down
24 changes: 24 additions & 0 deletions function/python/brightics/common/datajson/__init__.py
@@ -0,0 +1,24 @@
"""
Copyright 2019 Samsung SDS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

def to_json(data, for_redis=False):
    """
    Serialize ``data`` to a JSON string.

    The work is delegated to ``encode`` in the sibling ``encoder`` module;
    ``for_redis`` is passed through unchanged and selects which encoder
    that function uses.
    """
    # The encoder module is imported at call time, not at package import.
    from . import encoder as _encoder_module
    return _encoder_module.encode(data, for_redis)


def from_json(json_str):
    """
    Parse ``json_str`` back into Python objects.

    The work is delegated to ``decode`` in the sibling ``decoder`` module.
    """
    # The decoder module is imported at call time, not at package import.
    from . import decoder as _decoder_module
    return _decoder_module.decode(json_str)
38 changes: 38 additions & 0 deletions function/python/brightics/common/datajson/decoder.py
@@ -0,0 +1,38 @@
"""
Copyright 2019 Samsung SDS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import json
import pickle
import numpy


def decode(obj):
    """
    Parse the JSON string ``obj`` and rebuild tagged values.

    Every JSON object is passed through ``redis_read_hook``. An object that
    carries one of the marker keys below is replaced by the value it stands
    for; any other object is returned as a plain dict.

    ``__inf__``     -> ``float`` of the stored string (e.g. ``'inf'``, ``'-inf'``)
    ``__set__``     -> ``set``
    ``__tuple__``   -> ``tuple``
    ``__numpy__``   -> ``numpy.ndarray``
    ``__pickled__`` -> object unpickled from the stored list of byte values

    Returns whatever ``json.loads`` yields after the hook has been applied.
    """

    def redis_read_hook(o):
        # Markers are tested in a fixed order; the first one present wins.
        if '__inf__' in o:
            return float(o['__inf__'])
        if '__set__' in o:
            return set(o['__set__'])
        if '__tuple__' in o:
            return tuple(o['__tuple__'])
        if '__numpy__' in o:
            return numpy.array(o['__numpy__'])
        # TODO add more support types
        if '__pickled__' in o:
            # SECURITY: pickle.loads can execute arbitrary code. Only feed
            # this function JSON that comes from a trusted store.
            # The payload is a list of ints in 0..255; bytes() rebuilds the
            # original byte string directly.
            return pickle.loads(bytes(o['__pickled__']))
        return o

    return json.loads(obj, object_hook=redis_read_hook)
126 changes: 126 additions & 0 deletions function/python/brightics/common/datajson/encoder.py
@@ -0,0 +1,126 @@
"""
Copyright 2019 Samsung SDS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import json
import pickle
import numpy
import pandas as pd
from brightics.common.repr import BrtcReprBuilder

def _to_default_list(np_arr):
return numpy.where(pd.isnull(np_arr), None, np_arr).tolist()

class DefaultEncoder(json.JSONEncoder):
    """
    DefaultEncoder builds a viewable JSON string for display in the browser.
    """

    def default(self, obj):
        """
        Convert a value the stock JSON encoder cannot serialize.

        A ``set`` becomes a list and a ``numpy.ndarray`` is converted with
        ``_to_default_list``. Anything else is replaced by a dict of type
        ``'python object'`` whose ``_repr_brtc_`` entry is built from
        ``str(obj)`` through ``BrtcReprBuilder``.
        """
        # TODO add more support types
        if isinstance(obj, set):
            return list(obj)
        if isinstance(obj, numpy.ndarray):
            return _to_default_list(obj)

        # Fallback: only the textual form of the object is kept.
        builder = BrtcReprBuilder()
        builder.addRawTextMD(str(obj))
        return {'type': 'python object', '_repr_brtc_': builder.get()}


def _hint_value(value):
    """
    Rewrite one container element (or dict value) before JSON encoding.

    Float values (``float`` or ``numpy.floating``) that are +inf / -inf become
    ``{'__inf__': 'inf'}`` / ``{'__inf__': '-inf'}``, and float nulls become
    ``None``. Every other value is handed to ``_hint_tuples`` so nested
    containers are processed recursively.
    """
    if isinstance(value, (numpy.floating, float)):
        if value == numpy.inf:
            return {'__inf__': 'inf'}
        if value == -numpy.inf:
            return {'__inf__': '-inf'}
        if pd.isnull(value):
            return None
    return _hint_tuples(value)


def _hint_tuples(item):
    """
    Recursively tag tuples and special floats inside ``item``.

    Tuples become ``{'__tuple__': [...]}``; lists and dicts are rebuilt with
    each element / value passed through ``_hint_value``. Dict keys are left
    untouched. Anything that is not a tuple, list or dict is returned as is,
    so a bare top-level float is NOT rewritten here.
    """
    if isinstance(item, tuple):
        return {'__tuple__': [_hint_value(element) for element in item]}
    if isinstance(item, list):
        return [_hint_value(element) for element in item]
    if isinstance(item, dict):
        return {key: _hint_value(item[key]) for key in item}
    return item


class PickleEncoder(DefaultEncoder):
    """
    PickleEncoder builds the JSON string that is saved in redis.

    Compared with DefaultEncoder it tags tuples, sets, numpy arrays and
    infinite floats with marker keys, and attaches a pickled copy of objects
    that JSON cannot represent.
    """

    def encode(self, obj):
        """
        Pre-process ``obj`` with ``_hint_tuples`` and encode the result.

        Tuples and special floats must be rewritten up front because the base
        encoder serializes them natively and never calls ``default`` for them.
        """
        # Zero-argument super() resolves relative to this class; the previous
        # super(DefaultEncoder, self) bypassed DefaultEncoder in the lookup.
        return super().encode(_hint_tuples(obj))

    def default(self, o):
        """
        Convert a value the stock JSON encoder cannot serialize.

        Sets and numpy arrays are wrapped in ``__set__`` / ``__numpy__``
        markers. Any other object yields a dict with a ``_repr_brtc_`` entry
        (from ``_repr_html_()`` if present, else via ``addPlt`` if the object
        has ``savefig``, else from ``str(o)``) and a ``__pickled__`` entry
        holding ``pickle.dumps(o)`` as a list of ints. DataFrames additionally
        get ``'type': 'table'``.
        """
        # TODO add more support types
        if isinstance(o, set):
            return {'__set__': _to_default_list(list(o))}
        if isinstance(o, numpy.ndarray):
            return {'__numpy__': _to_default_list(o)}

        rb = BrtcReprBuilder()
        if hasattr(o, '_repr_html_'):
            rb.addHTML(o._repr_html_())
        elif hasattr(o, 'savefig'):
            rb.addPlt(o)
        else:
            rb.addRawTextMD(str(o))

        # Key order matches the previous literal dicts so the emitted JSON
        # text is unchanged.
        encoded = {'_repr_brtc_': rb.get(), '__pickled__': list(pickle.dumps(o))}
        if isinstance(o, pd.DataFrame):
            encoded['type'] = 'table'
        return encoded



def encode(obj, for_redis):
    """
    Serialize ``obj`` to a JSON string.

    ``PickleEncoder`` is used when ``for_redis`` is truthy, otherwise
    ``DefaultEncoder``.
    """
    encoder_cls = PickleEncoder if for_redis else DefaultEncoder
    return json.dumps(obj, cls=encoder_cls)
Expand Up @@ -123,6 +123,16 @@ def ada_boost_classification_predict(table, model, **params):


def _ada_boost_classification_predict(table, model, pred_col_name='prediction', prob_col_prefix='probability', suffix='index'):
if (table.shape[0] == 0):
new_cols = table.columns.tolist() + [pred_col_name]
classes = model['classifier'].classes_
if suffix == 'index':
prob_cols = [prob_col_prefix + '_{}'.format(i) for i in range(len(classes))]
else:
prob_cols = [prob_col_prefix + '_{}'.format(i) for i in classes]
new_cols += prob_cols
out_table = pd.DataFrame(columns=new_cols)
return {'out_table': out_table}
out_table = table.copy()
classifier = model['classifier']
_, test_data = check_col_type(table, model['params']['feature_cols'])
Expand Down
Expand Up @@ -20,6 +20,9 @@
from .decision_tree_classification import decision_tree_classification_predict
from .random_forest_classification import random_forest_classification_predict
from .naive_bayes_classification import naive_bayes_predict
from .ada_boost_classification import ada_boost_classification_predict
from .mlp_classification import mlp_classification_predict
from .xgb_classification import xgb_classification_predict
import numpy as np


Expand All @@ -35,7 +38,7 @@ def classification_predict(table, model, prediction_col='prediction', prob_prefi
return logistic_regression_predict(table=table, model=model, prediction_col=prediction_col, prob_prefix=prob_prefix,
output_log_prob=output_log_prob, log_prob_prefix=log_prob_prefix, thresholds=thresholds,
suffix=suffix)
if tmp_model['_type'] == 'svm_model':
if tmp_model['_type'] == 'svc_model':
return svm_classification_predict(table=table, model=model, prediction_col=prediction_col, prob_prefix=prob_prefix,
display_log_prob=output_log_prob, log_prob_prefix=log_prob_prefix, thresholds=thresholds,
suffix=suffix)
Expand All @@ -52,4 +55,26 @@ def classification_predict(table, model, prediction_col='prediction', prob_prefi
if tmp_model['_type'] == 'naive_bayes_model':
return naive_bayes_predict(table=table, model=model, prediction_col=prediction_col, prob_prefix=prob_prefix,
display_log_prob=output_log_prob, log_prob_prefix=log_prob_prefix, suffix=suffix)
if tmp_model['_type'] == 'ada_boost_classification_model':
return ada_boost_classification_predict(
table=table, model=model,
pred_col_name=prediction_col,
prob_col_prefix=prob_prefix, suffix=suffix
)
if tmp_model['_type'] == 'mlp_classification_model':
return mlp_classification_predict(
table=table, model=model,
prediction_col=prediction_col,
prob_prefix=prob_prefix,
output_log_prob=output_log_prob,
log_prob_prefix=log_prob_prefix,
suffix=suffix, thresholds=thresholds
)
if tmp_model['_type'] == 'xgb_classification_model':
return xgb_classification_predict(
table=table, model=model,
prediction_col=prediction_col,
probability_col=prob_prefix,
suffix=suffix, thresholds=thresholds
)
raise_runtime_error('''It is not supported yet.''')
Expand Up @@ -51,10 +51,19 @@ def _knn_classification(train_table, test_table, feature_cols, label_col, k=5, a

# Predict the class labels for the provided data
knn.fit(X_train, y_train)
classes = knn.classes_
if (test_table.shape[0] == 0):
new_cols = test_table.columns.tolist() + [pred_col_name]
if suffix == 'index':
prob_cols = [prob_col_prefix + '_{}'.format(i) for i in range(len(classes))]
else:
prob_cols = [prob_col_prefix + '_{}'.format(i) for i in classes]
new_cols += prob_cols
out_table = pd.DataFrame(columns=new_cols)
return {'out_table': out_table}
pred = knn.predict(X_test)
out_col_pred = pd.DataFrame(pred, columns=[pred_col_name])

classes = knn.classes_
if suffix == 'index':
suffixes = [i for i, _ in enumerate(classes)]
else:
Expand Down
Expand Up @@ -206,6 +206,23 @@ def logistic_regression_predict(table, model, **params):
def _logistic_regression_predict(table, model, prediction_col='prediction', prob_prefix='probability',
output_log_prob=False, log_prob_prefix='log_probability', thresholds=None,
suffix='index'):
if (table.shape[0] == 0):
new_cols = table.columns.tolist() + [prediction_col]
classes = model['lr_model'].classes_
if suffix == 'index':
prob_cols = [prob_prefix + '_{}'.format(i) for i in range(len(classes))]
else:
prob_cols = [prob_prefix + '_{}'.format(i) for i in classes]
if output_log_prob:
if suffix == 'index':
log_cols = [log_prob_prefix + '_{}'.format(i) for i in range(len(classes))]
else:
log_cols = [log_prob_prefix + '_{}'.format(i) for i in classes]
else:
log_cols = []
new_cols += prob_cols + log_cols
out_table = pd.DataFrame(columns=new_cols)
return {'out_table': out_table}
if 'features' in model:
feature_cols = model['features']
else:
Expand Down
Expand Up @@ -75,7 +75,7 @@
},
{
"id": "max_depth",
"label": "Maximum Depth",
"label": "Max Depth",
"description": "The maximum depth of the base estimator. In this Brightics Studio, the base estimator for Adaboost Regression model is Decision Tree Regression model. Note that it is one of the main parameters to tune to obtain good results by controlling the complexity of the base estimmators.",
"visibleOption": [],
"control": "InputBox",
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

0 comments on commit 7db4a5b

Please sign in to comment.