Skip to content

Commit

Permalink
Merge branch 'master' into master-dl
Browse files Browse the repository at this point in the history
  • Loading branch information
parkjh80 committed May 27, 2020
2 parents c57e1fe + 346714e commit 87362fd
Show file tree
Hide file tree
Showing 50 changed files with 148 additions and 77 deletions.
7 changes: 5 additions & 2 deletions README.md
Expand Up @@ -16,15 +16,17 @@ Charts and report generators are also provided to visualize data in various ways
Please visit our web site http://www.brightics.ai

## Getting started
### Docker Image
The Brightics Studio Docker image is available on [Docker Hub](https://hub.docker.com/r/brightics/studio).

### Prerequisite
* macOS users have to install [graphviz](http://graphviz.org/download/) using [Homebrew](https://brew.sh/)
* macOS users need to install [graphviz](http://graphviz.org/download/) using [Homebrew](https://brew.sh/) in order to plot the tree figures of Decision Tree. However, if it is not easy to install, you can simply skip this step.
```
brew install graphviz
```
* Some functions that interact with databases need client libraries such as [Oracle Instant Client](http://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html)

### Download
During the beta phase, releases and patches will probably be updated every week.<br>
Release files can be downloaded from the GitHub releases page or from our web site http://www.brightics.ai/downloads

### Installation
Expand All @@ -43,6 +45,7 @@ Details of the directory is as follows:
You don't need to prepare anything before running it. Each release package is self-contained and includes all requirements.<br>
Go to the unzipped directory and run:

Brightics-Studio-Launcher.exe : Launcher for Windows
start-brightics.cmd : for Windows
start-brightics.sh : for Linux and macOS

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion bin/start-brightics.sh
Expand Up @@ -29,6 +29,6 @@ cd $BRIGHTICS_PACKAGES_HOME/brightics-server

# Visual Analytics
cd $BRIGHTICS_PACKAGES_HOME/visual-analytics
$BRIGHTICS_PACKAGES_HOME/lib/node/node app.js --user_id $USER_ID --access_token $ACCESS_TOKEN &>/dev/null &
node app.js --user_id $USER_ID --access_token $ACCESS_TOKEN &>/dev/null &
echo $! > va.pid

69 changes: 44 additions & 25 deletions docker/Dockerfile
@@ -1,43 +1,62 @@
FROM centos:7
# Stage 1: builder
FROM centos:centos7 as builder

WORKDIR /opt

# set basic environment
RUN yum install -y git java-1.8.0-openjdk-devel bzip2 \
&& yum groupinstall -y "Development Tools" \
&& curl -s http://apache.mirror.cdnetworks.com/maven/maven-3/3.6.0/binaries/apache-maven-3.6.0-bin.tar.gz | tar xzv \
&& curl -s https://nodejs.org/download/release/v8.11.2/node-v8.11.2-linux-x64.tar.gz | tar xzv \
&& curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3

ENV PYTHON_HOME=/opt/miniconda3 \
JAVA_HOME=/usr/lib/jvm/java \
NODEJS_HOME=/opt/node-v8.11.2-linux-x64 \
M2_HOME=/opt/apache-maven-3.6.0

RUN yum install -y git java-1.8.0-openjdk-devel bzip2
RUN yum groupinstall -y "Development Tools"
RUN curl -s http://apache.mirror.cdnetworks.com/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz | tar xzv
RUN curl -s https://nodejs.org/download/release/v8.11.2/node-v8.11.2-linux-x64.tar.gz | tar xzv
RUN curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
RUN sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3

ENV PYTHON_HOME=/opt/miniconda3
ENV JAVA_HOME=/usr/lib/jvm/java
ENV NODEJS_HOME=/opt/node-v8.11.2-linux-x64
ENV M2_HOME=/opt/apache-maven-3.6.3
ENV PATH=$PYTHON_HOME/bin:$JAVA_HOME/bin:$NODEJS_HOME/bin:${M2_HOME}/bin:$PATH


# clone and package
RUN mkdir /git \
&& cd /git \
&& git clone https://github.com/brightics/studio.git \
&& cd /git/studio \
&& mvn clean package -DskipTests

RUN mkdir /git
RUN cd /git && git clone https://github.com/brightics/studio.git
RUN cd /git/studio && mvn clean package -DskipTests

# setup
WORKDIR /brightics-studio
RUN mv /git/studio/build/target/dist/brightics-studio / \
&& cd /brightics-studio \
&& sed -i "s/\%\*\ //g" setup.sh \
&& ./setup.sh

RUN mv /git/studio/build/target/dist/brightics-studio /
RUN sed -i "s/\"127.0.0.1\",/\"0.0.0.0\",/g" /brightics-studio/visual-analytics/conf.json
RUN sed -i "s/\%\*\ //g" /brightics-studio/setup.sh
RUN sed -i '24d' /brightics-studio/start-brightics.sh
RUN ./setup.sh
RUN rm -rf /brightics-studio/lib/etc /brightics-studio/lib/graphviz /brightics-studio/lib/hadoop /brightics-studio/lib/shortcut /brightics-studio/lib/node/node_modules/npm/changelogs /brightics-studio/lib/node/node_modules/npm/doc /brightics-studio/lib/node/node_modules/npm/html /brightics-studio/lib/node/node_modules/npm/man /brightics-studio/lib/node/node_modules/npm/scripts /brightics-studio/lib/node/node_modules/npm/*.md /brightics-studio/lib/node/node_modules/npm/AUTHORS /brightics-studio/lib/node/node_modules/npm/TODO.org /brightics-studio/lib/node/node_modules/npm/.github /opt/zulu8.46.0.19-ca-jre8.0.252-linux_x64/man


# Stage 2
FROM centos:centos7

COPY --from=builder /brightics-studio /brightics-studio
COPY --from=builder /opt/miniconda3 /opt/miniconda3

WORKDIR /opt

RUN yum install -y graphviz && \
yum clean all && \
rm -rf /var/cache/yum && \
curl -s https://cdn.azul.com/zulu/bin/zulu8.46.0.19-ca-jre8.0.252-linux_x64.tar.gz | tar xzv

ENV PYTHON_HOME=/opt/miniconda3 \
JAVA_HOME=/opt/zulu8.46.0.19-ca-jre8.0.252-linux_x64 \
NODEJS_HOME=/brightics-studio/lib/node

ENV PATH=$PYTHON_HOME/bin:$JAVA_HOME/bin:$NODEJS_HOME:$PATH

WORKDIR /brightics-studio

# expose the port for visual-analytics
EXPOSE 3000


ADD entrypoint.sh /etc/entrypoint.sh
RUN chmod +x /etc/entrypoint.sh
ENTRYPOINT ["/etc/entrypoint.sh"]
Expand Up @@ -20,7 +20,6 @@
from brightics.common.validation import greater_than, greater_than_or_equal_to, over_under
import pandas as pd
import numpy as np
import kss
from nltk import tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize
Expand Down Expand Up @@ -68,8 +67,15 @@ def _tokenizer_kor(texts, normalization=True, stemming=True, pos_extraction=None
def _tokenize_for_summarize(text):

# Sentence separator
splitted_array = kss.split_sentences(text)

import platform
os = platform.system()
if os == 'Linux':
import kss
splitted_array = kss.split_sentences(text)
else:
from brightics.function.textanalytics.pykss import pykss
splitted_array = pykss.split_sentences(text)

# Tokenizer
tokenized_table = _tokenizer_kor(texts=splitted_array, pos_extraction=['Noun'])
len_doc = len(tokenized_table)
Expand Down
Expand Up @@ -16,7 +16,6 @@

from brightics.common.utils import check_required_parameters
from nltk import tokenize
import kss
import pandas as pd
import numpy as np

Expand All @@ -31,7 +30,14 @@ def _split_sentences(table, input_col, language='kor'):
doc_col = table[input_col].values

if (language == 'kor'):
sent_tokenizer = kss.split_sentences
import platform
os = platform.system()
if os == 'Linux':
import kss
sent_tokenizer = kss.split_sentences
else:
from brightics.function.textanalytics.pykss import pykss
sent_tokenizer = pykss.split_sentences
elif (language == 'eng'):
sent_tokenizer = tokenize.sent_tokenize

Expand Down
125 changes: 81 additions & 44 deletions lib/requirements.txt
@@ -1,44 +1,81 @@
arch
asn1crypto
boto3
botocore
cached-property
cffi
cryptography
cx-Oracle
cycler
Cython
docutils
graphviz
idna
jmespath
kiwisolver
lightgbm
matplotlib
numpy
pandas
pandasql
patsy
pg8000
py4j
pyarrow
pycparser
pydotplus
PyMySQL
pyparsing
python-dateutil
pytz
redis
s3transfer
scikit-learn
scipy
seaborn
six
sklearn
SQLAlchemy
tqdm
urllib3
xgboost
statsmodels
pandas-profiling
numexpr==2.6.4
arch==4.7.0
asn1crypto==0.24.0
boto==2.49.0
boto3==1.9.67
botocore==1.12.67
cached-property==1.5.1
certifi==2018.11.29
cffi==1.11.5
chardet==3.0.4
cryptography==2.4.2
cx-Oracle==7.0.0
cycler==0.10.0
Cython==0.29.2
decorator==4.3.0
docker==3.7.0
docker-compose==1.23.2
docker-pycreds==0.4.0
dockerpty==0.4.1
docopt==0.6.2
docutils==0.14
dtaidistance==1.2.3
gensim==3.7.3
graphviz==0.9
idna==2.7
imbalanced-learn==0.5.0
imblearn==0.0
implicit==0.4.0
Jinja2==2.10.1
jmespath==0.9.3
joblib==0.13.2
JPype1==0.6.3
jsonschema==2.6.0
kiwisolver==1.0.1
lightgbm==2.2.2
MarkupSafe==1.1.1
matplotlib==3.0.2
networkx==2.2
nltk==3.4.3
numexpr==2.6.2
numpy==1.18.1
opencv-python-headless==4.0.0.21
pandas==0.23.4
pandas-profiling==1.4.1
pandasql==0.7.3
passlib==1.7.1
patsy==0.5.1
pg8000==1.13.1
pmdarima==1.1.0
protobuf==3.7.0
protobuf3-to-dict==0.1.5
psycopg2-binary==2.7.5
py4j==0.10.8.1
pyarrow==0.11.1
pycparser==2.19
pydotplus==2.0.2
pymssql==2.1.4
PyMySQL==0.9.3
pyparsing==2.3.0
python-dateutil==2.7.5
pytz==2018.7
PyYAML==3.13
redis==2.10.6
requests==2.20.1
s3transfer==0.1.13
sagemaker==1.18.5
scikit-learn==0.21.3
scipy==1.2.0
seaborn==0.9.0
six==1.12.0
sklearn==0.0
smart-open==1.8.3
soupsieve==1.9.1
SQLAlchemy==1.2.15
statsmodels==0.9.0
texttable==0.9.1
tqdm==4.28.1
twkorean==0.1.5
urllib3==1.24.1
websocket-client==0.55.0
xgboost==0.80
xlrd==1.2.0

0 comments on commit 87362fd

Please sign in to comment.