Skip to content

Commit

Permalink
ss-development: separated out
Browse files Browse the repository at this point in the history
  • Loading branch information
Sulstice committed Apr 11, 2023
1 parent 0f9665a commit 7fe23f9
Show file tree
Hide file tree
Showing 4 changed files with 9,644 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/discord_pca_analysis.yml
@@ -0,0 +1,31 @@
# Manually triggered workflow: installs the Python dependencies and runs the
# Discord principal-component-analysis bot script.
name: PRINCIPAL COMPONENT ANALYSIS BOT

on: [ workflow_dispatch ]

jobs:
  test:
    runs-on: "ubuntu-latest"
    steps:
      - name: Checkout source
        uses: actions/checkout@v4

      - name: Setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string; an unquoted 3.10
          # would be parsed as the float 3.1.
          python-version: "3.9"
          architecture: x64

      # scikit-learn is the real PyPI distribution name; the "sklearn" alias
      # is deprecated and its installation fails.
      - name: Install
        run: |
          pip install pandas
          pip install click
          pip install scikit-learn
          pip install numpy
          pip install rdkit-pypi
          pip install bokeh

      - name: Run Checker
        env:
          GITHUB_TOKEN: ${{ secrets.PCA_BOT_TOKEN }}
        run: |
          cd bot_services/discord
          python principal_component_analysis.py
          # Options listed for reference (currently not passed):
          # python principal_component_analysis.py --smiles_list --morgan_radius --bit_representation --number_of_clusters --number_of_components --random_state --file_name --principal_component_x --principal_component_y --x_axis_label --y_axis_label --plot_width --plot_height --title
43 changes: 43 additions & 0 deletions global_chem/convert_to_rdf.py
@@ -0,0 +1,43 @@
import rdflib
import csv
import pandas as pd

# Label used when a row has no predicate. NOTE(review): the spelling is kept
# byte-for-byte because it ends up as a node URI in the emitted graph;
# correcting it would change the output that consumers may already rely on.
FALLBACK_PREDICATE = 'miscellaenous'


def collect_edges(df):
    """Return the ordered, de-duplicated (subject, object) URI string pairs.

    Three layers of edges are produced, in this order:

    1. ``#/<name>`` -> ``#/<node>`` for every row,
    2. ``#/<node>`` -> ``#/<predicate>`` for every row,
    3. ``#/<predicate>`` -> ``#/global-chem`` for every row.

    A missing (NaN) predicate is replaced by ``FALLBACK_PREDICATE``.

    Args:
        df: DataFrame with at least the columns ``name``, ``node`` and
            ``predicate``.

    Returns:
        A list of ``(subject, object)`` string tuples in first-seen order.
        Repeated pairs are dropped; adding the same triple to the graph twice
        would be a no-op anyway.
    """
    names = df['name'].tolist()
    nodes = df['node'].tolist()
    predicates = [
        FALLBACK_PREDICATE if pd.isna(value) else value
        for value in df['predicate'].tolist()
    ]

    edges = [(f'#/{name}', f'#/{node}') for name, node in zip(names, nodes)]
    edges += [(f'#/{node}', f'#/{pred}') for node, pred in zip(nodes, predicates)]
    edges += [(f'#/{pred}', '#/global-chem') for pred in predicates]

    # dict.fromkeys keeps the first occurrence of each pair, in order.
    return list(dict.fromkeys(edges))


def main(tsv_path=None, destination='graph.ttl', rdf_format='application/rdf+xml'):
    """Convert the GlobalChem TSV into an RDF graph and write it to disk.

    Args:
        tsv_path: Path of the tab-separated input file (no header row; columns
            are name, smiles, node, predicate, path). Defaults to
            ``global_chem.tsv`` in the same directory as this script
            (presumably where the file lives in the repository; pass an
            explicit path otherwise).
        destination: Output file path handed to ``Graph.serialize``.
        rdf_format: Serialization format handed to ``Graph.serialize``.

    NOTE(review): the default destination has a ``.ttl`` extension while the
    default format is RDF/XML. Both defaults are kept as they were; confirm
    which of the two downstream consumers actually expect.
    """
    from pathlib import Path

    if tsv_path is None:
        # Resolve relative to this file instead of a user-specific absolute
        # path, so the script runs on any checkout.
        tsv_path = Path(__file__).resolve().parent / 'global_chem.tsv'

    df = pd.read_csv(
        tsv_path,
        delimiter='\t',
        header=None,
        names=['name', 'smiles', 'node', 'predicate', 'path'],
    )

    # Create the graph object which holds the triples
    graph = rdflib.Graph()
    connects_to = rdflib.URIRef("#connectsTo")

    for subject, obj in collect_edges(df):
        graph.add((rdflib.URIRef(subject), connects_to, rdflib.URIRef(obj)))

    graph.serialize(destination=destination, format=rdf_format)


if __name__ == '__main__':
    main()

0 comments on commit 7fe23f9

Please sign in to comment.