/
violins.py
431 lines (391 loc) · 21 KB
/
violins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
import os
import sys
import traceback
from typing import Callable, Generator, List, Tuple
import pandas as pd
import seaborn as sns
from PySide2.QtCore import QEvent, QObject, QRunnable, QThreadPool, Qt, Signal, Slot
from PySide2.QtGui import QIcon, QPixmap
from PySide2.QtWidgets import (QApplication, QCheckBox, QComboBox, QDialog, QFileDialog, QFormLayout, QFrame, QLabel,
QLineEdit, QMainWindow, QMessageBox, QPushButton, QSizePolicy, QVBoxLayout, QWidget)
from matplotlib import use as set_backend
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas, NavigationToolbar2QT as NavBar
from matplotlib.figure import Figure
from matplotlib.lines import Line2D
from xlrd import XLRDError
from src.utils import get_project_root
# Select matplotlib's Qt5Agg backend (set_backend is matplotlib.use) for the Qt-embedded canvas used in this module.
set_backend('Qt5Agg')
# Apply seaborn's "whitegrid" style to the plots drawn by this module.
sns.set(style="whitegrid")
class ViolinGUI(QMainWindow):
    """Main Window Widget for ViolinGUI.

    The window lets the user load the raw input file and the SCOUTS output folder, type the sample names,
    choose which populations/marker/outlier type to compare, and draw the violins in a secondary window.
    """
    # Qt style sheets shared by the widgets created in __init__, keyed by widget role.
    style = {
        'title': 'QLabel {font-size: 18pt; font-weight: 600}',
        'header': 'QLabel {font-size: 12pt; font-weight: 520}',
        'label': 'QLabel {font-size: 10pt}',
        'button': 'QPushButton {font-size: 10pt}',
        'run button': 'QPushButton {font-size: 18pt; font-weight: 600}',
        'line edit': 'QLineEdit {font-size: 10pt}',
        'checkbox': 'QCheckBox {font-size: 10pt}',
        'drop down': 'QComboBox {font-size: 10pt}'
    }

    def __init__(self) -> None:
        """ViolinGUI Constructor. Defines all aspects of the GUI."""
        # ## Setup section
        # Inherits from QMainWindow
        super().__init__()
        self.rootdir = get_project_root()
        # QMainWindow basic properties
        self.setWindowTitle("SCOUTS - Violins")
        self.setWindowIcon(QIcon(os.path.abspath(os.path.join(self.rootdir, 'src', 'scouts.ico'))))
        # Creates QWidget as QMainWindow's central widget
        self.page = QWidget(self)
        self.setCentralWidget(self.page)
        # Miscellaneous initialization values
        self.threadpool = QThreadPool()  # Threadpool for workers
        self.population_df = None  # DataFrame of whole population (raw data)
        self.summary_df = None  # DataFrame indicating which SCOUTS output corresponds to which rule
        self.summary_path = None  # path to all DataFrames generated by SCOUTS

        self.main_layout = QVBoxLayout(self.page)

        # Title section
        # Title
        self.title = QLabel(self.page)
        self.title.setText('SCOUTS - Violins')
        self.title.setStyleSheet(self.style['title'])
        self.title.adjustSize()
        self.main_layout.addWidget(self.title)

        # ## Input section
        # Input header
        self.input_header = QLabel(self.page)
        self.input_header.setText('Load data')
        self.input_header.setStyleSheet(self.style['header'])
        self.input_header.adjustSize()
        self.main_layout.addWidget(self.input_header)
        # Input/Output frame
        self.input_frame = QFrame(self.page)
        self.input_frame.setFrameShape(QFrame.StyledPanel)
        self.input_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.input_frame)
        # Raw data button (its object name is what get_path uses to tell the two buttons apart)
        self.input_button = QPushButton(self.page)
        self.input_button.setStyleSheet(self.style['button'])
        self.set_icon(self.input_button, 'x-office-spreadsheet')
        self.input_button.setObjectName('file')
        self.input_button.setText(' Load raw data file')
        self.input_button.setToolTip('Load raw data file (the file given to SCOUTS as the input file)')
        self.input_button.clicked.connect(self.get_path)
        # SCOUTS results button
        self.output_button = QPushButton(self.page)
        self.output_button.setStyleSheet(self.style['button'])
        self.set_icon(self.output_button, 'folder')
        self.output_button.setObjectName('folder')
        self.output_button.setText(' Load SCOUTS results')
        self.output_button.setToolTip('Load data from SCOUTS analysis '
                                      '(the folder given to SCOUTS as the output folder)')
        self.output_button.clicked.connect(self.get_path)
        # Add widgets above to input frame Layout
        self.input_frame.layout().addRow(self.input_button)
        self.input_frame.layout().addRow(self.output_button)

        # ## Samples section
        # Samples header
        self.samples_header = QLabel(self.page)
        self.samples_header.setText('Select sample names')
        self.samples_header.setStyleSheet(self.style['header'])
        self.samples_header.adjustSize()
        self.main_layout.addWidget(self.samples_header)
        # Samples frame
        self.samples_frame = QFrame(self.page)
        self.samples_frame.setFrameShape(QFrame.StyledPanel)
        self.samples_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.samples_frame)
        # Samples label
        self.samples_label = QLabel(self.page)
        self.samples_label.setText('Write sample names delimited by semicolons below.\nEx: Control;Treat_01;Pac-03')
        self.samples_label.setStyleSheet(self.style['label'])
        # Sample names line edit
        self.sample_names = QLineEdit(self.page)
        self.sample_names.setStyleSheet(self.style['line edit'])
        # Add widgets above to samples frame Layout
        self.samples_frame.layout().addRow(self.samples_label)
        self.samples_frame.layout().addRow(self.sample_names)

        # ## Analysis section
        # Analysis header
        self.analysis_header = QLabel(self.page)
        self.analysis_header.setText('Plot parameters')
        self.analysis_header.setStyleSheet(self.style['header'])
        self.analysis_header.adjustSize()
        self.main_layout.addWidget(self.analysis_header)
        # Analysis frame
        self.analysis_frame = QFrame(self.page)
        self.analysis_frame.setFrameShape(QFrame.StyledPanel)
        self.analysis_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.analysis_frame)
        # Analysis labels
        self.analysis_label_01 = QLabel(self.page)
        self.analysis_label_01.setText('Compare')
        self.analysis_label_01.setStyleSheet(self.style['label'])
        self.analysis_label_02 = QLabel(self.page)
        self.analysis_label_02.setText('with')
        self.analysis_label_02.setStyleSheet(self.style['label'])
        self.analysis_label_03 = QLabel(self.page)
        self.analysis_label_03.setText('for marker')
        self.analysis_label_03.setStyleSheet(self.style['label'])
        self.analysis_label_04 = QLabel(self.page)
        self.analysis_label_04.setText('Outlier type')
        self.analysis_label_04.setStyleSheet(self.style['label'])
        # Analysis drop-down boxes
        self.drop_down_01 = QComboBox(self.page)
        self.drop_down_01.addItems(['whole population', 'non-outliers', 'top outliers', 'bottom outliers', 'none'])
        self.drop_down_01.setStyleSheet(self.style['drop down'])
        self.drop_down_01.setCurrentIndex(2)
        self.drop_down_02 = QComboBox(self.page)
        self.drop_down_02.addItems(['whole population', 'non-outliers', 'top outliers', 'bottom outliers', 'none'])
        self.drop_down_02.setStyleSheet(self.style['drop down'])
        self.drop_down_02.setCurrentIndex(0)
        # Marker drop-down starts empty; load_scouts_input_data fills it with the raw data columns
        self.drop_down_03 = QComboBox(self.page)
        self.drop_down_03.setStyleSheet(self.style['drop down'])
        self.drop_down_04 = QComboBox(self.page)
        self.drop_down_04.addItems(['OutS', 'OutR'])
        self.drop_down_04.setStyleSheet(self.style['drop down'])
        # Add widgets above to analysis frame Layout
        self.analysis_frame.layout().addRow(self.analysis_label_01, self.drop_down_01)
        self.analysis_frame.layout().addRow(self.analysis_label_02, self.drop_down_02)
        self.analysis_frame.layout().addRow(self.analysis_label_03, self.drop_down_03)
        self.analysis_frame.layout().addRow(self.analysis_label_04, self.drop_down_04)

        # Legend checkbox (stand-alone)
        self.legend_checkbox = QCheckBox(self.page)
        self.legend_checkbox.setText('Add legend to the plot')
        self.legend_checkbox.setStyleSheet(self.style['checkbox'])
        self.main_layout.addWidget(self.legend_checkbox)

        # Plot button (stand-alone); stays disabled until enable_plot finds both DataFrames loaded
        self.plot_button = QPushButton(self.page)
        self.set_icon(self.plot_button, 'system-run')
        self.plot_button.setText(' Plot')
        self.plot_button.setToolTip('Plot data after loading the input data and selecting parameters')
        self.plot_button.setStyleSheet(self.style['run button'])
        self.plot_button.setEnabled(False)
        self.plot_button.clicked.connect(self.run_plot)
        self.main_layout.addWidget(self.plot_button)

        # ## Secondary Window
        # This is used to plot the violins only
        self.secondary_window = QMainWindow(self)
        self.secondary_window.resize(720, 720)
        self.dynamic_canvas = DynamicCanvas(self.secondary_window, width=6, height=6, dpi=120)
        self.secondary_window.setCentralWidget(self.dynamic_canvas)
        self.secondary_window.addToolBar(NavBar(self.dynamic_canvas, self.secondary_window))

    def set_icon(self, widget: QWidget, icon: str) -> None:
        """Associates an icon to a widget.

        The themed icon called `icon` is used when available; otherwise the bundled
        `src/default_icons/<icon>.svg` under the project root is used as fallback.
        """
        i = QIcon()
        i.addPixmap(QPixmap(os.path.abspath(os.path.join(self.rootdir, 'src', 'default_icons', f'{icon}.svg'))))
        widget.setIcon(QIcon.fromTheme(icon, i))

    def get_path(self) -> None:
        """Opens a dialog box and loads the corresponding data into memory, depending on the caller widget.

        The sender's object name selects the dialog: 'file' asks for the raw data file, 'folder' asks for the
        SCOUTS output directory. Nothing is loaded when the dialog returns an empty selection.
        """
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        query = None
        func = None
        sender_name = self.sender().objectName()
        if sender_name == 'file':
            query, _ = QFileDialog.getOpenFileName(self, "Select file", "", "All Files (*)", options=options)
            func = self.load_scouts_input_data
        elif sender_name == 'folder':
            query = QFileDialog.getExistingDirectory(self, "Select Directory", options=options)
            func = self.load_scouts_results
        if query:
            self.load_data(query, func)

    def load_data(self, query: str, func: Callable) -> None:
        """Loads input data into memory, while displaying a loading message as a separate worker.

        `func` is called with `query` by a Worker started on the thread pool. The worker's signals show the
        loading dialog and disable the page on start, report errors, and re-enable the page when finished.
        """
        worker = Worker(func=func, query=query)
        message = self.loading_message()
        worker.signals.started.connect(message.show)
        worker.signals.started.connect(self.page.setDisabled)
        worker.signals.error.connect(self.generic_error_message)
        worker.signals.error.connect(message.destroy)
        # NOTE(review): `failed` carries no argument while setDisabled expects a bool - confirm this connection
        # behaves as intended under the PySide2 version in use.
        worker.signals.failed.connect(self.plot_button.setDisabled)
        worker.signals.success.connect(message.destroy)
        worker.signals.success.connect(self.enable_plot)
        worker.signals.finished.connect(self.page.setEnabled)
        self.threadpool.start(worker)

    def loading_message(self) -> QDialog:
        """Returns the message box to be displayed while the user waits for the input data to load."""
        message = QDialog(self)
        message.setWindowTitle('Loading')
        message.resize(300, 50)
        label = QLabel('loading DataFrame into memory...', message)
        label.setStyleSheet(self.style['label'])
        label.adjustSize()
        label.setAlignment(Qt.AlignCenter)
        # Center the label inside the dialog
        label.move(int((message.width() - label.width())/2), int((message.height() - label.height())/2))
        return message

    def load_scouts_input_data(self, query: str) -> None:
        """Loads data for whole population prior to SCOUTS into memory (used for plotting the whole population).

        The file is read as Excel first and as CSV when xlrd rejects it. The marker drop-down is then refilled
        with the column names of the loaded DataFrame.
        """
        try:
            self.population_df = pd.read_excel(query, index_col=0)
        except XLRDError:
            self.population_df = pd.read_csv(query, index_col=0)
        self.drop_down_03.clear()
        self.drop_down_03.addItems(list(self.population_df.columns))
        self.drop_down_03.setCurrentIndex(0)

    def load_scouts_results(self, query: str) -> None:
        """Loads the SCOUTS summary file into memory, in order to dynamically locate SCOUTS output files later when
        the user chooses which data to plot."""
        self.summary_df = pd.read_excel(os.path.join(query, 'summary.xlsx'), index_col=None)
        self.summary_path = query

    def enable_plot(self) -> None:
        """Enables plot button if all necessary files are placed in memory."""
        if isinstance(self.summary_df, pd.DataFrame) and isinstance(self.population_df, pd.DataFrame):
            self.plot_button.setEnabled(True)

    def run_plot(self) -> None:
        """Sets and starts the plot worker. The secondary window is shown once plotting succeeds."""
        worker = Worker(func=self.plot)
        worker.signals.error.connect(self.generic_error_message)
        worker.signals.success.connect(self.secondary_window.show)
        self.threadpool.start(worker)

    def plot(self) -> None:
        """Logic for plotting data based on user selection of populations, markers, etc.

        Expression values for each selected population are gathered into one long-format DataFrame
        (columns: sample, marker, population, expression) and drawn one violin per population and sample.
        """
        # NOTE(review): run_plot executes this method on a pool thread while it reads widgets and draws on the
        # canvas - confirm this is safe for the Qt/matplotlib versions in use.
        # Clear figure currently on plot
        self.dynamic_canvas.axes.cla()
        # Initialize values and get parameters from GUI
        columns = ['sample', 'marker', 'population', 'expression']
        samples = self.parse_sample_names()
        pops_to_analyse = [self.drop_down_01.currentText(), self.drop_down_02.currentText()]
        marker = self.drop_down_03.currentText()
        outlier_type = self.drop_down_04.currentText()
        cutoff_from_reference = outlier_type == 'OutR'
        # Partial frames are collected and concatenated once: DataFrame.append copies the whole frame on every
        # call and no longer exists in pandas 2.x.
        partial_frames = []
        # Start fetching data from files
        for pop in pops_to_analyse:
            # Whole population
            if pop == 'whole population':
                partial_frames.extend(self.yield_violin_values(df=self.population_df, population='whole population',
                                                               samples=samples, marker=marker, columns=columns))
            # Other comparisons
            elif pop != 'none':
                for file_number in self.yield_selected_file_numbers(summary_df=self.summary_df, population=pop,
                                                                    cutoff_from_reference=cutoff_from_reference,
                                                                    marker=marker):
                    # Output files are named by their zero-padded number, e.g. data/0001.xlsx (or .csv)
                    df_path = os.path.join(self.summary_path, 'data', f'{"%04d" % file_number}.')
                    try:
                        sample_df = pd.read_excel(df_path + 'xlsx', index_col=0)
                    except FileNotFoundError:
                        sample_df = pd.read_csv(df_path + 'csv', index_col=0)
                    if not sample_df.empty:
                        partial_frames.extend(self.yield_violin_values(df=sample_df, population=pop,
                                                                       samples=samples, marker=marker,
                                                                       columns=columns))
        # pd.concat raises on an empty list, so fall back to an empty frame with the expected columns
        violin_df = pd.concat(partial_frames) if partial_frames else pd.DataFrame(columns=columns)
        # Plot data
        pops_to_plot = [p for p in pops_to_analyse if p != 'none']
        violin_df = violin_df[violin_df['marker'] == marker]
        for pop in pops_to_plot:
            pop_subset = violin_df.loc[violin_df['population'] == pop]
            for sample in samples:
                sample_subset = pop_subset.loc[pop_subset['sample'] == sample]
                # Later samples get progressively less saturated colors
                sat = 1.0 - samples.index(sample) / (len(samples) + 1)
                self.dynamic_canvas.update_figure(subset_by_sample=sample_subset, pop=pop, sat=sat, samples=samples)
        # Draw plotted data on canvas
        if self.legend_checkbox.isChecked():
            self.dynamic_canvas.add_legend()
        self.dynamic_canvas.axes.set_title(f'{marker} expression - {outlier_type}')
        self.dynamic_canvas.fig.canvas.draw()

    def parse_sample_names(self) -> List[str]:
        """Parse sample names from the QLineEdit Widget.

        Names are split on semicolons and stripped of surrounding whitespace; empty entries left by stray
        semicolons are dropped. When no name remains, [''] is returned, which matches every row in
        yield_violin_values (an empty substring is contained in any index label).
        """
        names = [name.strip() for name in self.sample_names.text().split(';')]
        return [name for name in names if name] or ['']

    def generic_error_message(self, error: Tuple[Exception, str]) -> None:
        """Error message box used to display any error message (including traceback) for any uncaught errors."""
        name, trace = error
        QMessageBox.critical(self, 'An error occurred!', f"Error: {str(name)}\n\nfull traceback:\n{trace}")

    def closeEvent(self, event: QEvent) -> None:
        """Defines the message box for when the user wants to quit ViolinGUI."""
        title = 'Quit Application'
        mes = "Are you sure you want to quit?"
        reply = QMessageBox.question(self, title, mes, QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply == QMessageBox.Yes:
            self.setEnabled(False)
            # Let running workers finish before the window is closed
            self.threadpool.waitForDone()
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def yield_violin_values(df: pd.DataFrame, population: str, samples: List[str], marker: str,
                            columns: List[str]) -> Generator[pd.DataFrame, None, None]:
        """Yields one DataFrame per sample with its expression values, along with information of sample, marker
        and population. These DataFrames are combined into the violin plot DataFrame in order to simplify plotting
        the violins afterwards.

        A row belongs to a sample when its index label contains the sample name as a literal substring.
        """
        for sample in samples:
            # regex=False: sample names are plain text, so characters like '+' or '(' must not act as a pattern
            series = df.loc[df.index.str.contains(sample, regex=False)].loc[:, marker]
            yield pd.DataFrame({'sample': sample, 'marker': marker, 'population': population, 'expression': series},
                               columns=columns)

    @staticmethod
    def yield_selected_file_numbers(summary_df: pd.DataFrame, population: str, cutoff_from_reference: bool,
                                    marker: str) -> Generator[int, None, None]:
        """Yields file numbers from DataFrames resulting from SCOUTS analysis. File numbers are yielded based on
        global values, i.e. the comparisons the user wants to perform.

        Each summary row is unpacked into exactly five values (file number, cutoff origin, reference, marker the
        outliers were selected for, population category); a row is selected when the last three criteria match.
        """
        cutoff = 'sample'
        if cutoff_from_reference is True:
            cutoff = 'reference'
        for index, (file_number, cutoff_from, reference, outliers_for, category) in summary_df.iterrows():
            if cutoff_from == cutoff and outliers_for == marker and category == population:
                yield file_number
class DynamicCanvas(FigureCanvas):
    """Matplotlib canvas hosting the violin plot, shown in a window separate from the main GUI window."""
    # RGB color used for each population that can be drawn
    colors = {
        'top outliers': [0.988, 0.553, 0.384],  # orange
        'bottom outliers': [0.259, 0.455, 0.643],  # blue
        'non-outliers': [0.400, 0.761, 0.647],  # green
        'whole population': [0.600, 0.600, 0.600]  # gray
    }

    def __init__(self, parent=None, width=5, height=4, dpi=100) -> None:
        """Creates the figure (width x height, at the given dpi) with a single subplot and attaches the canvas
        to `parent`."""
        figure = Figure(figsize=(width, height), dpi=dpi)
        self.fig = figure
        self.axes = figure.add_subplot(111)
        FigureCanvas.__init__(self, figure)
        self.setParent(parent)
        # Let the canvas grow with its container in both directions
        expanding = QSizePolicy.Expanding
        self.setSizePolicy(expanding, expanding)
        self.updateGeometry()

    def update_figure(self, subset_by_sample: pd.DataFrame, pop: str, sat: float, samples: List[str]) -> None:
        """Draws the violin for one sample subset on the shared axes.

        The color is looked up from the population name, `sat` sets its saturation and `samples` fixes the
        order of the x axis.
        """
        sns.violinplot(data=subset_by_sample, x='sample', y='expression', order=samples,
                       color=self.colors[pop], saturation=sat, ax=self.axes)

    def add_legend(self) -> None:
        """Adds a legend with one square marker per entry of `colors` (if the user chose to do so)."""
        names = list(self.colors)
        handles = [Line2D([], [], color=self.colors[name], marker='s', linestyle='None') for name in names]
        self.axes.legend(handles, names, fontsize=8)
class Worker(QRunnable):
    """Runnable that calls a function on a pool thread and reports progress through WorkerSignals.
    Used for loading DataFrames and generating plots without blocking the GUI."""

    def __init__(self, func: Callable, *args, **kwargs) -> None:
        """Stores the callable and the positional/keyword arguments it will be called with."""
        super().__init__()
        self.func, self.args, self.kwargs = func, args, kwargs
        self.signals = WorkerSignals()

    @Slot()
    def run(self) -> None:
        """Calls the stored function and emits the matching signals.

        Order of emission: `started`, then either `error` + `failed` (an Exception was raised) or `success`,
        and always `finished` last.
        """
        signals = self.signals
        signals.started.emit(True)
        try:
            self.func(*self.args, **self.kwargs)
        except Exception as exc:
            # Hand the exception and its formatted traceback to whoever listens for errors
            signals.error.emit((exc, traceback.format_exc()))
            signals.failed.emit()
        else:
            signals.success.emit()
        finally:
            signals.finished.emit(True)
class WorkerSignals(QObject):
    """Defines the signals available from a running worker thread. Supported signals are:
    Started: Worker has started its job. Emits a boolean.
    Error: an Exception was raised. Emits a tuple containing an Exception object and the traceback as a string.
    Failed: Worker has not finished its job due to an error. Nothing is emitted.
    Success: Worker has finished executing without errors. Nothing is emitted.
    Finished: Worker has stopped working (either naturally or by raising an Exception). Emits a boolean."""
    started = Signal(bool)
    # Declared as a generic Python object: the payload emitted by Worker.run is an (exception, traceback) tuple,
    # not a bare Exception.
    error = Signal(object)
    failed = Signal()
    success = Signal()
    finished = Signal(bool)
def main() -> None:
    """Entry point function for ViolinGUI: builds the Qt application, shows the main window and exits with the
    event loop's return code."""
    application = QApplication(sys.argv)
    window = ViolinGUI()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)