Skip to content

Commit

Permalink
fix dratio
Browse files Browse the repository at this point in the history
- update equation(s) to match paper
- add option to use sd or mad
- add second version of dratio from paper
- add tests
  • Loading branch information
grlloyd committed Jul 20, 2023
1 parent 106ae6f commit 611668f
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 17 deletions.
72 changes: 63 additions & 9 deletions R/d_ratio_filter_class.R
Expand Up @@ -4,11 +4,19 @@
#' M = dratio_filter(threshold=20,qc_label='QC',factor_name='Class')
#' M = model_apply(M,D)
#' @export dratio_filter
dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
dratio_filter = function(
threshold=20,
qc_label='QC',
factor_name,
method='ratio',
dispersion='sd',
...) {
out=struct::new_struct('dratio_filter',
threshold=threshold,
qc_label=qc_label,
factor_name=factor_name,
method=method,
dispersion=dispersion,
...)
return(out)
}
Expand All @@ -21,7 +29,9 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
factor_name='entity',
filtered='entity',
flags='entity',
d_ratio='data.frame'
d_ratio='data.frame',
method='enum',
dispersion='enum'
),
prototype=list(name = 'Dispersion ratio filter',
description = paste0('The dispersion ratio (d-ratio) compares the ',
Expand All @@ -33,7 +43,7 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
'the feature is removed.'),
type = 'filter',
predicted = 'filtered',
.params=c('threshold','qc_label','factor_name'),
.params=c('threshold','qc_label','factor_name','method','dispersion'),
.outputs=c('filtered','flags','d_ratio'),
citations=list(
bibentry(
Expand Down Expand Up @@ -75,8 +85,35 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
description = 'Flag indicating whether the feature was rejected by the filter or not.',
type='data.frame',
value=data.frame()
),
method=enum(
name='dratio method',
description = c(
'ratio' = paste0('Dispersion of the QCs divided by the ',
'dispersion of the samples. Corresponds to Eq 4 in ',
' Broadhurst et al (2018).'),
'euclidean' = paste0('Dispersion of the QCs divided by the ',
'euclidean length of the total dispersion. Total dispersion ',
'is estimated from the QC and Sample dispersion by assuming ',
'that they are orthogonal. Corresponds to Eq 5 in ',
'Broadhurst et al (2018)')),
allowed=c('ratio','euclidean'),
value='ratio',
type='character',
max_length = 1
),
dispersion=enum(
name='Dispersion method',
description = c(
'sd' = paste0('Dispersion is estimated using the ',
'standard deviation.'),
'mad' = paste0('Dispersion is estimated using the median ',
'absolute deviation.')),
allowed=c('sd','mad'),
value='sd',
type='character',
max_length = 1
)

)
)

Expand All @@ -86,25 +123,42 @@ setMethod(f="model_train",
signature=c("dratio_filter","DatasetExperiment"),
definition=function(M,D)
{
# mad QC samples
# dispersion QC samples
QC = filter_smeta(
mode='include',
levels=M$qc_label,
factor_name=M$factor_name)
QC = model_apply(QC,D)
QC = predicted(QC)$data
QC = apply(QC,2,mad,na.rm=TRUE)

if (M$dispersion=='mad') {
QC = apply(QC,2,mad,na.rm=TRUE)
} else {
QC = apply(QC,2,sd,na.rm=TRUE)
}

# mad not qc samples
# dispersion (not QC) samples
S = filter_smeta(
mode='exclude',
levels=M$qc_label,
factor_name=M$factor_name)
S = model_apply(S,D)
S = predicted(S)$data
S = apply(S,2,mad,na.rm=TRUE)

if (M$dispersion=='mad') {
S = apply(S,2,mad,na.rm=TRUE) # constant = 1.4826 default
} else {
S = apply(S,2,sd,na.rm=TRUE)
}

d_ratio=(QC/(QC+S))*100
# dispersion ratio
if (M$method=='ratio') {
# eq 4
d_ratio=(QC/S)*100
} else {
# eq 5
d_ratio= (QC / sqrt((QC^2) + (S^2))) * 100
}

OUT=d_ratio>M$threshold

Expand Down
13 changes: 12 additions & 1 deletion man/dratio_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

121 changes: 114 additions & 7 deletions tests/testthat/test-dratio-filter.R
@@ -1,12 +1,119 @@
# filter d-ratio
test_that('d-ratio filter works as expected',{
test_that('d-ratio filter works as expected using sd ratio',{
set.seed('57475')
# DatasetExperiment
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
M = dratio_filter(
threshold=20,
qc_label = 'QC',
factor_name='Class'
factor_name='Class',
method = 'ratio',
dispersion = 'sd'
)
M = model_apply(M,DE)

# manually compute d-ratio
qc_mad=unlist(
lapply(DE$data[DE$sample_meta$Class=='QC',],sd,na.rm=TRUE)
)
sa_mad=unlist(
lapply(DE$data[DE$sample_meta$Class!='QC',],sd,na.rm=TRUE)
)
dr = (qc_mad/(sa_mad))*100

# check manual vs fcn
expect_true(all(dr==M$d_ratio$d_ratio))

# check values havent changed
expect_equal(dr[[1]],23.05762,tolerance = 5e-6)
expect_equal(dr[[100]],64.49242,tolerance = 5e-6)

# just number of filtered columns
expect_true(ncol(predicted(M))==725)
expect_true(ncol(DE)-ncol(predicted(M))==854)
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
})

test_that('d-ratio filter works as expected using mad ratio',{
set.seed('57475')
# DatasetExperiment
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
M = dratio_filter(
threshold=20,
qc_label = 'QC',
factor_name='Class',
method = 'ratio',
dispersion = 'mad'
)
M = model_apply(M,DE)

# manually compute d-ratio
qc_mad=unlist(
lapply(DE$data[DE$sample_meta$Class=='QC',],mad,na.rm=TRUE)
)
sa_mad=unlist(
lapply(DE$data[DE$sample_meta$Class!='QC',],mad,na.rm=TRUE)
)
dr = (qc_mad/(sa_mad))*100

# check manual vs fcn
expect_true(all(dr==M$d_ratio$d_ratio))

# check values havent changed
expect_equal(dr[[1]],14.14758,tolerance = 5e-6)
expect_equal(dr[[100]],48.23871,tolerance = 5e-6)

# just number of filtered columns
expect_true(ncol(predicted(M))==834)
expect_true(ncol(DE)-ncol(predicted(M))==745)
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
})

test_that('d-ratio filter works as expected using sd euclidean',{
set.seed('57475')
# DatasetExperiment
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
M = dratio_filter(
threshold=20,
qc_label = 'QC',
factor_name='Class',
method = 'euclidean',
dispersion = 'sd'
)
M = model_apply(M,DE)

# manually compute d-ratio
qc_mad=unlist(
lapply(DE$data[DE$sample_meta$Class=='QC',],sd,na.rm=TRUE)
)
sa_mad=unlist(
lapply(DE$data[DE$sample_meta$Class!='QC',],sd,na.rm=TRUE)
)
dr = qc_mad/sqrt((qc_mad^2) + (sa_mad^2))*100

# check manual vs fcn
expect_true(all(dr==M$d_ratio$d_ratio))

# check values havent changed
expect_equal(dr[[1]],22.46809,tolerance = 5e-6)
expect_equal(dr[[100]],54.19862,tolerance = 5e-6)

# just number of filtered columns
expect_true(ncol(predicted(M))==747)
expect_true(ncol(DE)-ncol(predicted(M))==832)
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
})

test_that('d-ratio filter works as expected using mad euclidean',{
set.seed('57475')
# DatasetExperiment
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
M = dratio_filter(
threshold=20,
qc_label = 'QC',
factor_name='Class',
method = 'euclidean',
dispersion = 'mad'
)
M = model_apply(M,DE)

Expand All @@ -17,17 +124,17 @@ test_that('d-ratio filter works as expected',{
sa_mad=unlist(
lapply(DE$data[DE$sample_meta$Class!='QC',],mad,na.rm=TRUE)
)
dr = (qc_mad/(qc_mad+sa_mad))*100
dr = qc_mad/sqrt((qc_mad^2) + (sa_mad^2))*100

# check manual vs fcn
expect_true(all(dr==M$d_ratio$d_ratio))

# check values havent changed
expect_equal(dr[[1]],12.39,tolerance = 1e3)
expect_equal(dr[[100]],32.54,tolerance = 1e3)
expect_equal(dr[[1]],14.00808,tolerance = 5e-6)
expect_equal(dr[[100]],43.44776,tolerance = 5e-6)

# just number of filtered columns
expect_true(ncol(predicted(M))==1052)
expect_true(ncol(DE)-ncol(predicted(M))==527)
expect_true(ncol(predicted(M))==850)
expect_true(ncol(DE)-ncol(predicted(M))==729)
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
})

0 comments on commit 611668f

Please sign in to comment.