Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Suggests:
boot,
purrr,
gridExtra,
plotly,
knitr,
rmarkdown,
BiocStyle,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ export(MSstatsPrepareDoseResponseFit)
export(convertGroupToNumericDose)
export(doseResponseFit)
export(futureExperimentSimulation)
export(plot_tpr_power_curve)
export(predictIC50)
export(predictIC50Parallel)
export(run_tpr_simulation)
export(visualizeResponseProtein)
import(dplyr)
importFrom(BiocParallel,bplapply)
Expand All @@ -15,6 +17,7 @@ importFrom(dplyr,arrange)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
Expand All @@ -32,9 +35,11 @@ importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_linetype_manual)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,ylim)
Expand Down
142 changes: 142 additions & 0 deletions R/TPR_Power_Curve.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Hardcoded concentration ladders: control (0) and highest dose always present,
# intermediate doses filled from a biologically motivated set.
CONC_MAP <- list(
"2" = c(0, 3000),
"3" = c(0, 1000, 3000),
"4" = c(0, 1, 1000, 3000),
"5" = c(0, 1, 100, 1000, 3000),
"6" = c(0, 1, 100, 300, 1000, 3000),
"7" = c(0, 1, 10, 100, 300, 1000, 3000),
"8" = c(0, 1, 10, 30, 100, 300, 1000, 3000),
"9" = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000)
)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed with Sarah, this should not be hard coded but rather each subsequent value should be picked based on farthest distance from the log(median) among a user's set of doses, not this hard-coded list.

A user will have any arbitrary set of doses as well (i.e. it's not just 0, 1, 3, 10. You might have 0, 2, 4, 20, ...)

This also means that another input parameter should be the concentrations themselves (e.g. 0,1,3,10,30,100,300,1000,3000) and the dose_range (e.g. c(2,9) being doses 2 through 9).

#' Run TPR simulation across a grid of concentration counts and replicate counts
#'
#' Sweeps over combinations of dose counts (2-9) and replicate counts,
#' calling \code{futureExperimentSimulation()} for each combination.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

People don't know what TPR simulation means or what it means to sweep over dose counts / replicate counts calling this futureExperimentSimulation function.

Can you be more thorough in a way so that a biologist with minimal coding experience would understand what this function does?

#'
#' @param rep_range Integer vector of length 2, c(min, max) for replicate sweep.
#' @param n_proteins Integer. Number of proteins to simulate. Default: 1000.
#'
#' @return A data.frame with columns: Interaction, TPR, N_rep, NumConcs.
#'
Comment thread
tonywu1999 marked this conversation as resolved.
#' @importFrom dplyr filter mutate select if_else
#' @export
run_tpr_simulation <- function(rep_range, n_proteins = 1000) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be unit tests for this function

k_grid <- as.integer(names(CONC_MAP))
rep_grid <- seq(rep_range[1], rep_range[2])
Comment thread
tonywu1999 marked this conversation as resolved.
Outdated

run_one <- function(n_rep, k_conc, seed = 123) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would define this function outside of run_tpr_simulation as a private helper function. And make the name of it more clear, like run_one_tpr_simulation

set.seed(seed + n_rep * 100 + k_conc)
concs_k <- CONC_MAP[[as.character(k_conc)]]

sim_args <- list(
N_proteins = n_proteins,
N_rep = n_rep,
Concentrations = concs_k,
IC50_Prediction = FALSE
)

temp_res <- futureExperimentSimulation(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little confused. Users should also pass in the actual dataset and the Protein ID that is considered a strong interaction. But I don't see where this is being passed to the function.

N_proteins = sim_args$N_proteins,
N_rep = sim_args$N_rep,
Concentrations = sim_args$Concentrations,
IC50_Prediction = sim_args$IC50_Prediction
)
temp_res$Hit_Rates_Data |>
dplyr::filter(Category %in% c("TPR (Strong)", "TPR (Weak)")) |>
dplyr::mutate(
N_rep = n_rep,
NumConcs = length(concs_k),
Interaction = dplyr::if_else(Category == "TPR (Strong)", "Strong", "Weak")
) |>
dplyr::select(Interaction, TPR = Percent, N_rep, NumConcs)
}

grid_df <- expand.grid(N_rep = rep_grid, k_conc = k_grid)
results <- do.call(rbind, lapply(seq_len(nrow(grid_df)), function(i) {
tryCatch(
run_one(n_rep = grid_df$N_rep[i], k_conc = grid_df$k_conc[i]),
error = function(e) {
warning(paste("Simulation failed for N_rep=", grid_df$N_rep[i],
", k_conc=", grid_df$k_conc[i], ":", conditionMessage(e)))
NULL
}
)
}))

if (is.null(results) || nrow(results) == 0) {
stop("All simulation runs failed. Please check your input parameters.")
}
return(results)
}

#' Plot TPR power curves for dose response proteomics experiments
#'
#' Creates a two-panel interactive plot showing True Positive Rate
#' for Strong and Weak interactions across concentration and replicate counts.
#'
#' @param simulation_results A data.frame from \code{run_tpr_simulation()}.
#'
#' @return A plotly object with two panels (Strong and Weak interaction).
#'
#' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous
#' scale_y_continuous scale_linetype_manual labs theme_bw theme element_text
#' @export
plot_tpr_power_curve <- function(simulation_results) {
k_grid <- sort(unique(simulation_results$NumConcs))
rep_levels <- sort(unique(simulation_results$N_rep))

ltypes <- c("dotted", "dotdash", "dashed", "longdash", "solid")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought - I thought we discussed doing a color gradient instead? In that case, 5 replicate validation is not needed either.

ltype_values <- setNames(ltypes[seq_along(rep_levels)], as.character(rep_levels))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

make_panel <- function(data, color, show_legend = FALSE) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put this outside of the plotting function instead of inside it and rename it so that it's clear what it's doing, aka plotting the TPR curves using ggplot2/plotly.

p <- ggplot2::ggplot(data,
ggplot2::aes(x = NumConcs, y = TPR, linetype = factor(N_rep))) +
ggplot2::geom_line(linewidth = 1.2, color = color) +
ggplot2::geom_point(size = 2, color = color) +
ggplot2::scale_x_continuous(breaks = k_grid) +
ggplot2::scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
ggplot2::scale_linetype_manual(values = ltype_values) +
ggplot2::labs(
x = "Number of concentrations",
y = "True Positive Rate (%)",
linetype = "Replicates"
) +
ggplot2::theme_bw(base_size = 14) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5)
)

if (!show_legend) {
p <- p + ggplot2::theme(legend.position = "none")
}
p
}

results_strong <- simulation_results[simulation_results$Interaction == "Strong", ]
results_weak <- simulation_results[simulation_results$Interaction == "Weak", ]

if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Package 'plotly' is required for interactive plots. Please install it.")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need this because plotly should already be a dependency of this package, meaning users will have it automatically installed when they install MSstatsResponse

}

p_strong <- plotly::ggplotly(make_panel(results_strong, "#1b9e77", FALSE))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you import plotly functions via @importFrom?

p_weak <- plotly::ggplotly(make_panel(results_weak, "#d95f02", TRUE))

plotly::subplot(p_strong, p_weak,
nrows = 1, shareY = TRUE, titleX = TRUE, titleY = TRUE,
margin = 0.12
) |> plotly::layout(
margin = list(t = 60),
annotations = list(
list(text = "<b>Strong interaction detection power</b>", x = 0.22, y = 1.08,
xref = "paper", yref = "paper", showarrow = FALSE,
font = list(size = 12), xanchor = "center"),
list(text = "<b>Weak interaction detection power</b>", x = 0.78, y = 1.08,
xref = "paper", yref = "paper", showarrow = FALSE,
font = list(size = 12), xanchor = "center")
)
)
}
35 changes: 0 additions & 35 deletions man/ConvertGroupToNumericDose.Rd

This file was deleted.

82 changes: 0 additions & 82 deletions man/DoseResponseFit.Rd

This file was deleted.

Loading
Loading