-
Notifications
You must be signed in to change notification settings - Fork 1
Merge TPR simulation function into Development #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5a26bdc
8b7eaf9
5d87188
b1ed032
cfabb47
38a10d5
bf82171
6236e60
73527a2
71a46e9
308e2d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,8 @@ Imports: | |
| dplyr, | ||
| stats, | ||
| parallel, | ||
| data.table | ||
| data.table, | ||
| plotly | ||
| Suggests: | ||
| MSstats, | ||
| MSstatsTMT, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -222,23 +222,38 @@ futureExperimentSimulation = function(N_proteins = 300, | |
| )) | ||
| } | ||
|
|
||
| # Get mean profile across proteins | ||
| # Get mean profile across proteins using string-based grouping to avoid floating point precision issues | ||
| profile = drug_data %>% | ||
| filter(protein %in% protein_ids) %>% | ||
| mutate(dose_nM = round(dose * 1e9)) %>% # Convert M to nM and round to handle precision | ||
| group_by(dose_nM) %>% | ||
| mutate(dose_str = as.character(dose)) %>% | ||
| group_by(dose_str) %>% | ||
| summarise( | ||
| LogIntensities = mean(response, na.rm = TRUE), | ||
| dose_numeric = dose[1], | ||
| .groups = 'drop' | ||
| ) %>% | ||
| rename(dose = dose_nM) | ||
| ) | ||
|
|
||
| # Create complete profile with all concentrations | ||
| # Match profile doses to target concentrations using nearest-neighbor on log scale. | ||
| # This handles unit mismatches without assuming a fixed conversion factor. | ||
| complete_profile = data.frame(dose = concentrations) | ||
|
|
||
| # Merge with existing data | ||
| complete_profile = complete_profile %>% | ||
| left_join(profile, by = "dose") | ||
| complete_profile$LogIntensities = NA_real_ | ||
|
|
||
| for (i in seq_len(nrow(complete_profile))) { | ||
| target = complete_profile$dose[i] | ||
| if (target == 0) { | ||
| # Control: match any zero-dose entry | ||
| zero_match = profile[profile$dose_numeric == 0, ] | ||
| if (nrow(zero_match) > 0) { | ||
| complete_profile$LogIntensities[i] = zero_match$LogIntensities[1] | ||
| } | ||
| } else if (any(profile$dose_numeric != 0)) { | ||
| # Non-zero: find closest on log scale | ||
| non_zero = profile[profile$dose_numeric != 0, ] | ||
| log_ratios = abs(log10(non_zero$dose_numeric) - log10(target)) | ||
| best = which.min(log_ratios) | ||
| complete_profile$LogIntensities[i] = non_zero$LogIntensities[best] | ||
| } | ||
| } | ||
|
|
||
| # Fill missing values with interpolation or nearest neighbor | ||
| for (i in which(is.na(complete_profile$LogIntensities))) { | ||
|
|
@@ -306,11 +321,32 @@ futureExperimentSimulation = function(N_proteins = 300, | |
| return(complete_profile) | ||
| } | ||
|
|
||
| # Extract templates for each category | ||
| # Load default templates as fallback for unspecified categories | ||
| default_template <- NULL | ||
| if (is.null(weak_proteins) || is.null(no_interaction_proteins)) { | ||
| template1_path <- system.file("extdata", "template1.RDS", package = "MSstatsResponse") | ||
| if (file.exists(template1_path)) { | ||
| default_template <- readRDS(template1_path) | ||
| } | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems to be a hard-coded template specific to the dataset you were given, but not extensible to other datasets. I think if a weak interaction / no interaction protein is not defined, we should fallback to giving the baseline profile. |
||
| # Extract templates: use user data for specified proteins, defaults for others | ||
| template = list( | ||
| strong_interaction = .getMeanProfile(strong_proteins, drug_data, concentrations), | ||
|
tonywu1999 marked this conversation as resolved.
|
||
| weak_interaction = .getMeanProfile(weak_proteins, drug_data, concentrations), | ||
| no_interaction = .getMeanProfile(no_interaction_proteins, drug_data, concentrations) | ||
| weak_interaction = if (!is.null(weak_proteins)) { | ||
| .getMeanProfile(weak_proteins, drug_data, concentrations) | ||
| } else if (!is.null(default_template)) { | ||
| default_template$weak_interaction[default_template$weak_interaction$dose %in% concentrations, ] | ||
| } else { | ||
| .getMeanProfile(NULL, drug_data, concentrations) | ||
| }, | ||
| no_interaction = if (!is.null(no_interaction_proteins)) { | ||
| .getMeanProfile(no_interaction_proteins, drug_data, concentrations) | ||
| } else if (!is.null(default_template)) { | ||
| default_template$no_interaction[default_template$no_interaction$dose %in% concentrations, ] | ||
| } else { | ||
| .getMeanProfile(NULL, drug_data, concentrations) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will anything break in the package if weak_interaction / no_interaction is set to the baseline profile (given we never provided any weak interaction / no interaction protein)? |
||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
| ) | ||
|
|
||
| # Validate template format | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this loop is overly complex. Shouldn't the dose_str column of the
profilevariable match the string version of the complete_profile$dose column? This could be a simple left_join if you have a string version of the dose column incomplete_profile.