Skip to content

Commit a06ae0e

Browse files
authored
Merge pull request #78 from shahcompbio/develop
Update database and metadata tables to version matching final paper r…
2 parents b82f407 + 66d8a1d commit a06ae0e

File tree

9 files changed

+2183
-2098
lines changed

9 files changed

+2183
-2098
lines changed

110_cohort_overview.Rmd

Lines changed: 63 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -231,8 +231,8 @@ names(cna) <- tolower(names(cna))
231231
# Filter samples and variants
232232
cna <- cna %>%
233233
# Add custom IMPACT annotations
234-
dplyr::rename("isabl_id" = "sample_id") %>%
235-
dplyr::left_join(db$sequencing_bulk_dna, by = "isabl_id") %>%
234+
dplyr::rename("spectrum_sample_id" = "sample_id") %>%
235+
dplyr::left_join(db$sequencing_bulk_dna, by = "spectrum_sample_id") %>%
236236
# Keep included patients
237237
filter(patient_id %in% included_patients) %>%
238238
# Convert to factor
@@ -246,14 +246,14 @@ cna
246246

247247
```{r}
248248
249-
snv_tbl <- snv %>%
250-
complete(patient_id, nesting(hugo_symbol))
249+
snv_tbl <- snv #%>%
250+
# complete(patient_id, nesting(hugo_symbol))
251251
252-
cna_tbl <- cna %>%
253-
complete(patient_id, nesting(hugo_symbol))
252+
cna_tbl <- cna# %>%
253+
# complete(patient_id, nesting(hugo_symbol))
254254
255-
fusions_tbl <- fusions %>%
256-
complete(patient_id, nesting(hugo_symbol))
255+
fusions_tbl <- fusions #%>%
256+
# complete(patient_id, nesting(hugo_symbol))
257257
258258
snv_cna_fusions_tbl <- bind_rows(snv_tbl, cna_tbl, fusions_tbl) %>%
259259
unite("type", c("variant_type_short","variant_classification_short","cna_type","fusion_type","mutation_status"), sep=",", na.rm = TRUE, remove = FALSE)
@@ -295,6 +295,11 @@ db$mpif_slide <- db$mpif_slide %>%
295295
# tumor_type != "Unknown",
296296
# therapy == "pre-Rx")
297297
298+
# Remove mpIF FOVs that are not included
299+
db$mpif_fov <- db$mpif_fov %>%
300+
mutate(sample_type = "Tumor") %>%
301+
filter(tme_inclusion_status == "Yes")
302+
298303
# Remove normal WGS samples and failed samples
299304
db$sequencing_bulk_dna <- db$sequencing_bulk_dna %>%
300305
mutate(sample_type = ifelse(!is.na(tumor_site), "Tumor", "Normal"),
@@ -351,12 +356,13 @@ gyn_diagnosis <- db$gyn_diagnosis %>%
351356
```{r merge_sample_inventory}
352357
inventory_list <-
353358
list(
354-
"scRNA" = db$sequencing_scrna,
355-
"H&E" = db$he_slide,
356-
"mpIF"= db$mpif_slide,
357-
"Bulk WGS" = db$sequencing_bulk_dna,
358-
"Myriad" = db$sequencing_myriad,
359-
"MSK-IMPACT" = db$sequencing_msk_impact_custom
359+
"scRNA - Sample metadata" = db$sequencing_scrna,
360+
"H&E - Sample metadata" = db$he_slide,
361+
"mpIF - Sample metadata"= db$mpif_slide,
362+
# "mpIF - FOV metadata"= db$mpif_fov,
363+
"Bulk WGS - Sample metadata" = db$sequencing_bulk_dna,
364+
"Myriad - Sample metadata" = db$sequencing_myriad,
365+
"MSK-IMPACT - Sample metadata" = db$sequencing_msk_impact_custom
360366
)
361367
362368
inventory <- plyr::join_all(
@@ -377,6 +383,44 @@ inventory
377383
```
378384

379385
```{r}
386+
387+
data_clinical_sample <- readr::read_tsv("/work/shah/vazquezi/projects/dmp-2022/mskimpact/data_clinical_sample.txt", skip = 4)
388+
389+
```
390+
391+
```{r}
392+
inventory_list <-
393+
list(
394+
"scRNA - Sample metadata" = db$sequencing_scrna %>%
395+
select(-c("technique","aliquot_id","sample_id","submission_status","qc_status","tme_inclusion_status")),
396+
"H&E - Sample metadata" = db$he_slide %>%
397+
select(-c("technique","image_hid","case_hid","is_site_matched","is_adjacent","submission_status","qc_status","tme_inclusion_status")),
398+
"mpIF - Sample metadata"= db$mpif_slide %>%
399+
select(-c("technique","pici_id","elab_id","aliquot_id","sample_id","submission_status","qc_status","tme_inclusion_status")),
400+
"mpIF - FOV metadata"= db$mpif_fov %>%
401+
select(-c("pici_id","fov_status","fov_qc")) %>%
402+
left_join(
403+
db$mpif_slide %>%
404+
select(-c("technique","project","pici_id","elab_id","isabl_experiment_system_id","aliquot_id","sample_id","patient_id","sample_type","submission_status","qc_status","tme_inclusion_status","panel","batch")),
405+
by = "spectrum_sample_id"),
406+
"Bulk WGS - Sample metadata" = db$sequencing_bulk_dna %>%
407+
select(-c("technique","aliquot_id","sample_id","submission_status","qc_status","tme_inclusion_status")),
408+
# arrange(-c("sample_type")),
409+
"Myriad - Sample metadata" = db$sequencing_myriad %>%
410+
select(-c("technique","aliquot_id","sample_id","submission_status","qc_status","tme_inclusion_status")),
411+
"MSK-IMPACT - Sample metadata" = db$sequencing_msk_impact_custom %>%
412+
select(-c("technique","aliquot_id","sample_id","submission_status","qc_status","tme_inclusion_status")) %>%
413+
left_join(
414+
data_clinical_sample %>%
415+
select(c("SAMPLE_ID","GENE_PANEL")),
416+
by = c("impact_dmp_sample_id"="SAMPLE_ID")
417+
)
418+
# arrange(-sample_type)
419+
)
420+
421+
inventory_list <- inventory_list %>%
422+
map(~.x %>% arrange(desc(sample_type), patient_id, tumor_megasite, tumor_supersite, tumor_site, tumor_subsite, procedure))
423+
380424
wb <- createWorkbook()
381425
lapply(seq_along(inventory_list), function(i){
382426
addWorksheet(wb=wb, sheetName = names(inventory_list[i]))
@@ -513,8 +557,8 @@ top_df <- db$mutational_signatures %>%
513557
filter(patient_id %in% colnames(oncoprint_main_mat)) %>%
514558
distinct(patient_id, .keep_all = TRUE) %>% # HACK
515559
arrange(match(patient_id, colnames(oncoprint_main_mat))) %>%
516-
mutate(wgs_signature = ordered(wgs_signature, levels = names(clrs$wgs_signature))) %>%
517-
dplyr::select(wgs_signature,
560+
mutate(consensus_signature = ordered(consensus_signature, levels = names(clrs$consensus_signature))) %>%
561+
dplyr::select(consensus_signature,
518562
patient_age,
519563
gyn_diagnosis_figo_stage,
520564
gyn_diagnosis_procedure_description) %>%
@@ -648,8 +692,8 @@ top_df <- patients %>%
648692
filter(patient_id %in% colnames(oncoprint_supp_mat)) %>%
649693
distinct(patient_id, .keep_all = TRUE) %>% # HACK
650694
arrange(match(patient_id, colnames(oncoprint_supp_mat))) %>%
651-
mutate(wgs_signature = ordered(wgs_signature, levels = names(clrs$wgs_signature))) %>%
652-
dplyr::select(wgs_signature,
695+
mutate(consensus_signature = ordered(consensus_signature, levels = names(clrs$consensus_signature))) %>%
696+
dplyr::select(consensus_signature,
653697
patient_age,
654698
gyn_diagnosis_figo_stage,
655699
gyn_diagnosis_procedure_description) %>%
@@ -1933,5 +1977,5 @@ patient_inventory_filtered <- inventory_filtered %>%
19331977
# Session
19341978

19351979
```{r}
1936-
sessionInfo()
1980+
devtools::session_info()
19371981
```

resources/db/tme/SPECTRUM.rds

191 KB
Binary file not shown.

0 commit comments

Comments
 (0)