Skip to content

Commit

Permalink
Merge pull request #183 from PNNL-CompBio/sample-schema-fix
Browse files Browse the repository at this point in the history
Sample schema fix
  • Loading branch information
sgosline authored May 21, 2024
2 parents d733f56 + 1cc9d0b commit 90153a2
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 6 deletions.
3 changes: 2 additions & 1 deletion build/beatAML/GetBeatAML.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,10 @@ def generate_samples_file(prev_samples_path):
prot_samples.rename(columns={"specimenType": "common_name"}, inplace=True)
prot_samples["cancer_type"] = "Acute Myeloid Leukaemia"
prot_samples["model_type"] = "ex vivo"
prot_samples["other_id_source"] = "beatAML"
prot_samples["other_id_source"] = "beatAML"

all_samples = pd.concat([prot_samples, full_samples])
all_samples['species'] = 'Homo sapiens'
maxval = max(pd.read_csv(prev_samples_path).improve_sample_id)
mapping = {labId: i for i, labId in enumerate(all_samples['other_id'].unique(), start=(int(maxval)+1))}
all_samples['improve_sample_id'] = all_samples['other_id'].map(mapping)
Expand Down
1 change: 1 addition & 0 deletions build/hcmi/01-createHCMISamplesFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def align_to_linkml_schema(input_df):
}

# Apply mapping
input_df['species'] = 'Homo sapiens' ##i assume they're all human?
input_df['model_type'] = input_df['model_type'].map(mapping_dict)
input_df.dropna(subset=['model_type'], inplace=True)

Expand Down
4 changes: 2 additions & 2 deletions build/mpnst/00_sample_gen.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()
##first create samples for the original tumors
tumorTable<-manifest|>
dplyr::select(common_name='Sample')|>
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)

##then create samples for the PDX
sampTable<-manifest|>
dplyr::select(c(common_name='Sample',MicroTissueDrugFolder))|>
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)


Expand Down
3 changes: 2 additions & 1 deletion build/mpnst/01_mpnst_get_omics.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ cnv<-do.call(rbind,lapply(setdiff(combined$CopyNumber,NA),function(x){
subset(!is.na(entrez_id))|>
dplyr::select(entrez_id,log2)|>
dplyr::distinct()|>
dplyr::mutate(copy_number=2^log2)
dplyr::mutate(copy_number=2^log2)|>
dplyr::select(-log2)

res<-long_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del',
Expand Down
4 changes: 2 additions & 2 deletions build/utils/fit_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ def main():
fname = args.output or 'combined_single_response_agg'
process_df_part(df_all, fname, beataml=args.beataml)#, start=args.start, count=args.count)

if args.beataml == False:
format_coderd_schema(fname+'.0')
# if args.beataml == False:
format_coderd_schema(fname+'.0')

if __name__ == '__main__':
main()

0 comments on commit 90153a2

Please sign in to comment.