Skip to content

Commit

Permalink
getting scores by transcript
Browse files Browse the repository at this point in the history
  • Loading branch information
tkmamidi committed Jan 8, 2024
1 parent 0fbc9ca commit 1fdaa89
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 26 deletions.
17 changes: 2 additions & 15 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,10 @@ def read_root():
return {"Hello": "World"}


@app.get("/items/{item_id}")
def read_item(item_id: int, q: Union[str, None] = None):
    """Echo ``item_id`` and the optional ``q`` value back to the caller.

    Returns:
        A dict with the keys ``"item_id"`` and ``"q"``; ``q`` is ``None``
        when it is not supplied.
    """
    return dict(item_id=item_id, q=q)


@app.get("/loc/{chromosome}/{start}/{end}")
def get_scores(chromosome: str, start: int, end: int):
    """Return the requested range formatted as ``chromosome:start-end``.

    Placeholder: no scores are looked up here; the response only echoes
    the region string under the ``"loc"`` key.
    """
    # TODO create interface to tabix to query range for DITTO scores
    region = f"{chromosome}:{start}-{end}"
    return {"loc": region}


# NOTE(review): this listing shows two route decorators and, further down, two
# consecutive return statements. It looks like a rendered diff in which each
# pair is an old line followed by its replacement — confirm against the
# repository before relying on either variant.
@app.get("/var/{chromosome}/{position}/{ref}/{alt}")
@app.get("/var/{chromosome}-{position}-{ref}-{alt}")
def get_variant_score(chromosome, position, ref, alt):
# Passes the four variant fields to get_ditto_score and wraps the result in a dict.
# TODO: call get_scores to perform the lookup; if the score is not precomputed,
# fall back to dynamic generation.
scores = get_ditto_score(chrom=chromosome, pos=position, ref=ref, alt=alt)
# return {"variant": f"chr{chromosome}:g.{position}{ref}>{alt}"}
# As written, only the first return below is reachable.
return {"scores": scores}
return {"scores_by_transcript": scores}


@app.get("/hgvs/{hgvs_cdna}")
Expand Down
9 changes: 7 additions & 2 deletions src/utils/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
def parse_and_predict(dataframe, config_dict, clf):
# Drop variant info columns so we can perform one-hot encoding
dataframe["so"] = dataframe["consequence"].copy()
var = dataframe[["transcript","gene","consequence","chrom","pos","ref_base","alt_base"]]
dataframe = dataframe.drop(config_dict["id_cols"], axis=1)
dataframe = dataframe.replace([".", "-", ""], np.nan)
for key in dataframe.columns:
Expand Down Expand Up @@ -44,5 +45,9 @@ def parse_and_predict(dataframe, config_dict, clf):
)

y_score = 1 - clf.predict(df2, verbose=0)
del temp_df,df2
return y_score
y_score = pd.DataFrame(y_score, columns=["DITTO"])

var = pd.concat([var.reset_index(drop=True), y_score.reset_index(drop=True)], axis=1)

del temp_df,df2, y_score
return var
13 changes: 4 additions & 9 deletions src/utils/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,8 @@ def get_ditto_score(chrom: str, pos: int, ref: str, alt: str):
"Could not get variant annotations from OpenCravat's API. Please check the variant info and try again."
)
else:
score_dict = {}
for transcript in overall["transcript"].unique():
transcript_data = overall[overall["transcript"] == transcript].reset_index(
drop=True
)
y_score = parse_and_predict(transcript_data, config_dict, clf)
y_score = round(y_score[0][0], 2)
score_dict[transcript] = str(y_score)
return score_dict
var_df_scores = parse_and_predict(overall, config_dict, clf)
var_df_scores = var_df_scores.set_index('transcript')
var_df_scores = var_df_scores.astype({"DITTO": str, "pos": str})
return json.loads(var_df_scores.to_json(orient="index"))

0 comments on commit 1fdaa89

Please sign in to comment.