diff --git a/src/main.py b/src/main.py index 695b30b..e014082 100644 --- a/src/main.py +++ b/src/main.py @@ -11,23 +11,10 @@ def read_root(): return {"Hello": "World"} -@app.get("/items/{item_id}") -def read_item(item_id: int, q: Union[str, None] = None): - return {"item_id": item_id, "q": q} - - -@app.get("/loc/{chromosome}/{start}/{end}") -def get_scores(chromosome: str, start: int, end: int): - # TODO create interface to tabix to query range for DITTO scores - return {"loc": f"{chromosome}:{start}-{end}"} - - -@app.get("/var/{chromosome}/{position}/{ref}/{alt}") +@app.get("/var/{chromosome}-{position}-{ref}-{alt}") def get_variant_score(chromosome, position, ref, alt): - # TODO call the get_scores function to perform look up, if score not precomputed then call dynamic generation scores = get_ditto_score(chrom=chromosome, pos=position, ref=ref, alt=alt) - # return {"variant": f"chr{chromosome}:g.{position}{ref}>{alt}"} - return {"scores": scores} + return {"scores_by_transcript": scores} @app.get("/hgvs/{hgvs_cdna}") diff --git a/src/utils/predict.py b/src/utils/predict.py index a462c55..69b8072 100644 --- a/src/utils/predict.py +++ b/src/utils/predict.py @@ -5,6 +5,7 @@ def parse_and_predict(dataframe, config_dict, clf): # Drop variant info columns so we can perform one-hot encoding dataframe["so"] = dataframe["consequence"].copy() + var = dataframe[["transcript","gene","consequence","chrom","pos","ref_base","alt_base"]] dataframe = dataframe.drop(config_dict["id_cols"], axis=1) dataframe = dataframe.replace([".", "-", ""], np.nan) for key in dataframe.columns: @@ -44,5 +45,9 @@ def parse_and_predict(dataframe, config_dict, clf): ) y_score = 1 - clf.predict(df2, verbose=0) - del temp_df,df2 - return y_score + y_score = pd.DataFrame(y_score, columns=["DITTO"]) + + var = pd.concat([var.reset_index(drop=True), y_score.reset_index(drop=True)], axis=1) + + del temp_df,df2, y_score + return var diff --git a/src/utils/query.py b/src/utils/query.py index ac2d662..899c97b 100644 --- a/src/utils/query.py +++ b/src/utils/query.py @@ -81,13 +81,8 @@ def get_ditto_score(chrom: str, pos: int, ref: str, alt: str): "Could not get variant annotations from OpenCravat's API. Please check the variant info and try again." ) else: - score_dict = {} - for transcript in overall["transcript"].unique(): - transcript_data = overall[overall["transcript"] == transcript].reset_index( - drop=True - ) - y_score = parse_and_predict(transcript_data, config_dict, clf) - y_score = round(y_score[0][0], 2) - score_dict[transcript] = str(y_score) - return score_dict + var_df_scores = parse_and_predict(overall, config_dict, clf) + var_df_scores = var_df_scores.set_index('transcript') + var_df_scores = var_df_scores.astype({"DITTO": str, "pos": str}) + return json.loads(var_df_scores.to_json(orient="index"))