Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamically choosing proteins from volcano plot #389

Merged
merged 14 commits into from
Jan 16, 2025
Merged
5 changes: 0 additions & 5 deletions alphastats/gui/pages/05_Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from alphastats.gui.utils.analysis_helper import (
display_analysis_result_with_buttons,
gather_parameters_and_do_analysis,
gather_uniprot_data,
get_regulated_features,
)
from alphastats.gui.utils.ui_helper import (
StateKeys,
Expand Down Expand Up @@ -94,9 +92,6 @@ def show_start_llm_button(analysis_method: str) -> None:
if StateKeys.LLM_INTEGRATION in st.session_state:
del st.session_state[StateKeys.LLM_INTEGRATION]
st.session_state[StateKeys.LLM_INPUT] = (analysis_object, parameters)
regulated_features = get_regulated_features(analysis_object)
# TODO: Add confirmation prompt if an excessive number of proteins is to be looked up.
gather_uniprot_data(regulated_features)

st.toast("LLM analysis created!", icon="✅")
st.page_link("pages/06_LLM.py", label="=> Go to LLM page..")
Expand Down
67 changes: 40 additions & 27 deletions alphastats/gui/pages/06_LLM.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from alphastats.dataset.plotting import plotly_object
from alphastats.gui.utils.analysis_helper import (
display_figure,
gather_uniprot_data,
)
from alphastats.gui.utils.llm_helper import (
create_protein_editor,
display_uniprot,
get_display_proteins_html,
llm_connection_test,
set_api_key,
)
Expand Down Expand Up @@ -99,7 +100,7 @@ def llm_config():
volcano_plot, plot_parameters = st.session_state[StateKeys.LLM_INPUT]

st.markdown(f"Parameters used for analysis: `{plot_parameters}`")
c1, c2 = st.columns((1, 2))
c1, c2 = st.columns((2, 1))

with c2:
display_figure(volcano_plot.plot)
Expand All @@ -114,43 +115,55 @@ def llm_config():
st.text("No genes of interest found.")
st.stop()

# Separate upregulated and downregulated genes
upregulated_genes = [
key for key in regulated_genes_dict if regulated_genes_dict[key] == "up"
]
downregulated_genes = [
key for key in regulated_genes_dict if regulated_genes_dict[key] == "down"
]

# Create dataframes with checkboxes for selection
upregulated_genes_df = pd.DataFrame(
{"Protein": upregulated_genes, "Selected": [True] * len(upregulated_genes)}
JuliaS92 marked this conversation as resolved.
Show resolved Hide resolved
)

downregulated_genes_df = pd.DataFrame(
{"Protein": downregulated_genes, "Selected": [True] * len(downregulated_genes)}
)

st.markdown("##### Genes of interest")
c11, c12 = st.columns((1, 2), gap="medium")
c11, c12 = st.columns((1, 1), gap="medium")

with c11:
st.write("Upregulated genes")
st.markdown(
get_display_proteins_html(
upregulated_genes,
True,
annotation_store=st.session_state[StateKeys.ANNOTATION_STORE],
feature_to_repr_map=st.session_state[
StateKeys.DATASET
]._feature_to_repr_map,
),
unsafe_allow_html=True,
selected_upregulated_genes = create_protein_editor(
upregulated_genes_df, "Upregulated Proteins"
)

with c12:
st.write("Downregulated genes")
st.markdown(
get_display_proteins_html(
downregulated_genes,
False,
annotation_store=st.session_state[StateKeys.ANNOTATION_STORE],
feature_to_repr_map=st.session_state[
StateKeys.DATASET
]._feature_to_repr_map,
),
unsafe_allow_html=True,
selected_downregulated_genes = create_protein_editor(
downregulated_genes_df, "Downregulated Proteins"
)

# Combine the selected genes into a new regulated_genes_dict
selected_regulated_genes = selected_upregulated_genes + selected_downregulated_genes
regulated_genes_dict = {
gene: "up" if gene in selected_upregulated_genes else "down"
for gene in selected_regulated_genes
}

# If no genes are selected, stop the script
if not regulated_genes_dict:
st.text("No genes selected for analysis.")
st.stop()

if c1.button("Gather UniProt data for selected proteins"):
gather_uniprot_data(selected_regulated_genes)

if not st.session_state[StateKeys.ANNOTATION_STORE]:
JuliaS92 marked this conversation as resolved.
Show resolved Hide resolved
st.info("No UniProt data found. Please run UniProt data fetching first.")
st.stop()


model_name = st.session_state[StateKeys.MODEL_NAME]
llm_integration_set_for_model = (
Expand Down Expand Up @@ -181,8 +194,8 @@ def llm_config():
"",
value=get_initial_prompt(
plot_parameters,
list(map(feature_to_repr_map.get, upregulated_genes)),
list(map(feature_to_repr_map.get, downregulated_genes)),
list(map(feature_to_repr_map.get, selected_upregulated_genes)),
list(map(feature_to_repr_map.get, selected_downregulated_genes)),
),
height=200,
disabled=llm_integration_set_for_model,
Expand Down
6 changes: 3 additions & 3 deletions alphastats/gui/utils/analysis_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import io
from typing import Any, Callable, Dict, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import pandas as pd
import streamlit as st
Expand Down Expand Up @@ -202,14 +202,14 @@ def gather_parameters_and_do_analysis(
raise ValueError(f"Analysis method {analysis_method} not found.")


def gather_uniprot_data(features: list) -> None:
def gather_uniprot_data(features: List[str]) -> None:
"""
Gathers UniProt data for a list of features and stores it in the session state.

Features that are already in the session state are skipped.

Args:
features (list): A list of features for which UniProt data needs to be gathered.
features (List[str]): A list of features for which UniProt data needs to be gathered.
Returns:
None
"""
Expand Down
43 changes: 43 additions & 0 deletions alphastats/gui/utils/llm_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
from typing import List, Optional

import pandas as pd
import streamlit as st

from alphastats.gui.utils.ui_helper import DefaultStates, StateKeys
Expand All @@ -11,6 +12,48 @@
)


def create_protein_editor(df: pd.DataFrame, title: str) -> List[str]:
    """Render a data editor for protein selection and return the selected proteins.

    The editor displays the columns in the order 'Gene', 'Selected', 'Protein'.
    Only the 'Selected' checkbox column is editable. The 'Gene' and 'Protein'
    columns are read-only because the 'Protein' values are returned to the
    caller and used as lookup keys.

    Args:
        df: DataFrame containing protein data with 'Protein' and 'Selected'
            columns. The frame is not modified; a copy is edited instead.
        title: Title to display above the editor.

    Returns:
        selected_proteins (List[str]): The 'Protein' values of all rows whose
        'Selected' checkbox is ticked.

    Raises:
        KeyError: If a value in 'Protein' has no entry in the dataset's
            feature-to-representation map.
    """
    st.write(title)

    # Work on a copy: inserting columns into the caller's frame would leak the
    # extra 'Gene' column to the caller and raise "cannot insert Gene, already
    # exists" if the same frame were passed in a second time.
    editor_df = df.copy()
    editor_df.insert(
        0,
        "Gene",
        [
            st.session_state[StateKeys.DATASET]._feature_to_repr_map[protein]
            for protein in editor_df["Protein"]
        ],
    )
    # Move 'Protein' behind 'Selected' so the readable gene name comes first.
    editor_df.insert(2, "Protein", editor_df.pop("Protein"))

    edited_df = st.data_editor(
        editor_df,
        column_config={
            "Selected": st.column_config.CheckboxColumn(
                "Include?",
                help="Uncheck to exclude this gene from analysis",
                default=True,
            ),
            "Gene": st.column_config.TextColumn(
                "Gene",
                help="The gene name to be included in the analysis",
                width="medium",
            ),
        },
        # Identifier columns must not be user-editable; only the checkbox is.
        disabled=["Gene", "Protein"],
        hide_index=True,
    )

    # Extract the protein ids of the rows that are still ticked.
    selected_proteins = edited_df.loc[edited_df["Selected"], "Protein"].tolist()
    return selected_proteins


def get_display_proteins_html(
protein_ids: List[str], is_upregulated: True, annotation_store, feature_to_repr_map
) -> str:
Expand Down
Loading