Spaces:

melvinalves
/

protein_function_prediction

Sleeping

App Files Community

melvinalves committed on Jun 23

Commit

3b8f083

verified ·

1 Parent(s): 166688e

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -32

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def load_hf_encoder(repo_id, subfolder=None, base_tok=None):
     """
     • repo_id   : repositório HF ou caminho local
     • subfolder : subpasta onde vivem pesos/config (None se não houver)
-    • base_tok  : repo para o tokenizer      (None ⇒ usa repo_id)
     Converte tf_model.h5 → PyTorch on-the-fly (from_tf=True).
     """
     if base_tok is None:
@@ -100,7 +100,7 @@ GO       = mlb.classes_
 st.set_page_config(page_title="Predição de Funções Moleculares de Proteínas",
                    page_icon="🧬", layout="centered")
-# CSS global: fundo branco, divisão de colunas e margem do logo
 st.markdown(
     """
     <style>
@@ -108,14 +108,11 @@ st.markdown(
         .block-container { padding-top: 1.5rem; }
         textarea { font-size: 0.9rem !important; }
-        /* traço vertical entre as colunas de resultados */
         div[data-testid="column"]:first-child {
             border-right: 1px solid #E0E0E0;
-            padding-right: 1.2rem;
         }
-        /* margem extra para o logo */
-        img.logo { margin-top: 0.5rem; }
     </style>
     """,
     unsafe_allow_html=True
@@ -124,8 +121,7 @@ st.markdown(
 # Logo (coloca logo.png na raiz do Space)
 LOGO_PATH = "logo.png"
 if os.path.exists(LOGO_PATH):
-    st.markdown(f"<img src='{LOGO_PATH}' width='180' class='logo'/>",
-                unsafe_allow_html=True)
 st.title("Predição de Funções Moleculares de Proteínas (GO:MF)")
@@ -143,49 +139,57 @@ def parse_fasta_multiple(fasta_str):
         if not entry.strip():
             continue
         lines = entry.strip().splitlines()
-        if i > 0:
             header = lines[0].strip()
             seq = "".join(lines[1:]).replace(" ", "").upper()
-        else:
             header = f"Seq_{i+1}"
             seq = "".join(lines).replace(" ", "").upper()
         if seq:
             parsed.append((header, seq))
     return parsed
-# ———————————————————  FUNÇÕES AUXILIARES  ——————————————————— #
 def go_link(go_id, name=""):
-    """Return a Markdown link to the term's QuickGO page."""
     url = f"https://www.ebi.ac.uk/QuickGO/term/{go_id}"
     label = f"{go_id} — {name}" if name else go_id
     return f"[{label}]({url})"
-def uni_url(header):
-    """URL UniProt (primeiro token do header)."""
-    return f"https://www.uniprot.org/uniprotkb/{header.split()[0]}"
-# ———————————————————  MOSTRAR RESULTADOS  ——————————————————— #
 def mostrar(header, y_pred):
-    """Expander com botão UniProt + duas colunas de resultados."""
-    with st.expander(header, expanded=True):
-        # botão que abre UniProt noutra aba (necessita Streamlit ≥1.23)
-        try:
-            st.link_button("🔗 Ver na UniProt", uni_url(header), type="primary")
-        except AttributeError:
-            st.markdown(f"[🔗 Ver na UniProt]({uni_url(header)})")
-        st.markdown("---")
         col1, col2 = st.columns(2)
-        # --- coluna 1 : termos acima do threshold
         with col1:
             st.markdown(f"**GO terms com prob ≥ {THRESH}**")
             hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
             if hits:
                 for go_id in hits:
                     name, defin = GO_INFO.get(go_id, ("— sem nome —", ""))
-                    defin = re.sub(r'^\\s*\"?(.+?)\"?\\s*(\\[[^\\]]*\\])?\\s*$', r'\\1',
                                    defin or "")
                     st.markdown(f"- {go_link(go_id, name)}")
                     if defin:
@@ -193,7 +197,7 @@ def mostrar(header, y_pred):
             else:
                 st.code("— nenhum —")
-        # --- coluna 2 : top-N mais prováveis
         with col2:
             st.markdown(f"**Top {TOP_N} GO terms mais prováveis**")
             for rank, idx in enumerate(np.argsort(-y_pred[0])[:TOP_N], start=1):
@@ -210,22 +214,23 @@ if predict_clicked:
     for header, seq in parsed_seqs:
         with st.spinner(f"A processar {header}… (pode demorar alguns minutos)"):
-            # ---------- EMBEDDINGS ----------
             emb_pb  = embed_seq(FINETUNED_PB,  seq, CHUNK_PB)
             emb_bfd = embed_seq(FINETUNED_BFD, seq, CHUNK_PB)
             emb_esm = embed_seq(BASE_ESM,       seq, CHUNK_ESM)
-            # ---------- PREDIÇÕES ----------
             y_pb  = mlp_pb.predict(emb_pb)
             y_bfd = mlp_bfd.predict(emb_bfd)
             y_esm = mlp_esm.predict(emb_esm)[:, :597]  # alinhar nº de termos
             X     = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
             y_ens = stacking.predict(X)
         mostrar(header, y_ens)
-# ———————————————————  LISTA COMPLETA  ——————————————————— #
 with st.expander("Mostrar lista completa dos 597 GO terms possíveis", expanded=False):
     cols = st.columns(3)
     for i, go_id in enumerate(GO):

     """
     • repo_id   : repositório HF ou caminho local
     • subfolder : subpasta onde vivem pesos/config (None se não houver)
+    • base_tok  : repo para o tokenizer (None => usa repo_id)
     Converte tf_model.h5 → PyTorch on-the-fly (from_tf=True).
     """
     if base_tok is None:
 st.set_page_config(page_title="Predição de Funções Moleculares de Proteínas",
                    page_icon="🧬", layout="centered")
+# Fundo branco + separador de colunas
 st.markdown(
     """
     <style>
         .block-container { padding-top: 1.5rem; }
         textarea { font-size: 0.9rem !important; }
+        /* traço vertical entre primeiras colunas */
         div[data-testid="column"]:first-child {
             border-right: 1px solid #E0E0E0;
+            padding-right: 1rem !important;
         }
     </style>
     """,
     unsafe_allow_html=True
 # Logo (coloca logo.png na raiz do Space)
 LOGO_PATH = "logo.png"
 if os.path.exists(LOGO_PATH):
+    st.image(LOGO_PATH, width=180)
 st.title("Predição de Funções Moleculares de Proteínas (GO:MF)")
         if not entry.strip():
             continue
         lines = entry.strip().splitlines()
+        if i > 0:          # bloco típico FASTA
             header = lines[0].strip()
             seq = "".join(lines[1:]).replace(" ", "").upper()
+        else:              # sequência sem '>'
             header = f"Seq_{i+1}"
             seq = "".join(lines).replace(" ", "").upper()
         if seq:
             parsed.append((header, seq))
     return parsed
+# ———————————————————  FUNÇÕES AUXILIARES DE LAYOUT  ——————————————————— #
 def go_link(go_id, name=""):
+    """Return a Markdown link to the QuickGO page for go_id, labelled "go_id — name" when name is non-empty."""
     url = f"https://www.ebi.ac.uk/QuickGO/term/{go_id}"
     label = f"{go_id} — {name}" if name else go_id
     return f"[{label}]({url})"
+# ———————————————————  FUNÇÃO PRINCIPAL DE RESULTADOS  ——————————————————— #
 def mostrar(header, y_pred):
+    """Mostra resultados (botão UniProt + duas colunas)."""
+    prot_id  = header.split()[0]
+    prot_url = f"https://www.uniprot.org/uniprotkb/{prot_id}"
+    with st.expander(header, expanded=True):
+        # Botão «Visitar UniProt»
+        st.markdown(
+            f"""
+            <div style="text-align:right; margin-bottom:0.5rem;">
+              <a href="{prot_url}" target="_blank">
+                 <button style="
+                     background:#2b8cbe;border:none;border-radius:4px;
+                     padding:0.35rem 0.8rem;color:#fff;font-size:0.9rem;
+                     cursor:pointer;">
+                     Visitar UniProt
+                 </button>
+              </a>
+            </div>
+            """,
+            unsafe_allow_html=True
+        )
         col1, col2 = st.columns(2)
+        # ——— coluna 1 : termos acima do threshold
         with col1:
             st.markdown(f"**GO terms com prob ≥ {THRESH}**")
             hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
             if hits:
                 for go_id in hits:
                     name, defin = GO_INFO.get(go_id, ("— sem nome —", ""))
+                    defin = re.sub(r'^\\s*"?(.+?)"?\\s*(\\[[^\\]]*\\])?\\s*$', r'\\1',
                                    defin or "")
                     st.markdown(f"- {go_link(go_id, name)}")
                     if defin:
             else:
                 st.code("— nenhum —")
+        # ——— coluna 2 : top-N mais prováveis
         with col2:
             st.markdown(f"**Top {TOP_N} GO terms mais prováveis**")
             for rank, idx in enumerate(np.argsort(-y_pred[0])[:TOP_N], start=1):
     for header, seq in parsed_seqs:
         with st.spinner(f"A processar {header}… (pode demorar alguns minutos)"):
+            # ————————————  EMBEDDINGS  ———————————— #
             emb_pb  = embed_seq(FINETUNED_PB,  seq, CHUNK_PB)
             emb_bfd = embed_seq(FINETUNED_BFD, seq, CHUNK_PB)
             emb_esm = embed_seq(BASE_ESM,       seq, CHUNK_ESM)
+            # ————————————  PREDIÇÕES MLPs  ———————————— #
             y_pb  = mlp_pb.predict(emb_pb)
             y_bfd = mlp_bfd.predict(emb_bfd)
             y_esm = mlp_esm.predict(emb_esm)[:, :597]  # alinhar nº de termos
+            # ————————————  STACKING  ———————————— #
             X     = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
             y_ens = stacking.predict(X)
         mostrar(header, y_ens)
+# ———————————————————  LISTA COMPLETA DE TERMOS SUPORTADOS  ——————————————————— #
 with st.expander("Mostrar lista completa dos 597 GO terms possíveis", expanded=False):
     cols = st.columns(3)
     for i, go_id in enumerate(GO):