melvinalves committed on
Commit
2bea969
·
verified ·
1 Parent(s): 5406932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -28
app.py CHANGED
@@ -97,19 +97,24 @@ mlb = joblib.load(download_file("data/mlb_597.pkl"))
97
  GO = mlb.classes_
98
 
99
  # ——————————————————— UI ——————————————————— #
100
- # --- aspecto geral da página
101
  st.set_page_config(page_title="PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas",
102
  page_icon="🧬", layout="centered")
103
 
104
- # CSS: fundo branco sólido + pequenos ajustes
105
  st.markdown(
106
  """
107
  <style>
108
- body, .stApp {
109
- background-color: #FFFFFF !important;
110
- }
111
  .block-container { padding-top: 1.5rem; }
112
  textarea { font-size: 0.9rem !important; }
 
 
 
 
 
 
 
 
113
  </style>
114
  """,
115
  unsafe_allow_html=True
@@ -118,7 +123,7 @@ st.markdown(
118
  # Logo (coloca logo.png na raiz do Space)
119
  LOGO_PATH = "logo.png"
120
  if os.path.exists(LOGO_PATH):
121
- st.image(LOGO_PATH, width=180)
122
 
123
  st.title("PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas (GO:MF)")
124
 
@@ -136,51 +141,53 @@ def parse_fasta_multiple(fasta_str):
136
  if not entry.strip():
137
  continue
138
  lines = entry.strip().splitlines()
139
- if i > 0: # bloco tΓ­pico FASTA
140
  header = lines[0].strip()
141
  seq = "".join(lines[1:]).replace(" ", "").upper()
142
- else: # sequΓͺncia sem '>'
143
  header = f"Seq_{i+1}"
144
  seq = "".join(lines).replace(" ", "").upper()
145
  if seq:
146
  parsed.append((header, seq))
147
  return parsed
148
 
149
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” FUNÇÕES AUXILIARES DE LAYOUT β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
150
  def go_link(go_id, name=""):
151
- """Cria link para pΓ‘gina do GO term (QuickGO)."""
152
  url = f"https://www.ebi.ac.uk/QuickGO/term/{go_id}"
153
  label = f"{go_id} β€” {name}" if name else go_id
154
  return f"[{label}]({url})"
155
 
156
- def prot_link(header):
157
- """Tenta gerar link para UniProt usando o primeiro token do header."""
158
- pid = header.split()[0]
159
- url = f"https://www.uniprot.org/uniprotkb/{pid}"
160
- return f"[{header}]({url})"
 
 
 
 
 
 
161
 
162
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” FUNÇÃO PRINCIPAL DE RESULTADOS β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
163
- def mostrar(tag, y_pred):
164
- """Mostra resultados em duas colunas dentro de um expander."""
165
- with st.expander(tag, expanded=True):
166
  col1, col2 = st.columns(2)
167
 
168
- # ——— coluna 1 : termos acima do threshold
169
  with col1:
170
  st.markdown(f"**GO terms com prob β‰₯ {THRESH}**")
171
  hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
172
  if hits:
173
  for go_id in hits:
174
  name, defin = GO_INFO.get(go_id, ("β€” sem nome β€”", ""))
175
- defin = re.sub(r'^\\s*"?(.+?)"?\\s*(\\[[^\\]]*\\])?\\s*$', r'\\1',
176
  defin or "")
177
- st.markdown(f"- {go_link(go_id, name)} ")
178
  if defin:
179
  st.caption(defin)
180
  else:
181
  st.code("β€” nenhum β€”")
182
 
183
- # ——— coluna 2 : top-N mais prováveis
184
  with col2:
185
  st.markdown(f"**Top {TOP_N} GO terms mais provΓ‘veis**")
186
  for rank, idx in enumerate(np.argsort(-y_pred[0])[:TOP_N], start=1):
@@ -197,23 +204,22 @@ if predict_clicked:
197
 
198
  for header, seq in parsed_seqs:
199
  with st.spinner(f"A processar {header}… (pode demorar alguns minutos)"):
200
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” EMBEDDINGS β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
201
  emb_pb = embed_seq(FINETUNED_PB, seq, CHUNK_PB)
202
  emb_bfd = embed_seq(FINETUNED_BFD, seq, CHUNK_PB)
203
  emb_esm = embed_seq(BASE_ESM, seq, CHUNK_ESM)
204
 
205
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” PREDIÇÕES MLPs β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
206
  y_pb = mlp_pb.predict(emb_pb)
207
  y_bfd = mlp_bfd.predict(emb_bfd)
208
  y_esm = mlp_esm.predict(emb_esm)[:, :597] # alinhar nΒΊ de termos
209
 
210
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” STACKING β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
211
  X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
212
  y_ens = stacking.predict(X)
213
 
214
- mostrar(prot_link(header), y_ens)
215
 
216
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” LISTA COMPLETA DE TERMOS SUPORTADOS β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
217
  with st.expander("Mostrar lista completa dos 597 GO terms possΓ­veis", expanded=False):
218
  cols = st.columns(3)
219
  for i, go_id in enumerate(GO):
 
97
  GO = mlb.classes_
98
 
99
  # ——————————————————— UI ——————————————————— #
 
100
  st.set_page_config(page_title="PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas",
101
  page_icon="🧬", layout="centered")
102
 
103
+ # CSS: fundo branco + logo + traço vertical entre colunas
104
  st.markdown(
105
  """
106
  <style>
107
+ body, .stApp { background-color: #FFFFFF !important; }
 
 
108
  .block-container { padding-top: 1.5rem; }
109
  textarea { font-size: 0.9rem !important; }
110
+
111
+ /* traΓ§o vertical: primeiro column do par recebe border-right */
112
+ div[data-testid="column"]:first-child {
113
+ border-right: 1px solid #E0E0E0;
114
+ padding-right: 1.2rem;
115
+ }
116
+ /* empurra logo ligeiramente para baixo */
117
+ img.logo { margin-top: 0.5rem; }
118
  </style>
119
  """,
120
  unsafe_allow_html=True
 
123
  # Logo (coloca logo.png na raiz do Space)
124
  LOGO_PATH = "logo.png"
125
  if os.path.exists(LOGO_PATH):
126
+ st.image(LOGO_PATH, width=180, output_format="auto", use_column_width=False, caption=None, clamp=False, channels="RGB", format="PNG", class_name="logo")
127
 
128
  st.title("PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas (GO:MF)")
129
 
 
141
  if not entry.strip():
142
  continue
143
  lines = entry.strip().splitlines()
144
+ if i > 0:
145
  header = lines[0].strip()
146
  seq = "".join(lines[1:]).replace(" ", "").upper()
147
+ else:
148
  header = f"Seq_{i+1}"
149
  seq = "".join(lines).replace(" ", "").upper()
150
  if seq:
151
  parsed.append((header, seq))
152
  return parsed
153
 
154
+ # ——————————————————— FUNÇÕES AUXILIARES ——————————————————— #
155
  def go_link(go_id, name=""):
156
+ """Markdown link para QuickGO."""
157
  url = f"https://www.ebi.ac.uk/QuickGO/term/{go_id}"
158
  label = f"{go_id} β€” {name}" if name else go_id
159
  return f"[{label}]({url})"
160
 
161
+ def uni_url(header):
162
+ """URL UniProt (primeiro token do header)."""
163
+ return f"https://www.uniprot.org/uniprotkb/{header.split()[0]}"
164
+
165
+ # ——————————————————— MOSTRAR RESULTADOS ——————————————————— #
166
+ def mostrar(header, y_pred):
167
+ """Expander com botΓ£o UniProt + duas colunas de resultados."""
168
+ with st.expander(header, expanded=True):
169
+ # botão que abre UniProt noutra aba
170
+ st.link_button("πŸ”— Ver na UniProt", uni_url(header), type="primary")
171
+ st.markdown("---")
172
 
 
 
 
 
173
  col1, col2 = st.columns(2)
174
 
175
+ # --- coluna 1 : termos acima do threshold
176
  with col1:
177
  st.markdown(f"**GO terms com prob β‰₯ {THRESH}**")
178
  hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
179
  if hits:
180
  for go_id in hits:
181
  name, defin = GO_INFO.get(go_id, ("β€” sem nome β€”", ""))
182
+ defin = re.sub(r'^\\s*\"?(.+?)\"?\\s*(\\[[^\\]]*\\])?\\s*$', r'\\1',
183
  defin or "")
184
+ st.markdown(f"- {go_link(go_id, name)}")
185
  if defin:
186
  st.caption(defin)
187
  else:
188
  st.code("β€” nenhum β€”")
189
 
190
+ # --- coluna 2 : top-N mais prováveis
191
  with col2:
192
  st.markdown(f"**Top {TOP_N} GO terms mais provΓ‘veis**")
193
  for rank, idx in enumerate(np.argsort(-y_pred[0])[:TOP_N], start=1):
 
204
 
205
  for header, seq in parsed_seqs:
206
  with st.spinner(f"A processar {header}… (pode demorar alguns minutos)"):
207
+ # ---------- EMBEDDINGS ----------
208
  emb_pb = embed_seq(FINETUNED_PB, seq, CHUNK_PB)
209
  emb_bfd = embed_seq(FINETUNED_BFD, seq, CHUNK_PB)
210
  emb_esm = embed_seq(BASE_ESM, seq, CHUNK_ESM)
211
 
212
+ # ---------- PREDIÇÕES ----------
213
  y_pb = mlp_pb.predict(emb_pb)
214
  y_bfd = mlp_bfd.predict(emb_bfd)
215
  y_esm = mlp_esm.predict(emb_esm)[:, :597] # alinhar nΒΊ de termos
216
 
 
217
  X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
218
  y_ens = stacking.predict(X)
219
 
220
+ mostrar(header, y_ens)
221
 
222
+ # ——————————————————— LISTA COMPLETA ——————————————————— #
223
  with st.expander("Mostrar lista completa dos 597 GO terms possΓ­veis", expanded=False):
224
  cols = st.columns(3)
225
  for i, go_id in enumerate(GO):