melvinalves committed on
Commit
bd36deb
·
verified ·
1 Parent(s): a5e2965

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -37
app.py CHANGED
@@ -14,10 +14,10 @@ login(os.environ["HF_TOKEN"])
14
 
15
  # ——————————————————— CONFIG ——————————————————— #
16
  SPACE_ID = "melvinalves/protein_function_prediction"
17
- TOP_N = 10
18
  THRESH = 0.37
19
- CHUNK_PB = 512 # janela ProtBERT / ProtBERT-BFD
20
- CHUNK_ESM = 1024 # janela ESM-2
21
 
22
  # repositórios HF
23
  FINETUNED_PB = ("melvinalves/FineTune", "fineTunedProtbert")
@@ -97,11 +97,25 @@ mlb = joblib.load(download_file("data/mlb_597.pkl"))
97
  GO = mlb.classes_
98
 
99
  # ——————————————————— UI ——————————————————— #
100
- st.title("PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas")
101
-
102
- # Pequeno ajuste de fonte no textarea
103
- st.markdown("<style> textarea { font-size: 0.9rem !important; } </style>",
104
- unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  fasta_input = st.text_area("Insere uma ou mais sequΓͺncias FASTA:", height=300)
107
  predict_clicked = st.button("Prever GO terms")
@@ -127,6 +141,48 @@ def parse_fasta_multiple(fasta_str):
127
  parsed.append((header, seq))
128
  return parsed
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  # ——————————————————— INFERÊNCIA ——————————————————— #
131
  if predict_clicked:
132
  parsed_seqs = parse_fasta_multiple(fasta_input)
@@ -150,32 +206,12 @@ if predict_clicked:
150
  X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
151
  y_ens = stacking.predict(X)
152
 
153
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” RESULTADOS β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
154
- def mostrar(tag, y_pred):
155
- with st.expander(tag, expanded=True):
156
- # GO terms acima do threshold
157
- st.markdown(f"**GO terms com prob β‰₯ {THRESH}**")
158
- hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
159
- if hits:
160
- for go_id in hits:
161
- name, defin = GO_INFO.get(go_id, ("β€” sem nome β€”", ""))
162
- defin = re.sub(r'^\s*"?(.+?)"?\s*(\[[^\]]*\])?\s*$', r'\1',
163
- defin or "")
164
- st.write(f"**{go_id} β€” {name}**")
165
- st.caption(defin)
166
- else:
167
- st.code("β€” nenhum β€”")
168
-
169
- # Top-N mais provΓ‘veis
170
- st.markdown(f"**Top {TOP_N} GO terms mais provΓ‘veis**")
171
- for idx in np.argsort(-y_pred[0])[:TOP_N]:
172
- go_id = GO[idx]
173
- name, _ = GO_INFO.get(go_id, ("", ""))
174
- st.write(f"{go_id} β€” {name} : {y_pred[0][idx]:.4f}")
175
-
176
- # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” ESCOLHE QUAIS MOSTRAR β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€” #
177
- # Descomenta se quiseres ver as saΓ­das individuais
178
- # mostrar(f"{header} β€” ProtBERT (MLP)", y_pb)
179
- # mostrar(f"{header} β€” ProtBERT-BFD (MLP)", y_bfd)
180
- # mostrar(f"{header} β€” ESM-2 (MLP)", y_esm)
181
- mostrar(header, y_ens) # ensemble
 
14
 
15
  # ——————————————————— CONFIG ——————————————————— #
16
  SPACE_ID = "melvinalves/protein_function_prediction"
17
+ TOP_N = 20 # mostra agora top-20
18
  THRESH = 0.37
19
+ CHUNK_PB = 512 # janela ProtBERT / ProtBERT-BFD
20
+ CHUNK_ESM = 1024 # janela ESM-2
21
 
22
  # repositórios HF
23
  FINETUNED_PB = ("melvinalves/FineTune", "fineTunedProtbert")
 
97
  GO = mlb.classes_
98
 
99
  # ——————————————————— UI ——————————————————— #
100
+ # --- aspecto geral
101
+ st.set_page_config(page_title="PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas",
102
+ page_icon="🧬", layout="centered")
103
+
104
+ # fundo branco + pequenos ajustes de margem/padding
105
+ st.markdown("""
106
+ <style>
107
+ body { background-color: #FFFFFF; }
108
+ .block-container{ padding-top: 1.5rem; }
109
+ textarea { font-size: 0.9rem !important; }
110
+ </style>
111
+ """, unsafe_allow_html=True)
112
+
113
+ # logo (coloca um ficheiro logo.png na pasta raiz do Space)
114
+ LOGO_PATH = "logo.png"
115
+ if os.path.exists(LOGO_PATH):
116
+ st.image(LOGO_PATH, width=180)
117
+
118
+ st.title("PrediΓ§Γ£o de FunΓ§Γ΅es Moleculares de ProteΓ­nas (GO:MF)")
119
 
120
  fasta_input = st.text_area("Insere uma ou mais sequΓͺncias FASTA:", height=300)
121
  predict_clicked = st.button("Prever GO terms")
 
141
  parsed.append((header, seq))
142
  return parsed
143
 
144
+ # ——————————————————— FUNÇÕES AUXILIARES DE LAYOUT ——————————————————— #
145
def go_link(go_id, name=""):
    """Build a Markdown link to the QuickGO page of a GO term.

    Args:
        go_id: GO identifier; inserted verbatim at the end of the QuickGO
            term URL and used as the (start of the) link label.
        name: Optional human-readable term name. When non-empty it is
            appended to the label after an em dash.

    Returns:
        A Markdown inline link of the form ``[label](url)``.
    """
    url = f"https://www.ebi.ac.uk/QuickGO/term/{go_id}"
    # The separator is a real em dash (U+2014); the previous literal was a
    # mis-decoded byte sequence that rendered as garbage in the UI.
    label = f"{go_id} — {name}" if name else go_id
    return f"[{label}]({url})"
150
+
151
def prot_link(header):
    """Build a Markdown link to UniProtKB from a FASTA header.

    The first whitespace-delimited token of the header is taken as the
    protein identifier. If that token follows the UniProt FASTA layout
    ``sp|ACCESSION|ENTRY_NAME`` (or ``tr|...``), only the accession is used
    in the URL, since the full pipe-delimited token is not a valid entry id.

    Args:
        header: FASTA header text (with or without the leading ``>``).

    Returns:
        ``[header](https://www.uniprot.org/uniprotkb/<id>)``, or ``header``
        unchanged when no identifier can be extracted (empty or
        whitespace-only header), instead of raising ``IndexError``.
    """
    tokens = header.split()
    if not tokens:
        return header

    pid = tokens[0].lstrip(">")
    fields = pid.split("|")
    # UniProt FASTA ids look like "sp|P69905|HBA_HUMAN"; keep the accession.
    if len(fields) >= 2 and fields[0].lower() in ("sp", "tr") and fields[1]:
        pid = fields[1]
    if not pid:
        return header

    url = f"https://www.uniprot.org/uniprotkb/{pid}"
    return f"[{header}]({url})"
156
+
157
+ # ——————————————————— FUNÇÃO PRINCIPAL DE RESULTADOS ——————————————————— #
158
# Matches a GO definition with optional surrounding double quotes and an
# optional trailing "[...]" reference list; group 1 is the bare text.
# Single backslashes are required here: in a raw string, r'\\s' means a
# literal backslash followed by "s", which never matches real definitions.
_GO_DEF_RE = re.compile(r'^\s*"?(.+?)"?\s*(\[[^\]]*\])?\s*$')


def _clean_definition(defin):
    """Return *defin* without surrounding quotes or a trailing ``[...]`` block."""
    return _GO_DEF_RE.sub(r"\1", defin or "")


def mostrar(tag, y_pred):
    """Render the predictions for one sequence in a two-column expander.

    Column 1 lists the GO terms whose score is at or above ``THRESH`` (with
    their cleaned definition as a caption); column 2 lists the ``TOP_N``
    highest-scoring terms with their scores.

    Args:
        tag: Label shown on the expander.
        y_pred: Score array; only its first row (``y_pred[0]``) is displayed.
            Presumably shaped (1, number of GO classes) -- confirm at caller.

    Returns:
        None. Output is written to the Streamlit page.
    """
    with st.expander(tag, expanded=True):
        col1, col2 = st.columns(2)

        # Column 1: terms at or above the decision threshold.
        with col1:
            st.markdown(f"**GO terms com prob ≥ {THRESH}**")
            hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
            if hits:
                for go_id in hits:
                    name, defin = GO_INFO.get(go_id, ("— sem nome —", ""))
                    defin = _clean_definition(defin)
                    st.markdown(f"- {go_link(go_id, name)} ")
                    if defin:
                        st.caption(defin)
            else:
                st.code("— nenhum —")

        # Column 2: the TOP_N most probable terms, highest score first.
        with col2:
            st.markdown(f"**Top {TOP_N} GO terms mais prováveis**")
            scores = y_pred[0]
            for rank, idx in enumerate(np.argsort(-scores)[:TOP_N], start=1):
                go_id = GO[idx]
                name, _ = GO_INFO.get(go_id, ("", ""))
                st.markdown(f"{rank}. {go_link(go_id, name)} : {scores[idx]:.4f}")
185
+
186
  # ——————————————————— INFERÊNCIA ——————————————————— #
187
  if predict_clicked:
188
  parsed_seqs = parse_fasta_multiple(fasta_input)
 
206
  X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
207
  y_ens = stacking.predict(X)
208
 
209
+ # header como link para UniProt
210
+ mostrar(prot_link(header), y_ens)
211
+
212
+ # ——————————————————— LISTA COMPLETA DE TERMOS SUPORTADOS ——————————————————— #
213
+ with st.expander("Mostrar lista completa dos 597 GO terms possΓ­veis", expanded=False):
214
+ cols = st.columns(3)
215
+ for i, go_id in enumerate(GO):
216
+ name, _ = GO_INFO.get(go_id, ("", ""))
217
+ cols[i % 3].markdown(f"- {go_link(go_id, name)}")