T Le committed
Commit cd8b282 · 1 Parent(s): d054553
Topic modeling model update
pages/2 Topic Modeling.py
CHANGED
@@ -196,7 +196,7 @@ if uploaded_file is not None:
     method = c1.selectbox(
         'Choose method',
         ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
-    ColCho = c2.selectbox('Choose column', (
+    ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
     num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)

     d1, d2 = st.columns([3,7])

@@ -235,8 +235,8 @@ if uploaded_file is not None:
     if fine_tuning:
         topic_labelling = st.toggle("Automatic topic labelling")
         if topic_labelling:
-
-            if
+            llm_model = st.selectbox("Model",["OpenAI/gpt-4o","Google/Flan-t5","OpenAI/gpt-oss"])
+            if llm_model == "OpenAI/gpt-4o":
                 api_key = st.text_input("API Key")

     else:

@@ -527,21 +527,36 @@ if uploaded_file is not None:
             "MMR": mmr,
         }
         if topic_labelling:
-            if
+            if llm_model == "OpenAI/gpt-4o":
                 client = openai.OpenAI(api_key=api_key)
                 representation_model = {
                     "KeyBERT": keybert,
                     "MMR": mmr,
                     "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
                 }
-            elif
-
-                clientmod = TextGeneration(
+            elif llm_model == "Google/Flan-t5":
+                gen = pipeline("text2text-generation", model = "google/flan-t5-base")
+                clientmod = TextGeneration(gen)
                 representation_model = {
                     "KeyBERT": keybert,
                     "MMR": mmr,
                     "test": clientmod
                 }
+            elif llm_model == "OpenAI/gpt-oss":
+                gen = pipeline("text-generation",
+                               model = "openai/gpt-oss-20b",
+                               torch_dtype = "auto",
+                               device_map = "auto",
+                               )
+                clientmod = TextGeneration(gen)
+
+                representation_model = {
+                    "KeyBERT": keybert,
+                    "MMR": mmr,
+                    "test": gen
+                }
+
+

         vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
         topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
tools/__pycache__/sourceformat.cpython-310.pyc
DELETED
Binary file (5.74 kB)
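
For context, the new Flan-T5 and gpt-oss branches both rely on BERTopic's multi-aspect representation_model dictionary, with an LLM wrapped in TextGeneration alongside the KeyBERT and MMR aspects. The sketch below is illustrative only and is not part of the commit: it uses the 20 Newsgroups corpus as a stand-in for the uploaded file and keeps the app's "test" aspect name.

# Minimal sketch of the representation-model wiring this commit adds.
# Assumes bertopic, transformers and scikit-learn are installed; the corpus
# below is a placeholder for the documents read from the uploaded file.
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration
from transformers import pipeline
from sklearn.datasets import fetch_20newsgroups

docs = fetch_20newsgroups(subset="train", remove=("headers", "footers", "quotes")).data[:500]

keybert = KeyBERTInspired()
mmr = MaximalMarginalRelevance(diversity=0.3)

# LLM-based topic labelling, mirroring the Google/Flan-t5 branch:
# a text2text-generation pipeline wrapped in BERTopic's TextGeneration.
gen = pipeline("text2text-generation", model="google/flan-t5-base")
llm_labels = TextGeneration(gen)

representation_model = {
    "KeyBERT": keybert,
    "MMR": mmr,
    "test": llm_labels,  # aspect name kept from the app's code
}

topic_model = BERTopic(representation_model=representation_model)
topics, probs = topic_model.fit_transform(docs)

# Labels for each aspect are available per topic, e.g. the LLM-generated ones:
print(topic_model.get_topic(0, full=True)["test"])

BERTopic expects representation model objects (such as the TextGeneration or OpenAI wrappers) as the values of this dictionary, which is why the pipeline is wrapped before being added; in the app itself the same dictionary is then passed to BERTopic together with the embedding, UMAP, HDBSCAN and CountVectorizer components, as shown in the diff above.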