Spaces:
Sleeping
Sleeping
T Le
committed on
Commit
·
e0f3355
1
Parent(s):
ea7c172
Update to latest version
Browse files- pages/10 WordCloud.py +79 -19
- pages/9 Summarization.py +11 -14
- pages/Rtest.py +2 -2
- pages/testr.R +7 -1
pages/10 WordCloud.py
CHANGED
|
@@ -2,6 +2,7 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from wordcloud import WordCloud
|
|
|
|
| 5 |
|
| 6 |
# ===config===
|
| 7 |
st.set_page_config(
|
|
@@ -33,7 +34,6 @@ with st.popover("🔗 Menu"):
|
|
| 33 |
st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
|
| 34 |
st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
|
| 35 |
st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
|
| 36 |
-
st.page_link("pages/Rtest.py",label = "rtesting")
|
| 37 |
|
| 38 |
st.header("Wordcloud", anchor=False)
|
| 39 |
st.subheader('Put your file here...', anchor=False)
|
|
@@ -59,21 +59,57 @@ def reset_all():
|
|
| 59 |
st.cache_data.clear()
|
| 60 |
|
| 61 |
#===text reading===
|
|
|
|
| 62 |
def read_txt(intext):
    """Return the full contents of a binary text stream as a string.

    Parameters
    ----------
    intext : binary file-like object
        Stream whose ``read()`` yields bytes.

    Returns
    -------
    str
        The stream's bytes decoded with the default codec (UTF-8).

    Raises
    ------
    UnicodeDecodeError
        If the bytes are not valid UTF-8.
    """
    # The stream may already have been consumed by an earlier reader;
    # rewind so the whole file is decoded rather than an empty tail.
    rewind = getattr(intext, "seek", None)
    if rewind is not None:
        rewind(0)
    return intext.read().decode()
|
| 64 |
|
| 65 |
-
|
| 66 |
-
def
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
|
|
|
|
|
|
|
|
|
| 77 |
if uploaded_file is not None:
|
| 78 |
|
| 79 |
tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
|
|
@@ -81,26 +117,36 @@ if uploaded_file is not None:
|
|
| 81 |
with tab1:
|
| 82 |
c1, c2 = st.columns(2)
|
| 83 |
|
|
|
|
| 84 |
with c1:
|
| 85 |
-
max_font = st.number_input("Max Font Size", min_value = 1, value = 100
|
| 86 |
-
max_words = st.number_input("Max Word Count", min_value = 1, value = 250
|
| 87 |
-
background = st.selectbox("Background color", ["white","black"]
|
| 88 |
|
| 89 |
|
| 90 |
with c2:
|
| 91 |
words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
|
| 92 |
stopwords = words_to_remove.split(';')
|
| 93 |
-
image_width = st.number_input("Image width", value = 400
|
| 94 |
-
image_height = st.number_input("Image height", value = 200
|
| 95 |
-
scale = st.number_input("Scale", value = 1
|
| 96 |
|
| 97 |
try:
|
| 98 |
extype = get_ext(uploaded_file)
|
| 99 |
|
| 100 |
if extype.endswith(".txt"):
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
wordcloud = WordCloud(max_font_size = max_font,
|
| 105 |
max_words = max_words,
|
| 106 |
background_color=background,
|
|
@@ -114,11 +160,25 @@ if uploaded_file is not None:
|
|
| 114 |
st.image(img, use_container_width=True)
|
| 115 |
|
| 116 |
elif extype.endswith(".csv"):
|
| 117 |
-
texts =
|
| 118 |
|
|
|
|
| 119 |
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
|
|
|
|
|
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
st.write(e)
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from wordcloud import WordCloud
|
| 5 |
+
from tools import sourceformat as sf
|
| 6 |
|
| 7 |
# ===config===
|
| 8 |
st.set_page_config(
|
|
|
|
| 34 |
st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
|
| 35 |
st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
|
| 36 |
st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
|
|
|
|
| 37 |
|
| 38 |
st.header("Wordcloud", anchor=False)
|
| 39 |
st.subheader('Put your file here...', anchor=False)
|
|
|
|
| 59 |
st.cache_data.clear()
|
| 60 |
|
| 61 |
#===text reading===
|
| 62 |
+
@st.cache_data(ttl=3600)
|
| 63 |
def read_txt(intext):
    """Return the full contents of a binary text stream as a string.

    Parameters
    ----------
    intext : binary file-like object
        Stream whose ``read()`` yields bytes.

    Returns
    -------
    str
        The stream's bytes decoded with the default codec (UTF-8).

    Raises
    ------
    UnicodeDecodeError
        If the bytes are not valid UTF-8.
    """
    # This is used as a fallback after another parser has already read the
    # same stream; rewind so the whole file is decoded, not an empty tail.
    rewind = getattr(intext, "seek", None)
    if rewind is not None:
        rewind(0)
    return intext.read().decode()
|
| 65 |
|
| 66 |
+
@st.cache_data(ttl=3600)
|
| 67 |
+
def conv_txt(extype):
    """Parse an uploaded ``.txt`` export into a DataFrame of papers.

    Parameters
    ----------
    extype : binary file-like object
        Despite its name, this is the uploaded file itself (the page calls
        ``conv_txt(uploaded_file)``), not an extension string.

    Returns
    -------
    pandas.DataFrame or None
        The result of ``sf.medline`` when the content contains ``"PMID"``;
        otherwise the tab-separated table with its columns renamed. ``None``
        when the tab-separated parse yields a single column, i.e. the file
        is not tabular.

    Raises
    ------
    UnicodeDecodeError
        If the content is not valid UTF-8.
    Exception
        Whatever ``pandas.read_csv`` or the ``sf`` converters raise.
    """
    column_names = {
        'TI': 'Title',
        'SO': 'Source title',
        'DE': 'Author Keywords',
        'DT': 'Document Type',
        'AB': 'Abstract',
        'TC': 'Cited by',
        'PY': 'Year',
        'ID': 'Keywords Plus',
        'rights_date_used': 'Year',
    }

    try:
        # Read from the argument (not a module global) and always start at
        # the beginning, in case the stream was consumed on an earlier run.
        extype.seek(0)
        if "PMID" in extype.read().decode():
            extype.seek(0)
            return sf.medline(extype)

        extype.seek(0)
        papers = pd.read_csv(extype, sep='\t')

        # A single column means the text is not a tab-separated table.
        if len(papers.columns) == 1:
            return None

        if "htid" in papers.columns:
            papers = sf.htrc(papers)

        # NOTE(review): the source lost its indentation; the rename is
        # assumed to apply to every tabular result — confirm.
        papers.rename(columns=column_names, inplace=True)
        return papers
    finally:
        # Leave the stream rewound so a caller falling back to a plain-text
        # read sees the whole file instead of an exhausted stream.
        extype.seek(0)
|
| 94 |
|
| 95 |
+
#===csv reading===
|
| 96 |
+
@st.cache_data(ttl=3600)
|
| 97 |
+
def upload(file):
    """Load an uploaded CSV file into a DataFrame.

    Parameters
    ----------
    file : path or file-like object
        The CSV source handed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table. When the first column name contains
        ``"About the data"`` the frame is first passed through ``sf.dim``
        and its columns are renamed.

    Raises
    ------
    Exception
        Whatever ``pandas.read_csv`` or ``sf.dim`` raise.
    """
    # Read from the argument rather than the module-level upload, so the
    # function works for whatever file it is given.
    papers = pd.read_csv(file)

    if "About the data" in papers.columns[0]:
        papers = sf.dim(papers)
        # NOTE(review): the source lost its indentation; the rename is
        # assumed to belong to this branch only — confirm.
        column_names = {
            'MeSH terms': 'Keywords',
            'PubYear': 'Year',
            'Times cited': 'Cited by',
            'Publication Type': 'Document Type',
        }
        papers.rename(columns=column_names, inplace=True)

    return papers
|
| 109 |
|
| 110 |
+
#===Read data===
|
| 111 |
+
uploaded_file = st.file_uploader('', type=['txt','csv'], on_change=reset_all)
|
| 112 |
+
|
| 113 |
if uploaded_file is not None:
|
| 114 |
|
| 115 |
tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
|
|
|
|
| 117 |
with tab1:
|
| 118 |
c1, c2 = st.columns(2)
|
| 119 |
|
| 120 |
+
|
| 121 |
with c1:
|
| 122 |
+
max_font = st.number_input("Max Font Size", min_value = 1, value = 100)
|
| 123 |
+
max_words = st.number_input("Max Word Count", min_value = 1, value = 250)
|
| 124 |
+
background = st.selectbox("Background color", ["white","black"])
|
| 125 |
|
| 126 |
|
| 127 |
with c2:
|
| 128 |
words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
|
| 129 |
stopwords = words_to_remove.split(';')
|
| 130 |
+
image_width = st.number_input("Image width", value = 400)
|
| 131 |
+
image_height = st.number_input("Image height", value = 200)
|
| 132 |
+
scale = st.number_input("Scale", value = 1)
|
| 133 |
|
| 134 |
try:
|
| 135 |
extype = get_ext(uploaded_file)
|
| 136 |
|
| 137 |
if extype.endswith(".txt"):
|
| 138 |
+
|
| 139 |
+
try:
|
| 140 |
+
texts = conv_txt(uploaded_file)
|
| 141 |
+
colcho = c1.selectbox("Choose Column", list(texts))
|
| 142 |
+
fulltext = " ".join(list(texts[colcho]))
|
| 143 |
|
| 144 |
+
except:
|
| 145 |
+
fulltext = read_txt(uploaded_file)
|
| 146 |
+
|
| 147 |
+
if st.button("Submit"):
|
| 148 |
+
|
| 149 |
+
|
| 150 |
wordcloud = WordCloud(max_font_size = max_font,
|
| 151 |
max_words = max_words,
|
| 152 |
background_color=background,
|
|
|
|
| 160 |
st.image(img, use_container_width=True)
|
| 161 |
|
| 162 |
elif extype.endswith(".csv"):
|
| 163 |
+
texts = upload(uploaded_file)
|
| 164 |
|
| 165 |
+
colcho = c1.selectbox("Choose Column", list(texts))
|
| 166 |
|
| 167 |
+
fullcolumn = " ".join(list(texts[colcho]))
|
| 168 |
+
|
| 169 |
+
if st.button("Submit"):
|
| 170 |
+
|
| 171 |
+
wordcloud = WordCloud(max_font_size = max_font,
|
| 172 |
+
max_words = max_words,
|
| 173 |
+
background_color=background,
|
| 174 |
+
stopwords = stopwords,
|
| 175 |
+
height = image_height,
|
| 176 |
+
width = image_width,
|
| 177 |
+
scale = scale).generate(fullcolumn)
|
| 178 |
+
img = wordcloud.to_image()
|
| 179 |
|
| 180 |
+
with st.container(border=True):
|
| 181 |
+
st.image(img, use_container_width=True)
|
| 182 |
|
| 183 |
except Exception as e:
|
| 184 |
st.write(e)
|
pages/9 Summarization.py
CHANGED
|
@@ -96,9 +96,9 @@ if uploaded_file is not None:
|
|
| 96 |
max_length = st.number_input("Maximum length", min_value = 1)
|
| 97 |
|
| 98 |
if method == "Extractive":
|
| 99 |
-
ex_method = st.selectbox("Extractive method", ("t5","
|
| 100 |
-
if ex_method == "
|
| 101 |
-
phrase_limit = st.number_input("Phrase
|
| 102 |
sentence_limit = st.number_input("Sentence limit", min_value = 0)
|
| 103 |
elif ex_method == "t5" or ex_method == "FalconsAI t5":
|
| 104 |
min_length = st.number_input("Minimum length", min_value = 0)
|
|
@@ -154,6 +154,8 @@ if uploaded_file is not None:
|
|
| 154 |
summary = summed[0]["summary_text"]
|
| 155 |
return summary
|
| 156 |
|
|
|
|
|
|
|
| 157 |
def transformersum(text,model):
|
| 158 |
summarizer = pipeline("summarization", model = model)
|
| 159 |
summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
|
|
@@ -196,7 +198,7 @@ if uploaded_file is not None:
|
|
| 196 |
st.write(fulltext)
|
| 197 |
|
| 198 |
if method == "Extractive":
|
| 199 |
-
if(ex_method == "
|
| 200 |
summary = SpacyRank(fulltext)
|
| 201 |
elif(ex_method == "t5"):
|
| 202 |
summary = t5summ(fulltext)
|
|
@@ -226,7 +228,7 @@ if uploaded_file is not None:
|
|
| 226 |
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
|
| 227 |
rougescores = scorer.score(reference, candidate)
|
| 228 |
|
| 229 |
-
st.write(f"BLEU Score
|
| 230 |
st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
|
| 231 |
|
| 232 |
text_file = summary
|
|
@@ -239,7 +241,7 @@ if uploaded_file is not None:
|
|
| 239 |
|
| 240 |
elif(extype.endswith(".csv")):
|
| 241 |
if method == "Extractive":
|
| 242 |
-
if(ex_method == "
|
| 243 |
summaries = texts['texts'].apply(SpacyRank)
|
| 244 |
fullnsums = summaries.to_frame()
|
| 245 |
fullnsums['full'] = texts['texts']
|
|
@@ -293,25 +295,20 @@ if uploaded_file is not None:
|
|
| 293 |
label = "Download scores and results",
|
| 294 |
data = result2,
|
| 295 |
file_name = "ScoredSummaries.csv",
|
| 296 |
-
mime = "
|
| 297 |
on_click = "ignore"
|
| 298 |
)
|
| 299 |
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
#do this
|
| 305 |
with tab2:
|
| 306 |
st.write("")
|
| 307 |
|
| 308 |
with tab3:
|
| 309 |
-
st.header("Summarization result
|
| 310 |
st.write("Click the download button (example) to get the text file result")
|
| 311 |
st.button(label = "Download Results")
|
| 312 |
|
| 313 |
|
| 314 |
-
except Exception
|
| 315 |
-
st.write(e)
|
| 316 |
st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
|
| 317 |
st.stop()
|
|
|
|
| 96 |
max_length = st.number_input("Maximum length", min_value = 1)
|
| 97 |
|
| 98 |
if method == "Extractive":
|
| 99 |
+
ex_method = st.selectbox("Extractive method", ("t5","PyTextRank"))
|
| 100 |
+
if ex_method == "PyTextRank":
|
| 101 |
+
phrase_limit = st.number_input("Phrase limit", min_value = 0)
|
| 102 |
sentence_limit = st.number_input("Sentence limit", min_value = 0)
|
| 103 |
elif ex_method == "t5" or ex_method == "FalconsAI t5":
|
| 104 |
min_length = st.number_input("Minimum length", min_value = 0)
|
|
|
|
| 154 |
summary = summed[0]["summary_text"]
|
| 155 |
return summary
|
| 156 |
|
| 157 |
+
#used for any other huggingface model not used above
|
| 158 |
+
|
| 159 |
def transformersum(text,model):
|
| 160 |
summarizer = pipeline("summarization", model = model)
|
| 161 |
summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
|
|
|
|
| 198 |
st.write(fulltext)
|
| 199 |
|
| 200 |
if method == "Extractive":
|
| 201 |
+
if(ex_method == "PyTextRank"):
|
| 202 |
summary = SpacyRank(fulltext)
|
| 203 |
elif(ex_method == "t5"):
|
| 204 |
summary = t5summ(fulltext)
|
|
|
|
| 228 |
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
|
| 229 |
rougescores = scorer.score(reference, candidate)
|
| 230 |
|
| 231 |
+
st.write(f"BLEU Score: {BLEuscore:.2f}")
|
| 232 |
st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
|
| 233 |
|
| 234 |
text_file = summary
|
|
|
|
| 241 |
|
| 242 |
elif(extype.endswith(".csv")):
|
| 243 |
if method == "Extractive":
|
| 244 |
+
if(ex_method == "PyTextRank"):
|
| 245 |
summaries = texts['texts'].apply(SpacyRank)
|
| 246 |
fullnsums = summaries.to_frame()
|
| 247 |
fullnsums['full'] = texts['texts']
|
|
|
|
| 295 |
label = "Download scores and results",
|
| 296 |
data = result2,
|
| 297 |
file_name = "ScoredSummaries.csv",
|
| 298 |
+
mime = "text\csv",
|
| 299 |
on_click = "ignore"
|
| 300 |
)
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
#do this
|
| 303 |
with tab2:
|
| 304 |
st.write("")
|
| 305 |
|
| 306 |
with tab3:
|
| 307 |
+
st.header("Summarization result")
|
| 308 |
st.write("Click the download button (example) to get the text file result")
|
| 309 |
st.button(label = "Download Results")
|
| 310 |
|
| 311 |
|
| 312 |
+
except Exception:
|
|
|
|
| 313 |
st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
|
| 314 |
st.stop()
|
pages/Rtest.py
CHANGED
|
@@ -53,8 +53,8 @@ if st.button("Test"):
|
|
| 53 |
process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 54 |
result2 = process2.communicate()
|
| 55 |
st.write(result2)
|
| 56 |
-
|
| 57 |
-
|
| 58 |
|
| 59 |
|
| 60 |
|
|
|
|
| 53 |
process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 54 |
result2 = process2.communicate()
|
| 55 |
st.write(result2)
|
| 56 |
+
with st.container(border=True):
|
| 57 |
+
st.image('plot.png')
|
| 58 |
|
| 59 |
|
| 60 |
|
pages/testr.R
CHANGED
|
@@ -1 +1,7 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
library(ggplot2)
|
| 2 |
+
|
| 3 |
+
ggplot(mtcars,aes(mpg,wt))+
|
| 4 |
+
geom_point()
|
| 5 |
+
|
| 6 |
+
ggsave('plot.png')
|
| 7 |
+
|