T Le committed on
Commit
e0f3355
·
1 Parent(s): ea7c172

Update to latest version

Browse files
pages/10 WordCloud.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
 
5
 
6
  # ===config===
7
  st.set_page_config(
@@ -33,7 +34,6 @@ with st.popover("🔗 Menu"):
33
  st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
34
  st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
35
  st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
36
- st.page_link("pages/Rtest.py",label = "rtesting")
37
 
38
  st.header("Wordcloud", anchor=False)
39
  st.subheader('Put your file here...', anchor=False)
@@ -59,21 +59,57 @@ def reset_all():
59
  st.cache_data.clear()
60
 
61
  #===text reading===
 
62
  def read_txt(intext):
63
  return (intext.read()).decode()
64
 
65
- #===csv reading===
66
- def read_csv(uploaded_file):
67
- fulltexts = pd.read_csv(uploaded_file)
68
- fulltexts.rename(columns={fulltexts.columns[0]: "texts"}, inplace = True)
69
- return fulltexts
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
 
 
 
71
 
72
- #===Read data===
73
- uploaded_file = st.file_uploader('', type=['txt'], on_change=reset_all)
74
-
 
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
 
 
 
77
  if uploaded_file is not None:
78
 
79
  tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
@@ -81,26 +117,36 @@ if uploaded_file is not None:
81
  with tab1:
82
  c1, c2 = st.columns(2)
83
 
 
84
  with c1:
85
- max_font = st.number_input("Max Font Size", min_value = 1, value = 100, on_change=reset_all)
86
- max_words = st.number_input("Max Word Count", min_value = 1, value = 250, on_change=reset_all)
87
- background = st.selectbox("Background color", ["white","black"], on_change=reset_all)
88
 
89
 
90
  with c2:
91
  words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
92
  stopwords = words_to_remove.split(';')
93
- image_width = st.number_input("Image width", value = 400, on_change=reset_all)
94
- image_height = st.number_input("Image height", value = 200, on_change=reset_all)
95
- scale = st.number_input("Scale", value = 1, on_change=reset_all)
96
 
97
  try:
98
  extype = get_ext(uploaded_file)
99
 
100
  if extype.endswith(".txt"):
101
- if st.button("Submit"):
102
- fulltext = read_txt(uploaded_file)
 
 
 
103
 
 
 
 
 
 
 
104
  wordcloud = WordCloud(max_font_size = max_font,
105
  max_words = max_words,
106
  background_color=background,
@@ -114,11 +160,25 @@ if uploaded_file is not None:
114
  st.image(img, use_container_width=True)
115
 
116
  elif extype.endswith(".csv"):
117
- texts = read_csv(uploaded_file)
118
 
 
119
 
120
-
 
 
 
 
 
 
 
 
 
 
 
121
 
 
 
122
 
123
  except Exception as e:
124
  st.write(e)
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
5
+ from tools import sourceformat as sf
6
 
7
  # ===config===
8
  st.set_page_config(
 
34
  st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
35
  st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
36
  st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
 
37
 
38
  st.header("Wordcloud", anchor=False)
39
  st.subheader('Put your file here...', anchor=False)
 
59
  st.cache_data.clear()
60
 
61
  #===text reading===
62
@st.cache_data(ttl=3600)
def read_txt(intext):
    """Return the full contents of an uploaded text file as a string.

    ``intext`` is a binary file-like object; its remaining bytes are read
    and decoded with the default codec of ``bytes.decode``.
    """
    raw_bytes = intext.read()
    return raw_bytes.decode()
65
 
66
@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Try to parse an uploaded .txt file as tabular bibliographic data.

    Despite its name, ``extype`` is the uploaded file object itself (the
    page calls ``conv_txt(uploaded_file)``), not an extension string. The
    function works on that argument instead of the module-level
    ``uploaded_file`` so the cached result always matches what was passed.

    Returns:
        The value produced by ``sf.medline`` when the text contains "PMID";
        otherwise the tab-separated table read by pandas (passed through
        ``sf.htrc`` when it has an "htid" column) with its columns renamed;
        or ``None`` when the table has only one column, i.e. the file is
        plain text rather than a table.
    """
    try:
        # Files containing a "PMID" tag are handed to the medline converter.
        if "PMID" in extype.read().decode():
            extype.seek(0)
            return sf.medline(extype)

        # Short field tags mapped to the column names used by the app.
        col_dict = {'TI': 'Title',
                    'SO': 'Source title',
                    'DE': 'Author Keywords',
                    'DT': 'Document Type',
                    'AB': 'Abstract',
                    'TC': 'Cited by',
                    'PY': 'Year',
                    'ID': 'Keywords Plus',
                    'rights_date_used': 'Year'}
        extype.seek(0)
        papers = pd.read_csv(extype, sep='\t')

        # If the text has just one column (or is not tabular), return nothing.
        if len(papers.columns) == 1:
            return None

        if "htid" in papers.columns:
            papers = sf.htrc(papers)
        papers.rename(columns=col_dict, inplace=True)
        return papers
    finally:
        # Rewind so a later reader of the same upload (e.g. the plain-text
        # fallback) starts from the beginning instead of a consumed stream.
        extype.seek(0)
94
 
95
+ #===csv reading===
96
@st.cache_data(ttl=3600)
def upload(file):
    """Read an uploaded CSV file into a pandas DataFrame.

    The ``file`` argument is what gets parsed (not the module-level
    ``uploaded_file``), so the cached result always corresponds to the
    object the caller passed in.

    When the first column header contains "About the data", the frame is
    passed through ``sf.dim`` and a few of its columns are renamed to the
    names used elsewhere in the app.

    Returns:
        The resulting DataFrame.
    """
    papers = pd.read_csv(file)
    if "About the data" in papers.columns[0]:
        papers = sf.dim(papers)
        col_dict = {'MeSH terms': 'Keywords',
                    'PubYear': 'Year',
                    'Times cited': 'Cited by',
                    'Publication Type': 'Document Type'
                    }
        papers.rename(columns=col_dict, inplace=True)

    return papers
109
 
110
+ #===Read data===
111
+ uploaded_file = st.file_uploader('', type=['txt','csv'], on_change=reset_all)
112
+
113
  if uploaded_file is not None:
114
 
115
  tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
 
117
  with tab1:
118
  c1, c2 = st.columns(2)
119
 
120
+
121
  with c1:
122
+ max_font = st.number_input("Max Font Size", min_value = 1, value = 100)
123
+ max_words = st.number_input("Max Word Count", min_value = 1, value = 250)
124
+ background = st.selectbox("Background color", ["white","black"])
125
 
126
 
127
  with c2:
128
  words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
129
  stopwords = words_to_remove.split(';')
130
+ image_width = st.number_input("Image width", value = 400)
131
+ image_height = st.number_input("Image height", value = 200)
132
+ scale = st.number_input("Scale", value = 1)
133
 
134
  try:
135
  extype = get_ext(uploaded_file)
136
 
137
  if extype.endswith(".txt"):
138
+
139
+ try:
140
+ texts = conv_txt(uploaded_file)
141
+ colcho = c1.selectbox("Choose Column", list(texts))
142
+ fulltext = " ".join(list(texts[colcho]))
143
 
144
+ except:
145
+ fulltext = read_txt(uploaded_file)
146
+
147
+ if st.button("Submit"):
148
+
149
+
150
  wordcloud = WordCloud(max_font_size = max_font,
151
  max_words = max_words,
152
  background_color=background,
 
160
  st.image(img, use_container_width=True)
161
 
162
  elif extype.endswith(".csv"):
163
+ texts = upload(uploaded_file)
164
 
165
+ colcho = c1.selectbox("Choose Column", list(texts))
166
 
167
+ fullcolumn = " ".join(list(texts[colcho]))
168
+
169
+ if st.button("Submit"):
170
+
171
+ wordcloud = WordCloud(max_font_size = max_font,
172
+ max_words = max_words,
173
+ background_color=background,
174
+ stopwords = stopwords,
175
+ height = image_height,
176
+ width = image_width,
177
+ scale = scale).generate(fullcolumn)
178
+ img = wordcloud.to_image()
179
 
180
+ with st.container(border=True):
181
+ st.image(img, use_container_width=True)
182
 
183
  except Exception as e:
184
  st.write(e)
pages/9 Summarization.py CHANGED
@@ -96,9 +96,9 @@ if uploaded_file is not None:
96
  max_length = st.number_input("Maximum length", min_value = 1)
97
 
98
  if method == "Extractive":
99
- ex_method = st.selectbox("Extractive method", ("t5","Spacy PyTextRank"))
100
- if ex_method == "Spacy PyTextRank":
101
- phrase_limit = st.number_input("Phrase length limit", min_value = 0)
102
  sentence_limit = st.number_input("Sentence limit", min_value = 0)
103
  elif ex_method == "t5" or ex_method == "FalconsAI t5":
104
  min_length = st.number_input("Minimum length", min_value = 0)
@@ -154,6 +154,8 @@ if uploaded_file is not None:
154
  summary = summed[0]["summary_text"]
155
  return summary
156
 
 
 
157
  def transformersum(text,model):
158
  summarizer = pipeline("summarization", model = model)
159
  summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
@@ -196,7 +198,7 @@ if uploaded_file is not None:
196
  st.write(fulltext)
197
 
198
  if method == "Extractive":
199
- if(ex_method == "Spacy PyTextRank"):
200
  summary = SpacyRank(fulltext)
201
  elif(ex_method == "t5"):
202
  summary = t5summ(fulltext)
@@ -226,7 +228,7 @@ if uploaded_file is not None:
226
  scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
227
  rougescores = scorer.score(reference, candidate)
228
 
229
- st.write(f"BLEU Score (NLTK): {BLEuscore:.2f}")
230
  st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
231
 
232
  text_file = summary
@@ -239,7 +241,7 @@ if uploaded_file is not None:
239
 
240
  elif(extype.endswith(".csv")):
241
  if method == "Extractive":
242
- if(ex_method == "Spacy PyTextRank"):
243
  summaries = texts['texts'].apply(SpacyRank)
244
  fullnsums = summaries.to_frame()
245
  fullnsums['full'] = texts['texts']
@@ -293,25 +295,20 @@ if uploaded_file is not None:
293
  label = "Download scores and results",
294
  data = result2,
295
  file_name = "ScoredSummaries.csv",
296
- mime = "test\csv",
297
  on_click = "ignore"
298
  )
299
 
300
-
301
-
302
-
303
-
304
  #do this
305
  with tab2:
306
  st.write("")
307
 
308
  with tab3:
309
- st.header("Summarization result (.txt)")
310
  st.write("Click the download button (example) to get the text file result")
311
  st.button(label = "Download Results")
312
 
313
 
314
- except Exception as e:
315
- st.write(e)
316
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
317
  st.stop()
 
96
  max_length = st.number_input("Maximum length", min_value = 1)
97
 
98
  if method == "Extractive":
99
+ ex_method = st.selectbox("Extractive method", ("t5","PyTextRank"))
100
+ if ex_method == "PyTextRank":
101
+ phrase_limit = st.number_input("Phrase limit", min_value = 0)
102
  sentence_limit = st.number_input("Sentence limit", min_value = 0)
103
  elif ex_method == "t5" or ex_method == "FalconsAI t5":
104
  min_length = st.number_input("Minimum length", min_value = 0)
 
154
  summary = summed[0]["summary_text"]
155
  return summary
156
 
157
+ #used for any other huggingface model not used above
158
+
159
  def transformersum(text,model):
160
  summarizer = pipeline("summarization", model = model)
161
  summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
 
198
  st.write(fulltext)
199
 
200
  if method == "Extractive":
201
+ if(ex_method == "PyTextRank"):
202
  summary = SpacyRank(fulltext)
203
  elif(ex_method == "t5"):
204
  summary = t5summ(fulltext)
 
228
  scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
229
  rougescores = scorer.score(reference, candidate)
230
 
231
+ st.write(f"BLEU Score: {BLEuscore:.2f}")
232
  st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
233
 
234
  text_file = summary
 
241
 
242
  elif(extype.endswith(".csv")):
243
  if method == "Extractive":
244
+ if(ex_method == "PyTextRank"):
245
  summaries = texts['texts'].apply(SpacyRank)
246
  fullnsums = summaries.to_frame()
247
  fullnsums['full'] = texts['texts']
 
295
  label = "Download scores and results",
296
  data = result2,
297
  file_name = "ScoredSummaries.csv",
298
+ mime = "text\csv",
299
  on_click = "ignore"
300
  )
301
 
 
 
 
 
302
  #do this
303
  with tab2:
304
  st.write("")
305
 
306
  with tab3:
307
+ st.header("Summarization result")
308
  st.write("Click the download button (example) to get the text file result")
309
  st.button(label = "Download Results")
310
 
311
 
312
+ except Exception:
 
313
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
314
  st.stop()
pages/Rtest.py CHANGED
@@ -53,8 +53,8 @@ if st.button("Test"):
53
  process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
54
  result2 = process2.communicate()
55
  st.write(result2)
56
- #with st.container(border=True):
57
- #st.image('plot.png')
58
 
59
 
60
 
 
53
  process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
54
  result2 = process2.communicate()
55
  st.write(result2)
56
+ with st.container(border=True):
57
+ st.image('plot.png')
58
 
59
 
60
 
pages/testr.R CHANGED
@@ -1 +1,7 @@
1
- print('loaded script')
 
 
 
 
 
 
 
1
# Draw a scatter plot of the built-in mtcars data set (mpg on x, wt on y)
# and save it as plot.png in the working directory.
library(ggplot2)

# Keep the plot in a variable instead of leaving it as a bare top-level
# expression: under Rscript a bare ggplot is auto-printed to the default
# graphics device, which writes a stray Rplots.pdf.
p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point()

# Pass the plot and size explicitly so the output does not depend on
# last_plot() or on whichever graphics device happens to be open.
ggsave('plot.png', plot = p, width = 7, height = 7)
7
+