Spaces:

haotle
/

LibTesting

Sleeping

App Files Files

T Le committed on Sep 4

Commit

e0f3355

1 Parent(s): ea7c172

Update to latest version

Browse files

Files changed (4) hide show

pages/10 WordCloud.py +79 -19
pages/9 Summarization.py +11 -14
pages/Rtest.py +2 -2
pages/testr.R +7 -1

pages/10 WordCloud.py CHANGED Viewed

@@ -2,6 +2,7 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 # ===config===
 st.set_page_config(
@@ -33,7 +34,6 @@ with st.popover("🔗 Menu"):
     st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
     st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
     st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
-    st.page_link("pages/Rtest.py",label = "rtesting")
 st.header("Wordcloud", anchor=False)
 st.subheader('Put your file here...', anchor=False)
@@ -59,21 +59,57 @@ def reset_all():
     st.cache_data.clear()
 #===text reading===
 def read_txt(intext):
     return (intext.read()).decode()
-#===csv reading===
-def read_csv(uploaded_file):
-    fulltexts = pd.read_csv(uploaded_file)
-    fulltexts.rename(columns={fulltexts.columns[0]: "texts"}, inplace = True)
-    return fulltexts
-#===Read data===
-uploaded_file = st.file_uploader('', type=['txt'], on_change=reset_all)
 if uploaded_file is not None:
     tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
@@ -81,26 +117,36 @@ if uploaded_file is not None:
     with tab1:
         c1, c2 = st.columns(2)
         with c1:
-            max_font = st.number_input("Max Font Size", min_value = 1, value = 100, on_change=reset_all)
-            max_words = st.number_input("Max Word Count", min_value = 1, value = 250, on_change=reset_all)
-            background = st.selectbox("Background color", ["white","black"], on_change=reset_all)
         with c2:
             words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
             stopwords = words_to_remove.split(';')
-            image_width = st.number_input("Image width", value = 400, on_change=reset_all)
-            image_height = st.number_input("Image height", value = 200, on_change=reset_all)
-            scale = st.number_input("Scale", value = 1, on_change=reset_all)
         try:
             extype = get_ext(uploaded_file)
             if extype.endswith(".txt"):
-                if st.button("Submit"):
-                    fulltext = read_txt(uploaded_file)
                     wordcloud = WordCloud(max_font_size = max_font,
                     max_words = max_words,
                     background_color=background,
@@ -114,11 +160,25 @@ if uploaded_file is not None:
                         st.image(img, use_container_width=True)
             elif extype.endswith(".csv"):
-                texts = read_csv(uploaded_file)
         except Exception as e:
             st.write(e)

 import pandas as pd
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
+from tools import sourceformat as sf
 # ===config===
 st.set_page_config(
     st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
     st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
     st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
 st.header("Wordcloud", anchor=False)
 st.subheader('Put your file here...', anchor=False)
     st.cache_data.clear()
 #===text reading===
+@st.cache_data(ttl=3600)
 def read_txt(intext):
+    """Return the contents of ``intext`` decoded to text.
+
+    Reads from the handle's current position via ``read()`` and calls
+    ``decode()`` on the result with its default encoding. The result is
+    cached by ``st.cache_data`` for 3600 seconds.
+    """
     return (intext.read()).decode()
+@st.cache_data(ttl=3600)
+def conv_txt(extype):
+    if("PMID" in (uploaded_file.read()).decode()):
+        uploaded_file.seek(0)
+        papers = sf.medline(uploaded_file)
+        print(papers)
+        return papers
+    col_dict = {'TI': 'Title',
+            'SO': 'Source title',
+            'DE': 'Author Keywords',
+            'DT': 'Document Type',
+            'AB': 'Abstract',
+            'TC': 'Cited by',
+            'PY': 'Year',
+            'ID': 'Keywords Plus',
+            'rights_date_used': 'Year'}
+    uploaded_file.seek(0)
+    papers = pd.read_csv(uploaded_file, sep='\t')
+    #if text just has one column (or is not csv) return nothing
+    if(len(papers.columns)==1):
+        return
+    if("htid" in papers.columns):
+        papers = sf.htrc(papers)
+    papers.rename(columns=col_dict, inplace=True)
+    print(papers)
+    return papers
+#===csv reading===
+@st.cache_data(ttl=3600)
+def upload(file):
+    papers = pd.read_csv(uploaded_file)
+    if "About the data" in papers.columns[0]:
+        papers = sf.dim(papers)
+        col_dict = {'MeSH terms': 'Keywords',
+        'PubYear': 'Year',
+        'Times cited': 'Cited by',
+        'Publication Type': 'Document Type'
+        }
+        papers.rename(columns=col_dict, inplace=True)
+    return papers
+#===Read data===
+uploaded_file = st.file_uploader('', type=['txt','csv'], on_change=reset_all)
 if uploaded_file is not None:
     tab1, tab2, tab3 = st.tabs(["📈 Generate visualization", "📃 Reference", "⬇️ Download Help"])
     with tab1:
         c1, c2 = st.columns(2)
         with c1:
+            max_font = st.number_input("Max Font Size", min_value = 1, value = 100)
+            max_words = st.number_input("Max Word Count", min_value = 1, value = 250)
+            background = st.selectbox("Background color", ["white","black"])
         with c2:
             words_to_remove = st.text_input("Remove specific words. Separate words by semicolons (;)")
             stopwords = words_to_remove.split(';')
+            image_width = st.number_input("Image width", value = 400)
+            image_height = st.number_input("Image height", value = 200)
+            scale = st.number_input("Scale", value = 1)
         try:
             extype = get_ext(uploaded_file)
             if extype.endswith(".txt"):
+                try:
+                    texts = conv_txt(uploaded_file)
+                    colcho = c1.selectbox("Choose Column", list(texts))
+                    fulltext = " ".join(list(texts[colcho]))
+                except:
+                    fulltext = read_txt(uploaded_file)
+                if st.button("Submit"):
                     wordcloud = WordCloud(max_font_size = max_font,
                     max_words = max_words,
                     background_color=background,
                         st.image(img, use_container_width=True)
             elif extype.endswith(".csv"):
+                texts = upload(uploaded_file)
+                colcho = c1.selectbox("Choose Column", list(texts))
+                fullcolumn = " ".join(list(texts[colcho]))
+                if st.button("Submit"):
+                    wordcloud = WordCloud(max_font_size = max_font,
+                    max_words = max_words,
+                    background_color=background,
+                    stopwords = stopwords,
+                    height = image_height,
+                    width = image_width,
+                    scale = scale).generate(fullcolumn)
+                    img = wordcloud.to_image()
+                    with st.container(border=True):
+                        st.image(img, use_container_width=True)
         except Exception as e:
             st.write(e)

pages/9 Summarization.py CHANGED Viewed

@@ -96,9 +96,9 @@ if uploaded_file is not None:
             max_length = st.number_input("Maximum length", min_value = 1)
         if method == "Extractive":
-            ex_method = st.selectbox("Extractive method", ("t5","Spacy PyTextRank"))
-            if ex_method == "Spacy PyTextRank":
-                phrase_limit = st.number_input("Phrase length limit", min_value = 0)
                 sentence_limit = st.number_input("Sentence limit", min_value = 0)
             elif ex_method == "t5" or ex_method == "FalconsAI t5":
                 min_length = st.number_input("Minimum length", min_value = 0)
@@ -154,6 +154,8 @@ if uploaded_file is not None:
                     summary = summed[0]["summary_text"]
                     return summary
                 def transformersum(text,model):
                     summarizer = pipeline("summarization", model = model)
                     summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
@@ -196,7 +198,7 @@ if uploaded_file is not None:
                                     st.write(fulltext)
                             if method == "Extractive":
-                                if(ex_method == "Spacy PyTextRank"):
                                     summary = SpacyRank(fulltext)
                                 elif(ex_method == "t5"):
                                     summary = t5summ(fulltext)
@@ -226,7 +228,7 @@ if uploaded_file is not None:
                                 scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
                                 rougescores = scorer.score(reference, candidate)
-                                st.write(f"BLEU Score (NLTK): {BLEuscore:.2f}")
                                 st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
                                 text_file = summary
@@ -239,7 +241,7 @@ if uploaded_file is not None:
                     elif(extype.endswith(".csv")):
                         if method == "Extractive":
-                            if(ex_method == "Spacy PyTextRank"):
                                 summaries = texts['texts'].apply(SpacyRank)
                                 fullnsums = summaries.to_frame()
                                 fullnsums['full'] = texts['texts']
@@ -293,25 +295,20 @@ if uploaded_file is not None:
                                 label = "Download scores and results",
                                 data = result2,
                                 file_name = "ScoredSummaries.csv",
-                                mime = "test\csv",
                                 on_click = "ignore"
                             )
             #do this
             with tab2:
                 st.write("")
             with tab3:
-                st.header("Summarization result (.txt)")
                 st.write("Click the download button (example) to get the text file result")
                 st.button(label = "Download Results")
-    except Exception as e:
-        st.write(e)
         st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
         st.stop()

             max_length = st.number_input("Maximum length", min_value = 1)
         if method == "Extractive":
+            ex_method = st.selectbox("Extractive method", ("t5","PyTextRank"))
+            if ex_method == "PyTextRank":
+                phrase_limit = st.number_input("Phrase limit", min_value = 0)
                 sentence_limit = st.number_input("Sentence limit", min_value = 0)
             elif ex_method == "t5" or ex_method == "FalconsAI t5":
                 min_length = st.number_input("Minimum length", min_value = 0)
                     summary = summed[0]["summary_text"]
                     return summary
+                #used for any other huggingface model not used above
                 def transformersum(text,model):
                     summarizer = pipeline("summarization", model = model)
                     summed = summarizer(text, max_length = max_length, min_length = min_length, do_sample = False)
                                     st.write(fulltext)
                             if method == "Extractive":
+                                if(ex_method == "PyTextRank"):
                                     summary = SpacyRank(fulltext)
                                 elif(ex_method == "t5"):
                                     summary = t5summ(fulltext)
                                 scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
                                 rougescores = scorer.score(reference, candidate)
+                                st.write(f"BLEU Score: {BLEuscore:.2f}")
                                 st.write(f"ROUGE-1 F1 Score: {rougescores['rouge1'].fmeasure:.2f}")
                                 text_file = summary
                     elif(extype.endswith(".csv")):
                         if method == "Extractive":
+                            if(ex_method == "PyTextRank"):
                                 summaries = texts['texts'].apply(SpacyRank)
                                 fullnsums = summaries.to_frame()
                                 fullnsums['full'] = texts['texts']
                                 label = "Download scores and results",
                                 data = result2,
                                 file_name = "ScoredSummaries.csv",
+                                mime = "text\csv",
                                 on_click = "ignore"
                             )
             #do this
             with tab2:
                 st.write("")
             with tab3:
+                st.header("Summarization result")
                 st.write("Click the download button (example) to get the text file result")
                 st.button(label = "Download Results")
+    except Exception:
         st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
         st.stop()

pages/Rtest.py CHANGED Viewed

@@ -53,8 +53,8 @@ if st.button("Test"):
     process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
     result2 = process2.communicate()
     st.write(result2)
-    #with st.container(border=True):
-        #st.image('plot.png')

     process2 = subprocess.Popen(["Rscript", "pages/testr.R"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
     result2 = process2.communicate()
     st.write(result2)
+    with st.container(border=True):
+        st.image('plot.png')

pages/testr.R CHANGED Viewed

	@@ -1 +1,7 @@
1	- ~~print~~(~~'loaded script'~~)

+library(ggplot2)
+ggplot(mtcars,aes(mpg,wt))+
+    geom_point()
+ggsave('plot.png')