File size: 5,363 Bytes
9d7abc9
f974a84
 
 
 
 
 
 
5a4c20a
9d7abc9
 
5a4c20a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f974a84
 
 
 
9d7abc9
f974a84
5a4c20a
fbecdef
9d7abc9
5a4c20a
f974a84
5a4c20a
f974a84
 
 
9d7abc9
 
5a4c20a
 
 
 
9d7abc9
fbecdef
 
 
 
 
 
5a4c20a
fbecdef
 
5a4c20a
 
 
 
 
 
 
 
 
 
 
fbecdef
 
5a4c20a
fbecdef
 
 
 
5a4c20a
fbecdef
 
5a4c20a
9d7abc9
5a4c20a
fbecdef
 
f974a84
5a4c20a
 
f974a84
 
5a4c20a
f974a84
 
9d7abc9
fbecdef
9d7abc9
 
 
fbecdef
9d7abc9
 
 
 
 
5a4c20a
9d7abc9
 
 
 
5a4c20a
9d7abc9
 
 
5a4c20a
f974a84
 
 
fbecdef
9d7abc9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import gradio as gr
import random
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from textstat import flesch_reading_ease, flesch_kincaid_grade

# Point NLTK at a writable data directory before any downloads happen.
# NOTE(review): /tmp is used because Hugging Face Spaces containers only
# allow writes under /tmp — confirm if deploying elsewhere.
os.environ['NLTK_DATA'] = '/tmp/nltk_data'

def download_nltk_data(download_dir='/tmp/nltk_data'):
    """Download the NLTK resources this app needs, best-effort.

    Args:
        download_dir: Directory to store NLTK data in. Defaults to
            ``/tmp/nltk_data``, matching the ``NLTK_DATA`` env var set at
            module top (writable on Hugging Face Spaces).

    All failures are logged and swallowed so the app can still start;
    the tokenizer self-test inside ``humanize_text`` surfaces any
    missing-data problem to the user at request time.
    """
    try:
        # Create the directory if it doesn't exist.
        os.makedirs(download_dir, exist_ok=True)

        # Make sure NLTK searches our directory, without appending a
        # duplicate entry if this function runs more than once.
        if download_dir not in nltk.data.path:
            nltk.data.path.append(download_dir)

        # punkt_tab is required by NLTK 3.9+; punkt is the fallback for
        # older versions. Tagger and stopwords support other passes.
        required_data = [
            'punkt_tab',
            'punkt',
            'averaged_perceptron_tagger',
            'stopwords',
        ]

        # Each resource is attempted independently so one failure does
        # not block the others.
        for data in required_data:
            try:
                nltk.download(data, download_dir=download_dir, quiet=True)
                print(f"Successfully downloaded {data}")
            except Exception as e:
                print(f"Failed to download {data}: {e}")

        print("NLTK data download completed")
        print(f"NLTK data paths: {nltk.data.path}")

    except Exception as e:
        print(f"NLTK setup error: {e}")

# Fetch NLTK data once at import time so request handlers can assume the
# tokenizer models are (best-effort) in place.
download_nltk_data()

class AIContentHumanizer:
    """Rewrites AI-sounding text into more casual prose.

    The pipeline is a series of independent regex passes (phrase swaps,
    contractions, cleanup). The original skeleton referenced helper
    methods (``replace_ai_phrases``, ``add_contractions``, ``clean_text``,
    etc.) that were never defined, so every non-empty ``humanize_text``
    call raised AttributeError; they are implemented here.
    """

    def __init__(self):
        self.setup_humanization_patterns()

    def setup_humanization_patterns(self):
        """Build the lookup tables used by the rewrite passes."""
        # Stock AI phrasing -> casual alternatives (one picked at random).
        self.ai_replacements = {
            r'\bit is important to note that\b': ["worth mentioning that", "keep in mind that", "note that"],
            r'\bin conclusion\b': ["all in all", "to wrap up", "so"],
            r'\bfurthermore\b': ["plus", "also", "on top of that"],
            r'\bdelve into\b': ["dig into", "look at", "explore"],
        }
        # Formal two-word forms -> contractions (leading capital preserved).
        self.contractions = {
            r'\bit is\b': "it's",
            r'\bthat is\b': "that's",
            r'\bdo not\b': "don't",
            r'\bcannot\b': "can't",
            r'\bwe are\b': "we're",
            r'\bthey are\b': "they're",
            r'\byou are\b': "you're",
            r'\bis not\b': "isn't",
            r'\bare not\b': "aren't",
            r'\bwill not\b': "won't",
        }
        # Conversational openers used only by the "heavy" pass.
        self.fillers = ["Honestly,", "To be fair,", "You know,"]

    @staticmethod
    def _match_case(replacement, match):
        """Keep a leading capital when substituting over a capitalized match."""
        matched = match.group(0)
        if matched[:1].isupper():
            return replacement[0].upper() + replacement[1:]
        return replacement

    def replace_ai_phrases(self, text):
        """Swap stock AI phrases for a randomly chosen casual alternative."""
        for pattern, options in self.ai_replacements.items():
            text = re.sub(
                pattern,
                # Bind loop variables as defaults to avoid late-binding bugs.
                lambda m, opts=options: self._match_case(random.choice(opts), m),
                text,
                flags=re.IGNORECASE,
            )
        return text

    def add_contractions(self, text):
        """Collapse formal two-word forms into contractions."""
        for pattern, contraction in self.contractions.items():
            text = re.sub(
                pattern,
                lambda m, c=contraction: self._match_case(c, m),
                text,
                flags=re.IGNORECASE,
            )
        return text

    def vary_sentence_structure(self, text):
        """Hook for sentence-level restructuring; conservative no-op for now."""
        return text

    def add_personal_touches(self, text):
        """Hook for injecting first-person asides; conservative no-op for now."""
        return text

    def add_casual_punctuation(self, text):
        """Relax formal punctuation: semicolons read stiff in casual prose."""
        return text.replace("; ", " - ")

    def add_natural_fillers(self, text):
        """Sometimes prepend a conversational opener ("heavy" mode only)."""
        if text and random.random() < 0.5:
            filler = random.choice(self.fillers)
            return f"{filler} {text[0].lower()}{text[1:]}"
        return text

    def clean_text(self, text):
        """Collapse runs of spaces/tabs introduced by the rewrite passes."""
        return re.sub(r'[ \t]{2,}', ' ', text).strip()

    def get_readability_score(self, text):
        """Return a human-readable Flesch score / grade-level summary.

        Any textstat failure is reported as a string rather than raised,
        so the UI always has something to display.
        """
        try:
            score = flesch_reading_ease(text)
            grade = flesch_kincaid_grade(text)
            # Standard Flesch reading-ease bands.
            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
                    "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
                    "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
                    "Very Difficult")
            return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
        except Exception as e:
            return f"Could not calculate readability: {str(e)}"

    def humanize_text(self, text, intensity="medium"):
        """Run the humanization pipeline over ``text``.

        Args:
            text: Input text; empty/whitespace-only input yields a prompt
                message rather than an error.
            intensity: "light" (phrase swaps + contractions), "medium"
                (adds structure/punctuation passes), or "heavy" (also
                adds filler phrases).

        Returns:
            The rewritten text, or an error message string — this method
            never raises, so the Gradio handler always gets a string.
        """
        if not text or not text.strip():
            return "Please provide text to humanize."

        try:
            text = text.strip()

            # Verify NLTK tokenization works before mutating anything, so
            # missing corpora produce a clear message instead of a trace.
            try:
                test_tokens = sent_tokenize("This is a test sentence.")
                if not test_tokens:
                    raise Exception("NLTK tokenization failed")
            except Exception as nltk_error:
                return f"NLTK Error: {str(nltk_error)}. Please try again or contact support."

            # Baseline passes run at every intensity.
            text = self.replace_ai_phrases(text)
            text = self.add_contractions(text)

            if intensity in ["medium", "heavy"]:
                text = self.vary_sentence_structure(text)
                text = self.add_personal_touches(text)
                text = self.add_casual_punctuation(text)

            if intensity == "heavy":
                text = self.add_natural_fillers(text)

            return self.clean_text(text)

        except Exception as e:
            return f"Error processing text: {str(e)}\n\nOriginal text: {text}"

def create_interface():
    """Assemble the Gradio Blocks UI and wire it to a humanizer instance."""
    humanizer = AIContentHumanizer()

    def process_text(input_text, intensity):
        # Guard against an empty textbox before invoking the pipeline.
        if not input_text:
            return "Please enter some text to humanize.", "No text provided."
        try:
            humanized = humanizer.humanize_text(input_text, intensity)
            return humanized, humanizer.get_readability_score(humanized)
        except Exception as e:
            return f"Error: {str(e)}", "Processing error"

    with gr.Blocks(title="AI Content Humanizer") as interface:
        gr.Markdown("""# 🤖➡️👤 AI Content Humanizer
Transform AI-generated content into human-sounding, casual, and readable text!""")

        # Component creation order determines on-page layout — keep it.
        input_text = gr.Textbox(label="AI-generated Text", lines=8)
        intensity = gr.Radio(["light", "medium", "heavy"], value="medium", label="Humanization Level")
        output_text = gr.Textbox(label="Humanized Text", lines=8, show_copy_button=True)
        readability = gr.Textbox(label="Readability Score", lines=2)

        humanize_btn = gr.Button("Humanize Text")

        # Both the button click and Enter in the input box run the pipeline.
        handler = dict(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
        humanize_btn.click(**handler)
        input_text.submit(**handler)

    return interface

if __name__ == "__main__":
    print("Starting AI Content Humanizer...")
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)