File size: 7,225 Bytes
c615548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
c615548
 
 
 
 
 
65c6d06
 
c615548
65c6d06
 
c615548
65c6d06
 
 
c615548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
c615548
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
 
 
c615548
 
 
 
 
 
 
 
65c6d06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6a85c2
004ad50
 
 
 
65c6d06
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import datetime
import time
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
import ssl
import certifi
import sqlite3

# Configure SSL to use certifi certificates so stdlib urllib downloads
# (e.g. nltk.download) verify against certifi's CA bundle even in minimal
# containers with no system cert store.
# FIX: the factory must accept arbitrary arguments — http.client can invoke
# ssl._create_default_https_context with args, and a zero-argument lambda
# would raise TypeError on those call paths.
ssl._create_default_https_context = lambda *args, **kwargs: ssl.create_default_context(cafile=certifi.where())

# Download VADER lexicon (a no-op if it is already in the NLTK data dir);
# fail fast on error because the analyzer below cannot work without it.
try:
    nltk.download('vader_lexicon')
    print("βœ… VADER lexicon downloaded successfully!")
except Exception as e:
    print(f"❌ Error downloading VADER lexicon: {str(e)}")
    raise

# Initialize VADER sentiment analyzer (shared, read-only after creation)
sia = SentimentIntensityAnalyzer()

# FinViz quote page; the ticker symbol is appended to this prefix.
FINVIZ_URL = 'https://finviz.com/quote.ashx?t='
# Shared HTTP session with a browser User-Agent — presumably to avoid
# FinViz blocking default bot agents (TODO confirm against FinViz policy).
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
})


def get_news(ticker):
    """Fetch the FinViz news table for *ticker*.

    Retries up to 3 times on non-200 responses, sleeping 5 seconds between
    attempts (but not after the last one).

    Args:
        ticker: stock symbol appended to FINVIZ_URL.

    Returns:
        (news_table, None) on success, or (None, error_message) on failure.
        Never raises: all exceptions are converted to an error string.
    """
    try:
        url = FINVIZ_URL + ticker
        for attempt in range(3):
            # FIX: add a timeout so a stalled connection cannot hang the app.
            response = session.get(url, timeout=10)
            if response.status_code == 200:
                break
            # FIX: don't waste 5 seconds after the final failed attempt.
            if attempt < 2:
                time.sleep(5)

        if response.status_code != 200:
            return None, f"❌ Error: Received status code {response.status_code}"

        html = BeautifulSoup(response.text, "html.parser")
        news_table = html.find('table', class_='fullview-news-outer')

        if not news_table:
            return None, "❌ News table not found!"

        return news_table, None
    except Exception as e:
        return None, f"❌ Error fetching stock news: {str(e)}"


def parse_news(news_table):
    """Extract (date, time, headline) rows from a FinViz news table.

    Rows carrying only a time token inherit today's date (FinViz omits the
    date on all but the first row of a day). Malformed rows are skipped.

    Returns:
        DataFrame with 'date', 'time', 'headline' and a parsed 'datetime'
        column; rows whose timestamp failed to parse are dropped.
    """
    records = []
    today = datetime.datetime.today().strftime('%Y-%m-%d')

    for row in news_table.find_all('tr'):
        try:
            container = row.find('div', class_='news-link-container')
            anchor = container.find('a', class_='tab-link-news') if container else row.find('a')

            headline = anchor.get_text().strip() if anchor else ''
            if not headline:
                continue

            cell = row.find('td', align='right')
            tokens = cell.text.strip().split() if cell and cell.text.strip() else []

            if len(tokens) == 1:
                # Time-only row: it belongs to today's date.
                day, clock = today, tokens[0]
            else:
                # Full "Mon-DD-YY HH:MMam/pm" stamp.
                day = datetime.datetime.strptime(tokens[0], '%b-%d-%y').strftime('%Y-%m-%d')
                clock = tokens[1]

            clock = datetime.datetime.strptime(clock, '%I:%M%p').strftime('%H:%M')
            records.append([day, clock, headline])
        except Exception:
            # Ads, separators and unparseable stamps are silently skipped.
            continue

    frame = pd.DataFrame(records, columns=['date', 'time', 'headline'])
    frame['datetime'] = pd.to_datetime(frame['date'] + ' ' + frame['time'], format="%Y-%m-%d %H:%M", errors='coerce')
    return frame.dropna(subset=['datetime'])


def analyze_sentiment(text):
    """Blend VADER and TextBlob polarity into one score in [-1, 1].

    The two analyzers' scores are averaged with equal weight.
    """
    vader = sia.polarity_scores(text)['compound']
    blob = TextBlob(text).sentiment.polarity
    return 0.5 * (vader + blob)


def score_news(df):
    """Score every headline with the combined VADER/TextBlob analyzer.

    Note: adds a 'sentiment_score' column to *df* in place, then returns a
    frame limited to the datetime, headline and score columns.
    """
    df['sentiment_score'] = [analyze_sentiment(h) for h in df['headline']]
    return df.loc[:, ['datetime', 'headline', 'sentiment_score']]


def save_to_db(df, ticker):
    """Append sentiment rows to table '<ticker>_news' in sentiment_data.db.

    Appends (rather than replaces) so repeated runs accumulate history.
    NOTE(review): the table name is built from *ticker* by interpolation;
    inputs come from a fixed dropdown today, but sanitize before allowing
    free-form tickers (SQL identifiers cannot be parameterized).
    """
    conn = sqlite3.connect("sentiment_data.db")
    try:
        df.to_sql(f"{ticker}_news", conn, if_exists="append", index=False)
    finally:
        # FIX: close even when to_sql raises, so the db file isn't leaked.
        conn.close()


def plot_sentiment(df, ticker, interval):
    """Resample sentiment scores to *interval* and plot raw + rolling mean.

    Args:
        df: frame with 'datetime' and 'sentiment_score' columns.
        ticker: used only in the chart title.
        interval: pandas offset alias, e.g. 'h' (hourly) or 'D' (daily).

    Returns:
        A plotly Figure, or None when *df* is empty.
    """
    if df.empty:
        return None

    # FIX: work on a copy — the original wrote the re-parsed 'datetime'
    # column back into the caller's frame as a hidden side effect.
    df = df.copy()
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime')

    # Average scores within each interval bucket; drop empty buckets.
    df = df.resample(interval).mean(numeric_only=True).dropna()

    # 5-period rolling mean smooths the line; min_periods=1 keeps short
    # windows from producing NaN at the start.
    df['rolling_avg'] = df['sentiment_score'].rolling(5, min_periods=1).mean()

    fig = px.line(df, x=df.index, y=['sentiment_score', 'rolling_avg'],
                  labels={"value": "Sentiment Score"},
                  title=f"{ticker} {interval.capitalize()} Sentiment Trends")
    return fig


def analyze_stock_sentiment(ticker, days):
    """Fetch news for *ticker*, score sentiment, and keep the last *days* days.

    Returns a 4-tuple (status message, scored DataFrame, hourly figure,
    daily figure); the last three are None on any failure.
    """
    # Guard: nothing to do without a ticker.
    if not ticker:
        return "❌ Please enter a stock ticker!", None, None, None

    print(f"πŸ“… Selected Days: {days}")  # Debugging

    symbol = ticker.upper()
    news_table, error = get_news(symbol)
    if error:
        return error, None, None, None

    df_news = parse_news(news_table)

    # Restrict headlines to the requested look-back window.
    days = int(days)
    today = datetime.datetime.today()
    start_date = today - datetime.timedelta(days=days)

    print(f"πŸ” Filtering News from {start_date} to {today}")  # Debugging

    df_news['datetime'] = pd.to_datetime(df_news['datetime'])
    df_news = df_news[df_news['datetime'] >= start_date]

    if df_news.empty:
        return f"⚠️ No news found for {symbol} in the last {days} days.", None, None, None

    # Newest headlines first for display.
    df_scored = score_news(df_news).sort_values(by="datetime", ascending=False)

    print(f"πŸ“Š Filtered News Count: {len(df_scored)}")  # Debugging

    save_to_db(df_scored, symbol)

    hourly_fig = plot_sentiment(df_scored, ticker, interval='h')
    daily_fig = plot_sentiment(df_scored, ticker, interval='D')

    return f"βœ… Analysis for {symbol} (Last {days} Days) Complete!", df_scored, hourly_fig, daily_fig


# Gradio Interface: inputs on top, action buttons, then table and two plots.
with gr.Blocks(title="πŸ“ˆ Stock News Sentiment Analyzer") as iface:
    with gr.Row():
        gr.Markdown("## πŸ“ˆStock News Sentiment Analyzer")
        gr.Markdown("Analyze stock news sentiment using VADER and TextBlob.")

    # Input controls
    ticker_input = gr.Dropdown(choices=["AAPL", "TSLA", "AMZN", "MSFT"], label="Stock Ticker")
    history_slider = gr.Slider(minimum=1, maximum=30, step=1, label="Days of News History", value=7, interactive=True)
    status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        run_button = gr.Button("Analyze", variant="primary")
        reset_button = gr.Button("Clear", variant="secondary")

    # Output displays
    results_table = gr.Dataframe(label="Sentiment Analysis", interactive=False)
    hourly_chart = gr.Plot(label="Hourly Sentiment Scores")
    daily_chart = gr.Plot(label="Daily Sentiment Scores")

    # Both buttons write to the same four outputs.
    output_widgets = [status_box, results_table, hourly_chart, daily_chart]
    run_button.click(fn=analyze_stock_sentiment, inputs=[ticker_input, history_slider],
                     outputs=output_widgets)
    reset_button.click(fn=lambda: ("", None, None, None), inputs=None, outputs=output_widgets)

# βœ… Keeps the App Running on Hugging Face
if __name__ == "__main__":
    print("πŸš€ App is running on Hugging Face Spaces...")
    iface.launch(server_name="0.0.0.0", server_port=7860)