File size: 7,225 Bytes
c615548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
c615548
 
 
 
 
 
65c6d06
 
c615548
65c6d06
 
c615548
65c6d06
 
 
c615548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
c615548
 
 
 
 
 
 
 
 
 
 
 
 
65c6d06
 
 
c615548
 
 
 
 
 
 
 
65c6d06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6a85c2
004ad50
 
 
 
65c6d06
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import datetime
import time
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
import ssl
import certifi
import sqlite3

# Configure SSL to use certifi certificates so stdlib urllib downloads
# (e.g. nltk.download) verify against certifi's CA bundle even in minimal
# containers with no system cert store.
# FIX: the factory must accept arbitrary arguments — http.client can invoke
# ssl._create_default_https_context with args, and a zero-argument lambda
# would raise TypeError on those call paths.
ssl._create_default_https_context = lambda *args, **kwargs: ssl.create_default_context(cafile=certifi.where())

# Download VADER lexicon (a no-op if it is already in the NLTK data dir);
# fail fast on error because the analyzer below cannot work without it.
try:
    nltk.download('vader_lexicon')
    print("βœ… VADER lexicon downloaded successfully!")
except Exception as e:
    print(f"❌ Error downloading VADER lexicon: {str(e)}")
    raise

# Initialize VADER sentiment analyzer (shared, read-only after creation)
sia = SentimentIntensityAnalyzer()

# FinViz quote page; the ticker symbol is appended to this prefix.
FINVIZ_URL = 'https://finviz.com/quote.ashx?t='
# Shared HTTP session with a browser User-Agent — presumably to avoid
# FinViz blocking default bot agents (TODO confirm against FinViz policy).
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
})


def get_news(ticker):
    """Fetch the FinViz news table for *ticker*.

    Retries up to 3 times on non-200 responses, sleeping 5 seconds between
    attempts (but not after the last one).

    Args:
        ticker: stock symbol appended to FINVIZ_URL.

    Returns:
        (news_table, None) on success, or (None, error_message) on failure.
        Never raises: all exceptions are converted to an error string.
    """
    try:
        url = FINVIZ_URL + ticker
        for attempt in range(3):
            # FIX: add a timeout so a stalled connection cannot hang the app.
            response = session.get(url, timeout=10)
            if response.status_code == 200:
                break
            # FIX: don't waste 5 seconds after the final failed attempt.
            if attempt < 2:
                time.sleep(5)

        if response.status_code != 200:
            return None, f"❌ Error: Received status code {response.status_code}"

        html = BeautifulSoup(response.text, "html.parser")
        news_table = html.find('table', class_='fullview-news-outer')

        if not news_table:
            return None, "❌ News table not found!"

        return news_table, None
    except Exception as e:
        return None, f"❌ Error fetching stock news: {str(e)}"


def parse_news(news_table):
    """Extract (date, time, headline) rows from a FinViz news table.

    Rows carrying only a time token inherit today's date (FinViz omits the
    date on all but the first row of a day). Malformed rows are skipped.

    Returns:
        DataFrame with 'date', 'time', 'headline' and a parsed 'datetime'
        column; rows whose timestamp failed to parse are dropped.
    """
    records = []
    today = datetime.datetime.today().strftime('%Y-%m-%d')

    for row in news_table.find_all('tr'):
        try:
            container = row.find('div', class_='news-link-container')
            anchor = container.find('a', class_='tab-link-news') if container else row.find('a')

            headline = anchor.get_text().strip() if anchor else ''
            if not headline:
                continue

            cell = row.find('td', align='right')
            tokens = cell.text.strip().split() if cell and cell.text.strip() else []

            if len(tokens) == 1:
                # Time-only row: it belongs to today's date.
                day, clock = today, tokens[0]
            else:
                # Full "Mon-DD-YY HH:MMam/pm" stamp.
                day = datetime.datetime.strptime(tokens[0], '%b-%d-%y').strftime('%Y-%m-%d')
                clock = tokens[1]

            clock = datetime.datetime.strptime(clock, '%I:%M%p').strftime('%H:%M')
            records.append([day, clock, headline])
        except Exception:
            # Ads, separators and unparseable stamps are silently skipped.
            continue

    frame = pd.DataFrame(records, columns=['date', 'time', 'headline'])
    frame['datetime'] = pd.to_datetime(frame['date'] + ' ' + frame['time'], format="%Y-%m-%d %H:%M", errors='coerce')
    return frame.dropna(subset=['datetime'])


def analyze_sentiment(text):
    """Blend VADER and TextBlob polarity into one score in [-1, 1].

    The two analyzers' scores are averaged with equal weight.
    """
    vader = sia.polarity_scores(text)['compound']
    blob = TextBlob(text).sentiment.polarity
    return 0.5 * (vader + blob)


def score_news(df):
    """Score every headline with the combined VADER/TextBlob analyzer.

    Note: adds a 'sentiment_score' column to *df* in place, then returns a
    frame limited to the datetime, headline and score columns.
    """
    df['sentiment_score'] = [analyze_sentiment(h) for h in df['headline']]
    return df.loc[:, ['datetime', 'headline', 'sentiment_score']]


def save_to_db(df, ticker):
    """Append sentiment rows to table '<ticker>_news' in sentiment_data.db.

    Appends (rather than replaces) so repeated runs accumulate history.
    NOTE(review): the table name is built from *ticker* by interpolation;
    inputs come from a fixed dropdown today, but sanitize before allowing
    free-form tickers (SQL identifiers cannot be parameterized).
    """
    conn = sqlite3.connect("sentiment_data.db")
    try:
        df.to_sql(f"{ticker}_news", conn, if_exists="append", index=False)
    finally:
        # FIX: close even when to_sql raises, so the db file isn't leaked.
        conn.close()


def plot_sentiment(df, ticker, interval):
    """Resample sentiment scores to *interval* and plot raw + rolling mean.

    Args:
        df: frame with 'datetime' and 'sentiment_score' columns.
        ticker: used only in the chart title.
        interval: pandas offset alias, e.g. 'h' (hourly) or 'D' (daily).

    Returns:
        A plotly Figure, or None when *df* is empty.
    """
    if df.empty:
        return None

    # FIX: work on a copy — the original wrote the re-parsed 'datetime'
    # column back into the caller's frame as a hidden side effect.
    df = df.copy()
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime')

    # Average scores within each interval bucket; drop empty buckets.
    df = df.resample(interval).mean(numeric_only=True).dropna()

    # 5-period rolling mean smooths the line; min_periods=1 keeps short
    # windows from producing NaN at the start.
    df['rolling_avg'] = df['sentiment_score'].rolling(5, min_periods=1).mean()

    fig = px.line(df, x=df.index, y=['sentiment_score', 'rolling_avg'],
                  labels={"value": "Sentiment Score"},
                  title=f"{ticker} {interval.capitalize()} Sentiment Trends")
    return fig


def analyze_stock_sentiment(ticker, days):
    """Fetch news for *ticker*, score sentiment, and keep the last *days* days.

    Returns a 4-tuple (status message, scored DataFrame, hourly figure,
    daily figure); the last three are None on any failure.
    """
    # Guard: nothing to do without a ticker.
    if not ticker:
        return "❌ Please enter a stock ticker!", None, None, None

    print(f"πŸ“… Selected Days: {days}")  # Debugging

    symbol = ticker.upper()
    news_table, error = get_news(symbol)
    if error:
        return error, None, None, None

    df_news = parse_news(news_table)

    # Restrict headlines to the requested look-back window.
    days = int(days)
    today = datetime.datetime.today()
    start_date = today - datetime.timedelta(days=days)

    print(f"πŸ” Filtering News from {start_date} to {today}")  # Debugging

    df_news['datetime'] = pd.to_datetime(df_news['datetime'])
    df_news = df_news[df_news['datetime'] >= start_date]

    if df_news.empty:
        return f"⚠️ No news found for {symbol} in the last {days} days.", None, None, None

    # Newest headlines first for display.
    df_scored = score_news(df_news).sort_values(by="datetime", ascending=False)

    print(f"πŸ“Š Filtered News Count: {len(df_scored)}")  # Debugging

    save_to_db(df_scored, symbol)

    hourly_fig = plot_sentiment(df_scored, ticker, interval='h')
    daily_fig = plot_sentiment(df_scored, ticker, interval='D')

    return f"βœ… Analysis for {symbol} (Last {days} Days) Complete!", df_scored, hourly_fig, daily_fig


# Gradio Interface: inputs on top, action buttons, then table and two plots.
with gr.Blocks(title="πŸ“ˆ Stock News Sentiment Analyzer") as iface:
    with gr.Row():
        gr.Markdown("## πŸ“ˆStock News Sentiment Analyzer")
        gr.Markdown("Analyze stock news sentiment using VADER and TextBlob.")

    # Input controls
    ticker_input = gr.Dropdown(choices=["AAPL", "TSLA", "AMZN", "MSFT"], label="Stock Ticker")
    history_slider = gr.Slider(minimum=1, maximum=30, step=1, label="Days of News History", value=7, interactive=True)
    status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        run_button = gr.Button("Analyze", variant="primary")
        reset_button = gr.Button("Clear", variant="secondary")

    # Output displays
    results_table = gr.Dataframe(label="Sentiment Analysis", interactive=False)
    hourly_chart = gr.Plot(label="Hourly Sentiment Scores")
    daily_chart = gr.Plot(label="Daily Sentiment Scores")

    # Both buttons write to the same four outputs.
    output_widgets = [status_box, results_table, hourly_chart, daily_chart]
    run_button.click(fn=analyze_stock_sentiment, inputs=[ticker_input, history_slider],
                     outputs=output_widgets)
    reset_button.click(fn=lambda: ("", None, None, None), inputs=None, outputs=output_widgets)

# βœ… Keeps the App Running on Hugging Face
if __name__ == "__main__":
    print("πŸš€ App is running on Hugging Face Spaces...")
    iface.launch(server_name="0.0.0.0", server_port=7860)