Files
docker-configs/backtest/create_fear_greed_index.py
2025-07-18 00:00:01 -05:00

213 lines
6.9 KiB
Python
Executable File

import yfinance as yf
import pandas as pd
import numpy as np
import sqlite3
from datetime import datetime, timedelta
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
def download_fear_greed_data(start_date="2000-01-01", end_date=None):
    """Download the price histories needed to synthesize a Fear & Greed index.

    Args:
        start_date: ISO date string for the first bar to request.
        end_date: ISO date string for the last bar; defaults to today.

    Returns:
        dict mapping ticker symbol -> DataFrame with columns
        ['Close', 'returns', '20d_return'] (daily and 20-day pct changes).
        Tickers that fail to download are simply omitted, so callers must
        check the dict's completeness themselves.
    """
    print("Downloading Fear & Greed index components...")
    # The five proxies used to approximate CNN's Fear & Greed components.
    tickers = {
        'SPY': 'S&P 500 ETF',
        '^GSPC': 'S&P 500 Index',
        'HYG': 'High Yield Bond ETF',
        'TLT': '20+ Year Treasury ETF',
        'GLD': 'Gold ETF',
    }
    if end_date is None:
        end_date = datetime.now().strftime("%Y-%m-%d")
    data = {}
    for ticker, name in tickers.items():
        try:
            print(f"Downloading {ticker} ({name})...")
            stock = yf.Ticker(ticker)
            hist = stock.history(start=start_date, end=end_date)
            if not hist.empty:
                # Keep close prices plus 1-day and 20-day returns; the
                # downstream component math only needs these three columns.
                hist['returns'] = hist['Close'].pct_change()
                hist['20d_return'] = hist['Close'].pct_change(20)
                data[ticker] = hist[['Close', 'returns', '20d_return']].copy()
                print(f" {ticker}: {len(hist)} records")
            else:
                print(f" {ticker}: No data")
        except Exception as e:
            # Best-effort download: report and continue so one bad ticker
            # doesn't abort the whole run (main() validates completeness).
            print(f" Error downloading {ticker}: {e}")
    return data
def calculate_fear_greed_components(data):
    """Calculate a synthetic Fear & Greed index from five market components.

    Args:
        data: dict with keys 'SPY', '^GSPC', 'HYG', 'TLT', 'GLD', each a
            DataFrame indexed by date with columns
            ['Close', 'returns', '20d_return'].

    Returns:
        DataFrame indexed by calendar day with the five component columns
        ('momentum', 'strength', 'breadth', 'junk_demand', 'safe_haven'),
        their equal-weighted average 'fear_greed_index' (0-100), and
        'spy_close' as a benchmark price. NaN warm-up rows are dropped.
    """
    print("Calculating Fear & Greed components...")
    # Restrict to the range where every series has data, then reindex onto
    # a daily calendar, forward-filling over weekends/holidays.
    start_date = max([d.index.min() for d in data.values() if len(d) > 0])
    end_date = min([d.index.max() for d in data.values() if len(d) > 0])
    print(f"Common date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    # .ffill() replaces the deprecated fillna(method='ffill')
    # (removed in pandas 3.0); behavior is identical.
    aligned = {
        ticker: data[ticker].reindex(date_range).ffill()
        for ticker in ('SPY', '^GSPC', 'HYG', 'TLT', 'GLD')
    }
    spy_aligned = aligned['SPY']
    gspc_aligned = aligned['^GSPC']
    hyg_aligned = aligned['HYG']
    tlt_aligned = aligned['TLT']
    gld_aligned = aligned['GLD']
    fg_data = pd.DataFrame(index=date_range)
    # 1. Stock Price Momentum: S&P 500 distance from its 125-day MA,
    #    mapped so -10% -> 0 and +10% -> 100, then clipped.
    gspc_125ma = gspc_aligned['Close'].rolling(125).mean()
    momentum = ((gspc_aligned['Close'] - gspc_125ma) / gspc_125ma * 100)
    fg_data['momentum'] = np.clip((momentum + 10) * 5, 0, 100)

    # 2. Stock Price Strength: 14-day RSI of the S&P 500 (already 0-100).
    def calculate_rsi(prices, window=14):
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    fg_data['strength'] = calculate_rsi(gspc_aligned['Close'])
    # 3. Market Breadth (simplified): SPY 20-day return scaled by its
    #    annualized 20-day volatility, centered at 50.
    spy_20d_vol = spy_aligned['returns'].rolling(20).std() * np.sqrt(252)
    spy_20d_ret = spy_aligned['20d_return']
    breadth = (spy_20d_ret / spy_20d_vol) * 100 + 50
    fg_data['breadth'] = np.clip(breadth, 0, 100)
    # 4. Junk Bond Demand: junk (HYG) outperformance vs Treasuries (TLT).
    bond_spread = hyg_aligned['20d_return'] - tlt_aligned['20d_return']
    fg_data['junk_demand'] = np.clip((bond_spread * 100 + 50), 0, 100)
    # 5. Safe Haven Demand: stocks (SPY) outperformance vs gold (GLD).
    safe_haven = spy_aligned['20d_return'] - gld_aligned['20d_return']
    fg_data['safe_haven'] = np.clip((safe_haven * 100 + 50), 0, 100)
    # Final index: equal-weighted average of the five components.
    fg_data['fear_greed_index'] = (
        fg_data['momentum'] +
        fg_data['strength'] +
        fg_data['breadth'] +
        fg_data['junk_demand'] +
        fg_data['safe_haven']
    ) / 5
    # Keep SPY close alongside for benchmarking in the database.
    fg_data['spy_close'] = spy_aligned['Close']
    # Drop the rolling-window warm-up rows (125-day MA dominates).
    fg_data = fg_data.dropna()
    print(f"Generated Fear & Greed index for {len(fg_data)} days")
    print(f"Fear & Greed range: {fg_data['fear_greed_index'].min():.1f} - {fg_data['fear_greed_index'].max():.1f}")
    return fg_data
def save_to_database(fg_data, db_path='data/stock_data.db'):
    """Persist the Fear & Greed DataFrame to SQLite.

    Recreates two tables from scratch:
      - fear_greed_index: date + index value + the five components
      - fear_greed_data: date + SPY close (benchmark)

    Args:
        fg_data: DataFrame indexed by date with columns 'fear_greed_index',
            'momentum', 'strength', 'breadth', 'junk_demand', 'safe_haven',
            and 'spy_close'.
        db_path: path to the SQLite database file (default matches the
            original hard-coded location so existing callers are unchanged).
    """
    print("Saving Fear & Greed data to database...")
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Rebuild both tables so reruns replace stale data.
        cursor.execute('DROP TABLE IF EXISTS fear_greed_index')
        cursor.execute('''
            CREATE TABLE fear_greed_index (
                date DATE PRIMARY KEY,
                fear_greed_index REAL,
                momentum REAL,
                strength REAL,
                breadth REAL,
                junk_demand REAL,
                safe_haven REAL
            )
        ''')
        cursor.execute('DROP TABLE IF EXISTS fear_greed_data')
        cursor.execute('''
            CREATE TABLE fear_greed_data (
                date DATE PRIMARY KEY,
                spy_close REAL
            )
        ''')
        # Build row tuples once and bulk-insert with executemany: one
        # round-trip per table instead of two execute() calls per row.
        index_rows = []
        benchmark_rows = []
        for date, row in fg_data.iterrows():
            day = date.strftime('%Y-%m-%d')
            index_rows.append((
                day,
                row['fear_greed_index'],
                row['momentum'],
                row['strength'],
                row['breadth'],
                row['junk_demand'],
                row['safe_haven'],
            ))
            benchmark_rows.append((day, row['spy_close']))
        cursor.executemany('''
            INSERT INTO fear_greed_index
            (date, fear_greed_index, momentum, strength, breadth, junk_demand, safe_haven)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', index_rows)
        cursor.executemany('''
            INSERT INTO fear_greed_data (date, spy_close)
            VALUES (?, ?)
        ''', benchmark_rows)
        conn.commit()
    finally:
        # Always release the connection, even if an insert fails.
        conn.close()
    print(f"Saved {len(fg_data)} records to database")
def main():
    """Download component data, build the Fear & Greed index, and store it."""
    print("Creating Fear & Greed Index...")
    data = download_fear_greed_data()
    # All five components are required; bail out early if any download failed.
    if len(data) < 5:
        print("Error: Not enough data downloaded. Need SPY, ^GSPC, HYG, TLT, GLD")
        return
    fg_data = calculate_fear_greed_components(data)
    save_to_database(fg_data)
    # Fixed: "\\n" printed a literal backslash-n; "\n" prints a real newline.
    print("\nSample Fear & Greed data (last 5 days):")
    print(fg_data[['fear_greed_index', 'spy_close']].tail().round(2))
    print("\nFear & Greed Index creation completed!")


if __name__ == "__main__":
    main()