Files
docker-configs/backtest/download_data.py
2025-07-18 00:00:01 -05:00

145 lines
4.4 KiB
Python
Executable File

import yfinance as yf
import pandas as pd
import sqlite3
from datetime import datetime
import time
from tqdm import tqdm
import os
def create_database():
db_path = "data/stock_data.db"
os.makedirs("data", exist_ok=True)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS qqq_constituents (
ticker TEXT,
start_date DATE,
end_date DATE,
PRIMARY KEY (ticker, start_date)
)
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS stock_prices (
ticker TEXT,
date DATE,
open REAL,
high REAL,
low REAL,
close REAL,
adj_close REAL,
volume INTEGER,
PRIMARY KEY (ticker, date)
)
''')
conn.commit()
conn.close()
print("Database created successfully")
def get_qqq_tickers():
return [
'AAPL', 'ADBE', 'ADP', 'ALGN', 'AMAT', 'AMD', 'AMGN', 'AMZN',
'ANSS', 'ASML', 'AVGO', 'BIDU', 'BIIB', 'BKNG', 'CDNS', 'CEG',
'CHTR', 'CMCSA', 'COST', 'CPRT', 'CRWD', 'CSCO', 'CSGP', 'CSX',
'CTAS', 'CTSH', 'DDOG', 'DLTR', 'DXCM', 'EA', 'EBAY', 'EXC',
'FANG', 'FAST', 'FTNT', 'GILD', 'GOOG', 'GOOGL', 'HON', 'IDXX',
'ILMN', 'INTC', 'INTU', 'ISRG', 'JD', 'KDP', 'KHC', 'KLAC',
'LCID', 'LRCX', 'LULU', 'MAR', 'MCHP', 'MDB', 'MDLZ', 'MELI',
'META', 'MNST', 'MRNA', 'MRVL', 'MSFT', 'MU', 'NFLX', 'NTES',
'NVDA', 'NXPI', 'ODFL', 'ON', 'ORLY', 'PANW', 'PAYX', 'PCAR',
'PDD', 'PEP', 'PYPL', 'QCOM', 'REGN', 'ROST', 'SBUX', 'SIRI',
'SNPS', 'TEAM', 'TMUS', 'TSLA', 'TTD', 'TTWO', 'TXN', 'VRSK',
'VRTX', 'WBD', 'WDAY', 'XEL', 'ZM', 'ZS'
]
def download_ticker_data(ticker, start_date, end_date):
try:
print(f"Downloading {ticker}...")
stock = yf.Ticker(ticker)
data = stock.history(start=start_date, end=end_date)
if data.empty:
print(f"No data for {ticker}")
return 0
# Insert into database
conn = sqlite3.connect("data/stock_data.db")
records_added = 0
for date, row in data.iterrows():
try:
conn.execute('''
INSERT OR REPLACE INTO stock_prices
(ticker, date, open, high, low, close, adj_close, volume)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (
ticker,
date.strftime('%Y-%m-%d'),
float(row['Open']),
float(row['High']),
float(row['Low']),
float(row['Close']),
float(row['Close']),
int(row['Volume'])
))
records_added += 1
except Exception as e:
print(f"Error inserting row for {ticker} on {date}: {e}")
continue
conn.commit()
conn.close()
print(f"Success {ticker}: {records_added} records")
return records_added
except Exception as e:
print(f"Error downloading {ticker}: {e}")
return 0
def main():
print("Starting QQQ stock data download...")
# Create database
create_database()
# Get tickers
tickers = get_qqq_tickers()
# Add constituents to database
conn = sqlite3.connect("data/stock_data.db")
cursor = conn.cursor()
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
for ticker in tickers:
cursor.execute('''
INSERT OR REPLACE INTO qqq_constituents (ticker, start_date, end_date)
VALUES (?, ?, ?)
''', (ticker, start_date, end_date))
conn.commit()
conn.close()
print(f"Added {len(tickers)} tickers to constituents table")
# Download data
total_records = 0
successful_downloads = 0
for ticker in tqdm(tickers, desc="Downloading data"):
records = download_ticker_data(ticker, start_date, end_date)
if records > 0:
successful_downloads += 1
total_records += records
time.sleep(0.1) # Rate limiting
print(f"\nDownload completed!")
print(f"Successfully downloaded: {successful_downloads}/{len(tickers)} stocks")
print(f"Total records: {total_records}")
if __name__ == "__main__":
main()