import yfinance as yf import pandas as pd import sqlite3 from datetime import datetime import time from tqdm import tqdm import os def create_database(): db_path = "data/stock_data.db" os.makedirs("data", exist_ok=True) conn = sqlite3.connect(db_path) cursor = conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS qqq_constituents ( ticker TEXT, start_date DATE, end_date DATE, PRIMARY KEY (ticker, start_date) ) ''') cursor.execute(''' CREATE TABLE IF NOT EXISTS stock_prices ( ticker TEXT, date DATE, open REAL, high REAL, low REAL, close REAL, adj_close REAL, volume INTEGER, PRIMARY KEY (ticker, date) ) ''') conn.commit() conn.close() print("Database created successfully") def get_qqq_tickers(): return [ 'AAPL', 'ADBE', 'ADP', 'ALGN', 'AMAT', 'AMD', 'AMGN', 'AMZN', 'ANSS', 'ASML', 'AVGO', 'BIDU', 'BIIB', 'BKNG', 'CDNS', 'CEG', 'CHTR', 'CMCSA', 'COST', 'CPRT', 'CRWD', 'CSCO', 'CSGP', 'CSX', 'CTAS', 'CTSH', 'DDOG', 'DLTR', 'DXCM', 'EA', 'EBAY', 'EXC', 'FANG', 'FAST', 'FTNT', 'GILD', 'GOOG', 'GOOGL', 'HON', 'IDXX', 'ILMN', 'INTC', 'INTU', 'ISRG', 'JD', 'KDP', 'KHC', 'KLAC', 'LCID', 'LRCX', 'LULU', 'MAR', 'MCHP', 'MDB', 'MDLZ', 'MELI', 'META', 'MNST', 'MRNA', 'MRVL', 'MSFT', 'MU', 'NFLX', 'NTES', 'NVDA', 'NXPI', 'ODFL', 'ON', 'ORLY', 'PANW', 'PAYX', 'PCAR', 'PDD', 'PEP', 'PYPL', 'QCOM', 'REGN', 'ROST', 'SBUX', 'SIRI', 'SNPS', 'TEAM', 'TMUS', 'TSLA', 'TTD', 'TTWO', 'TXN', 'VRSK', 'VRTX', 'WBD', 'WDAY', 'XEL', 'ZM', 'ZS' ] def download_ticker_data(ticker, start_date, end_date): try: print(f"Downloading {ticker}...") stock = yf.Ticker(ticker) data = stock.history(start=start_date, end=end_date) if data.empty: print(f"No data for {ticker}") return 0 # Insert into database conn = sqlite3.connect("data/stock_data.db") records_added = 0 for date, row in data.iterrows(): try: conn.execute(''' INSERT OR REPLACE INTO stock_prices (ticker, date, open, high, low, close, adj_close, volume) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ''', ( ticker, date.strftime('%Y-%m-%d'), float(row['Open']), float(row['High']), float(row['Low']), float(row['Close']), float(row['Close']), int(row['Volume']) )) records_added += 1 except Exception as e: print(f"Error inserting row for {ticker} on {date}: {e}") continue conn.commit() conn.close() print(f"Success {ticker}: {records_added} records") return records_added except Exception as e: print(f"Error downloading {ticker}: {e}") return 0 def main(): print("Starting QQQ stock data download...") # Create database create_database() # Get tickers tickers = get_qqq_tickers() # Add constituents to database conn = sqlite3.connect("data/stock_data.db") cursor = conn.cursor() start_date = "2000-01-01" end_date = datetime.now().strftime("%Y-%m-%d") for ticker in tickers: cursor.execute(''' INSERT OR REPLACE INTO qqq_constituents (ticker, start_date, end_date) VALUES (?, ?, ?) ''', (ticker, start_date, end_date)) conn.commit() conn.close() print(f"Added {len(tickers)} tickers to constituents table") # Download data total_records = 0 successful_downloads = 0 for ticker in tqdm(tickers, desc="Downloading data"): records = download_ticker_data(ticker, start_date, end_date) if records > 0: successful_downloads += 1 total_records += records time.sleep(0.1) # Rate limiting print(f"\nDownload completed!") print(f"Successfully downloaded: {successful_downloads}/{len(tickers)} stocks") print(f"Total records: {total_records}") if __name__ == "__main__": main()