Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

with fast_yahoo.py #49

Open
tankisstank opened this issue Nov 9, 2023 · 0 comments
Open

with fast_yahoo.py #49

tankisstank opened this issue Nov 9, 2023 · 0 comments

Comments

@tankisstank
Copy link

I ran into an error while building and running this project on my old computer; luckily, the machine still handles it well.
Here is my problem.

With the latest fast_yahoo.py, you use the v10 endpoint to get data, but we need to send a crumb and a cookie to make it work.

Here is my fix:


import scheduled_tasks.reddit.stocks.shared as shared
import requests
import numbers
import multitasking as multitasking
import time
import json
import pandas as pd
from datetime import datetime, timedelta

# Host of the Yahoo Finance query API; endpoint paths are appended to it.
apiBase = 'https://query2.finance.yahoo.com'

# HTTP headers sent with the requests made to Yahoo.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}

def getCredentials(cookieUrl='https://fc.yahoo.com', crumbUrl=apiBase+'/v1/test/getcrumb'):
    """
    Fetches the cookie jar and crumb token that are sent with quoteSummary requests.

    :param cookieUrl: URL requested first; only the cookies of its response are kept.
    :param crumbUrl: URL requested with those cookies; its response body is used as the crumb.
    :return: dict with keys 'cookie' (the cookie jar) and 'crumb' (the crumb text).
    :raises requests.RequestException: if either request fails or times out.
    """
    # Send the same User-Agent on both requests so the cookie and the crumb
    # are issued to the same apparent client, and bound each call with a
    # timeout so a stalled connection cannot block the caller forever.
    cookie = requests.get(cookieUrl, headers=headers, timeout=10).cookies
    crumb = requests.get(url=crumbUrl, cookies=cookie, headers=headers, timeout=10).text
    return {'cookie': cookie, 'crumb': crumb}
  
# Refer to https://stackoverflow.com/questions/44030983/yahoo-finance-url-not-working for more configs
# Each key is a quoteSummary module name; each value lists the stat fields
# extracted from that module. Order matters: it defines the column order of
# the resulting stats table.
config = {
    'summaryDetail': [
        'regularMarketOpen',
        'previousClose',
        'dayHigh',
        'fiftyTwoWeekHigh',
        'regularMarketDayLow',
        'fiftyTwoWeekLow',
        'regularMarketVolume',
        'averageDailyVolume10Day',
        'fiftyDayAverage',
        'twoHundredDayAverage',
        'trailingPE',
        'forwardPE',
        'marketCap',
        'beta',
        'trailingAnnualDividendYield',
        'trailingAnnualDividendRate',
        'totalAssets',
        'navPrice',
    ],
    'defaultKeyStatistics': [
        'sharesOutstanding',
        'floatShares',
        'shortRatio',
        'shortPercentOfFloat',
        'trailingEps',
        'pegRatio',
        'enterpriseToRevenue',
        'netIncomeToCommon',
        'threeYearAverageReturn',
        'fiveYearAverageReturn',
    ],
    'summaryProfile': [
        'industry',
        'sector',
        'website',
        'longBusinessSummary',
        'fullTimeEmployees',
        'country',
    ],
    'price': [
        'longName',
        'symbol',
        'regularMarketPrice',
        'quoteType',
        'marketState',
        'regularMarketChangePercent',
        'regularMarketChange',
        'postMarketChangePercent',
        'postMarketChange',
        'preMarketChangePercent',
        'preMarketChange',
    ],
    'topHoldings': [
        'holdings',
        'sectorWeightings',
    ],
}


def _extract_stat_value(stat):
    """
    Returns the table value for one raw stat entry, or 'N/A' when it is unusable.
    """
    if isinstance(stat, dict):
        # Dict-valued stats carry their display string under 'fmt'; an empty
        # dict or one without 'fmt' is reported as missing instead of raising.
        return stat.get('fmt', 'N/A')
    if isinstance(stat, (str, numbers.Number, list)):
        return stat
    return 'N/A'


def download_advanced_stats(symbol_list,  threads=True):
    """
    Downloads advanced yahoo stats for many tickers by doing one request per ticker.

    :param symbol_list: ticker symbols to look up; duplicates are requested only once.
    :param threads: when True, requests are dispatched through get_ticker_stats_threaded
        and collected via shared.response_dict; when False they run sequentially.
    :return: dict keyed by symbol, each value a dict of the stats named in `config`
        plus a 'next_update' timestamp string; unavailable stats are 'N/A'.
    """
    # Responses are stored keyed by symbol, so duplicate symbols collapse into
    # a single entry. Counting the raw list would make the wait loop below
    # spin forever, hence the order-preserving de-duplication.
    symbols = list(dict.fromkeys(symbol_list))
    num_requests = len(symbols)
    if threads:
        num_threads = min(num_requests, multitasking.cpu_count() * 2)
        multitasking.set_max_threads(num_threads)

    # get raw responses (one set of credentials shared by every request)
    credentials = getCredentials()
    for symbol in symbols:
        if threads:
            get_ticker_stats_threaded(symbol, symbol, credentials)
        else:
            shared.response_dict[symbol] = get_ticker_stats(symbol, credentials)

    if threads:
        # worker tasks publish into shared.response_dict; poll until all arrived
        while len(shared.response_dict) < num_requests:
            time.sleep(0.01)

    # construct stats table from responses
    stats_table = []
    for symbol, retrieved_modules_dict in shared.response_dict.items():
        stats_list = [symbol]

        for module_name, stat_names in config.items():
            retrieved_module_dict = None
            if retrieved_modules_dict is not None:
                retrieved_module_dict = retrieved_modules_dict.get(module_name)

            if retrieved_module_dict is None:
                # whole module missing: pad so the row stays aligned with the columns
                stats_list.extend(['N/A'] * len(stat_names))
            else:
                stats_list.extend(
                    _extract_stat_value(retrieved_module_dict.get(stat_name))
                    for stat_name in stat_names
                )

        stats_table.append(stats_list)

    # reset for future reuse
    shared.response_dict = {}

    columns = ['Symbol']
    for stat_names in config.values():
        columns.extend(stat_names)

    financial_data_df = pd.DataFrame(stats_table, columns=columns)
    financial_data_df["next_update"] = str(datetime.utcnow() + timedelta(seconds=600))
    financial_data_df.set_index('Symbol', inplace=True)
    # Un-escape the "\/" sequences in the serialized JSON; the backslash is
    # written as "\\" because "\/" is not a valid Python escape sequence.
    json_text = financial_data_df.to_json(orient="index").replace("\\/", "/")
    return json.loads(json_text)


@multitasking.task
def get_ticker_stats_threaded(request_idx, symbol, credentials=None):
    """
    Task wrapper that fetches stats for one ticker and publishes them in shared.response_dict.

    :param request_idx: key under which the result is stored in shared.response_dict.
    :param symbol: ticker symbol to look up.
    :param credentials: dict with 'cookie' and 'crumb' as returned by getCredentials();
        fetched on demand when omitted. (A default of getCredentials() in the signature
        would run once at import time and then be reused indefinitely.)
    """
    import logging

    try:
        if credentials is None:
            credentials = getCredentials()
        stats = get_ticker_stats(symbol, credentials)
    except Exception:
        # Task boundary: always publish an entry, because the caller waits for
        # one result per request. None is the value used for "no data".
        logging.getLogger(__name__).exception("Failed to fetch stats for %s", symbol)
        stats = None
    shared.response_dict[request_idx] = stats

    

  
def get_ticker_stats(symbol, credentials=None):
    """
    Returns advanced stats for one ticker

    :param symbol: ticker symbol to look up.
    :param credentials: dict with 'cookie' and 'crumb' as returned by getCredentials();
        fetched on demand when omitted. (A default of getCredentials() in the signature
        would run once at import time and then be reused indefinitely.)
    :return: dict of the first quoteSummary result keyed by module name, or None when
        the response is not JSON or contains no result.
    :raises requests.RequestException: if the request fails or times out.
    """
    if credentials is None:
        credentials = getCredentials()

    url = f'{apiBase}/v10/finance/quoteSummary/{symbol}'
    # Passing the query via `params` lets requests URL-encode the values
    # (commas become %2C, and any special characters in the crumb are escaped).
    # The requested modules are exactly the ones listed in `config`.
    params = {'modules': ','.join(config), 'crumb': credentials['crumb']}
    result = requests.get(url, params=params, cookies=credentials['cookie'],
                          headers=headers, timeout=10)
    try:
        json_dict = result.json()
    except ValueError:
        # non-JSON body (e.g. an error page): treat as "no data"
        return None

    if "quoteSummary" not in json_dict:
        return None
    results = json_dict['quoteSummary']['result']
    if not results:  # None or an empty list
        return None
    return results[0]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant