bugfix: institute db file did not collect all trades in holding list

This commit is contained in:
MuslemRahimi 2024-05-30 15:32:55 +02:00
parent ddce30ca71
commit 6e993c2504

View File

@ -131,32 +131,26 @@ class InstituteDatabase:
async def save_portfolio_data(self, session, cik): async def save_portfolio_data(self, session, cik):
try: try:
urls = [ # Fetch summary data
#f"https://financialmodelingprep.com/api/v4/institutional-ownership/industry/portfolio-holdings-summary?cik={cik}&date={quarter_date}&page=0&apikey={api_key}", summary_url = f"https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings-summary?cik={cik}&page=0&apikey={api_key}"
f"https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings?cik={cik}&date={quarter_date}&page=0&apikey={api_key}", async with session.get(summary_url) as response:
f"https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings-summary?cik={cik}&page=0&apikey={api_key}" summary_data = await response.text()
] summary_parsed_data = get_jsonparsed_data(summary_data)
portfolio_data = {} portfolio_data = {}
holdings_data = []
for url in urls: # Fetch paginated holdings data
async with session.get(url) as response: for page in range(0,100):
holdings_url = f"https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings?cik={cik}&date={quarter_date}&page={page}&apikey={api_key}"
async with session.get(holdings_url) as response:
data = await response.text() data = await response.text()
parsed_data = get_jsonparsed_data(data) parsed_data = get_jsonparsed_data(data)
try: if not parsed_data: # Stop if no more data
''' break
if isinstance(parsed_data, list) and "industry/portfolio-holdings-summary" in url:
# Handle list response, save as JSON object
portfolio_data['industry'] = json.dumps(parsed_data)
'''
if isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings?cik=" in url:
# Handle list response, save as JSON object
#Bug: My provider does include the symbol with None even though the stock exist with the correct name. if isinstance(parsed_data, list):
#Solution: Find the name in the database and replace the symbol.
# Replace the symbol in the securities list if it is None
for item in parsed_data: for item in parsed_data:
if item['symbol'] is None: if item['symbol'] is None:
normalized_security_name = normalize_name(item['securityName']) normalized_security_name = normalize_name(item['securityName'])
@ -165,7 +159,6 @@ class InstituteDatabase:
elif normalized_security_name in etf_dict: elif normalized_security_name in etf_dict:
item['symbol'] = etf_dict[normalized_security_name] item['symbol'] = etf_dict[normalized_security_name]
parsed_data = [ parsed_data = [
{**item, 'type': ('stocks' if item['symbol'] in stock_symbols else {**item, 'type': ('stocks' if item['symbol'] in stock_symbols else
'crypto' if item['symbol'] in crypto_symbols else 'crypto' if item['symbol'] in crypto_symbols else
@ -174,68 +167,44 @@ class InstituteDatabase:
if 'symbol' in item and item['symbol'] is not None and item['symbol'] in total_symbols if 'symbol' in item and item['symbol'] is not None and item['symbol'] in total_symbols
] ]
portfolio_data['holdings'] = json.dumps(parsed_data) holdings_data.extend(parsed_data)
portfolio_data['holdings'] = json.dumps(holdings_data)
if not holdings_data:
number_of_stocks = len(parsed_data)
#total_market_value = sum(item['marketValue'] for item in parsed_data)
#avg_performance_percentage = sum(item['performancePercentage'] for item in parsed_data) / len(parsed_data)
performance_percentages = [item.get("performancePercentage", 0) for item in parsed_data]
positive_performance_count = sum(1 for percentage in performance_percentages if percentage > 0)
try:
win_rate = round(positive_performance_count / len(performance_percentages) * 100,2)
except:
win_rate = 0
data_dict = {
'winRate': win_rate,
'numberOfStocks': number_of_stocks,
#'marketValue': total_market_value,
}
portfolio_data.update(data_dict)
if isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings-summary" in url:
# Handle list response, save as JSON object
portfolio_data['summary'] = json.dumps(parsed_data)
data_dict = {
'numberOfStocks': parsed_data[0]['portfolioSize'],
'marketValue': parsed_data[0]['marketValue'],
'averageHoldingPeriod': parsed_data[0]['averageHoldingPeriod'],
'turnover': parsed_data[0]['turnover'],
'performancePercentage': parsed_data[0]['performancePercentage'],
'performancePercentage3year': parsed_data[0]['performancePercentage3year'],
'performancePercentage5year': parsed_data[0]['performancePercentage5year'],
'performanceSinceInceptionPercentage': parsed_data[0]['performanceSinceInceptionPercentage']
}
portfolio_data.update(data_dict)
except Exception as e:
print(e)
# Check if columns already exist in the table
self.cursor.execute("PRAGMA table_info(institutes)")
columns = {column[1]: column[2] for column in self.cursor.fetchall()}
holdings_list = json.loads(portfolio_data['holdings'])
symbols_to_check = {holding['symbol'] for holding in holdings_list[:3]} # Extract the first two symbols
symbols_not_in_list = not any(symbol in total_symbols for symbol in symbols_to_check)
#if symbols_not_in_list or 'industry' not in portfolio_data or len(json.loads(portfolio_data['industry'])) == 0:
if symbols_not_in_list:
# If 'industry' is not a list, delete the row and return
#print(f"Deleting row for cik {cik} because 'industry' is not a list.")
self.cursor.execute("DELETE FROM institutes WHERE cik = ?", (cik,)) self.cursor.execute("DELETE FROM institutes WHERE cik = ?", (cik,))
self.conn.commit() self.conn.commit()
return return
# Update column definitions with keys from portfolio_data #Filter information out that is not needed (yet)!
holdings_data = [{"symbol": item["symbol"], "securityName": item["securityName"], 'weight': item['weight'], 'sharesNumber': item['sharesNumber'], 'changeInSharesNumberPercentage': item['changeInSharesNumberPercentage'], 'putCallShare': item['putCallShare'], "marketValue": item["marketValue"], 'avgPricePaid': item['avgPricePaid']} for item in holdings_data]
number_of_stocks = len(holdings_data)
performance_percentages = [item.get("performancePercentage", 0) for item in holdings_data]
positive_performance_count = sum(1 for percentage in performance_percentages if percentage > 0)
win_rate = round(positive_performance_count / len(performance_percentages) * 100, 2) if performance_percentages else 0
portfolio_data.update({
'winRate': win_rate,
'numberOfStocks': number_of_stocks
})
# Process and add summary data
if isinstance(summary_parsed_data, list) and summary_parsed_data:
portfolio_data['summary'] = json.dumps(summary_parsed_data)
data_dict = {
'marketValue': summary_parsed_data[0].get('marketValue', 0),
'averageHoldingPeriod': summary_parsed_data[0].get('averageHoldingPeriod', 0),
'turnover': summary_parsed_data[0].get('turnover', 0),
'performancePercentage': summary_parsed_data[0].get('performancePercentage', 0),
'performancePercentage3year': summary_parsed_data[0].get('performancePercentage3year', 0),
'performancePercentage5year': summary_parsed_data[0].get('performancePercentage5year', 0),
'performanceSinceInceptionPercentage': summary_parsed_data[0].get('performanceSinceInceptionPercentage', 0)
}
portfolio_data.update(data_dict)
self.cursor.execute("PRAGMA table_info(institutes)")
columns = {column[1]: column[2] for column in self.cursor.fetchall()}
column_definitions = { column_definitions = {
key: (self.get_column_type(portfolio_data.get(key, None)), self.remove_null(portfolio_data.get(key, None))) key: (self.get_column_type(portfolio_data.get(key, None)), self.remove_null(portfolio_data.get(key, None)))
for key in portfolio_data for key in portfolio_data
@ -253,7 +222,6 @@ class InstituteDatabase:
print(f"Failed to fetch portfolio data for cik {cik}: {str(e)}") print(f"Failed to fetch portfolio data for cik {cik}: {str(e)}")
async def save_insitute(self, institutes): async def save_insitute(self, institutes):
institute_data = [] institute_data = []
@ -294,7 +262,7 @@ class InstituteDatabase:
tasks.append(self.save_portfolio_data(session, cik)) tasks.append(self.save_portfolio_data(session, cik))
i += 1 i += 1
if i % 1000 == 0: if i % 400 == 0:
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
tasks = [] tasks = []
print('sleeping mode: ', i) print('sleeping mode: ', i)
@ -318,6 +286,6 @@ async def fetch_tickers():
db = InstituteDatabase('backup_db/institute.db') db = InstituteDatabase('backup_db/institute.db')
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
all_tickers = loop.run_until_complete(fetch_tickers()) all_tickers = loop.run_until_complete(fetch_tickers())
#all_tickers = [{'cik': '0000102909', 'name': "GARDA CAPITAL PARTNERS LP"}] #all_tickers = [{'cik': '0001364742', 'name': "GARDA CAPITAL PARTNERS LP"}]
loop.run_until_complete(db.save_insitute(all_tickers)) loop.run_until_complete(db.save_insitute(all_tickers))
db.close_connection() db.close_connection()