refactor code

MuslemRahimi 2024-11-18 13:26:10 +01:00
parent a93edd72fa
commit 418802df37
3 changed files with 59 additions and 176 deletions

View File

@@ -1,4 +1,4 @@
-import ujson
+import orjson
 import asyncio
 import aiohttp
 import aiofiles
@@ -11,13 +11,19 @@ from tqdm import tqdm
 from dotenv import load_dotenv
 import os
+with open(f"json/stock-screener/data.json", 'rb') as file:
+    stock_screener_data = orjson.loads(file.read())
+stock_screener_data_dict = {item['symbol']: item for item in stock_screener_data}
 load_dotenv()
 api_key = os.getenv('FMP_API_KEY')
 async def save_json_data(symbol, data):
     async with aiofiles.open(f"json/congress-trading/company/{symbol}.json", 'w') as file:
-        await file.write(ujson.dumps(data))
+        await file.write(orjson.dumps(data).decode("utf-8"))
 async def get_congress_data(symbols, session):
     tasks = []
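
Note on the ujson → orjson switch above: unlike ujson.dumps, orjson.dumps returns bytes, which is why the new write calls decode to UTF-8 before going through a text-mode file handle, and orjson.loads accepts bytes or str, which is why the screener file can be read in 'rb' mode and passed straight from file.read(). A minimal standalone sketch of the pattern, assuming nothing beyond orjson itself (the path and payload are illustrative, not part of the repository):

import orjson

payload = {"symbol": "AAPL", "transactions": 3}

# orjson.dumps() returns bytes, so decode before writing to a text-mode file.
with open("example.json", "w") as f:
    f.write(orjson.dumps(payload).decode("utf-8"))

# orjson.loads() accepts bytes or str, so reading in binary mode works directly.
with open("example.json", "rb") as f:
    data = orjson.loads(f.read())

print(data)
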
@@ -234,35 +240,49 @@ def create_politician_db(data, stock_symbols, stock_raw_data, etf_symbols, etf_r
     grouped_data_list = list(grouped_data.values())
     for item in tqdm(grouped_data_list):
-        # Sort items by 'transactionDate'
-        item = sorted(item, key=lambda x: x['transactionDate'], reverse=True)
+        try:
+            # Sort items by 'transactionDate'
+            item = sorted(item, key=lambda x: x['transactionDate'], reverse=True)
-        # Calculate top sectors
-        sector_counts = Counter()
-        for holding in item:
-            symbol = holding['ticker']
-            sector = next((entry['sector'] for entry in stock_raw_data if entry['symbol'] == symbol), None)
-            if sector:
-                sector_counts[sector] += 1
+            # Calculate top sectors
+            sector_list = []
+            industry_list = []
-        # Calculate the total number of holdings
-        total_holdings = sum(sector_counts.values())
+            for holding in item:
+                symbol = holding['symbol']
+                ticker_data = stock_screener_data_dict.get(symbol, {})
-        # Calculate the percentage for each sector and get the top 5
-        top_5_sectors_percentage = [
-            {sector: round((count / total_holdings) * 100, 2)}
-            for sector, count in sector_counts.most_common(5)
-        ]
+                # Extract specified columns data for each ticker
+                sector = ticker_data.get('sector',None)
+                industry = ticker_data.get('industry',None)
-        # Prepare the data to save in the file
-        result = {
-            'topSectors': top_5_sectors_percentage,
-            'history': item
-        }
+                # Append data to relevant lists if values are present
+                if sector:
+                    sector_list.append(sector)
+                if industry:
+                    industry_list.append(industry)
-        # Save to JSON file
-        with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file:
-            ujson.dump(result, file)
+            # Get the top 3 most common sectors and industries
+            sector_counts = Counter(sector_list)
+            industry_counts = Counter(industry_list)
+            main_sectors = [item2[0] for item2 in sector_counts.most_common(3)]
+            main_industries = [item2[0] for item2 in industry_counts.most_common(3)]
+            # Prepare the data to save in the file
+            result = {
+                'mainSectors': main_sectors,
+                'mainIndustries': main_industries,
+                'history': item
+            }
+            # Save to JSON file
+            if result:
+                with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file:
+                    file.write(orjson.dumps(result).decode("utf-8"))
+            print(result)
+        except Exception as e:
+            print(e)
 def create_search_list():
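
For reference, the block added above aggregates, per politician, the sectors and industries of the symbols they traded: each symbol is looked up in the screener dictionary, non-empty values are collected, and Counter.most_common(3) keeps the three most frequent ones. A hedged, self-contained sketch with illustrative data, where screener and holdings stand in for stock_screener_data_dict and the grouped trade history:

from collections import Counter

screener = {
    "AAPL": {"sector": "Technology", "industry": "Consumer Electronics"},
    "MSFT": {"sector": "Technology", "industry": "Software"},
    "XOM": {"sector": "Energy", "industry": "Oil & Gas"},
}
holdings = [{"symbol": s} for s in ["AAPL", "MSFT", "AAPL", "XOM"]]

sectors = []
for holding in holdings:
    info = screener.get(holding["symbol"], {})
    sector = info.get("sector")
    if sector:
        sectors.append(sector)

# most_common(3) returns (value, count) pairs; keep only the names.
main_sectors = [name for name, _ in Counter(sectors).most_common(3)]
print(main_sectors)  # ['Technology', 'Energy']
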
@@ -277,7 +297,7 @@ def create_search_list():
         file_path = os.path.join(folder_path, filename)
         # Open and read the JSON file
         with open(file_path, 'r') as file:
-            data = ujson.load(file)
+            data = orjson.loads(file.read())
         # Access the history, which is a list of transactions
         history = data.get('history', [])
@@ -305,7 +325,7 @@ def create_search_list():
     # Write the search list to a JSON file
     with open('json/congress-trading/search_list.json', 'w') as file:
-        ujson.dump(search_politician_list, file)
+        file.write(orjson.dumps(search_politician_list).decode("utf-8"))
 async def run():
     try:
@@ -364,11 +384,15 @@ async def run():
         connector = aiohttp.TCPConnector(limit=100) # Adjust the limit as needed
         async with aiohttp.ClientSession(connector=connector) as session:
             for i in tqdm(range(0, len(total_symbols), chunk_size)):
-                symbols_chunk = total_symbols[i:i + chunk_size]
-                data = await get_congress_data(symbols_chunk,session)
-                politician_list +=data
-                print('sleeping for 30 sec')
-                await asyncio.sleep(30) # Wait for 30 seconds between chunks
+                try:
+                    symbols_chunk = total_symbols[i:i + chunk_size]
+                    data = await get_congress_data(symbols_chunk,session)
+                    politician_list +=data
+                    print('sleeping for 30 sec')
+                    await asyncio.sleep(30) # Wait for 30 seconds between chunks
+                except Exception as e:
+                    print(e)
+                    pass
         create_politician_db(politician_list, stock_symbols, stock_raw_data, etf_symbols, etf_raw_data, crypto_symbols, crypto_raw_data)
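
The change above wraps each chunk of the symbol loop in its own try/except, so a single failed API call no longer aborts the whole run, while the pause between chunks keeps the request rate down. A minimal sketch of that control flow, assuming a hypothetical fetch_chunk coroutine in place of get_congress_data:

import asyncio

async def fetch_chunk(symbols):
    # Placeholder for the real API call.
    return [{"symbol": s} for s in symbols]

async def run_chunks(total_symbols, chunk_size=100, pause=30):
    results = []
    for i in range(0, len(total_symbols), chunk_size):
        try:
            chunk = total_symbols[i:i + chunk_size]
            results += await fetch_chunk(chunk)
            await asyncio.sleep(pause)  # stay below the API rate limit
        except Exception as e:
            # A failed chunk is logged and skipped instead of stopping the loop.
            print(e)
    return results

if __name__ == "__main__":
    print(asyncio.run(run_chunks(["AAPL", "MSFT"], chunk_size=1, pause=0)))
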

View File

@@ -74,8 +74,8 @@ def all_hedge_funds(con):
         'marketValue': row[3],
         'winRate': row[4],
         'turnover': row[5],
-        'performancePercentage3year': row[6]
-    } for row in all_ciks if row[2] >= 3 and abs(row[6]) < 500]
+        'performancePercentage3Year': row[6]
+    } for row in all_ciks if row[2] >= 3 and row[4] >= 10 and row[6] >= 10 and abs(row[6]) < 500]
     sorted_res_list = sorted(res_list, key=lambda x: x['marketValue'], reverse=True)
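
Besides renaming the key to performancePercentage3Year, the comprehension above now also requires winRate (row[4]) and the three-year performance (row[6]) to be at least 10, on top of the existing row[2] >= 3 and abs(row[6]) < 500 checks. A hedged sketch that expresses the filter as a named predicate over illustrative rows with the same positional layout:

def passes_filters(row):
    # row[4] is winRate and row[6] is performancePercentage3Year, per the dict
    # keys above; both must now be at least 10 in addition to the old checks.
    return row[2] >= 3 and row[4] >= 10 and row[6] >= 10 and abs(row[6]) < 500

rows = [
    (0, 0, 5, 1_000_000, 55.0, 0.4, 12.5),   # kept
    (0, 0, 5, 2_000_000, 8.0, 0.4, 25.0),    # dropped: winRate below 10
    (0, 0, 5, 3_000_000, 60.0, 0.4, 900.0),  # dropped: abs(performance) >= 500
]
print([row[3] for row in rows if passes_filters(row)])  # [1000000]
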

View File

@@ -1522,147 +1522,6 @@ async def get_congress_rss_feed(symbols, etf_symbols, crypto_symbols):
     return data
-async def get_analysts_rss_feed(con, symbols, etf_symbols):
-    urls = [
-        f"https://financialmodelingprep.com/api/v4/price-target-rss-feed?page=0&apikey={api_key}",
-        f"https://financialmodelingprep.com/api/v4/upgrades-downgrades-rss-feed?page=0&apikey={api_key}",
-    ]
-    query_template = """
-        SELECT
-            name, quote
-        FROM
-            stocks
-        WHERE
-            symbol = ?
-    """
-    async with aiohttp.ClientSession() as session:
-        tasks = [session.get(url) for url in urls]
-        responses = await asyncio.gather(*tasks)
-        data = [await response.json() for response in responses]
-    price_targets_list = [
-        {
-            "symbol": entry["symbol"],
-            "publishedDate": entry["publishedDate"],
-            "analystName": entry["analystName"],
-            "adjPriceTarget": entry["adjPriceTarget"],
-            "priceWhenPosted": entry["priceWhenPosted"],
-            "analystCompany": entry["analystCompany"],
-        }
-        for entry in data[0]
-    ]
-    #Add ticker name
-    for entry in price_targets_list:
-        try:
-            symbol = entry['symbol']
-            df = pd.read_sql_query(query_template, con, params=(symbol,))
-            entry['name'] = df['name'].iloc[0]
-        except:
-            entry['name'] = 'n/a'
-    #Add ticker assetType
-    for item in price_targets_list:
-        symbol = item.get("symbol")
-        symbol = symbol.replace('BRK.A','BRK-A')
-        symbol = symbol.replace('BRK.B','BRK-B')
-        item['symbol'] = symbol
-        if symbol in symbols:
-            item["assetType"] = "Stock"
-        elif symbol in etf_symbols:
-            item["assetType"] = "ETF"
-        else:
-            item['assetType'] = ''
-    #Remove elements who have assetType = '' or priceWhenPosted = 0
-    #price_targets_list = [item for item in price_targets_list if item.get("assetType") != ""]
-    price_targets_list = [item for item in price_targets_list if item.get("assetType") != ""]
-    price_targets_list = [item for item in price_targets_list if item.get("priceWhenPosted") != 0]
-    upgrades_downgrades_list = [
-        {
-            "symbol": entry["symbol"],
-            "publishedDate": entry["publishedDate"],
-            "newGrade": entry["newGrade"],
-            "previousGrade": entry["previousGrade"],
-            "priceWhenPosted": entry["priceWhenPosted"],
-            "gradingCompany": entry["gradingCompany"],
-            "action": entry["action"],
-        }
-        for entry in data[1]
-    ]
-    #Add ticker name
-    new_upgrades_downgrades_list = []
-    for entry in upgrades_downgrades_list:
-        try:
-            symbol = entry['symbol']
-            df = pd.read_sql_query(query_template, con, params=(symbol,))
-            entry['name'] = df['name'].iloc[0]
-            entry['currentPrice'] = (ujson.loads(df['quote'].iloc[0])[0]).get('price')
-            new_upgrades_downgrades_list.append(entry)
-        except:
-            #Remove all elements that don't have a name and currentPrice in the db for better UX with new_upgrades_downgrades_list
-            pass
-    #Add ticker assetType
-    for item in new_upgrades_downgrades_list:
-        symbol = item.get("symbol")
-        symbol = symbol.replace('BRK.A','BRK-A')
-        symbol = symbol.replace('BRK.B','BRK-B')
-        item['symbol'] = symbol
-        if symbol in symbols:
-            item["assetType"] = "Stock"
-        elif symbol in etf_symbols:
-            item["assetType"] = "ETF"
-        else:
-            item['assetType'] = ''
-    #Remove elements who have assetType = ''
-    new_upgrades_downgrades_list = [item for item in new_upgrades_downgrades_list if item.get("assetType") != ""]
-    new_upgrades_downgrades_list = [item for item in new_upgrades_downgrades_list if item.get("priceWhenPosted") != 0]
-    return price_targets_list, new_upgrades_downgrades_list
-async def ticker_mentioning(con):
-    results = pb.collection("posts").get_full_list()
-    symbol_list = []
-    query_template = """
-        SELECT
-            name, marketCap
-        FROM
-            stocks
-        WHERE
-            symbol = ?
-    """
-    for x in results:
-        if len(x.tagline) != 0:
-            symbol_list.append(x.tagline)
-    symbol_counts = Counter(symbol_list)
-    symbol_counts_list = [{'symbol': symbol, 'count': count} for symbol, count in symbol_counts.items()]
-    sorted_symbol_list = sorted(symbol_counts_list, key=lambda x: x['count'], reverse=True)
-    for entry in sorted_symbol_list:
-        try:
-            symbol = entry['symbol']
-            data = pd.read_sql_query(query_template, con, params=(symbol,))
-            entry['name'] = data['name'].iloc[0]
-            entry['marketCap'] = int(data['marketCap'].iloc[0])
-        except:
-            entry['name'] = 'n/a'
-            entry['marketCap'] = None
-    return sorted_symbol_list
 async def get_all_etf_tickers(etf_con):
     cursor = etf_con.cursor()