update analyst insight

This commit is contained in:
MuslemRahimi 2024-10-07 17:56:17 +02:00
parent 0513fced3d
commit 1127900d04
3 changed files with 63 additions and 69 deletions

View File

@ -349,7 +349,7 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date, test_size, s
pass pass
async def run(): async def run():
train_mode = True # Set this to False for fine-tuning and evaluation train_mode = False # Set this to False for fine-tuning and evaluation
skip_downloading = False skip_downloading = False
con = sqlite3.connect('stocks.db') con = sqlite3.connect('stocks.db')
cursor = con.cursor() cursor = con.cursor()

View File

@ -1,20 +1,19 @@
from openai import OpenAI import os
import time
import ujson import ujson
import sqlite3 import sqlite3
import requests import aiohttp
import os import asyncio
from dotenv import load_dotenv from dotenv import load_dotenv
from tqdm import tqdm from tqdm import tqdm
from datetime import datetime from datetime import datetime
from openai import OpenAI
import aiofiles
# Load environment variables # Load environment variables
load_dotenv() load_dotenv()
# Initialize OpenAI client # Initialize OpenAI client
benzinga_api_key = os.getenv('BENZINGA_API_KEY') benzinga_api_key = os.getenv('BENZINGA_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY') openai_api_key = os.getenv('OPENAI_API_KEY')
org_id = os.getenv('OPENAI_ORG') org_id = os.getenv('OPENAI_ORG')
client = OpenAI( client = OpenAI(
@ -22,24 +21,23 @@ client = OpenAI(
organization=org_id, organization=org_id,
) )
headers = {"accept": "application/json"} headers = {"accept": "application/json"}
url = "https://api.benzinga.com/api/v1/analyst/insights" url = "https://api.benzinga.com/api/v1/analyst/insights"
# Save JSON asynchronously
async def save_json(symbol, data):
async with aiofiles.open(f"json/analyst/insight/{symbol}.json", 'w') as file:
await file.write(ujson.dumps(data))
def save_json(symbol, data): # Fetch analyst insights for a specific ticker
with open(f"json/analyst/insight/{symbol}.json", 'w') as file: async def get_analyst_insight(session, ticker):
ujson.dump(data, file)
def get_analyst_insight(ticker):
res_dict = {} res_dict = {}
try: try:
querystring = {"token": benzinga_api_key, "symbols": ticker} querystring = {"token": benzinga_api_key, "symbols": ticker}
response = requests.request("GET", url, params=querystring) async with session.get(url, params=querystring) as response:
output = ujson.loads(response.text)['analyst-insights'][0] #get the latest insight only output = await response.json()
# Extracting required fields if 'analyst-insights' in output and output['analyst-insights']:
output = output['analyst-insights'][0]
res_dict = { res_dict = {
'insight': output['analyst_insights'], 'insight': output['analyst_insights'],
'id': output['id'], 'id': output['id'],
@ -47,20 +45,12 @@ def get_analyst_insight(ticker):
} }
except: except:
pass pass
return res_dict return res_dict
# Summarize insights using OpenAI
# Function to summarize the text using GPT-3.5-turbo async def get_summary(data):
def get_summary(data): data_string = f"Insights: {data['insight']}"
# Define the data to be summarized response = await client.chat.completions.create(
# Format the data as a string
data_string = (
f"Insights: {data['insight']}"
)
response = client.chat.completions.create(
model="gpt-4o-mini", model="gpt-4o-mini",
messages=[ messages=[
{"role": "system", "content": "Summarize analyst insights clearly and concisely in under 400 characters. Ensure the summary is professional and easy to understand. Conclude with whether the report is bullish or bearish."}, {"role": "system", "content": "Summarize analyst insights clearly and concisely in under 400 characters. Ensure the summary is professional and easy to understand. Conclude with whether the report is bullish or bearish."},
@ -69,44 +59,48 @@ def get_summary(data):
max_tokens=150, max_tokens=150,
temperature=0.7 temperature=0.7
) )
summary = response.choices[0].message.content summary = response.choices[0].message.content
data = { data['insight'] = summary
'insight': summary,
'id': data['id'],
'date': data['date']
}
return data return data
# Process individual symbol
async def process_symbol(session, symbol):
try: try:
stock_con = sqlite3.connect('stocks.db') data = await get_analyst_insight(session, symbol)
stock_cursor = stock_con.cursor() if data:
stock_cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
stock_symbols = [row[0] for row in stock_cursor.fetchall()]
stock_con.close()
for symbol in tqdm(stock_symbols):
try:
data = get_analyst_insight(symbol)
new_report_id = data.get('id', '') new_report_id = data.get('id', '')
try: try:
with open(f"json/analyst/insight/{symbol}.json", 'r') as file: async with aiofiles.open(f"json/analyst/insight/{symbol}.json", 'r') as file:
old_report_id = ujson.load(file).get('id', '') old_report_id = ujson.loads(await file.read()).get('id', '')
except: except:
old_report_id = '' old_report_id = ''
#check first if new report id exist already to save money before sending it to closedai company if new_report_id != old_report_id and data['insight']:
if new_report_id != old_report_id and len(data['insight']) > 0: res = await get_summary(data)
res = get_summary(data) await save_json(symbol, res)
save_json(symbol, res)
else: else:
print('skipped') print(f'Skipped: {symbol}')
except: except:
pass pass
# Function to split list into batches
def chunk_list(lst, n):
for i in range(0, len(lst), n):
yield lst[i:i + n]
except Exception as e: # Main function with batch processing
print(e) async def main():
# Fetch stock symbols from SQLite database
con = sqlite3.connect('stocks.db')
cursor = con.cursor()
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
stock_symbols = [row[0] for row in cursor.fetchall()]
con.close()
async with aiohttp.ClientSession(headers=headers) as session:
# Process in batches of 100
for batch in chunk_list(stock_symbols, 100):
print(f"Processing batch of {len(batch)} tickers")
await asyncio.gather(*[process_symbol(session, symbol) for symbol in tqdm(batch)])
if __name__ == "__main__":
asyncio.run(main())