update analyst insight
This commit is contained in:
parent
0513fced3d
commit
1127900d04
@ -349,7 +349,7 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date, test_size, s
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
async def run():
|
async def run():
|
||||||
train_mode = True # Set this to False for fine-tuning and evaluation
|
train_mode = False # Set this to False for fine-tuning and evaluation
|
||||||
skip_downloading = False
|
skip_downloading = False
|
||||||
con = sqlite3.connect('stocks.db')
|
con = sqlite3.connect('stocks.db')
|
||||||
cursor = con.cursor()
|
cursor = con.cursor()
|
||||||
|
|||||||
@ -1,20 +1,19 @@
|
|||||||
from openai import OpenAI
|
import os
|
||||||
import time
|
|
||||||
import ujson
|
import ujson
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import requests
|
import aiohttp
|
||||||
import os
|
import asyncio
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from openai import OpenAI
|
||||||
|
import aiofiles
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Initialize OpenAI client
|
# Initialize OpenAI client
|
||||||
|
|
||||||
benzinga_api_key = os.getenv('BENZINGA_API_KEY')
|
benzinga_api_key = os.getenv('BENZINGA_API_KEY')
|
||||||
|
|
||||||
openai_api_key = os.getenv('OPENAI_API_KEY')
|
openai_api_key = os.getenv('OPENAI_API_KEY')
|
||||||
org_id = os.getenv('OPENAI_ORG')
|
org_id = os.getenv('OPENAI_ORG')
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
@ -22,24 +21,23 @@ client = OpenAI(
|
|||||||
organization=org_id,
|
organization=org_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
headers = {"accept": "application/json"}
|
headers = {"accept": "application/json"}
|
||||||
url = "https://api.benzinga.com/api/v1/analyst/insights"
|
url = "https://api.benzinga.com/api/v1/analyst/insights"
|
||||||
|
|
||||||
|
# Persist one symbol's insight payload as JSON on disk.
async def save_json(symbol, data):
    # Async write so concurrent per-symbol tasks do not block one another.
    target = f"json/analyst/insight/{symbol}.json"
    async with aiofiles.open(target, 'w') as file:
        await file.write(ujson.dumps(data))
|
||||||
|
|
||||||
def save_json(symbol, data):
|
# Fetch the latest analyst insight for one ticker from the Benzinga API.
# Returns a dict with the insight text and report id (empty dict when the
# request fails or no insight exists for the ticker).
async def get_analyst_insight(session, ticker):
    res_dict = {}
    try:
        querystring = {"token": benzinga_api_key, "symbols": ticker}
        async with session.get(url, params=querystring) as response:
            output = await response.json()
            # Keep only the most recent insight, and only when one exists.
            if 'analyst-insights' in output and output['analyst-insights']:
                output = output['analyst-insights'][0]
                res_dict = {
                    'insight': output['analyst_insights'],
                    'id': output['id'],
                    # NOTE(review): one field here was hidden by a diff-hunk
                    # boundary; 'date' is inferred from downstream usage of
                    # data['date'] — confirm against the full file.
                    'date': output['date'],
                }
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # swallowed; any API or parse failure still degrades to an empty result.
    except Exception:
        pass
    return res_dict
|
||||||
|
|
||||||
|
# Summarize one analyst insight with OpenAI and return the updated record.
# Mutates data['insight'] in place with the model's summary (the prompt asks
# for <400 chars ending in a bullish/bearish call); other keys are untouched.
async def get_summary(data):
    data_string = f"Insights: {data['insight']}"
    # `client` is the synchronous OpenAI client, so its calls return plain
    # objects, not awaitables — awaiting the call directly would raise
    # TypeError. Run the blocking request in a worker thread instead.
    response = await asyncio.to_thread(
        client.chat.completions.create,
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Summarize analyst insights clearly and concisely in under 400 characters. Ensure the summary is professional and easy to understand. Conclude with whether the report is bullish or bearish."},
            # NOTE(review): the user message was hidden by a diff-hunk
            # boundary; presumably it carries data_string — confirm.
            {"role": "user", "content": data_string},
        ],
        max_tokens=150,
        temperature=0.7
    )
    summary = response.choices[0].message.content
    data['insight'] = summary
    return data
|
||||||
|
|
||||||
|
# Fetch, summarize and persist the insight for a single symbol.
# Skips the (paid) OpenAI call when the report id cached on disk is unchanged.
async def process_symbol(session, symbol):
    try:
        data = await get_analyst_insight(session, symbol)
        if data:
            new_report_id = data.get('id', '')
            try:
                async with aiofiles.open(f"json/analyst/insight/{symbol}.json", 'r') as file:
                    old_report_id = ujson.loads(await file.read()).get('id', '')
            except Exception:
                # No cached report yet (or unreadable JSON): treat as new.
                old_report_id = ''
            # Check the cached id first to avoid paying for a summary of a
            # report we already processed; also skip empty insights.
            if new_report_id != old_report_id and data['insight']:
                res = await get_summary(data)
                await save_json(symbol, res)
            else:
                print(f'Skipped: {symbol}')
    except Exception as e:
        # Best-effort per symbol: log and move on so one failure does not
        # abort the whole batch (a bare `except: pass` previously hid errors).
        print(f'Failed {symbol}: {e}')
|
||||||
|
|
||||||
|
def chunk_list(lst, n):
    """Yield successive chunks of *lst*, each at most *n* items long."""
    for start in range(0, len(lst), n):
        yield lst[start:start + n]
|
||||||
|
|
||||||
except Exception as e:
|
# Entry point: load all symbols from SQLite, then fetch and summarize
# insights for each, 100 concurrent requests per batch.
async def main():
    # Fetch stock symbols from the local SQLite database. try/finally
    # guarantees the connection is closed even if the query raises, so the
    # database file lock is never held during the long HTTP phase.
    con = sqlite3.connect('stocks.db')
    try:
        cursor = con.cursor()
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
        stock_symbols = [row[0] for row in cursor.fetchall()]
    finally:
        con.close()

    async with aiohttp.ClientSession(headers=headers) as session:
        # Process in batches of 100 so we never hold thousands of open
        # requests at once.
        for batch in chunk_list(stock_symbols, 100):
            print(f"Processing batch of {len(batch)} tickers")
            # NOTE(review): tqdm here only tracks coroutine *creation*, which
            # finishes instantly; it does not show request progress.
            await asyncio.gather(*[process_symbol(session, symbol) for symbol in tqdm(batch)])


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user