update clinical trial
This commit is contained in:
parent
1b3abf060b
commit
6903e9acd3
@ -12,33 +12,47 @@ import pandas as pd
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Function to extract the string after the first 'DRUG:'
|
||||||
|
def extract_drug(string):
    """Return the text following the first 'DRUG:' entry of an interventions string.

    The input is treated as a list of entries separated by '|'. The first
    entry that starts with 'DRUG:' (ignoring surrounding whitespace) wins,
    and everything after that prefix is returned with whitespace stripped.

    Args:
        string: The '|'-separated interventions text. Non-string values
            (e.g. None or NaN from a DataFrame cell) are tolerated.

    Returns:
        The drug text of the first 'DRUG:' entry, or '' when the input is
        not a string or contains no 'DRUG:' entry.
    """
    prefix = 'DRUG:'

    # DataFrame cells may hold None/NaN instead of text; treat those as
    # "no drug" rather than raising and aborting the caller's whole batch.
    if not isinstance(string, str):
        return ''

    for part in string.split('|'):
        part = part.strip()
        if part.startswith(prefix):
            # Slice off the prefix instead of splitting on ': ' so that an
            # entry written without a space after the colon still works.
            return part[len(prefix):].strip()

    return ''  # No 'DRUG:' entry found
|
||||||
|
|
||||||
|
|
||||||
ct = ClinicalTrials()
|
ct = ClinicalTrials()
|
||||||
|
|
||||||
async def get_data(company_name):
|
async def get_data(company_name):
|
||||||
try:
|
try:
|
||||||
get_ct_data = ct.get_study_fields(
|
|
||||||
|
company_name = company_name.replace('&','and')
|
||||||
|
get_ct_data = ct.get_study_fields(
|
||||||
search_expr=f"{company_name}",
|
search_expr=f"{company_name}",
|
||||||
fields=["Study Results","Funder Type","Start Date", "Completion Date","Study Status","Study Title", 'Phases', 'Brief Summary', 'Age','Sex', 'Enrollment','Study Type','Sponsor','Study URL','NCT Number'],
|
fields=["Study Results","Interventions","Funder Type","Start Date", "Completion Date","Study Status","Study Title", 'Phases', 'Brief Summary', 'Age','Sex', 'Enrollment','Study Type','Sponsor','Study URL','NCT Number'],
|
||||||
max_studies=1000,
|
max_studies=1000,
|
||||||
)
|
)
|
||||||
df = pd.DataFrame.from_records(get_ct_data[1:], columns=get_ct_data[0])
|
df = pd.DataFrame.from_records(get_ct_data[1:], columns=get_ct_data[0])
|
||||||
df['Completion Date'] = pd.to_datetime(df['Completion Date'],errors='coerce')
|
df['Start Date'] = pd.to_datetime(df['Start Date'],errors='coerce')
|
||||||
df_sorted = df.sort_values(by='Completion Date', ascending=False)
|
df_sorted = df.sort_values(by='Start Date', ascending=False)
|
||||||
# Convert 'Completion Date' back to string format
|
|
||||||
df_sorted['Completion Date'] = df_sorted['Completion Date'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None)
|
|
||||||
df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE2|PHASE3', 'Phase 2/3')
|
|
||||||
df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE1|PHASE2', 'Phase 1/2')
|
|
||||||
df_sorted['Phases'] = df_sorted['Phases'].replace('EARLY_PHASE1', 'Phase 1')
|
|
||||||
|
|
||||||
df_sorted['Study Status'] = df_sorted['Study Status'].replace('ACTIVE_NOT_RECRUITING', 'Active')
|
df_sorted['Start Date'] = df_sorted['Start Date'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None)
|
||||||
df_sorted['Study Status'] = df_sorted['Study Status'].replace('NOT_YET_RECRUITING', 'Active')
|
df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE2|PHASE3', 'Phase 2/3')
|
||||||
df_sorted['Study Status'] = df_sorted['Study Status'].replace('UNKNOWN', '-')
|
df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE1|PHASE2', 'Phase 1/2')
|
||||||
|
df_sorted['Phases'] = df_sorted['Phases'].replace('EARLY_PHASE1', 'Phase 1')
|
||||||
|
|
||||||
data = df_sorted.to_dict('records')
|
df_sorted['Study Status'] = df_sorted['Study Status'].replace('ACTIVE_NOT_RECRUITING', 'Active')
|
||||||
return data
|
df_sorted['Study Status'] = df_sorted['Study Status'].replace('NOT_YET_RECRUITING', 'Active')
|
||||||
except Exception as e:
|
df_sorted['Study Status'] = df_sorted['Study Status'].replace('UNKNOWN', '-')
|
||||||
print(f"Error fetching data for {ticker}: {e}")
|
|
||||||
return []
|
df_sorted['Interventions'] = df_sorted['Interventions'].apply(extract_drug)
|
||||||
|
data = df_sorted.to_dict('records')
|
||||||
|
return data
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error fetching data for {company_name}: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
async def save_json(symbol, data):
|
async def save_json(symbol, data):
|
||||||
# Use async file writing to avoid blocking the event loop
|
# Use async file writing to avoid blocking the event loop
|
||||||
@ -56,12 +70,12 @@ async def run():
|
|||||||
|
|
||||||
cursor = con.cursor()
|
cursor = con.cursor()
|
||||||
cursor.execute("PRAGMA journal_mode = wal")
|
cursor.execute("PRAGMA journal_mode = wal")
|
||||||
cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE industry = 'Biotechnology' AND symbol NOT LIKE '%.%'")
|
cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE (industry = 'Biotechnology' OR industry LIKE '%Drug%') AND symbol NOT LIKE '%.%'")
|
||||||
company_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
company_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
||||||
con.close()
|
con.close()
|
||||||
#test mode
|
#test mode
|
||||||
#company_data = [{'symbol': 'DSGN', 'name': 'Design Therapeutics, Inc.'}]
|
#company_data = [{'symbol': 'MRK', 'name': 'Merck & Co. Inc.'}]
|
||||||
|
print(len(company_data))
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
tasks = []
|
tasks = []
|
||||||
for item in company_data:
|
for item in company_data:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user