From 6903e9acd3c3dcd2e93e62895f06d124734e90ca Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Wed, 26 Jun 2024 20:57:14 +0200 Subject: [PATCH] update clinical trial --- app/cron_clinical_trial.py | 58 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/app/cron_clinical_trial.py b/app/cron_clinical_trial.py index 7a03743..95b110b 100644 --- a/app/cron_clinical_trial.py +++ b/app/cron_clinical_trial.py @@ -12,33 +12,47 @@ import pandas as pd import time + +# Function to extract the string after the first 'DRUG:' +def extract_drug(string): + parts = string.split('|') + for part in parts: + if part.startswith('DRUG:'): + return part.split(': ', 1)[1].strip() # Extract the part after 'DRUG:' + return '' # Return empty string if no 'DRUG:' found + + ct = ClinicalTrials() async def get_data(company_name): - try: - get_ct_data = ct.get_study_fields( + try: + + company_name = company_name.replace('&','and') + get_ct_data = ct.get_study_fields( search_expr=f"{company_name}", - fields=["Study Results","Funder Type","Start Date", "Completion Date","Study Status","Study Title", 'Phases', 'Brief Summary', 'Age','Sex', 'Enrollment','Study Type','Sponsor','Study URL','NCT Number'], + fields=["Study Results","Interventions","Funder Type","Start Date", "Completion Date","Study Status","Study Title", 'Phases', 'Brief Summary', 'Age','Sex', 'Enrollment','Study Type','Sponsor','Study URL','NCT Number'], max_studies=1000, ) - df = pd.DataFrame.from_records(get_ct_data[1:], columns=get_ct_data[0]) - df['Completion Date'] = pd.to_datetime(df['Completion Date'],errors='coerce') - df_sorted = df.sort_values(by='Completion Date', ascending=False) - # Convert 'Completion Date' back to string format - df_sorted['Completion Date'] = df_sorted['Completion Date'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None) - df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE2|PHASE3', 'Phase 2/3') - df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE1|PHASE2', 'Phase 1/2') - df_sorted['Phases'] = df_sorted['Phases'].replace('EARLY_PHASE1', 'Phase 1') + df = pd.DataFrame.from_records(get_ct_data[1:], columns=get_ct_data[0]) + df['Start Date'] = pd.to_datetime(df['Start Date'],errors='coerce') + df_sorted = df.sort_values(by='Start Date', ascending=False) - df_sorted['Study Status'] = df_sorted['Study Status'].replace('ACTIVE_NOT_RECRUITING', 'Active') - df_sorted['Study Status'] = df_sorted['Study Status'].replace('NOT_YET_RECRUITING', 'Active') - df_sorted['Study Status'] = df_sorted['Study Status'].replace('UNKNOWN', '-') + df_sorted['Start Date'] = df_sorted['Start Date'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None) + df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE2|PHASE3', 'Phase 2/3') + df_sorted['Phases'] = df_sorted['Phases'].replace('PHASE1|PHASE2', 'Phase 1/2') + df_sorted['Phases'] = df_sorted['Phases'].replace('EARLY_PHASE1', 'Phase 1') - data = df_sorted.to_dict('records') - return data - except Exception as e: - print(f"Error fetching data for {ticker}: {e}") - return [] + df_sorted['Study Status'] = df_sorted['Study Status'].replace('ACTIVE_NOT_RECRUITING', 'Active') + df_sorted['Study Status'] = df_sorted['Study Status'].replace('NOT_YET_RECRUITING', 'Active') + df_sorted['Study Status'] = df_sorted['Study Status'].replace('UNKNOWN', '-') + + df_sorted['Interventions'] = df_sorted['Interventions'].apply(extract_drug) + data = df_sorted.to_dict('records') + return data + + except Exception as e: + print(f"Error fetching data for {company_name}: {e}") + return [] async def save_json(symbol, data): # Use async file writing to avoid blocking the event loop @@ -56,12 +70,12 @@ async def run(): cursor = con.cursor() cursor.execute("PRAGMA journal_mode = wal") - cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE industry = 'Biotechnology' AND symbol NOT LIKE '%.%'") + cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE (industry = 'Biotechnology' OR industry LIKE '%Drug%') AND symbol NOT LIKE '%.%'") company_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()] con.close() #test mode - #company_data = [{'symbol': 'DSGN', 'name': 'Design Therapeutics, Inc.'}] - + #company_data = [{'symbol': 'MRK', 'name': 'Merck & Co. Inc.'}] + print(len(company_data)) async with aiohttp.ClientSession() as session: tasks = [] for item in company_data: