From 9a15f2002f4cae111926da78c46b4274766ba44f Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Mon, 24 Mar 2025 13:18:52 +0100 Subject: [PATCH] update fine tune code --- app/fine_tune_preprocessor.py | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 app/fine_tune_preprocessor.py diff --git a/app/fine_tune_preprocessor.py b/app/fine_tune_preprocessor.py new file mode 100644 index 0000000..4c369f8 --- /dev/null +++ b/app/fine_tune_preprocessor.py @@ -0,0 +1,57 @@ +from pocketbase import PocketBase # Client also works the same +import asyncio +import aiohttp +import ujson +import os +from dotenv import load_dotenv + +load_dotenv() + +async def save_json(name, data): + path = f"json/fine-tune-model/{name}.jsonl" + directory = os.path.dirname(path) + + # Ensure the directory exists + os.makedirs(directory, exist_ok=True) + + # Write each data entry as a separate line in the JSONL file + with open(path, 'w') as file: + for entry in data: + ujson.dump(entry, file) + file.write('\n') # Write each JSON object as a new line + + +pb_admin_email = os.getenv('POCKETBASE_ADMIN_EMAIL') +pb_password = os.getenv('POCKETBASE_PASSWORD') + +pb = PocketBase('http://127.0.0.1:8090') +admin_data = pb.collection('_superusers').auth_with_password(pb_admin_email, pb_password) + +async def run(): + result = pb.collection('articles').get_full_list(query_params={"filter": f"category='term'"}) + + jsonl_data = [] # Initialize an empty list to hold the data + + for item in result: + # Assuming item has 'prompt' and 'description' attributes + prompt = item.prompt + description = item.description + + # Create the structured data for each entry + jsonl_dict = { + "messages": [ + {"role": "user", "content": prompt}, + {"role": "assistant", "content": description} + ] + } + + # Append the entry to the list + jsonl_data.append(jsonl_dict) + + if jsonl_data: + await save_json('file-financial-term', jsonl_data) + +try: + asyncio.run(run()) +except Exception as e: + print(e)