### Проверка работы с модулем 25 через API

#### Обучение модели и генерация данных

##### Файловый API

```python
import datetime
import json
import time

import requests
from requests.auth import HTTPBasicAuth
```

```python
module_name = "pu-mgalynchik-pa-mm25-synthdata"
username = ""
password = ""

# Timeout (seconds) applied to every HTTP request so a stalled connection
# cannot hang the notebook.
REQUESTS_TIMEOUT = 20
# Pause (seconds) between two consecutive polls of the pipeline status.
POLL_INTERVAL_SECONDS = 20
# Upper bound (seconds) on the total time spent waiting for the pipeline.
MAX_WAIT_SECONDS = 60 * 60

basic_auth = HTTPBasicAuth(username, password)
headers = {
    "Content-Type": "application/json"
}
service_url = f"https://platform-dev-cs-hse.objectoriented.ru/{module_name}"
```

```python
# Path to the input data file, both locally and in the framework's file storage
filename = "data/telecom_data.csv"
```

```python
# Register the file with the platform; on HTTP 400 fall back to fetching the
# existing file entry instead.
response = requests.put(
    f"{service_url}/files/synthdata-box/{filename}",
    auth=basic_auth,
    timeout=REQUESTS_TIMEOUT,
)
if response.status_code in [400, ]:
    response = requests.get(
        f"{service_url}/files/synthdata-box/{filename}",
        auth=basic_auth,
        timeout=REQUESTS_TIMEOUT,
    )
display(response.json())
```

    {'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/data/telecom_data.csv', 'presigned_put_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/data/telecom_data.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185235Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=e816ff92cb18b85f8f65e6c6536f9757271e12f28afb2c1dfc12616b462ce640'}

```python
if response.status_code in [200, 201]:
    result_urls = response.json()
    # NOTE(review): the GET fallback above may not return an upload URL -
    # confirm against the platform API; guard instead of raising KeyError.
    upload_url = result_urls.get("presigned_put_url")
    if upload_url is None:
        print("No presigned_put_url in the response, nothing was uploaded")
    else:
        # The presigned URL is self-authenticating, so no credentials are sent.
        with open(filename, "rb") as f:
            response = requests.put(upload_url, data=f.read(), timeout=REQUESTS_TIMEOUT)
        print(response.status_code)
else:
    print(f"File registration failed: HTTP {response.status_code}")
```

    200

##### Запуск пайплайна генерации данных

```python
pipeline_name = "train-generate"
full_url = f"https://platform-dev-cs-hse.objectoriented.ru/{module_name}/pipelines/{pipeline_name}/trials"
```

```python
synthdata_request = {
    "inputs": [
        {
            "name": "input_data",
            "datatype": "FILE",
            "content_type": "text/csv",
            "shape": [7043, 20],
            "data": "data/"
        }
    ],
    "output_vars": [
        {
            "name": "report_file",
            "data": "results/syntesize_data/report/"
        },
        {
            "name": "output_data",
            "data": "results/syntesize_data/output/"
        },
        {
            "name": "model",
            "data": "results/syntesize_data/model/"
        },
    ]
}
```

```python
# Layout of `last_transition_time`, e.g. '2024-12-01T18:55:13.943784+0000';
# %z parses the UTC offset instead of slicing it off by position.
TRANSITION_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f%z'

response = requests.post(
    full_url,
    headers=headers,
    auth=basic_auth,
    json=synthdata_request,
    timeout=REQUESTS_TIMEOUT,
)

if response.status_code in [200, ]:
    # Read the tracking link only after the start request is known to have
    # succeeded; an error body is not expected to carry '_links'.
    tracking_url = response.json()['_links']['self']['href']
    deadline = time.monotonic() + MAX_WAIT_SECONDS

    # Poll until the last reported condition is 'Completed' or the deadline
    # passes (a trial that never completes would otherwise loop forever).
    while True:
        tracking_info = requests.get(tracking_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)
        status_conditions = tracking_info.json()['status']['conditions']
        last_status = status_conditions[-1]
        if last_status['type'] == 'Completed':
            break
        if time.monotonic() >= deadline:
            print(f"Pipeline job did not complete within {MAX_WAIT_SECONDS} s, "
                  f"last status - {last_status['type']}")
            break
        time.sleep(POLL_INTERVAL_SECONDS)

    # Print every reported condition with its offset from the first one.
    start_time = datetime.datetime.strptime(
        status_conditions[0]['last_transition_time'], TRANSITION_TIME_FORMAT
    )
    for status in status_conditions:
        status_time = datetime.datetime.strptime(
            status['last_transition_time'], TRANSITION_TIME_FORMAT
        )
        print(f"[{str(status_time - start_time).split('.', maxsplit=1)[0]}] Pipeline job status - {status['type']}")
    display(last_status)
else:
    print(f"Pipeline start failed: HTTP {response.status_code}")
```

    [0:00:00] Pipeline job status - Started
    [0:02:34] Pipeline job status - OutputsAreValidated
    [0:02:36] Pipeline job status - Completed
    {'type': 'Completed', 'condition_status': 'True', 'last_transition_time': '2024-12-01T18:55:13.943784+0000', 'message': '', 'reason': '', 'stage': None}

##### Результаты

```python
# List the files produced for every declared output variable. The loop
# variable is deliberately not called `filename`, so the input path defined
# earlier stays intact and the upload cells can be re-run.
for output_var in synthdata_request["output_vars"]:
    response = requests.get(
        f"{service_url}/files/synthdata-box/{output_var['data']}",
        auth=basic_auth,
        timeout=REQUESTS_TIMEOUT,
    )
    if response.status_code in [200, 201]:
        display(response.json())
```

{'name': 
'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/', 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/report.json', 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/report/report.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=58608407ac224691cacd571fe7f5edb3b80ccb1fbb25632b2d3cad6be541d049'}]} {'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/', 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/prediction.csv', 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/output/prediction.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=5346314f79abfb31563be22b814a1286035cbb0d19f782f626166651b5691036'}]} {'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/', 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/model.joblib', 'presigned_get_url': 
'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/model/model.joblib?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=41d8ded4948d817538aa65a12b6cfc028d7df5e0af963e5ce7c84d509c458b06'}]}

##### Процент совпадений с входными данными:

```python
# The report directory is the first output variable declared in the request.
report_fname = synthdata_request["output_vars"][0]['data']
response = requests.get(
    f"{service_url}/files/synthdata-box/{report_fname}",
    auth=basic_auth,
    timeout=REQUESTS_TIMEOUT,
)

if response.status_code in [200, 201]:
    report_files = response.json()['files']
    if report_files:
        # The presigned URL carries its own signature in the query string and
        # points at the storage host, so the platform credentials are not
        # sent with it (same as the presigned upload request).
        response = requests.get(report_files[0]['presigned_get_url'], timeout=REQUESTS_TIMEOUT)
        display(response.json())
    else:
        print(f"No report files found under {report_fname}")
else:
    print(f"Report lookup failed: HTTP {response.status_code}")
```

    {'Overall Quality Score': 0.9413369301611234}