mm25-deploy/mm25-module-card.md
2024-12-14 14:48:59 +03:00

183 lines
7.1 KiB
Markdown
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

### Проверка работы с модулем 25 через API
#### Обучение модели и генерация данных
##### Файловый API
```python
import requests
import json
from requests.auth import HTTPBasicAuth
import time
import datetime
```
```python
# Module slug and the platform endpoint derived from it.
module_name = "pu-mgalynchik-pa-mm25-synthdata"
service_url = f"https://platform-dev-cs-hse.objectoriented.ru/{module_name}"

# Platform credentials, intentionally left blank here; fill in before running.
username = ""
password = ""
basic_auth = HTTPBasicAuth(username, password)

# Headers sent along with the JSON request that starts a pipeline trial.
headers = {"Content-Type": "application/json"}

# Delay in seconds; the pipeline-tracking cell sleeps this long between status polls.
REQUESTS_TIMEOUT = 20
```
```python
# Path to the input data file; the same relative path is used both locally and in the framework's file storage
filename = "data/telecom_data.csv"
```
```python
# Ask the platform for the storage entry of `filename`; the JSON reply is what
# the upload step reads `presigned_put_url` from.
file_url = f"{service_url}/files/synthdata-box/{filename}"
response = requests.put(file_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)
if response.status_code == 400:
    # NOTE(review): presumably 400 means the entry already exists - confirm
    # against the platform API. In that case fetch the existing entry instead.
    response = requests.get(file_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)
display(response.json())
```
{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/data/telecom_data.csv',
'presigned_put_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/data/telecom_data.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185235Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=e816ff92cb18b85f8f65e6c6536f9757271e12f28afb2c1dfc12616b462ce640'}
```python
# Upload the local file to the presigned URL returned by the previous request.
if response.status_code in (200, 201):
    result_urls = response.json()
    # Hand requests the open file object so the body is streamed from disk
    # instead of being read into memory first. No platform credentials are
    # attached: the presigned URL already carries its own signature.
    with open(filename, "rb") as f:
        response = requests.put(
            result_urls["presigned_put_url"],
            data=f,
            timeout=REQUESTS_TIMEOUT,
        )
    print(response.status_code)
```
200
##### Запуск пайплайна генерации данных
```python
# Pipeline to run and the URL the launch request is POSTed to.
# Built from `service_url` so the platform host is defined in exactly one place.
pipeline_name = "train-generate"
full_url = f"{service_url}/pipelines/{pipeline_name}/trials"
```
```python
# Request body for the pipeline launch: one file input plus the storage
# locations where the report, the generated data and the model are written.
synthdata_request = {
    "inputs": [
        {
            "name": "input_data",
            "datatype": "FILE",
            "content_type": "text/csv",
            # NOTE(review): presumably rows x columns of the input CSV - confirm.
            "shape": [7043, 20],
            "data": "data/",
        },
    ],
    "output_vars": [
        {"name": "report_file", "data": "results/syntesize_data/report/"},
        {"name": "output_data", "data": "results/syntesize_data/output/"},
        {"name": "model", "data": "results/syntesize_data/model/"},
    ],
}
```
```python
# Start a pipeline trial, then poll its tracking URL until the job reports
# "Completed" and print the elapsed time of every recorded status transition.
response = requests.post(
    full_url,
    headers=headers,
    auth=basic_auth,
    json=synthdata_request,
    timeout=REQUESTS_TIMEOUT,
)
if response.status_code == 200:
    # Read the tracking link only after the status check: an error response
    # is not expected to contain `_links`.
    tracking_url = response.json()['_links']['self']['href']

    tracking_info = requests.get(tracking_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)
    status_conditions = tracking_info.json()['status']['conditions']
    last_status = status_conditions[-1]
    # Distinct "latest" conditions seen while polling; kept for interactive inspection.
    status_history = [last_status]

    # NOTE(review): the loop ends only on 'Completed'. If the platform reports
    # a terminal failure under another type, this polls forever - add that
    # type (or a deadline) once the failure statuses are confirmed.
    while last_status['type'] != 'Completed':
        if last_status not in status_history:
            status_history.append(last_status)
        # REQUESTS_TIMEOUT doubles as the pause between polls.
        time.sleep(REQUESTS_TIMEOUT)
        tracking_info = requests.get(tracking_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)
        status_conditions = tracking_info.json()['status']['conditions']
        last_status = status_conditions[-1]

    # The [:-5] slice drops the trailing UTC offset (e.g. "+0000") so the
    # timestamp matches the naive format below.
    time_format = '%Y-%m-%dT%H:%M:%S.%f'
    start_time = datetime.datetime.strptime(status_conditions[0]['last_transition_time'][:-5], time_format)
    for status in status_conditions:
        status_time = datetime.datetime.strptime(status['last_transition_time'][:-5], time_format)
        print(f"[{str(status_time - start_time).split('.', maxsplit=1)[0]}] Pipeline job status - {status['type']}")
    display(last_status)
else:
    print(f"Pipeline launch failed: HTTP {response.status_code}")
```
```text
[0:00:00] Pipeline job status - Started
[0:02:34] Pipeline job status - OutputsAreValidated
[0:02:36] Pipeline job status - Completed
{'type': 'Completed',
 'condition_status': 'True',
 'last_transition_time': '2024-12-01T18:55:13.943784+0000',
 'message': '',
 'reason': '',
 'stage': None}
```
##### Результаты
```python
# List the files stored under each output location declared in the request.
# The loop variable is deliberately not called `filename`: that name holds the
# input file path used by the upload step and must not be overwritten by a dict.
for output_var in synthdata_request["output_vars"]:
    response = requests.get(
        f"{service_url}/files/synthdata-box/{output_var['data']}",
        auth=basic_auth,
        timeout=REQUESTS_TIMEOUT,
    )
    if response.status_code in (200, 201):
        display(response.json())
```
{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/',
'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/report.json',
'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/report/report.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=58608407ac224691cacd571fe7f5edb3b80ccb1fbb25632b2d3cad6be541d049'}]}
{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/',
'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/prediction.csv',
'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/output/prediction.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=5346314f79abfb31563be22b814a1286035cbb0d19f782f626166651b5691036'}]}
{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/',
'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/model.joblib',
'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/model/model.joblib?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=41d8ded4948d817538aa65a12b6cfc028d7df5e0af963e5ce7c84d509c458b06'}]}
##### Процент совпадений с входными данными:
```python
# Download the quality report: list the report location, then fetch the first
# file in it through its presigned URL.
report_fname = synthdata_request["output_vars"][0]['data']
response = requests.get(
    f"{service_url}/files/synthdata-box/{report_fname}",
    auth=basic_auth,
    timeout=REQUESTS_TIMEOUT,
)
if response.status_code in (200, 201):
    report_files = response.json().get('files') or []
    if report_files:
        # The presigned URL points at the storage host and is already signed,
        # so the platform credentials are not sent to it.
        response = requests.get(report_files[0]['presigned_get_url'], timeout=REQUESTS_TIMEOUT)
        display(response.json())
    else:
        print(f"No report files found under {report_fname}")
```
{'Overall Quality Score': 0.9413369301611234}