rename papp

This commit is contained in:
maksgalinchik 2024-12-14 17:02:40 +03:00
parent f04dc7e19d
commit 174fc67c3f
7 changed files with 717 additions and 6 deletions

View file

@ -2,7 +2,7 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: APIComponent
metadata:
name: api-pipelines
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
published: true
pipelines:

View file

@ -2,7 +2,7 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: DataBox
metadata:
name: synthdata-box
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
s3DefaultStorage: {}
---
@ -11,6 +11,6 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: DataBox
metadata:
name: users
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
s3DefaultStorage: {}

View file

@ -2,7 +2,7 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: APIComponent
metadata:
name: files-api
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
published: true
files:

View file

@ -3,7 +3,7 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: APIComponent
metadata:
name: train-generate-api
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
published: true
experimentPipeline:

View file

@ -2,7 +2,7 @@ apiVersion: "unified-platform.cs.hse.ru/v1"
kind: ExperimentPipeline
metadata:
name: train-generate
namespace: pu-mgalynchik-pa-mm25-synthdata
namespace: pu-mgalynchik-pa-mm25-synth
spec:
vars:
- name: model

360
logs.txt Normal file

File diff suppressed because one or more lines are too long

351
mm25-module-card.ipynb Normal file
View file

@ -0,0 +1,351 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Проверка работы с модулем 25 через API"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение модели и генерация данных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Файловый API"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from requests.auth import HTTPBasicAuth\n",
"import time\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"# Module (namespace) name; it is the first path segment of the API URLs built in this notebook.\n",
"# NOTE(review): the platform manifests now use the namespace\n",
"# \"pu-mgalynchik-pa-mm25-synth\" - confirm whether this value should follow.\n",
"module_name = \"pu-mgalynchik-pa-mm25-synthdata\"\n",
"username = \"developer\"\n",
"# Credentials must not be committed: take the password from the environment or prompt for it.\n",
"# A password that was ever stored in this notebook should be rotated.\n",
"password = os.environ.get(\"PLATFORM_PASSWORD\") or getpass(\"Platform password: \")\n",
"# Pause, in seconds, between two status polls of a running pipeline job.\n",
"REQUESTS_TIMEOUT = 20\n",
"\n",
"basic_auth = HTTPBasicAuth(username, password)\n",
"\n",
"headers = {\n",
"    \"Content-Type\": \"application/json\"\n",
"}\n",
"\n",
"service_url = f\"https://platform-dev.stratpro.hse.ru/{module_name}\""
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Путь к файлу с входными данными, как локально, так и в файловом хранилище фреймворка\n",
"filename = \"data/telecom_data.csv\""
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Response [503]>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "JSONDecodeError",
"evalue": "Expecting value: line 1 column 1 (char 0)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/models.py:971\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcomplexjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.10/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;124;03mcontaining a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n",
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[22], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m400\u001b[39m, ]:\n\u001b[1;32m 3\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mservice_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/files/synthdata-box/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfilename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, auth\u001b[38;5;241m=\u001b[39mbasic_auth)\n\u001b[0;32m----> 4\u001b[0m display(\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/models.py:975\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m complexjson\u001b[38;5;241m.\u001b[39mloads(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n\u001b[0;32m--> 975\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmsg, e\u001b[38;5;241m.\u001b[39mdoc, e\u001b[38;5;241m.\u001b[39mpos)\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)"
]
}
],
"source": [
"# Ask the file API for the URLs of `filename`; on HTTP 400 fall back to a GET on the same path.\n",
"file_url = f\"{service_url}/files/synthdata-box/{filename}\"\n",
"response = requests.put(file_url, auth=basic_auth)\n",
"if response.status_code == 400:\n",
"    response = requests.get(file_url, auth=basic_auth)\n",
"\n",
"# A reply whose body is not JSON (e.g. an HTTP 503 error page) makes .json() raise,\n",
"# so show the status and the raw body instead of crashing the cell.\n",
"try:\n",
"    display(response.json())\n",
"except ValueError:\n",
"    print(f\"HTTP {response.status_code}: response body is not JSON\")\n",
"    print(response.text[:500])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"200\n"
]
}
],
"source": [
"# Upload the local file to the pre-signed URL returned by the file API.\n",
"if response.status_code in (200, 201):\n",
"    result_urls = response.json()\n",
"\n",
"    # Hand the open file to requests so the body is streamed instead of being read into memory first.\n",
"    with open(filename, \"rb\") as f:\n",
"        response = requests.put(result_urls[\"presigned_put_url\"], data=f)\n",
"    print(response.status_code)\n",
"else:\n",
"    print(f\"Upload skipped: file API returned HTTP {response.status_code}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Запуск пайплайна генерации данных"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# URL that the trial request for the pipeline is POSTed to.\n",
"# NOTE(review): the host below differs from the one used in service_url - confirm which is current.\n",
"pipeline_name = \"train-generate\"\n",
"pipelines_root = f\"https://platform-dev-cs-hse.objectoriented.ru/{module_name}/pipelines\"\n",
"full_url = f\"{pipelines_root}/{pipeline_name}/trials\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"synthdata_request = {\n",
" \"inputs\": [\n",
" {\n",
" \"name\": \"input_data\",\n",
" \"datatype\": \"FILE\",\n",
" \"content_type\": \"text/csv\",\n",
" \"shape\": [7043, 20],\n",
" \"data\": \"data/\"\n",
" }\n",
" ],\n",
" \"output_vars\": [\n",
" {\n",
" \"name\": \"report_file\",\n",
" \"data\": \"results/syntesize_data/report/\"\n",
" },\n",
" {\n",
" \"name\": \"output_data\",\n",
" \"data\": \"results/syntesize_data/output/\"\n",
" },\n",
" {\n",
" \"name\": \"model\",\n",
" \"data\": \"results/syntesize_data/model/\"\n",
" },\n",
" ]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0:00:00] Pipeline job status - Started\n",
"[0:02:34] Pipeline job status - OutputsAreValidated\n",
"[0:02:36] Pipeline job status - Completed\n"
]
},
{
"data": {
"text/plain": [
"{'type': 'Completed',\n",
" 'condition_status': 'True',\n",
" 'last_transition_time': '2024-12-01T18:55:13.943784+0000',\n",
" 'message': '',\n",
" 'reason': '',\n",
" 'stage': None}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Submit a pipeline trial, poll its status until it completes, then print the status timeline.\n",
"MAX_WAIT_SECONDS = 60 * 60  # polling budget, so a job that never reaches 'Completed' cannot hang the cell\n",
"TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%f%z'\n",
"\n",
"\n",
"def fetch_conditions(url):\n",
"    \"\"\"Return the status conditions reported at the tracking URL.\"\"\"\n",
"    reply = requests.get(url, auth=basic_auth)\n",
"    reply.raise_for_status()\n",
"    return reply.json()['status']['conditions']\n",
"\n",
"\n",
"response = requests.post(full_url, headers=headers, auth=basic_auth, json=synthdata_request)\n",
"\n",
"if response.status_code == 200:\n",
"    # Read the tracking link only from a successful reply; an error reply may not carry '_links'.\n",
"    tracking_url = response.json()['_links']['self']['href']\n",
"\n",
"    deadline = time.monotonic() + MAX_WAIT_SECONDS\n",
"    status_conditions = fetch_conditions(tracking_url)\n",
"    while status_conditions[-1]['type'] != 'Completed' and time.monotonic() < deadline:\n",
"        time.sleep(REQUESTS_TIMEOUT)\n",
"        status_conditions = fetch_conditions(tracking_url)\n",
"    last_status = status_conditions[-1]\n",
"\n",
"    if last_status['type'] != 'Completed':\n",
"        print(f\"Stopped polling after {MAX_WAIT_SECONDS} s without reaching 'Completed'\")\n",
"\n",
"    # Print every condition with its offset from the first one, truncated to whole seconds.\n",
"    start_time = datetime.datetime.strptime(status_conditions[0]['last_transition_time'], TIME_FORMAT)\n",
"    for status in status_conditions:\n",
"        status_time = datetime.datetime.strptime(status['last_transition_time'], TIME_FORMAT)\n",
"        print(f\"[{str(status_time - start_time).split('.', maxsplit=1)[0]}] Pipeline job status - {status['type']}\")\n",
"\n",
"    display(last_status)\n",
"else:\n",
"    print(f\"Pipeline start failed: HTTP {response.status_code}\")\n",
"    print(response.text[:500])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Результаты"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/report.json',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/report/report.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=58608407ac224691cacd571fe7f5edb3b80ccb1fbb25632b2d3cad6be541d049'}]}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/prediction.csv',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/output/prediction.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=5346314f79abfb31563be22b814a1286035cbb0d19f782f626166651b5691036'}]}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/model.joblib',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/model/model.joblib?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=41d8ded4948d817538aa65a12b6cfc028d7df5e0af963e5ce7c84d509c458b06'}]}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# List the files stored under every requested output location.\n",
"# The loop variable is deliberately not called `filename`: that name holds the input path used by the upload cells.\n",
"for output_var in synthdata_request[\"output_vars\"]:\n",
"    response = requests.get(f\"{service_url}/files/synthdata-box/{output_var['data']}\", auth=basic_auth)\n",
"    if response.status_code in (200, 201):\n",
"        display(response.json())\n",
"    else:\n",
"        print(f\"{output_var['name']}: HTTP {response.status_code}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Процент совпадений с входными данными:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Overall Quality Score': 0.9413369301611234}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Download the report through the pre-signed URL issued by the file API and show its content.\n",
"report_fname = synthdata_request[\"output_vars\"][0]['data']\n",
"response = requests.get(f\"{service_url}/files/synthdata-box/{report_fname}\", auth=basic_auth)\n",
"if response.status_code in (200, 201):\n",
"    report_files = response.json()['files']\n",
"    if report_files:\n",
"        # The pre-signed URL carries its own signature and points at the object storage host,\n",
"        # so the platform credentials must not be sent with this request.\n",
"        response = requests.get(report_files[0]['presigned_get_url'])\n",
"        display(response.json())\n",
"    else:\n",
"        print(\"No report file found\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}