mm25-deploy/mm25-module-card.ipynb
2024-12-14 17:02:40 +03:00

351 lines
17 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Проверка работы с модулем 25 через API"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение модели и генерация данных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Файловый API"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import getpass\n",
"import json\n",
"import os\n",
"import time\n",
"\n",
"import requests\n",
"from requests.auth import HTTPBasicAuth"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"module_name = \"pu-mgalynchik-pa-mm25-synthdata\"\n",
"username = \"developer\"\n",
"password = \"WpNJxt7vCdP4Q9TYFX5Sh6ukGZHyrD\"\n",
"REQUESTS_TIMEOUT = 20\n",
"\n",
"basic_auth = HTTPBasicAuth(username, password)\n",
"\n",
"headers = {\n",
" \"Content-Type\": \"application/json\"\n",
"}\n",
"\n",
"service_url = f\"https://platform-dev.stratpro.hse.ru/{module_name}\""
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Путь к файлу с входными данными, как локально, так и в файловом хранилище фреймворка\n",
"filename = \"data/telecom_data.csv\""
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Response [503]>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "JSONDecodeError",
"evalue": "Expecting value: line 1 column 1 (char 0)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/models.py:971\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcomplexjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n",
"File \u001b[0;32m/usr/lib/python3.10/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;124;03mcontaining a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n",
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[22], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m400\u001b[39m, ]:\n\u001b[1;32m 3\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mservice_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/files/synthdata-box/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfilename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, auth\u001b[38;5;241m=\u001b[39mbasic_auth)\n\u001b[0;32m----> 4\u001b[0m display(\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/models.py:975\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m complexjson\u001b[38;5;241m.\u001b[39mloads(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n\u001b[0;32m--> 975\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmsg, e\u001b[38;5;241m.\u001b[39mdoc, e\u001b[38;5;241m.\u001b[39mpos)\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)"
]
}
],
"source": [
"response = requests.put(f\"{service_url}/files/synthdata-box/{filename}\", auth=basic_auth)\n",
"if response.status_code in [400, ]:\n",
" response = requests.get(f\"{service_url}/files/synthdata-box/{filename}\", auth=basic_auth)\n",
"display(response.json())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"200\n"
]
}
],
"source": [
"if response.status_code in [200, 201]:\n",
" result_urls = json.loads(response.content) \n",
" \n",
" with open(filename, \"rb\") as f:\n",
" response = requests.put(result_urls[\"presigned_put_url\"], data=f.read())\n",
" print(response.status_code)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Запуск пайплайна генерации данных"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"pipeline_name = \"train-generate\"\n",
"full_url = f\"https://platform-dev-cs-hse.objectoriented.ru/{module_name}/pipelines/{pipeline_name}/trials\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"synthdata_request = {\n",
" \"inputs\": [\n",
" {\n",
" \"name\": \"input_data\",\n",
" \"datatype\": \"FILE\",\n",
" \"content_type\": \"text/csv\",\n",
" \"shape\": [7043, 20],\n",
" \"data\": \"data/\"\n",
" }\n",
" ],\n",
" \"output_vars\": [\n",
" {\n",
" \"name\": \"report_file\",\n",
" \"data\": \"results/syntesize_data/report/\"\n",
" },\n",
" {\n",
" \"name\": \"output_data\",\n",
" \"data\": \"results/syntesize_data/output/\"\n",
" },\n",
" {\n",
" \"name\": \"model\",\n",
" \"data\": \"results/syntesize_data/model/\"\n",
" },\n",
" ]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0:00:00] Pipeline job status - Started\n",
"[0:02:34] Pipeline job status - OutputsAreValidated\n",
"[0:02:36] Pipeline job status - Completed\n"
]
},
{
"data": {
"text/plain": [
"{'type': 'Completed',\n",
" 'condition_status': 'True',\n",
" 'last_transition_time': '2024-12-01T18:55:13.943784+0000',\n",
" 'message': '',\n",
" 'reason': '',\n",
" 'stage': None}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"response = requests.post(full_url, headers=headers, auth=basic_auth, json=synthdata_request)\n",
"tracking_url = json.loads(response.content)['_links']['self']['href']\n",
"\n",
"if response.status_code in [200, ]:\n",
" tracking_info = requests.get(tracking_url, auth=basic_auth)\n",
" last_status = json.loads(tracking_info.content)['status']['conditions'][-1]\n",
" status_history = [last_status]\n",
" while last_status['type'] != 'Completed':\n",
" if last_status not in status_history:\n",
" status_history.append(last_status)\n",
" \n",
" time.sleep(REQUESTS_TIMEOUT)\n",
" \n",
" tracking_info = requests.get(tracking_url, auth=basic_auth)\n",
" last_status = json.loads(tracking_info.content)['status']['conditions'][-1]\n",
"\n",
" status_conditions = json.loads(tracking_info.content)['status']['conditions']\n",
" start_time = datetime.datetime.strptime(status_conditions[0]['last_transition_time'][:-5], '%Y-%m-%dT%H:%M:%S.%f')\n",
" for idx, status in enumerate(status_conditions):\n",
" status_time = datetime.datetime.strptime(status['last_transition_time'][:-5], '%Y-%m-%dT%H:%M:%S.%f')\n",
" print(f\"[{str(status_time - start_time).split('.', maxsplit=1)[0]}] Pipeline job status - {status['type']}\")\n",
" \n",
" display(last_status)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Результаты"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/report/report.json',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/report/report.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=58608407ac224691cacd571fe7f5edb3b80ccb1fbb25632b2d3cad6be541d049'}]}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/output/prediction.csv',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/output/prediction.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=5346314f79abfb31563be22b814a1286035cbb0d19f782f626166651b5691036'}]}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/',\n",
" 'files': [{'name': 'pu-mgalynchik-pa-mm25-synthdata/files/synthdata-box/results/syntesize_data/model/model.joblib',\n",
" 'presigned_get_url': 'https://storage.yandexcloud.net/platform-default-user-data/pu-mgalynchik-pa-mm25-synthdata/synthdata-box/users/developer/file_groups/results/syntesize_data/model/model.joblib?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=YCAJEw7KWeJzGZz9pXuFdhLPP%2F20241201%2Fru-central1%2Fs3%2Faws4_request&X-Amz-Date=20241201T185519Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Signature=41d8ded4948d817538aa65a12b6cfc028d7df5e0af963e5ce7c84d509c458b06'}]}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for filename in synthdata_request[\"output_vars\"]:\n",
" response = requests.get(f\"{service_url}/files/synthdata-box/{filename['data']}\", auth=basic_auth)\n",
" if response.status_code in [200, 201]:\n",
" display(response.json())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Процент совпадений с входными данными:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Overall Quality Score': 0.9413369301611234}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"report_fname = synthdata_request[\"output_vars\"][0]['data']\n",
"response = requests.get(f\"{service_url}/files/synthdata-box/{report_fname}\", auth=basic_auth)\n",
"if response.status_code in [200, 201]:\n",
" response = requests.get(response.json()['files'][0]['presigned_get_url'], auth=basic_auth)\n",
" display(response.json())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}