{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Проверка работы с модулем 25 через API"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Обучение модели и генерация данных"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Файловый API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import getpass\n",
    "import json\n",
    "import os\n",
    "import time\n",
    "\n",
    "import requests\n",
    "from requests.auth import HTTPBasicAuth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "module_name = \"pu-mgalynchik-pa-mm25-synthdata\"\n",
    "\n",
    "# Credentials come from the environment (with an interactive prompt as a\n",
    "# fallback) so that no secret is stored in the notebook itself.\n",
    "# NOTE(review): the password that used to be hard-coded in this cell is still\n",
    "# present in the file history and should be rotated.\n",
    "username = os.environ.get(\"SYNTHDATA_API_USERNAME\", \"developer\")\n",
    "password = os.environ.get(\"SYNTHDATA_API_PASSWORD\") or getpass.getpass(f\"Password for {username}: \")\n",
    "\n",
    "REQUESTS_TIMEOUT = 20  # seconds; passed as `timeout=` to every HTTP call\n",
    "POLL_INTERVAL = 20  # seconds to sleep between two pipeline status checks\n",
    "PIPELINE_MAX_WAIT = 60 * 60  # seconds; polling gives up after this long\n",
    "\n",
    "basic_auth = HTTPBasicAuth(username, password)\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\"\n",
    "}\n",
    "\n",
    "service_url = f\"https://platform-dev.stratpro.hse.ru/{module_name}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_response(response):\n",
    "    \"\"\"Display the JSON body of `response`, or its status code and raw text when the body is not JSON.\"\"\"\n",
    "    try:\n",
    "        payload = response.json()\n",
    "    except ValueError:\n",
    "        # requests signals an undecodable body with a ValueError subclass\n",
    "        print(f\"HTTP {response.status_code}: response body is not JSON\")\n",
    "        print(response.text[:500])\n",
    "        return\n",
    "    display(payload)\n",
    "\n",
    "\n",
    "def fetch_conditions(tracking_url):\n",
    "    \"\"\"Return the `status.conditions` list reported for the pipeline trial at `tracking_url`.\n",
    "\n",
    "    Raises `requests.HTTPError` when the tracking endpoint answers with an error status.\n",
    "    \"\"\"\n",
    "    tracking_info = requests.get(tracking_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)\n",
    "    tracking_info.raise_for_status()\n",
    "    return tracking_info.json()['status']['conditions']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path to the input data file; the same path is used locally and in the framework's file storage\n",
    "filename = \"data/telecom_data.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_slot_url = f\"{service_url}/files/synthdata-box/{filename}\"\n",
    "\n",
    "file_slot_response = requests.put(file_slot_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)\n",
    "if file_slot_response.status_code in [400, ]:\n",
    "    # presumably 400 means the file is already registered, so read the existing entry instead - TODO confirm\n",
    "    file_slot_response = requests.get(file_slot_url, auth=basic_auth, timeout=REQUESTS_TIMEOUT)\n",
    "show_response(file_slot_response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if file_slot_response.status_code in [200, 201]:\n",
    "    result_urls = file_slot_response.json()\n",
    "\n",
    "    # Hand the open file object to requests so the upload is streamed instead of read into memory\n",
    "    with open(filename, \"rb\") as f:\n",
    "        upload_response = requests.put(result_urls[\"presigned_put_url\"], data=f, timeout=REQUESTS_TIMEOUT)\n",
    "    print(upload_response.status_code)\n",
    "else:\n",
    "    print(f\"Upload skipped: file API returned HTTP {file_slot_response.status_code}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Запуск пайплайна генерации данных"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_name = \"train-generate\"\n",
    "# NOTE(review): this host differs from the one used in `service_url` - confirm which one is current\n",
    "full_url = f\"https://platform-dev-cs-hse.objectoriented.ru/{module_name}/pipelines/{pipeline_name}/trials\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request body for the pipeline: one input file group and three output locations\n",
    "synthdata_request = {\n",
    "    \"inputs\": [\n",
    "        {\n",
    "            \"name\": \"input_data\",\n",
    "            \"datatype\": \"FILE\",\n",
    "            \"content_type\": \"text/csv\",\n",
    "            \"shape\": [7043, 20],\n",
    "            \"data\": \"data/\"\n",
    "        }\n",
    "    ],\n",
    "    \"output_vars\": [\n",
    "        {\n",
    "            \"name\": \"report_file\",\n",
    "            \"data\": \"results/syntesize_data/report/\"\n",
    "        },\n",
    "        {\n",
    "            \"name\": \"output_data\",\n",
    "            \"data\": \"results/syntesize_data/output/\"\n",
    "        },\n",
    "        {\n",
    "            \"name\": \"model\",\n",
    "            \"data\": \"results/syntesize_data/model/\"\n",
    "        },\n",
    "    ]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trial_response = requests.post(full_url, headers=headers, auth=basic_auth, json=synthdata_request, timeout=REQUESTS_TIMEOUT)\n",
    "\n",
    "if trial_response.status_code in [200, ]:\n",
    "    # Only parse the body once the request is known to have succeeded\n",
    "    tracking_url = trial_response.json()['_links']['self']['href']\n",
    "    deadline = time.monotonic() + PIPELINE_MAX_WAIT\n",
    "\n",
    "    status_conditions = fetch_conditions(tracking_url)\n",
    "    # An empty list means no status has been reported yet, so keep polling in that case too\n",
    "    while not status_conditions or status_conditions[-1]['type'] != 'Completed':\n",
    "        if time.monotonic() > deadline:\n",
    "            last_type = status_conditions[-1]['type'] if status_conditions else None\n",
    "            raise TimeoutError(f\"Pipeline did not reach 'Completed' within {PIPELINE_MAX_WAIT} s (last status: {last_type})\")\n",
    "        time.sleep(POLL_INTERVAL)\n",
    "        status_conditions = fetch_conditions(tracking_url)\n",
    "\n",
    "    # %z consumes the UTC offset suffix, so the timestamp does not have to be sliced by hand\n",
    "    time_format = '%Y-%m-%dT%H:%M:%S.%f%z'\n",
    "    start_time = datetime.datetime.strptime(status_conditions[0]['last_transition_time'], time_format)\n",
    "    for status in status_conditions:\n",
    "        status_time = datetime.datetime.strptime(status['last_transition_time'], time_format)\n",
    "        print(f\"[{str(status_time - start_time).split('.', maxsplit=1)[0]}] Pipeline job status - {status['type']}\")\n",
    "\n",
    "    display(status_conditions[-1])\n",
    "else:\n",
    "    print(f\"Pipeline start failed: HTTP {trial_response.status_code}\")\n",
    "    print(trial_response.text[:500])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Результаты"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The listings contain presigned download URLs - clear this cell's output before sharing the notebook\n",
    "for output_var in synthdata_request[\"output_vars\"]:\n",
    "    listing_response = requests.get(f\"{service_url}/files/synthdata-box/{output_var['data']}\", auth=basic_auth, timeout=REQUESTS_TIMEOUT)\n",
    "    if listing_response.status_code in [200, 201]:\n",
    "        show_response(listing_response)\n",
    "    else:\n",
    "        print(f\"{output_var['name']}: HTTP {listing_response.status_code}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Процент совпадений с входными данными:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look the report location up by name instead of relying on its position in the list\n",
    "report_fname = next(var['data'] for var in synthdata_request[\"output_vars\"] if var['name'] == 'report_file')\n",
    "\n",
    "report_listing = requests.get(f\"{service_url}/files/synthdata-box/{report_fname}\", auth=basic_auth, timeout=REQUESTS_TIMEOUT)\n",
    "if report_listing.status_code in [200, 201]:\n",
    "    report_files = report_listing.json()['files']\n",
    "    if report_files:\n",
    "        # A presigned URL carries its own signature: do not send the platform credentials to the storage host\n",
    "        report_response = requests.get(report_files[0]['presigned_get_url'], timeout=REQUESTS_TIMEOUT)\n",
    "        show_response(report_response)\n",
    "    else:\n",
    "        print(f\"No files found under {report_fname}\")\n",
    "else:\n",
    "    print(f\"Report listing failed: HTTP {report_listing.status_code}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}