{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "GePtyVmwFtQg"
},
"source": [
"# Datasets and Relations\n",
"[](https://github.com/Qrlew/docs/blob/main/tutorials/dataset_from_queries.ipynb)\n",
"[](https://colab.research.google.com/github/Qrlew/pyqrlew/blob/main/examples/dataset_from_queries.ipynb)\n",
"\n",
"In this short tutorial, we setup a local database, insert a demo dataset from [`qrlew-dataset`](https://pypi.org/project/qrlew-datasets/) and show how to create `Relation`s from SQL queries and derived `Dataset`s (set of `Relation`s)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YLLCzFIyPbZi"
},
"source": [
"## Setup a local database"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "BdGIrJE56tVk"
},
"outputs": [],
"source": [
"%%capture\n",
"# Load the database\n",
"# Inspired by https://colab.research.google.com/github/tensorflow/io/blob/master/docs/tutorials/postgresql.ipynb#scrollTo=YUj0878jPyz7\n",
"!sudo apt-get -y -qq update\n",
"!sudo apt-get -y -qq install postgresql-14 graphviz\n",
"# Start postgresql server\n",
"!sudo sed -i \"s/port = 5432/port = 5433/g\" /etc/postgresql/14/main/postgresql.conf\n",
"!sudo service postgresql start\n",
"# Set password\n",
"!sudo -u postgres psql -U postgres -c \"ALTER USER postgres PASSWORD 'pyqrlew-db'\"\n",
"!pip install -U pyqrlew matplotlib graphviz"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EMrWiv7PPbZj"
},
"source": [
"## Insert data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "3qzlDS-s5Gid"
},
"outputs": [],
"source": [
"import pyqrlew as qrl\n",
"from pyqrlew.io import PostgreSQL\n",
"\n",
"# Setup a default database\n",
"DB = PostgreSQL()\n",
"# Insert a demo dataset\n",
"DB.load_extract()\n",
"dataset = qrl.Dataset.from_database(name='extract', engine=DB.engine(), schema_name='extract')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 559
},
"id": "wFd1_0Qa5Gih",
"outputId": "8d2fe718-2a7d-4bcb-ff3d-c8a935959b34"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['extract', 'extract', 'beacon']\n",
"SELECT * FROM extract.beacon\n"
]
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"['extract', 'extract', 'census']\n",
"SELECT * FROM extract.census\n"
]
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {}
}
],
"source": [
"from graphviz import Source\n",
"from IPython.display import display\n",
"\n",
"for path, relation in dataset.relations():\n",
" print(path)\n",
" print(relation.to_query())\n",
" display(Source(relation.dot()))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JXsDDcaTPbZk"
},
"source": [
"## Build `Relation`s and `Dataset`s"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 664
},
"id": "633lkfje5Gii",
"outputId": "dd83ec87-9774-4a6a-b67c-eedc390898ed"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 4
}
],
"source": [
"derived_relation = dataset.relation('SELECT age, count(age) FROM extract.census GROUP BY age')\n",
"Source(derived_relation.dot())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "QjHhJF5q5Gii"
},
"outputs": [],
"source": [
"queries = [\n",
" ((\"schema_name\", \"sch\", \"tab1\"), 'SELECT age, count(age) FROM extract.census GROUP BY age LIMIT 10'),\n",
" ((\"schema_name\", \"sch\", \"sum_age\"), 'SELECT SUM(age) FROM extract.census'),\n",
" ((\"schema_name\", \"new_sch\", \"bacon\"), 'SELECT * FROM extract.beacon')\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 559
},
"id": "APTQNOJ05Gii",
"outputId": "67a1accb-3419-48f7-9e9f-9d404d47d924"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Dataset: {}\n",
"Schema: {\"name\": \"schema_name\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"sarus_data\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"new_sch\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"bacon\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"検知日時\", \"type\": {\"name\": \"Datetime\", \"datetime\": {\"format\": \"%Y-%m-%d %H:%M:%S.%9f\", \"min\": \"0001-01-01 00:00:00.000000000\", \"max\": \"9999-12-31 00:00:00.000000000\"}}}, {\"name\": \"UserId\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"所属部署\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"フロア名\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"Beacon名\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"RSSI\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}, {\"name\": \"マップのX座標\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}, {\"name\": \"マップのY座標\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}]}}}]}}}, {\"name\": \"sch\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"tab1\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"age\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}, \"properties\": {\"_CONSTRAINT_\": \"_UNIQUE_\"}}}, {\"name\": \"field_fxi8\", \"type\": {\"name\": \"Integer\", \"integer\": {\"max\": \"199\"}}}]}}}, {\"name\": \"sum_age\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"field_0_go\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}]}}}]}}}]}}}, {\"name\": \"sarus_protected_entity\", \"type\": {\"name\": \"Optional\", \"optional\": {\"type\": {\"name\": \"Id\", \"id\": {}}}}}, {\"name\": \"sarus_is_public\", \"type\": {\"name\": \"Boolean\", \"boolean\": {}}}, {\"name\": \"sarus_weights\", \"type\": {\"name\": \"Float\", \"float\": {\"max\": 1.7976931348623157e308}}}]}}}\n",
"Size: {\"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"sch\", \"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"sum_age\", \"statistics\": {\"struct\": {\"size\": \"199\"}}}, {\"name\": \"tab1\", \"statistics\": {\"struct\": {\"size\": \"10\"}}}]}}}, {\"name\": \"new_sch\", \"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"bacon\", \"statistics\": {\"struct\": {\"size\": \"100\"}}}]}}}]}}}\n",
"['schema_name', 'new_sch', 'bacon']\n",
"SELECT * FROM new_sch.bacon\n"
]
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"['schema_name', 'sch', 'sum_age']\n",
"SELECT * FROM sch.sum_age\n"
]
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"['schema_name', 'sch', 'tab1']\n",
"SELECT * FROM sch.tab1\n"
]
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {}
}
],
"source": [
"derived_dataset = dataset.from_queries(queries)\n",
"print(derived_dataset)\n",
"for path, relation in derived_dataset.relations():\n",
" print(path)\n",
" print(relation.to_query())\n",
" display(Source(relation.dot()))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "mcLW9UE75Gii",
"outputId": "5b789be7-d311-4276-934f-de7bea60dfc7"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'{\"name\": \"schema_name\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"sarus_data\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"new_sch\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"bacon\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"検知日時\", \"type\": {\"name\": \"Datetime\", \"datetime\": {\"format\": \"%Y-%m-%d %H:%M:%S.%9f\", \"min\": \"0001-01-01 00:00:00.000000000\", \"max\": \"9999-12-31 00:00:00.000000000\"}}}, {\"name\": \"UserId\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"所属部署\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"フロア名\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"Beacon名\", \"type\": {\"name\": \"Text\", \"text\": {}}}, {\"name\": \"RSSI\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}, {\"name\": \"マップのX座標\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}, {\"name\": \"マップのY座標\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}]}}}]}}}, {\"name\": \"sch\", \"type\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"tab1\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"age\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}, \"properties\": {\"_CONSTRAINT_\": \"_UNIQUE_\"}}}, {\"name\": \"field_fxi8\", \"type\": {\"name\": \"Integer\", \"integer\": {\"max\": \"199\"}}}]}}}, {\"name\": \"sum_age\", \"type\": {\"name\": \"Struct\", \"struct\": {\"fields\": [{\"name\": \"field_0_go\", \"type\": {\"name\": \"Integer\", \"integer\": {\"min\": \"-9223372036854775808\", \"max\": \"9223372036854775807\"}}}]}}}]}}}]}}}, {\"name\": \"sarus_protected_entity\", \"type\": {\"name\": \"Optional\", \"optional\": {\"type\": {\"name\": \"Id\", \"id\": {}}}}}, {\"name\": \"sarus_is_public\", \"type\": {\"name\": \"Boolean\", \"boolean\": {}}}, {\"name\": \"sarus_weights\", \"type\": {\"name\": \"Float\", \"float\": {\"max\": 1.7976931348623157e308}}}]}}}'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"derived_dataset.schema"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 71
},
"id": "tMwewanw5Gij",
"outputId": "8b7751a8-1d8d-45c5-b199-2709c74ac275"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'{\"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"sch\", \"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"sum_age\", \"statistics\": {\"struct\": {\"size\": \"199\"}}}, {\"name\": \"tab1\", \"statistics\": {\"struct\": {\"size\": \"10\"}}}]}}}, {\"name\": \"new_sch\", \"statistics\": {\"name\": \"Union\", \"union\": {\"fields\": [{\"name\": \"bacon\", \"statistics\": {\"struct\": {\"size\": \"100\"}}}]}}}]}}}'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"derived_dataset.size"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "6nyELtnmPbZl"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 0
}