-- Create the table
---------------------------------------
"DimDate"
CREATE TABLE public.
(
dateid integer NOT NULL,
date date,
Year integer,
Quarter integer,50),
QuarterName character(
Month integer,50),
Monthname character(
Day integer,
Weekday integer,50),
WeekdayName character("DimDate_pkey" PRIMARY KEY (dateid)
CONSTRAINT ;
)
-------------------------------------------------------
"DimCategory"
CREATE TABLE public.
(
categoryid integer NOT NULL,50),
category character("DimCategory_pkey" PRIMARY KEY (categoryid)
CONSTRAINT ;
)
-------------------------------------------------------
"DimCountry"
CREATE TABLE public.
(
countryid integer NOT NULL,50),
country character("DimCountry_pkey" PRIMARY KEY (countryid)
CONSTRAINT ;
)
-----------------------------------------------------------
"FactSales"
CREATE TABLE public.
(
orderid integer NOT NULL,
dateid integer,
countryid integer,
categoryid integer,
amount integer,"FactSales_pkey" PRIMARY KEY (orderid)
CONSTRAINT ; )
Generate Reports using DW
In the last section we designed and created a PostgreSQL DW, in this section we will create the reports we specifically designed the DW to be able to answer with ease.
Objectives
I’ll be working this project at times on the cloud and at times on the local machine. Here are the overall tasks we will perform
- Load data into Data Warehouse
- Write aggregation queries
- Create MQTs
Setup
We will import an SQL file that contains the schema for all the tables, here is the content of the file
Create db
- Create PostgreSQL Instance
- Save passw:
- Open pgAdmin UI
- Right click Databases
- Create a new db: Test1
Import Schema
- Now that we have a new db staging we’ll import the schema we just created above
- Choose the db from left tree pane
- Scroll down to Schemas
- Right click > Query Tool
- Click on Folder icon > right side the …
- Make sure the directory is set to /var/lib/pgadmin > Upload
- Drag the sql file over
- X out the prompt
- Select the file
- Click on Select
- Now the script is in the SQL query pane
- Run
- Tables are created
Import the sql file above to create the following tables
Import Tables Data
- Download all the csv locally
- Right click on DimDate table
- Import/Export Data
- Next window > Import
- Folder icon
- Click on … dots
- Make sure the directory is set to /var/lib/pgadmin/
- Drag all the data files at once into the window
- X out of the popup window
- You end up at the screen above
- Go from table to table and choose the appropriate csv file to populate all of them
- Make sure to check the HEADER option to yes so the headers are imported accordingly
Populate DimDate
- The data for the tables is located in file: DimDate.csv so import it locally
- Save the data file in /var/lib/pgadmin/
- Load the data into DimDate table
- View the first 5 rows
* FROM public."DimDate"
SELECT 5 LIMIT
Populate DimCategory
- The data for the tables is located in file: DimCategory.csv so import it locally
- Save the data file in /var/lib/pgadmin/
- Load the data into DimCategory table
- View the first 5 rows
* FROM public."DimCategory"
SELECT 5 LIMIT
Populate DimCountry
- The data for the tables is located in file: DimCountry.csv so import it locally
- Save the data file in /var/lib/pgadmin/
- Load the data into DimCountry table
- View the first 5 rows
* FROM public."DimCountry"
SELECT 5 LIMIT
Populate FactSales
- The data for the tables is located in file: FactSales.csv so import it locally
- Save the data file in /var/lib/pgadmin/
- Load the data into FactSales table
- View the first 5 rows ( I Ordered them by ascending order as well, not that it would make a diff)
* FROM public."FactSales"
SELECT 5 ORDER BY orderid ASC LIMIT
Query the Data
Create a Grouping Sets
- Create a grouping sets query using the columns country, category, totalsales.
SELECT
b.country,
f.categoryid,
SUM(f.amount) AS totalsales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
GROUP BY GROUPING SETS (
(b.country, f.categoryid),
b.country,
f.categoryid,
()
)
ORDER BY
b.country,; f.categoryid
Since the request was to group by category not by categoryid we need to join another table to be able to extract the category itself from the id
SELECT
b.country,
c.category,
SUM(f.amount) AS totalsales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
INNER JOIN"DimCategory" c ON f.categoryid = c.categoryid
public.
GROUP BY GROUPING SETS (
(b.country, f.categoryid, c.category),
b.country,
f.categoryid,
()
)
ORDER BY
b.country,; f.categoryid
Another way by using category to group instead of categoryid
SELECT
b.country,
c.category,
SUM(f.amount) AS totalsales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
INNER JOIN"DimCategory" c ON f.categoryid = c.categoryid
public.
GROUP BY GROUPING SETS (
(b.country, c.category),
b.country,
c.category,
()
)
ORDER BY
b.country,; c.category
Create a Rollup Query
- Create a rollup query using the columns year, country, and totalsales.
Without Rollup
SELECT
d.year,
b.country,
SUM(f.amount) AS totalsales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
INNER JOIN"DimDate" d ON f.dateid = d.dateid
public.
GROUP BY GROUPING SETS (
(d.year, b.country),
d.year,
b.country,
()
)
ORDER BY
d.year,; b.country
With Rollup
SELECT
d.year,
b.country,
SUM(f.amount) AS totalsales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
INNER JOIN"DimDate" d ON f.dateid = d.dateid
public.
GROUP BY ROLLUP
(d.year, b.country)
ORDER BY
d.year,; b.country
Create a Cube Query
- Create a cube query using the columns year, country, and average sales.
SELECT
d.year,
b.country,
AVG(f.amount) AS AverageSales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
INNER JOIN"DimDate" d ON f.dateid = d.dateid
public.
GROUP BY
CUBE (d.year, b.country)
ORDER BY
d.year,; b.country
Create an Materialized Query Table
- Create a MQT (materialized query View) named total_sales_per_country that has the columns country and total_sales.
- Open a Query pane
CREATE MATERIALIZED VIEW total_sales_per_country AS
SELECT
b.country,
SUM(f.amount) AS total_sales
FROM"FactSales" f
public.
INNER JOIN"DimCountry" b ON f.countryid = b.countryid
public.
GROUP BY
b.country
ORDER BY
b.country; WITH DATA
; REFRESH MATERIALIZED VIEW total_sales_per_country
* FROM total_sales_per_country; SELECT