Skip to content

Commit

Permalink
#2035: PG data model and functions for Dataset (#2054)
Browse files Browse the repository at this point in the history
* #2035: PG data model and functions for Dataset
* First commit

* * Added check on entity type

* * get fucntion
* dataset fields

* * get comment fixes

* * refactored to use id in connection of entities and versions
* add function
  • Loading branch information
benedeki authored Apr 26, 2022
1 parent 7b1e259 commit fedce11
Show file tree
Hide file tree
Showing 7 changed files with 672 additions and 0 deletions.
19 changes: 19 additions & 0 deletions database/src/main/dataset/_.ddl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

CREATE SCHEMA IF NOT EXISTS dataset;
ALTER SCHEMA dataset OWNER TO enceladus;

GRANT USAGE ON SCHEMA dataset TO menas;
138 changes: 138 additions & 0 deletions database/src/main/dataset/_add.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

CREATE OR REPLACE FUNCTION dataset._add(
IN i_entity_name TEXT,
IN i_entity_version INTEGER,
IN i_entity_description TEXT,
IN i_source_path TEXT,
IN i_publish_path TEXT,
IN i_key_schema BIGINT,
IN i_conformance JSON[],
IN i_user_name TEXT,
OUT status INTEGER,
OUT status_text TEXT,
OUT key_entity_version BIGINT
) RETURNS record AS
$$
-------------------------------------------------------------------------------
--
-- Function: jobs_configuration._add(8)
-- Stores a new version of the dataset.
--
-- Parameters:
-- i_entity_name - name of the dataset
-- i_entity_version - version of the dataset
-- i_entity_description - description of the dataset
-- i_source_path - source path for the dataset
-- i_publish_path - output path for the dataset
-- i_key_schema - reference to the schema of the dataset
-- i_conformance - array of conformance rules
-- i_user_name - the user who submitted the changes
--
-- Returns:
-- status - Status code
-- status_text - Status text
-- key_entity_version - id of the newly created dataset record
--
-- Status codes:
-- 11 - OK
-- 31 - Dataset has been disabled
-- 32 - Dataset is locked
-- 50 - Dataset version wrong
-- 51 - Dataset already exists
--
-------------------------------------------------------------------------------
DECLARE
_entity_type CHAR := 'D';
_key_entity BIGINT;
_new_entity BOOLEAN;
_latest_version INTEGER;
_locked BOOLEAN;
_disabled BOOLEAN;
BEGIN
IF i_entity_version = 1 THEN
-- lock on stats to prevent competing inserts of new entity
PERFORM 1
FROM entity_base.stats S
WHERE S.entity_type = _entity_type
FOR UPDATE;
END IF;

SELECT E.id_entity, E.entity_latest_version, E.locked_at IS NOT NULL, E.disabled_at IS NOT NULL
FROM dataset.entities E
WHERE E.entity_name = i_entity_name
FOR UPDATE
INTO _key_entity, _latest_version, _locked, _disabled;

_new_entity := NOT found;

IF _new_entity THEN
IF i_entity_version != 1 THEN
status := 50;
status_text := 'Dataset version wrong';
RETURN;
END IF;

UPDATE entity_base.stats
SET entity_count = stats.entity_count + 1
WHERE entity_type = _entity_type;

INSERT INTO dataset.entities(entity_name, entity_latest_version, created_by)
VALUES (i_entity_name, i_entity_version, i_user_name)
RETURNING id_entity
INTO _key_entity;
ELSE
IF _disabled THEN
status := 31;
status_text := 'Dataset has been disabled';
RETURN ;
ELSIF _locked THEN
status := 32;
status_text := 'Dataset is locked';
RETURN;
ELSIF _latest_version >= i_entity_version THEN
status := 51;
status_text := 'Dataset already exists';
RETURN;
ELSIF _latest_version + 1 < i_entity_version THEN
status := 50;
status_text := 'Dataset version wrong';
RETURN;
END IF;

END IF;

INSERT INTO dataset.versions(key_entity, entity_version, entity_description, updated_by,
source_path, publish_path, key_schema, conformance)
VALUES (_key_entity, i_entity_version, i_entity_description, i_user_name,
i_source_path, i_publish_path, i_key_schema, i_conformance)
RETURNING dataset.versions.id_entity_version
INTO key_entity_version;

IF NOT _new_entity THEN
UPDATE dataset.entities
SET entity_latest_version = i_entity_version
WHERE id_entity = _key_entity;
END IF;

status := 11;
status_text := 'OK';
RETURN;
END;
$$
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;

ALTER FUNCTION dataset._add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) OWNER TO enceladus;
159 changes: 159 additions & 0 deletions database/src/main/dataset/add.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

CREATE OR REPLACE FUNCTION dataset.add(
IN i_entity_name TEXT,
IN i_entity_version INTEGER,
IN i_entity_description TEXT,
IN i_source_path TEXT,
IN i_publish_path TEXT,
IN i_key_schema BIGINT,
IN i_conformance JSON[],
IN i_user_name TEXT,
OUT status INTEGER,
OUT status_text TEXT,
OUT key_entity_version BIGINT
) RETURNS record AS
$$
-------------------------------------------------------------------------------
--
-- Function: jobs_configuration.add(8)
-- Stores a new version of the dataset.
-- The i_entity_version has to be an increment of the latest version of an existing dataset or 1 in the case of a
-- new one
--
-- Parameters:
-- i_entity_name - name of the dataset
-- i_entity_version - version of the dataset
-- i_entity_description - description of the dataset
-- i_source_path - source path for the dataset
-- i_publish_path - output path for the dataset
-- i_key_schema - reference to the schema of the dataset
-- i_conformance - array of conformance rules
-- i_user_name - the user who submitted the changes
--
-- Returns:
-- status - Status code
-- status_text - Status text
-- key_entity_version - id of the newly created dataset record
--
-- Status codes:
-- 11 - OK
-- 31 - Dataset has been disabled
-- 32 - Dataset is locked
-- 42 - Schema does not exists
-- 50 - Dataset version wrong
-- 51 - Dataset already exists
--
-------------------------------------------------------------------------------
DECLARE
BEGIN
PERFORM 1
FROM dataset_schema.versions V
WHERE V.id_entity_version = i_key_schema;

IF NOT found THEN
status := 42;
status_text := 'Schema does not exists';
RETURN;
END IF;

SELECT A.status, A.status_text, A.key_entity_version
FROM dataset._add(i_entity_name, i_entity_version, i_entity_description, i_source_path,
i_publish_path, i_key_schema, i_conformance, i_user_name) A
INTO status, status_text, key_entity_version;

RETURN;
END;
$$
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;

ALTER FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) OWNER TO enceladus;
GRANT EXECUTE ON FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, BIGINT, JSON[], TEXT) TO menas;

CREATE OR REPLACE FUNCTION dataset.add(
IN i_entity_name TEXT,
IN i_entity_version INTEGER,
IN i_entity_description TEXT,
IN i_source_path TEXT,
IN i_publish_path TEXT,
IN i_schema_name TEXT,
IN i_schema_version INTEGER,
IN i_conformance JSON[],
IN i_user_name TEXT,
OUT status INTEGER,
OUT status_text TEXT,
OUT key_entity_version BIGINT
) RETURNS record AS
$$
-------------------------------------------------------------------------------
--
-- Function: jobs_configuration.add(9)
-- Stores a new version of the mapping table.
-- The i_entity_version has to be an increment of the latest version of an existing dataset or 1 in the case of a
-- new one
--
-- Parameters:
-- i_entity_name - name of the dataset
-- i_entity_version - version of the dataset
-- i_entity_description - description of the dataset
-- i_source_path - source path for the dataset
-- i_publish_path - output path for the dataset
-- i_schema_name - name of the referenced schema of the dataset
-- i_schema_version - version of the referenced schema of the dataset
-- i_conformance - array of conformance rules
-- i_user_name - the user who submitted the changes
--
-- Returns:
-- status - Status code
-- status_text - Status text
-- key_entity_version - id of the newly created dataset record
--
-- Status codes:
-- 11 - OK
-- 31 - Dataset has been disabled
-- 32 - Dataset is locked
-- 42 - Schema does not exists
-- 50 - Dataset version wrong
-- 51 - Dataset already exists
--
-------------------------------------------------------------------------------
DECLARE
_key_schema BIGINT;
BEGIN

SELECT G.id_entity_version
FROM dataset_schema.get(i_schema_name, i_schema_version) G
WHERE G.status = 10
INTO _key_schema;

IF NOT found THEN
status := 42;
status_text := 'Schema does not exists';
RETURN;
END IF;

SELECT A.status, A.status_text, A.key_entity_version
FROM mapping_table._add(i_entity_name, i_entity_version, i_entity_description, i_source_path,
i_publish_path, _key_schema, i_conformance, i_user_name) A
INTO status, status_text, key_entity_version;

RETURN;
END;
$$
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;

ALTER FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, TEXT, INTEGER, JSON[], TEXT) OWNER TO enceladus;
GRANT EXECUTE ON FUNCTION dataset.add(TEXT, INTEGER, TEXT, TEXT, TEXT, TEXT, INTEGER, JSON[], TEXT) TO menas;
32 changes: 32 additions & 0 deletions database/src/main/dataset/entities.ddl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- DROP TABLE IF EXISTS dataset.entities;

CREATE TABLE dataset.entities
(
entity_type CHAR NOT NULL DEFAULT 'D',
CONSTRAINT entities_pk PRIMARY KEY (id_entity)
)
INHERITS (entity_base.entities);

ALTER TABLE dataset.entities
ADD CONSTRAINT entities_unq UNIQUE (entity_name);

ALTER TABLE IF EXISTS dataset.entities
ADD CONSTRAINT check_dataset_entity_type CHECK (entity_type = 'D')
NOT VALID;

ALTER TABLE dataset.entities OWNER to enceladus;
Loading

0 comments on commit fedce11

Please sign in to comment.