Skip to content
Snippets Groups Projects
Commit 7325882e authored by Christopher Lenke's avatar Christopher Lenke
Browse files

23.09.2024

parent 180ebffb
No related branches found
No related tags found
No related merge requests found
-- DROP PROCEDURE schema_xyz.run_icode_iv_pg(varchar, varchar, varchar, varchar);
CREATE OR REPLACE PROCEDURE schema_xyz.run_icode_iv_pg(input_table character varying, process_name character varying, create_tables character varying, result_suffix character varying)
LANGUAGE plpgsql
AS $procedure$
/* ### Parameters:
* Input table (table with addresses, column names must match)
* Process_name: the process can be found under this name in schema_xyz.process_info
* create_tables on 'Y': create KB tables. Else: up to schema_xyz.result_V_III, no KB tables
* result_suffix: suffix for the result tables for labelling and backup purposes
### Input-Table:
# Must contain the following columns:
* item_id (varchar)
* aff_seq_nr (int)
* address_full (varchar)
* city (varchar)
* pubyear (int)
### Steps:
* Transformation (preparation with group by address_full, city, then perform transformation, city_cleaning, renaming cty & address_full)
* Preparation of the tables (create adr_id and grouping address_full, city, py; not ut-specific)
* Add adr_id in the source table (for later assignment) * Pattern_matching (pattern assignment with LIKE)
* Pattern_matchingII (exclusion, lowest level, residual classes, in between group by)
* Create hierarchy tables: Mode A, S (script by Holger) * Assignment to HI based on hierarchy tables: Mode A,S
* Return to et level * Create tables
* schema_xyz.result_I: SCP: Preparation for WoS transformation
* schema_xyz.result_II: IN for pattern_matching (adr_id, pk_institutions, address_full (possibly transformed), city (possibly transformed), py)
* schema_xyz.result_III: address_full, pattern_nf, city, pattern_city, adr_id, pid, uid_,
OUT_I pattern_matching by pattern assignment with columns for exclude, remaining categories, lowest level
* schema_xyz.result_IV: OUT_II excluded are out, grouped by uid_ (pid, i.e. which pattern has been matched, is no longer included here;
address_full, city, adr_id, uid_)
* schema_xyz.result_V: Mappings after exclusion by exclusion pattern and remaining categories, grouped
* schema_xyz.result_VI: Assignment at the lowest level after exclusion by exclusion patterns and remaining categories, grouped
---->>> Sample assignment step completed <<<-----
* Tables with X or Y in front:
Intermediate tables from the creation of the hierarchy tables mode A and S (script by Holger)
* schema_xyz.result_VIII_(A/S): Assignment adr_id --> uid taking into account the HI assignments under mode A or S
*/
DECLARE
starttime timestamp;
endtime timestamp;
BEGIN
starttime := current_timestamp;
/*############################## schema_xyz.process_info ###############################*/
execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start run_icode'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ Start of preparation. +++++++
Preparation steps:
* WoS transformation (with corresponding preparation/intermediate table).
*/
/*############################## schema_xyz.process_info ###############################*/
execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start preparation'', '''||current_timestamp::varchar||''' ';
commit;
/* Transformation: is no longer only used for SCP, but also for WOS */
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_I';
--- Create table for the transformation.
execute 'create table schema_xyz.result_I as (select address_full, city
from '||input_table||' group by address_full, city)';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''End indices for schema_xyz.result_I'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ Preparation: Wostransfo (city_cleaning is included) +++++++ */
/*############################## schema_xyz.process_info ###############################*/
execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start WoSTransfo'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.wostransfo_proc_pg('schema_xyz.result_I',2);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End WoSTransfo'', '''||current_timestamp::varchar||''' ';
commit;
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_PKAFCITY';
execute 'create index I_result_I_PKAFCITY on schema_xyz.result_I(address_full, COALESCE(City,''0''))';
commit;
/* +++++++ Preparation of the intermediate table schema_xyz.result_II: addr_id is inserted. +++++++ */
EXECUTE 'DROP SEQUENCE IF EXISTS schema_xyz.seq_adr_id';
execute 'create sequence schema_xyz.seq_adr_id start with 1 increment by 1';
commit;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_II';
execute 'create table schema_xyz.result_II (adr_id integer not null, address_full varchar(2000) not null, city varchar(1000))';
commit;
execute 'insert into schema_xyz.result_II (
select nextval(''schema_xyz.seq_adr_id''), address_full, city from (select address_full, city from schema_xyz.result_I group by address_full, city) as x)';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''schema_xyz.result_II created, start indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' ';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_address_full';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_city';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_AFCI_II';
execute 'create index I_result_II_address_full on schema_xyz.result_II(address_full)';
execute 'create index I_result_II_city on schema_xyz.result_II(city)';
execute 'create index I_result_II_AFCI_II on schema_xyz.result_II(address_full, COALESCE(city,''0''))';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' ';
commit;
/* ++++++++ Pattern_matching +++++++*/
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_d'', '''||current_timestamp::varchar||''' ';
commit;
raise notice 'process_name: %s', process_name;
call schema_xyz.pattern_matching_d_pg('schema_xyz.result_II','schema_xyz.result_III','schema_xyz.result_IV',process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_d, Start Indizes on schema_xyz.result_IV'', '''||current_timestamp::varchar||''' ';
commit;
execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_address_full';
execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_city';
execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_uid';
commit;
execute 'create index I_result_IV_address_full on schema_xyz.result_IV(address_full)';
execute 'create index I_result_IV_city on schema_xyz.result_IV(city)';
execute 'create index I_result_IV_uid on schema_xyz.result_IV(uid_)';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End Indizes on schema_xyz.result_IV'', '''||current_timestamp::varchar||''' ';
commit;
/*
Result:
schema_xyz.result_III: Assignments with pid,
schema_xyz.result_IV: Assignment grouped by uid_, without pid.
schema_xyz.result_IV: address_full, city, adr_id, uid_
*/
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_II'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.pattern_matching_b_II_pg('schema_xyz.result_IV', 'schema_xyz.result_V', process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_b_II'', '''||current_timestamp::varchar||''' ';
commit;
/*
Result:
schema_xyz.result_V: adr_id, address_full, city, lowest_level
Assignments after exclusion by exclusion pattern and residual categories, grouped.
*/
/* Next step: Assignments at the lowest level */.
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_III: Find lowest level'', '''||current_timestamp::varchar||''' ';
commit;
/* This must be compiled beforehand, as the tables involved are only created during the process. */
call schema_xyz.pattern_matching_b_III_pg('schema_xyz.result_V','schema_xyz.result_VI', process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_b_III'', '''||current_timestamp::varchar||''' ';
commit;
/*
Result: Table schema_xyz.result_VI with assignment at the lowest level after exclusion by exclusion pattern and remaining categories, grouped.
*/
/* +++++++ Creates the hierarchy tables (uid_ to HI) for mode S and mode A +++++++ */
/* Creates the hierarchy tables for mode S:
* X_top_s3
* X_closed
* x_pre_start*/
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start modus_s_holger'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.modus_s_holger_pg();
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End modus_s_holger'', '''||current_timestamp::varchar||''' ';
commit;
/*Creates the hierarchy tables for mode A (CONNECT BY variant); */
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start modus_a_connect'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.modus_a_connect_pg('schema_xyz.modus_a_connect_plus_7', process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End modus_a_connect'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ add adr_id to Input_table +++++++ */
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_adrfull_o';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_city_o';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_PKAFCITY';
execute 'create index I_result_I_adrfull_o on schema_xyz.result_I(address_full_original)';
execute 'create index I_result_I_city_o on schema_xyz.result_I(COALESCE(City_original,''0''))';
execute 'create index I_result_I_PKAFCITY on schema_xyz.result_I(address_full, COALESCE(City,''0''))';
execute 'DROP INDEX IF EXISTS schema_xyz.result_II_address_full';
execute 'DROP INDEX IF EXISTS schema_xyz.result_II_city';
execute 'DROP INDEX IF EXISTS schema_xyz.result_II_AFCI';
execute 'DROP INDEX IF EXISTS schema_xyz.result_II_AFCI_II';
execute 'create index result_II_address_full on schema_xyz.result_II(address_full)';
execute 'create index result_II_city on schema_xyz.result_II(city)';
execute 'create index result_II_AFCI_II on schema_xyz.result_II(address_full, COALESCE(city,''0''))';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start adr_id'', '''||current_timestamp::varchar||''' ';
commit;
execute 'alter table '||input_table||' drop column if exists adr_id';
execute 'alter table '||input_table||' add column adr_id bigint';
execute 'update '||input_table||' set adr_id=schema_xyz.get_adr_id_b_pg(address_full, city)';
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End adr_id'', '''||current_timestamp::varchar||''' ';
commit;
execute 'DROP INDEX IF EXISTS '||split_part(input_table,'.',1)||'.i_in_adrid';
execute 'create index i_in_adrid on '||input_table||'(adr_id)';
commit;
/* Assignment of addresses to HI in A and S mode */
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start allocation at item level...'', '''||current_timestamp::varchar||''' ';
commit;
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_adrid';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_address_full';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_city';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_uid';
execute 'create index I_result_VI_address_full on schema_xyz.result_VI(address_full)';
execute 'create index I_result_VI_city on schema_xyz.result_VI(city)';
execute 'create index I_result_VI_uid on schema_xyz.result_VI(uid_)';
execute 'create index I_result_VI_adrid on schema_xyz.result_VI(adr_id)';
commit;
/* +++++++ Assignment in S mode +++++++ */
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level Mode S...'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.address_to_top_uid_pg(input_table::varchar, 'schema_xyz.result_VI', 'schema_xyz.result_VII', 'schema_xyz.result_VIII', 'S', process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level End mode S...'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ Assignment mode A +++++++ */
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level Mode A...'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.address_to_top_uid_pg(input_table, 'schema_xyz.result_VI', 'schema_xyz.result_VII', 'schema_xyz.result_VIII', 'A', process_name);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level End mode A...'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ further exclude step for HIen +++++++ */
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_adr_id';
execute 'create index I_result_II_adr_id on schema_xyz.result_II (adr_id)';
commit;
--- Index jeweils gesetzt (adr_id)
call schema_xyz.exclude_II_pg('schema_xyz.result_VIII_A');
commit;
call schema_xyz.exclude_II_pg('schema_xyz.result_VIII_S');
commit;
/* +++++++ add ending to result tables +++++++ */
raise notice 'DROP TABLE IF EXISTS schema_xyz.result_I_%s ', result_suffix ;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_I_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_II_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_III_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_IV_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_V_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VI_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VII_a_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VII_s_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_a_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_s_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_a_ex_'||result_suffix;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_s_ex_'||result_suffix;
execute 'alter table schema_xyz.result_I rename to result_I_'||result_suffix;
execute 'alter table schema_xyz.result_II rename to result_II_'||result_suffix;
execute 'alter table schema_xyz.result_III rename to result_III_'||result_suffix;
execute 'alter table schema_xyz.result_IV rename to result_IV_'||result_suffix;
execute 'alter table schema_xyz.result_V rename to result_V_'||result_suffix;
execute 'alter table schema_xyz.result_VI rename to result_VI_'||result_suffix;
execute 'alter table schema_xyz.result_VII_a rename to result_VII_a_'||result_suffix;
execute 'alter table schema_xyz.result_VII_s rename to result_VII_s_'||result_suffix;
execute 'alter table schema_xyz.result_VIII_a rename to result_VIII_a_'||result_suffix;
execute 'alter table schema_xyz.result_VIII_s rename to result_VIII_s_'||result_suffix;
execute 'alter table schema_xyz.result_VIII_a_ex rename to result_VIII_a_ex_'||result_suffix;
execute 'alter table schema_xyz.result_VIII_s_ex rename to result_VIII_s_ex_'||result_suffix;
commit;
/* creation of result tables */
/*############################## schema_xyz.process_info ###############################*/
if(upper(create_tables)='Y')
then
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start KB-Tabellen erstellen...'', '''||current_timestamp::varchar||''' ';
commit;
call schema_xyz.create_kb_tables_pg('schema_xyz.result_VIII_a_'||result_suffix, 'schema_xyz.result_VIII_s_'||result_suffix, result_suffix);
commit;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End KB-Tabellen erstellen...'', '''||current_timestamp::varchar||''' ';
commit;
end if;
commit;
endtime := current_timestamp;
/*############################## schema_xyz.process_info ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End run_icode_IV, time needed: '||endtime-starttime||' '', '''||current_timestamp::varchar||''' ';
commit;
END;
$procedure$
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment