From 7325882efee2fd9dca8a0f8815a619c6849145fa Mon Sep 17 00:00:00 2001 From: Christopher Lenke <christopher.lenke@uni-bielefeld.de> Date: Mon, 23 Sep 2024 18:39:59 +0200 Subject: [PATCH] 23.09.2024 --- ...pg_varchar__varchar__varchar__varchar_.txt | 417 ++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 Procedures/run_icode_iv_pg_varchar__varchar__varchar__varchar_.txt diff --git a/Procedures/run_icode_iv_pg_varchar__varchar__varchar__varchar_.txt b/Procedures/run_icode_iv_pg_varchar__varchar__varchar__varchar_.txt new file mode 100644 index 0000000..3190778 --- /dev/null +++ b/Procedures/run_icode_iv_pg_varchar__varchar__varchar__varchar_.txt @@ -0,0 +1,417 @@ +-- DROP PROCEDURE schema_xyz.run_icode_iv_pg(varchar, varchar, varchar, varchar); + +CREATE OR REPLACE PROCEDURE schema_xyz.run_icode_iv_pg(input_table character varying, process_name character varying, create_tables character varying, result_suffix character varying) + LANGUAGE plpgsql +AS $procedure$ + +/* ### Parameters: + +* Input table (table with addresses, column names must match) +* Process_name: the process can be found under this name in schema_xyz.process_info +* create_tables on 'Y': create KB tables. Else: up to schema_xyz.result_V_III, no KB tables +* result_suffix: suffix for the result tables for labelling and backup purposes + +### Input-Table: + +# Must contain the following columns: + +* item_id (varchar) +* aff_seq_nr (int) +* address_full (varchar) +* city (varchar) +* pubyear (int) + + +### Steps: + +* Transformation (preparation with group by address_full, city, then perform transformation, city_cleaning, renaming cty & address_full) +* Preparation of the tables (create adr_id and grouping address_full, city, py; not ut-specific) +* Add adr_id in the source table (for later assignment) * Pattern_matching (pattern assignment with LIKE) +* Pattern_matchingII (exclusion, lowest level, residual classes, in between group by) +* Create hierarchy tables: Mode A, S (script by Holger) * Assignment to HI based on hierarchy tables: Mode A,S +* Return to et level * Create tables + +* schema_xyz.result_I: SCP: Preparation for WoS transformation +* schema_xyz.result_II: IN for pattern_matching (adr_id, pk_institutions, address_full (possibly transformed), city (possibly transformed), py) +* schema_xyz.result_III: address_full, pattern_nf, city, pattern_city, adr_id, pid, uid_, + OUT_I pattern_matching by pattern assignment with columns for exclude, remaining categories, lowest level +* schema_xyz.result_IV: OUT_II excluded are out, grouped by uid_ (pid, i.e. which pattern has been matched, is no longer included here; + address_full, city, adr_id, uid_) +* schema_xyz.result_V: Mappings after exclusion by exclusion pattern and remaining categories, grouped +* schema_xyz.result_VI: Assignment at the lowest level after exclusion by exclusion patterns and remaining categories, grouped + + + +---->>> Sample assignment step completed <<<----- + +* Tables with X or Y in front: + Intermediate tables from the creation of the hierarchy tables mode A and S (script by Holger) + +* schema_xyz.result_VIII_(A/S): Assignment adr_id --> uid taking into account the HI assignments under mode A or S + +*/ + +DECLARE + +starttime timestamp; +endtime timestamp; + + +BEGIN + +starttime := current_timestamp; + + +/*############################## schema_xyz.process_info ###############################*/ +execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start run_icode'', '''||current_timestamp::varchar||''' '; +commit; + +/* +++++++ Start of preparation. +++++++ + +Preparation steps: + +* WoS transformation (with corresponding preparation/intermediate table). + +*/ + + +/*############################## schema_xyz.process_info ###############################*/ +execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start preparation'', '''||current_timestamp::varchar||''' '; +commit; +/* Transformation: is no longer only used for SCP, but also for WOS */ + + +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_I'; +--- Create table for the transformation. +execute 'create table schema_xyz.result_I as (select address_full, city +from '||input_table||' group by address_full, city)'; +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''End indices for schema_xyz.result_I'', '''||current_timestamp::varchar||''' '; +commit; + +/* +++++++ Preparation: Wostransfo (city_cleaning is included) +++++++ */ + +/*############################## schema_xyz.process_info ###############################*/ +execute ‘insert into schema_xyz.process_info select '''||process_name||''', ''Start WoSTransfo'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.wostransfo_proc_pg('schema_xyz.result_I',2); +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End WoSTransfo'', '''||current_timestamp::varchar||''' '; +commit; + +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_PKAFCITY'; +execute 'create index I_result_I_PKAFCITY on schema_xyz.result_I(address_full, COALESCE(City,''0''))'; +commit; + + +/* +++++++ Preparation of the intermediate table schema_xyz.result_II: addr_id is inserted. +++++++ */ + + +EXECUTE 'DROP SEQUENCE IF EXISTS schema_xyz.seq_adr_id'; +execute 'create sequence schema_xyz.seq_adr_id start with 1 increment by 1'; +commit; + +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_II'; +execute 'create table schema_xyz.result_II (adr_id integer not null, address_full varchar(2000) not null, city varchar(1000))'; +commit; + +execute 'insert into schema_xyz.result_II ( +select nextval(''schema_xyz.seq_adr_id''), address_full, city from (select address_full, city from schema_xyz.result_I group by address_full, city) as x)'; +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''schema_xyz.result_II created, start indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' '; + +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_address_full'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_city'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_AFCI_II'; + +execute 'create index I_result_II_address_full on schema_xyz.result_II(address_full)'; +execute 'create index I_result_II_city on schema_xyz.result_II(city)'; +execute 'create index I_result_II_AFCI_II on schema_xyz.result_II(address_full, COALESCE(city,''0''))'; +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' '; +commit; + + +/* ++++++++ Pattern_matching +++++++*/ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_d'', '''||current_timestamp::varchar||''' '; +commit; + +raise notice 'process_name: %s', process_name; + +call schema_xyz.pattern_matching_d_pg('schema_xyz.result_II','schema_xyz.result_III','schema_xyz.result_IV',process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_d, Start Indizes on schema_xyz.result_IV'', '''||current_timestamp::varchar||''' '; +commit; + +execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_address_full'; +execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_city'; +execute 'DROP INDEX IF EXISTS schema_xyz.i_result_IV_uid'; +commit; + +execute 'create index I_result_IV_address_full on schema_xyz.result_IV(address_full)'; +execute 'create index I_result_IV_city on schema_xyz.result_IV(city)'; +execute 'create index I_result_IV_uid on schema_xyz.result_IV(uid_)'; +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End Indizes on schema_xyz.result_IV'', '''||current_timestamp::varchar||''' '; +commit; + +/* + +Result: +schema_xyz.result_III: Assignments with pid, +schema_xyz.result_IV: Assignment grouped by uid_, without pid. +schema_xyz.result_IV: address_full, city, adr_id, uid_ + +*/ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_II'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.pattern_matching_b_II_pg('schema_xyz.result_IV', 'schema_xyz.result_V', process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_b_II'', '''||current_timestamp::varchar||''' '; +commit; + +/* + +Result: +schema_xyz.result_V: adr_id, address_full, city, lowest_level +Assignments after exclusion by exclusion pattern and residual categories, grouped. + +*/ + + +/* Next step: Assignments at the lowest level */. + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_III: Find lowest level'', '''||current_timestamp::varchar||''' '; +commit; + +/* This must be compiled beforehand, as the tables involved are only created during the process. */ + +call schema_xyz.pattern_matching_b_III_pg('schema_xyz.result_V','schema_xyz.result_VI', process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_b_III'', '''||current_timestamp::varchar||''' '; +commit; + +/* +Result: Table schema_xyz.result_VI with assignment at the lowest level after exclusion by exclusion pattern and remaining categories, grouped. +*/ + +/* +++++++ Creates the hierarchy tables (uid_ to HI) for mode S and mode A +++++++ */ + +/* Creates the hierarchy tables for mode S: + +* X_top_s3 +* X_closed +* x_pre_start*/ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start modus_s_holger'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.modus_s_holger_pg(); +commit; +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End modus_s_holger'', '''||current_timestamp::varchar||''' '; +commit; + +/*Creates the hierarchy tables for mode A (CONNECT BY variant); */ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start modus_a_connect'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.modus_a_connect_pg('schema_xyz.modus_a_connect_plus_7', process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End modus_a_connect'', '''||current_timestamp::varchar||''' '; +commit; + + +/* +++++++ add adr_id to Input_table +++++++ */ + +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_adrfull_o'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_city_o'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_PKAFCITY'; + +execute 'create index I_result_I_adrfull_o on schema_xyz.result_I(address_full_original)'; +execute 'create index I_result_I_city_o on schema_xyz.result_I(COALESCE(City_original,''0''))'; +execute 'create index I_result_I_PKAFCITY on schema_xyz.result_I(address_full, COALESCE(City,''0''))'; + +execute 'DROP INDEX IF EXISTS schema_xyz.result_II_address_full'; +execute 'DROP INDEX IF EXISTS schema_xyz.result_II_city'; +execute 'DROP INDEX IF EXISTS schema_xyz.result_II_AFCI'; +execute 'DROP INDEX IF EXISTS schema_xyz.result_II_AFCI_II'; + +execute 'create index result_II_address_full on schema_xyz.result_II(address_full)'; +execute 'create index result_II_city on schema_xyz.result_II(city)'; +execute 'create index result_II_AFCI_II on schema_xyz.result_II(address_full, COALESCE(city,''0''))'; +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start adr_id'', '''||current_timestamp::varchar||''' '; +commit; + +execute 'alter table '||input_table||' drop column if exists adr_id'; +execute 'alter table '||input_table||' add column adr_id bigint'; +execute 'update '||input_table||' set adr_id=schema_xyz.get_adr_id_b_pg(address_full, city)'; +commit; + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End adr_id'', '''||current_timestamp::varchar||''' '; +commit; + +execute 'DROP INDEX IF EXISTS '||split_part(input_table,'.',1)||'.i_in_adrid'; +execute 'create index i_in_adrid on '||input_table||'(adr_id)'; +commit; + + +/* Assignment of addresses to HI in A and S mode */ + + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start allocation at item level...'', '''||current_timestamp::varchar||''' '; +commit; + +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_adrid'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_address_full'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_city'; +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_VI_uid'; + +execute 'create index I_result_VI_address_full on schema_xyz.result_VI(address_full)'; +execute 'create index I_result_VI_city on schema_xyz.result_VI(city)'; +execute 'create index I_result_VI_uid on schema_xyz.result_VI(uid_)'; +execute 'create index I_result_VI_adrid on schema_xyz.result_VI(adr_id)'; +commit; + +/* +++++++ Assignment in S mode +++++++ */ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level Mode S...'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.address_to_top_uid_pg(input_table::varchar, 'schema_xyz.result_VI', 'schema_xyz.result_VII', 'schema_xyz.result_VIII', 'S', process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level End mode S...'', '''||current_timestamp::varchar||''' '; +commit; + +/* +++++++ Assignment mode A +++++++ */ + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level Mode A...'', '''||current_timestamp::varchar||''' '; +commit; +call schema_xyz.address_to_top_uid_pg(input_table, 'schema_xyz.result_VI', 'schema_xyz.result_VII', 'schema_xyz.result_VIII', 'A', process_name); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start assignment at item level End mode A...'', '''||current_timestamp::varchar||''' '; +commit; + +/* +++++++ further exclude step for HIen +++++++ */ + + +execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_adr_id'; +execute 'create index I_result_II_adr_id on schema_xyz.result_II (adr_id)'; +commit; + +--- Index jeweils gesetzt (adr_id) +call schema_xyz.exclude_II_pg('schema_xyz.result_VIII_A'); +commit; +call schema_xyz.exclude_II_pg('schema_xyz.result_VIII_S'); +commit; + +/* +++++++ add ending to result tables +++++++ */ + +raise notice 'DROP TABLE IF EXISTS schema_xyz.result_I_%s ', result_suffix ; + +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_I_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_II_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_III_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_IV_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_V_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VI_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VII_a_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VII_s_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_a_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_s_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_a_ex_'||result_suffix; +EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_VIII_s_ex_'||result_suffix; + +execute 'alter table schema_xyz.result_I rename to result_I_'||result_suffix; +execute 'alter table schema_xyz.result_II rename to result_II_'||result_suffix; +execute 'alter table schema_xyz.result_III rename to result_III_'||result_suffix; +execute 'alter table schema_xyz.result_IV rename to result_IV_'||result_suffix; +execute 'alter table schema_xyz.result_V rename to result_V_'||result_suffix; +execute 'alter table schema_xyz.result_VI rename to result_VI_'||result_suffix; +execute 'alter table schema_xyz.result_VII_a rename to result_VII_a_'||result_suffix; +execute 'alter table schema_xyz.result_VII_s rename to result_VII_s_'||result_suffix; +execute 'alter table schema_xyz.result_VIII_a rename to result_VIII_a_'||result_suffix; +execute 'alter table schema_xyz.result_VIII_s rename to result_VIII_s_'||result_suffix; +execute 'alter table schema_xyz.result_VIII_a_ex rename to result_VIII_a_ex_'||result_suffix; +execute 'alter table schema_xyz.result_VIII_s_ex rename to result_VIII_s_ex_'||result_suffix; + +commit; + +/* creation of result tables */ + +/*############################## schema_xyz.process_info ###############################*/ + +if(upper(create_tables)='Y') +then +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start KB-Tabellen erstellen...'', '''||current_timestamp::varchar||''' '; +commit; + +call schema_xyz.create_kb_tables_pg('schema_xyz.result_VIII_a_'||result_suffix, 'schema_xyz.result_VIII_s_'||result_suffix, result_suffix); +commit; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End KB-Tabellen erstellen...'', '''||current_timestamp::varchar||''' '; +commit; + +end if; + + + +commit; + +endtime := current_timestamp; + +/*############################## schema_xyz.process_info ###############################*/ +execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End run_icode_IV, time needed: '||endtime-starttime||' '', '''||current_timestamp::varchar||''' '; +commit; + + +END; + +$procedure$ +; -- GitLab