-- DROP PROCEDURE schema_xyz.run_icode_iv_pg(varchar, varchar, varchar, varchar);
CREATE OR REPLACE PROCEDURE schema_xyz.run_icode_iv_pg(input_table character varying, process_name character varying, create_tables character varying, result_suffix character varying)
LANGUAGE plpgsql
AS $procedure$
/* ### Parameters:
* input_table: table with addresses (its column names must match those listed under "Input-Table" below)
* process_name: name under which this run is logged in schema_xyz.process_info
* create_tables: if 'Y', create the KB tables; otherwise run only up to schema_xyz.result_V_III and create no KB tables
* result_suffix: suffix for the result tables for labelling and backup purposes
### Input-Table:
# Must contain the following columns:
* item_id (varchar)
* aff_seq_nr (int)
* address_full (varchar)
* city (varchar)
* pubyear (int)
### Steps:
* Transformation (preparation with group by address_full, city; then perform the transformation, city_cleaning, and renaming of city & address_full)
* Preparation of the tables (create adr_id and grouping address_full, city, py; not ut-specific)
* Add adr_id in the source table (for later assignment)
* Pattern_matching (pattern assignment with LIKE)
* Pattern_matchingII (exclusion, lowest level, residual classes, in between group by)
* Create hierarchy tables: Mode A, S (script by Holger)
* Assignment to HI based on hierarchy tables: Mode A, S
* Return to et level
* Create tables
* schema_xyz.result_I: SCP: Preparation for WoS transformation
* schema_xyz.result_II: IN for pattern_matching (adr_id, pk_institutions, address_full (possibly transformed), city (possibly transformed), py)
* schema_xyz.result_III: address_full, pattern_nf, city, pattern_city, adr_id, pid, uid_,
OUT_I pattern_matching by pattern assignment with columns for exclude, remaining categories, lowest level
* schema_xyz.result_IV: OUT_II: excluded entries are removed, grouped by uid_ (pid, i.e. which pattern was matched, is no longer included here;
address_full, city, adr_id, uid_)
* schema_xyz.result_V: Mappings after exclusion by exclusion pattern and remaining categories, grouped
* schema_xyz.result_VI: Assignment at the lowest level after exclusion by exclusion patterns and remaining categories, grouped
---->>> Sample assignment step completed <<<-----
* Tables with X or Y in front:
Intermediate tables from the creation of the hierarchy tables mode A and S (script by Holger)
* schema_xyz.result_VIII_(A/S): Assignment adr_id --> uid taking into account the HI assignments under mode A or S
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End indices for schema_xyz.result_I'', '''||current_timestamp::varchar||''' ';
commit;
/* +++++++ Preparation: Wostransfo (city_cleaning is included) +++++++ */
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End WoSTransfo'', '''||current_timestamp::varchar||''' ';
commit;
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_I_PKAFCITY';
execute 'create index I_result_I_PKAFCITY on schema_xyz.result_I(address_full, COALESCE(City,''0''))';
commit;
/* +++++++ Preparation of the intermediate table schema_xyz.result_II: addr_id is inserted. +++++++ */
EXECUTE 'DROP SEQUENCE IF EXISTS schema_xyz.seq_adr_id';
execute 'create sequence schema_xyz.seq_adr_id start with 1 increment by 1';
commit;
EXECUTE 'DROP TABLE IF EXISTS schema_xyz.result_II';
execute 'create table schema_xyz.result_II (adr_id integer not null, address_full varchar(2000) not null, city varchar(1000))';
commit;
execute 'insert into schema_xyz.result_II (
select nextval(''schema_xyz.seq_adr_id''), address_full, city from (select address_full, city from schema_xyz.result_I group by address_full, city) as x)';
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''schema_xyz.result_II created, start indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' ';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_address_full';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_city';
execute 'DROP INDEX IF EXISTS schema_xyz.I_result_II_AFCI_II';
execute 'create index I_result_II_address_full on schema_xyz.result_II(address_full)';
execute 'create index I_result_II_city on schema_xyz.result_II(city)';
execute 'create index I_result_II_AFCI_II on schema_xyz.result_II(address_full, COALESCE(city,''0''))';
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End indices on schema_xyz.result_II'', '''||current_timestamp::varchar||''' ';