diff --git a/Procedures/pattern_matching_b_ii_pg_text__text__text_.txt b/Procedures/pattern_matching_b_ii_pg_text__text__text_.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e90b4fc3b620130d461068f0379789f228f6797 --- /dev/null +++ b/Procedures/pattern_matching_b_ii_pg_text__text__text_.txt @@ -0,0 +1,384 @@ +-- DROP PROCEDURE schema_xyz.pattern_matching_b_ii_pg(text, text, text); + +CREATE OR REPLACE PROCEDURE schema_xyz.pattern_matching_b_ii_pg(input_table text, output_table text, process_name text) + LANGUAGE plpgsql +AS $procedure$ + #variable_conflict use_variable + +DECLARE + +epc cursor is SELECT epid, uid_, pattern_nf, pattern_city from schema_xyz.exclude_pattern; + +/* +Exclusion pattern, remaining categories, lowest level. + +input_table: result_IV +output_table: result_V + +*/ + exclude_pattern RECORD; + +BEGIN + +/*############################## schema_xyz.process_info ###############################*/ + +EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_II'', '''||current_timestamp::varchar||''''; + +/* +++++++ EXCLUDE: Application of exclusion patterns. No differentiation between pattern-specific and non-pattern-specific exclusion - only at address level ++++++++ */ + +EXECUTE 'alter table '||input_table||' add column exclude numeric, add column exclude_id numeric'; + +for exclude_pattern in epc + + LOOP + + EXECUTE 'update '||input_table||' set exclude=1, exclude_id='||exclude_pattern.epid|| ' + where (uid_='||exclude_pattern.uid_||' and upper(address_full) like '''||exclude_pattern.pattern_nf||''' and (city is null or upper(city) like '''||exclude_pattern.pattern_city||'''))'; + + END LOOP; + + +/*############################## schema_xyz.process_info ###############################*/ + +EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Exclude completed'', '''||current_timestamp::varchar||''''; + + + +/* + +Remaining and collective categories: the pattern should only apply here if no other applies. + +4751 Remaining category: Companies, not yet assigned (domain_id=1) +1005 MPG remaining category, without unique institute address (domain_id=6) +230 Collective category practices (domain_id=19) +4138 HGF remaining category, without unique institute address (domain_id=4) +4127 FhG remaining category, without unique institute address (domain_id=3) +4128 Leibniz remaining category, without unique institute address (domain_id=10) +4752 Remaining category: Clinics, not yet assigned (domain_id=17) +4428 Remaining category: Universities (domain_id=9) +4429 Remaining category: Universities of Applied Sciences (domain_id=15) + + +*/ + + + +EXECUTE 'alter table '||input_table||' add column delete_rest numeric'; + +/* Remaining category Germany: 3987 */ + +EXECUTE 'update '||input_table||' +set delete_rest = 1 +where uid_ = 3987 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ <> 3987 +and exclude is null +)'; + + +/* Remaining category Companies, domain_id=1, Restuid= 4751 */ + +CALL schema_xyz.build_tree_for_sector_pg('1'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4751'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4751 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Remaining category MPG, domain_id=6, Restuid= 1005 */ + +call schema_xyz.build_tree_for_sector_pg('6'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=1005'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 1005 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + +/* Remaining category practices, domain_id=19, Restuid= 230 */ + +CALL schema_xyz.build_tree_for_sector_pg('19'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=230'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 230 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Restkategorie HGF, domain_id=4, Restuid= 4138 */ + +CALL schema_xyz.build_tree_for_sector_pg('4'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4138'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4138 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Restkategorie FhG, domain_id=3, Restuid= 4127 */ + +CALL schema_xyz.build_tree_for_sector_pg('3'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4127'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4127 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Remaining category Leibniz, domain_id=10, Restuid= 4128 */ + +CALL schema_xyz.build_tree_for_sector_pg('10'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4128'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4128 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + +/* Remaining category clinics, domain_id=17, Restuid= 4752 */ + +CALL schema_xyz.build_tree_for_sector_pg('17'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4752'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4752 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Restkategorie Firmen, domain_id=9, Restuid= 4428 */ + +CALL schema_xyz.build_tree_for_sector_pg('9'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4428'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4428 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + +/* Restkategorie Firmen, domain_id=15, Restuid= 4429 */ + +CALL schema_xyz.build_tree_for_sector_pg('15'); + +EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4429'; + +EXECUTE 'update '||input_table||' +set delete_rest=1 +where uid_ = 4429 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +/* Remaining category University Hospitals Munich for everything that cannot be assigned to 101 or 102, Restuid= 5140 */ + +execute 'drop table if exists schema_xyz.relation_01'; +execute 'create table schema_xyz.relation_01 as (select * from schema_xyz.relation where type in (0,1))'; + + +execute 'drop table if exists schema_xyz.zwischen_sector_tree'; + +execute 'create table schema_xyz.zwischen_sector_tree as ( +SELECT uid_1 +FROM connectby(''schema_xyz.relation_01'', ''uid_1'', ''uid_0'', ''101'',0) +AS t(uid_1 bigint, uid_0 bigint, level int) +)'; + +commit; + +execute 'insert into schema_xyz.zwischen_sector_tree ( +SELECT uid_1 +FROM connectby(''schema_xyz.relation_01'', ''uid_1'', ''uid_0'', ''102'',0) +AS t(uid_1 bigint, uid_0 bigint, level int) +)'; + + +execute 'update '||input_table||' +set delete_rest=1 +where uid_ = 5140 +and adr_id in ( +select adr_id +from '||input_table||' where exclude is null +group by adr_id having count(distinct uid_)>1) +and adr_id in ( +select adr_id +from '||input_table||' +where uid_ in (select uid_ from schema_xyz.zwischen_sector_tree) +and exclude is null +)'; + +EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree'; + + EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', + ''Remaining categories finalised'', '''||current_timestamp::varchar||''''; + + +/* Application of the pattern table for collection categories: PATTERN_SAMMELKAT_LIKE. */ + +---Uncoded temporary storage +EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_I'; +EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_II'; +EXECUTE 'create table schema_xyz.zzz_sammelkat_I as (select adr_id, address_full, city +from schema_xyz.result_II +where not adr_id in (select adr_id from schema_xyz.result_IV +where delete_rest is null and exclude is null))'; + +---Assignment +EXECUTE 'create table schema_xyz.zzz_sammelkat_II as ( +select address_full, city, adr_id, uid_, pid_sk +from schema_xyz.zzz_sammelkat_I a +join schema_xyz.pattern_sammelkat_like b +on a.address_full like b.pattern_nf and ((a.city is null and b.pattern_city=''%'') or a.city like b.pattern_city))'; +commit; + + +--- Insert in result_IV +EXECUTE 'insert into schema_xyz.result_IV ( +select address_full, city, adr_id, uid_, null, null, null +from (select address_full, city, adr_id, uid_ from schema_xyz.zzz_sammelkat_II group by address_full, city, adr_id, uid_) as skii)'; + +EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_I'; + + +/* Exclude and group to reduce the calculation time for the lowest level. Index */ +EXECUTE 'drop table if exists '||output_table||''; + +EXECUTE 'create table '||output_table||' as (select +adr_id, address_full, city, uid_ +from '||input_table||' +where exclude is null and delete_rest is null +group by adr_id, address_full, city, uid_)'; +commit; + +EXECUTE 'alter table '||output_table||' add column is_TE bigint'; + +EXECUTE 'drop index if exists schema_xyz.i_'||replace(output_table,'.','_')||'_uid'; +EXECUTE 'create index i_'||replace(output_table,'.','_')||'_uid on '||output_table||' (uid_)'; + + +EXECUTE 'drop index if exists schema_xyz.i_'||replace(output_table,'.','_')||'_afci'; +EXECUTE 'create index i_'||replace(output_table,'.','_')||'_afci on '||output_table||' (address_full, city)'; + + + EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', + ''End pattern_matching_b_II'', '''||current_timestamp::varchar||''''; + +END; + +$procedure$ +;