Skip to content
Snippets Groups Projects
Commit 586974c5 authored by Christopher Lenke's avatar Christopher Lenke
Browse files

23.09.2024

parent d8393155
No related branches found
No related tags found
No related merge requests found
-- DROP PROCEDURE schema_xyz.pattern_matching_b_ii_pg(text, text, text);
CREATE OR REPLACE PROCEDURE schema_xyz.pattern_matching_b_ii_pg(input_table text, output_table text, process_name text)
LANGUAGE plpgsql
AS $procedure$
#variable_conflict use_variable
DECLARE
epc cursor is SELECT epid, uid_, pattern_nf, pattern_city from schema_xyz.exclude_pattern;
/*
Exclusion pattern, remaining categories, lowest level.
input_table: result_IV
output_table: result_V
*/
exclude_pattern RECORD;
BEGIN
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Start pattern_matching_b_II'', '''||current_timestamp::varchar||'''';
/* +++++++ EXCLUDE: Application of exclusion patterns. No differentiation between pattern-specific and non-pattern-specific exclusion - only at address level ++++++++ */
EXECUTE 'alter table '||input_table||' add column exclude numeric, add column exclude_id numeric';
for exclude_pattern in epc
LOOP
EXECUTE 'update '||input_table||' set exclude=1, exclude_id='||exclude_pattern.epid|| '
where (uid_='||exclude_pattern.uid_||' and upper(address_full) like '''||exclude_pattern.pattern_nf||''' and (city is null or upper(city) like '''||exclude_pattern.pattern_city||'''))';
END LOOP;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Exclude completed'', '''||current_timestamp::varchar||'''';
/*
Remaining and collective categories: the pattern should only apply here if no other applies.
4751 Remaining category: Companies, not yet assigned (domain_id=1)
1005 MPG remaining category, without unique institute address (domain_id=6)
230 Collective category practices (domain_id=19)
4138 HGF remaining category, without unique institute address (domain_id=4)
4127 FhG remaining category, without unique institute address (domain_id=3)
4128 Leibniz remaining category, without unique institute address (domain_id=10)
4752 Remaining category: Clinics, not yet assigned (domain_id=17)
4428 Remaining category: Universities (domain_id=9)
4429 Remaining category: Universities of Applied Sciences (domain_id=15)
*/
EXECUTE 'alter table '||input_table||' add column delete_rest numeric';
/* Remaining category Germany: 3987 */
EXECUTE 'update '||input_table||'
set delete_rest = 1
where uid_ = 3987
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ <> 3987
and exclude is null
)';
/* Remaining category Companies, domain_id=1, Restuid= 4751 */
CALL schema_xyz.build_tree_for_sector_pg('1');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4751';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4751
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Remaining category MPG, domain_id=6, Restuid= 1005 */
call schema_xyz.build_tree_for_sector_pg('6');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=1005';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 1005
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Remaining category practices, domain_id=19, Restuid= 230 */
CALL schema_xyz.build_tree_for_sector_pg('19');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=230';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 230
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Restkategorie HGF, domain_id=4, Restuid= 4138 */
CALL schema_xyz.build_tree_for_sector_pg('4');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4138';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4138
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Restkategorie FhG, domain_id=3, Restuid= 4127 */
CALL schema_xyz.build_tree_for_sector_pg('3');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4127';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4127
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Remaining category Leibniz, domain_id=10, Restuid= 4128 */
CALL schema_xyz.build_tree_for_sector_pg('10');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4128';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4128
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Remaining category clinics, domain_id=17, Restuid= 4752 */
CALL schema_xyz.build_tree_for_sector_pg('17');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4752';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4752
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Restkategorie Firmen, domain_id=9, Restuid= 4428 */
CALL schema_xyz.build_tree_for_sector_pg('9');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4428';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4428
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
/* Restkategorie Firmen, domain_id=15, Restuid= 4429 */
CALL schema_xyz.build_tree_for_sector_pg('15');
EXECUTE 'DELETE FROM schema_xyz.zwischen_sector_tree where uid_=4429';
EXECUTE 'update '||input_table||'
set delete_rest=1
where uid_ = 4429
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ FROM schema_xyz.zwischen_sector_tree)
and exclude is null
)';
/* Remaining category University Hospitals Munich for everything that cannot be assigned to 101 or 102, Restuid= 5140 */
execute 'drop table if exists schema_xyz.relation_01';
execute 'create table schema_xyz.relation_01 as (select * from schema_xyz.relation where type in (0,1))';
execute 'drop table if exists schema_xyz.zwischen_sector_tree';
execute 'create table schema_xyz.zwischen_sector_tree as (
SELECT uid_1
FROM connectby(''schema_xyz.relation_01'', ''uid_1'', ''uid_0'', ''101'',0)
AS t(uid_1 bigint, uid_0 bigint, level int)
)';
commit;
execute 'insert into schema_xyz.zwischen_sector_tree (
SELECT uid_1
FROM connectby(''schema_xyz.relation_01'', ''uid_1'', ''uid_0'', ''102'',0)
AS t(uid_1 bigint, uid_0 bigint, level int)
)';
execute 'update '||input_table||'
set delete_rest=1
where uid_ = 5140
and adr_id in (
select adr_id
from '||input_table||' where exclude is null
group by adr_id having count(distinct uid_)>1)
and adr_id in (
select adr_id
from '||input_table||'
where uid_ in (select uid_ from schema_xyz.zwischen_sector_tree)
and exclude is null
)';
EXECUTE 'drop table if exists schema_xyz.zwischen_sector_tree';
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''',
''Remaining categories finalised'', '''||current_timestamp::varchar||'''';
/* Application of the pattern table for collection categories: PATTERN_SAMMELKAT_LIKE. */
---Uncoded temporary storage
EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_I';
EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_II';
EXECUTE 'create table schema_xyz.zzz_sammelkat_I as (select adr_id, address_full, city
from schema_xyz.result_II
where not adr_id in (select adr_id from schema_xyz.result_IV
where delete_rest is null and exclude is null))';
---Assignment
EXECUTE 'create table schema_xyz.zzz_sammelkat_II as (
select address_full, city, adr_id, uid_, pid_sk
from schema_xyz.zzz_sammelkat_I a
join schema_xyz.pattern_sammelkat_like b
on a.address_full like b.pattern_nf and ((a.city is null and b.pattern_city=''%'') or a.city like b.pattern_city))';
commit;
--- Insert in result_IV
EXECUTE 'insert into schema_xyz.result_IV (
select address_full, city, adr_id, uid_, null, null, null
from (select address_full, city, adr_id, uid_ from schema_xyz.zzz_sammelkat_II group by address_full, city, adr_id, uid_) as skii)';
EXECUTE 'drop table if exists schema_xyz.zzz_sammelkat_I';
/* Exclude and group to reduce the calculation time for the lowest level. Index */
EXECUTE 'drop table if exists '||output_table||'';
EXECUTE 'create table '||output_table||' as (select
adr_id, address_full, city, uid_
from '||input_table||'
where exclude is null and delete_rest is null
group by adr_id, address_full, city, uid_)';
commit;
EXECUTE 'alter table '||output_table||' add column is_TE bigint';
EXECUTE 'drop index if exists schema_xyz.i_'||replace(output_table,'.','_')||'_uid';
EXECUTE 'create index i_'||replace(output_table,'.','_')||'_uid on '||output_table||' (uid_)';
EXECUTE 'drop index if exists schema_xyz.i_'||replace(output_table,'.','_')||'_afci';
EXECUTE 'create index i_'||replace(output_table,'.','_')||'_afci on '||output_table||' (address_full, city)';
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''',
''End pattern_matching_b_II'', '''||current_timestamp::varchar||'''';
END;
$procedure$
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment