Skip to content
Snippets Groups Projects
Commit 180ebffb authored by Christopher Lenke's avatar Christopher Lenke
Browse files

23.09.2024

parent dc5cc281
No related branches found
No related tags found
No related merge requests found
-- DROP PROCEDURE schema_xyz.pattern_matching_d_pg(text, text, text, text);
CREATE OR REPLACE PROCEDURE schema_xyz.pattern_matching_d_pg(table_in text, table_out text, table_out_ii text, process_name text)
LANGUAGE plpgsql
AS $procedure$
#variable_conflict use_variable
DECLARE
mc cursor is select /*+ parallel(24) */ pattern_nf, pattern_city, pid, uid_ from schema_xyz.pattern_ges_like;
mc2 cursor is SELECT /*+ parallel(24) */ pattern_nf, pattern_city, pid, uid_ from schema_xyz.pattern_ges_like where pattern_city='%';
ac cursor is SELECT /*+ parallel(24) */ address_full, address_full_uf, city, adr_id from schema_xyz.zzz_unkodierte where city is not null and not address_full=address_full_uf;
ac2 cursor is SELECT /*+ parallel(24) */ address_full, address_full_uf, city, adr_id from schema_xyz.zzz_unkodierte where city is null and not address_full=address_full_uf;
ac3 cursor is SELECT /*+ parallel(24) */ address_full, address_full_uf, city, adr_id from schema_xyz.zzz_unkodierte_II where city is not null and not address_full=address_full_uf;
ac4 cursor is SELECT /*+ parallel(24) */ address_full, address_full_uf, city, adr_id from schema_xyz.zzz_unkodierte_II where city is null and not address_full=address_full_uf;
ac5 CURSOR IS select /*+ parallel(24) */ address_full, city, adr_id from schema_xyz.zzz_unkodierte_I where city is not NULL;
ac6 CURSOR IS select /*+ parallel(24) */ address_full, city, adr_id from schema_xyz.zzz_unkodierte_I where city is NULL;
test_str varchar;
muster RECORD;
adresse RECORD;
Begin
raise notice 'prozess name: %s', prozess_name ;
raise notice 'table_out: %s', table_out ;
execute 'drop table if exists '||table_out||'';
EXECUTE 'create table '||table_out||'(address_full varchar not null, pattern_nf varchar not null, city varchar,
pattern_city varchar, adr_id numeric not null, pid numeric not null, uid_ numeric not null, reason varchar)';
/* +++++++ If city is not null, everything can be assigned as usual (sample comparison for address and city) +++++++ */
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 1: city not null'', '''||current_timestamp::varchar||'''';
commit;
For muster in mc
loop
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ address_full, '''||muster.pattern_nf||''', city,
'''||muster.pattern_city||''', adr_id, '||muster.pid||', '||muster.uid_||', ''normal''
from '||table_in||'
where city is not null
and address_full like '''||muster.pattern_nf||''' and city like '''||muster.pattern_city||''')';
commit;
END LOOP;
/* +++++++ If city is null, only the patterns with pattern_city='%' are unproblematic, others should not be used. This case does not occur with WoS. */
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 2: city null'', '''||current_timestamp::varchar||'''';
commit;
FOR muster in mc2
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ address_full, '''||muster.pattern_nf||''', city,
'''||muster.pattern_city||''', adr_id, '||muster.pid||', '||muster.uid_||', ''normal''
from '||table_in||'
where city is null
and address_full like '''||muster.pattern_nf||''')';
commit;
END LOOP;
/* +++++++ Additional step for pattern_matching: * all those that are not yet coded and have 'UNIV' in the address * are rotated again and then compared with the patterns. Intermediate table for this: schema_xyz.zzz_unkodierte. +++++++ */
execute 'drop index if exists schema_xyz.i_result_ii_adr_id';
execute 'drop index if exists schema_xyz.i_result_iii_adr_id';
execute 'drop index if exists schema_xyz.i_result_iii_uid_';
execute 'drop index if exists schema_xyz.i_result_iii_address_full';
execute 'drop index if exists schema_xyz.i_result_iii_city';
commit;
execute 'create index i_result_ii_adr_id on schema_xyz.result_II(adr_id)';
commit;
execute 'create index i_result_iii_adr_id on schema_xyz.result_III(adr_id)';
commit;
execute 'create index i_result_iii_uid_ on schema_xyz.result_III(uid_)';
commit;
execute 'create index i_result_iii_address_full on schema_xyz.result_III(address_full)';
commit;
execute 'create index i_result_iii_city on schema_xyz.result_III(city)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...indexes on result_iii done'', '''||current_timestamp::varchar||'''';
commit;
execute 'drop table if exists schema_xyz.zzz_unkodierte';
EXECUTE 'create table schema_xyz.zzz_unkodierte as (
select adr_id,address_full,city, '''' as address_full_uf
from schema_xyz.result_II
where not adr_id in (select adr_id from schema_xyz.result_III)
and address_full like ''%,%UNIV%'')';
commit;
execute 'create index on schema_xyz.zzz_unkodierte(address_full)';
commit;
execute 'create index on schema_xyz.zzz_unkodierte(address_full_uf)';
commit;
execute 'create index on schema_xyz.zzz_unkodierte(city)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''......creation of schema_xyz.zzz_unkodierte done'', '''||current_timestamp::varchar||'''';
commit;
EXECUTE 'update schema_xyz.zzz_unkodierte set address_full_uf=schema_xyz.univ_first_pg(address_full)';
commit;
EXECUTE 'delete from schema_xyz.zzz_unkodierte where address_full_uf is null';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''univ_first done'', '''||current_timestamp::varchar||'''';
commit;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Additional step uncoded: turn so that the UNIV part is at the front'', '''||current_timestamp::varchar||'''';
commit;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 1: city not null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''',
pattern_nf, '''||adresse.CITY||''',
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''univ first''
from schema_xyz.pattern_ges_like
where (upper('''||adresse.ADDRESS_FULL_UF||''') LIKE pattern_nf
and upper('''||adresse.CITY||''') LIKE pattern_city))';
commit;
END LOOP;
/* If city is null, only the patterns with pattern_city='%' * are unproblematic, others should not be used.
* This case does not occur with WoS. */
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 2: city null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac2
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''', pattern_nf, null,
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''univ first''
from (select * from schema_xyz.pattern_ges_like where pattern_city=''%'') a
where (upper('''||adresse.ADDRESS_FULL_UF||''') LIKE pattern_nf))';
commit;
END LOOP;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''End of additional step uncoded.'', '''||current_timestamp::varchar||'''';
commit;
/*############################## PROZESS_INFO ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start additional step uncoded: all NF patterns ending in .% can also end directly (without .%)'', '''||current_timestamp::varchar||'''';
commit;
execute 'drop table if exists schema_xyz.zzz_unkodierte_I';
execute 'create table schema_xyz.zzz_unkodierte_I as (
select a.adr_id,a.address_full,a.city
from schema_xyz.result_II a
left join schema_xyz.result_iii b
on a.adr_id = b.adr_id
where b.adr_id is null)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''......creation of schema_xyz.zzz_unkodierte_I done'', '''||current_timestamp::varchar||'''';
commit;
/*############################## PROZESS_INFO ###############################*/
execute 'drop table if exists schema_xyz.pgs_comma';
execute 'create table schema_xyz.pgs_comma as (select pid, uid_,
substr(pattern_nf, 1, length(pattern_nf)-2) as pattern_nf, pattern_city
from schema_xyz.PATTERN_GES_LIKE where pattern_nf like ''%,\%'')';
execute 'drop index if exists schema_xyz.i_pgs_comma_pattern_nf';
execute 'create index on schema_xyz.pgs_comma(pattern_nf)';
commit;
execute 'drop index if exists schema_xyz.i_pgs_comma_pattern_city';
execute 'create index on schema_xyz.pgs_comma(pattern_city)';
commit;
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start Loop 1: city not null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac5
LOOP
execute 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''',
pattern_nf, '''||adresse.CITY||''',
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''pattern without comma''
from schema_xyz.pgs_comma a
where (upper('''||adresse.ADDRESS_FULL||''') LIKE pattern_nf
and upper('''||adresse.CITY||''') LIKE pattern_city))';
commit;
END LOOP;
/*############################## PROZESS_INFO ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''Start Loop 2: city null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac6
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''', pattern_nf, null,
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''pattern without comma''
from (select * from schema_xyz.pgs_comma where pattern_city=''%'') a
where (upper('''||adresse.ADDRESS_FULL||''') LIKE pattern_nf))';
commit;
END LOOP;
drop index if exists zzz_unkodierte_i_address_full_idx;
drop index if exists zzz_unkodierte_i_address_full_uf_idx;
drop index if exists zzz_unkodierte_i_city_idx;
drop table if exists zzz_unkodierte_i;
execute 'drop table if exists schema_xyz.pgs_comma';
/*############################## PROZESS_INFO ###############################*/
execute 'insert into schema_xyz.process_info select '''||process_name||''', ''End Additional step uncoded, NF pattern end.'', '''||current_timestamp::varchar||'''';
commit;
/* +++++++ Additional step for pattern_matching: compare all those that * have not yet been coded with the original address again, * in the event that very special patterns were present; for example, patterns that contain spelling errors, etc.
Intermediate table for this: schema_xyz.zzz_unkodierte_II. +++++++ */
--EXECUTE 'truncate table schema_xyz.zzz_unkodierte_II';
execute 'drop table if exists schema_xyz.zzz_unkodierte_II';
/*EXECUTE 'create table schema_xyz.zzz_unkodierte_II(address_full varchar not null, city varchar,
adr_id varchar not null, address_full_original varchar, address_full_uf varchar)';*/
execute 'drop index if exists schema_xyz.i_result_ii_adrid';
execute 'create index i_result_ii_adrid on schema_xyz.result_II(adr_id)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...adr_id index on result_ii created'', '''||current_timestamp::varchar||'''';
commit;
execute 'drop index if exists schema_xyz.i_result_i_address_full';
execute 'create index i_result_i_address_full on schema_xyz.result_I(address_full)';
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...address full index on result_i created'', '''||current_timestamp::varchar||'''';
commit;
execute 'drop table if exists schema_xyz.zzz_unkodierte_II_pre';
execute 'create table schema_xyz.zzz_unkodierte_II_pre as (
select a.adr_id, a.address_full
from schema_xyz.result_ii a
left join schema_xyz.result_iii b
on a.adr_id=b.adr_id
where b.adr_id is null)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...create zzz_II_pre done'', '''||current_timestamp::varchar||'''';
commit;
execute 'drop index if exists schema_xyz.i_zzz_ii_pre';
execute 'create index i_zzz_ii_pre on schema_xyz.zzz_unkodierte_II_pre(address_full)';
commit;
execute 'drop table if exists schema_xyz.zzz_unkodierte_II';
execute 'create table schema_xyz.zzz_unkodierte_II as (
select b.adr_id,a.address_full,a.city,upper(address_full_original) as address_full_original,
upper(a.address_full_original) as address_full_uf
from schema_xyz.result_i a
join schema_xyz.zzz_unkodierte_II_pre b
on a.address_full = b.address_full)';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...create zzz_II done'', '''||current_timestamp::varchar||'''';
commit;
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''...indexes zzz_II done'', '''||current_timestamp::varchar||'''';
commit;
EXECUTE 'update schema_xyz.zzz_unkodierte_II
set address_full_uf=replace(address_full_uf,'''''''','''')';
commit;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Additional step unencoded: Use originals'', '''||current_timestamp::varchar||'''';
commit;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 1: city not null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac3
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''',
pattern_nf, '''||adresse.CITY||''',
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''original''
from schema_xyz.pattern_ges_like
where (upper('''||adresse.ADDRESS_FULL_UF||''') LIKE pattern_nf
and upper('''||adresse.CITY||''') LIKE pattern_city))';
commit;
END LOOP;
/* If city is null, only the patterns with pattern_city='%' are unproblematic, others should not be used. This case does not occur with WoS. */
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''Loop 2: city null'', '''||current_timestamp::varchar||'''';
commit;
for adresse in ac4
LOOP
EXECUTE 'insert /*+ parallel(24) */ into '||table_out||' (
select /*+ parallel(24) */ '''||adresse.ADDRESS_FULL||''', pattern_nf, null,
pattern_city, '||adresse.ADR_ID||', pid, uid_ , ''original''
from (select * from schema_xyz.pattern_ges_like where pattern_city=''%'') a
where (upper('''||adresse.ADDRESS_FULL_UF||''') LIKE pattern_nf))';
commit;
END LOOP;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||Process_name||''', ''End of additional step unencoded: Use originals'', '''||current_timestamp::varchar||'''';
commit;
/* +++++++ Grouping for the intermediate result +++++++ */
EXECUTE 'drop table if exists '||table_out_II||'';
EXECUTE 'create table '||table_out_II||' as (select address_full, city,
adr_id, uid_
from '||table_out||'
group by address_full, city,
adr_id, uid_)';
commit;
/*############################## schema_xyz.process_info ###############################*/
EXECUTE 'insert into schema_xyz.process_info select '''||process_name||''', ''End pattern_matching_d'', '''||current_timestamp::varchar||'''';
commit;
END;
$procedure$
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment