diff --git a/Procedures/check_kodierung_solo_pg_text__text__text__text__text__text__text__text__text__text__text__text__text__text__text_.txt b/Procedures/check_kodierung_solo_pg_text__text__text__text__text__text__text__text__text__text__text__text__text__text__text_.txt new file mode 100644 index 0000000000000000000000000000000000000000..d0f2eafdadc7056df078a61b6b207cecc3b24c6a --- /dev/null +++ b/Procedures/check_kodierung_solo_pg_text__text__text__text__text__text__text__text__text__text__text__text__text__text__text_.txt @@ -0,0 +1,297 @@ +-- DROP PROCEDURE schema_xyz.check_kodierung_solo_pg(text, text, text, text, text, text, text, text, text, text, text, text, text, text, text); + +CREATE OR REPLACE PROCEDURE schema_xyz.check_kodierung_solo_pg(ergebnis_tab_name text, basis_tab text, tab_addr_inst_sec_a text, tab_addr_inst_sec_s text, tab_inst text, tab_sectors text, tab_s_inst_sec text, tab_a_inst_sec text, tab_inst_trans text, adr_spalte text, dok_spalte text, name_stichprobentab_1 text, name_stichprobentab_2 text, py_spalte text, unassigned_name text) + LANGUAGE plpgsql +AS $procedure$ + BEGIN + + + DECLARE + +str varchar(1000); +endyear integer; +cnt_total integer; + + +BEGIN + +EXECUTE 'drop table if exists '||ergebnis_tab_name; +EXECUTE 'create table '||ergebnis_tab_name||'(Thema varchar(1000), Beschreibung varchar(1000), +ergebnis numeric, gewuenschtes_ergebnis varchar(100),sql_ varchar(1000))'; +commit; + + +/*** Constraints Check ***/ + +str:='select count(*) from '||tab_inst_trans||' where not inst_ante in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl inst_ante (aus inst_trans) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_inst_trans||' where not inst_ante in (select kb_inst_id from '||tab_inst||')'; + +str:='select count(*) from '||tab_inst_trans||' where not inst_post in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl inst_post (aus inst_trans) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_inst_trans||' where not inst_post in (select kb_inst_id from '||tab_inst||')'; + +str:='select count(*) from '||tab_addr_inst_sec_s||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_inst_id (aus addr_inst_s) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_addr_inst_sec_s||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; + +str:='select count(*) from '||tab_addr_inst_sec_a||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_inst_id (aus addr_inst_a) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_addr_inst_sec_a||' where not kb_inst_id in (select +kb_inst_id from '||tab_inst||')'; + +commit; + +str:='select count(*) from '||tab_a_inst_sec||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_inst_id (aus a_inst_sec) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_a_inst_sec||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; + +str:='select count(*) from '||tab_s_inst_sec||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_inst_id (aus s_inst_sec) nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_s_inst_sec||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||')'; + +str:='select count(*) from '||tab_a_inst_sec||' where not kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_sector_id (aus a_inst_sec) nicht in kb_sectors'', count(*), 0, '''||str||''' +from '||tab_a_inst_sec||' where not kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; + +str:='select count(*) from '||tab_s_inst_sec||' where not kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl kb_sector_id (aus s_inst_sec) nicht in kb_sectors'', count(*), 0, '''||str||''' +from '||tab_s_inst_sec||' where not kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; + +str:='select count(*) from '||tab_addr_inst_sec_s||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||') and not kb_inst_id=3987 and not kb_inst_id=3830'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl fk_inst aus addr_inst_s nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_addr_inst_sec_s||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||') and not kb_inst_id=3987 and not kb_inst_id=3830'; + +str:='select count(*) from '||tab_addr_inst_sec_a||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||') and not kb_inst_id=3987 and not kb_inst_id=3830'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', ''Anzahl fk_inst aus addr_inst_a nicht in kb_inst'', count(*), 0, '''||str||''' +from '||tab_addr_inst_sec_a||' where not kb_inst_id in (select kb_inst_id from '||tab_inst||') and not kb_inst_id=3987 and not kb_inst_id=3830'; + +commit; + + +str:='select count(distinct(xxx.single_sector_id) from (select unnest(kb_sector_id) as single_kb_sector_id +from '||tab_addr_inst_sec_s||') as xxx +where not single_kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', +''Anzahl distinkter kb_sector_id aus addr_inst_sec_s nicht in kb_sector'', +count(distinct(xxx.single_kb_sector_id)), +0, +'''||str||''' +from (select unnest(kb_sector_id) as single_kb_sector_id +from '||tab_addr_inst_sec_s||') as xxx +where not single_kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +commit; + +str:='select count(distinct(xxx.single_sector_id) from (select unnest(kb_sector_id) as single_kb_sector_id +from '||tab_addr_inst_sec_a||') as xxx +where not single_kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Constraints Check'', +''Anzahl distinkter kb_sector_id aus addr_inst_sec_s nicht in kb_sector'', +count(distinct(xxx.single_kb_sector_id)), +0, +'''||str||''' +from (select unnest(kb_sector_id) as single_kb_sector_id +from '||tab_addr_inst_sec_a||') as xxx +where not single_kb_sector_id in (select kb_sector_id from '||tab_sectors||')'; +commit; + + +/*** Create sampling tables ***/ + +EXECUTE 'drop table if exists '||name_stichprobentab_1; +EXECUTE 'create table '||name_stichprobentab_1||' as ( +select * from (select * from '||tab_addr_inst_sec_s||' +order by random()) as xyz limit 100)'; +commit; +EXECUTE 'insert into '||ergebnis_tab_name||' values (''Name Stichprobentabelle 1 (Zufallsauswahl 100 Zuordnungen Modus S)'','''||name_stichprobentab_1||''',null, null, null)'; +commit; + +execute 'drop table if exists '||name_stichprobentab_2||'_tmp'; +execute 'create index on '||tab_addr_inst_sec_s||'('||adr_spalte||')'; +execute 'create table '||name_stichprobentab_2||'_tmp as ( +select a.'||adr_spalte||' +from '||basis_tab||' a +left join '||tab_addr_inst_sec_s||' b +on a.'||adr_spalte||' = b.'||adr_spalte||' +where b.'||adr_spalte||' is null)'; +commit; +execute 'drop table if exists '||name_stichprobentab_2||'_tmp_1'; +execute 'create table '||name_stichprobentab_2||'_tmp_1 as ( +select distinct(upper('||adr_spalte||')) as '||adr_spalte||' +from '||name_stichprobentab_2||'_tmp)'; +commit; + +execute 'drop index if exists processing.i_check_kodierung_upper_adr'; +execute 'create index i_check_kodierung_upper_adr on '||tab_addr_inst_sec_s||'(upper('||adr_spalte||'))'; +execute 'delete from '||name_stichprobentab_2||'_tmp_1 +where '||adr_spalte||' in (select upper('||adr_spalte||') from '||tab_addr_inst_sec_s||')'; +commit; +EXECUTE 'drop table if exists '||name_stichprobentab_2; +EXECUTE 'create table '||name_stichprobentab_2||' as ( +select * from '||name_stichprobentab_2||'_tmp_1 +where random() < 0.01 +limit 100 +)'; +commit; +EXECUTE 'insert into '||ergebnis_tab_name||' values (''Name Stichprobentabelle 2 (Zufallsauswahl 100 unkodierte)'','''||name_stichprobentab_2||''',null, null, null)'; +commit; +EXECUTE 'drop table if exists '||name_stichprobentab_2||'_tmp'; +EXECUTE 'drop table if exists '||name_stichprobentab_2||'_tmp_1'; +commit; + +/*** Statistics ***/ + +/* Addresses */ + +str:='select count(distinct upper('||adr_spalte||')) from '||basis_tab; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', +''Anzahl dist Adressen'', count(distinct upper('||adr_spalte||')), null, '''||str||''' +from '||basis_tab; +commit; + +str:='select count(distinct upper('||adr_spalte||')) from '||basis_tab||' +where upper('||adr_spalte||') in ( +select upper('||adr_spalte||') from '||tab_addr_inst_sec_s||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', +''Anzahl dist Adressen, zugeordnet'', count(distinct upper('||adr_spalte||')), null, '''||str||''' +from '||basis_tab||' where upper('||adr_spalte||') in ( +select upper('||adr_spalte||') from '||tab_addr_inst_sec_s||')'; +commit; + +EXECUTE 'insert into '||ergebnis_tab_name||' +select ''Statistik'', ''Anteil (in %) zugeordnete Adressen'', +round((100*(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Adressen, zugeordnet'') +/(select ergebnis from '||ergebnis_tab_name||' where beschreibung=''Anzahl dist Adressen'')),2), null, null'; +commit; + + +/* Address-ut combinations */ + +str:='select '||dok_spalte||',upper('||adr_spalte||') +from '||basis_tab||' group by '||dok_spalte||',upper('||adr_spalte||')'; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', ''Anzahl dist Adress-Dokument-Kombinationen'', count(*), null, '''||str||''' +from ( +select '||dok_spalte||',upper('||adr_spalte||') +from '||basis_tab||' group by '||dok_spalte||',upper('||adr_spalte||') +) xyz'; +commit; + +str:='select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where ('||dok_spalte||',upper('||adr_spalte||')) in ( +select '||dok_spalte||',upper('||adr_spalte||' from '||tab_addr_inst_sec_s||') +group by '||dok_spalte||',upper('||adr_spalte||')'; + +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', ''Anzahl dist Adress-Dokument-Kombinationen, zugeordnet'', count(*), null, '''||str||''' +from ( +select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where ('||dok_spalte||',upper('||adr_spalte||')) in (select '||dok_spalte||',upper('||adr_spalte||') from '||tab_addr_inst_sec_s||') +group by '||dok_spalte||',upper('||adr_spalte||') +) xyz'; +commit; + +EXECUTE 'insert into '||ergebnis_tab_name||' +select ''Statistik'', ''Anteil (in %) zugeordnete Adress-Dokument-Kombinationen'', +round((100*(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Adress-Dokument-Kombinationen, zugeordnet'') +/(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Adress-Dokument-Kombinationen'')),2), null, null'; +commit; + +raise notice 'start 10 year slots....'; + +--- 10-year periods: --- 2006-2015 + +for py_counter in 1986..date_part('year', now()) by 10 loop + +endyear := py_counter + 9; + +str:='select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where '||py_spalte||'<='||endyear::varchar||' +and '||py_spalte||'>='||py_counter::varchar||' +group by '||dok_spalte||',upper('||adr_spalte||')'; + +raise notice '%s', str; + +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', +''Anzahl dist Adress-Dokument-Kombinationen '||py_counter::varchar||'-'||endyear::varchar||''', +count(*), null, '''||str||''' +from (select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where '||py_spalte||'<='||endyear::varchar||' and '||py_spalte||'>='||py_counter::varchar||' +group by '||dok_spalte||',upper('||adr_spalte||')) x'; + +commit; + +str:='select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where ('||dok_spalte||',upper('||adr_spalte||')) in (select '||dok_spalte||',upper('||adr_spalte||' +from '||tab_addr_inst_sec_s||') +and '||py_spalte||'<='||endyear::varchar||' and '||py_spalte||'>='||py_counter::varchar||' +group by '||dok_spalte||',upper('||adr_spalte||')'; + +raise notice '%s', str; + +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', ''Anzahl dist Adress-Dokument-Kombinationen, zugeordnet '||py_counter::varchar||'-'||endyear::varchar||''', count(*), null, '''||str||''' +from (select '||dok_spalte||',upper('||adr_spalte||') from '||basis_tab||' +where ('||dok_spalte||',upper('||adr_spalte||')) in (select '||dok_spalte||',upper('||adr_spalte||') +from '||tab_addr_inst_sec_s||') +and '||py_spalte||'<='||endyear::varchar||' and '||py_spalte||'>='||py_counter::varchar||' +group by '||dok_spalte||',upper('||adr_spalte||')) xyz'; + +commit; + +EXECUTE format('select ergebnis from %s +where beschreibung=''Anzahl dist Adress-Dokument-Kombinationen '||py_counter::varchar||'-'||endyear::varchar||''' +', ergebnis_tab_name) +INTO cnt_total; + +if (cnt_total > 0) +then EXECUTE 'insert into '||ergebnis_tab_name||' +select ''Statistik'', ''Anteil (in %) zugeordnete Adress-Dokument-Kombinationen '||py_counter::varchar||'-'||endyear::varchar||''', +round((100*(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Adress-Dokument-Kombinationen, zugeordnet '||py_counter::varchar||'-'||endyear::varchar||''') +/(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Adress-Dokument-Kombinationen '||py_counter::varchar||'-'||endyear::varchar||''')),2), null, null +'; +end if; +commit; + +end loop; + +/* Documents with at least one assigned address */ + +str:='select '||dok_spalte||' from '||basis_tab||' group by '||dok_spalte; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', ''Anzahl dist Dokumente'', count(*), null, '''||str||''' +from (select '||dok_spalte||' from '||basis_tab||' group by '||dok_spalte||') x '; + +commit; + +str:='select '||dok_spalte||' from '||basis_tab||' +where '||dok_spalte||' in (select '||dok_spalte||' from '||tab_addr_inst_sec_s||') +group by '||dok_spalte; +EXECUTE 'insert into '||ergebnis_tab_name||' select ''Statistik'', ''Anzahl dist Dokumente mit mind einer Zuordnung'', count(*), null, '''||str||''' +from (select '||dok_spalte||' from '||basis_tab||' +where '||dok_spalte||' in (select '||dok_spalte||' from '||tab_addr_inst_sec_s||') +group by '||dok_spalte||') y '; + +commit; + +EXECUTE 'insert into '||ergebnis_tab_name||' +select ''Statistik'', ''Anteil (in %) Dokumente mit mind einer Zuordnung'',round((100*(select ergebnis from '||ergebnis_tab_name||' +where beschreibung=''Anzahl dist Dokumente mit mind einer Zuordnung'') +/(select ergebnis from '||ergebnis_tab_name||' where beschreibung=''Anzahl dist Dokumente'')),2), null, null'; + +commit; + +call schema_xyz.get_unassigned_pg( +unassigned_name, +basis_tab, +tab_addr_inst_sec_s +); + +commit; + + +end; + END; +$procedure$ +;