From 3d31828e1f37814f5eb0f5452f7f02b991bd2880 Mon Sep 17 00:00:00 2001
From: Christopher Lenke <christopher.lenke@uni-bielefeld.de>
Date: Mon, 23 Sep 2024 18:38:24 +0200
Subject: [PATCH] 23.09.2024

---
 Procedures/get_unassigned_pg.txt | 48 ++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Procedures/get_unassigned_pg.txt

diff --git a/Procedures/get_unassigned_pg.txt b/Procedures/get_unassigned_pg.txt
new file mode 100644
index 0000000..25bf139
--- /dev/null
+++ b/Procedures/get_unassigned_pg.txt
@@ -0,0 +1,48 @@
+-- DROP PROCEDURE schema_xyz.get_unassigned_pg(text, text, text);
+
+CREATE OR REPLACE PROCEDURE schema_xyz.get_unassigned_pg(ergebnis_tab_name text, basis_tab text, tab_s_inst_sec text)
+ LANGUAGE plpgsql
+AS $procedure$
+/*
+Writes the rows of basis_tab without a match in tab_s_inst_sec (on item_id, aff_seq_nr) to
+{ergebnis_tab_name}_unassigned and aggregates them per upper(address_full), upper(city) into
+{ergebnis_tab_name}_cnts. Tables with these names are dropped first; each new table is committed,
+so the call must not run inside an explicit transaction block. basis_tab and tab_s_inst_sec are
+resolved via regclass (must exist); ergebnis_tab_name is spliced in as given: trusted names only.
+example:
+call schema_xyz.get_unassigned_pg(
+'schema_xyz.test_get_unassigned', 
+'schema_xyz.download_adr_wos21_new_struct_rand_1mio', 
+'schema_xyz.kb_s_addr_inst_sec_testrun_wos_21b_1mio')
+*/
+BEGIN
+-- format() renders NULL as an empty string, which would silently target tables named _unassigned/_cnts
+if ergebnis_tab_name is null or basis_tab is null or tab_s_inst_sec is null then
+	raise exception 'get_unassigned_pg: table name arguments must not be NULL' using errcode = 'null_value_not_allowed';
+end if;
+
+raise notice 'Please make sure to have indexes on item_id and aff_seq_nr both on the given download table as well as the given kb_s_... table.';
+
+execute format('drop table if exists %s_unassigned', ergebnis_tab_name);
+execute format('drop table if exists %s_cnts', ergebnis_tab_name);
+
+execute format('create table %s_unassigned as
+(select a.* 
+from %s a
+left join %s b
+on a.item_id = b.item_id and a.aff_seq_nr = b.aff_seq_nr
+where b.aff_seq_nr is null)', ergebnis_tab_name, basis_tab::regclass, tab_s_inst_sec::regclass);
+commit;
+
+-- grouping by fk_institutions, as done before, no longer makes sense with aff_seq_nr,
+-- so group by address_full instead; upper(address_full) is used as the more useful grouping key
+execute format('create table %s_cnts as
+(select count(distinct item_id) as item_cnt, upper(address_full) as upper_address_full,
+upper(city) as upper_city,
+max(pubyear) as max_pubyear
+from %s_unassigned
+group by upper(address_full), upper(city))', ergebnis_tab_name, ergebnis_tab_name);
+commit;
+END;
+$procedure$
+;
-- 
GitLab