diff --git a/Procedures/create_download_pg_varchar__varchar__varchar__varchar__varchar__varchar__int8__bool_.txt b/Procedures/create_download_pg_varchar__varchar__varchar__varchar__varchar__varchar__int8__bool_.txt new file mode 100644 index 0000000000000000000000000000000000000000..a30ece8670c4bb791d8bec43cc85c09614ebbff2 --- /dev/null +++ b/Procedures/create_download_pg_varchar__varchar__varchar__varchar__varchar__varchar__int8__bool_.txt @@ -0,0 +1,100 @@ +-- DROP PROCEDURE schema_xyz.create_download_pg(varchar, varchar, varchar, varchar, varchar, varchar, int8, bool); + +CREATE OR REPLACE PROCEDURE schema_xyz.create_download_pg(source_schema_name character varying, source_item_tablename character varying, source_affiliation_tablename character varying, target_schema character varying, desired_tablename character varying, data_format character varying, limit_size bigint, use_random boolean) + LANGUAGE plpgsql +AS $procedure$ + + +DECLARE + +create_string varchar(2000); +source_item_tab varchar(100); +source_aff_tab varchar(100); +desired_tab varchar(300); + +BEGIN + +/* +data format has to be wos or scp +limit_size without random will be much faster than with random +restriction to countrycode 'DEU' +limit_size of 0 means no limitation, all deu addresses +**/ + +source_item_tab = source_schema_name || '.' || source_item_tablename; +source_aff_tab = source_schema_name || '.' || source_affiliation_tablename; +desired_tab = target_schema || '.' || desired_tablename; + +EXECUTE 'drop table if exists '||desired_tab; + + + +if (lower(data_format) = 'wos') then + + create_string = 'create table '||desired_tab||' as ( + select b.item_id, aff_seq_nr, pubyear, + trim(both from city) as city, + array_to_string(organization || wos_suborganization || + trim(both from wos_street) || + trim(both from postal_code|| '' '' ||city) || + trim(both from country) + , '', '') as address_full + from '||source_aff_tab||' a + join '||source_item_tab||' b + on a.item_id=b.item_id '; + +elsif (lower(data_format) = 'scp') then + + create_string = 'create table '||desired_tab||' as ( + select b.item_id, aff_seq_nr, pubyear, +CASE + when a.city is null then trim(both from a.scopus_city_group) + when a.scopus_city_group is null then trim(both from a.city) +End as city, + array_to_string(organization || + trim(both from scopus_address_part) || + trim(both from scopus_city_group) || + trim(both from country) || + trim(both from countrycode) + , '', '') as address_full + from '||source_aff_tab||' a + join '||source_item_tab||' b + on a.item_id=b.item_id '; + +else + + RAISE EXCEPTION 'unknown data format, has to be wos or scp'; + +end if; + + +-- random without limt does not make sense and will be ignored, else long runtime for nothing +if ( limit_size::integer > 0 and use_random = True) THEN + create_string = create_string || 'WHERE (a.item_id, a.aff_seq_nr) IN + (SELECT item_id, aff_seq_nr FROM '||source_aff_tab||' where countrycode=''DEU'' + ORDER BY random() LIMIT '||limit_size||')'; +elsif (limit_size::integer>0) then + create_string = create_string || 'WHERE (a.item_id, a.aff_seq_nr) IN + (SELECT item_id, aff_seq_nr FROM '||source_aff_tab||' where countrycode=''DEU'' LIMIT '||limit_size||')'; +else + create_string = create_string || 'WHERE (a.item_id, a.aff_seq_nr) IN + (SELECT item_id, aff_seq_nr FROM '||source_aff_tab||' where countrycode=''DEU'')'; +end if; + + +create_string = create_string || ')'; + +raise notice 'execute string %s', create_string ; + +execute create_string; + +-- cleanup +execute 'update '||desired_tab||' +set address_full = regexp_replace(address_full, ''(, ){2,}'', '', '') +where address_full like ''%, , %'' '; + +commit; + +END; +$procedure$ +;