Skip to content
Snippets Groups Projects
Commit 0a3fcb97 authored by Christopher Lenke's avatar Christopher Lenke
Browse files

23.09.2024

parent 5657bbed
No related merge requests found
-- DROP FUNCTION schema_xyz.wos_transformation_pg(varchar, int8);
--
-- wos_transformation_pg: normalises an address string.
--
-- Parameters:
--   spalte_in  the raw string to normalise.
--   art        when equal to 1, the pattern substitutions stored in
--              basics.wos_standard_plus_erg (rows with in_transfo = 1) are
--              applied before anything else; any other value skips that step.
--
-- Returns the normalised string. The steps, in order:
--   1. upper-case, remove apostrophes
--   2. optional substitutions from basics.wos_standard_plus_erg (art = 1)
--   3. collapse spaces, transliterate accented letters, schema_xyz.unaccent
--   4. punctuation clean-up
--   5. removal of filler words (for, fur, fuer, zu, des, the, of, der)
--   6. STRASSE -> STR, PLATZ -> PL
--   7. abbreviations from basics.wos_standard_plus
--   8. removal of dots, whitespace tidy-up, removal of a trailing
--      ", GERMANY"-style country suffix
CREATE OR REPLACE FUNCTION schema_xyz.wos_transformation_pg(spalte_in character varying, art bigint)
RETURNS character varying
LANGUAGE plpgsql
AS $function$
DECLARE
    -- Working copy of the input. Deliberately unbounded: a varchar(2000)
    -- variable raises "value too long" for long inputs or when a
    -- substitution makes the text grow past the limit.
    str text;
    filler_word text;
    country_suffix text;

    -- Substitution patterns: '.' in scp is taken literally, '?' stands for
    -- "any single character". Longest patterns are applied first.
    w1 CURSOR FOR
        SELECT
            replace(replace(scp, '.', '\.'), '?', '.') AS scp,
            wos
        FROM basics.wos_standard_plus_erg
        WHERE in_transfo = 1
        ORDER BY length(scp) DESC;

    -- Abbreviations whose long form occurs in the current value of str.
    -- str is read when the cursor is opened, i.e. at the start of the loop
    -- over w2, not when the function starts.
    w2 CURSOR FOR
        SELECT
            abbrev,
            cont
        FROM basics.wos_standard_plus
        WHERE str LIKE '%' || cont || '%'
        ORDER BY length(cont) DESC;
BEGIN
    -- Upper-case and drop apostrophes.
    str := replace(upper(spalte_in), '''', '');

    IF art = 1 THEN
        -- The loop record is defined implicitly by the cursor FOR loop.
        FOR erg_row IN w1 LOOP
            -- A NULL pattern would turn the whole result into NULL.
            CONTINUE WHEN erg_row.scp IS NULL;
            -- NOTE(review): the delimiters around the token are consumed by
            -- the match, so two occurrences separated by a single delimiter
            -- are not both replaced in one pass. Left as is.
            str := regexp_replace(
                str,
                '(\W|^|\s)' || erg_row.scp || '(\W|$|\s)',
                -- coalesce: a NULL replacement means "remove the token"
                -- instead of nulling the whole string.
                '\1' || coalesce(erg_row.wos, '') || '\2',
                'g'
            );
        END LOOP;
    END IF;

    -- Collapse runs of spaces, then strip a leading ", ".
    str := regexp_replace(str, ' {2,}', ' ', 'g');
    str := regexp_replace(str, '^, ', '');

    -- Transliterate accented capitals. Lower-case forms need no handling
    -- here because the input was upper-cased above.
    -- NOTE(review): the earlier revision repeated the Ä/Ö/Ü replacements a
    -- second time. If that second pass targeted decomposed (NFD) forms,
    -- confirm that schema_xyz.unaccent covers them.
    str := replace(str, 'Æ', 'AE');
    str := translate(str, 'ÀÁÂÄÅÇÐÉÌÍÑÒÓÖØÚÜ', 'AAAAACDEIINOOOOUU');
    str := replace(str, 'ß', 'SS');
    -- Upper-case again: the substitutions above may have inserted
    -- lower-case text.
    str := upper(str);
    str := translate(str, 'ÈËÃŰŨÛṲÙÔŐŃ', 'EEAUUUUUOON');
    str := schema_xyz.unaccent(str);

    -- Punctuation.
    str := replace(str, '. ', ' ');
    str := replace(str, '.,', ',');
    str := replace(str, '+', '&');
    -- A hyphen becomes a space unless a digit follows it.
    str := regexp_replace(str, '-([^0-9])', ' \1', 'g');
    str := replace(str, '''', '');
    str := translate(str, '()/:', '    ');
    -- Make sure a comma is followed by a space before a letter.
    str := regexp_replace(str, ',([a-zA-Z])', ', \1', 'g');

    -- Filler words, removed one after another (case-insensitive).
    FOREACH filler_word IN ARRAY ARRAY['for', 'fur', 'fuer', 'zu', 'des', 'the'] LOOP
        str := regexp_replace(str, ' ' || filler_word || ' ', ' ', 'ig');
    END LOOP;
    -- Keep "OF" when the string contains "CARE OF".
    IF upper(str) NOT LIKE '%CARE OF%' THEN
        str := regexp_replace(str, ' of ', ' ', 'ig');
    END IF;
    -- Keep "DER" when it follows AN, IN, AUF, RECHTS or KLIN.
    IF str !~ '(AN|IN|AUF|RECHTS|KLIN) DER ' THEN
        str := regexp_replace(str, ' der ', ' ', 'ig');
    END IF;

    -- Street and square suffixes.
    str := regexp_replace(str, 'STRASSE(\W|$|\s)', 'STR\1', 'g');
    str := regexp_replace(str, 'PLATZ(\W|$|\s)', 'PL\1', 'g');

    -- Abbreviations according to table basics.wos_standard_plus.
    FOR std_row IN w2 LOOP
        -- NOTE(review): cont is used as a regular expression without
        -- escaping; entries containing regex metacharacters match more
        -- than their literal text. Confirm whether that is intended.
        str := regexp_replace(
            str,
            '(\W|^|\s)' || std_row.cont || '(\W|$|\s)',
            '\1' || coalesce(std_row.abbrev, '') || '\2',
            'g'
        );
    END LOOP;

    -- Remove all remaining dots (not useful in every case).
    str := replace(str, '.', '');
    -- Collapse spaces again and tidy up around commas and the edges.
    str := regexp_replace(str, ' {2,}', ' ', 'g');
    str := replace(str, ' , ', ', ');
    str := btrim(str);

    -- Delete the country designation at the end of the address. The
    -- suffixes are tried one after another in this order, so more than one
    -- may be stripped when they are stacked in a matching order.
    FOREACH country_suffix IN ARRAY ARRAY[
        ', DEUTSCH DEM REP',
        ', PRUSSIA',
        ',GER DEM REP',
        ', W GERMANY',
        ',FED REP GER',
        ',GER DEM',
        ',FED RER GER',
        ', GERMANY',
        ', GERMANY;',
        ', FED REP GER',
        ', EAST GERMANY',
        ', GER DEM REP',
        ', ALEMANIA',
        ', W GER',
        ',GERMANY',
        ', WEST GERMANY',
        ', DEU',
        ', DEUTSCHLAND'
    ] LOOP
        str := regexp_replace(str, country_suffix || '$', '', 'g');
    END LOOP;

    RETURN str;
END;
$function$
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment