Skip to content
Snippets Groups Projects
Commit 0a3fcb97 authored by Christopher Lenke's avatar Christopher Lenke
Browse files

23.09.2024

parent 5657bbed
No related branches found
No related tags found
No related merge requests found
-- DROP FUNCTION schema_xyz.wos_transformation_pg(varchar, int8);
CREATE OR REPLACE FUNCTION schema_xyz.wos_transformation_pg(spalte_in character varying, art bigint)
RETURNS character varying
LANGUAGE plpgsql
AS $function$
-- Normalizes an address / affiliation string into a canonical upper-case form.
--
-- Parameters:
--   spalte_in  raw input string.
--   art        when 1, the replacements from basics.wos_standard_plus_erg
--              (rows with in_transfo = 1) are applied first; any other value
--              skips that step.
--
-- Returns the normalized string. Processing order:
--   1. upper-case, strip apostrophes
--   2. (art = 1 only) replace scp tokens by wos, longest scp first
--   3. collapse blanks, drop a leading ", "
--   4. transliterate accented characters (explicit list + schema_xyz.unaccent)
--   5. normalize punctuation
--   6. remove filler words
--   7. abbreviate STRASSE / PLATZ
--   8. replace cont tokens by abbrev from basics.wos_standard_plus, longest first
--   9. remove dots, collapse blanks, trim
--  10. strip a trailing designation of Germany
--
-- NOTE(review): scp / cont values are used as regular expressions and
-- wos / abbrev as regexp replacement text; only '.' and '?' in scp are
-- translated. Other regex metacharacters or backslashes in those columns are
-- interpreted by regexp_replace, and a NULL wos / abbrev would null the whole
-- result -- confirm the table contents are safe in that respect.
DECLARE
    -- Unbounded on purpose: a length limit on the working variable raises
    -- "value too long" for long inputs or when replacements expand the text,
    -- although the function's return type has no such limit.
    str character varying;
    country_suffix text;

    -- In scp a '.' is meant literally and '?' stands for any single character.
    w1 CURSOR FOR
        SELECT REPLACE(REPLACE(scp, '.', '\.'), '?', '.') AS scp,
               wos
        FROM basics.wos_standard_plus_erg
        WHERE in_transfo = 1
        ORDER BY length(scp) DESC;

    -- str is evaluated when the cursor is opened, i.e. only entries whose cont
    -- occurs in the string as it looks right before the loop are fetched.
    w2 CURSOR FOR
        SELECT abbrev,
               cont
        FROM basics.wos_standard_plus
        WHERE str LIKE '%' || cont || '%'
        ORDER BY length(cont) DESC;
BEGIN
    str := upper(spalte_in);
    str := REGEXP_REPLACE(str, '''', '', 'g');

    IF art = 1 THEN
        -- The loop record is defined implicitly by the cursor FOR loop.
        -- The trailing boundary is a lookahead so that it is not consumed:
        -- otherwise the second of two tokens sharing one delimiter
        -- ("X X", "X,X") would be skipped by the global replace.
        -- (\s is already covered by \W.)
        FOR std_entry0 IN w1 LOOP
            str := REGEXP_REPLACE(
                str,
                '(\W|^)' || std_entry0.scp || '(?=\W|$)',
                '\1' || std_entry0.wos,
                'g'
            );
        END LOOP;
    END IF;

    -- double spaces out
    str := REGEXP_REPLACE(str, ' {2,}', ' ', 'g');
    -- drop a leading ", "
    str := REGEXP_REPLACE(str, '^(, )(.*)', '\2', 'g');

    -- all special characters out (only upper-case forms are listed because the
    -- string has been upper-cased above)
    str := REPLACE(str, 'Æ', 'AE');
    str := REPLACE(str, 'À', 'A');
    str := REPLACE(str, 'Á', 'A');
    str := REPLACE(str, 'Â', 'A');
    str := REPLACE(str, 'Ä', 'A');
    str := REPLACE(str, 'Å', 'A');
    str := REPLACE(str, 'Ç', 'C');
    str := REPLACE(str, 'Ð', 'D');
    str := REPLACE(str, 'É', 'E');
    str := REPLACE(str, 'Ì', 'I');
    str := REPLACE(str, 'Í', 'I');
    str := REPLACE(str, 'Ñ', 'N');
    str := REPLACE(str, 'Ò', 'O');
    str := REPLACE(str, 'Ó', 'O');
    str := REPLACE(str, 'Ö', 'O');
    str := REPLACE(str, 'Ø', 'O');
    str := REPLACE(str, 'Ú', 'U');
    str := REPLACE(str, 'Ü', 'U');
    -- NOTE(review): the next three look like repeats of the umlaut
    -- replacements above; kept in case the literals differ in Unicode
    -- normalization form -- TODO confirm and remove if identical.
    str := REPLACE(str, 'Ä', 'A');
    str := REPLACE(str, 'Ö', 'O');
    str := REPLACE(str, 'Ü', 'U');
    str := REPLACE(str, 'ß', 'SS');
    str := upper(str);
    str := REGEXP_REPLACE(str, '[ÈË]', 'E', 'g');
    str := REGEXP_REPLACE(str, '[Ã]', 'A', 'g');
    str := REGEXP_REPLACE(str, '[ŰŨÛṲÙ]', 'U', 'g');
    str := REGEXP_REPLACE(str, '[ÔŐ]', 'O', 'g');
    str := REGEXP_REPLACE(str, '[Ń]', 'N', 'g');
    -- Upper-case once more after unaccent: it may emit lower-case ASCII for
    -- characters upper() left untouched, and the table lookup below compares
    -- case-sensitively.
    str := upper(schema_xyz.unaccent(str));

    -- punctuation
    str := REGEXP_REPLACE(str, '\. ', ' ', 'g');
    str := REGEXP_REPLACE(str, '\.,', ',', 'g');
    str := REGEXP_REPLACE(str, '\+', '&', 'g');
    -- a hyphen is kept only in front of a digit
    str := REGEXP_REPLACE(str, '-([^0-9])', ' \1', 'g');
    str := REGEXP_REPLACE(str, '''', '', 'g');
    str := REGEXP_REPLACE(str, '\(', ' ', 'g');
    str := REGEXP_REPLACE(str, '\)', ' ', 'g');
    str := REGEXP_REPLACE(str, '/', ' ', 'g');
    str := REGEXP_REPLACE(str, '\:', ' ', 'g');
    -- make sure a comma is followed by a blank
    str := REGEXP_REPLACE(str, ',([a-zA-Z])', ', \1', 'g');

    -- filler words (matched case-insensitively; applied one after another so
    -- that neighbouring filler words are all removed)
    str := REGEXP_REPLACE(str, ' for ', ' ', 'ig');
    str := REGEXP_REPLACE(str, ' fur ', ' ', 'ig');
    str := REGEXP_REPLACE(str, ' fuer ', ' ', 'ig');
    str := REGEXP_REPLACE(str, ' zu ', ' ', 'ig');
    str := REGEXP_REPLACE(str, ' des ', ' ', 'ig');
    str := REGEXP_REPLACE(str, ' the ', ' ', 'ig');
    -- keep "OF" when the string contains "CARE OF"
    IF upper(str) NOT LIKE '%CARE OF%' THEN
        str := REGEXP_REPLACE(str, ' of ', ' ', 'ig');
    END IF;
    -- keep "DER" when it is part of "AN DER", "IN DER", "AUF DER", ...
    IF str !~ '(AN|IN|AUF|RECHTS|KLIN) DER ' THEN
        str := REGEXP_REPLACE(str, ' der ', ' ', 'ig');
    END IF;

    -- all streets
    str := REGEXP_REPLACE(str, 'STRASSE(\W|$|\s)', 'STR\1', 'g');
    -- all squares
    str := REGEXP_REPLACE(str, 'PLATZ(\W|$|\s)', 'PL\1', 'g');

    -- Replacement according to table wos_standard_plus
    -- (same non-consuming trailing boundary as in the first loop)
    FOR std_entry IN w2 LOOP
        str := REGEXP_REPLACE(
            str,
            '(\W|^)' || std_entry.cont || '(?=\W|$)',
            '\1' || std_entry.abbrev,
            'g'
        );
    END LOOP;

    -- Points out?? Not useful in every case....
    str := REGEXP_REPLACE(str, '\.', '', 'g');
    -- double spaces out
    str := REGEXP_REPLACE(str, ' {2,}', ' ', 'g');
    str := REPLACE(str, ' , ', ', ');
    str := ltrim(rtrim(str));

    -- Delete country code at the end of the address. The patterns are applied
    -- one after another in exactly this order.
    -- NOTE(review): a blank is inserted after every comma followed by a letter
    -- further up, so the variants without a blank can presumably only match
    -- text introduced by the table replacements -- confirm before pruning.
    FOREACH country_suffix IN ARRAY ARRAY[
        ', DEUTSCH DEM REP$',
        ', PRUSSIA$',
        ',GER DEM REP$',
        ', W GERMANY$',
        ',FED REP GER$',
        ',GER DEM$',
        ',FED RER GER$',
        ', GERMANY$',
        ', GERMANY;$',
        ', FED REP GER$',
        ', EAST GERMANY$',
        ', GER DEM REP$',
        ', ALEMANIA$',
        ', W GER$',
        ',GERMANY$',
        ', WEST GERMANY$',
        ', DEU$',
        ', DEUTSCHLAND$'
    ]::text[]
    LOOP
        str := REGEXP_REPLACE(str, country_suffix, '', 'g');
    END LOOP;

    RETURN str;
END;
$function$
;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment