23.09.2024

0a3fcb97 · Christopher Lenke · 5657bbed · 0a3fcb97
Commit 0a3fcb97 authored 5 months ago by Christopher Lenke
--- a/Functions/wos_transformation_pg_spalte_in_character_varying__art_bigint_.txt
+++ b/Functions/wos_transformation_pg_spalte_in_character_varying__art_bigint_.txt
+-- DROP FUNCTION schema_xyz.wos_transformation_pg(varchar, int8);
+
+CREATE OR REPLACE FUNCTION schema_xyz.wos_transformation_pg(spalte_in character varying, art bigint)
+ RETURNS character varying
+ LANGUAGE plpgsql
+AS $function$
+-- Normalizes a free-text address / organisation string into an upper-case,
+-- accent-free, abbreviated form.
+--
+-- Parameters:
+--   spalte_in  raw input string; NULL input yields NULL.
+--   art        when equal to 1, the replacements read from
+--              basics.wos_standard_plus_erg (rows with in_transfo = 1) are
+--              applied first; for any other value (or NULL) that step is skipped.
+--
+-- Returns: the normalized string.
+--
+-- The steps below are order-dependent (later patterns rely on the output of
+-- earlier ones), so do not reorder them without re-testing.
+DECLARE
+    -- Working copy of the input. Declared as text on purpose: a length-limited
+    -- varchar variable raises "value too long" on assignment, and several steps
+    -- below can lengthen the string (e.g. 'ß' -> 'SS', table-driven replacements).
+    str    text;
+    filler text;  -- current filler word in the filler-word loop
+    suffix text;  -- current country suffix in the suffix-stripping loop
+
+    -- Replacement pairs applied only when art = 1. In scp a literal '.' is
+    -- escaped and '?' is turned into the regex wildcard '.'. Longest raw scp
+    -- values come first (names inside an ORDER BY expression resolve to the
+    -- input column, not the output alias).
+    w1 CURSOR FOR
+        SELECT replace(replace(scp, '.', '\.'), '?', '.') AS scp, wos
+        FROM basics.wos_standard_plus_erg
+        WHERE in_transfo = 1
+        ORDER BY length(scp) DESC;
+
+    -- Abbreviation pairs whose cont value occurs in the working string. The
+    -- value of str is taken at the moment the cursor is opened, so changes made
+    -- to str inside the loop do not alter the set of rows that is iterated.
+    w2 CURSOR FOR
+        SELECT abbrev, cont
+        FROM basics.wos_standard_plus
+        WHERE str LIKE '%' || cont || '%'
+        ORDER BY length(cont) DESC;
+BEGIN
+    -- Every step below propagates NULL, so return early.
+    IF spalte_in IS NULL THEN
+        RETURN NULL;
+    END IF;
+
+    str := upper(spalte_in);
+    -- Drop apostrophes.
+    str := regexp_replace(str, '''', '', 'g');
+
+    IF art = 1 THEN
+        -- The loop variable is declared implicitly by the cursor FOR loop.
+        -- NOTE(review): scp is used as a regular expression; only '.' and '?'
+        -- are pre-processed, other metacharacters in the table data would be
+        -- interpreted by the regex engine - confirm the data is clean.
+        -- '\W' already covers whitespace, the extra '\s' alternative is kept
+        -- only to leave the pattern unchanged.
+        FOR std_entry0 IN w1 LOOP
+            str := regexp_replace(
+                str,
+                '(\W|^|\s)' || std_entry0.scp || '(\W|$|\s)',
+                '\1' || std_entry0.wos || '\2',
+                'g');
+        END LOOP;
+    END IF;
+
+    -- Collapse runs of spaces and drop a leading ", ".
+    str := regexp_replace(str, ' {2,}', ' ', 'g');
+    str := regexp_replace(str, '^(, )(.*)', '\2', 'g');
+
+    -- Transliterate special characters. The string is already upper case,
+    -- so only upper-case variants are handled here.
+    str := replace(str, 'Æ', 'AE');
+    str := replace(str, 'À', 'A');
+    str := replace(str, 'Á', 'A');
+    str := replace(str, 'Â', 'A');
+    str := replace(str, 'Ä', 'A');
+    str := replace(str, 'Å', 'A');
+    str := replace(str, 'Ç', 'C');
+    str := replace(str, 'Ð', 'D');
+    str := replace(str, 'É', 'E');
+    str := replace(str, 'Ì', 'I');
+    str := replace(str, 'Í', 'I');
+    str := replace(str, 'Ñ', 'N');
+    str := replace(str, 'Ò', 'O');
+    str := replace(str, 'Ó', 'O');
+    str := replace(str, 'Ö', 'O');
+    str := replace(str, 'Ø', 'O');
+    str := replace(str, 'Ú', 'U');
+    str := replace(str, 'Ü', 'U');
+
+    -- NOTE(review): the next three replacements repeat ones made above and
+    -- look redundant; they are kept in case the original file used a different
+    -- Unicode encoding of these letters - confirm before removing.
+    str := replace(str, 'Ä', 'A');
+    str := replace(str, 'Ö', 'O');
+    str := replace(str, 'Ü', 'U');
+
+    str := replace(str, 'ß', 'SS');
+
+    -- Upper-case again: values inserted by the art = 1 replacements come from
+    -- table data and are not upper-cased before this point.
+    str := upper(str);
+
+    str := regexp_replace(str, '[ÈË]', 'E', 'g');
+    str := regexp_replace(str, '[Ã]', 'A', 'g');
+    str := regexp_replace(str, '[ŰŨÛṲÙ]', 'U', 'g');
+    str := regexp_replace(str, '[ÔŐ]', 'O', 'g');
+    str := regexp_replace(str, '[Ń]', 'N', 'g');
+
+    -- NOTE(review): presumably the unaccent extension function installed in
+    -- schema_xyz, removing remaining diacritics - confirm.
+    str := schema_xyz.unaccent(str);
+
+    -- Punctuation.
+    str := regexp_replace(str, '\. ', ' ', 'g');               -- ". "  -> " "
+    str := regexp_replace(str, '\.,', ',', 'g');               -- ".,"  -> ","
+    str := regexp_replace(str, '\+', '&', 'g');                -- "+"   -> "&"
+    str := regexp_replace(str, '-([^0-9])', ' \1', 'g');       -- hyphen not followed by a digit -> space
+    str := regexp_replace(str, '''', '', 'g');                 -- apostrophes out
+    str := regexp_replace(str, '\(', ' ', 'g');
+    str := regexp_replace(str, '\)', ' ', 'g');
+    str := regexp_replace(str, '/', ' ', 'g');
+    str := regexp_replace(str, '\:', ' ', 'g');
+    str := regexp_replace(str, ',([a-zA-Z])', ', \1', 'g');    -- ensure a space after a comma
+
+    -- Filler words (case-insensitive, must be surrounded by single spaces),
+    -- removed one word after the other in this order.
+    FOREACH filler IN ARRAY ARRAY['for', 'fur', 'fuer', 'zu', 'des', 'the'] LOOP
+        str := regexp_replace(str, ' ' || filler || ' ', ' ', 'ig');
+    END LOOP;
+
+    -- Keep "of" when the string contains "CARE OF".
+    IF upper(str) NOT LIKE '%CARE OF%' THEN
+        str := regexp_replace(str, ' of ', ' ', 'ig');
+    END IF;
+
+    -- Keep "der" when the string contains one of the listed words directly
+    -- followed by " DER ".
+    IF str !~ '(AN|IN|AUF|RECHTS|KLIN) DER ' THEN
+        str := regexp_replace(str, ' der ', ' ', 'ig');
+    END IF;
+
+    -- Abbreviate street and square suffixes.
+    str := regexp_replace(str, 'STRASSE(\W|$|\s)', 'STR\1', 'g');
+    str := regexp_replace(str, 'PLATZ(\W|$|\s)', 'PL\1', 'g');
+
+    -- Replacement according to table wos_standard_plus, longest cont first.
+    -- NOTE(review): cont is used both as a LIKE operand (cursor query) and,
+    -- unescaped, as a regular expression here - confirm the table holds no
+    -- LIKE wildcards or regex metacharacters that are meant literally.
+    FOR std_entry IN w2 LOOP
+        str := regexp_replace(
+            str,
+            '(\W|^|\s)' || std_entry.cont || '(\W|$|\s)',
+            '\1' || std_entry.abbrev || '\2',
+            'g');
+    END LOOP;
+
+    -- Remove all remaining periods (original author's remark: not useful in
+    -- every case).
+    str := regexp_replace(str, '\.', '', 'g');
+
+    -- Collapse runs of spaces, tidy " , " and trim both ends.
+    str := regexp_replace(str, ' {2,}', ' ', 'g');
+    str := replace(str, ' , ', ', ');
+    str := btrim(str);
+
+    -- Delete a country designation at the end of the address. The suffixes are
+    -- tried one after the other in this order, so more than one can be stripped.
+    -- NOTE(review): ',FED RER GER' looks like a misspelling of 'FED REP GER';
+    -- it is kept because it may match misspelled source data - confirm.
+    FOREACH suffix IN ARRAY ARRAY[
+        ', DEUTSCH DEM REP',
+        ', PRUSSIA',
+        ',GER DEM REP',
+        ', W GERMANY',
+        ',FED REP GER',
+        ',GER DEM',
+        ',FED RER GER',
+        ', GERMANY',
+        ', GERMANY;',
+        ', FED REP GER',
+        ', EAST GERMANY',
+        ', GER DEM REP',
+        ', ALEMANIA',
+        ', W GER',
+        ',GERMANY',
+        ', WEST GERMANY',
+        ', DEU',
+        ', DEUTSCHLAND'
+    ] LOOP
+        str := regexp_replace(str, suffix || '$', '', 'g');
+    END LOOP;
+
+    RETURN str;
+END;
+$function$
+;