From 91c89698d2670a271c8c0882c3bef4bbf252fa77 Mon Sep 17 00:00:00 2001
From: Christopher Lenke <christopher.lenke@uni-bielefeld.de>
Date: Mon, 23 Sep 2024 18:59:20 +0200
Subject: [PATCH] 23.09.2024

---
 .../city_cleaning_pg_spalte_in_text_.txt      | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 Functions/city_cleaning_pg_spalte_in_text_.txt

diff --git a/Functions/city_cleaning_pg_spalte_in_text_.txt b/Functions/city_cleaning_pg_spalte_in_text_.txt
new file mode 100644
index 0000000..6da15f9
--- /dev/null
+++ b/Functions/city_cleaning_pg_spalte_in_text_.txt
@@ -0,0 +1,49 @@
+-- DROP FUNCTION schema_xyz.city_cleaning_pg(text);
+
+CREATE OR REPLACE FUNCTION schema_xyz.city_cleaning_pg(spalte_in text)
+ RETURNS character varying
+ LANGUAGE plpgsql
+AS $function$
+
+DECLARE
+
+
+ergebnis varchar;
+city_cleaned varchar;
+
+
+
+BEGIN
+
+   
+city_cleaned:=regexp_replace(upper(spalte_in),'D?E?O?W?-? ?[0-9]{4,6} ?',' ' , 'g');
+city_cleaned:=regexp_replace(city_cleaned,'D?E?O?W?-? ?[0-9]{2,2} [0-9]{3,3} ?',' ' , 'g');
+city_cleaned:=regexp_replace(city_cleaned,'D?E?o?W?-? ?[0-9]{3,3} [0-9]{2,2} ?',' ' , 'g');
+city_cleaned:=ltrim(rtrim(city_cleaned));
+city_cleaned:=replace(city_cleaned,', ', ' ');
+city_cleaned:=replace(city_cleaned,'  ', ' ');
+city_cleaned:=replace(city_cleaned,'  ', ' ');
+
+if (city_cleaned like '%,')
+then
+city_cleaned:=substr(city_cleaned, 1, length(city_cleaned)-1);
+end if;
+
+city_cleaned:=ltrim(rtrim(city_cleaned));
+city_cleaned:=regexp_replace(city_cleaned,'^[0-9]{1,3} ',' ' , 'g');
+city_cleaned:=regexp_replace(city_cleaned,' [0-9]{1,3}$',' ' , 'g');
+city_cleaned:=regexp_replace(city_cleaned,' [0-9]{1,3} ',' ' , 'g');
+city_cleaned:=replace(city_cleaned,', ', ' ');
+city_cleaned:=replace(city_cleaned,'  ', ' ');
+city_cleaned:=replace(city_cleaned,'  ', ' ');
+city_cleaned:=ltrim(rtrim(city_cleaned));
+
+
+ ergebnis:=city_cleaned;
+   
+   
+   RETURN ergebnis;
+
+	END;
+$function$
+;
-- 
GitLab