From 91c89698d2670a271c8c0882c3bef4bbf252fa77 Mon Sep 17 00:00:00 2001 From: Christopher Lenke <christopher.lenke@uni-bielefeld.de> Date: Mon, 23 Sep 2024 18:59:20 +0200 Subject: [PATCH] 23.09.2024 --- .../city_cleaning_pg_spalte_in_text_.txt | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Functions/city_cleaning_pg_spalte_in_text_.txt diff --git a/Functions/city_cleaning_pg_spalte_in_text_.txt b/Functions/city_cleaning_pg_spalte_in_text_.txt new file mode 100644 index 0000000..6da15f9 --- /dev/null +++ b/Functions/city_cleaning_pg_spalte_in_text_.txt @@ -0,0 +1,49 @@ +-- DROP FUNCTION schema_xyz.city_cleaning_pg(text); + +CREATE OR REPLACE FUNCTION schema_xyz.city_cleaning_pg(spalte_in text) + RETURNS character varying + LANGUAGE plpgsql +AS $function$ + +DECLARE + + +ergebnis varchar; +city_cleaned varchar; + + + +BEGIN + + +city_cleaned:=regexp_replace(upper(spalte_in),'D?E?O?W?-? ?[0-9]{4,6} ?',' ' , 'g'); +city_cleaned:=regexp_replace(city_cleaned,'D?E?O?W?-? ?[0-9]{2,2} [0-9]{3,3} ?',' ' , 'g'); +city_cleaned:=regexp_replace(city_cleaned,'D?E?o?W?-? ?[0-9]{3,3} [0-9]{2,2} ?',' ' , 'g'); +city_cleaned:=ltrim(rtrim(city_cleaned)); +city_cleaned:=replace(city_cleaned,', ', ' '); +city_cleaned:=replace(city_cleaned,' ', ' '); +city_cleaned:=replace(city_cleaned,' ', ' '); + +if (city_cleaned like '%,') +then +city_cleaned:=substr(city_cleaned, 1, length(city_cleaned)-1); +end if; + +city_cleaned:=ltrim(rtrim(city_cleaned)); +city_cleaned:=regexp_replace(city_cleaned,'^[0-9]{1,3} ',' ' , 'g'); +city_cleaned:=regexp_replace(city_cleaned,' [0-9]{1,3}$',' ' , 'g'); +city_cleaned:=regexp_replace(city_cleaned,' [0-9]{1,3} ',' ' , 'g'); +city_cleaned:=replace(city_cleaned,', ', ' '); +city_cleaned:=replace(city_cleaned,' ', ' '); +city_cleaned:=replace(city_cleaned,' ', ' '); +city_cleaned:=ltrim(rtrim(city_cleaned)); + + + ergebnis:=city_cleaned; + + + RETURN ergebnis; + + END; +$function$ +; -- GitLab