Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
I
Institutional Coding
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
AG Bibliometrie
Institutional Coding
Commits
0a3fcb97
Commit
0a3fcb97
authored
5 months ago
by
Christopher Lenke
Browse files
Options
Downloads
Patches
Plain Diff
23.09.2024
parent
5657bbed
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Functions/wos_transformation_pg_spalte_in_character_varying__art_bigint_.txt
+192
-0
192 additions, 0 deletions
...formation_pg_spalte_in_character_varying__art_bigint_.txt
with
192 additions
and
0 deletions
Functions/wos_transformation_pg_spalte_in_character_varying__art_bigint_.txt
0 → 100644
+
192
−
0
View file @
0a3fcb97
-- DROP FUNCTION schema_xyz.wos_transformation_pg(varchar, int8);
-- Normalises an address string: upper-cases it, strips accents, punctuation
-- and filler words, applies the abbreviation tables and removes a trailing
-- German country designation.
--
-- Parameters:
--   spalte_in  raw input string
--   art        when 1, the patterns from basics.wos_standard_plus_erg are
--              applied first; any other value (or NULL) skips that step
-- Returns the normalised string, or NULL when spalte_in is NULL.
CREATE OR REPLACE FUNCTION schema_xyz.wos_transformation_pg(spalte_in character varying, art bigint)
 RETURNS character varying
 LANGUAGE plpgsql
AS $function$
DECLARE
    -- text rather than varchar(2000): assigning a longer value to a
    -- length-limited variable raises "value too long", and several steps
    -- below (e.g. ß -> SS, abbreviation replacement) can lengthen the string.
    str            text;
    pattern_row    record;
    abbrev_row     record;
    filler         text;
    country_suffix text;
BEGIN
    -- Nothing to normalise; also avoids scanning the lookup tables for nothing.
    IF spalte_in IS NULL THEN
        RETURN NULL;
    END IF;

    str := upper(spalte_in);
    str := replace(str, '''', '');

    IF art = 1 THEN
        -- SCP is a simple wildcard pattern: '.' is literal, '?' matches any
        -- single character. Longest patterns are applied first.
        -- Rows with a NULL pattern are skipped and a NULL replacement is
        -- treated as empty, because regexp_replace returns NULL for any NULL
        -- argument and a single incomplete row would otherwise null the result.
        FOR pattern_row IN
            SELECT
                replace(replace(scp, '.', '\.'), '?', '.') AS scp_regex,
                coalesce(wos, '') AS wos_value
            FROM basics.wos_standard_plus_erg
            WHERE in_transfo = 1
              AND scp IS NOT NULL
            ORDER BY length(scp) DESC
        LOOP
            str := regexp_replace(
                str,
                '(\W|^|\s)' || pattern_row.scp_regex || '(\W|$|\s)',
                '\1' || pattern_row.wos_value || '\2',
                'g'
            );
        END LOOP;
    END IF;

    -- Collapse runs of spaces and drop a leading ", ".
    str := regexp_replace(str, ' {2,}', ' ', 'g');
    str := regexp_replace(str, '^(, )(.*)', '\2', 'g');

    -- Special characters. Only upper-case variants are listed because the
    -- string has been upper-cased above.
    str := replace(str, 'Æ', 'AE');
    str := translate(str, 'ÀÁÂÄÅÇÐÉÌÍÑÒÓÖØÚÜ', 'AAAAACDEIINOOOOUU');
    -- NOTE(review): the umlaut replacements below repeat characters already
    -- handled above; possibly they were meant for a differently encoded
    -- variant of the same letters - confirm before removing.
    str := replace(str, 'Ä', 'A');
    str := replace(str, 'Ö', 'O');
    str := replace(str, 'Ü', 'U');
    str := replace(str, 'ß', 'SS');
    -- Replacement values from the lookup table may have introduced lower case.
    str := upper(str);
    str := translate(str, 'ÈËÃŰŨÛṲÙÔŐŃ', 'EEAUUUUUOON');
    str := schema_xyz.unaccent(str);

    -- Punctuation.
    str := replace(str, '. ', ' ');
    str := replace(str, '.,', ',');
    str := replace(str, '+', '&');
    -- A hyphen becomes a space unless it is followed by a digit.
    str := regexp_replace(str, '-([^0-9])', ' \1', 'g');
    str := replace(str, '''', '');
    str := translate(str, '()/:', '    ');
    -- Ensure a space after a comma that is directly followed by a letter.
    str := regexp_replace(str, ',([a-zA-Z])', ', \1', 'g');

    -- Filler words, one pass per word in this order (a combined alternation
    -- would miss a filler word that directly follows another one).
    FOREACH filler IN ARRAY ARRAY['for', 'fur', 'fuer', 'zu', 'des', 'the'] LOOP
        str := regexp_replace(str, ' ' || filler || ' ', ' ', 'ig');
    END LOOP;

    -- "OF" is kept when the string contains "CARE OF".
    IF upper(str) NOT LIKE '%CARE OF%' THEN
        str := regexp_replace(str, ' of ', ' ', 'ig');
    END IF;

    -- "DER" is kept when the string contains one of the listed prefixes
    -- followed by " DER ".
    IF str !~ '(AN|IN|AUF|RECHTS|KLIN) DER ' THEN
        str := regexp_replace(str, ' der ', ' ', 'ig');
    END IF;

    -- Abbreviate street and square names.
    str := regexp_replace(str, 'STRASSE(\W|$|\s)', 'STR\1', 'g');
    str := regexp_replace(str, 'PLATZ(\W|$|\s)', 'PL\1', 'g');

    -- Replacement according to table WOS_STANDARD_PLUS. Candidate rows are
    -- selected against the value str has when the loop starts, longest
    -- entries first. A NULL abbreviation is treated as empty so that it
    -- cannot turn the whole result into NULL.
    FOR abbrev_row IN
        SELECT
            coalesce(abbrev, '') AS abbrev,
            cont
        FROM basics.wos_standard_plus
        WHERE str LIKE '%' || cont || '%'
        ORDER BY length(cont) DESC
    LOOP
        str := regexp_replace(
            str,
            '(\W|^|\s)' || abbrev_row.cont || '(\W|$|\s)',
            '\1' || abbrev_row.abbrev || '\2',
            'g'
        );
    END LOOP;

    -- Remove remaining dots (not useful in every case).
    str := replace(str, '.', '');
    -- Collapse runs of spaces, tidy comma spacing, trim.
    str := regexp_replace(str, ' {2,}', ' ', 'g');
    str := replace(str, ' , ', ', ');
    str := btrim(str);

    -- Delete the country designation at the end of the address. The entries
    -- are applied one after another in this order.
    FOREACH country_suffix IN ARRAY ARRAY[
        ', DEUTSCH DEM REP',
        ', PRUSSIA',
        ',GER DEM REP',
        ', W GERMANY',
        ',FED REP GER',
        ',GER DEM',
        ',FED RER GER',
        ', GERMANY',
        ', GERMANY;',
        ', FED REP GER',
        ', EAST GERMANY',
        ', GER DEM REP',
        ', ALEMANIA',
        ', W GER',
        ',GERMANY',
        ', WEST GERMANY',
        ', DEU',
        ', DEUTSCHLAND'
    ] LOOP
        str := regexp_replace(str, country_suffix || '$', '');
    END LOOP;

    RETURN str;
END;
$function$
;
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment