Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
nopaque
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Monitor
Service Desk
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
Looking for advice? Join the
Matrix channel for GitLab users in Bielefeld
!
Show more breadcrumbs
SFB 1288 - INF
nopaque
Commits
edc0b340
Commit
edc0b340
authored
4 years ago
by
Patrick Jentsch
Browse files
Options
Downloads
Patches
Plain Diff
Process corpus files in task, not in database model
parent
e882af88
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
app/corpora/tasks.py
+43
-9
43 additions, 9 deletions
app/corpora/tasks.py
app/corpora/views.py
+15
-14
15 additions, 14 deletions
app/corpora/views.py
app/models.py
+0
-26
0 additions, 26 deletions
app/models.py
docker-compose.yml
+5
-0
5 additions, 0 deletions
docker-compose.yml
with
63 additions
and
49 deletions
app/corpora/tasks.py
+
43
−
9
View file @
edc0b340
from
..
import
db
from
..decorators
import
background
from
..models
import
Corpus
,
CorpusFile
import
xml.etree.ElementTree
as
ET
import
os
import
shutil
@background
def build_corpus(app, corpus_id):
    """Write metadata into each corpus file and merge them into one file.

    For every file of the corpus, the attributes of its ``<text>`` element
    are set from the values stored on the corpus file record and the file is
    saved in place. The ``<text>`` elements are then collected under a single
    ``<corpus>`` root element, which is written to ``merged/corpus.vrt``
    inside the corpus directory. The corpus status is set to
    ``'File processing'`` while this runs and to ``'submitted'`` at the end.

    Returns without doing anything if ``Corpus.query.get(corpus_id)`` yields
    no corpus.

    :param app: application object; its ``app_context()`` and ``config`` are
        used.
    :param corpus_id: identifier passed to ``Corpus.query.get``.
    :raises Exception: if a corpus file contains no ``<text>`` element.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return
        corpus.status = 'File processing'
        db.session.commit()
        corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
                                  str(corpus.user_id),
                                  'corpora',
                                  str(corpus.id))
        output_dir = os.path.join(corpus_dir, 'merged')
        # Every build starts from a fresh, empty output directory.
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
        master_element_tree = ET.ElementTree(
            ET.fromstring('<corpus>\n</corpus>')
        )
        master_root = master_element_tree.getroot()
        for corpus_file in corpus.files:
            file_path = os.path.join(corpus_dir, corpus_file.filename)
            element_tree = ET.parse(file_path)
            text_node = element_tree.find('text')
            if text_node is None:
                # find() returns None when nothing matches; fail with a clear
                # message instead of an AttributeError on the first set().
                raise Exception(
                    'Corpus file {} has no <text> element!'.format(
                        corpus_file.id
                    )
                )
            # Attribute order is significant for the serialized output, so
            # the pairs are listed in the order they must be set. Optional
            # fields fall back to the literal string "NULL" when empty.
            metadata = (
                ('address', corpus_file.address or "NULL"),
                ('author', corpus_file.author),
                ('booktitle', corpus_file.booktitle or "NULL"),
                ('chapter', corpus_file.chapter or "NULL"),
                ('editor', corpus_file.editor or "NULL"),
                ('institution', corpus_file.institution or "NULL"),
                ('journal', corpus_file.journal or "NULL"),
                ('pages', corpus_file.pages or "NULL"),
                ('publisher', corpus_file.publisher or "NULL"),
                ('publishing_year', str(corpus_file.publishing_year)),
                ('school', corpus_file.school or "NULL"),
                ('title', corpus_file.title),
            )
            for name, value in metadata:
                text_node.set(name, value)
            element_tree.write(file_path)
            # NOTE(review): insert(1, ...) puts every file after the first at
            # index 1, so the merged order is the first file followed by the
            # remaining files in reverse. Kept as is; confirm whether
            # append() was intended.
            master_root.insert(1, text_node)
        output_file = os.path.join(output_dir, 'corpus.vrt')
        master_element_tree.write(output_file,
                                  xml_declaration=True,
                                  encoding='utf-8')
        corpus.status = 'submitted'
        db.session.commit()
@background
def
delete_corpus
(
app
,
corpus_id
):
with
app
.
app_context
():
...
...
@@ -30,12 +73,3 @@ def delete_corpus_file(app, corpus_file_id):
pass
else
:
corpus_file
.
delete
()
@background
def edit_corpus_file(app, corpus_file_id):
    """Insert the stored metadata into a single corpus file.

    Looks the corpus file up inside the application context and calls its
    ``insert_metadata()`` method.

    :param app: application object whose ``app_context()`` is entered.
    :param corpus_file_id: identifier passed to ``CorpusFile.query.get``.
    :raises Exception: if no corpus file is found for ``corpus_file_id``.
    """
    with app.app_context():
        record = CorpusFile.query.get(corpus_file_id)
        if record is not None:
            record.insert_metadata()
        else:
            message = 'Corpus file {} not found!'.format(corpus_file_id)
            raise Exception(message)
This diff is collapsed.
Click to expand it.
app/corpora/views.py
+
15
−
14
View file @
edc0b340
...
...
@@ -60,14 +60,16 @@ def analyse_corpus(corpus_id):
query_form
=
QueryForm
(
prefix
=
'
query-form
'
,
query
=
request
.
args
.
get
(
'
query
'
))
query_download_form
=
QueryDownloadForm
(
prefix
=
'
query-download-form
'
)
inspect_display_options_form
=
InspectDisplayOptionsForm
(
prefix
=
'
inspect-display-options-form
'
)
return
render_template
(
'
corpora/analyse_corpus.html.j2
'
,
corpus_id
=
corpus_id
,
display_options_form
=
display_options_form
,
query_form
=
query_form
,
query_download_form
=
query_download_form
,
inspect_display_options_form
=
inspect_display_options_form
,
title
=
'
Corpus analysis
'
)
inspect_display_options_form
=
InspectDisplayOptionsForm
(
prefix
=
'
inspect-display-options-form
'
)
return
render_template
(
'
corpora/analyse_corpus.html.j2
'
,
corpus_id
=
corpus_id
,
display_options_form
=
display_options_form
,
query_form
=
query_form
,
query_download_form
=
query_download_form
,
inspect_display_options_form
=
inspect_display_options_form
,
title
=
'
Corpus analysis
'
)
@corpora.route
(
'
/<int:corpus_id>/delete
'
)
...
...
@@ -114,8 +116,8 @@ def add_corpus_file(corpus_id):
school
=
add_corpus_file_form
.
school
.
data
,
title
=
add_corpus_file_form
.
title
.
data
)
db
.
session
.
add
(
corpus_file
)
corpus
.
status
=
'
unprepared
'
db
.
session
.
commit
()
tasks
.
edit_corpus_file
(
corpus_file
.
id
)
flash
(
'
Corpus file added!
'
)
return
make_response
(
{
'
redirect_url
'
:
url_for
(
'
corpora.corpus
'
,
corpus_id
=
corpus
.
id
)},
...
...
@@ -181,8 +183,8 @@ def edit_corpus_file(corpus_id, corpus_file_id):
edit_corpus_file_form
.
publishing_year
.
data
corpus_file
.
school
=
edit_corpus_file_form
.
school
.
data
corpus_file
.
title
=
edit_corpus_file_form
.
title
.
data
corpus
.
status
=
'
unprepared
'
db
.
session
.
commit
()
tasks
.
edit_corpus_file
(
corpus_file_id
)
flash
(
'
Corpus file edited!
'
)
return
redirect
(
url_for
(
'
corpora.corpus
'
,
corpus_id
=
corpus_id
))
# If no form is submitted or valid, fill out fields with current values
...
...
@@ -211,9 +213,8 @@ def prepare_corpus(corpus_id):
if
not
(
corpus
.
creator
==
current_user
or
current_user
.
is_administrator
()):
abort
(
403
)
if
corpus
.
files
.
all
():
corpus
.
status
=
'
submitted
'
db
.
session
.
commit
()
flash
(
'
Corpus marked for preparation!
'
)
tasks
.
build_corpus
(
corpus_id
)
flash
(
'
Corpus gets build now.
'
)
else
:
flash
(
'
Can not
prepare
corpus, please add corpus file(s).
'
)
flash
(
'
Can not
build
corpus, please add corpus file(s).
'
)
return
redirect
(
url_for
(
'
corpora.corpus
'
,
corpus_id
=
corpus_id
))
This diff is collapsed.
Click to expand it.
app/models.py
+
0
−
26
View file @
edc0b340
...
...
@@ -7,7 +7,6 @@ from werkzeug.utils import secure_filename
from
.
import
db
,
logger
,
login_manager
import
os
import
shutil
import
xml.etree.ElementTree
as
ET
class
Permission
:
...
...
@@ -380,28 +379,6 @@ class CorpusFile(db.Model):
db
.
session
.
delete
(
self
)
db
.
session
.
commit
()
def insert_metadata(self):
    """Write this record's metadata onto the ``<text>`` element of its file.

    The file is located under the ``NOPAQUE_STORAGE`` directory using
    ``self.dir`` and ``self.filename``, parsed, updated and saved in place.
    Afterwards the owning corpus is marked ``'unprepared'`` and the database
    session is committed.
    """
    path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                        self.dir,
                        self.filename)
    tree = ET.parse(path)
    text_node = tree.find('text')
    # Pairs are listed in the order they are applied; optional fields fall
    # back to the literal string "NULL" when empty.
    attributes = (
        ('address', self.address or "NULL"),
        ('author', self.author),
        ('booktitle', self.booktitle or "NULL"),
        ('chapter', self.chapter or "NULL"),
        ('editor', self.editor or "NULL"),
        ('institution', self.institution or "NULL"),
        ('journal', self.journal or "NULL"),
        ('pages', self.pages or "NULL"),
        ('publisher', self.publisher or "NULL"),
        ('publishing_year', str(self.publishing_year)),
        ('school', self.school or "NULL"),
        ('title', self.title),
    )
    for key, value in attributes:
        text_node.set(key, value)
    tree.write(path)
    self.corpus.status = 'unprepared'
    db.session.commit()
def
to_dict
(
self
):
return
{
'
id
'
:
self
.
id
,
'
address
'
:
self
.
address
,
...
...
@@ -447,9 +424,6 @@ class Corpus(db.Model):
'
title
'
:
self
.
title
,
'
user_id
'
:
self
.
user_id
}
def build(self):
    """Do nothing; this method is an empty stub that returns ``None``."""
def
delete
(
self
):
for
corpus_file
in
self
.
files
:
db
.
session
.
delete
(
corpus_file
)
...
...
This diff is collapsed.
Click to expand it.
docker-compose.yml
+
5
−
0
View file @
edc0b340
...
...
@@ -5,6 +5,9 @@ networks:
external
:
name
:
reverse-proxy
volumes
:
redis-trash1
:
services
:
web
:
depends_on
:
...
...
@@ -52,3 +55,5 @@ services:
-
"
/srv/nopaque/db:/var/lib/postgresql/data"
redis
:
image
:
redis:5
volumes
:
-
"
redis-trash1:/data"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment