Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
nopaque
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Monitor
Service Desk
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
Looking for advice? Join the
Matrix channel for GitLab users in Bielefeld
!
Show more breadcrumbs
SFB 1288 - INF
nopaque
Commits
cedc1f11
Commit
cedc1f11
authored
5 years ago
by
Patrick Jentsch
Browse files
Options
Downloads
Patches
Plain Diff
Update
parent
e881401a
No related branches found
No related tags found
No related merge requests found
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
app/corpora/CQiWrapper/CQi.py
+609
-0
609 additions, 0 deletions
app/corpora/CQiWrapper/CQi.py
app/corpora/CQiWrapper/CQiClient.py
+0
-620
0 additions, 620 deletions
app/corpora/CQiWrapper/CQiClient.py
app/corpora/CQiWrapper/CQiWrapper.py
+5
-6
5 additions, 6 deletions
app/corpora/CQiWrapper/CQiWrapper.py
with
614 additions
and
626 deletions
app/corpora/CQiWrapper/CQi.py
+
609
−
0
View file @
cedc1f11
...
...
@@ -6,6 +6,8 @@
# Modified by: Patrick Jentsch <p.jentsch@uni-bielefeld.de> #
# Modified date: Thurs Oct 10 <Uhrzeit> #
# ########################################################################### #
import
socket
import
struct
"""
1. padding
"""
...
...
@@ -404,3 +406,610 @@ lookup = {
5392
:
'
CQI_CQP_FDIST_1
'
,
5393
:
'
CQI_CQP_FDIST_2
'
}
class Client:
    """Low-level client for a CQi server reached over a TCP socket.

    Each public method sends one command word followed by its arguments
    and, with the exception of ctrl_user_abort, returns the decoded server
    response (see __recv_response). The command constants (CTRL_*, ASK_*,
    CORPUS_*, CL_*, CQP_*), the response constants (STATUS, ERROR, DATA,
    CL_ERROR, CQP_ERROR, DATA_*) and the ``lookup`` table are module-level
    names defined elsewhere in this file.
    """

    def __init__(self, host='127.0.0.1', port=4877):
        """Create a socket and connect it to <host>:<port>."""
        self.host = host
        self.port = port
        self.connection = socket.socket()
        try:
            self.connection.connect((self.host, self.port))
        except OSError:
            # Do not leak the socket when the connection attempt fails.
            self.connection.close()
            raise

    # ------------------------------------------------------------------ #
    # CTRL commands                                                      #
    # ------------------------------------------------------------------ #
    def ctrl_connect(self, username, password):
        """Send CTRL_CONNECT with the given credentials.

        INPUT: (STRING username, STRING password)
        OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
        """
        self.__send_WORD(CTRL_CONNECT)
        self.__send_STRING(username)
        self.__send_STRING(password)
        return self.__recv_response()

    def ctrl_bye(self):
        """Send CTRL_BYE.

        INPUT: ()
        OUTPUT: CQI_STATUS_BYE_OK
        """
        self.__send_WORD(CTRL_BYE)
        return self.__recv_response()

    def ctrl_user_abort(self):
        """Send CTRL_USER_ABORT; no response is read.

        INPUT: ()
        OUTPUT:
        """
        self.__send_WORD(CTRL_USER_ABORT)

    def ctrl_ping(self):
        """Send CTRL_PING.

        INPUT: ()
        OUTPUT: CQI_STATUS_PING_OK
        """
        self.__send_WORD(CTRL_PING)
        return self.__recv_response()

    def ctrl_last_general_error(self):
        """Return the full-text error message for the last general error
        reported by the CQi server.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # ASK_FEATURE commands                                               #
    # ------------------------------------------------------------------ #
    def ask_feature_cqi_1_0(self):
        """Send ASK_FEATURE_CQI_1_0.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(ASK_FEATURE_CQI_1_0)
        return self.__recv_response()

    def ask_feature_cl_2_3(self):
        """Send ASK_FEATURE_CL_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(ASK_FEATURE_CL_2_3)
        return self.__recv_response()

    def ask_feature_cqp_2_3(self):
        """Send ASK_FEATURE_CQP_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        # Fixed: this previously sent ASK_FEATURE_CL_2_3 (copy/paste slip).
        # NOTE(review): assumes ASK_FEATURE_CQP_2_3 is defined next to the
        # other ASK_FEATURE_* constants of this module - confirm.
        self.__send_WORD(ASK_FEATURE_CQP_2_3)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CORPUS commands                                                    #
    # ------------------------------------------------------------------ #
    def corpus_list_corpora(self):
        """Send CORPUS_LIST_CORPORA.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_LIST_CORPORA)
        return self.__recv_response()

    # Backward-compatible alias: the method used to be available only under
    # this misspelled name, so existing callers keep working.
    corpus_list_coprora = corpus_list_corpora

    def corpus_charset(self, corpus):
        """Send CORPUS_CHARSET for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CORPUS_CHARSET)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_properties(self, corpus):
        """Send CORPUS_PROPERTIES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_PROPERTIES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_positional_attributes(self, corpus):
        """Send CORPUS_POSITIONAL_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attributes(self, corpus):
        """Send CORPUS_STRUCTURAL_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attribute_has_values(self, attribute):
        """Send CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES for <attribute>.

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def corpus_alignment_attributes(self, corpus):
        """Send CORPUS_ALIGNMENT_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_full_name(self, corpus):
        """Return the full name of <corpus> as specified in its registry
        entry.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CORPUS_FULL_NAME)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_info(self, corpus):
        """Return the contents of the .info file of <corpus> as a list of
        lines.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_INFO)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_drop_corpus(self, corpus):
        """Try to unload a corpus and all its attributes from memory.

        'Broken'
        TODO: Check what type of return value is provided by the server.

        INPUT: (STRING corpus)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CORPUS_DROP_CORPUS)
        self.__send_STRING(corpus)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CL commands                                                        #
    # ------------------------------------------------------------------ #
    def cl_attribute_size(self, attribute):
        """Return the size of <attribute>.

        The size is the number of tokens (positional), the number of
        regions (structural) or the number of alignments (alignment).

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CL_ATTRIBUTE_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_lexicon_size(self, attribute):
        """Return the number of entries in the lexicon of a positional
        attribute; valid lexicon IDs range from 0 .. (lexicon_size - 1).

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CL_LEXICON_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_drop_attribute(self, attribute):
        """Unload <attribute> from memory.

        INPUT: (STRING attribute)
        OUTPUT: CQI_STATUS_OK
        """
        # Fixed: this previously sent CL_LEXICON_SIZE.
        # NOTE(review): assumes CL_DROP_ATTRIBUTE is defined in this module
        # like the other CL_* command constants - confirm.
        self.__send_WORD(CL_DROP_ATTRIBUTE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    # NOTE: simple (scalar) mappings are applied to lists (the returned list
    #       has exactly the same length as the list passed as an argument)

    def cl_str2id(self, attribute, strings):
        """Map strings to lexicon IDs; returns -1 for every string in
        <strings> that is not found in the lexicon.

        INPUT: (STRING attribute, STRING_LIST strings)
        OUTPUT: CQI_DATA_INT_LIST
        """
        # Fixed: this previously sent CL_LEXICON_SIZE.
        # NOTE(review): assumes CL_STR2ID is defined in this module - confirm.
        self.__send_WORD(CL_STR2ID)
        self.__send_STRING(attribute)
        self.__send_STRING_LIST(strings)
        return self.__recv_response()

    def cl_id2str(self, attribute, id):
        """Map lexicon IDs to strings; returns "" for every ID in <id>
        that is out of range.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_ID2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_id2freq(self, attribute, id):
        """Map lexicon IDs to frequencies; returns 0 for every ID in <id>
        that is out of range.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_ID2FREQ)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_cpos2id(self, attribute, cpos):
        """Map corpus positions to lexicon IDs; returns -1 for every corpus
        position in <cpos> that is out of range.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        # Fixed: this previously sent CL_ID2FREQ.
        # NOTE(review): assumes CL_CPOS2ID is defined in this module -
        # confirm.
        self.__send_WORD(CL_CPOS2ID)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2str(self, attribute, cpos):
        """Map corpus positions to strings; returns "" for every corpus
        position in <cpos> that is out of range.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_CPOS2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2struc(self, attribute, cpos):
        """Map corpus positions to structure numbers; returns -1 for every
        corpus position not inside a structure region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2STRUC)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    # NOTE: temporary addition for the Euralex2000 tutorial, but should
    #       probably be included in CQi specs

    def cl_cpos2lbound(self, attribute, cpos):
        """Return the left boundary of the s-attribute region enclosing
        cpos, -1 if not in region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2LBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2rbound(self, attribute, cpos):
        """Return the right boundary of the s-attribute region enclosing
        cpos, -1 if not in region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2RBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2alg(self, attribute, cpos):
        """Map corpus positions to alignments; returns -1 for every corpus
        position not inside an alignment.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2ALG)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_struc2str(self, attribute, strucs):
        """Return the annotated string values of the structure regions in
        <strucs>; "" if out of range.

        Check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first.

        INPUT: (STRING attribute, INT_LIST strucs)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_STRUC2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(strucs)
        return self.__recv_response()

    # NOTE: the following mappings take a single argument and return
    #       multiple values, including lists of arbitrary size

    def cl_id2cpos(self, attribute, id):
        """Return all corpus positions where the given token occurs.

        INPUT: (STRING attribute, INT id)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_ID2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(id)
        return self.__recv_response()

    def cl_idlist2cpos(self, attribute, id_list):
        """Return all corpus positions where one of the tokens in <id_list>
        occurs; the returned list is sorted as a whole, not per token id.

        INPUT: (STRING attribute, INT_LIST id_list)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_IDLIST2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id_list)
        return self.__recv_response()

    def cl_regex2id(self, attribute, regex):
        """Return the lexicon IDs of all tokens that match <regex>; the
        returned list may be empty (size 0).

        INPUT: (STRING attribute, STRING regex)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_REGEX2ID)
        self.__send_STRING(attribute)
        self.__send_STRING(regex)
        return self.__recv_response()

    def cl_struc2cpos(self, attribute, struc):
        """Return the start and end corpus positions of structure region
        <struc>.

        INPUT: (STRING attribute, INT struc)
        OUTPUT: CQI_DATA_INT_INT
        """
        self.__send_WORD(CL_STRUC2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(struc)
        return self.__recv_response()

    def cl_alg2cpos(self, attribute, alg):
        """Return (src_start, src_end, target_start, target_end).

        INPUT: (STRING attribute, INT alg)
        OUTPUT: CQI_DATA_INT_INT_INT_INT
        """
        self.__send_WORD(CL_ALG2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(alg)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQP commands                                                       #
    # ------------------------------------------------------------------ #
    def cqp_query(self, mother_corpus, subcorpus_name, query):
        """Send CQP_QUERY.

        <query> must include the ';' character terminating the query.

        INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CQP_QUERY)
        self.__send_STRING(mother_corpus)
        self.__send_STRING(subcorpus_name)
        self.__send_STRING(query)
        return self.__recv_response()

    def cqp_list_subcorpora(self, corpus):
        """Send CQP_LIST_SUBCORPORA for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQP_LIST_SUBCORPORA)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def cqp_subcorpus_size(self, subcorpus):
        """Send CQP_SUBCORPUS_SIZE for <subcorpus>.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CQP_SUBCORPUS_SIZE)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    def cqp_subcorpus_has_field(self, subcorpus, field):
        """Send CQP_SUBCORPUS_HAS_FIELD for <subcorpus> and <field>.

        INPUT: (STRING subcorpus, BYTE field)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        return self.__recv_response()

    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
        """Dump the values of <field> for match ranges <first> .. <last>
        in <subcorpus>.

        <field> is one of the CQI_CONST_FIELD_* constants.

        INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_DUMP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        self.__send_INT(first)
        self.__send_INT(last)
        return self.__recv_response()

    def cqp_drop_subcorpus(self, subcorpus):
        """Delete a subcorpus from memory.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CQP_DROP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    # NOTE: The following two functions are temporarily included for the
    #       Euralex 2000 tutorial demo

    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
        """Frequency distribution of single tokens.

        Returns <n> (id, frequency) pairs flattened into a list of size
        2*<n>. <field> is one of CQI_CONST_FIELD_MATCH,
        CQI_CONST_FIELD_TARGET, CQI_CONST_FIELD_KEYWORD.
        NB: pairs are sorted by frequency desc.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_FDIST_1)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
                    attribute2):
        """Frequency distribution of pairs of tokens.

        Returns <n> (id1, id2, frequency) pairs flattened into a list of
        size 3*<n>.
        NB: triples are sorted by frequency desc.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field1,
                STRING attribute1, BYTE field2, STRING attribute2)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_FDIST_2)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field1)
        self.__send_STRING(attribute1)
        self.__send_BYTE(field2)
        self.__send_STRING(attribute2)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # Receiving                                                          #
    # ------------------------------------------------------------------ #
    def __recv_response(self):
        """Read one response word and decode whatever follows it.

        The high byte of the word selects the response group. STATUS
        responses return the word itself, DATA responses return the decoded
        payload, and ERROR / CL_ERROR / CQP_ERROR responses raise an
        Exception carrying the name found in ``lookup``.
        """
        response = self.__recv_WORD()
        response_type = response >> 8
        if response_type in (CL_ERROR, CQP_ERROR, ERROR):
            # Fall back to the numeric code so that an error missing from
            # the lookup table is not masked by a KeyError.
            raise Exception(
                lookup.get(response,
                           'Unknown error code: {}'.format(hex(response)))
            )
        if response_type == DATA:
            return self.__recv_DATA(response)
        if response_type == STATUS:
            return response
        raise Exception(
            'Unknown response type: {}'.format(hex(response_type))
        )

    def __recv_DATA(self, data_type):
        """Decode the payload announced by the DATA_* word <data_type>."""
        readers = {
            DATA_BYTE: self.__recv_DATA_BYTE,
            DATA_BOOL: self.__recv_DATA_BOOL,
            DATA_INT: self.__recv_DATA_INT,
            DATA_STRING: self.__recv_DATA_STRING,
            DATA_BYTE_LIST: self.__recv_DATA_BYTE_LIST,
            DATA_BOOL_LIST: self.__recv_DATA_BOOL_LIST,
            DATA_INT_LIST: self.__recv_DATA_INT_LIST,
            DATA_STRING_LIST: self.__recv_DATA_STRING_LIST,
            DATA_INT_INT: self.__recv_DATA_INT_INT,
            DATA_INT_INT_INT_INT: self.__recv_DATA_INT_INT_INT_INT,
            DATA_INT_TABLE: self.__recv_DATA_INT_TABLE,
        }
        reader = readers.get(data_type)
        if reader is None:
            raise Exception('Unknown data type: {}'.format(hex(data_type)))
        return reader()

    def __recv_bytes(self, n):
        """Read exactly <n> bytes from the connection.

        recv() may hand back fewer bytes than requested even with
        MSG_WAITALL, so keep reading until the request is satisfied.

        Raises:
            ConnectionError: the peer closed the connection before <n>
                bytes arrived.
        """
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = self.connection.recv(remaining, socket.MSG_WAITALL)
            if not chunk:
                raise ConnectionError(
                    'Connection closed while waiting for {} more '
                    'byte(s)'.format(remaining)
                )
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def __recv_DATA_BYTE(self):
        """Read one unsigned byte."""
        return struct.unpack('!B', self.__recv_bytes(1))[0]

    def __recv_DATA_BOOL(self):
        """Read one byte and return it as a bool."""
        return struct.unpack('!?', self.__recv_bytes(1))[0]

    def __recv_DATA_INT(self):
        """Read one signed 32-bit integer in network byte order."""
        return struct.unpack('!i', self.__recv_bytes(4))[0]

    def __recv_DATA_STRING(self):
        """Read a string: a length word followed by that many bytes."""
        n = self.__recv_WORD()
        return self.__recv_bytes(n).decode()

    def __recv_DATA_BYTE_LIST(self):
        """Read an INT element count followed by that many bytes."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BYTE() for _ in range(n)]

    def __recv_DATA_BOOL_LIST(self):
        """Read an INT element count followed by that many bools."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BOOL() for _ in range(n)]

    def __recv_DATA_INT_LIST(self):
        """Read an INT element count followed by that many ints."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_INT() for _ in range(n)]

    def __recv_DATA_STRING_LIST(self):
        """Read an INT element count followed by that many strings."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_STRING() for _ in range(n)]

    def __recv_DATA_INT_INT(self):
        """Read two ints and return them as a tuple."""
        first = self.__recv_DATA_INT()
        second = self.__recv_DATA_INT()
        return (first, second)

    def __recv_DATA_INT_INT_INT_INT(self):
        """Read four ints and return them as a tuple."""
        first = self.__recv_DATA_INT()
        second = self.__recv_DATA_INT()
        third = self.__recv_DATA_INT()
        fourth = self.__recv_DATA_INT()
        return (first, second, third, fourth)

    def __recv_DATA_INT_TABLE(self):
        """Read a row count, a column count and then the ints row by row;
        returns a list of rows."""
        rows = self.__recv_DATA_INT()
        columns = self.__recv_DATA_INT()
        return [
            [self.__recv_DATA_INT() for _ in range(columns)]
            for _ in range(rows)
        ]

    def __recv_WORD(self):
        """Read one unsigned 16-bit word in network byte order."""
        return struct.unpack('!H', self.__recv_bytes(2))[0]

    # ------------------------------------------------------------------ #
    # Sending                                                            #
    # ------------------------------------------------------------------ #
    def __send_BYTE(self, byte_data):
        """Send one unsigned byte."""
        self.connection.sendall(struct.pack('!B', byte_data))

    def __send_BOOL(self, bool_data):
        """Send a bool as one byte."""
        self.connection.sendall(struct.pack('!?', bool_data))

    def __send_INT(self, int_data):
        """Send one signed 32-bit integer in network byte order."""
        self.connection.sendall(struct.pack('!i', int_data))

    def __send_STRING(self, string_data):
        """Send a string as a length word followed by its UTF-8 bytes."""
        encoded = string_data.encode('utf-8')
        n = len(encoded)
        self.connection.sendall(struct.pack('!H{}s'.format(n), n, encoded))

    def __send_INT_LIST(self, int_list_data):
        """Send an INT element count followed by the ints themselves."""
        n = len(int_list_data)
        # Pack the count and all elements at once: same bytes on the wire,
        # but a single sendall instead of one per element.
        payload = struct.pack('!{}i'.format(n + 1), n, *int_list_data)
        self.connection.sendall(payload)

    def __send_STRING_LIST(self, string_list_data):
        """Send an INT element count followed by each string."""
        self.__send_INT(len(string_list_data))
        for string_data in string_list_data:
            self.__send_STRING(string_data)

    def __send_WORD(self, word_data):
        """Send one unsigned 16-bit word in network byte order."""
        self.connection.sendall(struct.pack('!H', word_data))
This diff is collapsed.
Click to expand it.
app/corpora/CQiWrapper/CQiClient.py
deleted
100644 → 0
+
0
−
620
View file @
e881401a
This diff is collapsed.
Click to expand it.
app/corpora/CQiWrapper/CQiWrapper.py
+
5
−
6
View file @
cedc1f11
from
.CQiClient
import
CQiClient
from
.CQi
import
CONST_FIELD_MATCH
,
CONST_FIELD_MATCHEND
import
time
from
app
import
logger
# only works if imported into the nopaque web app
from
.
import
CQi
import
time
class
CQiWrapper
(
CQiClient
):
class
CQiWrapper
(
CQi
.
Client
):
'''
CQIiWrapper object
...
...
@@ -155,11 +154,11 @@ class CQiWrapper(CQiClient):
offset_end
=
min
((
self
.
nr_matches
+
result_offset
-
1
),
self
.
match_count
-
1
)
logger
.
warning
(
'
Offset end is: {}
'
.
format
(
offset_end
))
match_boundaries
=
zip
(
self
.
cqp_dump_subcorpus
(
self
.
result_subcorpus
,
CONST_FIELD_MATCH
,
CQi
.
CONST_FIELD_MATCH
,
offset_start
,
offset_end
),
self
.
cqp_dump_subcorpus
(
self
.
result_subcorpus
,
CONST_FIELD_MATCHEND
,
CQi
.
CONST_FIELD_MATCHEND
,
offset_start
,
offset_end
))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment