diff --git a/app/main/routes.py b/app/main/routes.py index 79f19318848adce0ecf0513663ae3d47b4f994cf..cf87f0b5c4cc3871ff470bcb2b6b9a61a24fb1dc 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -33,6 +33,11 @@ def dashboard(): return render_template('main/dashboard.html.j2', title='Dashboard') +@bp.route('/user_manual') +def user_manual(): + return render_template('main/user_manual.html.j2', title='User manual') + + @bp.route('/news') def news(): return render_template('main/news.html.j2', title='News') diff --git a/app/static/images/manual/add-corpus-file.png b/app/static/images/manual/add-corpus-file.png new file mode 100644 index 0000000000000000000000000000000000000000..a696e4da4a7de12d770d1bdf47932bef1509386d Binary files /dev/null and b/app/static/images/manual/add-corpus-file.png differ diff --git a/app/static/images/manual/corpus.png b/app/static/images/manual/corpus.png new file mode 100644 index 0000000000000000000000000000000000000000..be081109b2fe80ef120aaf7d0ad8cdfc9adcb0e7 Binary files /dev/null and b/app/static/images/manual/corpus.png differ diff --git a/app/static/images/manual/create-a-corpus.png b/app/static/images/manual/create-a-corpus.png new file mode 100644 index 0000000000000000000000000000000000000000..a1d993e8ae608771bc889b9df9bd7212e54a7eb3 Binary files /dev/null and b/app/static/images/manual/create-a-corpus.png differ diff --git a/app/static/images/manual/dashboard.png b/app/static/images/manual/dashboard.png new file mode 100644 index 0000000000000000000000000000000000000000..3034a81910de009e86442054f9818fa59e4552fa Binary files /dev/null and b/app/static/images/manual/dashboard.png differ diff --git a/app/static/images/manual/job.png b/app/static/images/manual/job.png new file mode 100644 index 0000000000000000000000000000000000000000..98aa64bdcbd49963e3ea1bac4a65b457308776ca Binary files /dev/null and b/app/static/images/manual/job.png differ diff --git a/app/static/images/manual/registration-and-log-in.png 
b/app/static/images/manual/registration-and-log-in.png new file mode 100644 index 0000000000000000000000000000000000000000..3ffc274b29482caa46e0dc694336c5787725cb3b Binary files /dev/null and b/app/static/images/manual/registration-and-log-in.png differ diff --git a/app/static/images/manual/services.png b/app/static/images/manual/services.png new file mode 100644 index 0000000000000000000000000000000000000000..047b4bf15d7359d0ab0179df609deb55d580be0a Binary files /dev/null and b/app/static/images/manual/services.png differ diff --git a/app/templates/_navbar.html.j2 b/app/templates/_navbar.html.j2 index 485960b0d3a2e9859bf574b0f8d8ff4a56d53bf5..63631a011ce7b9bf9bf44fe4869b644010b3262e 100644 --- a/app/templates/_navbar.html.j2 +++ b/app/templates/_navbar.html.j2 @@ -27,6 +27,7 @@ </div> <ul class="dropdown-content" id="nav-more-dropdown"> + <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons left">help</i>Manual</a></li> {% if current_user.is_authenticated %} <li><a href="{{ url_for('settings.index') }}"><i class="material-icons left">settings</i>Settings</a></li> <li class="divider" tabindex="-1"></li> diff --git a/app/templates/_sidenav.html.j2 b/app/templates/_sidenav.html.j2 index c246b6bde255b2a9e485fd8159c2eda6bba4aac8..08d8f163d28c7ef24e35f129eb6e48a482d24fd5 100644 --- a/app/templates/_sidenav.html.j2 +++ b/app/templates/_sidenav.html.j2 @@ -8,7 +8,7 @@ </li> <li><a href="{{ url_for('main.index') }}">nopaque</a></li> <li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li> - <li><a href="#"><i class="material-icons">linear_scale</i>Workflow</a></li> + <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons">help</i>Manual</a></li> <li><a href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a></li> <li><a href="{{ url_for('main.dashboard', _anchor='corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>My Corpora</a></li> <li><a 
href="{{ url_for('main.dashboard', _anchor='jobs') }}" style="padding-left: 47px;"><i class="nopaque-icons">J</i>My Jobs</a></li> diff --git a/app/templates/corpora/analyse_corpus.concordance.html.j2 b/app/templates/corpora/analyse_corpus.concordance.html.j2 index 4a9cc1396939f25340cd32358625757102967c62..3f5560e68d4344105b19325db36dfdad0878a33c 100644 --- a/app/templates/corpora/analyse_corpus.concordance.html.j2 +++ b/app/templates/corpora/analyse_corpus.concordance.html.j2 @@ -6,9 +6,12 @@ <div class="row"> <div class="input-field col s12 m9"> <i class="material-icons prefix">search</i> - <input class="validate corpus-analysis-action" id="concordance-extension-form-query" name="query" type="text" required pattern=".*\S+.*"></input> + <input class="validate corpus-analysis-action" id="concordance-extension-form-query" name="query" type="text" required pattern=".*\S+.*" placeholder="&lt;ent_type=&quot;PERSON&quot;&gt; []* &lt;/ent_type&gt; []* [simple_pos=&quot;VERB&quot;] :: match.text_publishing_year=&quot;1991&quot;;"></input> <label for="concordance-extension-form-query">Query</label> <span class="error-color-text helper-text hide" id="concordance-extension-error"></span> + <a class="modal-trigger" href="#cql-tutorial-modal" style="margin-left: 40px;"><i class="material-icons" style="font-size: inherit;">help</i> Corpus Query Language tutorial</a> + <span> | </span> + <a class="modal-trigger" href="#tagsets-modal"><i class="material-icons" style="font-size: inherit;">info</i> Tagsets</a> </div> <div class="input-field col s12 m3"> <i class="material-icons prefix">arrow_forward</i> diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index eac035ad05bebf52252ce1156cd17e6b9b9818f2..3ba55da962b7211fa3ff8d1ecd08507b46ec9f79 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -53,6 +53,198 @@ <p class="error-color-text hide" id="corpus-analysis-app-init-error"></p> </div> </div> + +<div 
class="modal" id="cql-tutorial-modal"> + <div class="modal-content"> + {% with headline_num=4 %} + {% include "main/manual/_08_cqp_query_language.html.j2" %} + {% endwith %} + </div> +</div> + +<div class="modal" id="tagsets-modal"> + <div class="modal-content"> + <h4>Tagsets</h4> + <ul class="tabs"> + <li class="tab"><a class="active" href="#simple_pos-tagset">simple_pos</a></li> + <li class="tab"><a href="#english-ent_type-tagset">English ent_type</a></li> + <li class="tab"><a href="#english-pos-tagset">English pos</a></li> + <li class="tab"><a href="#german-ent_type-tagset">German ent_type</a></li> + <li class="tab"><a href="#german-pos-tagset">German pos</a></li> + </ul> + + <div id="simple_pos-tagset"> + <h5>simple_pos tagset</h5> + <ul> + <li>ADJ: adjective</li> + <li>ADP: adposition</li> + <li>ADV: adverb</li> + <li>AUX: auxiliary verb</li> + <li>CONJ: coordinating conjunction</li> + <li>DET: determiner</li> + <li>INTJ: interjection</li> + <li>NOUN: noun</li> + <li>NUM: numeral</li> + <li>PART: particle</li> + <li>PRON: pronoun</li> + <li>PROPN: proper noun</li> + <li>PUNCT: punctuation</li> + <li>SCONJ: subordinating conjunction</li> + <li>SYM: symbol</li> + <li>VERB: verb</li> + <li>X: other</li> + </ul> + </div> + + <div id="english-ent_type-tagset"> + <h5>English ent_type tagset</h5> + <ul> + <li>CARDINAL: Numerals that do not fall under another type</li> + <li>DATE: Absolute or relative dates or periods</li> + <li>EVENT: Named hurricanes, battles, wars, sports events, etc.</li> + <li>FAC: Buildings, airports, highways, bridges, etc.</li> + <li>GPE: Countries, cities, states</li> + <li>LANGUAGE: Any named language</li> + <li>LAW: Named documents made into laws.</li> + <li>LOC: Non-GPE locations, mountain ranges, bodies of water</li> + <li>MONEY: Monetary values, including unit</li> + <li>NORP: Nationalities or religious or political groups</li> + <li>ORDINAL: "first" "second" etc.</li> + <li>ORG: Companies, agencies, institutions, etc.</li> + 
<li>PERCENT: Percentage, including "%"</li> + <li>PERSON: People, including fictional</li> + <li>PRODUCT: Objects, vehicles, foods, etc. (not services)</li> + <li>QUANTITY: Measurements, as of weight or distance</li> + <li>TIME: Times smaller than a day</li> + <li>WORK_OF_ART: Titles of books, songs, etc.</li> + </ul> + </div> + + <div id="english-pos-tagset"> + <h5>English pos tagset</h5> + <ul> + <li>ADD: email</li> + <li>AFX: affix</li> + <li>CC: conjunction, coordinating</li> + <li>CD: cardinal number</li> + <li>DT: determiner</li> + <li>EX: existential there</li> + <li>FW: foreign word</li> + <li>HYPH: punctuation mark, hyphen</li> + <li>IN: conjunction, subordinating or preposition</li> + <li>JJ: adjective</li> + <li>JJR: adjective, comparative</li> + <li>JJS: adjective, superlative</li> + <li>LS: list item marker</li> + <li>MD: verb, modal auxiliary</li> + <li>NFP: superfluous punctuation</li> + <li>NN: noun, singular or mass</li> + <li>NNP: noun, proper singular</li> + <li>NNPS: noun, proper plural</li> + <li>NNS: noun, plural</li> + <li>PDT: predeterminer</li> + <li>POS: possessive ending</li> + <li>PRP: pronoun, personal</li> + <li>PRP$: pronoun, possessive</li> <li>RB: adverb</li> + <li>RBR: adverb, comparative</li> + <li>RBS: adverb, superlative</li> + <li>RP: adverb, particle</li> + <li>SYM: symbol</li> + <li>TO: infinitival "to"</li> + <li>UH: interjection</li> + <li>VB: verb, base form</li> + <li>VBD: verb, past tense</li> + <li>VBG: verb, gerund or present participle</li> + <li>VBN: verb, past participle</li> + <li>VBP: verb, non-3rd person singular present</li> + <li>VBZ: verb, 3rd person singular present</li> + <li>WDT: wh-determiner</li> + <li>WP: wh-pronoun, personal</li> + <li>WP$: wh-pronoun, possessive</li> + <li>WRB: wh-adverb</li> + <li>XX: unknown</li> + <li>``: opening quotation mark</li> + <li>$: symbol, currency</li> + <li>'': closing quotation mark</li> + <li>,: punctuation mark, comma</li> + <li>-LRB-: left round bracket</li> + <li>-RRB-: right 
round bracket</li> + <li>.: punctuation mark, sentence closer</li> + <li>:: punctuation mark, colon or ellipsis</li> + </ul> + </div> + + <div id="german-ent_type-tagset"> + <h5>German ent_type tagset</h5> + <ul> + <li>LOC: Non-GPE locations, mountain ranges, bodies of water</li> + <li>MISC: Miscellaneous entities, e.g. events, nationalities, products or works of art</li> + <li>ORG: Companies, agencies, institutions, etc.</li> + <li>PER: Named person or family.</li> + </ul> + </div> + + <div id="german-pos-tagset"> + <h5>German pos tagset</h5> + <ul> + <li>ADJA: adjective, attributive</li> + <li>ADJD: adjective, adverbial or predicative</li> + <li>ADV: adverb</li> + <li>APPO: postposition</li> + <li>APPR: preposition; circumposition left</li> + <li>APPRART: preposition with article</li> + <li>APZR: circumposition right</li> + <li>ART: definite or indefinite article</li> + <li>CARD: cardinal number</li> + <li>FM: foreign language material</li> + <li>ITJ: interjection</li> + <li>KOKOM: comparative conjunction</li> + <li>KON: coordinate conjunction</li> + <li>KOUI: subordinate conjunction with "zu" and infinitive</li> + <li>KOUS: subordinate conjunction with sentence</li> + <li>NE: proper noun</li> + <li>NN: noun, singular or mass</li> + <li>NNE: proper noun</li> + <li>PDAT: attributive demonstrative pronoun</li> + <li>PDS: substituting demonstrative pronoun</li> + <li>PIAT: attributive indefinite pronoun without determiner</li> + <li>PIS: substituting indefinite pronoun</li> + <li>PPER: non-reflexive personal pronoun</li> + <li>PPOSAT: attributive possessive pronoun</li> + <li>PPOSS: substituting possessive pronoun</li> + <li>PRELAT: attributive relative pronoun</li> + <li>PRELS: substituting relative pronoun</li> + <li>PRF: reflexive personal pronoun</li> + <li>PROAV: pronominal adverb</li> + <li>PTKA: particle with adjective or adverb</li> + <li>PTKANT: answer particle</li> + <li>PTKNEG: negative particle</li> + <li>PTKVZ: separable verbal particle</li> + 
<li>PTKZU: "zu" before infinitive</li> + <li>PWAT: attributive interrogative pronoun</li> + <li>PWAV: adverbial interrogative or relative pronoun</li> + <li>PWS: substituting interrogative pronoun</li> + <li>TRUNC: word remnant</li> + <li>VAFIN: finite verb, auxiliary</li> + <li>VAIMP: imperative, auxiliary</li> + <li>VAINF: infinitive, auxiliary</li> + <li>VAPP: perfect participle, auxiliary</li> + <li>VMFIN: finite verb, modal</li> + <li>VMINF: infinitive, modal</li> + <li>VMPP: perfect participle, modal</li> + <li>VVFIN: finite verb, full</li> + <li>VVIMP: imperative, full</li> + <li>VVINF: infinitive, full</li> + <li>VVIZU: infinitive with "zu" full</li> + <li>VVPP: perfect participle, full</li> + <li>XY: non-word containing non-letter</li> + <li>$(: other sentence-internal punctuation mark</li> + <li>$,: comma</li> + <li>$.: sentence-final punctuation mark</li> + </ul> + </div> + </div> +</div> {% endblock modals %} {% block scripts %} diff --git a/app/templates/main/manual/_01_introduction.html.j2 b/app/templates/main/manual/_01_introduction.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..4606492410d5e11a102472ed3c140278ae1c2b9c --- /dev/null +++ b/app/templates/main/manual/_01_introduction.html.j2 @@ -0,0 +1,9 @@ +<h2>Introduction</h2> +<p> + nopaque is a web-based digital working environment. It implements a + workflow based on the research process in the humanities and supports its + users in processing their data in order to subsequently apply digital + analysis methods to them. All processes are implemented in a specially + provided cloud environment with established open source software. This + always ensures that no personal data of the users is disclosed. 
+</p> diff --git a/app/templates/main/manual/_02_registration_and_log_in.html.j2 b/app/templates/main/manual/_02_registration_and_log_in.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..93d061f3a169112e198e1b40150db403061c7304 --- /dev/null +++ b/app/templates/main/manual/_02_registration_and_log_in.html.j2 @@ -0,0 +1,18 @@ +<h2>Registration and Log in</h2> +<div class="row"> + <div class="col s12 m4"> + <img alt="Registration and Log in" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/registration-and-log-in.png') }}"> + </div> + <div class="col s12 m8"> + <p> + Before you can start using the web platform, you need to create a user + account. This requires only a few details: just a user name, an e-mail + address and a password are needed. In order to register yourself, fill out + the form on the <a href="{{ url_for('auth.register') }}">registration page</a>. After successful registration, the + created account must be verified. To do this, follow the instructions + given in the automatically sent e-mail. Afterwards, you can log in as + usual with your username/email address and password in the log-in form + located next to the registration button. + </p> + </div> +</div> diff --git a/app/templates/main/manual/_03_dashboard.html.j2 b/app/templates/main/manual/_03_dashboard.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..abfd0ba4afc14e2eaf8bbe1dbc05ceb2904eba06 --- /dev/null +++ b/app/templates/main/manual/_03_dashboard.html.j2 @@ -0,0 +1,46 @@ +<h2>Dashboard</h2> +<div class="row"> + <div class="col s12 m4"> + <img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}"> + </div> + <div class="col s12 m8"> + <p> + The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central overview of all resources assigned to the + user. 
These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a> and created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>. Corpora are freely composable + annotated text collections and jobs are the initiated file processing + procedures. Both the job and the corpus listings can be searched using + the search field displayed above them. + </p> + </div> + <div class="col s12"> </div> + <div class="col s12 m6"> + <div class="card"> + <div class="card-content"> + <span class="card-title"><i class="nopaque-icons">I</i> Corpus</span> + <p> + A corpus is a collection of texts that can be analyzed using the + Corpus Analysis service. All texts must be in the verticalized text + file format, which can be obtained via the Natural Language + Processing service. It contains, in addition to the actual text, + further annotations that are searchable in combination with optionally + addable metadata during your analysis. + </p> + </div> + </div> + </div> + <div class="col s12 m6"> + <div class="card"> + <div class="card-content"> + <span class="card-title"><i class="nopaque-icons">J</i> Job</span> + <p> + A job is a construct that represents the execution of a service. + It stores input files, output files, processing status, and options + selected during creation. After submitting a job, you get redirected + to a job overview page. This can be accessed again via the job list + on the dashboard. Jobs will be deleted three months after creation, + so we encourage you to download the results after a job is completed. 
+ </p> + </div> + </div> + </div> +</div> diff --git a/app/templates/main/manual/_06_services.html.j2 b/app/templates/main/manual/_06_services.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..701e6c802d3e8d37ab67ab07088f4e3557d12183 --- /dev/null +++ b/app/templates/main/manual/_06_services.html.j2 @@ -0,0 +1,52 @@ +<h2>Services</h2> +<div class="row"> + <div class="col s12 m4"> + <img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}"> + </div> + <div class="col s12 m8"> + <p> + nopaque was designed from the ground up to be modular. This modularity + means that the offered workflow provides variable entry and exit points, + so that different starting points and goals can be flexibly addressed. + Each of these modules is implemented in a self-contained service, each of + which represents a step in the workflow. The services are coordinated in + such a way that they can be used consecutively. The order can either be + taken from the listing of the services in the left sidebar or from the + roadmap (accessible via the pink compass in the upper right corner). All + services are versioned, so the data generated with nopaque is always + reproducible. + </p> + </div> +</div> + +<h3>File Setup</h3> +<p> + The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos, + together in a handy PDF file. To use this service, use the job form to + select the images to be bundled, choose the desired service version, and + specify a title and description. Please note that the service sorts the + images into the resulting PDF file based on the file names. So naming the + images correctly is of great importance. It has proven to be a good practice + to name the files according to the following scheme: + page-01.png, page-02.jpg, page-03.tiff, etc. 
In general, you can assume + that the images will be sorted in the order in which the file explorer of + your operating system lists them when you view the files in a folder + sorted in ascending order by file name. +</p> + +<h3>Optical Character Recognition (OCR)</h3> +<p>Coming soon...</p> + +<h3>Handwritten Text Recognition (HTR)</h3> +<p>Coming soon...</p> + +<h3>Natural Language Processing (NLP)</h3> +<p>Coming soon...</p> + +<h3>Corpus Analysis</h3> +<p> + With the corpus analysis service, it is possible to create a text corpus + and then explore it in an analysis session. The analysis session is realized + on the server side by the Open Corpus Workbench software, which enables + efficient and complex searches with the help of the CQP Query Language. +</p> diff --git a/app/templates/main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2 b/app/templates/main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..f5a1213ed6a6b5c94ce2cf79f7fb33a5f1fa1de1 --- /dev/null +++ b/app/templates/main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2 @@ -0,0 +1,47 @@ +<h2>A closer look at the Corpus Analysis</h2> +<h3>Create a corpus</h3> +<div class="row"> + <div class="col s12 m4"> + <img alt="Create a Corpus" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/create-a-corpus.png') }}"> + </div> + <div class="col s12 m8"> + <p> + To <a href="{{ url_for('corpora.add_corpus') }}">create a corpus</a>, you + can use the "New Corpus" button, which can be found on both the Corpus + Analysis Service page and the Dashboard below the corpus list. Fill in the input + mask to create a corpus. After you have completed the input mask, you will + be automatically taken to the corpus overview page (which can be called up + again via the corpus lists) of your new and accordingly still empty corpus. 
+ </p> + </div> + <div class="col s12"> </div> + <div class="col s12 m4"> + <img alt="Add Corpus File" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/add-corpus-file.png') }}"> + </div> + <div class="col s12 m8"> + <p> + Now you can add texts in vrt format (results of the NLP service) to your new + corpus. To do this, use the "Add Corpus File" button and fill in the form + that appears. You will get the possibility to add metadata to each text. + After you have added all the desired texts to the corpus, the corpus must be + prepared for the analysis; this process can be initiated by clicking on the + "Build" button. On the corpus overview page you can always see information + about the current status of the corpus in the upper right corner. After the + build process the status should be "built". + </p> + </div> +</div> + +<h3>Analyze a corpus</h3> +<p> + After you have created and built a corpus, it can be analyzed. To do this, + use the button labeled Analyze. The corpus analysis currently offers two + modules, the Reader and the Concordance module. The reader module can be + used to read your tokenized corpus in different ways. You can select a token + representation option; it determines the property of a token to be shown. + You can for example read your text completely lemmatized. You can also change + the way of how a token is displayed, by using the text style switch. The + concordance module offers some more options regarding the context size of + search results. If the context does not provide enough information you can + hop into the reader module by using the magnifying glass icon next to a match. 
+</p> diff --git a/app/templates/main/manual/_08_cqp_query_language.html.j2 b/app/templates/main/manual/_08_cqp_query_language.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..0af61dd6b27c37504fd085d14e134586e7d705d6 --- /dev/null +++ b/app/templates/main/manual/_08_cqp_query_language.html.j2 @@ -0,0 +1,161 @@ +<h2>CQP Query Language</h2> +<p>Within the Corpus Query Language, a distinction is made between two types of annotations: positional attributes and structural attributes. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p> +<pre> + <code> + <span class="green-text">&lt;s&gt; structural attribute</span> + <span class="blue-text">word pos lemma simple_pos positional attribute</span> + <span class="green-text">&lt;ent type="PERSON"&gt; structural attribute</span> + <span class="blue-text">word pos lemma simple_pos positional attribute</span> + <span class="blue-text">word pos lemma simple_pos positional attribute</span> + <span class="green-text">&lt;/ent&gt; structural attribute</span> + <span class="blue-text">word pos lemma simple_pos positional attribute</span> + <span class="green-text">&lt;/s&gt; structural attribute</span> + </code> +</pre> + +<h3>Positional attributes</h3> +<p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p> +<ol> + <li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li> + <li> + <span class="blue-text"><b>pos</b></span>: A code for the word type, also called POS tag + <ol> + <li><span class="red-text"><b>IMPORTANT</b></span>: POS tags are language-dependent to best reflect 
language-specific properties.</li> + <li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li> + </ol> + </li> + <li><span class="blue-text"><b>lemma</b></span>: The lemmatized representation of the word</li> + <li> + <span class="blue-text"><b>simple_pos</b></span>: A simplified code for the word type that covers fewer categories than the <span class="blue-text"><b>pos</b></span> property, but is the same across languages. + <ol> + <li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li> + </ol> + </li> +</ol> + +<h4>Searching for positional attributes</h4> +<div> + <p> + <b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br> + </p> + <pre><code>[]; Each token matches this pattern</code></pre> +</div> +<div> + <p> + <b>Token with a condition on its <span class="blue-text">word</span> property</b> + </p> + <pre><code>[word="begin"]; “begin”</code></pre> + <pre><code>[word="begin" %c]; same as above but ignores case</code></pre> +</div> +<div> + <p> + <b>Token with a condition on its <span class="blue-text">lemma</span> property</b> + </p> + <pre><code>[lemma="begin"]; “begin”, “began”, “beginning”, …</code></pre> + <pre><code>[lemma="begin" %c]; same as above but ignores case</code></pre> +</div> +<div> + <p> + <b>Token with a condition on its <span class="blue-text">simple_pos</span> property</b> + </p> + <pre><code>[simple_pos="VERB"]; “begin”, “began”, “beginning”, …</code></pre> +</div> +<div> + <p> + <b>Token with a condition on its <span class="blue-text">pos</span> property</b> + </p> + <pre><code>[pos="VBG"]; “beginning”, …</code></pre> +</div> +<div> + <p> + <b>Look for words with a variable character (also called <span class="blue-text">wildcard character</span>)</b><br> + </p> + <pre style="margin-bottom: 0;"><code>[word="beg.n"]; “begin”, “began”, “begun”</code></pre> + <pre style="margin-top: 0;" ><code> ^ 
the dot represents the wildcard character</code></pre> +</div> +<div> + <p><b>Token with two conditions on its properties, where both must be fulfilled (<span class="blue-text">AND</span> operation)</b></p> + <pre style="margin-bottom: 0;"><code>[lemma="be" & simple_pos="VERB"]; Lemma “be” and simple_pos is Verb</code></pre> + <pre style="margin-top: 0;" ><code> ^ the ampersand represents the and operation</code></pre> +</div> +<div> + <p><b>Token with two conditions on its properties, where at least one must be fulfilled (<span class="blue-text">OR</span> operation)</b></p> + <pre style="margin-bottom: 0;"><code>[simple_pos="VERB" | simple_pos="ADJ"]; simple_pos VERB or simple_pos ADJ (adjective)</code></pre> + <pre style="margin-top: 0;"><code> ^ the line represents the or operation</code></pre> +</div> +<div> + <p><b>Sequences</b></p> + <pre><code>[simple_pos="NOUN"] [simple_pos="VERB"]; NOUN -> VERB</code></pre> + <pre><code>[simple_pos="NOUN"] [] [simple_pos="VERB"]; NOUN -> wildcard token -> VERB</code></pre> +</div> +<div> + <p> + <b>Incidence modifiers</b><br> + Incidence modifiers are special characters or patterns that control how often the character/token that stands in front of them should occur. 
+ </p> + <ol> + <li><span class="blue-text"><b>+</b></span>: <span class="blue-text">One or more</span> occurrences of the character/token before</li> + <li><span class="blue-text"><b>*</b></span>: <span class="blue-text">Zero or more occurrences</span> of the character/token before</li> + <li><span class="blue-text"><b>?</b></span>: <span class="blue-text">Zero or one occurrence</span> of the character/token before</li> + <li><span class="blue-text"><b>{n}</b></span>: <span class="blue-text">Exactly n occurrences</span> of the character/token before</li> + <li><span class="blue-text"><b>{n,m}</b></span>: <span class="blue-text">Between n and m occurrences</span> of the character/token before</li> + </ol> + <pre><code>[word="beg.+"]; “begging”, “begin”, “began”, “begun”, …</code></pre> + <pre><code>[word="beg.*"]; “beg”, “begging”, “begin”, “begun”, …</code></pre> + <pre><code>[word="beg?"]; “be”, “beg”</code></pre> + <pre><code>[word="beg.{2}"]; “begin”, “begun”, …</code></pre> + <pre><code>[word="beg.{2,4}"]; “begging”, “begin”, “begun”, …</code></pre> + <pre><code>[word="beg{2}.*"]; “begged”, “beggar”, …</code></pre> + <pre><code>[simple_pos="NOUN"] []? [simple_pos="VERB"]; NOUN -> wildcard token (x0 or x1) -> VERB</code></pre> + <pre><code>[simple_pos="NOUN"] []* [simple_pos="VERB"]; NOUN -> wildcard token (x0 or more) -> VERB</code></pre> +</div> +<div> + <p> + <b>Option groups</b><br> + Find character sequences from a list of options. + </p> + <pre style="margin-bottom: 0;"><code>[word="be(g|gin|gan|gun)"]; “beg”, “begin”, “began”, “begun”</code></pre> + <pre style="margin-top: 0;" ><code> ^ ^ the parentheses indicate the start and end of an option group</code></pre> +</div> + +<h3>Structural attributes</h3> +<p>nopaque provides several structural attributes for querying. 
A distinction is made between attributes with and without a value.</p> +<ol> + <li><span class="green-text"><b>s</b></span>: Annotates a sentence</li> + <li> + <span class="green-text"><b>ent</b></span>: Annotates an entity + <ol> + <li> + <span class="green-text"><b>*ent_type</b></span>: Annotates an entity and has as value a code that identifies the type of the entity. + <ol> + <li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li> + </ol> + </li> + </ol> + </li> + <li> + <span class="green-text"><b>text</b></span>: Annotates a text + <ol> + <li>Note that all the following attributes have the data entered during the corpus creation as value.</li> + <li><span class="green-text"><b>*text_address</b></span></li> + <li><span class="green-text"><b>*text_author</b></span></li> + <li><span class="green-text"><b>*text_booktitle</b></span></li> + <li><span class="green-text"><b>*text_chapter</b></span></li> + <li><span class="green-text"><b>*text_editor</b></span></li> + <li><span class="green-text"><b>*text_institution</b></span></li> + <li><span class="green-text"><b>*text_journal</b></span></li> + <li><span class="green-text"><b>*text_pages</b></span></li> + <li><span class="green-text"><b>*text_publisher</b></span></li> + <li><span class="green-text"><b>*text_publishing_year</b></span></li> + <li><span class="green-text"><b>*text_school</b></span></li> + <li><span class="green-text"><b>*text_title</b></span></li> + </ol> + </li> +</ol> + +<h4>Searching for structural attributes</h4> +<pre><code>&lt;ent&gt; [] &lt;/ent&gt;; A one token long entity of any type</code></pre> +<pre><code>&lt;ent_type="PERSON"&gt; [] &lt;/ent_type&gt;; A one token long entity of type PERSON</code></pre> +<pre><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt;; Entity of any length of type PERSON</code></pre> +<pre style="margin-bottom: 0;"><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt; []* [simple_pos="VERB"] :: match.text_publishing_year="1991";</code></pre> +<pre style="margin-top: 
0;"><code>Arbitrarily long entity of type PERSON -> Arbitrarily many tokens -> VERB but only within texts with publication year 1991</code></pre> diff --git a/app/templates/main/user_manual.html.j2 b/app/templates/main/user_manual.html.j2 new file mode 100644 index 0000000000000000000000000000000000000000..0da85809df2ca2487b55756d61b7a69138c8bc23 --- /dev/null +++ b/app/templates/main/user_manual.html.j2 @@ -0,0 +1,44 @@ +{% extends "base.html.j2" %} +{% from "main/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block page_content %} +<div class="container"> + <div class="row"> + <div class="col s12"> + <h1 id="title">{{ title }}</h1> + </div> + + <div class="col s12 m10"> + <div class="section scrollspy" id="introduction"> + {% include "main/manual/_01_introduction.html.j2" %} + </div> + <div class="section scrollspy" id="registration-and-log-in"> + {% include "main/manual/_02_registration_and_log_in.html.j2" %} + </div> + <div class="section scrollspy" id="dashboard"> + {% include "main/manual/_03_dashboard.html.j2" %} + </div> + <div class="section scrollspy" id="services"> + {% include "main/manual/_06_services.html.j2" %} + </div> + <div class="section scrollspy" id="a-closer-look-at-the-corpus-analysis"> + {% include "main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2" %} + </div> + <div class="section scrollspy" id="cqp-query-language"> + {% include "main/manual/_08_cqp_query_language.html.j2" %} + </div> + </div> + + <div class="col m2 hide-on-small-only"> + <ul class="section table-of-contents" style="position: fixed !important;"> + <li><a href="#introduction">Introduction</a></li> + <li><a href="#registration-and-log-in">Registration and Log in</a></li> + <li><a href="#dashboard">Dashboard</a></li> + <li><a href="#services">Services</a></li> + <li><a href="#a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> + <li><a href="#cqp-query-language">CQP Query Language</a></li> + </ul> + </div> + </div> 
+</div> +{% endblock page_content %}