Skip to content
Snippets Groups Projects
Commit 8771bf1c authored by Günes Minareci's avatar Günes Minareci
Browse files

playing in die grm eingefügt

parent 01fcc506
No related branches found
No related tags found
No related merge requests found
# Original Author: Gernot A. Fink
# Description: Rules for generating customized recognition parameters for
# (ESMERALDA) 'isr' from a pre-defined base model $(BASE).*.
# The lexicon used is extracted from a given grammar $(NAME).grm
# possibly augmented by user defined addons.
#NAME = biron
#BASE = vm
include MakeVars.models
#
# lexica to use
#
# Base lexicon extracted from the grammar; its words will be transcribed!
# (The remark sits on its own line: an inline comment after the value would
# leave the whitespace in front of '#' as part of the variable's value.)
LEXBASE = $(NAME).base.lex
#LEXADDON = Noises.addon.lex # will not be transcribed!
# The SAMPA alphabet is used for phoneme symbols.
# To add your own extensions to the list of transcriptions,
# use additional 'Addon-<data>.phono' files.
#PHONOS = Total.phono
# for generating sub-word units from transcriptions
#
# PMOD      - command that maps transcriptions to sub-word unit definitions
# PMOD_OPTS - options passed to $(PMOD)
# PHONES    - final argument passed to $(PMOD)
#             (presumably the phone inventory file -- TODO confirm)
#
PMOD = mm_pmod
PMOD_OPTS = -l1 -r1 -w
PHONES = phones
#
# for generating the tree lexicon
#
# TREE      - command of the tree lexicon generator
# TREE_OPTS - options for $(TREE); empty by default
#
TREE = mm_tree
TREE_OPTS =
#
# for generating sub-word unit parameter data
#
# SWU       - command that produces the sub-word unit parameter data
# SWU_OPTS  - options passed to $(SWU)
#
SWU = mm_swu
SWU_OPTS = -l 0.000001
#
# for cleaning up
#
# CLEAN:  intermediate files, deleted by the 'clean' target.
# REMOVE: final products and extracted base-model lexica, deleted in
#         addition by the 'remove' target.
# The base lexicon is named through $(LEXBASE) so that CLEAN keeps matching
# the file actually generated if that setting is ever changed.
CLEAN = $(LEXBASE) \
        $(NAME).lex.new.pho $(NAME).lex.new $(NAME).lex.nonex \
        $(NAME).word2swu.def $(NAME).lex.def $(NAME).swu.def
REMOVE = $(NAME).swu $(NAME).lex $(NAME).tree \
         $(BASE).lex $(BASE).3p.lex
#
# what we need in the end
#
# 'all' names no file; declaring it phony keeps a stray file called 'all'
# from making the goal look up to date.
.PHONY: all
all: $(NAME).swu $(NAME).lex $(NAME).tree
#
# Extract the lexicon of trained words from the base model definitions:
# discard every line that contains " ==" or whose first character is in the
# set <\[ , keep the first space-separated field of the remaining lines and
# store the sorted, duplicate-free result.
#
$(BASE).lex: $(BASE).swu.def
	@cat $^ | grep -v -e " ==" -e "^[<\[]" | cut -d" " -f1 | sort -u >$@
	@echo "lexicon of trained words extracted:" $@
#
# Extract the list of known triphones from the base model definitions:
# keep only the lines containing " ==", take their first space-separated
# field and store the sorted, duplicate-free result.
#
$(BASE).3p.lex: $(BASE).swu.def
	@cat $^ | grep " ==" | cut -d" " -f1 | sort -u >$@
	@echo "list of know tri-phones extracted:" $@
#
# Extract the central base lexicon from the grammar definition.
# grm_tab is run on the grammar with '-L $@.tmp' (presumably the lexicon
# output file -- TODO confirm); lines starting with '#' are then filtered
# out of that temporary file into the target and the temporary is removed.
#
$(LEXBASE): $(NAME).grm
	@grm_tab -L $@.tmp $< >/dev/null \
		&& grep -v "^#" $@.tmp >$@ \
		&& rm $@.tmp
	@echo "base lexicon extracted from grammar:" $@
#
# create mapping lexicon
#
# $(NAME).lex.map: $(BASE).lex
# @cat $(BASE).lex > $@
# @cut -f1 $(PHONOS) >> $@
#
# Create the effective lexicon from the collection of base lexica.
#
# Before the target is written, the merged word list is checked for entries
# that differ only in letter case; if any are found they are printed and the
# build stops.
#
# Two details make the check reliable:
#  - 'sort -f' places case variants next to each other regardless of the
#    locale's collation order, which 'uniq -i' needs in order to count them
#    as one group;
#  - the count printed by 'uniq -c' must be exactly "1" (followed by
#    whitespace) for a line to be dropped; matching only a leading '1' would
#    also hide groups with 10-19, 100, ... members.
#
$(NAME).lex: $(LEXBASE) $(LEXADDON)
	@if cat $^ \
		| sort -u \
		| sort -f \
		| uniq -ic \
		| grep -v '^[[:space:]]*1[[:space:]]'; then \
		echo "ERROR: The words above occur multiple times in the lexicon but with different cases."; \
		echo "ERROR: This will result in problems with the grammar module."; \
		exit 1; \
	fi
	@cat $^ \
		| sort -u \
		>$@
	@echo "effective lexicon created:" $@
#
# Determine the words that are not present in the base model's lexicon.
#
# The second tab-separated field of every lexicon line is taken (lines
# without a tab are passed through whole by 'cut') and compared against
# $(BASE).lex; only entries unique to the new lexicon are kept.
# 'comm' requires sorted input, so the sort has to happen AFTER the field
# has been cut out -- sorting whole lines first does not guarantee that the
# extracted field is in order.
#
$(NAME).lex.new: $(NAME).lex $(BASE).lex
	@cut -f2 $(NAME).lex \
		| sort \
		| comm -23 - $(BASE).lex \
		>$@
	@echo "list of new words generated:" $@
#
# Look up transcriptions for the new words: the list of new words is fed to
# ./mapper.py on standard input, with $(PHONOS) given as its arguments, and
# the mapper's output becomes the target.
#
$(NAME).lex.new.pho: $(PHONOS) $(NAME).lex.new
	@./mapper.py $(PHONOS) <$(NAME).lex.new >$@ \
		&& echo "transcriptions listed in:" $@
#
# Check that every new word has a transcription.
#
# Lines starting with "MISS:" in the transcription list are collected (from
# the second tab-separated field onwards) into the target. A non-empty
# result is an error: the affected entries are printed and the build stops.
#
# NOTE: 'test' needs to support the '-s' option to check
# whether a file exists and has non-zero size.
#
# On failure the target is removed again. Otherwise the freshly written
# file would be newer than its prerequisite, the next 'make' run would
# consider this check up to date, and the build would continue with
# untranscribed words.
#
$(NAME).lex.nonex: $(NAME).lex.new.pho
	@grep "^MISS:" $^ \
		| cut -f2- \
		>$@
	@if [ -s $@ ]; then \
		echo "ERROR: no transcriptions found for:"; \
		cat $@; \
		rm -f $@; \
		exit 1; \
	else \
		echo "no missing transcriptions found."; \
	fi
#
# Map the new words to appropriate sub-word unit definitions: everything
# from the second tab-separated field onwards of the transcription list is
# piped through $(PMOD).
#
# NOTE: $(NAME).lex.nonex is listed as a prerequisite so that this step is
#       only reached when the check for non-transcribed words has passed.
#
$(NAME).word2swu.def: $(NAME).lex.new.pho $(NAME).lex.nonex
	@cut -f2- $< | $(PMOD) $(PMOD_OPTS) $(PHONES) >$@
	@echo "words mapped to sub-word unit definitions:" $@
#
# Generate the additional sub-word unit definitions that are required.
#
# 1. Map unseen triphones directly to monophones: every unit named in the
#    second tab-separated field of $(NAME).word2swu.def (with " ;" removed,
#    one unit per line) that is not listed in $(BASE).3p.lex yields a
#    definition of the form "l/c/r := /c/ ;".
# 2. Append the word-to-triphone mappings from $(NAME).word2swu.def.
#
# NOTE(review): the previous header listed a third step, "map unnormalized
#       word definitions to normalized ones" (with the remark that words
#       identical in normalized and unnormalized form are ignored). No
#       recipe line below performs it, and the prerequisites $(NAME).lex,
#       $(BASE).lex and $(LEXBASE) are not read by the recipe -- confirm
#       whether that step was dropped on purpose.
#
$(NAME).swu.def: $(NAME).word2swu.def $(NAME).lex \
                 $(BASE).lex $(BASE).3p.lex $(LEXBASE)
	@cut -f2 $< \
		| sed "s/ ;//" \
		| tr " " "\012" \
		| sort -u \
		| comm -23 - $(BASE).3p.lex \
		| sed "s#\(.*\)/\(.*\)/\(.*\)#\1/\2/\3 := /\2/ ;#" \
		>$@ \
		&& cat $< >>$@
	@echo "additional sub-word unit definitions created:" $@
#
# Generate the lexicon definition (including the silence model): an opening
# "TASK.$(NAME) %= {" line, every lexicon entry with " |" appended, and a
# closing "<sil> } ;" line.
#
$(NAME).lex.def: $(NAME).lex
	@{ echo "TASK.$(NAME) %= {" \
		&& sed 's/$$/ |/' $< \
		&& echo "<sil> } ;"; \
	} >$@
	@echo "lexicon definition created:" $@
#
# Generate the tree lexicon for the 'isr' recognizer.
#
# The lexicon definition is piped through the tree generator, which is also
# given the base model, the base sub-word unit definitions and the
# additional definitions; its output is stored sorted and duplicate-free.
# The generator is invoked through $(TREE) $(TREE_OPTS) -- the settings
# provided for it at the top of this file -- so that the tool and its
# options can be overridden just like $(SWU) and $(PMOD).
#
$(NAME).tree: $(NAME).lex.def $(NAME).swu.def \
              $(BASE).model $(BASE).swu.def
	@cat $(NAME).lex.def \
		| $(TREE) $(TREE_OPTS) $(BASE).model $(BASE).swu.def $(NAME).swu.def \
		| sort -u \
		>$@
	@echo "tree lexicon created:" $@
#
# Generate the sub-word unit parameter data for the 'isr' recognizer:
# $(SWU) reads the lexicon definition on standard input and is given the
# base state, model and sub-word unit definition files followed by the
# additional sub-word unit definitions.
#
$(NAME).swu: $(NAME).lex.def $(NAME).swu.def \
             $(BASE).state $(BASE).model $(BASE).swu.def
	@$(SWU) $(SWU_OPTS) $(BASE).state $(BASE).model $(BASE).swu.def \
		$(NAME).swu.def <$(NAME).lex.def >$@
	@echo "sub-word unit parameter data created:" $@
#
# rules for cleaning up
#
# 'clean' deletes the intermediate files listed in $(CLEAN); 'remove' runs
# 'clean' first and then also deletes the files listed in $(REMOVE).
# Neither target names a file, so both are declared phony: a stray file
# called 'clean' or 'remove' must not make them look up to date.
.PHONY: clean remove
clean:
	rm -f $(CLEAN)
remove: clean
	rm -f $(REMOVE)
../../Makefile.models
\ No newline at end of file
......@@ -5,11 +5,13 @@ $$S: $confirm_yes
| $everythingAlright
| $showingBP
| $medicine
| $gettingMR;
| $gettingMR
| $startPlaying;
$everythingAlright: $confirm_no there is something with $bodyparts
| $confirm_yes everything is ok
| $confirm_yes i am fine;
| $confirm_yes i am fine
| $confirm_yes;
$showingBP: $confirm_yes here is $bodyparts
| $confirm_no;
......@@ -65,6 +67,11 @@ $weather: what is the weather
$joke: tobi tell me a joke
|tell me a joke;
$startPlaying: let us play a game
| let us play
| lets play a game
| lets play;
$confirm_yes: tobi please
| tobi yes
| tobi yes please
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment