Skip to content
Snippets Groups Projects
Commit 3d2c401d authored by Leon Ziegler's avatar Leon Ziegler
Browse files

initial import

parents
No related branches found
No related tags found
No related merge requests found
Showing
with 929 additions and 0 deletions
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>SpeechRec</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
</buildSpec>
<natures>
</natures>
</projectDescription>
# Original Author: Gernot A. Fink
# Description: Rules for generating customized recognition parameters for
# (ESMERALDA) 'isr' from a pre-defined base model $(BASE).*.
# The lexicon used is extracted from a given grammar $(NAME).grm
# possibly augmented by user defined addons.
#NAME = biron
#BASE = vm
include MakeVars.models
#
# lexica to use
#
# Base lexicon extracted from the grammar; its words will be transcribed.
# NOTE: the comment lives on its own line on purpose. Make keeps whitespace
# that precedes a trailing "# ..." comment as part of the variable's value.
LEXBASE = $(NAME).base.lex
#LEXADDON = Noises.addon.lex # will not be transcribed!
# The SAMPA alphabet is used for phoneme symbols.
# To make own extensions to the list of transcriptions
# use additionals 'Addon-<data>.phono' files.
#PHONOS = Total.phono
# for generating sub-word units from transcriptions
#
PMOD = mm_pmod
PMOD_OPTS = -l1 -r1 -w
PHONES = phones
#
# for generating the tree lexicon
#
TREE = mm_tree
TREE_OPTS =
#
# for generating sub-word unit parameter data
#
SWU = mm_swu
SWU_OPTS = -l 0.000001
#
# for cleaning up
#
CLEAN = $(NAME).base.lex \
$(NAME).lex.new.pho $(NAME).lex.new $(NAME).lex.nonex \
$(NAME).word2swu.def $(NAME).lex.def $(NAME).swu.def
REMOVE = $(NAME).swu $(NAME).lex $(NAME).tree \
$(BASE).lex $(BASE).3p.lex
#
# what we need in the end
#
# 'all' is a command name, not a file: declare it phony so that a stray
# file called 'all' can never make the build look up to date.
.PHONY: all
all: $(NAME).swu $(NAME).lex $(NAME).tree
#
# extract lexicon of trained words from base model definitions
#
# Word lexicon of the base model: drop every definition line containing " =="
# as well as lines starting with '<', '[' or a backslash, then keep the first
# blank-separated field of what remains, sorted and de-duplicated.
$(BASE).lex: $(BASE).swu.def
	@cat $^ | grep -v -e " ==" -e "^[<\[]" | cut -d" " -f1 | sort -u >$@
	@echo "lexicon of trained words extracted:" $@
#
# extract list of known triphones from base model definitions
#
# Unit list of the base model: keep only the definition lines containing
# " ==" and store their first blank-separated field, sorted and unique.
$(BASE).3p.lex: $(BASE).swu.def
	@cat $^ | grep " ==" | cut -d" " -f1 | sort -u >$@
	@echo "list of know tri-phones extracted:" $@
#
# extract central base lexicon from grammar definition
#
# Let grm_tab dump the grammar's lexicon into a scratch file named after the
# target, strip the '#' comment lines from it, and discard the scratch file.
$(LEXBASE): $(NAME).grm
	@grm_tab -L $@.tmp $< >/dev/null
	@grep -v "^#" $@.tmp >$@
	@rm $@.tmp
	@echo "base lexicon extracted from grammar:" $@
#
# create mapping lexikon
#
# $(NAME).lex.map: $(BASE).lex
# @cat $(BASE).lex > $@
# @cut -f1 $(PHONOS) >> $@
#
# create effective lexicon from collection of base lexica
#
# Merge all base lexica into the effective lexicon.
#
# Before writing the target, refuse lexica that contain the same word in
# different letter cases:
#   - 'sort -u' removes exact duplicates (case-sensitively),
#   - 'sort -f' then places case variants next to each other; 'uniq -i' only
#     merges ADJACENT lines, and a plain case-sensitive sort (e.g. in the C
#     locale) can put other words between "Hello" and "hello",
#   - 'uniq -ic' counts the variants of each word,
#   - the grep keeps every line whose count is not exactly 1. The count is
#     anchored with a following blank so that counts such as 10..19 are not
#     mistaken for 1.
# grep succeeds only if such lines exist, which triggers the error branch.
$(NAME).lex: $(LEXBASE) $(LEXADDON)
	@if cat $^ \
	  | sort -u \
	  | sort -f \
	  | uniq -ic \
	  | grep -v '^[[:space:]]*1[[:space:]]'; then \
	    echo "ERROR: The words above occur multiple times in the lexicon but with different cases."; \
	    echo "ERROR: This will result in problems with the grammar module."; \
	    exit 1; \
	fi
	@cat $^ \
	  | sort -u \
	  >$@
	@echo "effective lexicon created:" $@
#
# determine words not present in base model's lexicon
#
# List the words of the effective lexicon that the base model does not know.
#
# 'comm' requires both inputs to be sorted. The field selection therefore has
# to happen BEFORE sorting: sorting whole lines and cutting afterwards only
# yields sorted output when the lines have a single field.
# (Lines without a TAB are passed through unchanged by 'cut -f2'.)
$(NAME).lex.new: $(NAME).lex $(BASE).lex
	@cut -f2 $(NAME).lex \
	  | sort \
	  | comm -23 - $(BASE).lex \
	  >$@
	@echo "list of new words generated:" $@
#
# find transcriptions for new words
#
# Feed the list of new words to mapper.py on stdin; the transcription files
# in $(PHONOS) are handed over as arguments. Its output becomes the target.
$(NAME).lex.new.pho: $(PHONOS) $(NAME).lex.new
	@./mapper.py $(PHONOS) <$(NAME).lex.new >$@
	@echo "transcriptions listed in:" $@
#
# check for transcriptions required for new words
#
# NOTE: 'test' needs to support the '-s' option to check
# whether a file exists and has non-zero size.
#
# Collect the words for which the transcription step reported "MISS:" and
# abort the build if there are any.
#
# The target is removed again before failing. Otherwise the non-empty file
# would be newer than its prerequisite, the next 'make' run would consider
# this check up to date, and the build would continue despite the missing
# transcriptions. The offending words are printed, so nothing is lost.
$(NAME).lex.nonex: $(NAME).lex.new.pho
	@grep "^MISS:" $^ \
	  | cut -f2- \
	  >$@
	@if [ -s $@ ]; then \
	    echo "ERROR: no transcriptions found for:"; \
	    cat $@; \
	    rm -f $@; \
	    exit 1; \
	else \
	    echo "no missing transcriptions found."; \
	fi
#
# generate mapping of words to appropriate sub-word unit definitions
#
# NOTE: Only done when no non-transcribed words were found
# above (in '$(NAME).lex.nonex').
#
# Strip the first TAB-separated column from the transcription list and let
# $(PMOD) turn the remainder into sub-word unit definitions.
# $(NAME).lex.nonex is listed only to force the missing-transcription check
# to run first; the recipe does not read it.
$(NAME).word2swu.def: $(NAME).lex.new.pho $(NAME).lex.nonex
	@cut -f2- $< | $(PMOD) $(PMOD_OPTS) $(PHONES) >$@
	@echo "words mapped to sub-word unit definitions:" $@
#
# generate required additional sub-word unit definitions
#
# 1. map unseen triphones to monophones directly
# 2. add word to triphone mappings
# 3. map unnormalized word definitions to normalized ones
# NOTE: Words identical in normalized and unnormalized form are ignored!
#
# Build the additional unit definitions in two steps:
#  1. take the unit column of the word mapping, split it into one unit per
#     line, drop every unit already listed in $(BASE).3p.lex and rewrite each
#     remaining "a/b/c" into the definition "a/b/c := /b/ ;"
#  2. append the word mapping itself.
$(NAME).swu.def: $(NAME).word2swu.def $(NAME).lex \
  $(BASE).lex $(BASE).3p.lex $(LEXBASE)
	@cut -f2 $< | sed "s/ ;//" | tr " " "\012" | sort -u \
	  | comm -23 - $(BASE).3p.lex \
	  | sed "s#\(.*\)/\(.*\)/\(.*\)#\1/\2/\3 := /\2/ ;#" \
	  >$@
	@cat $< >>$@
	@echo "additional sub-word unit definitions created:" $@
#
# generate lexicon definition (including silence model)
#
# Wrap the lexicon into a TASK definition: an opening line, every lexicon
# entry followed by " |", and finally the silence model closing the set.
# ('$$' is make's escape for a literal '$', i.e. sed's end-of-line anchor.)
$(NAME).lex.def: $(NAME).lex
	@echo "TASK.$(NAME) %= {" >$@
	@sed 's/$$/ |/' $< >>$@
	@echo "<sil> } ;" >>$@
	@echo "lexicon definition created:" $@
#
# generate tree lexicon for 'isr' recognizer
#
# Build the tree lexicon: the lexicon definition is piped into the tree
# generator together with the base model and both unit definition files;
# the result is sorted and de-duplicated.
#
# The generator is invoked through $(TREE) $(TREE_OPTS), the knobs declared
# for exactly this purpose at the top of this file, instead of a hard-coded
# 'mm_tree'. This matches the $(SWU) $(SWU_OPTS) usage of the .swu rule.
$(NAME).tree: $(NAME).lex.def $(NAME).swu.def \
  $(BASE).model $(BASE).swu.def
	@cat $(NAME).lex.def \
	  | $(TREE) $(TREE_OPTS) $(BASE).model $(BASE).swu.def $(NAME).swu.def \
	  | sort -u \
	  >$@
	@echo "tree lexicon created:" $@
#
# generate sub-word unit parameter data for 'isr' recognizer
#
# Produce the unit parameter data: $(SWU) reads the lexicon definition on
# stdin and receives the base model's state, model and unit definition files
# plus the additional unit definitions as arguments.
$(NAME).swu: $(NAME).lex.def $(NAME).swu.def \
  $(BASE).state $(BASE).model $(BASE).swu.def
	@cat $< | $(SWU) $(SWU_OPTS) $(BASE).state $(BASE).model $(BASE).swu.def $(NAME).swu.def >$@
	@echo "sub-word unit parameter data created:" $@
#
# rules for cleaning up
#
# 'clean' and 'remove' are commands, not files; without .PHONY a file with
# one of these names would silently disable the target.
.PHONY: clean remove

# delete the intermediate files listed in $(CLEAN)
clean:
	rm -f $(CLEAN)

# additionally delete the generated files listed in $(REMOVE)
remove: clean
	rm -f $(REMOVE)
SpeechRec contains the models required by the isr recognizer.
Currently they are derived from acoustic models trained on the Verbmobil corpus (de)
and the Wall Street Journal corpus (en).
Structure:
./de -- everything based upon German models
./en -- everything based upon English models
* Each subdirectory within these contains exactly one set of files required to
build models for the isr.
* build.sh documents the call to build/rebuild each model
* MakeVars.models contains set specific settings (files with phonetic
transcription, grammar name, base model, ...)
***** Howto create a new set *****
1. Use copyset.sh to copy one of the existing sets to a new one
!!! Do not use cp without additional options !!!
!!! Symlinks need to be preserved !!!
2. Delete all unnecessary stuff from the new set which has not already been
removed
3. Modify MakeVars.models and Makefile accordingly
#!/bin/bash
#HOME=/vol/speech/gernot/nl-isr/
#HOME=/vol/esmeralda/share/nl-isr
#
# default parameters
#
# verbose: empty means quiet; set to 1 by -v to print progress messages
verbose=;
# nlines: number of lines written to the generated .nl-input file (see -n)
nlines=500;
#
# option handling
#
# Consume leading options; the first argument that is not a known option
# ends the loop and is left in place as the lexicon file name.
while [ $# -gt 0 ];
do
case $1 in
# -h: print the help text below and exit (with status 1)
-h) echo `basename $0`":"
cat <<EOF
From a given <lexicon> (e.g. 'test.lex') the generation of
data files is started in order to provide a set op parameters
that make it possible to use the 'isr' recognizer in a sort of
pseudo NL-mode:
1. 'test.swu' will hold the necessary sub-word unit definitions.
2. 'test.tree' will define the lecical prefix tree for the recognizer.
3. 'test.nl-input'
The 'isr' recognizer can then - in principle - be started in NL mode
by using the following command;
isr [options] test.swu test.lex test.tree <test.nl-input
EOF
exit 1;;
-v) verbose=1;;
# -n <num>: takes a value, hence the extra shift
-n) nlines=$2; shift;;
#-s) sync_delay=$2; shift;;
#-*) echo "$0: unknown option $1";
# exit 1;;
*) break;;
esac
shift
done
# At least one non-option argument (the lexicon file) is required.
if [ -z "$*" ]; then
    echo "Usage is:"
    echo " $(basename "$0") [-h] [-v] [-n <num>] <lexicon>"
    exit 1
fi
lexicon=$1
#
# extracting parameters' base name
#
# The lexicon path is quoted everywhere so that names containing blanks or
# glob characters are handled as a single word.
name=$(basename "$lexicon" .lex)
if [ ! -f "$lexicon" ]; then
    # message fixed: the closing quote after the file name was missing
    echo "can't open lexicon file '$lexicon'!"
    exit 1;
fi
#
# create sub-word units
#
# 'type -p' prints nothing when makeswu is not found on PATH. The empty
# result has to be tested explicitly: an unquoted '[ ! -f $command ]'
# collapses to '[ ! -f ]', which is false, so a missing tool would go
# unnoticed and an empty $dest would be written.
command=$(type -p makeswu)
dest=$name.swu
if [ -z "$command" ] || [ ! -f "$command" ]; then
    echo "$(basename "$0"): can't execute command ${command:-makeswu}!"
    exit 1;
fi
if [ -n "$verbose" ]; then
    echo -n "creating sub-word units $dest ... "
fi
# makeswu is run without arguments; its stdout becomes the .swu file
"$command" >"$dest"
if [ -n "$verbose" ]; then
    echo "done."
fi
#
# create prefix tree
#
# 'type -p' prints nothing when maketree is not found on PATH. The empty
# result has to be tested explicitly: an unquoted '[ ! -f $command ]'
# collapses to '[ ! -f ]', which is false, so a missing tool would go
# unnoticed.
command=$(type -p maketree)
dest=$name.tree
if [ -z "$command" ] || [ ! -f "$command" ]; then
    # message fixed: the closing quote after the command name was missing
    echo "$(basename "$0"): can't execute command '${command:-maketree}'!"
    exit 1;
fi
if [ -n "$verbose" ]; then
    echo -n "creating prefix tree $dest ... "
fi
# the lexicon is piped through maketree; the result is sorted and made unique
cat "$lexicon" \
    | "$command" \
    | sort -u \
    >"$dest"
if [ -n "$verbose" ]; then
    echo "done."
fi
#
# create input driving file for NL recognition
#
# 'type -p' prints nothing when txt2svq is not found on PATH. The empty
# result has to be tested explicitly: an unquoted '[ ! -f $command ]'
# collapses to '[ ! -f ]', which is false, so a missing tool would go
# unnoticed and lines with an empty command would be generated.
command=$(type -p txt2svq)
dest=$name.nl-input
if [ -z "$command" ] || [ ! -f "$command" ]; then
    # message fixed: the closing quote after the command name was missing
    echo "$(basename "$0"): can't locate command '${command:-txt2svq}'!"
    exit 1;
fi
if [ -n "$verbose" ]; then
    echo -n "creating input file $dest ... "
fi
# write $nlines identical lines, each one a pipe command reading from the tty
while [ "$nlines" -gt 0 ];
do
    echo "| $command </dev/tty"
    nlines=$((nlines - 1))
done > "$dest"
if [ -n "$verbose" ]; then
    echo "done."
fi
# Copy an existing model set to a new directory and strip generated files.
# Usage: <this script> <sourcedir> <targetdir>
if [ $# -ne 2 ]; then
    echo "Usage: $0 <sourcedir> <targetdir>"
    exit 1
fi
# -d keeps symbolic links as links instead of copying what they point to
cp -dR "$1" "$2" || exit 1
# Stop if the target cannot be entered: everything below operates on the
# current directory, and 'make ... remove' must never run in the caller's
# directory (it would delete the generated files of whatever set lives there).
cd "$2" || exit 1
# make the new set group-writable/setgid and drop the copied .svn metadata
chmod g+sw . && rm -rf .svn
# remove the generated files that were copied along with the sources
make -f Makefile.models remove
0 4 4.75
2.66537 0.460283 0.722534 0.881632 0.771319 0.647087 0.547872 0.471176 0.691693 0.834649 0.694927 0.725007 0.7158 0.72206 0.759329 0.839217 1.01634 0.996053 0.997086 1.02442 0.990176 1.00615 1.17781 1.25187 1.37796 1.54784 1.39192 1.39529 1.43478 1.31847 1.21826 1.76494 1.13124 0.951468
0 0.35
0.1 0.6
0.2 0.95
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.25 1
0.2 1
0.3 1
0.3 0.95
0.2 1
0.25 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.25 1
0.25 1
0.2 1
0.25 1
0.25 1
0.25 1
0.2 1
0.2 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.25 1
0.25 1
0.3 1
0.25 0.95
0.2 1
0.25 1
0.2 1
0.25 1
0.3 1
0.15 0.95
0.25 1
0.25 1
0.25 1
0.25 1
0.2 1
0.25 1
0.25 1
0.25 1
0.2 1
0.2 1
0.3 1
0.2 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.2 1
0.2 1
0.25 1
0.25 1
0.25 1
0.25 1
0.2 1
0.2 1
0.2 1
0.25 1
0.25 1
0.25 1
0.3 1
0.25 1
0.2 1
0.2 1
0.25 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 0.95
0.25 1
0.2 1
0.15 1
0.15 1
0.2 1
0.25 1
0.25 1
0.25 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.15 1
0.15 1
0.25 1
0.2 1
0.2 1
0.3 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.25 1
0.2 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 1
0.25 1
0.25 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.25 1
0.25 1
0.25 1
0.2 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.25 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.15 1
0.2 1
0.15 1
0.2 1
0.2 1
0.2 1
0.25 1
0.2 1
0.2 1
0.2 1
0.15 1
0.2 1
0.15 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.2 1
0.25 1
0.3 1
0.3 1
0.55 0.55
0.85 0.85
0.7 0.7
0.35 1
0.25 1
0.25 1
0.9 0.9
0.2 1
0.15 1
0.25 1
0.3 1
0.2 1
0.2 1
0.2 1
0.2 0.95
0.15 0.95
0.15 0.95
0.15 0.95
0.15 0.95
0.15 0.95
0.15 0.95
0.15 0.9
0.1 0.9
0.15 0.9
0.15 0.9
0.15 0.95
0.15 0.95
0.15 0.9
0.15 0.95
0.15 0.9
0.15 0.9
0.15 0.9
0.15 0.95
0.15 0.9
0.15 0.95
0.15 0.9
0.15 0.9
0.15 0.9
0.1 0.9
0.1 0.9
0.15 0.9
0.1 0.85
0.1 0.85
0.1 0.8
0.1 0.85
0.1 0.85
0.1 0.85
0.1 0.8
0.1 0.8
0.1 0.8
0.1 0.75
0.1 0.7
0.05 0.65
0.1 0.65
0.1 0.65
0.05 0.65
0.05 0.6
0.05 0.5
0.05 0.45
0.05 0.45
0.05 0.4
0.05 0.35
0.05 0.35
0.05 0.3
0.05 0.3
0.05 0.25
0 0.25
0 0.25
0 0.2
0 0.25
/vol/esmeralda/share/models/wsj1/wsj1.cl
\ No newline at end of file
$$S: $Greeting
| $Answer
| $GoodBye
| $Reset ;
$Greeting: $RobotName $GreetingWord
| $GreetingWord $RobotName
| $GreetingWord ;
$GreetingWord: hello
| hi
| hey ;
$Answer: $RobotName $AnswerContent
| $AnswerContent ;
$AnswerContent: $ok
| $not_ok
| $PaintingPosition
| about $PaintingPosition
| tell me something about $PaintingPosition ;
$PaintingPosition: $Patterns
| $GreatBritain
| $Japan ;
$Patterns: the bionic elephant trunk
| the bionic handling assistant
| the bionic stuff
| the right one
| to your right
| the one on your right ;
$GreatBritain: yourself
| your self
| humanoid robots
| behind you
| the one behind you ;
$Japan: citec
| the citec exhibits
| the exhibits
| the left ones
| the left side
| the ones on the left
| the stuff on the left side ;
$GoodBye: good bye $RobotName
| good bye ;
$Reset: $RobotName please restart
| $RobotName please reset
| $RobotName please start over ;
$ok: $ok_prefix
| $ok_prefix $ok_modifier ;
$ok_prefix: yes
| yeah
| ja
| yep
| of course ;
$ok_modifier: i do
| i want to
| i want that
| i am sure
| we do
| we want to
| we want that
| we are sure
| you should
| please ;
$not_ok: $not_ok_prefix
| $not_ok_prefix $not_ok_modifier ;
$not_ok_prefix: no
| nope ;
$not_ok_modifier: thanks
| thank you
| i don't
| i do not
| we don't
| we do not
| you shouldn't
| you should not ;
$RobotName: nao
| robot
| piper ;
HELLO h3l'oU
BED_ROOM b'3dr,um
BRITTA br'ItA
CAMPARI kAmp'Ari
CD-PLAYER s'id'ipl'eI3r
CELLPHONE s'3lf'oUn
COUCH_TABLE k'aUtS't'eIb@l
CYLINDRIC s@l'IndrIk
DINNER_TABLE d'In3r't'eIb@l
DINETTE daIn'3t
DINING-ROOM d'aInINr,um
DINING_ROOM d'aInINr,um
DISCMAN d'Iskm'En
EGG_SHAPED '3gS'eIpt
ELLIPTIC Il'IptIk
GERNOT g'ErnAt
GUIDE g'aId
HOLE_PUNCHER h'oUlp'VntS3r
IOANNIS j'EnIs
KITCHEN_TABLE k'ItS@n't'eIb@l
KITCHEN k'ItS@n
LEMON_JUICE '3m@ndZ'us
LINUX_BOOK l'In@ksb,Uk
LIVING_ROOM l'IvINr,um
NINETH n'aInT
PARTY p'Arti
PROGRAMMING_BOOK pr'oUgr,EmINb,Uk
PUNCHER p'VntS3r
REBOOT r'ibut
REMOTE_CONTROL rim'oUtk@ntr'oUl
ROBBI r'Abi
RUBIKS_CUBE r'ubikskj'ub
SETTEE s,3tt'i
SHUYIN S'uIn
SIDEBOARD s'aId'b'Ord
SOMETHING_TO_DRINK s'VmTINt'udr'INk
SOMETHING_TO_SMOKE s'VmTINt'usm'oUk
TAPEDECK t'eIpd'3k
THANK-YOU T'ENkj'u
THAT'S-IT D'Ets'It
TOY_COW t'Oik'aU
TV-SET tiv'is'3t
TOBI t'oUbi
TOILET t'Oil@t
ANNIKA AnI'kA
XLEON_ l'E'On
LEON_ eIAn
XSVEN_ svEn
SVEN_ sb3n
XMATTHIAS_ mVt'II'As
MATTHIAS_ nViti3s
TORBEN tObE'n
XJONATHAN_ j'On'Vt'A'n
JONATHAN_ joUnEtAn
JAN_ jEn
XJAN_ j'An
FREDERIC_ fr'@d'Er'Ik
ANDREAS_ V'nd'rEAs
LUCAS_ lU'kA's
XMARCO_ mA'kOO'
MARCO_ mA'koU
DENIS_ dEnI's
FLORIAN_ flO'rI'An
XRAPHAEL_ r'Vf'V'EE3l
SIDE_TABLE s,aId't'eIb@l
YOGHURT j'O'gh'rt
MAGGI mE'gI
THE_PERSON D@'p'3rs@n
XNOODLES mwUdst
NOODLES moUdls
CITEC sAIteIk
DRESSING_ROOM dr'3sINr,um
WASHING_MACHINE w'ASIN'm@S'in
TV tiv'i
DINING_TABLE d'aInIN't'eIb@l
CORNFLAKES k'Ornfl'eIks
PRINGLES pr'INg@ls
KITCHENARY k'ItS@n3ri
TOILET twaID@T
DRAWER D@rO
BACK_DOOR b'Ekd,Or
FRONT_DOOR fr'Vnt'd,Or
NAO n'aU
HUMANOID hj'um@n,Oid
BIONIC b,aI'AnIk
# MakeVars for the HumavipsY2Demo model set (English, wsj1 base model)
# grammar name (base name of the .grm file and of all generated files)
NAME=HumavipsY2Demo
# acoustic base model (base name of the .model/.state/.swu.def files)
BASE=wsj1
# additional lexicon merged into the effective lexicon;
# will not be transcribed
LEXADDON=Noises.addon.lex
# phonetic transcriptions (files handed to mapper.py)
PHONOS=HumavipsY2Demo.phono Total.phono
# language for installation target (first component of the install path)
LANGUAGE=english
# set-specific settings: NAME, BASE, LEXADDON, PHONOS, LANGUAGE
include MakeVars.models
# installation root; '?=' lets the caller override it (make prefix=...)
prefix?=/vol/humavips/releases/y2demo
# sub-directory below $(prefix)/share/SpeechRec that receives the files
module?=$(LANGUAGE)/$(NAME)
# grammar the models are generated from
GRM_FILE=$(NAME).grm
# files copied by 'install-isr' (the grammar is added in the recipe)
FILES_TO_INSTALL= \
$(NAME).ch \
$(NAME).cl \
$(NAME).lex \
$(NAME).swu \
$(NAME).tree
# files copied by 'install-nlisr' into the nlisr sub-directory
NLISR_FILES_TO_INSTALL= \
$(NAME).lex \
$(NAME).grm \
nlisr/$(NAME).nl-input \
nlisr/$(NAME).swu \
nlisr/$(NAME).tree \
nlisr/nl-isr.sh
# Aggregate targets. They are declared phony because none of them names a
# file to build; 'nlisr' in particular is also the name of an existing
# directory and must not be judged by that directory's timestamp.
.PHONY: all isr nlisr
all: isr nlisr
isr: $(NAME).tree
nlisr: nlisr/$(NAME).tree
# Regenerate the isr models whenever the grammar changes: wipe everything
# Makefile.models generated, then rebuild from scratch.
# $(MAKE) is used instead of a literal 'make' so that command-line flags
# (-n, -j and the jobserver, variable overrides) reach the sub-make.
$(NAME).tree: $(GRM_FILE)
	$(MAKE) -f Makefile.models remove
	$(MAKE) -f Makefile.models
# Generate the NL-mode parameter files inside nlisr/.
# SetupNL reads ../$(NAME).lex, which is produced by the Makefile.models run
# of the $(NAME).tree rule. That target is therefore listed as a
# prerequisite: 'make nlisr' on a fresh checkout and parallel builds
# ('make -j') must not start SetupNL before the lexicon exists.
nlisr/$(NAME).tree: $(GRM_FILE) $(NAME).tree
	(cd nlisr && ./SetupNL ../$(NAME).lex)
# Installation targets are commands, not files.
.PHONY: install install-isr install-nlisr
install: install-isr install-nlisr

# Copy the isr model files and the grammar to the module directory.
# The leading '-' ignores a failure of the directory creation;
# NOTE(review): presumably because the directory may already exist with
# permissions that cannot be changed -- confirm.
# 'cp -L' follows symbolic links, so the installed files are real copies.
install-isr: isr
	-install -d --mode 2775 $(prefix)/share/SpeechRec/$(module)
	cp -Lvf $(FILES_TO_INSTALL) $(NAME).grm $(prefix)/share/SpeechRec/$(module)

# Copy the NL-mode files to the module's nlisr sub-directory.
install-nlisr: nlisr
	-install -d --mode 2775 $(prefix)/share/SpeechRec/$(module)/nlisr
	cp -Lvf $(NLISR_FILES_TO_INSTALL) $(prefix)/share/SpeechRec/$(module)/nlisr
../../Makefile.models
\ No newline at end of file
/vol/esmeralda/share/models/wsj1/Total.phono
\ No newline at end of file
../../mapper.py
\ No newline at end of file
../HumavipsY2Demo.grm
\ No newline at end of file
../../../SetupNL
\ No newline at end of file
#!/bin/bash
#
# Start the 'isr' recognizer with the parameter files that live next to this
# script, feeding it the generated .nl-input file on stdin.
#
# directory containing this script; all parameter files are looked up there
CFGDIR=$(dirname $0)
# base name of the parameter files
name=HumavipsY2Demo
#ISRPATH=/vol/robocup/trunk/bin/
# general recognizer options
OPTS="-l4 -o xcf:isr -m xcf:ShortTerm"
# sub-word units, lexicon and tree lexicon
SWU="$CFGDIR/$name.swu"
LEX="$CFGDIR/$name.lex"
TREE="$CFGDIR/$name.tree"
# grammar options and grammar file
GRM_OPTS="-p ALL"
GRM="-g $CFGDIR/$name.grm"
# NOTE(review): meaning of the lone "-" argument is not visible here -- confirm
SR="-"
# NOTE(review): presumably forces POSIX-style option parsing in isr -- confirm
export POSIXLY_CORRECT=1
# LM_OPTS, LM, CL, AA, FE_OPTS, FE and SR_OPTS are not set in this script;
# they expand to whatever the calling environment provides (or to nothing).
command="isr $OPTS $SWU $LEX $TREE $GRM_OPTS $GRM $LM_OPTS $LM $CL $AA $FE_OPTS $FE $SR_OPTS $SR < $CFGDIR/$name.nl-input"
#echo $command
# isr_ctrl is started in the background before the recognizer itself
isr_ctrl -rwt isr &
# eval is needed so that the '<' inside $command is treated as a redirection
eval $command
/vol/esmeralda/share/models/wsj1/phones
\ No newline at end of file
/vol/esmeralda/share/models/wsj1/wsj1.model
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment