Ingmar Steiner
2018-07-30 – 2018-08-10
Hello world
MaryXML
<?xml version="1.0" encoding="UTF-8"?>
<maryxml xmlns="http://mary.dfki.de/2002/MaryXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.5" xml:lang="en-US">
<p>
<s>
<phrase>
<t accent="L+H*" g2p_method="lexicon" ph="h @ - ' l @U" pos="UH">
Hello
<syllable ph="h @">
<ph p="h"/>
<ph p="@"/>
</syllable>
<syllable accent="L+H*" ph="l @U" stress="1">
<ph p="l"/>
<ph p="@U"/>
</syllable>
</t>
<t accent="!H*" g2p_method="lexicon" ph="' w r= l d" pos="NN">
world
<syllable accent="!H*" ph="w r= l d" stress="1">
<ph p="w"/>
<ph p="r="/>
<ph p="l"/>
<ph p="d"/>
</syllable>
</t>
<boundary breakindex="5" tone="L-L%"/>
</phrase>
</s>
</p>
</maryxml>
phone | pos_in_syl | accented | ph_cplace |
---|---|---|---|
h | 0 | 0 | g |
@ | 1 | 0 | 0 |
l | 0 | 1 | a |
@U | 1 | 1 | 0 |
w | 0 | 1 | l |
r= | 1 | 1 | 0 |
l | 2 | 1 | a |
d | 3 | 1 | a |
_ | 0 | 0 | 0 |
(small selection)
Target feature vectors used to generate/retrieve audio:
Compute feature vectors from text,
then assign them to the provided data.
./gradlew legacyInit
./gradlew build
./gradlew run
data
dependency
Using Praat
./gradlew praatPitchmarker
wav/*.wav
pm/*.pm
Using ch_track
from EST
./gradlew mcepExtractor
wav/*.wav
mcep/*.mcep
Predict phone sequence from text using MaryTTS
./gradlew generateAllophones
text/*.txt
prompt_allophones/*.xml
./gradlew legacyTranscriptionAligner
prompt_allophones/*.xml
, lab/*.lab
allophones/*.xml
Compute and assign feature vector to each unit using MaryTTS
./gradlew legacyPhoneUnitFeatureComputer legacyHalfPhoneUnitFeatureComputer
allophones/*.xml
, mary/features.txt
phonefeatures/*.pfeats
, halfphonefeatures/*.hpfeats
Compile “timeline” files for audio, utterances, and acoustic features
./gradlew legacyWaveTimelineMaker legacyBasenameTimelineMaker legacyMCepTimelineMaker
wav/*.wav
, pm/*.pm
, mcep/*.mcep
mary/timeline_waveforms.mry
, mary/timeline_basenames.mry
, mary/timeline_mcep.mry
These contain the actual data from the wav
and mcep
files, in pitch-synchronous “datagram” packets.
Phone-level and halfphone-level unit and features files
./gradlew legacyPhoneUnitfileWriter legacyHalfPhoneUnitfileWriter legacyPhoneFeatureFileWriter legacyHalfPhoneFeatureFileWriter
pm/*.pm
, phonelab/*.lab
, phonefeatures/*.pfeats
, halfphonelab/*.hplab
mary/phoneUnits.mry
, mary/halfphoneUnits.mry
, mary/phoneFeatures.mry
, mary/phoneUnitFeatureDefinition.txt
, mary/halfphoneFeatures.mry
, mary/halfphoneUnitFeatureDefinition.txt
Using wagon
from EST
./gradlew legacyDurationCARTTrainer legacyF0CARTTrainer
mary/phoneUnits.mry
, mary/phoneFeatures.mry
, mary/timeline_waveforms.mry
mary/dur.tree
, mary/f0.left.tree
, mary/f0.mid.tree
, mary/f0.right.tree
Ready for deployment in MaryTTS installation
./gradlew assemble
mary/cart.mry
, featureSequence.txt
, mary/dur.tree
, mary/f0.left.tree
, mary/f0.mid.tree
, mary/f0.right.tree
, mary/halfphoneFeatures_ac.mry
, mary/joinCostFeatures.mry
, mary/joinCostWeights.txt
, mary/halfphoneUnits.mry
, mary/timeline_basenames.mry
, mary/timeline_waveforms.mry
my_voice.zip
, my_voice-component.xml
build.gradle
// Declares task "foo"; its only action prints a message when the task executes.
task('foo') {
    doLast {
        println('Doing stuff.')
    }
}
// Declares task "bar", which runs after "foo" and then prints its own message.
task('bar') {
    dependsOn(foo)
    doLast {
        println('Doing more stuff.')
    }
}
buildSrc/src/main/groovy/DoStuff.groovy
/**
 * Task that reads {@code inputFile} line by line and writes each line,
 * concatenated with itself, to {@code outputFile}.
 */
class DoStuff extends DefaultTask {

    /** File whose lines are read. */
    @InputFile
    final RegularFileProperty inputFile = newInputFile()

    /** File that receives the doubled lines. */
    @OutputFile
    final RegularFileProperty outputFile = newOutputFile()

    @TaskAction
    void doStuff() {
        File target = outputFile.get().asFile
        // the writer is opened first; the input is only resolved once it is open
        target.withWriter { out ->
            File source = inputFile.get().asFile
            source.eachLine { String text ->
                // emit the line's contents twice on a single output line
                out.println(text + text)
            }
        }
    }
}
src/foo.txt
foo
bar
baz
build.gradle
// "foo" reads src/foo.txt and writes bar.txt into the build directory.
task('foo', type: DoStuff) {
    inputFile.set(file('src/foo.txt'))
    outputFile.set(layout.buildDirectory.file('bar.txt'))
}
// "bar" consumes the output file of "foo" and writes baz.txt into the build directory.
task('bar', type: DoStuff) {
    inputFile.set(foo.outputFile)
    outputFile.set(layout.buildDirectory.file('baz.txt'))
}
buildSrc/src/main/groovy/DoPlugin.groovy
/**
 * Plugin that creates two {@code DoStuff} tasks, "foo" and "bar", and wires
 * the output file of "foo" to the input file of "bar".
 * The input file of "foo" is not set here.
 */
class DoPlugin implements Plugin<Project> {

    @Override
    void apply(Project project) {
        def buildDir = project.layout.buildDirectory

        def foo = project.tasks.create('foo', DoStuff)
        foo.outputFile.set(buildDir.file('bar.txt'))

        def bar = project.tasks.create('bar', DoStuff)
        bar.inputFile.set(foo.outputFile)
        bar.outputFile.set(buildDir.file('baz.txt'))
    }
}
build.gradle
// Apply the plugin, then supply the one input its "foo" task leaves unset.
apply(plugin: DoPlugin)
foo.inputFile.set(file('src/foo.txt'))
build.gradle
// Ivy repository using a custom pattern layout: artifacts are looked up as
// LDC93S1/[module].[ext] below the base URL.
repositories {
    ivy {
        url = 'https://catalog.ldc.upenn.edu/docs'
        layout('pattern') {
            artifact('LDC93S1/[module].[ext]')
        }
    }
}
// Configuration named "data"; maybeCreate reuses it if it is already defined.
configurations.maybeCreate('data')
// Single dependency in the "data" configuration: module PROMPTS with extension TXT.
dependencies {
    data(
        group: 'edu.upenn.ldc.timit',
        name: 'PROMPTS',
        version: '1988-10-31',
        ext: 'TXT'
    )
}
// Registers "processPrompts": reads the files of the "data" configuration and
// writes its results to the "text" directory under the build directory.
tasks.register('processPrompts', ProcessPrompts) {
    srcFiles = files(configurations.data)
    destDir.set(layout.buildDirectory.dir('text'))
}
buildSrc/src/main/groovy/ProcessPrompts.groovy
import org.gradle.api.DefaultTask
import org.gradle.api.file.*
import org.gradle.api.tasks.*
/**
 * Splits prompt list files into one text file per prompt.
 *
 * Every line of every source file that matches "TEXT (CODE)" produces a file
 * CODE.txt in {@code destDir} containing TEXT. Lines starting with ';' are
 * skipped.
 */
class ProcessPrompts extends DefaultTask {

    /** Prompt list files to read. */
    @InputFiles
    FileCollection srcFiles = project.files()

    /** Directory that receives one .txt file per prompt. */
    @OutputDirectory
    final DirectoryProperty destDir = newOutputDirectory()

    @TaskAction
    void process() {
        // Iterate over the source files directly. The previous implementation
        // ran project.copy only to visit each file and then excluded every
        // one of them, so no file was ever actually copied.
        srcFiles.asFileTree.each { File promptList ->
            promptList.eachLine { line ->
                // inside the closure, return moves on to the next line
                if (line.startsWith(';')) {
                    return
                }
                // group 1: prompt text, group 2: code inside the trailing parentheses
                (line =~ /(.+) \((.+)\)/).each { all, prompt, code ->
                    destDir.file("${code}.txt").get().asFile.withWriter { writer ->
                        writer.println prompt
                    }
                }
            }
        }
    }
}
Each group will need:
Phonetically balanced, e.g.,
See https://github.com/psibre/timit-prompts
Upgrade to use Gradle Pandoc reveal.js plugin
Resolve TIMIT text prompts as data dependency
Presentation laptop with HDMI output
Integrate Gradle FLAML Plugin
Forced alignment with one of
Integrate Gradle MaryTTS Kaldi MFA plugin
Don’t forget to analyze and check for errors!
Use Git.
But don’t store big binary files (such as audio) in Git!
Use solutions such as