diff options
Diffstat (limited to 'navit/support/espeak/synthesize.c')
-rw-r--r--[-rwxr-xr-x] | navit/support/espeak/synthesize.c | 818 |
1 files changed, 584 insertions, 234 deletions
diff --git a/navit/support/espeak/synthesize.c b/navit/support/espeak/synthesize.c index 3a48b1d72..d28634bfc 100755..100644 --- a/navit/support/espeak/synthesize.c +++ b/navit/support/espeak/synthesize.c @@ -1,5 +1,5 @@ /*************************************************************************** - * Copyright (C) 2005 to 2007 by Jonathan Duddington * + * Copyright (C) 2005 to 2014 by Jonathan Duddington * * email: jonsd@users.sourceforge.net * * * * This program is free software; you can redistribute it and/or modify * @@ -40,7 +40,7 @@ static void SmoothSpect(void); // list of phonemes in a clause int n_phoneme_list=0; -PHONEME_LIST phoneme_list[N_PHONEME_LIST]; +PHONEME_LIST phoneme_list[N_PHONEME_LIST+1]; int mbrola_delay; char mbrola_name[20]; @@ -54,6 +54,7 @@ static int last_wcmdq; static int pitch_length; static int amp_length; static int modn_flags; +static int fmt_amplitude=0; static int syllable_start; static int syllable_end; @@ -69,7 +70,7 @@ SOUND_ICON soundicon_tab[N_SOUNDICON_TAB]; #define VOWEL_FRONT_LENGTH 50 -#define long(x) ((long)(x)) + // a dummy phoneme_list entry which looks like a pause static PHONEME_LIST next_pause; @@ -81,7 +82,7 @@ const char *WordToString(unsigned int word) int ix; static char buf[5]; - for(ix=0; ix<3; ix++) + for(ix=0; ix<4; ix++) buf[ix] = word >> (ix*8); buf[4] = 0; return(buf); @@ -135,13 +136,13 @@ static void EndPitch(int voice_break) syllable_centre = -1; memset(vowel_transition,0,sizeof(vowel_transition)); } -} // end of Synthesize::EndPitch +} // end of EndPitch static void DoAmplitude(int amp, unsigned char *amp_env) {//===================================================== - long *q; + long64 *q; last_amp_cmd = wcmdq_tail; amp_length = 0; // total length of vowel with this amplitude envelope @@ -149,24 +150,24 @@ static void DoAmplitude(int amp, unsigned char *amp_env) q = wcmdq[wcmdq_tail]; q[0] = WCMD_AMPLITUDE; q[1] = 0; // fill in later from amp_length - q[2] = (long)amp_env; + q[2] = (long64)amp_env; q[3] = amp; WcmdqInc(); -} // end of Synthesize::DoAmplitude +} // end of DoAmplitude static void DoPitch(unsigned char *env, int pitch1, int pitch2) {//============================================================ - long *q; + long64 *q; EndPitch(0); - if(pitch1 == 1024) + if(pitch1 == 255) { // pitch was not set - pitch1 = 24; - pitch2 = 33; + pitch1 = 55; + pitch2 = 76; env = envelope_data[PITCHfall]; } last_pitch_cmd = wcmdq_tail; @@ -178,141 +179,272 @@ static void DoPitch(unsigned char *env, int pitch1, int pitch2) q = wcmdq[wcmdq_tail]; q[0] = WCMD_PITCH; q[1] = 0; // length, fill in later from pitch_length - q[2] = (long)env; + q[2] = (long64)env; q[3] = (pitch1 << 16) + pitch2; WcmdqInc(); -} // end of Synthesize::DoPitch +} // end of DoPitch int PauseLength(int pause, int control) {//==================================== - int len; + unsigned int len; if(control == 0) - len = (pause * speed.speed_factor1)/256; + { + if(pause >= 200) + len = (pause * speed.clause_pause_factor)/256; + else + len = (pause * speed.pause_factor)/256; + } else - len = (pause * speed.speed_factor2)/256; + len = (pause * speed.wav_factor)/256; - if(len < 5) len = 5; // mS, limit the amount to which pauses can be shortened + if(len < speed.min_pause) + { + len = speed.min_pause; // mS, limit the amount to which pauses can be shortened + } return(len); } static void DoPause(int length, int control) {//========================================= +// length in nominal mS // control = 1, less shortening at fast speeds - int len; + unsigned int len; + int srate2; - len = PauseLength(length, control); + if(length == 0) + len = 0; + else + { + len = PauseLength(length, control); - len = (len * samplerate) / 1000; // convert from mS to number of samples + if(len < 90000) + { + len = (len * samplerate) / 1000; // convert from mS to number of samples + } + else + { + srate2 = samplerate / 25; // avoid overflow + len = (len * srate2) / 40; + } + } EndPitch(1); wcmdq[wcmdq_tail][0] = WCMD_PAUSE; wcmdq[wcmdq_tail][1] = len; WcmdqInc(); last_frame = NULL; -} // end of Synthesize::DoPause + + if(fmt_amplitude != 0) + { + wcmdq[wcmdq_tail][0] = WCMD_FMT_AMPLITUDE; + wcmdq[wcmdq_tail][1] = fmt_amplitude = 0; + WcmdqInc(); + } +} // end of DoPause extern int seq_len_adjust; // temporary fix to advance the start point for playing the wav sample -static int DoSample2(int index, int which, int length_mod, int amp) -{//================================================================ +static int DoSample2(int index, int which, int std_length, int control, int length_mod, int amp) +{//============================================================================================= int length; - int length1; - int format; + int wav_length; + int wav_scale; int min_length; - int start=0; - long *q; + int x; + int len4; + long64 *q; unsigned char *p; index = index & 0x7fffff; p = &wavefile_data[index]; - format = p[2]; - length1 = (p[1] * 256); - length1 += p[0]; // length in bytes + wav_scale = p[2]; + wav_length = (p[1] * 256); + wav_length += p[0]; // length in bytes + + if(wav_length == 0) + return(0); + + min_length = speed.min_sample_len; - if(seq_len_adjust > 0) + if(wav_scale==0) + min_length *= 2; // 16 bit samples + else { - start = (seq_len_adjust * samplerate)/1000; - if(format == 0) - start *= 2; - length1 -= start; - index += start; + // increase consonant amplitude at high speeds, depending on the peak consonant amplitude +// x = ((35 - wav_scale) * speed.loud_consonants); +// if(x < 0) x = 0; +// wav_scale = (wav_scale * (x+256))/256; } + if(std_length > 0) + { + std_length = (std_length * samplerate)/1000; + if(wav_scale == 0) + std_length *= 2; - if(length_mod > 0) - length = (length1 * length_mod) / 256; + x = (min_length * std_length)/wav_length; + if(x > min_length) + min_length = x; + } else - length = length1; + { + // no length specified, use the length of the stored sound + std_length = wav_length; + } + if(length_mod > 0) + { + std_length = (std_length * length_mod)/256; + } - length = (length * speed.speed_factor2)/256; - min_length = speed.min_sample_len; - if(format==0) - min_length *= 2; + length = (std_length * speed.wav_factor)/256; + + if(control & pd_DONTLENGTHEN) + { + // this option is used for Stops, with short noise bursts. + // Don't change their length much. + if(length > std_length) + { + // don't let length exceed std_length + length = std_length; + } + else + { + // reduce the reduction in length +// length = (length + std_length)/2; + } + } if(length < min_length) length = min_length; - if(length > length1) - length = length1; // don't exceed wavefile length - if(format==0) - length /= 2; // 2 byte samples + if(wav_scale == 0) + { + // 16 bit samples + length /= 2; + wav_length /= 2; + } + + if(amp < 0) + return(length); + len4 = wav_length / 4; index += 4; - if(amp >= 0) + if(which & 0x100) { + // mix this with synthesised wave last_wcmdq = wcmdq_tail; q = wcmdq[wcmdq_tail]; - if(which & 0x100) - q[0] = WCMD_WAVE2; // mix this with synthesised wave - else - q[0] = WCMD_WAVE; + q[0] = WCMD_WAVE2; + q[1] = length | (wav_length << 16); // length in samples + q[2] = (long64)(&wavefile_data[index]); + q[3] = wav_scale + (amp << 8); + WcmdqInc(); + return(length); + } + + if(length > wav_length) + { + x = len4*3; + length -= x; + } + else + { + x = length; + length = 0; + } + + last_wcmdq = wcmdq_tail; + q = wcmdq[wcmdq_tail]; + q[0] = WCMD_WAVE; + q[1] = x; // length in samples + q[2] = (long64)(&wavefile_data[index]); + q[3] = wav_scale + (amp << 8); + WcmdqInc(); + + + while(length > len4*3) + { + x = len4; + if(wav_scale == 0) + x *= 2; + + last_wcmdq = wcmdq_tail; + q = wcmdq[wcmdq_tail]; + q[0] = WCMD_WAVE; + q[1] = len4*2; // length in samples + q[2] = (long64)(&wavefile_data[index+x]); + q[3] = wav_scale + (amp << 8); + WcmdqInc(); + + length -= len4*2; + } + + if(length > 0) + { + x = wav_length - length; + if(wav_scale == 0) + x *= 2; + last_wcmdq = wcmdq_tail; + q = wcmdq[wcmdq_tail]; + q[0] = WCMD_WAVE; q[1] = length; // length in samples - q[2] = long(&wavefile_data[index]); - q[3] = format + (amp << 8); + q[2] = (long64)(&wavefile_data[index+x]); + q[3] = wav_scale + (amp << 8); WcmdqInc(); } + return(length); -} // end of Synthesize::DoSample2 +} // end of DoSample2 -int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int amp) -{//====================== ========================================================== - int index; - int match_level; - int amp2; - int result; +int DoSample3(PHONEME_DATA *phdata, int length_mod, int amp) +{//========================================================= + int amp2; + int len; EndPitch(1); - index = LookupSound(ph1,ph2,which & 0xff,&match_level,0); - if((index & 0x800000) == 0) - return(0); // not wavefile data - - amp2 = wavefile_amp; - if(amp != 0) - amp2 = (amp * wavefile_amp)/20; if(amp == -1) + { + // just get the length, don't produce sound amp2 = amp; + } + else + { + amp2 = phdata->sound_param[pd_WAV]; + if(amp2 == 0) + amp2 = 100; + amp2 = (amp2 * 32)/100; + } - result = DoSample2(index,which,length_mod,amp2); + seq_len_adjust=0; + + if(phdata->sound_addr[pd_WAV] == 0) + { + len = 0; + } + else + { + len = DoSample2(phdata->sound_addr[pd_WAV], 2, phdata->pd_param[pd_LENGTHMOD]*2, phdata->pd_control, length_mod, amp2); + } last_frame = NULL; - return(result); -} // end of Synthesize::DoSample + return(len); +} // end of DoSample3 -static frame_t *AllocFrame() +static frame_t *AllocFrame(void) {//========================= // Allocate a temporary spectrum frame for the wavegen queue. Use a pool which is big // enough to use a round-robin without checks. @@ -361,7 +493,7 @@ static void set_frame_rms(frame_t *fr, int new_rms) } return; } - + if(fr->rms == 0) return; // check for divide by zero x = (new_rms * 64)/fr->rms; if(x >= 200) x = 199; @@ -385,7 +517,7 @@ static void formants_reduce_hf(frame_t *fr, int level) if(voice->klattv[0]) return; - + for(ix=2; ix < 8; ix++) { x = fr->fheight[ix] * level; @@ -472,7 +604,7 @@ static void AdjustFormants(frame_t *fr, int target, int min, int max, int f1_adj fr->ffreq[1] += x; fr->ffreq[0] += x; } - formants_reduce_hf(fr,hf_reduce); + formants_reduce_hf(fr,hf_reduce); } @@ -535,7 +667,7 @@ static short vcolouring[N_VCOLOUR][5] = { // fprintf(stderr,"FMT%d %3s %3d-%3d f1=%d f2=%4d %4d %4d f3=%4d %3d\n", // which,WordToString(other_ph->mnemonic),len,rms,f1,f2,f2_min,f2_max,f3_adj,f3_amp); - if(other_ph->mnemonic == '?') + if((other_ph != NULL) && (other_ph->mnemonic == '?')) flags |= 8; if(which == 1) @@ -546,14 +678,15 @@ static short vcolouring[N_VCOLOUR][5] = { seq[0].length = VOWEL_FRONT_LENGTH; if(len > 0) seq[0].length = len; - seq[0].frflags |= FRFLAG_LEN_MOD; // reduce length modification - fr->frflags |= FRFLAG_LEN_MOD; + seq[0].frflags |= FRFLAG_LEN_MOD2; // reduce length modification + fr->frflags |= FRFLAG_LEN_MOD2; next_rms = seq[1].frame->rms; if(voice->klattv[0]) { - fr->klattp[KLATT_AV] = 53; // reduce the amplituide of the start of a vowel +// fr->klattp[KLATT_AV] = 53; // reduce the amplituide of the start of a vowel + fr->klattp[KLATT_AV] = seq[1].frame->klattp[KLATT_AV] - 4; } if(f2 != 0) { @@ -594,7 +727,7 @@ if(voice->klattv[0]) fr = CopyFrame(seq[*n_frames-1].frame,0); seq[*n_frames-1].frame = fr; rms = RMS_GLOTTAL1; - + // degree of glottal-stop effect depends on closeness of vowel (indicated by f1 freq) modn_flags = 0x400 + (VowelCloseness(fr) << 8); } @@ -603,7 +736,7 @@ if(voice->klattv[0]) fr = DuplicateLastFrame(seq,(*n_frames)++,len); if(len > 36) seq_len_adjust += (len - 36); - + if(f2 != 0) { AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags); @@ -614,11 +747,11 @@ if(voice->klattv[0]) if((vcolour > 0) && (vcolour <= N_VCOLOUR)) { - for(ix=0; ix<*n_frames; ix++) + for(ix=0; ix < *n_frames; ix++) { fr = CopyFrame(seq[ix].frame,0); seq[ix].frame = fr; - + for(formant=1; formant<=5; formant++) { int x; @@ -639,7 +772,7 @@ if(voice->klattv[0]) } if(flags & 0x40) - DoPause(12,0); // add a short pause after the consonant + DoPause(20,0); // add a short pause after the consonant if(flags & 16) return(len); @@ -652,7 +785,7 @@ static void SmoothSpect(void) {//========================== // Limit the rate of frequence change of formants, to reduce chirping - long *q; + long64 *q; frame_t *frame; frame_t *frame2; frame_t *frame1; @@ -694,7 +827,7 @@ static void SmoothSpect(void) frame1 = (frame_t *)q[3]; if(frame1 == frame) { - q[3] = (long)frame2; + q[3] = (long64)frame2; frame1 = frame2; } else @@ -742,7 +875,7 @@ static void SmoothSpect(void) modified = 1; } frame2->ffreq[pk] = frame1->ffreq[pk] + allowed; - q[2] = (long)frame2; + q[2] = (long64)frame2; } else if(diff < -allowed) @@ -753,7 +886,7 @@ static void SmoothSpect(void) modified = 1; } frame2->ffreq[pk] = frame1->ffreq[pk] - allowed; - q[2] = (long)frame2; + q[2] = (long64)frame2; } } } @@ -784,7 +917,7 @@ static void SmoothSpect(void) { if(frame1 == frame) { - q[2] = (long)frame2; + q[2] = (long64)frame2; frame1 = frame2; } else @@ -826,7 +959,7 @@ static void SmoothSpect(void) modified = 1; } frame2->ffreq[pk] = frame1->ffreq[pk] + allowed; - q[3] = (long)frame2; + q[3] = (long64)frame2; } else if(diff < -allowed) @@ -837,7 +970,7 @@ static void SmoothSpect(void) modified = 1; } frame2->ffreq[pk] = frame1->ffreq[pk] - allowed; - q[3] = (long)frame2; + q[3] = (long64)frame2; } } } @@ -860,10 +993,10 @@ static void StartSyllable(void) } -int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, - int which, PHONEME_LIST *plist, int modulation) -{//=================================================================================== - // which 1 start of phoneme, 2 body and end + +int DoSpect2(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params, PHONEME_LIST *plist, int modulation) +{//======================================================================================================== + // which: 0 not a vowel, 1 start of vowel, 2 body and end of vowel // length_mod: 256 = 100% // modulation: -1 = don't write to wcmdq @@ -874,25 +1007,35 @@ int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, frame_t *frame2; frame_t *fr; int ix; - long *q; + long64 *q; int len; - int match_level; int frame_length; - int frame1_length; - int frame2_length; int length_factor; int length_mod; + int length_sum; + int length_min; int total_len = 0; static int wave_flag = 0; int wcmd_spect = WCMD_SPECT; + int frame_lengths[N_SEQ_FRAMES]; + + if(fmt_params->fmt_addr == 0) + return(0); length_mod = plist->length; if(length_mod==0) length_mod=256; + length_min = (samplerate/70); // greater than one cycle at low pitch (Hz) + if(which==2) + { + if((translator->langopts.param[LOPT_LONG_VOWEL_THRESHOLD] > 0) && ((this_ph->std_length >= translator->langopts.param[LOPT_LONG_VOWEL_THRESHOLD]) || (plist->synthflags & SFLAG_LENGTHEN) || (this_ph->phflags & phLONG))) + length_min *= 2; // ensure long vowels are longer + } + if(which==1) { // limit the shortening of sonorants before shortened (eg. unstressed vowels) - if((this_ph->type==phLIQUID) || (prev_ph->type==phLIQUID) || (prev_ph->type==phNASAL)) + if((this_ph->type==phLIQUID) || (plist[-1].type==phLIQUID) || (plist[-1].type==phNASAL)) { if(length_mod < (len = translator->langopts.param[LOPT_SONORANT_MIN])) { @@ -902,15 +1045,30 @@ if(which==1) } modn_flags = 0; - frames = LookupSpect(this_ph,prev_ph,next_ph,which,&match_level,&n_frames, plist); + frames = LookupSpect(this_ph, which, fmt_params, &n_frames, plist); if(frames == NULL) return(0); // not found + if(fmt_params->fmt_amp != fmt_amplitude) + { + // an amplitude adjustment is specified for this sequence + q = wcmdq[wcmdq_tail]; + q[0] = WCMD_FMT_AMPLITUDE; + q[1] = fmt_amplitude = fmt_params->fmt_amp; + WcmdqInc(); + } + frame1 = frames[0].frame; - frame1_length = frames[0].length; if(voice->klattv[0]) wcmd_spect = WCMD_KLATT; + wavefile_ix = fmt_params->wav_addr; + + if(fmt_params->wav_amp == 0) + wavefile_amp = 32; + else + wavefile_amp = (fmt_params->wav_amp * 32)/100; + if(wavefile_ix == 0) { if(wave_flag) @@ -935,7 +1093,7 @@ if(which==1) && !(last_frame->frflags & FRFLAG_BREAK)) { // last frame of previous sequence was zero-length, replace with first of this sequence - wcmdq[last_wcmdq][3] = (long)frame1; + wcmdq[last_wcmdq][3] = (long64)frame1; if(last_frame->frflags & FRFLAG_BREAK_LF) { @@ -947,7 +1105,7 @@ if(which==1) fr->ffreq[ix] = last_frame->ffreq[ix]; fr->fheight[ix] = last_frame->fheight[ix]; } - wcmdq[last_wcmdq][3] = (long)fr; + wcmdq[last_wcmdq][3] = (long64)fr; } } } @@ -960,29 +1118,50 @@ if(which==1) syllable_centre = wcmdq_tail; } - frame_length = frame1_length; + length_sum = 0; + for(frameix=1; frameix < n_frames; frameix++) + { + length_factor = length_mod; + if(frames[frameix-1].frflags & FRFLAG_LEN_MOD) // reduce effect of length mod + { + length_factor = (length_mod*(256-speed.lenmod_factor) + 256*speed.lenmod_factor)/256; + } + else + if(frames[frameix-1].frflags & FRFLAG_LEN_MOD2) // reduce effect of length mod, used for the start of a vowel + { + length_factor = (length_mod*(256-speed.lenmod2_factor) + 256*speed.lenmod2_factor)/256; + } + + frame_length = frames[frameix-1].length; + len = (frame_length * samplerate)/1000; + len = (len * length_factor)/256; + length_sum += len; + frame_lengths[frameix] = len; + } + + if((length_sum > 0) && (length_sum < length_min)) + { + // lengthen, so that the sequence is greater than one cycle at low pitch + for(frameix=1; frameix < n_frames; frameix++) + { + frame_lengths[frameix] = (frame_lengths[frameix] * length_min) / length_sum; + } + } + for(frameix=1; frameix<n_frames; frameix++) { frame2 = frames[frameix].frame; - frame2_length = frames[frameix].length; - if((wavefile_ix != 0) && ((frame1->frflags & FRFLAG_DEFER_WAV)==0)) + if((fmt_params->wav_addr != 0) && ((frame1->frflags & FRFLAG_DEFER_WAV)==0)) { // there is a wave file to play along with this synthesis seq_len_adjust = 0; - DoSample2(wavefile_ix,which+0x100,0,wavefile_amp); + DoSample2(fmt_params->wav_addr, which+0x100, 0, fmt_params->fmt_control, 0, wavefile_amp); wave_flag = 1; wavefile_ix = 0; + fmt_params->wav_addr = 0; } - length_factor = length_mod; - if(frame1->frflags & FRFLAG_LEN_MOD) // reduce effect of length mod - { - length_factor = (length_mod*(256-speed.speed_factor3) + 256*speed.speed_factor3)/256; - } - len = (frame_length * samplerate)/1000; - len = (len * length_factor)/256; - if(modulation >= 0) { if(frame1->frflags & FRFLAG_MODULATE) @@ -993,13 +1172,13 @@ if(which==1) modulation |= modn_flags; // before or after a glottal stop } + len = frame_lengths[frameix]; pitch_length += len; amp_length += len; - if(frame_length < 2) + if(len == 0) { last_frame = NULL; - frame_length = frame2_length; frame1 = frame2; } else @@ -1011,31 +1190,72 @@ if(which==1) q = wcmdq[wcmdq_tail]; q[0] = wcmd_spect; q[1] = len + (modulation << 16); - q[2] = long(frame1); - q[3] = long(frame2); - + q[2] = (long64)frame1; + q[3] = (long64)frame2; + WcmdqInc(); } last_frame = frame1 = frame2; - frame_length = frame2_length; total_len += len; } } + + if((which != 1) && (fmt_amplitude != 0)) + { + q = wcmdq[wcmdq_tail]; + q[0] = WCMD_FMT_AMPLITUDE; + q[1] = fmt_amplitude = 0; + WcmdqInc(); + } + + return(total_len); -} // end of Synthesize::DoSpect +} // end of DoSpect -static void DoMarker(int type, int char_posn, int length, int value) -{//================================================================= + + +void DoMarker(int type, int char_posn, int length, int value) +{//========================================================== // This could be used to return an index to the word currently being spoken // Type 1=word, 2=sentence, 3=named marker, 4=play audio, 5=end - wcmdq[wcmdq_tail][0] = WCMD_MARKER; - wcmdq[wcmdq_tail][1] = type; - wcmdq[wcmdq_tail][2] = (char_posn & 0xffffff) | (length << 24); - wcmdq[wcmdq_tail][3] = value; - WcmdqInc(); + if(WcmdqFree() > 5) + { + wcmdq[wcmdq_tail][0] = WCMD_MARKER + (type << 8); + wcmdq[wcmdq_tail][1] = (char_posn & 0xffffff) | (length << 24); + wcmdq[wcmdq_tail][2] = value; + WcmdqInc(); + } +} // end of DoMarker + + +void DoPhonemeMarker(int type, int char_posn, int length, char *name) +{//================================================================== +// This could be used to return an index to the word currently being spoken +// Type 7=phoneme + int *p; + + if(WcmdqFree() > 5) + { + wcmdq[wcmdq_tail][0] = WCMD_MARKER + (type << 8); + wcmdq[wcmdq_tail][1] = (char_posn & 0xffffff) | (length << 24); + p = (int *)name; + wcmdq[wcmdq_tail][2] = p[0]; // up to 8 bytes of UTF8 characters + wcmdq[wcmdq_tail][3] = p[1]; + WcmdqInc(); + } +} // end of DoMarker + -} // end of Synthesize::DoMarker +#ifdef INCLUDE_SONIC +void DoSonicSpeed(int value) +{//========================= +// value, multiplier * 1024 + wcmdq[wcmdq_tail][0] = WCMD_SONIC_SPEED; + wcmdq[wcmdq_tail][1] = value; + WcmdqInc(); +} // end of DoSonicSpeed +#endif void DoVoiceChange(voice_t *v) @@ -1046,23 +1266,28 @@ void DoVoiceChange(voice_t *v) v2 = (voice_t *)malloc(sizeof(voice_t)); memcpy(v2,v,sizeof(voice_t)); wcmdq[wcmdq_tail][0] = WCMD_VOICE; - wcmdq[wcmdq_tail][1] = (long)(v2); + wcmdq[wcmdq_tail][2] = (long64)v2; WcmdqInc(); } -static void DoEmbedded(int *embix, int sourceix) -{//============================================= +void DoEmbedded(int *embix, int sourceix) +{//====================================== // There were embedded commands in the text at this point unsigned int word; // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command unsigned int value; int command; do { - word = embedded_list[(*embix)++]; + word = embedded_list[*embix]; value = word >> 8; command = word & 0x7f; + if(command == 0) + return; // error + + (*embix)++; + switch(command & 0x1f) { case EMBED_S: // speed @@ -1078,7 +1303,7 @@ static void DoEmbedded(int *embix, int sourceix) DoPause(10,0); // ensure a break in the speech wcmdq[wcmdq_tail][0] = WCMD_WAVE; wcmdq[wcmdq_tail][1] = soundicon_tab[value].length; - wcmdq[wcmdq_tail][2] = (long)soundicon_tab[value].data + 44; // skip WAV header + wcmdq[wcmdq_tail][2] = (long64)soundicon_tab[value].data + 44; // skip WAV header wcmdq[wcmdq_tail][3] = 0x1500; // 16 bit data, amp=21 WcmdqInc(); } @@ -1123,17 +1348,27 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) unsigned char *pitch_env=NULL; unsigned char *amp_env; PHONEME_TAB *ph; - PHONEME_TAB *prev_ph; + int use_ipa=0; + int done_phoneme_marker; + char phoneme_name[16]; static int sourceix=0; -#ifdef TEST_MBROLA - if(mbrola_name[0] != 0) - return(MbrolaGenerate(phoneme_list,n_ph,resume)); -#endif + PHONEME_DATA phdata; + PHONEME_DATA phdata_prev; + PHONEME_DATA phdata_next; + PHONEME_DATA phdata_tone; + FMT_PARAMS fmtp; + static WORD_PH_DATA worddata; if(option_quiet) return(0); + if(option_phoneme_events & espeakINITIALIZE_PHONEME_IPA) + use_ipa = 1; + + if(mbrola_name[0] != 0) + return(MbrolaGenerate(phoneme_list,n_ph,resume)); + if(resume == 0) { ix = 1; @@ -1148,19 +1383,21 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) syllable_centre = -1; last_pitch_cmd = -1; memset(vowel_transition,0,sizeof(vowel_transition)); + memset(&worddata, 0, sizeof(worddata)); + DoPause(0,0); // isolate from the previous clause } - while(ix < (*n_ph)) + while((ix < (*n_ph)) && (ix < N_PHONEME_LIST-2)) { p = &phoneme_list[ix]; if(p->type == phPAUSE) - free_min = 5; + free_min = 10; else if(p->type != phVOWEL) - free_min = 10; // we need less Q space for non-vowels, and we need to generate phonemes after a vowel so that the pitch_length is filled in + free_min = 15; // we need less Q space for non-vowels, and we need to generate phonemes after a vowel so that the pitch_length is filled in else - free_min = MIN_WCMDQ; // 22 + free_min = MIN_WCMDQ; // 25 if(WcmdqFree() <= free_min) return(1); // wait @@ -1176,8 +1413,14 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) if(p->newword) { - if(translator->langopts.param[LOPT_WORD_MERGE] == 0) + if(((p->type == phVOWEL) && (translator->langopts.param[LOPT_WORD_MERGE] & 1)) || + (p->ph->phflags & phNOPAUSE)) + { + } + else + { last_frame = NULL; + } sourceix = (p->sourceix & 0x7ff) + clause_start_char; @@ -1188,44 +1431,91 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) // DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause if(p->newword & 1) - DoMarker(espeakEVENT_WORD, sourceix, p->sourceix >> 11, clause_start_word + word_count++); + DoMarker(espeakEVENT_WORD, sourceix, p->sourceix >> 11, clause_start_word + word_count++); // NOTE, this count doesn't include multiple-word pronunciations in *_list. eg (of a) } EndAmplitude(); - if(p->prepause > 0) + if((p->prepause > 0) && !(p->ph->phflags & phPREVOICE)) DoPause(p->prepause,1); - if(option_phoneme_events && (p->type != phVOWEL)) + done_phoneme_marker = 0; + if(option_phoneme_events && (p->ph->code != phonEND_WORD)) { - // Note, for vowels, do the phoneme event after the vowel-start - DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic); + if((p->type == phVOWEL) && (prev->type==phLIQUID || prev->type==phNASAL)) + { + // For vowels following a liquid or nasal, do the phoneme event after the vowel-start + } + else + { + WritePhMnemonic(phoneme_name, p->ph, p, use_ipa, NULL); + DoPhonemeMarker(espeakEVENT_PHONEME, sourceix, 0, phoneme_name); + done_phoneme_marker = 1; + } } switch(p->type) { case phPAUSE: DoPause(p->length,0); +#ifdef _ESPEAKEDIT + p->std_length = p->ph->std_length; +#endif break; case phSTOP: released = 0; - if(next->type==phVOWEL) released = 1; - if(next->type==phLIQUID && !next->newword) released = 1; - - if(released) - DoSample(p->ph,next->ph,2,0,0); + ph = p->ph; + if(next->type==phVOWEL) + { + released = 1; + } else - DoSample(p->ph,phoneme_tab[phonPAUSE],2,0,0); + if(!next->newword) + { + if(next->type==phLIQUID) released = 1; +// if(((p->ph->phflags & phPLACE) == phPLACE_blb) && (next->ph->phflags & phSIBILANT)) released = 1; + } + if(released == 0) + p->synthflags |= SFLAG_NEXT_PAUSE; + + if(ph->phflags & phPREVOICE) + { + // a period of voicing before the release + memset(&fmtp, 0, sizeof(fmtp)); + InterpretPhoneme(NULL, 0x01, p, &phdata, &worddata); + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + + if(last_pitch_cmd < 0) + { + DoAmplitude(next->amp,NULL); + DoPitch(envelope_data[p->env],next->pitch1,next->pitch2); + } + + DoSpect2(ph, 0, &fmtp, p, 0); + } + + InterpretPhoneme(NULL, 0, p, &phdata, &worddata); + phdata.pd_control |= pd_DONTLENGTHEN; + DoSample3(&phdata, 0, 0); break; case phFRICATIVE: + InterpretPhoneme(NULL, 0, p, &phdata, &worddata); + if(p->synthflags & SFLAG_LENGTHEN) - DoSample(p->ph,next->ph,2,p->length,0); // play it twice for [s:] etc. - DoSample(p->ph,next->ph,2,p->length,0); + { + DoSample3(&phdata, p->length, 0); // play it twice for [s:] etc. + } + DoSample3(&phdata, p->length, 0); break; case phVSTOP: + ph = p->ph; + memset(&fmtp, 0, sizeof(fmtp)); + fmtp.fmt_control = pd_DONTLENGTHEN; + pre_voiced = 0; if(next->type==phVOWEL) { @@ -1249,14 +1539,18 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) } } - if((prev->type==phVOWEL) || (prev->ph->phflags & phVOWEL2)) + if((prev->type==phVOWEL) || (prev->ph->phflags & phVOWEL2) || (ph->phflags & phPREVOICE)) { // a period of voicing before the release - DoSpect(p->ph,phoneme_tab[phonSCHWA],next->ph,1,p,0); + InterpretPhoneme(NULL, 0x01, p, &phdata, &worddata); + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + + DoSpect2(ph, 0, &fmtp, p, 0); if(p->synthflags & SFLAG_LENGTHEN) { - DoPause(20,0); - DoSpect(p->ph,phoneme_tab[phonSCHWA],next->ph,1,p,0); + DoPause(25,1); + DoSpect2(ph, 0, &fmtp, p, 0); } } else @@ -1271,15 +1565,24 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) { // followed by a vowel, or liquid + vowel StartSyllable(); - DoSpect(p->ph,prev->ph,next->ph,2,p,0); } else { -// if((prev->type != phVOWEL) && ((prev->ph->phflags & phVOICED)==0) && ((next->ph->phflags & phVOICED)==0)) -// DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE_SHORT],2,p,0); -// else - DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0); -// DoSpect(p->ph,prev->ph,next->ph,2,p,0); + p->synthflags |= SFLAG_NEXT_PAUSE; + } + InterpretPhoneme(NULL,0, p, &phdata, &worddata); + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV]; + fmtp.wav_amp = phdata.sound_param[pd_ADDWAV]; + DoSpect2(ph, 0, &fmtp, p, 0); + + if((p->newword == 0) && (next2->newword == 0)) + { + if(next->type == phVFRICATIVE) + DoPause(20,0); + if(next->type == phFRICATIVE) + DoPause(12,0); } break; @@ -1304,22 +1607,29 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) } } - if((next->type==phVOWEL) || ((next->type==phLIQUID)) && (next->newword==0)) // ?? test 14.Aug.2007 + if((next->type==phVOWEL) || ((next->type==phLIQUID) && (next->newword==0))) // ?? test 14.Aug.2007 { StartSyllable(); - if(p->synthflags & SFLAG_LENGTHEN) - DoSpect(p->ph,prev->ph,next->ph,2,p,0); - DoSpect(p->ph,prev->ph,next->ph,2,p,0); } else { - if(p->synthflags & SFLAG_LENGTHEN) - DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0); - DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0); + p->synthflags |= SFLAG_NEXT_PAUSE; } + InterpretPhoneme(NULL,0, p, &phdata, &worddata); + memset(&fmtp, 0, sizeof(fmtp)); + fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2; + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV]; + fmtp.wav_amp = phdata.sound_param[pd_ADDWAV]; + + if(p->synthflags & SFLAG_LENGTHEN) + DoSpect2(p->ph, 0, &fmtp, p, 0); + DoSpect2(p->ph, 0, &fmtp, p, 0); break; case phNASAL: + memset(&fmtp, 0, sizeof(fmtp)); if(!(p->synthflags & SFLAG_SEQCONTINUE)) { DoAmplitude(p->amp,NULL); @@ -1331,37 +1641,36 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) last_frame = NULL; } + InterpretPhoneme(NULL,0, p, &phdata, &worddata); + fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2; + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + if(next->type==phVOWEL) { StartSyllable(); - DoSpect(p->ph,prev->ph,next->ph,1,p,0); + DoSpect2(p->ph, 0, &fmtp, p, 0); } else if(prev->type==phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE)) { - DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0); + DoSpect2(p->ph, 0, &fmtp, p, 0); } else { last_frame = NULL; // only for nasal ? - if(next->type == phLIQUID) - DoSpect(p->ph,prev->ph,phoneme_tab[phonSONORANT],2,p,0); - else - DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0); + DoSpect2(p->ph, 0, &fmtp, p, 0); last_frame = NULL; } break; case phLIQUID: + memset(&fmtp, 0, sizeof(fmtp)); modulation = 0; if(p->ph->phflags & phTRILL) modulation = 5; - prev_ph = prev->ph; -// if(p->newword) -// prev_ph = phoneme_tab[phonPAUSE]; // pronounce fully at the start of a word - if(!(p->synthflags & SFLAG_SEQCONTINUE)) { DoAmplitude(p->amp,NULL); @@ -1376,17 +1685,14 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) if(next->type==phVOWEL) { StartSyllable(); - DoSpect(p->ph,prev_ph,next->ph,1,p,modulation); // (,)r - } - else - if(prev->type==phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE)) - { - DoSpect(p->ph,prev_ph,next->ph,1,p,modulation); - } - else - { - DoSpect(p->ph,prev_ph,next->ph,1,p,modulation); } + InterpretPhoneme(NULL, 0, p, &phdata, &worddata); + fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2; + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV]; + fmtp.wav_amp = phdata.sound_param[pd_ADDWAV]; + DoSpect2(p->ph, 0, &fmtp, p, modulation); break; @@ -1394,16 +1700,48 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) ph = p->ph; stress = p->stresslevel & 0xf; - // vowel transition from the preceding phoneme - vowel_transition0 = vowel_transition[0]; - vowel_transition1 = vowel_transition[1]; + memset(&fmtp, 0, sizeof(fmtp)); + + InterpretPhoneme(NULL, 0, p, &phdata, &worddata); + fmtp.std_length = phdata.pd_param[i_SET_LENGTH] * 2; + + if(((fmtp.fmt_addr = phdata.sound_addr[pd_VWLSTART]) != 0) && ((phdata.pd_control & pd_FORNEXTPH) == 0)) + { + // a vowel start has been specified by the Vowel program + fmtp.fmt_length = phdata.sound_param[pd_VWLSTART]; + } + else + if(prev->type != phPAUSE) + { + // check the previous phoneme + InterpretPhoneme(NULL, 0, prev, &phdata_prev, NULL); + if((fmtp.fmt_addr = phdata_prev.sound_addr[pd_VWLSTART]) != 0) + { + // a vowel start has been specified by the Vowel program + fmtp.fmt2_lenadj = phdata_prev.sound_param[pd_VWLSTART]; + } + fmtp.transition0 = phdata_prev.vowel_transition[0]; + fmtp.transition1 = phdata_prev.vowel_transition[1]; + } + + if(fmtp.fmt_addr == 0) + { + // use the default start for this vowel + fmtp.use_vowelin = 1; + fmtp.fmt_control = 1; + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + } + + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; pitch_env = envelope_data[p->env]; amp_env = NULL; if(p->tone_ph != 0) { - pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect); - amp_env = LookupEnvelope(phoneme_tab[p->tone_ph]->after); + InterpretPhoneme2(p->tone_ph, &phdata_tone); + pitch_env = GetEnvelope(phdata_tone.pitch_env); + if(phdata_tone.amp_env > 0) + amp_env = GetEnvelope(phdata_tone.amp_env); } StartSyllable(); @@ -1419,13 +1757,13 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) { DoAmplitude(p->amp,amp_env); DoPitch(pitch_env,p->pitch1,p->pitch2); // don't use prevocalic rising tone - DoSpect(ph,prev->ph,next->ph,1,p,modulation); + DoSpect2(ph, 1, &fmtp, p, modulation); } else if(prev->type==phLIQUID || prev->type==phNASAL) { DoAmplitude(p->amp,amp_env); - DoSpect(ph,prev->ph,next->ph,1,p,modulation); // continue with pre-vocalic rising tone + DoSpect2(ph, 1, &fmtp, p, modulation); // continue with pre-vocalic rising tone DoPitch(pitch_env,p->pitch1,p->pitch2); } else @@ -1436,17 +1774,42 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) DoPitch(pitch_env,p->pitch1,p->pitch2); } - DoSpect(ph,prev->ph,next->ph,1,p,modulation); + DoSpect2(ph, 1, &fmtp, p, modulation); + } + + if((option_phoneme_events) && (done_phoneme_marker == 0)) + { + WritePhMnemonic(phoneme_name, p->ph, p, use_ipa, NULL); + DoPhonemeMarker(espeakEVENT_PHONEME, sourceix, 0, phoneme_name); } - if(option_phoneme_events) + fmtp.fmt_addr = phdata.sound_addr[pd_FMT]; + fmtp.fmt_amp = phdata.sound_param[pd_FMT]; + fmtp.transition0 = 0; + fmtp.transition1 = 0; + + if((fmtp.fmt2_addr = phdata.sound_addr[pd_VWLEND]) != 0) { - DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic); + fmtp.fmt2_lenadj = phdata.sound_param[pd_VWLEND]; } + else + if(next->type != phPAUSE) + { + fmtp.fmt2_lenadj = 0; + InterpretPhoneme(NULL, 0, next, &phdata_next, NULL); - DoSpect(p->ph,prev->ph,next->ph,2,p,modulation); + fmtp.use_vowelin = 1; + fmtp.transition0 = phdata_next.vowel_transition[2]; // always do vowel_transition, even if ph_VWLEND ?? consider [N] + fmtp.transition1 = phdata_next.vowel_transition[3]; + + if((fmtp.fmt2_addr = phdata_next.sound_addr[pd_VWLEND]) != 0) + { + fmtp.fmt2_lenadj = phdata_next.sound_param[pd_VWLEND]; + } + } + + DoSpect2(ph, 2, &fmtp, p, modulation); - memset(vowel_transition,0,sizeof(vowel_transition)); break; } ix++; @@ -1512,6 +1875,7 @@ int SpeakNextClause(FILE *f_in, const void *text_in, int control) char *voice_change; static FILE *f_text=NULL; static const void *p_text=NULL; + const char *phon_out; if(control == 4) { @@ -1534,7 +1898,6 @@ int SpeakNextClause(FILE *f_in, const void *text_in, int control) n_phoneme_list = 0; WcmdqStop(); - embedded_value[EMBED_T] = 0; return(0); } @@ -1599,20 +1962,21 @@ int SpeakNextClause(FILE *f_in, const void *text_in, int control) CalcPitches(translator, clause_tone); CalcLengths(translator); - GetTranslatedPhonemeString(translator->phon_out,sizeof(translator->phon_out)); - if(option_phonemes > 0) + if((option_phonemes > 0) || (phoneme_callback != NULL)) { - fprintf(f_trans,"%s\n",translator->phon_out); + int phoneme_mode = 0; + if(option_phonemes >= 3) + phoneme_mode = 0x10 + option_phonemes-3; // 0x10=ipa, 0x11=ipa with tie, 0x12=ipa with ZWJ, 0x13=ipa with separators - if(!iswalpha(0x010d)) + phon_out = GetTranslatedPhonemeString(phoneme_mode); + if(option_phonemes > 0) { - // check that c-caron is recognized as an alphabetic character - fprintf(stderr,"Warning: Accented letters are not recognized, eg: U+010D\nSet LC_CTYPE to a UTF-8 locale\n"); + fprintf(f_trans,"%s\n",phon_out); + } + if(phoneme_callback != NULL) + { + phoneme_callback(phon_out); } - } - if(phoneme_callback != NULL) - { - phoneme_callback(translator->phon_out); } @@ -1622,20 +1986,6 @@ int SpeakNextClause(FILE *f_in, const void *text_in, int control) return(1); } - if(mbrola_name[0] != 0) - { -#ifdef USE_MBROLA_LIB - MbrolaTranslate(phoneme_list,n_phoneme_list,NULL); -#else - { - FILE *f_mbrola; - if((f_mbrola = f_trans) == stderr) - f_mbrola = stdout; - MbrolaTranslate(phoneme_list,n_phoneme_list,f_mbrola); - } -#endif - } - Generate(phoneme_list,&n_phoneme_list,0); WavegenOpenSound(); |