Text to Speech - Trying to port (multi language TTS) arduino-espeak-ng to Teensy 4.1

Hello,

Recently, Phil Schatzmann successfully ported espeak-ng to Arduino (ESP32). His work is located here: https://github.com/pschatzmann/arduino-espeak-ng (with some backstory here: https://www.pschatzmann.ch/home/2022/11/10/espeak-ng-the-difficult-journey-to-an-arduino-library/).

I am attempting to see if I can port it to a Teensy 4.1. Espeak-ng uses phoneme sound files that are 22 kHz, 16-bit files. So to make it work with the Teensy 4.1, I believe it will have to use Frank's https://github.com/FrankBoesing/Teensy-WavePlayer library.
The phoneme files are in espeak-ng-data-min (a slimmed-down, English-only version, about 828 kB) or espeak-ng-data (almost all languages, about 11.7 MB), which would be loaded onto the SD card in /SD/espeak-ng-data

I think this would be a fantastic feature for the Teensy to have alongside its audio functions. If anyone could help to try to get this working for a Teensy 4.1, I would really appreciate it.

My feeble, unfinished attempt is below. But I get "Compilation error: exit status 1" when trying to compile.



Code:
//Teensy 4.1 


/**
 * @file espeak.ino
 * @author Phil Schatzmann
 * @brief Functional API example with additional information
 * @version 0.1
 * @date 2022-10-27
 *
 * @copyright Copyright (c) 2022
 *
 */
//#include "AudioTools.h" - this was for ESP32
#include <FileSystems.h> // https://github.com/pschatzmann/arduino-posix-fs
//#include "AudioLibs/AudioKit.h" // https://github.com/pschatzmann/arduino-audiokit
#include <play_wav.h> //added for Teensy to allow for 22khz sound
#include <Audio.h>  //added for teensy
#include <Wire.h>
#include <espeak.h>
#include <SD.h>
#include <SPI.h>
#include <SdFat.h>

// Platform selection.
// When __IMXRT1062__ is defined, T4 is defined as a marker; it is tested again
// further down to choose between the two I2S clock implementations.
// Otherwise F_I2S is defined as the clock feeding the I2S divider: F_PLL when
// bits 24-25 of I2S0_MCR equal 3, F_CPU in every other case.
#if defined(__IMXRT1062__)
#define T4
#include <utility/imxrt_hw.h> // make available set_audioClock() for setting I2S freq on Teensy 4
#else
#define F_I2S ((((I2S0_MCR >> 24) & 0x03) == 3) ? F_PLL : F_CPU) // calculation for I2S freq on Teensy 3
#endif

//Teensy sound setup
//
// Audio routing declared below:
//   playWav outputs 0, 2, 4, 6 -> mixer1 inputs 0..3 -> output1 channel 0
//   playWav outputs 1, 3, 5, 7 -> mixer3 inputs 0..3 -> output1 channel 1
// i.e. the eight WAV player outputs are split even/odd across two 4-input
// mixers, and each mixer feeds one channel of the I2S output
// (presumably left/right - confirm against the AudioOutputI2S documentation).
// sgtl5000_1 is the codec control object; it is enabled in setup().
// NOTE(review): the only audio source in this graph is playWav. Nothing visible
// in this sketch feeds samples produced by espeak into it - confirm how the
// synthesized audio is meant to reach output1.
// GUItool: begin automatically generated code
AudioPlayWav             playWav;     //xy=323,171
AudioMixer4              mixer1;         //xy=647,123
AudioMixer4              mixer3;         //xy=648,212
AudioOutputI2S           output1;       //xy=828,169
AudioConnection          patchCord1(playWav, 0, mixer1, 0);
AudioConnection          patchCord2(playWav, 1, mixer3, 0);
AudioConnection          patchCord3(playWav, 2, mixer1, 1);
AudioConnection          patchCord4(playWav, 3, mixer3, 1);
AudioConnection          patchCord5(playWav, 4, mixer1, 2);
AudioConnection          patchCord6(playWav, 5, mixer3, 2);
AudioConnection          patchCord7(playWav, 6, mixer1, 3);
AudioConnection          patchCord8(playWav, 7, mixer3, 3);
AudioConnection          patchCord9(mixer1, 0, output1, 0);
AudioConnection          patchCord10(mixer3, 0, output1, 1);
AudioControlSGTL5000     sgtl5000_1;     //xy=853,409
// GUItool: end automatically generated code

//modded for Teensy 4.1 with builtin SD card
// PIN_SD_CARD_CS is the value handed to SD.begin() in setup().
// The MISO/MOSI/CLK defines are not referenced anywhere else in the code shown
// here; they appear to be left over from the SPI-attached SD card wiring of the
// original example - TODO confirm they can be removed.
#define PIN_SD_CARD_CS    BUILTIN_SDCARD
#define PIN_SD_CARD_MISO  12
#define PIN_SD_CARD_MOSI  11
#define PIN_SD_CARD_CLK   13

//I2SStream was for ESP32 - Need to change for Teensy
//I2SStream i2s; // or replace with AudioKitStream for AudioKit

// Directory passed to espeak_Initialize() as the location of the espeak-ng data.
// NOTE(review): with the FileSystemSD line below commented out, nothing visible
// in this sketch maps the "/sd" prefix onto the SD card - confirm how the
// library opens its data files on Teensy.
const char* path = "/sd/espeak-ng-data"; //Change path by command below:

//file_systems::FileSystemSD efs(path, SD); // dummy implementation on ESP32

// Output mode passed to espeak_Initialize().
espeak_AUDIO_OUTPUT output = AUDIO_OUTPUT_SYNCH_PLAYBACK;
//output = AUDIO_OUTPUT_SYNCH_PLAYBACK;

// Remaining arguments forwarded unchanged to espeak_Synth() in loop().
void *user_data = nullptr;
unsigned int *identifier = nullptr;
// buflength is passed to espeak_Initialize() and is also reused as the second
// argument of espeak_Synth(); options is passed to espeak_Initialize().
int buflength = 500, options = 0;
unsigned int position = 0, end_position = 0, flags = espeakCHARS_AUTO;
espeak_POSITION_TYPE position_type = POS_CHARACTER;

void setup() {
  Serial.begin(115200);
  // we load the config data from SD: "sd" is default mount point
  Serial.println("starting SD");

  while (!(SD.begin(PIN_SD_CARD_CS))) {
  // stop here, but print a message repetitively
    Serial.println("Unable to access the SD card");
    delay(500);
  }

  sgtl5000_1.enable();
  sgtl5000_1.volume(0.6);

  // setup audio
  Serial.println("starting i2s");
  audio_info espeak_info = espeak_get_audio_info();
  
  /*
  //configures sampling rates for ESP32
  auto cfg = i2s.defaultConfig();
  cfg.channels = espeak_info.channels; // 1
  cfg.sample_rate = espeak_info.sample_rate; // 22050
  cfg.bits_per_sample = espeak_info.bits_per_sample; // 16
  i2s.begin(cfg);
  espeak_set_audio_output(&i2s);
  */

  // setup espeak
  Serial.println("espeak_Initialize");
  espeak_Initialize(output, buflength, path, options);
  espeak_VOICE voice;
  memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
  const char *langNativeString = "en";     // Set voice by properties
  voice.languages = langNativeString;
  voice.name = "US";
  voice.variant = 2;
  voice.gender = 2;
  Serial.println("espeak_SetVoiceByProperties");
  espeak_SetVoiceByProperties(&voice);
}

// Adapted from the Teensy WavePlayer examples.
// Logs the file name on Serial, starts playback on playWav and then blocks,
// polling every 10 ms, until playWav reports that it is no longer playing.
void playFile(const char *filename)
{
  Serial.print("Playing file: ");
  Serial.println(filename);
  playWav.play(filename);

  for (;;) {
    if (!playWav.isPlaying()) {
      break;
    }
    // The wait is needed for EventResponder; yield() would work as well, as
    // would the older scheme of reading the SD card inside update(), selected
    // with playWav.enableEventReading(false).
    delay(10);
  }
}

// The Teensy 4 build derives its clock directly inside setI2SFreq(), so the
// divider search below is only compiled when T4 is not defined.
#ifdef T4
#else
/**
 * Searches for the I2S master-clock fraction/divide pair that best matches a
 * requested sample rate on Teensy 3.
 *
 * @param fsamp     requested sample rate
 * @param nbits     number of bits per frame
 * @param tcr2_div  bit clock divider field as read from I2S0_TCR2
 * @return the pair packed as I2S_MDR_FRACT(fract - 1) | I2S_MDR_DIVIDE(divide - 1)
 *
 * Every fraction from 1 to 255 is tried; for each one the nearest divider is
 * computed and accepted only if it is below 4096 and gives a strictly smaller
 * frequency error than the best candidate so far. The search stops early on an
 * exact match. If no candidate qualifies, fract = divide = 1 is returned.
 */
uint32_t I2S_dividers( float fsamp, uint32_t nbits, uint32_t tcr2_div )
{
  unsigned bestFract = 1;
  unsigned bestDivide = 1;
  float smallestError = 1e7;

  const unsigned clocksPerSample = (nbits * ((tcr2_div + 1) * 2));
  const unsigned baseRate = F_I2S / clocksPerSample;

  for (unsigned mult = 1; mult < 256; mult++) {
    const unsigned candidate = round(baseRate / fsamp * mult);
    const float achieved = baseRate * mult / (float)candidate;
    const float error = fabs(fsamp - achieved);

    // Reject dividers that are too large and anything that is not a strict
    // improvement on the best error seen so far.
    if (candidate >= 4096 || !(error < smallestError)) {
      continue;
    }

    bestFract = mult;
    bestDivide = candidate;
    smallestError = error;
    //Serial.printf("%fHz<->%fHz(%d/%d) error:%f\n", fsamp, achieved, bestFract, bestDivide, smallestError);

    if (error == 0.0f) {
      break;
    }
  }

  return I2S_MDR_FRACT( (bestFract - 1) ) | I2S_MDR_DIVIDE( (bestDivide - 1) );
}
#endif


/**
 * Sets the I2S sample rate for Teensy.
 *
 * @param freq  requested sample rate; values <= 0 are ignored, because they
 *              would lead to a division by zero in the Teensy 4 branch and
 *              have no meaning as a sample rate in either branch.
 *
 * Teensy 4 (T4 defined): with a fixed prescaler n1 = 4, a second divider n2 is
 * chosen and the audio PLL multiplier C = freq * 256 * n1 * n2 / 24 MHz is
 * split into an integer part c0 and a fraction c1 / c2 (c2 = 10000) for
 * set_audioClock(). n1 - 1 and n2 - 1 are then written into the SAI1 PRED and
 * PODF fields of CCM_CS1CDR.
 *
 * Otherwise: the frame size in bits is reconstructed from I2S0_TCR5 and
 * I2S0_TCR4, the bit clock divider is read from I2S0_TCR2, I2S_dividers()
 * computes the new I2S0_MDR value, and it is written once the
 * I2S_MCR_DUF flag in I2S0_MCR has cleared.
 */
void setI2SFreq(int freq) {
  if (freq <= 0) {
    return;
  }
#if defined(T4)
  // PLL between 27*24 = 648MHz and 54*24=1296MHz
  int n1 = 4; //SAI prescaler 4 => (n1*n2) = multiple of 4
  int n2 = 1 + (24000000 * 27) / (freq * 256 * n1);
  double C = ((double)freq * 256 * n1 * n2) / 24000000;
  int c0 = C;
  int c2 = 10000;
  int c1 = C * c2 - (c0 * c2);
  set_audioClock(c0, c1, c2, true);
  CCM_CS1CDR = (CCM_CS1CDR & ~(CCM_CS1CDR_SAI1_CLK_PRED_MASK | CCM_CS1CDR_SAI1_CLK_PODF_MASK))
       | CCM_CS1CDR_SAI1_CLK_PRED(n1-1) // &0x07
       | CCM_CS1CDR_SAI1_CLK_PODF(n2-1); // &0x3f
#else
  unsigned tcr5 = I2S0_TCR5;
  unsigned word0width = ((tcr5 >> 24) & 0x1f) + 1;
  unsigned wordnwidth = ((tcr5 >> 16) & 0x1f) + 1;
  unsigned framesize = ((I2S0_TCR4 >> 16) & 0x0f) + 1;
  unsigned nbits = word0width + wordnwidth * (framesize - 1 );
  unsigned tcr2div = I2S0_TCR2 & 0xff; //bitclockdiv
  uint32_t MDR = I2S_dividers(freq, nbits, tcr2div);
  if (MDR > 0) {
    // Wait until the I2S_MCR_DUF flag is clear before writing the new divider.
    while (I2S0_MCR & I2S_MCR_DUF) {
      ;
    }
    I2S0_MDR = MDR;
  }
#endif
}

void loop() {
  char text[] = "Hello world!";
  Serial.println(text);
  espeak_Synth(text, buflength, position, position_type, end_position, flags,
                identifier, user_data);
  Serial.println("Done");
  delay(5000);
}

Thank you for any help or suggestions you can offer,

Walter
 
Back
Top