wiphone/Audio.h at main · jonrick/wiphone · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
/*
Copyright © 2019, 2020, 2021, 2022, 2023 HackEDA, Inc.
Licensed under the WiPhone Public License v.1.0 (the "License"); you
may not use this file except in compliance with the License. You may
obtain a copy of the License at
https://wiphone.io/WiPhone_Public_License_v1.0.txt.

Unless required by applicable law or agreed to in writing, software,
hardware or documentation distributed under the License is distributed
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language
governing permissions and limitations under the License.
*/

/*
 * Audio.h
 *
 *  Class to handle I2S peripheral of ESP32, hardware audio codec, amplifier IC, microphone
 *  data, audio encoding/decoding, audio RTP streams, etc.
*/

// TODO:
// - migrate to dr_mp3
// - add WAV capability (dr_wav)

#ifndef __AUDIO_H_
#define __AUDIO_H_

#include "Arduino.h"
#include "FS.h"
#include "SPI.h"
#include "SPIFFS.h"
#include "driver/i2s.h"
#include "config.h"
#include "Hardware.h"
#include "Networks.h"
#include "helpers.h"
#include "RTPacket.h"

// Marks a function `inline` and additionally applies the GCC `always_inline` attribute to it.
#define AUDIO_INLINE inline __attribute__((always_inline))

#define DR_WAV_NO_CONVERSION_API
#define DR_WAV_NO_STDIO
#include "audio/dr_wav.h"

// These are used in WiPhone.ino
#include "audio/g722_encoder.h"
#include "audio/g722_decoder.h"
#include "audio/g711.h"

// Shared object of type AUDIO_CODEC_CLASS; only declared here, the definition lives in another file.
extern AUDIO_CODEC_CLASS  codec;

// Speaker selectors.
// NOTE(review): presumably the values passed to Audio::chooseSpeaker(bool loudspeaker) - confirm against callers.
#define LOUDSPEAKER 1
#define EARSPEAKER 0

#define STP_SILENT_PERIOD 60000  // threshold for detecting a silent RTP stream (NOTE(review): units are not visible here; the "STP_" prefix looks like a typo of "RTP_" - confirm)
extern uint8_t    rtpSilentPeriod;  // for detecting that the other party's RTP stream has gone silent
extern uint32_t    rtpSilentScan;
// NOTE(review): presumably the two states stored in rtpSilentPeriod - confirm against the code that sets it.
#define RTP_SILENT_ON     0x02
#define RTP_SILENT_OFF    0x00
/* Description
 *     Record used for profiling the audio loop: up to seven timestamps
 *     (`time`) and two sample counters (`samples`). All fields start at zero.
 *
 *     show() logs one line at debug level. For every `time` slot after the
 *     first it prints the difference to the most recent non-zero slot
 *     (slot 0 initially), or "-" if the slot is zero; then "/ " followed
 *     by the sample counters.
 */
struct CycleInfo {
  uint32_t time[7];
  uint32_t samples[2];

  CycleInfo() {
    memset(time, 0, sizeof(time));
    memset(samples, 0, sizeof(samples));
  }

  void show() {
    char buf[100];
    size_t used = 0;                // number of characters already stored in buf (excluding NUL)
    buf[0] = '\0';

    const size_t timeCnt = sizeof(time)/sizeof(time[0]);
    const size_t sampleCnt = sizeof(samples)/sizeof(samples[0]);

    // Every write is bounded by the space that is left, so the output is
    // truncated instead of overflowing the stack buffer if the fields ever grow.
    size_t last = 0;                // index of the most recent non-zero timestamp
    for (size_t i = 1; i < timeCnt; i++) {
      if (time[i] != 0) {
        // Values are unsigned 32-bit; cast explicitly so that the format always matches the argument type
        used = advance(used, sizeof(buf),
                       snprintf(buf + used, sizeof(buf) - used, "%lu ", (unsigned long)(time[i] - time[last])));
        last = i;
      } else {
        used = advance(used, sizeof(buf), snprintf(buf + used, sizeof(buf) - used, "- "));
      }
    }
    used = advance(used, sizeof(buf), snprintf(buf + used, sizeof(buf) - used, "/ "));
    for (size_t i = 0; i < sampleCnt; i++) {
      used = advance(used, sizeof(buf),
                     snprintf(buf + used, sizeof(buf) - used, "%lu ", (unsigned long) samples[i]));
    }
    log_d("%s", buf);
  }

private:
  // Returns the write offset after an snprintf() call that reported `written`
  // characters; clamps to the last byte of the buffer when the output was truncated.
  static size_t advance(size_t used, size_t cap, int written) {
    if (written <= 0) {
      return used;                  // encoding error or nothing written
    }
    size_t next = used + (size_t) written;
    return next < cap ? next : cap - 1;
  }
};
typedef struct CycleInfo CycleInfo_t;


/*
 * Audio
 *
 *  Declares the audio subsystem interface: configuration of I2S and the audio codec,
 *  playback (local files, recorded buffer, incoming RTP stream), microphone capture
 *  (RTP streaming out, recording to a buffer), volume / speaker selection and
 *  call statistics. Only trivial accessors are defined inline; everything else is
 *  defined outside this header.
 */
class Audio  {

public:
  Audio(bool stereoOut, int BCLK, int LRC, int DOUT, int DIN);
  ~Audio();

  // Configuring
  void configureI2S();
  bool setSampleRate(int hz);         // TODO: which of these are purely configuring, and which reset the configuration?
  bool setBitsPerSample(int bits);
  void setMonoOutput(bool mono);      // TODO: force Mono and not force mono

  // Actions
  void loop();
  void ceasePlayback();
  void report();
  bool start();
  void pause();
  void resume();
  bool shutdown();
  void setVolumes(int8_t speakerVol, int8_t headphonesVol, int8_t loudspeakerVol);
  void getVolumes(int8_t &speakerVol, int8_t &headphonesVol, int8_t &loudspeakerVol);
  void setHeadphones(bool plugged);
  bool getHeadphones(void);
  void chooseSpeaker(bool loudspeaker);
  // Returns the current value of the `loudspeaker` flag.
  bool isLoudspeaker() {
    return this->loudspeaker;
  }
  // Returns true if the stored codec status `err` is anything other than WM8750_ERROR_OK.
  bool error() {
    return this->err != WM8750_ERROR_OK;
  }

  bool playFile(fs::FS *fs, const char* path);
  bool playRecord();
  bool playRingtone(fs::FS *fs);
  // Calls playFile() again with the remembered filesystem and full path; returns its result.
  bool rewind() {
    return this->playFile(this->playbackFS, this->playbackFilename.c_str());
  }

  // Actions related to RTP
  void newCall();
  void showAudioStats();

  // RTP payload type numbers accepted by playRtpStream() / sendRtpStreamFromMic()
  enum : uint8_t {
    ULAW_RTP_PAYLOAD = 0,         // G.711, u-Law / PCMU
    ALAW_RTP_PAYLOAD = 8,         // G.711, A-Law / PCMA
    G722_RTP_PAYLOAD = 9          // G.722
  };
  uint16_t openRtpConnection(uint16_t rtpLocalPort);                         // the port that will be listened to AND from which RTP will be sent TODO: allow these two to be different
  bool playRtpStream(uint8_t payloadType, uint16_t rtpRemotePort = 0);       // remote port - play audio only from that port

  // Actions related to microphone
  // TODO: first open port, then feed that port to TinySIP for SDP
  // TODO: currently the mic configuration is the same as the playback configuration, which might be not desirable (at 48 kHz sample rate, especially)
  bool turnMicOn();      // turn on mic, calculate average intensity, but otherwise don't do anything with the data     TODO: check whether it needs to be called before start() and whether it's used properly
  bool sendRtpStreamFromMic(uint8_t payloadType, IPAddress rtpRemoteIP, uint16_t rtpRemotePort);
  bool recordFromMic();
  // Returns the current value of the `recordFinished` flag.
  bool isRecordingFinished() {
    return this->recordFinished;
  }
  bool saveWavRecord(fs::FS *fs, const char* pathName);
  void ceaseRecording();
  void setMicAvg(uint32_t mic);
  uint32_t getMicAvg();

  // TODO
  void preserve();        // remember current configs to restore playback later
  void restore();         // restore preserved state

  // Properties
  // Returns `title` if it is non-empty, otherwise `playbackBasename` if that is non-empty, otherwise "".
  // The returned pointer refers to the String's internal buffer (or a literal); it is not a copy.
  const char* getTitle() {
    return this->title.length()>0 ? this->title.c_str() : this->playbackBasename.length() ? this->playbackBasename.c_str() : "";
  }
  // Returns `artist` if it is non-empty, otherwise "".
  const char* getArtist() {
    return this->artist.length()>0 ? this->artist.c_str() : "";
  }

  uint32_t getFileSize();
  uint32_t getFilePos();
  // Returns the `audioOn` flag.
  bool isOn() {
    return this->audioOn;
  }
  // Returns the `playbackEof` flag.
  bool isEof() {
    return this->playbackEof;
  }
  // Returns the configured bits per sample (`bps`).
  int  getBps() {
    return this->bps;
  };
  int packetSizeSamples(int duration);

  static const i2s_port_t i2s_num = I2S_NUM_0;

  // Volume range in the audio codec chip
  static const int8_t MaxVolume = 6;
  static const int8_t MuteVolume = -69;
  static const int8_t MuteVolumeEarSpkr = -54;

  // Software limit for the loudspeaker (otherwise can burn)
  static const int8_t MuteVolumeLoudSpkr = -18;
  static const int8_t MuteVolumeVoiceLoudSpkr = -60;
  static const int8_t MaxLoudspeakerVolume = 0;

  static const int8_t MaxPercentVolume = 100;
  static const int8_t MutePercentVolume = 0;

  // Profiling
  //LinearArray<CycleInfo_t, false> profile;

  bool playSampleChunk();

protected:
  bool turnOn();                            // enable the audio systems and main loop if not enabled already
  bool playFile();
  bool setDataChannels(int channels);
  bool setFilePos(uint32_t pos);
  bool playChunk();
  AUDIO_INLINE bool playSample();
  void codecReconfig();

  // Specific to MP3
  void readID3Metadata();
  int  decodeMp3Bytes(uint8_t *data, size_t len);

protected:

  // What to play in DAC (speaker & headphones)?
  enum class Playback { Nothing, RtpStream, LocalMp3, Record, LocalPcm };

  bool        audioOn = false;              // I2S and audio codec are turned ON
  bool        audioLoop = true;             // do the audio processing if audio is ON?
  bool        microphoneOn = false;         // TODO: configure I2S and audio codec based on this value (currently microphone is ON whenever audio is ON)
  Playback    playback;                     // what are we currently feeding to DAC?
  bool        microphoneStreamOut;          // do we send microphone data in RTP stream?
  bool        microphoneRecord;             // do we record microphone data to a local file?
  int16_t     sample[2];
  bool        headphones = false;           // if headphones are plugged in, need to send output only to headphones (not earspeaker and/or loudspeaker)
  bool        loudspeaker = false;           // which speaker to use: loudspeaker (true) or earspeaker (false)?
  int8_t      earpieceVol = 6;            // small speaker connected directly to the audio codec IC
  int8_t      headphonesVol = 6;
  int8_t      loudspeakerVol = 0;         // big speaker connected to the amplifier

  int         sampleRate;                   // how many samples per second
  uint8_t     bps = 16;                     // bitsPerSample
  uint8_t     dataChannels = 2;             // number of channels in the MP3 file; used by playChunk
  bool        monoOut = false;              // does I2S driver expect one (left only) or two channels (right and left)?

  // Local playback file
  fs::FS*     playbackFS;                   // filesystem (NOTE(review): no initializer here; presumably set by the constructor or playFile() - confirm, since rewind() passes it on)
  String      playbackFilename="";          // full path of the playback file in the filesystem
  String      playbackBasename="";          // basename (short filename)
  File        playbackFile;                 // MP3 file
  bool        playbackEof = false;

  String      artist;
  String      title;

  // Record buffer (PCM)
  uint16_t*   recordRaw = NULL;             // temporary buffer in PSRAM where the audio data is being stored
  size_t      recordRawSizeSamples;
  int         recordRawR;
  int         recordRawW;
  bool        recordFinished;

  // Play buffers: encoded and decoded (PCM)
  uint8_t     playEnc[1600];                // undecoded audio (MP3) / receiving buffer for UDP packets
  uint16_t    playEncR=0;                   // read index
  uint16_t    playEncW=0;                   // write index

  int16_t     playDec[2400];                // decoded audio (PCM): 1-channel: mono (max. 2400 samples);  2-channel: interleaved L/R (maximum 1152 frames, 2*1200 = 2400 samples)
  // NOTE: this is sufficient for 150 ms of 16000 Hz mono audio (e.g. decoded G.722)
  uint16_t    playDecFramesLeft = 0;
  uint16_t    playDecCurFrame;
  bool        playDecEvenSample = 1;        // if true, sample is swapped with the next in mono playback

  // Mic buffers: raw (PCM) and encoded
  uint16_t    micRaw[2049];
  uint16_t    micRawW;
  uint16_t    micRawR;                      // micRawR < micRawW, if equal -> empty
  uint8_t     micEnc[1600];

  uint32_t    micAvg[4];
  uint16_t    micAvgNext = 0;

  bool        calcMicIntensity;             // Do we need to calculate microphone average input?

  // Specific to MP3
  int         id3Size=0;                    // length id3 tag
  int         nextSync=0;
  int         bytesLeft=0;
  int         bitrate=0;                    // TODO: what is this?
  uint8_t     rev=0;                        // revision
  bool        f_podcast = false;            // set if found ID3Header in stream
  bool        f_extHead = false;            // ID3 extended header
  bool        f_mp3 = false;                // indicates mp3
  bool        mp3Playing = false;           // valid mp3 stream recognized
  uint32_t    lastRate;                     // TODO: what is this?

  // Incoming RTP audio stream
  WiFiUDP     rtp;
  IPAddress   rtpRemoteIP;
  uint16_t    rtpRemotePort = 0;
  uint8_t     rtpPayloadType;
  RTPacket    rtpSend;                      // this one is initialized with parameters from (NOTE(review): original comment ends mid-sentence - source of the parameters is not visible here)
  RTPacket    rtpRecv;
  bool        firstPacket;                  // will the next incoming packet be the first in the audio stream?
  uint16_t    lastSequenceNum;              // last RTP sequence num
  //uint32_t    pos;                          // position in playback     TODO
  uint16_t    rtpPort;
  uint16_t    rtcpPort;
  uint16_t    voipPacketSize;

  // Call quality of service (QoS)
  uint32_t    rtcpPacketsReceived;
  uint32_t    packetsReceived;              // total UDP packets received (NOTE(review): original comment ended mid-sentence with "during all" - confirm the counting scope)
  uint32_t    packetsGood;                  // audio packets count that have no issues
  uint32_t    packetsWrongPayload;          // audio format does not match negotiated one
  uint32_t    packetsMissed;                // packets not played (either completely missing or out of order)
  uint32_t    packetsUnord;                 // packets out of order arriving now (temporary)

  uint32_t    packetsSent;                  // total UDP packets attempted to send
  uint32_t    packetsSendingFailed;         // total packets failed to send

  // Codecs
  // NOTE(review): neither pointer has an initializer here; presumably set by the constructor - confirm
  G722_DEC_CTX* g722Decoder;
  G722_ENC_CTX* g722Encoder;

  // Debug
  uint32_t    loopCnt = 0;
  uint32_t    runCnt = 0;
  uint32_t    rtpCnt = 0;
  int         sampleX = 0;

  static const uint16_t audio_sample[];
  static const uint16_t VOIP_PACKET_DURATION_MS = 20;     // maximum anticipated packet duration (we always leave this length in the output buffer in anticipation of such packet)
  static const uint32_t PACKET_PCM_WSIZE_8KHZ = 160;      // number of samples for 20ms PCM 16-bit/8kHz, 1-channel
  static const uint32_t PACKET_PCM_WSIZE_16KHZ = 320;     // number of samples for 20ms PCM 16-bit/16kHz, 1-channel
  static const uint32_t RECORDING_SIZE_SAMPLES = 1<<20;   // 1 Mi samples (NOTE(review): original comment said "1 MB"; with 16-bit samples as in recordRaw this would be 2 MiB - confirm)

  // Power masks
  static const uint16_t POWER_ALL = 0;
  static const uint16_t DAC_HEADPHONES  = WM8750_POWER2_DAC | WM8750_POWER2_OUT1;
  static const uint16_t DAC_EARSPEAKER  = WM8750_POWER2_DAC | WM8750_POWER2_OUT3 | WM8750_POWER2_LOUT1;
  static const uint16_t DAC_LOUDSPEAKER = WM8750_POWER2_DAC | WM8750_POWER2_OUT2;

  enum : int { APLL_AUTO = -1, APLL_ENABLE = 1, APLL_DISABLE = 0 };

  wm8750_err_t err;                         // codec status; compared against WM8750_ERROR_OK by error()

  // NOTE(review): looks like parameters of a generated waveform (amplitude / frequency / phase / time step) - usage is not visible in this header, confirm
  float m_amplitude;
  float m_frequency;
  float m_phase;
  float m_time;
  float m_deltaTime;
};

#endif /* __AUDIO_H_ */