diff --git a/include/libultraship/bridge/audiobridge.h b/include/libultraship/bridge/audiobridge.h index eef355d41..5acb2a68d 100644 --- a/include/libultraship/bridge/audiobridge.h +++ b/include/libultraship/bridge/audiobridge.h @@ -22,13 +22,30 @@ API_EXPORT AudioChannelsSetting GetAudioChannels(); API_EXPORT int32_t GetNumAudioChannels(); /** - * @brief Submits a frame of PCM audio to the audio output device. + * @brief Submits a frame of s16 PCM audio to the audio output device (legacy). + * + * Default entry point preserved for libultraship consumers on the s16 + * audio path. Forwards to AudioPlayer::Play(uint8_t*, size_t); valid only + * when the player is in s16 mode (the default). * * @param buf Interleaved sample data (stereo: L,R,… or surround: FL,FR,C,LFE,SL,SR,…). * @param len Length of @p buf in bytes. */ API_EXPORT void AudioPlayerPlayFrame(const uint8_t* buf, size_t len); +/** + * @brief Submits a frame of float PCM audio to the audio output device. + * + * Float-pipeline entry point. Valid only when the player has been switched + * to float mode (see AudioPlayer::SetUseFloatPipeline). The full audio + * path — resample / optional mix-source sum / surround decode — runs in + * float at the device's output rate. + * + * @param buf Interleaved stereo float samples (L, R, L, R, …) in nominal [-1, 1] range. + * @param frames Number of stereo frames in @p buf. + */ +API_EXPORT void AudioPlayerPlayFrameF32(const float* buf, size_t frames); + /** * @brief Changes the audio channel configuration at runtime. * diff --git a/include/ship/audio/Audio.h b/include/ship/audio/Audio.h index 925130919..b774186da 100644 --- a/include/ship/audio/Audio.h +++ b/include/ship/audio/Audio.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -68,6 +70,33 @@ class Audio { */ AudioChannelsSetting GetAudioChannels() const; + /** @brief Returns whether the float (HD) audio pipeline is currently active. 
*/ + bool IsUsingFloatPipeline() const; + + /** + * @brief Single authority for the float-pipeline mode. + * + * Updates the live flag, the settings new players inherit, and the current + * player (reopening its device in the matching format). Everything else -- + * the producer's PlayF32-vs-Play choice and any newly constructed player -- + * derives from this, so there is exactly one place the mode is owned. + * + * @return true if applied; false if the current player refused float mode + * (in which case the authority is reverted to the player's actual mode). + */ + bool SetUseFloatPipeline(bool enabled); + + /** + * @brief Registers a callback invoked whenever a new AudioPlayer is + * initialised (backend switch, fallback to Null, startup). + * + * The new player already inherits the float-pipeline mode; this hook exists + * so the host can re-attach instance-bound state the player cannot carry + * across a rebuild (e.g. a FluidSynth mix source). Pass an empty function to + * clear it. + */ + void SetOnAudioPlayerInitialized(std::function callback); + protected: /** @brief (Re)initialises the AudioPlayer for the current backend and channel settings. */ void InitAudioPlayer(); @@ -95,5 +124,11 @@ class Audio { AudioSettings mAudioSettings; std::shared_ptr> mAvailableAudioBackends; std::shared_ptr mConfig; + + // Single source of truth for the float-pipeline mode. Lock-free so the audio + // producer can read it cheaply; written only by SetUseFloatPipeline, which + // also mirrors it into mAudioSettings (the template new players inherit). 
+ std::atomic mUseFloatPipeline{ false }; + std::function mOnAudioPlayerInitialized; }; } // namespace Ship diff --git a/include/ship/audio/AudioPlayer.h b/include/ship/audio/AudioPlayer.h index 2725ec3c0..e4c04e987 100644 --- a/include/ship/audio/AudioPlayer.h +++ b/include/ship/audio/AudioPlayer.h @@ -3,7 +3,9 @@ #include "stddef.h" #include #include +#include #include "ship/audio/AudioChannelsSetting.h" +#include "ship/audio/AudioResampler.h" #include "ship/audio/SoundMatrixDecoder.h" namespace Ship { @@ -12,11 +14,20 @@ namespace Ship { * @brief Configuration parameters shared by all AudioPlayer backends. */ struct AudioSettings { - int32_t SampleRate = 44100; ///< Output sample rate in Hz. + int32_t SampleRate = 48000; ///< Output sample rate in Hz. + int32_t SourceSampleRate = 0; ///< Source sample rate in Hz. (0 = same as SampleRate, no resampling) int32_t SampleLength = 1024; ///< Number of samples per audio frame. int32_t DesiredBuffered = 2480; ///< Target number of frames to keep buffered. AudioChannelsSetting ChannelSetting = AudioChannelsSetting::audioStereo; ///< Channel mode (stereo / 5.1 matrix / 5.1 raw). + + /// When true, the AudioPlayer pipeline (Play / resampler / matrix decoder + /// / backend device format) runs in 32-bit float. Enables the optional + /// MixSource hook for sources rendered at the device output rate. When + /// false (default), the pipeline is interleaved int16 — same byte layout + /// and entry points the AudioPlayer always had, so existing libultraship + /// consumers keep working with no code change. + bool UseFloatPipeline = false; }; /** @@ -38,7 +49,7 @@ class AudioPlayer { */ AudioPlayer(AudioSettings settings) : mAudioSettings(settings) { } - ~AudioPlayer(); + virtual ~AudioPlayer(); /** * @brief Calls DoInit() and sets the initialised flag on success. @@ -55,25 +66,42 @@ class AudioPlayer { virtual int32_t Buffered() = 0; /** - * @brief Submits a frame of PCM audio to the output device. 
+ * @brief Submits a frame of PCM audio to the output device — legacy s16 path. * - * If 5.1 surround output is configured and the channel setting requires matrix - * decoding, the stereo @p buf is first decoded to 6-channel surround before - * being passed to DoPlay(). + * Default entry point preserved for libultraship consumers. Samples are + * interleaved signed 16-bit; the legacy resampler / matrix-decoder + * boundaries do the (lossy) s16↔float conversions internally. Calls + * here when @c UseFloatPipeline is true are a configuration mistake + * and emit a warning + drop the buffer. * - * @param buf Interleaved samples: - * - Stereo: (L, R, L, R, …) - * - 5.1: (FL, FR, C, LFE, SL, SR, …) + * @param buf Interleaved s16 samples (Stereo: (L,R,…); 5.1: (FL,FR,C,LFE,SL,SR,…)). * @param len Length of @p buf in bytes. */ void Play(const uint8_t* buf, size_t len); + /** + * @brief Submits a frame of PCM audio to the output device — float pipeline. + * + * Only valid when @c UseFloatPipeline is true. The audio path stays in + * 32-bit float through resample, optional MixSource summing + soft-clip, + * surround decode, and into the backend. The MixSource (if set) runs at + * the device output rate, so any secondary source can render at native + * device quality without traversing the resampler. + * + * @param buf Interleaved stereo float samples in nominal [-1, 1]. + * @param frames Number of stereo frames in @p buf. + */ + void Play(const float* buf, size_t frames); + /** @brief Returns true if Init() has been called and succeeded. */ bool IsInitialized(); /** @brief Returns the configured output sample rate in Hz. */ int32_t GetSampleRate() const; + /** @brief Returns the configured source sample rate in Hz. */ + int32_t GetSourceSampleRate() const; + /** @brief Returns the configured number of samples per audio frame. 
*/ + int32_t GetSampleLength() const; @@ -89,6 +117,12 @@ class AudioPlayer { */ void SetSampleRate(int32_t rate); + /** + * @brief Sets the source sample rate. + * @param rate New sample rate in Hz. + */ + void SetSourceSampleRate(int32_t rate); + /** * @brief Sets the number of samples per audio frame. * @param length New frame size in samples. @@ -118,6 +152,49 @@ class AudioPlayer { */ int32_t GetNumOutputChannels() const; + /** + * @brief Callback signature for a secondary stereo audio source mixed in + * after the resampler. + * + * The callback fills @p stereoOut with @p frames frames of interleaved + * stereo float at the device's output rate (GetSampleRate()), which + * lets the source bypass the resampler entirely — the resampler runs + * only over the primary input stream. The mix sums the two sources + * with a tanh-style soft-clip before surround decoding (if any). + */ + using MixSource = std::function; + + /** + * @brief Installs a secondary audio source whose contribution is mixed + * in at the output rate, post-resampler. + * + * Only meaningful when @c UseFloatPipeline is true (the s16 legacy path + * has no mix step). Pass @c nullptr to remove the source. Not synchronised + * with the audio thread: std::function assignment is not atomic on any + * platform, so install or remove the source only while Play() cannot run; callers + * in practice swap this once at synth install/teardown. + * + * @return true if accepted; false (and ignored) when the player is in + * the s16 legacy mode. + */ + bool SetMixSource(MixSource source); + + /** + * @brief Switches the pipeline between legacy s16 and float HD modes + * at runtime. + * + * Closes the audio device, updates @c UseFloatPipeline, and reopens + * the device with the matching format. The Play overload that matches + * the new mode is the only one valid until the next switch. + * + * @return true if the device successfully reinitialised in the new + * mode. On failure the previous mode is restored.
+ */ + bool SetUseFloatPipeline(bool enabled); + + /** @brief Returns whether the float pipeline mode is active. */ + bool IsUsingFloatPipeline() const { return mAudioSettings.UseFloatPipeline; } + protected: /** * @brief Opens and configures the platform audio device. @@ -146,8 +223,43 @@ class AudioPlayer { virtual void DoPlay(const uint8_t* buf, size_t len) = 0; private: + /// Picks the right channel count (stereo for float mode, output channel + /// count for legacy s16 mode) and (re)constructs mResampler. No-op when + /// the rates already match. + void RebuildResampler(); + + /// Whether a stereo->5.1 matrix decoder is required for the current + /// (channel, pipeline) combination. Matrix 5.1 always needs it; Raw 5.1 + /// needs it only in float mode (there the source is stereo, so the engine's + /// native 6-channel output isn't available to pass through). Raw 5.1 on the + /// s16 path keeps passing the engine's native 6 channels straight through. + bool NeedsMatrixDecoder() const; + + /// (Re)creates or releases mSoundMatrixDecoder to match NeedsMatrixDecoder(). + /// Call after any channel-setting or pipeline-mode change. + void EnsureMatrixDecoder(); + std::unique_ptr - mSoundMatrixDecoder; ///< Stereo-to-surround decoder (active in matrix-5.1 mode). + mSoundMatrixDecoder; ///< Stereo-to-surround decoder (Matrix 5.1, or Raw 5.1 in float mode). + std::unique_ptr mResampler; + + // Fixed-size scratch buffers — no heap allocation on the audio hot path. + // Sized for the worst-case ratio and channel count of the data the buffer + // holds at its stage. mResampleBuf holds *stereo* output-rate frames + // (resample step), so 2 channels suffice; mMixSourceBuf likewise holds + // stereo frames the secondary source writes into. mSurroundBuf is sized + // for 6 channels of output-rate frames (matrix-5.1 final output) so the + // decoder has somewhere to write before DoPlay. 
16384 gives comfortable + // headroom for 32k→48k @ SampleLength=1024 (ceil(1024 * 3/2) * 6 = 9216) + // and for higher device rates (e.g. 32k→96k). + static constexpr size_t kResampleBufSamples = 16384; + std::array mResampleBuf{}; + std::array mMixSourceBuf{}; + // Legacy s16 path uses its own scratch so both code paths can coexist + // without retyping the float buffer. 16384 × 2 B = 32 KB. + std::array mResampleBufS16{}; + + MixSource mMixSource; AudioSettings mAudioSettings; bool mInitialized = false; diff --git a/include/ship/audio/AudioResampler.h b/include/ship/audio/AudioResampler.h new file mode 100644 index 000000000..d69635238 --- /dev/null +++ b/include/ship/audio/AudioResampler.h @@ -0,0 +1,90 @@ +#pragma once + +#include +#include + +namespace Ship { + +/* + * AudioResampler — polyphase sinc resampler for rational ratios. + * + * Designed for the specific case of console audio upsampling from 32000 Hz + * to 48000 Hz (ratio 3/2 exact). Works for any rational ratio P/Q where + * P = outRate / gcd(outRate, inRate) and Q = inRate / gcd(outRate, inRate). + * + * Architecture: + * - Polyphase decomposition of a windowed-sinc lowpass filter. + * - Filter cutoff at min(inRate, outRate) / 2 to prevent aliasing. + * - Kaiser window (beta=6) for good stopband attenuation (~60 dB). + * - For 32k→48k: P=3, Q=2, 8 taps per phase → 24 total filter coefficients. + * + * Usage: + * AudioResampler r(32000, 48000, numChannels); + * r.Process(inFloat, inFrames, outFloat, maxOutFrames); + * + * Process() returns the number of output frames actually written. + * State (history samples) is preserved between calls for continuous streams. + * Samples are interleaved float in nominal [-1, 1] range; the polyphase + * filter is unity-gain so peaks slightly above 1.0 may pass through and + * should be soft-clipped by the caller (or before reaching the backend).
+ */ +class AudioResampler { + public: + AudioResampler(int32_t inRate, int32_t outRate, int32_t numChannels); + + /* Resample inFrames input frames into outBuf. + * Returns number of output frames written. + * outBuf must be large enough for ceil(inFrames * outRate / inRate) frames. + * + * Two overloads: + * - float in / float out is the canonical path used by the float audio + * pipeline. Samples are interleaved float in nominal [-1, 1]. + * - int16_t in / int16_t out is the legacy entry point preserved for + * libultraship consumers still on the s16 path. It converts at the + * boundaries and clamps the output to the s16 range; the inner DSP + * is identical (the filter coefficients live in float either way). */ + int32_t Process(const float* inBuf, int32_t inFrames, float* outBuf, int32_t maxOutFrames); + int32_t Process(const int16_t* inBuf, int32_t inFrames, int16_t* outBuf, int32_t maxOutFrames); + + /* Maximum output frames for a given number of input frames. */ + int32_t MaxOutputFrames(int32_t inFrames) const; + + /* Reset history (e.g. on stream discontinuity). 
*/ + void Reset(); + + private: + int32_t mInRate; + int32_t mOutRate; + int32_t mNumChannels; + + /* Rational ratio P/Q after GCD reduction */ + int32_t mP; /* upsample factor */ + int32_t mQ; /* downsample factor */ + + /* Polyphase filter — mNumPhases phases × kTapsPerPhase taps */ + static constexpr int kTapsPerPhase = 8; + int32_t mNumPhases; /* = P */ + std::vector mCoeffs; /* [phase * kTapsPerPhase + tap] */ + + /* Current phase index in [0, P) */ + int32_t mPhase; + + /* History buffer: kTapsPerPhase-1 frames per channel for convolution state */ + std::vector mHistory; /* [(kTapsPerPhase-1) * numChannels] */ + + void BuildFilter(); + static float BesselI0(float x); + static float KaiserWindow(int n, int N, float beta); + static float Sinc(float x); + + static inline int32_t GCD(int32_t a, int32_t b) { + while (b) { + int32_t t = b; + b = a % b; + a = t; + } + return a; + } +}; + +} // namespace Ship diff --git a/include/ship/audio/CoreAudioAudioPlayer.h b/include/ship/audio/CoreAudioAudioPlayer.h index edbe9a809..e42dd4d08 100644 --- a/include/ship/audio/CoreAudioAudioPlayer.h +++ b/include/ship/audio/CoreAudioAudioPlayer.h @@ -24,7 +24,7 @@ class CoreAudioAudioPlayer : public AudioPlayer { * @param settings Sample rate, buffer size, desired buffered frames, and channel mode. */ CoreAudioAudioPlayer(AudioSettings settings); - ~CoreAudioAudioPlayer(); + ~CoreAudioAudioPlayer() override; /** * @brief Returns the number of audio frames currently queued in the ring buffer. diff --git a/include/ship/audio/FluidSynth.h b/include/ship/audio/FluidSynth.h new file mode 100644 index 000000000..230112fd7 --- /dev/null +++ b/include/ship/audio/FluidSynth.h @@ -0,0 +1,156 @@ +#pragma once +#if ENABLE_FLUIDSYNTH + +#include "IMidiSynth.h" +#include +#include +#include +#include + +namespace Ship { + +// Backend tuning supplied by the integrating game.
FluidSynth has no opinion +// on what these should be for a given title, so they are parameters rather than +// hardcoded constants — a game sizes polyphony and gain for its own workload and +// mix. The defaults here are FluidSynth's own stock values, so a consumer that +// leaves a field untouched gets unsurprising upstream behavior. +struct FluidSynthConfig { + // Audio output rate; must match the output device (typically 44100 or 48000). + // Set before new_fluid_synth — the synth reads it once at construction. + double sampleRate = 44100.0; + + // When true, install a softened volume curve: replaces the SF2 default + // vel / CC7 / CC11 -> initial-attenuation modulators with versions that keep + // the concave NEGATIVE shape but halve the amount (960 -> 480 cB). Maximum + // attenuation drops from -96 dB to -48 dB, lifting quiet voices while + // preserving dynamics shape. False preserves standard SF2 behavior. + bool linearVelocity = false; + + // Maximum simultaneous voices. FluidSynth's stock default is 256; a game that + // layers many SF2 voices or holds one-shot percussion voices can exhaust that + // and drop notes. Idle voices cost almost nothing, so sizing up is cheap. + int polyphony = 256; + + // Master output gain. FluidSynth's stock default is 0.2 — conservative to + // avoid clipping when many voices sound at once. A game that mixes the synth + // against a louder source may need to lift this so the two arrive balanced. + double gain = 0.2; +}; + +class FluidSynth final : public IMidiSynth { +public: + explicit FluidSynth(const FluidSynthConfig& config); + ~FluidSynth() override; + + // Single-shot replace: unloads every previously-loaded SF2 then loads + // this one. Convenience wrapper over ClearSoundFonts + AddSoundFont*. + void LoadSoundFont(const std::string& path) override; + + // Same shape as LoadSoundFont but takes an in-memory SF2 (e.g. one read + // from a mounted .o2r archive). 
The buffer is copied into the synth's + // internal storage so the caller may free their copy immediately. + void LoadSoundFontFromMemory(const uint8_t* data, size_t size); + + // Add an SF2 alongside any already-loaded ones. FluidSynth's preset + // lookup walks loaded soundfonts in REVERSE load order, so the most + // recently added SF2 wins on (bank, program) collisions — matches the + // "last loaded wins" semantics our mod stack uses elsewhere. + // + // Returns the FluidSynth sfont id on success, or FLUID_FAILED. The + // memory variant copies the buffer into instance-owned storage and + // routes through the mem:// sentinel; the path variant uses the + // default filesystem loader. + int AddSoundFont(const std::string& path); + int AddSoundFontFromMemory(const uint8_t* data, size_t size); + + // Unload every loaded SF2. Safe to call when none are loaded. + void ClearSoundFonts(); + + // Loaded SF2 ids in load order. Use to map a sfont id back to its + // pack name on the caller side (the caller knows what it loaded; + // FluidSynth only knows the opaque ids). + std::vector GetLoadedSfontIds(); + + // One row per preset across every loaded SF2 (every sfont's full + // preset list, in iteration order — which is generally the SF2's + // phdr order, grouped by sfont). Re-enumerated on demand; callers + // typically cache the result and refresh when packs change. 
+ struct LoadedPreset { + int sfontId; + int bank; + int program; + std::string name; + }; + std::vector EnumerateLoadedPresets(); + void NoteOn(uint8_t channel, uint8_t note, uint8_t velocity) override; + void NoteOff(uint8_t channel, uint8_t note) override; + void ProgramChange(uint8_t channel, uint16_t preset) override; + bool ProgramSelect(uint8_t channel, int sfontId, + uint16_t bank, uint16_t program) override; + void PitchBend(uint8_t channel, float semitones) override; + void ControlChange(uint8_t channel, uint8_t cc, uint16_t value) override; + void Render(float* out, uint32_t frameCount) override; + uint32_t GetActiveVoiceCount() const override; + uint32_t GetPolyphonyLimit() const override; + + // Configure the synth-wide reverb. Safe to call any time after construction; + // takes the synth mutex. Useful for per-mode presets — callers swap reverb + // settings without having to rebuild the synth. Parameters mirror the + // FluidSynth fluid_synth_set_reverb_* calls: + // roomsize : [0..1] perceived reverb tail length. + // damping : [0..1] high-frequency damping. + // width : [0..100] stereo spread. + // level : [0..1] reverb wet level. + void SetReverbParams(double roomsize, double damping, double width, double level); + + // Set FluidSynth's master output gain at runtime (forwards to + // fluid_synth_set_gain). Lets the host track a global volume fader without + // rebuilding the synth. Takes the synth mutex; safe any time after + // construction. Mirrors FluidSynthConfig::gain, which sets the same knob at + // construction. + void SetMasterGain(float gain) override; + + // Pitch bend range in semitones sent to FluidSynth on channel init. + // Must match what the MidiTranslator uses. Default: 12 semitones. 
+ static constexpr float kPitchBendRangeSemitones = 12.0f; + +private: + void InitChannel(uint8_t channel); + + // Installs the softened volume curve on the freshly-created fluid_synth_t: + // replaces the SF2 default vel/CC7/CC11 -> attenuation modulators with versions + // at halved amount (480 cB). Must run after new_fluid_synth() but before any + // LoadSoundFont() so SF2 instrument-level modulators layer correctly on top. + void InstallLinearVelocityModulators(); + + fluid_settings_t* mSettings = nullptr; + fluid_synth_t* mSynth = nullptr; + double mSampleRate; + bool mLinearVelocity = false; + + // One entry per loaded SF2, in load order. FluidSynth itself walks + // loaded sfonts in reverse load order during preset lookup, so the + // tail of this vector wins on collisions. + std::vector mSfontIds; + + // Backing storage for memory-loaded SF2s, paired one-to-one with + // mSfontIds entries. Filesystem-loaded SF2s use the default loader + // and the corresponding slot here stays empty. Buffers must outlive + // the sfload call so the mem-sfloader's callbacks have stable data + // for the duration of the load. + std::vector> mLoadedBuffers; + + // Protects fluid_synth_* calls from concurrent access. + // The audio thread calls Render(); the game thread calls NoteOn/Off/etc. + mutable std::mutex mSynthMutex; + + // Which channels have had InitChannel() called. Sized to kNumChannels + // so the translator's per-pair channel allocation can address all of + // them; the synth setting is matched to this in the constructor. + static constexpr int kNumChannels = 64; + bool mChannelInited[kNumChannels] = {}; +}; + +} // namespace Ship + +#endif // ENABLE_FLUIDSYNTH diff --git a/include/ship/audio/IMidiSynth.h b/include/ship/audio/IMidiSynth.h new file mode 100644 index 000000000..44593faf7 --- /dev/null +++ b/include/ship/audio/IMidiSynth.h @@ -0,0 +1,93 @@ +#pragma once +#include +#include +#include + +namespace Ship { + +// MIDI-shaped soft synth interface. 
An implementation that is installed +// on the MidiSynthManager replaces the engine's native audio synthesis +// path: the audio thread fills its output buffer by calling Render() +// instead of running the native synth. +// +// When no implementation is installed, the manager returns nullptr and +// the audio thread falls back to native synthesis. +class IMidiSynth { +public: + virtual ~IMidiSynth() = default; + + // Load an SF2 soundfont from disk. Implementations that do not use + // SF2 may treat this as a no-op. + virtual void LoadSoundFont(const std::string& path) = 0; + + // MIDI-like note events. channel index is implementation-defined; the + // current FluidSynth backend exposes 64 channels. Standard MIDI drum + // semantics are NOT pinned to channel 9 — drum vs melodic is decided + // by the bank passed to ProgramChange (bank 128 = drum kit), and the + // implementation flips the channel type per call. + virtual void NoteOn(uint8_t channel, uint8_t note, uint8_t velocity) = 0; + virtual void NoteOff(uint8_t channel, uint8_t note) = 0; + + // preset encodes both bank (high byte) and program (low byte). + // The synth's preset lookup resolves this against the union of every + // loaded soundfont — typically with last-loaded-wins precedence. + virtual void ProgramChange(uint8_t channel, uint16_t preset) = 0; + + // Like ProgramChange but pins the channel to a SPECIFIC loaded soundfont via + // its `sfontId`, bypassing the cross-soundfont preset lookup. Use when the + // caller knows exactly which SF2 the preset must come from, even if another + // loaded SF2 also has that (bank, program). + // + // Returns true when the pin succeeds (the sfontId is valid and has the + // (bank, program) tuple), false otherwise. Failure signals the caller to fall + // back to native synthesis for this entry. + virtual bool ProgramSelect(uint8_t channel, int sfontId, + uint16_t bank, uint16_t program) = 0; + + // semitones is a signed float: +1.0 = one semitone up. 
The + // implementation owns the usable range and clamps out-of-range values + // (the FluidSynth backend clamps to its configured pitch-wheel range, + // approximately +/-12 semitones), so callers need not pre-clamp. + virtual void PitchBend(uint8_t channel, float semitones) = 0; + + // Convenience: bend by a frequency RATIO instead of semitones. + // 1.0 = no bend, 2.0 = +1 octave, 0.5 = -1 octave. Handy for engines + // that express pitch as a frequency/resampling scale rather than in + // semitones. Forwards to PitchBend, which owns the range clamp. + void PitchBendFactor(uint8_t channel, float freqRatio) { + PitchBend(channel, 12.0f * std::log2(freqRatio > 0.0f ? freqRatio : 1e-6f)); + } + + // Convenience: start a note already pitch-bent by `freqRatio` (same + // convention as PitchBendFactor). The bend is applied BEFORE the NoteOn + // so the voice attacks at the bent pitch in one step, rather than + // sounding at concert pitch until the next bend update lands. + void NoteOnPitchFactor(uint8_t channel, uint8_t note, uint8_t velocity, float freqRatio) { + PitchBendFactor(channel, freqRatio); + NoteOn(channel, note, velocity); + } + + // Standard MIDI CC. value is 0-16383 (14-bit). + virtual void ControlChange(uint8_t channel, uint8_t cc, uint16_t value) = 0; + + // Fill `out` with `frameCount` stereo interleaved float32 samples. + // Called from the audio thread; must be real-time safe. + virtual void Render(float* out, uint32_t frameCount) = 0; + + // Current number of audible voices held by the synth. Used by host UIs + // as a real-time diagnostic — when this approaches GetPolyphonyLimit(), + // new NoteOns will steal existing voices and the host can correlate + // user-reported "cuts" with voice exhaustion. Implementations without + // a voice pool may return 0. + virtual uint32_t GetActiveVoiceCount() const = 0; + virtual uint32_t GetPolyphonyLimit() const = 0; + + // Set the synth-wide master output gain (linear; 1.0 = unity). 
The host + // uses this to apply a global volume fader to the synth's contribution + // without rebuilding it — e.g. tracking a Master Volume slider. Safe to + // call from the game thread. Implementations without a controllable master + // gain may treat this as a no-op. + virtual void SetMasterGain(float gain) {} +}; + +} // namespace Ship diff --git a/include/ship/audio/MidiSynthManager.h b/include/ship/audio/MidiSynthManager.h new file mode 100644 index 000000000..d1b4e3ff2 --- /dev/null +++ b/include/ship/audio/MidiSynthManager.h @@ -0,0 +1,30 @@ +#pragma once +#include "IMidiSynth.h" +#include +#include + +namespace Ship { + +// Owns the optional IMidiSynth that, when present, replaces the engine's +// native audio synthesis. When no synth is installed, GetActiveSynth() +// returns nullptr and the audio thread should fall back to the native +// path. +class MidiSynthManager { +public: + static MidiSynthManager& Instance(); + + // Install or remove the active synth. Thread-safe. + // Must NOT be called from the audio thread. + // Passing nullptr uninstalls (native synthesis takes over). + void SetSynth(std::shared_ptr synth); + + // Returns the installed synth, or nullptr if native synthesis is active. + std::shared_ptr GetActiveSynth(); + +private: + MidiSynthManager() = default; + std::shared_ptr mSynth; + std::mutex mMutex; +}; + +} // namespace Ship diff --git a/include/ship/audio/NullAudioPlayer.h b/include/ship/audio/NullAudioPlayer.h index 98a27b5c3..5ea381de5 100644 --- a/include/ship/audio/NullAudioPlayer.h +++ b/include/ship/audio/NullAudioPlayer.h @@ -20,7 +20,7 @@ class NullAudioPlayer final : public AudioPlayer { */ NullAudioPlayer(AudioSettings settings) : AudioPlayer(settings) { } - ~NullAudioPlayer(); + ~NullAudioPlayer() override; /** * @brief Returns the desired buffered frame count so the game always produces audio. 
diff --git a/include/ship/audio/SDLAudioPlayer.h b/include/ship/audio/SDLAudioPlayer.h index ac61dd36e..c68cfb9c1 100644 --- a/include/ship/audio/SDLAudioPlayer.h +++ b/include/ship/audio/SDLAudioPlayer.h @@ -20,7 +20,7 @@ class SDLAudioPlayer final : public AudioPlayer { */ SDLAudioPlayer(AudioSettings settings) : AudioPlayer(settings) { } - ~SDLAudioPlayer(); + ~SDLAudioPlayer() override; /** * @brief Returns the number of audio frames currently queued in the SDL audio device. diff --git a/include/ship/audio/SoundMatrixDecoder.h b/include/ship/audio/SoundMatrixDecoder.h index 04166d90d..138628bd4 100644 --- a/include/ship/audio/SoundMatrixDecoder.h +++ b/include/ship/audio/SoundMatrixDecoder.h @@ -35,10 +35,21 @@ class SoundMatrixDecoder { void ResetState(); /** - * Decode stereo to 5.1 surround - * @param stereoInput Interleaved stereo samples [L0, R0, L1, R1, ...] - * @param samplePairs Number of stereo sample pairs to process - * @return Pointer to internal buffer with interleaved 5.1 samples [FL, FR, C, LFE, SL, SR, ...] + * Decode stereo to 5.1 surround — float-pipeline entry point. + * @param stereoInput Interleaved stereo float samples [L0, R0, L1, R1, ...] + * in nominal [-1, 1] range. + * @param frames Number of stereo frames (sample pairs) to process. + * @return {pointer, frameCount} into the internal 6-channel float buffer + * laid out as [FL, FR, C, LFE, SL, SR, ...]. + */ + std::tuple Process(const float* stereoInput, size_t frames); + + /** + * Decode stereo to 5.1 surround — legacy s16 entry point. + * Preserved byte-exactly for libultraship consumers on the s16 path. + * @param buf Interleaved s16 stereo samples, as bytes. + * @param len Length of @p buf in bytes. + * @return {pointer, byteLength} into the internal 6-channel s16 buffer. 
*/ std::tuple Process(const uint8_t* buf, size_t len); @@ -142,11 +153,17 @@ class SoundMatrixDecoder { float ProcessDelay(float sample, CircularDelay& buffer); /** - * @brief Clamps a floating-point sample to the int16_t range. + * @brief Soft-saturates a float sample to the nominal [-1, 1] range. + * + * The matrix mixer can briefly push peaks slightly above 1.0 when both + * channels are loud. A hard clip would produce harshness; the pipeline's + * soft-clip step handles dramatic over-budget peaks, so this helper just + * keeps the surround buffer numerically sane. + * * @param value Input sample value. - * @return Saturated 16-bit integer sample. + * @return Clamped float sample. */ - static int16_t Saturate(float value); + static float Saturate(float value); int32_t mDelayLength = 0; double mAllPassBaseRate = 1.0; // Precomputed for ProcessAllPass @@ -176,8 +193,10 @@ class SoundMatrixDecoder { CircularDelay mDelaySurrLeft; CircularDelay mDelaySurrRight; - // Output buffer - std::vector mSurroundBuffer; + // Output buffer — interleaved 6-channel float frames. + std::vector mSurroundBuffer; + // Quantised mirror used by the legacy s16 Process() overload only. 
+ std::vector mSurroundBufferS16; }; } // namespace Ship diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 45989b662..8876bfd1b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,6 +12,7 @@ find_package(Python3 REQUIRED COMPONENTS Interpreter) set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../include) option(USE_OPENGLES "Enable GLES3" OFF) option(GFX_DEBUG_DISASSEMBLER "Enable libgfxd" OFF) +option(ENABLE_FLUIDSYNTH "Enable FluidSynth synthesis backend" OFF) if (CMAKE_SYSTEM_NAME STREQUAL "Windows") use_props(${PROJECT_NAME} "${CMAKE_CONFIGURATION_TYPES}" "${DEFAULT_CXX_PROPS}") @@ -139,6 +140,23 @@ if(ENABLE_SCRIPTING) target_compile_definitions(libultraship PUBLIC ENABLE_SCRIPTING) endif() +if(ENABLE_FLUIDSYNTH) + # Try to find FluidSynth cmake files, fallback to pkg-config if not available. + # Linked PUBLIC because the public header ship/audio/FluidSynth.h includes + # : consumers need its include dir, which only propagates publicly. + find_package(FluidSynth CONFIG QUIET) + if(TARGET FluidSynth::libfluidsynth) + target_link_libraries(libultraship PUBLIC FluidSynth::libfluidsynth) + else() + find_package(PkgConfig REQUIRED) + pkg_check_modules(FLUIDSYNTH REQUIRED IMPORTED_TARGET fluidsynth) + target_link_libraries(libultraship PUBLIC PkgConfig::FLUIDSYNTH) + endif() + target_compile_definitions(libultraship PUBLIC ENABLE_FLUIDSYNTH=1) +else() + target_compile_definitions(libultraship PUBLIC ENABLE_FLUIDSYNTH=0) +endif() + #=================== Compile Options & Defs =================== target_compile_definitions(libultraship PRIVATE ${GBI_UCODE}) diff --git a/src/libultraship/bridge/audiobridge.cpp b/src/libultraship/bridge/audiobridge.cpp index c1d433f2b..9d65a239b 100644 --- a/src/libultraship/bridge/audiobridge.cpp +++ b/src/libultraship/bridge/audiobridge.cpp @@ -63,6 +63,19 @@ void AudioPlayerPlayFrame(const uint8_t* buf, size_t len) { audio->Play(buf, len); } +void AudioPlayerPlayFrameF32(const float* buf, size_t frames) { + auto audio 
= Ship::Context::GetInstance()->GetAudio()->GetAudioPlayer(); + if (audio == nullptr) { + return; + } + + if (!audio->IsInitialized()) { + return; + } + + audio->Play(buf, frames); +} + void SetAudioChannels(AudioChannelsSetting channels) { auto audio = Ship::Context::GetInstance()->GetAudio(); if (audio == nullptr) { diff --git a/src/ship/CMakeLists.txt b/src/ship/CMakeLists.txt index e37a3a1dd..5473079d0 100644 --- a/src/ship/CMakeLists.txt +++ b/src/ship/CMakeLists.txt @@ -17,6 +17,10 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") list(FILTER Source_Files__Audio EXCLUDE REGEX "audio/CoreAudioAudioPlayer.*") endif() +if (NOT ENABLE_FLUIDSYNTH) + list(FILTER Source_Files__Audio EXCLUDE REGEX "audio/FluidSynth\\..*") +endif() + source_group("audio" FILES ${Source_Files__Audio}) target_sources(libultraship PRIVATE ${Source_Files__Audio}) diff --git a/src/ship/audio/Audio.cpp b/src/ship/audio/Audio.cpp index 40140b601..7d4bc629c 100644 --- a/src/ship/audio/Audio.cpp +++ b/src/ship/audio/Audio.cpp @@ -36,9 +36,44 @@ void Audio::InitAudioPlayer() { if (mAudioPlayer && !mAudioPlayer->Init()) { // Failed to initialize system audio player. - // Fallback to Null if the native system player does not work. + // Fallback to Null if the native system player does not work. That path + // re-enters InitAudioPlayer (and fires the hook for the Null player), so + // return here to avoid also firing it for the failed player. SetCurrentAudioBackend(AudioBackend::NUL); + return; } + + // A fresh AudioPlayer is live. It already inherits the float-pipeline mode + // via mAudioSettings; the hook lets the host re-attach instance-bound state + // the player cannot carry across a rebuild (e.g. FluidSynth's mix source). 
+ if (mOnAudioPlayerInitialized) { + mOnAudioPlayerInitialized(); + } +} + +bool Audio::IsUsingFloatPipeline() const { + return mUseFloatPipeline.load(std::memory_order_acquire); +} + +bool Audio::SetUseFloatPipeline(bool enabled) { + // Authority first: update the live flag and the template new players inherit, + // so anything constructed from here on comes up in the right mode. + mAudioSettings.UseFloatPipeline = enabled; + mUseFloatPipeline.store(enabled, std::memory_order_release); + + if (mAudioPlayer && !mAudioPlayer->SetUseFloatPipeline(enabled)) { + // The player refused the requested mode; reflect what it actually settled + // on so producer and consumer stay in agreement. + const bool actual = mAudioPlayer->IsUsingFloatPipeline(); + mAudioSettings.UseFloatPipeline = actual; + mUseFloatPipeline.store(actual, std::memory_order_release); + return false; + } + return true; +} + +void Audio::SetOnAudioPlayerInitialized(std::function callback) { + mOnAudioPlayerInitialized = std::move(callback); } void Audio::Init() { @@ -125,6 +160,10 @@ void Audio::SetCurrentAudioBackend(AudioBackend backend) { } mConfig->Save(); + // The new player inherits the float-pipeline mode from mAudioSettings (kept + // authoritative by SetUseFloatPipeline), so it comes up in the correct mode + // by construction. InitAudioPlayer's hook then re-attaches any instance-bound + // state (e.g. the FluidSynth mix source). 
InitAudioPlayer(); } diff --git a/src/ship/audio/AudioPlayer.cpp b/src/ship/audio/AudioPlayer.cpp index 037f1b48c..b665c67df 100644 --- a/src/ship/audio/AudioPlayer.cpp +++ b/src/ship/audio/AudioPlayer.cpp @@ -1,5 +1,7 @@ #include "ship/audio/AudioPlayer.h" +#include "ship/audio/AudioResampler.h" #include "spdlog/spdlog.h" +#include namespace Ship { @@ -7,12 +9,50 @@ AudioPlayer::~AudioPlayer() { SPDLOG_TRACE("destruct audio player"); } -bool AudioPlayer::Init() { - // Initialize sound matrix decoder if matrix surround mode is enabled +// Resampler channel count differs between modes: +// - Float pipeline: resampler processes stereo (input is always stereo; +// mix + surround decode both run after the resample step). +// - S16 legacy: resampler processes GetNumOutputChannels() (surround +// decode runs *before* the resample step, preserving the historical +// order other libultraship consumers rely on). +void AudioPlayer::RebuildResampler() { + const int32_t channels = mAudioSettings.UseFloatPipeline ? 2 : GetNumOutputChannels(); + if (mAudioSettings.SourceSampleRate != mAudioSettings.SampleRate && mAudioSettings.SourceSampleRate > 0) { + SPDLOG_INFO("AudioPlayer: initializing resampler {} Hz → {} Hz, {} ch ({})", + mAudioSettings.SourceSampleRate, mAudioSettings.SampleRate, channels, + mAudioSettings.UseFloatPipeline ? 
"float HD" : "s16 legacy"); + mResampler = std::make_unique(mAudioSettings.SourceSampleRate, mAudioSettings.SampleRate, + channels); + } else { + SPDLOG_INFO("AudioPlayer: resampler disabled {} Hz → {} Hz, {} ch", mAudioSettings.SourceSampleRate, + mAudioSettings.SampleRate, channels); + mResampler = nullptr; + } +} + +bool AudioPlayer::NeedsMatrixDecoder() const { if (mAudioSettings.ChannelSetting == AudioChannelsSetting::audioMatrix51) { - SPDLOG_INFO("Initializing sound matrix decoder for surround"); - mSoundMatrixDecoder = std::make_unique(mAudioSettings.SampleRate); + return true; } + // Raw 5.1 has no native 6-channel source in float mode (the float producer + // is stereo), so the stereo bus must be upmixed instead of passed through. + return mAudioSettings.ChannelSetting == AudioChannelsSetting::audioRaw51 && mAudioSettings.UseFloatPipeline; +} + +void AudioPlayer::EnsureMatrixDecoder() { + if (NeedsMatrixDecoder()) { + if (!mSoundMatrixDecoder) { + SPDLOG_INFO("Initializing sound matrix decoder for surround"); + mSoundMatrixDecoder = std::make_unique(mAudioSettings.SampleRate); + } + } else { + mSoundMatrixDecoder.reset(); + } +} + +bool AudioPlayer::Init() { + EnsureMatrixDecoder(); + RebuildResampler(); mInitialized = DoInit(); return IsInitialized(); } @@ -25,11 +65,21 @@ int32_t AudioPlayer::GetSampleRate() const { return mAudioSettings.SampleRate; } +int32_t AudioPlayer::GetSourceSampleRate() const { + return mAudioSettings.SourceSampleRate; +} + int32_t AudioPlayer::GetSampleLength() const { return mAudioSettings.SampleLength; } int32_t AudioPlayer::GetDesiredBuffered() const { + // Scale DesiredBuffered from source rate to output rate so callers + // (e.g. DoPlay fill threshold) work in output-rate frames consistently. 
+ if (mAudioSettings.SourceSampleRate > 0 && mAudioSettings.SourceSampleRate != mAudioSettings.SampleRate) { + return (int32_t)((int64_t)mAudioSettings.DesiredBuffered * mAudioSettings.SampleRate / + mAudioSettings.SourceSampleRate); + } return mAudioSettings.DesiredBuffered; } @@ -41,6 +91,10 @@ void AudioPlayer::SetSampleRate(int32_t rate) { mAudioSettings.SampleRate = rate; } +void AudioPlayer::SetSourceSampleRate(int32_t rate) { + mAudioSettings.SourceSampleRate = rate; +} + void AudioPlayer::SetSampleLength(int32_t length) { mAudioSettings.SampleLength = length; } @@ -63,19 +117,53 @@ bool AudioPlayer::SetAudioChannels(AudioChannelsSetting channels) { // Update channel setting mAudioSettings.ChannelSetting = channels; - // Setup or teardown sound matrix decoder - if (channels == AudioChannelsSetting::audioMatrix51) { - if (!mSoundMatrixDecoder) { - mSoundMatrixDecoder = std::make_unique(mAudioSettings.SampleRate); - } - } else { - // When switching away from matrix mode, release the decoder - mSoundMatrixDecoder.reset(); - } + EnsureMatrixDecoder(); + // Channel-count change can affect the s16 legacy resampler (built at + // GetNumOutputChannels()); rebuild to pick that up. + RebuildResampler(); return DoInit(); } +bool AudioPlayer::SetMixSource(MixSource source) { + if (!mAudioSettings.UseFloatPipeline && source) { + SPDLOG_WARN("AudioPlayer::SetMixSource ignored — float pipeline is disabled"); + return false; + } + mMixSource = std::move(source); + return true; +} + +bool AudioPlayer::SetUseFloatPipeline(bool enabled) { + if (mAudioSettings.UseFloatPipeline == enabled) { + return true; + } + SPDLOG_INFO("AudioPlayer: switching pipeline mode {} → {}", + mAudioSettings.UseFloatPipeline ? "float HD" : "s16 legacy", + enabled ? 
"float HD" : "s16 legacy"); + DoClose(); + const bool oldMode = mAudioSettings.UseFloatPipeline; + mAudioSettings.UseFloatPipeline = enabled; + if (!enabled) { + // Dropping the float path also drops any installed mix source — the + // s16 mix happens upstream in the consumer. + mMixSource = nullptr; + } + // Raw 5.1 needs the decoder only in float mode, so the mode switch can + // change whether it is required. + EnsureMatrixDecoder(); + RebuildResampler(); + mInitialized = DoInit(); + if (!mInitialized) { + SPDLOG_ERROR("AudioPlayer: reinit failed at new mode, reverting"); + mAudioSettings.UseFloatPipeline = oldMode; + EnsureMatrixDecoder(); + RebuildResampler(); + mInitialized = DoInit(); + } + return mInitialized && mAudioSettings.UseFloatPipeline == enabled; +} + int32_t AudioPlayer::GetNumOutputChannels() const { switch (mAudioSettings.ChannelSetting) { case AudioChannelsSetting::audioMatrix51: @@ -88,21 +176,125 @@ int32_t AudioPlayer::GetNumOutputChannels() const { } void AudioPlayer::Play(const uint8_t* buf, size_t len) { - if (mAudioSettings.ChannelSetting != AudioChannelsSetting::audioMatrix51) { - // Stereo or Raw 5.1 passthrough - DoPlay(buf, len); + if (mAudioSettings.UseFloatPipeline) { + SPDLOG_WARN("AudioPlayer::Play(uint8_t*) called in float mode — dropping buffer"); return; } - if (!mSoundMatrixDecoder) { - SPDLOG_ERROR("AudioPlayer: Matrix 5.1 mode enabled but SoundMatrixDecoder is not initialized"); + // Legacy stages (unchanged from the pre-float-pipeline behaviour so + // existing libultraship consumers keep their byte-exact contract): + // 1. Surround decode if matrix-5.1 (stereo s16 → 6-channel s16). + // 2. Resample at the current channel count. + // 3. DoPlay. 
+ + const uint8_t* pcm = buf; + size_t pcmLen = len; + + std::vector surroundBuf; + + if (mAudioSettings.ChannelSetting == AudioChannelsSetting::audioMatrix51) { + if (!mSoundMatrixDecoder) { + SPDLOG_ERROR("AudioPlayer: Matrix 5.1 mode enabled but SoundMatrixDecoder is not initialized"); + return; + } + const auto [surroundOut, surroundLen] = mSoundMatrixDecoder->Process(buf, len); + // Copy to local buffer so we own the memory through the resampler step + surroundBuf.assign(surroundOut, surroundOut + surroundLen); + pcm = surroundBuf.data(); + pcmLen = surroundLen; + } + + // Step 2: resample if source rate ≠ output rate + if (mResampler) { + const int ch = GetNumOutputChannels(); + const int32_t inFrames = static_cast(pcmLen / (sizeof(int16_t) * ch)); + const int32_t maxOut = mResampler->MaxOutputFrames(inFrames); + + assert(static_cast(maxOut * ch) <= kResampleBufSamples && + "Resample output exceeds kResampleBufSamples — increase the buffer size"); + + const int32_t outFrames = mResampler->Process(reinterpret_cast(pcm), inFrames, + mResampleBufS16.data(), maxOut); + DoPlay(reinterpret_cast(mResampleBufS16.data()), + static_cast(outFrames * ch * sizeof(int16_t))); return; } - // Decode stereo to surround using sound matrix decoder - const auto [surroundOut, surroundLen] = mSoundMatrixDecoder->Process(buf, len); + // Step 3: passthrough (no resampling needed) + DoPlay(pcm, pcmLen); +} + +void AudioPlayer::Play(const float* buf, size_t frames) { + if (!mAudioSettings.UseFloatPipeline) { + SPDLOG_WARN("AudioPlayer::Play(float*) called in s16 mode — dropping buffer"); + return; + } + + // Float audio pipeline stages: + // 1. Resample the primary stereo input to the device's output rate. + // 2. Mix in the optional secondary stereo source (FluidSynth) at the output + // rate with a tanh soft-clip, bypassing the resampler. + // 3. Surround-decode stereo -> 5.1 if in matrix-5.1 mode. + // 4. DoPlay the interleaved float buffer. 
+ + // ── Stage 1: resample stereo to output rate ─────────────────────────── + const float* stereoOutRate = buf; + int32_t outFrames = static_cast(frames); + if (mResampler) { + const int32_t inFrames = static_cast(frames); + const int32_t maxOut = mResampler->MaxOutputFrames(inFrames); + + assert(static_cast(maxOut * 2) <= kResampleBufSamples && + "Resample output exceeds kResampleBufSamples — increase the buffer size"); + + outFrames = mResampler->Process(buf, inFrames, mResampleBuf.data(), maxOut); + stereoOutRate = mResampleBuf.data(); + } + + // ── Stage 2: mix the secondary source (post-resampler, output rate) ── + // We always write the mixed result back into mResampleBuf so the same + // pointer feeds the surround decode below. + if (mMixSource) { + assert(static_cast(outFrames * 2) <= kResampleBufSamples && + "Mix output exceeds kResampleBufSamples — increase the buffer size"); + mMixSource(mMixSourceBuf.data(), outFrames); + + // Tanh approximation used to soft-clip the secondary-source mix, so dynamics + // stay well-behaved when the synth contributes a peaky signal. + auto SoftClipTanhApprox = [](float x) -> float { + const float x2 = x * x; + return x * (27.0f + x2) / (27.0f + 9.0f * x2); + }; + + for (int32_t i = 0; i < outFrames * 2; i++) { + mResampleBuf[i] = SoftClipTanhApprox(stereoOutRate[i] + mMixSourceBuf[i]); + } + stereoOutRate = mResampleBuf.data(); + } else if (stereoOutRate != mResampleBuf.data()) { + // No mix and no resample: stereoOutRate still points at the caller's + // buffer. The surround decode below copies through its own buffer, + // and the stereo passthrough at the end works off whatever + // stereoOutRate points to, so no copy is needed here. + } + + // ── Stage 3: surround decode (stereo → 5.1) ────────────────────────── + // The float source is always stereo, so any 6-channel output must be upmixed + // here — Matrix 5.1 and Raw 5.1 alike. 
The synth was summed into the stereo + // bus above, so it reaches every surround channel through the decoder too. + if (GetNumOutputChannels() == 6) { + if (!mSoundMatrixDecoder) { + SPDLOG_ERROR("AudioPlayer: 5.1 output but SoundMatrixDecoder is not initialized"); + return; + } + const auto [surroundOut, surroundFrames] = + mSoundMatrixDecoder->Process(stereoOutRate, static_cast(outFrames)); + DoPlay(reinterpret_cast(surroundOut), + static_cast(surroundFrames) * 6 * sizeof(float)); + return; + } - // Play the audio - DoPlay(surroundOut, surroundLen); + // ── Stage 4: stereo passthrough ────────────────────────────────────── + DoPlay(reinterpret_cast(stereoOutRate), + static_cast(outFrames) * 2 * sizeof(float)); } } // namespace Ship diff --git a/src/ship/audio/AudioResampler.cpp b/src/ship/audio/AudioResampler.cpp new file mode 100644 index 000000000..e1a7c7f26 --- /dev/null +++ b/src/ship/audio/AudioResampler.cpp @@ -0,0 +1,229 @@ +#include "ship/audio/AudioResampler.h" + +#include +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +namespace Ship { + +// --------------------------------------------------------------------------- +// Construction +// --------------------------------------------------------------------------- + +AudioResampler::AudioResampler(int32_t inRate, int32_t outRate, int32_t numChannels) + : mInRate(inRate), mOutRate(outRate), mNumChannels(numChannels), mPhase(0) { + + int32_t g = GCD(inRate, outRate); + mP = outRate / g; /* upsample factor (e.g. 3 for 32k→48k) */ + mQ = inRate / g; /* downsample factor (e.g. 2 for 32k→48k) */ + mNumPhases = mP; + + BuildFilter(); + + /* History: kTapsPerPhase-1 past frames per channel, zero-initialized + * so the first output frames fade in cleanly from silence. 
*/ + mHistory.assign((kTapsPerPhase - 1) * mNumChannels, 0.0f); +} + +// --------------------------------------------------------------------------- +// Filter construction — windowed-sinc lowpass, polyphase decomposition +// --------------------------------------------------------------------------- + +float AudioResampler::BesselI0(float x) { + /* Modified Bessel function of the first kind, order 0. + * Used for Kaiser window computation. + * Series expansion — converges well for x < 20 (beta up to ~14). */ + float sum = 1.0f; + float term = 1.0f; + float half_x = x * 0.5f; + for (int k = 1; k <= 30; k++) { + term *= (half_x / (float)k); + term *= (half_x / (float)k); + sum += term; + if (term < 1e-12f * sum) + break; + } + return sum; +} + +float AudioResampler::KaiserWindow(int n, int N, float beta) { + /* Kaiser window of length N+1, sample n in [0, N]. + * beta=6 gives ~60 dB stopband attenuation — good balance for audio. */ + float r = 2.0f * (float)n / (float)N - 1.0f; /* normalise to [-1, 1] */ + float inside = 1.0f - r * r; + if (inside < 0.0f) + inside = 0.0f; + return BesselI0(beta * sqrtf(inside)) / BesselI0(beta); +} + +float AudioResampler::Sinc(float x) { + if (fabsf(x) < 1e-8f) + return 1.0f; + float px = (float)M_PI * x; + return sinf(px) / px; +} + +void AudioResampler::BuildFilter() { + /* Total filter length: P * kTapsPerPhase taps. + * Cutoff at fc = 0.5 / max(P, Q) in normalised frequency (relative to + * the upsampled rate P*inRate = P*outRate/Q). For 32k→48k: P=3, Q=2, + * fc = 0.5/3 ≈ 0.167. */ + const int totalTaps = mNumPhases * kTapsPerPhase; + const float fc = 0.5f / (float)std::max(mP, mQ); + const float beta = 6.0f; + const int N = totalTaps - 1; + + std::vector h(totalTaps); + + /* Windowed sinc prototype filter */ + for (int i = 0; i < totalTaps; i++) { + float x = (float)i - (float)N * 0.5f; + h[i] = 2.0f * fc * Sinc(2.0f * fc * x) * KaiserWindow(i, N, beta); + } + + /* Polyphase decomposition: interleave into mNumPhases banks. 
+ * Phase p contains taps h[p], h[p+P], h[p+2P], ... + * Normalise by P so energy is preserved after upsampling. */ + mCoeffs.resize(mNumPhases * kTapsPerPhase); + for (int phase = 0; phase < mNumPhases; phase++) { + for (int tap = 0; tap < kTapsPerPhase; tap++) { + mCoeffs[phase * kTapsPerPhase + tap] = h[phase + tap * mNumPhases] * (float)mP; + } + } +} + +// --------------------------------------------------------------------------- +// Reset +// --------------------------------------------------------------------------- + +void AudioResampler::Reset() { + std::fill(mHistory.begin(), mHistory.end(), 0.0f); + mPhase = 0; +} + +// --------------------------------------------------------------------------- +// MaxOutputFrames +// --------------------------------------------------------------------------- + +int32_t AudioResampler::MaxOutputFrames(int32_t inFrames) const { + /* ceil((inFrames * P) / Q) */ + return (int32_t)(((int64_t)inFrames * mP + mQ - 1) / mQ); +} + +// --------------------------------------------------------------------------- +// Process: the core resampling loop. +// +// Conceptually upsample by P (insert P-1 zeros between input samples), lowpass +// filter, then downsample by Q. The polyphase decomposition does this without the +// zero-padded samples: advance through phases, advancing the input pointer only +// after completing Q phases. Per output sample: apply filter bank[mPhase] to the +// last kTapsPerPhase input frames, then mPhase += Q; if mPhase >= P, subtract P +// and advance input by 1. +// --------------------------------------------------------------------------- + +int32_t AudioResampler::Process(const float* inBuf, int32_t inFrames, float* outBuf, int32_t maxOutFrames) { + const int histLen = kTapsPerPhase - 1; + const int ch = mNumChannels; + + /* Build a contiguous float window: history + new input. + * history holds the last (kTapsPerPhase-1) input frames as float. 
*/ + const int windowFrames = histLen + inFrames; + std::vector window(windowFrames * ch); + + /* Copy history */ + for (int i = 0; i < histLen * ch; i++) { + window[i] = mHistory[i]; + } + + /* Append new input frames verbatim — samples are already float in the + * nominal [-1, 1] range so no conversion or normalisation is needed. */ + for (int i = 0; i < inFrames * ch; i++) { + window[histLen * ch + i] = inBuf[i]; + } + + /* Resample */ + int32_t outFrames = 0; + int32_t inPos = 0; /* current input frame position in window[] */ + int32_t phase = mPhase; + + while (inPos + kTapsPerPhase <= windowFrames && outFrames < maxOutFrames) { + const float* coeffs = &mCoeffs[phase * kTapsPerPhase]; + + for (int c = 0; c < ch; c++) { + float acc = 0.0f; + for (int tap = 0; tap < kTapsPerPhase; tap++) { + acc += window[(inPos + tap) * ch + c] * coeffs[tap]; + } + /* Pass through float as-is. Soft-clip happens upstream + * (OTRAudio_Thread's mix step); the polyphase filter is + * unity-gain so brief excursions slightly above 1.0 are fine. 
*/ + outBuf[outFrames * ch + c] = acc; + } + outFrames++; + + /* Advance phase by Q; when phase wraps, consume one input frame */ + phase += mQ; + if (phase >= mP) { + phase -= mP; + inPos++; + } + } + + /* Save tail of window as new history */ + const int consumed = inPos; /* input frames consumed from window */ + const int remaining = histLen - (inFrames - consumed); + + if (inFrames >= histLen) { + /* Enough new input to fill history entirely from inBuf */ + for (int i = 0; i < histLen * ch; i++) { + mHistory[i] = window[(windowFrames - histLen) * ch + i]; + } + } else { + /* Partial update: shift old history and append new input */ + const int keep = histLen - inFrames; + for (int i = 0; i < keep * ch; i++) { + mHistory[i] = mHistory[inFrames * ch + i]; + } + for (int i = 0; i < inFrames * ch; i++) { + mHistory[keep * ch + i] = window[histLen * ch + i]; + } + } + + mPhase = phase; + return outFrames; +} + +// --------------------------------------------------------------------------- +// s16 overload: wraps the float core with conversions at the boundaries. 
+// --------------------------------------------------------------------------- + +int32_t AudioResampler::Process(const int16_t* inBuf, int32_t inFrames, int16_t* outBuf, int32_t maxOutFrames) { + const int ch = mNumChannels; + const int totalIn = inFrames * ch; + const int totalOut = maxOutFrames * ch; + + std::vector inF(totalIn); + std::vector outF(totalOut); + + constexpr float kS16ToFloat = 1.0f / 32768.0f; + for (int i = 0; i < totalIn; i++) { + inF[i] = static_cast(inBuf[i]) * kS16ToFloat; + } + + const int32_t outFrames = Process(inF.data(), inFrames, outF.data(), maxOutFrames); + + const int outSamples = outFrames * ch; + for (int i = 0; i < outSamples; i++) { + float v = outF[i] * 32767.0f; + if (v > 32767.0f) v = 32767.0f; + if (v < -32768.0f) v = -32768.0f; + outBuf[i] = static_cast(v); + } + return outFrames; +} + +} // namespace Ship diff --git a/src/ship/audio/CoreAudioAudioPlayer.cpp b/src/ship/audio/CoreAudioAudioPlayer.cpp index 489273b64..14380e775 100644 --- a/src/ship/audio/CoreAudioAudioPlayer.cpp +++ b/src/ship/audio/CoreAudioAudioPlayer.cpp @@ -34,7 +34,10 @@ bool CoreAudioAudioPlayer::DoInit() { mNumChannels = this->GetAudioChannels() == AudioChannelsSetting::audioStereo ? 2 : 6; - const size_t bytesPerSample = sizeof(int16_t); + // Sample width follows the pipeline mode: float HD = 32-bit float, + // legacy = 16-bit signed integer. + const bool useFloat = this->IsUsingFloatPipeline(); + const size_t bytesPerSample = useFloat ? sizeof(float) : sizeof(int16_t); const size_t bytesPerFrame = bytesPerSample * mNumChannels; mRingBufferSize = 6000 * bytesPerFrame; @@ -72,12 +75,14 @@ bool CoreAudioAudioPlayer::DoInit() { AudioStreamBasicDescription format; format.mSampleRate = this->GetSampleRate(); format.mFormatID = kAudioFormatLinearPCM; - format.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + format.mFormatFlags = useFloat + ? 
(kLinearPCMFormatFlagIsFloat | kLinearPCMFormatFlagIsPacked) + : (kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked); format.mBytesPerPacket = bytesPerFrame; format.mFramesPerPacket = 1; format.mBytesPerFrame = bytesPerFrame; format.mChannelsPerFrame = mNumChannels; - format.mBitsPerChannel = 16; + format.mBitsPerChannel = useFloat ? 32 : 16; status = AudioUnitSetProperty(mAudioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, 0, &format, sizeof(format)); @@ -123,7 +128,7 @@ int CoreAudioAudioPlayer::Buffered() { buffered = mRingBufferSize - (mRingBufferReadPos - mRingBufferWritePos); } - const size_t bytesPerFrame = sizeof(int16_t) * mNumChannels; + const size_t bytesPerFrame = (this->IsUsingFloatPipeline() ? sizeof(float) : sizeof(int16_t)) * mNumChannels; int samples = buffered / bytesPerFrame; pthread_mutex_unlock(&mMutex); @@ -133,7 +138,7 @@ int CoreAudioAudioPlayer::Buffered() { void CoreAudioAudioPlayer::DoPlay(const uint8_t* buf, size_t len) { pthread_mutex_lock(&mMutex); - const size_t bytesPerFrame = sizeof(int16_t) * mNumChannels; + const size_t bytesPerFrame = (this->IsUsingFloatPipeline() ? sizeof(float) : sizeof(int16_t)) * mNumChannels; const size_t maxBuffered = 6000 * bytesPerFrame; size_t available; diff --git a/src/ship/audio/FluidSynth.cpp b/src/ship/audio/FluidSynth.cpp new file mode 100644 index 000000000..7ca31bc60 --- /dev/null +++ b/src/ship/audio/FluidSynth.cpp @@ -0,0 +1,534 @@ +#if ENABLE_FLUIDSYNTH +#include "ship/audio/FluidSynth.h" +#include +#include +#include +#include + +namespace Ship { + +namespace { +// ---------------------------------------------------------------------- +// Memory-backed SF2 loader. +// +// FluidSynth tries its sound-font loaders in order against the path passed to +// fluid_synth_sfload(). The default loader handles filesystem paths; we register +// one that responds to the sentinel "mem://current" and ignores everything else, +// so path- and memory-based loads coexist. 
The open callback has no user-data slot +// (only a filename), so the in-flight buffer pointer passes through a single static +// slot; AddSoundFontFromMemory runs only on the GUI thread under the synth mutex, +// so that slot is safe even with multiple SF2s loaded. +// ---------------------------------------------------------------------- + +struct MemoryInflight { + const uint8_t* data = nullptr; + size_t size = 0; +}; +static MemoryInflight sMemoryInflight; + +struct MemoryHandle { + const uint8_t* data; + size_t size; + size_t pos; +}; + +constexpr const char* kMemorySentinel = "mem://current"; + +void* MemoryOpen(const char* filename) { + if (filename == nullptr || std::strcmp(filename, kMemorySentinel) != 0) { + return nullptr; + } + if (sMemoryInflight.data == nullptr || sMemoryInflight.size == 0) { + return nullptr; + } + auto* h = new MemoryHandle{ sMemoryInflight.data, sMemoryInflight.size, 0 }; + // Single-shot: clear the slot so a stray repeat sfload can't replay. + sMemoryInflight = {}; + return h; +} + +int MemoryRead(void* buf, fluid_long_long_t count, void* handle) { + auto* h = static_cast(handle); + if (count < 0 || static_cast(count) > h->size - h->pos) { + return FLUID_FAILED; + } + std::memcpy(buf, h->data + h->pos, static_cast(count)); + h->pos += static_cast(count); + return FLUID_OK; +} + +int MemorySeek(void* handle, fluid_long_long_t offset, int origin) { + auto* h = static_cast(handle); + fluid_long_long_t newPos; + switch (origin) { + case SEEK_SET: + newPos = offset; + break; + case SEEK_CUR: + newPos = static_cast(h->pos) + offset; + break; + case SEEK_END: + newPos = static_cast(h->size) + offset; + break; + default: + return FLUID_FAILED; + } + if (newPos < 0 || static_cast(newPos) > h->size) { + return FLUID_FAILED; + } + h->pos = static_cast(newPos); + return FLUID_OK; +} + +fluid_long_long_t MemoryTell(void* handle) { + return static_cast(static_cast(handle)->pos); +} + +int MemoryClose(void* handle) { + delete 
static_cast(handle); + return FLUID_OK; +} + +// ---------------------------------------------------------------------- +// Route FluidSynth's own log output into the Ship logger. +// +// FluidSynth otherwise writes straight to stderr, bypassing our log sinks +// and level filtering. We forward each message at the matching spdlog level. +// ---------------------------------------------------------------------- +fluid_log_function_t FluidLogToShip(int level) { + switch (level) { + case FLUID_PANIC: + return [](int, const char* message, void*) { SPDLOG_CRITICAL("[FluidSynth] {}", message); }; + case FLUID_ERR: + return [](int, const char* message, void*) { SPDLOG_ERROR("[FluidSynth] {}", message); }; + case FLUID_WARN: + return [](int, const char* message, void*) { SPDLOG_WARN("[FluidSynth] {}", message); }; + case FLUID_INFO: + return [](int, const char* message, void*) { SPDLOG_INFO("[FluidSynth] {}", message); }; + case FLUID_DBG: + return [](int, const char* message, void*) { SPDLOG_DEBUG("[FluidSynth] {}", message); }; + default: + return [](int, const char* message, void*) { SPDLOG_INFO("[FluidSynth] {}", message); }; + } +} +} // namespace + +FluidSynth::FluidSynth(const FluidSynthConfig& config) + : mSampleRate(config.sampleRate), mLinearVelocity(config.linearVelocity) { + + static std::once_flag once; + std::call_once(once, [] { + // Not using any audio driver for fluidsynth, we pull samples via + // Render() ourselves. "file" is not used, but registering only it to + // avoid a warning when trying to load unavailable drivers such as SDL3. 
+ const char* allowed_drivers[] = { "file", nullptr }; + fluid_audio_driver_register(allowed_drivers); + + // Redirect fluidsynth logs to SPDLOG at equivalent level + for (int level = 0; level < fluid_log_level::LAST_LOG_LEVEL; ++level) { + fluid_set_log_function(level, FluidLogToShip(level), nullptr); + } + }); + + mSettings = new_fluid_settings(); + // Sample rate MUST be set before new_fluid_synth — the synth reads it + // once at construction. + fluid_settings_setnum(mSettings, "synth.sample-rate", config.sampleRate); + // 64 channels = enough headroom for the per-pair channel allocator in + // MidiTranslator to give each (fontId, instOrWave) pair its own MIDI + // channel, so per-pair effect CCs (CC91/93/74/71) don't stomp each + // other. Must be a multiple of 16. + fluid_settings_setint(mSettings, "synth.midi-channels", kNumChannels); + // "none" = no internal audio driver; we pull samples via Render() ourselves. + // "file" is an offline render-to-disk mode and must NOT be used here. + fluid_settings_setstr(mSettings, "audio.driver", "none"); + + // Master gain. Stock FluidSynth is 0.2. + fluid_settings_setnum(mSettings, "synth.gain", config.gain); + + // Polyphony (max simultaneous voices). Stock FluidSynth is 256; the integrating + // game sizes this for its workload (see FluidSynthConfig::polyphony). Undersizing + // drops notes. FluidSynth frees each voice when its sample/envelope completes (no + // leak) and idle voices are cheap, so a generous ceiling is fine -- e.g. when a + // game layers a full melodic mapping plus voice-holding one-shot percussion. + fluid_settings_setint(mSettings, "synth.polyphony", config.polyphony); + + mSynth = new_fluid_synth(mSettings); + if (!mSynth) { + SPDLOG_ERROR("[FluidSynth] Failed to create synth"); + return; + } + + // Verify the sample rate FluidSynth actually locked in. + double actualRate = 0.0; + fluid_settings_getnum(mSettings, "synth.sample-rate", &actualRate); + SPDLOG_INFO("[FluidSynth] Synth created. 
Requested sample rate={} actual={} linearVelocity={} " + "polyphony={} gain={}", + config.sampleRate, actualRate, mLinearVelocity, config.polyphony, config.gain); + + if (mLinearVelocity) { + InstallLinearVelocityModulators(); + } + + // Register the memory-backed sound-font loader alongside the default + // filesystem loader. Loaders are tried in addition order: default + // catches real filesystem paths, ours catches the mem:// sentinel. + // FluidSynth takes ownership of the loader and frees it via + // delete_fluid_synth. + fluid_sfloader_t* memLoader = new_fluid_defsfloader(mSettings); + if (memLoader) { + fluid_sfloader_set_callbacks(memLoader, MemoryOpen, MemoryRead, MemorySeek, MemoryTell, MemoryClose); + fluid_synth_add_sfloader(mSynth, memLoader); + } else { + SPDLOG_WARN("[FluidSynth] Memory sound-font loader unavailable; " + "LoadSoundFontFromMemory will fall back to default loader"); + } +} + +void FluidSynth::InstallLinearVelocityModulators() { + // Approach adapted from ANMP (GPL-2, github.com/derselbst/ANMP): replace the + // SF2 spec's default velocity / CC7 / CC11 -> initial-attenuation modulators + // with versions that keep the concave NEGATIVE shape but halve the amount + // (960 -> 480 cB), pulling maximum attenuation from -96 dB to -48 dB. This + // lifts quiet voices without flattening dynamics. CC11 stays active because + // the translator drives loudness dynamics through it. + // + // Use remove_default_mod + add_default_mod rather than add(... OVERWRITE), + // which only replaces when every source flag matches exactly. Must run after + // new_fluid_synth() but before any LoadSoundFont(): SF2 instrument-level + // modulators layer on top of these defaults at load time. 
+ + fluid_mod_t* mod = new_fluid_mod(); + if (!mod) { + SPDLOG_ERROR("[FluidSynth] new_fluid_mod() failed; velocity modulators disabled"); + return; + } + + constexpr int kHalfAttenuationCentibels = 480; // = 960 / 2 + + fluid_mod_set_source2(mod, FLUID_MOD_NONE, 0); + fluid_mod_set_dest(mod, GEN_ATTENUATION); + fluid_mod_set_amount(mod, kHalfAttenuationCentibels); + + // 1. NoteOn velocity → initial attenuation (concave, halved). + fluid_mod_set_source1(mod, FLUID_MOD_VELOCITY, + FLUID_MOD_GC | FLUID_MOD_CONCAVE | FLUID_MOD_UNIPOLAR | FLUID_MOD_NEGATIVE); + fluid_synth_remove_default_mod(mSynth, mod); + fluid_synth_add_default_mod(mSynth, mod, FLUID_SYNTH_OVERWRITE); + + // 2. CC7 (channel volume) → initial attenuation (concave, halved). + fluid_mod_set_source1(mod, 7, FLUID_MOD_CC | FLUID_MOD_CONCAVE | FLUID_MOD_UNIPOLAR | FLUID_MOD_NEGATIVE); + fluid_synth_remove_default_mod(mSynth, mod); + fluid_synth_add_default_mod(mSynth, mod, FLUID_SYNTH_OVERWRITE); + + // 3. CC11 (expression) → initial attenuation (concave, halved). 
+ fluid_mod_set_source1(mod, 11, FLUID_MOD_CC | FLUID_MOD_CONCAVE | FLUID_MOD_UNIPOLAR | FLUID_MOD_NEGATIVE); + fluid_synth_remove_default_mod(mSynth, mod); + fluid_synth_add_default_mod(mSynth, mod, FLUID_SYNTH_OVERWRITE); + + delete_fluid_mod(mod); + + SPDLOG_INFO("[FluidSynth] velocity modulators installed (vel/CC7/CC11 concave x 0.5)"); +} + +FluidSynth::~FluidSynth() { + if (mSynth) + delete_fluid_synth(mSynth); + if (mSettings) + delete_fluid_settings(mSettings); +} + +void FluidSynth::ClearSoundFonts() { + std::lock_guard lock(mSynthMutex); + if (!mSynth) { + mSfontIds.clear(); + mLoadedBuffers.clear(); + return; + } + for (int id : mSfontIds) { + if (id != FLUID_FAILED) + fluid_synth_sfunload(mSynth, id, /*reset_presets=*/1); + } + mSfontIds.clear(); + mLoadedBuffers.clear(); + mLoadedBuffers.shrink_to_fit(); + // reset_presets above cleared channel state inside the synth, so the + // RPN-0 (pitch bend range) push needs to repeat on the next NoteOn. + for (bool& inited : mChannelInited) + inited = false; +} + +int FluidSynth::AddSoundFont(const std::string& path) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return FLUID_FAILED; + // reset_presets only on the FIRST sfont — for subsequent loads we + // want preset assignments on existing channels left alone so a + // stacked pack doesn't blow away the prior pack's program selection. + int resetPresets = mSfontIds.empty() ? 
1 : 0; + int id = fluid_synth_sfload(mSynth, path.c_str(), resetPresets); + if (id == FLUID_FAILED) { + SPDLOG_ERROR("[FluidSynth] Failed to load SF2: {}", path); + return FLUID_FAILED; + } + SPDLOG_INFO("[FluidSynth] Loaded SF2: {} (id={})", path, id); + mSfontIds.push_back(id); + mLoadedBuffers.emplace_back(); // empty — filesystem load owns its own data + if (resetPresets) { + for (bool& inited : mChannelInited) + inited = false; + } + return id; +} + +int FluidSynth::AddSoundFontFromMemory(const uint8_t* data, size_t size) { + std::lock_guard lock(mSynthMutex); + if (!mSynth || data == nullptr || size == 0) + return FLUID_FAILED; + + // Pre-reserve the slot so we can hand its address through the static + // sMemoryInflight pointer for the duration of sfload. Vector growth + // is fine here because the SF2 buffer lives in the vector element, + // which is itself a vector (small, by-value relocations + // don't invalidate the underlying heap-allocated data). + mLoadedBuffers.emplace_back(data, data + size); + auto& buf = mLoadedBuffers.back(); + sMemoryInflight = { buf.data(), buf.size() }; + int resetPresets = mSfontIds.empty() ? 
1 : 0; + int id = fluid_synth_sfload(mSynth, kMemorySentinel, resetPresets); + sMemoryInflight = {}; + if (id == FLUID_FAILED) { + SPDLOG_ERROR("[FluidSynth] Failed to load SF2 from memory ({} bytes)", size); + mLoadedBuffers.pop_back(); + return FLUID_FAILED; + } + SPDLOG_INFO("[FluidSynth] Loaded SF2 from memory ({} bytes, id={})", size, id); + mSfontIds.push_back(id); + if (resetPresets) { + for (bool& inited : mChannelInited) + inited = false; + } + return id; +} + +std::vector FluidSynth::GetLoadedSfontIds() { + std::lock_guard lock(mSynthMutex); + return mSfontIds; +} + +std::vector FluidSynth::EnumerateLoadedPresets() { + std::lock_guard lock(mSynthMutex); + std::vector result; + if (!mSynth) + return result; + for (int id : mSfontIds) { + if (id == FLUID_FAILED) + continue; + fluid_sfont_t* sfont = fluid_synth_get_sfont_by_id(mSynth, id); + if (!sfont) + continue; + fluid_sfont_iteration_start(sfont); + while (fluid_preset_t* preset = fluid_sfont_iteration_next(sfont)) { + LoadedPreset p; + p.sfontId = id; + p.bank = fluid_preset_get_banknum(preset); + p.program = fluid_preset_get_num(preset); + const char* nm = fluid_preset_get_name(preset); + p.name = nm ? nm : ""; + result.push_back(std::move(p)); + } + } + return result; +} + +void FluidSynth::LoadSoundFont(const std::string& path) { + ClearSoundFonts(); + AddSoundFont(path); +} + +void FluidSynth::LoadSoundFontFromMemory(const uint8_t* data, size_t size) { + ClearSoundFonts(); + AddSoundFontFromMemory(data, size); +} + +void FluidSynth::InitChannel(uint8_t channel) { + if (channel >= kNumChannels || mChannelInited[channel]) // Range guard: a uint8_t channel can reach 255 but the synth is configured for kNumChannels; assumes mChannelInited holds kNumChannels flags (TODO confirm), so this keeps the index in bounds. + return; + mChannelInited[channel] = true; + + int ch = static_cast(channel); + + // Set pitch-bend range via the dedicated API. The MIDI-spec equivalent (CC + // 101/100/6/38 RPN sequence) has subtle behavior differences across FluidSynth + // versions; the direct semitone setter avoids the ambiguity. 
+ fluid_synth_pitch_wheel_sens(mSynth, ch, static_cast(kPitchBendRangeSemitones)); + + // fluid_synth_set_gen() applies an additive (NRPN-style) offset on top of the + // SF2 zone value rather than overriding it, and the absolute sibling set_gen2() + // isn't in the 2.5.2 public API. So baked LFO-to-pitch can't be silenced + // channel-wide; it's patched per-voice on NoteOn (common case) or at SF2 load + // time. See NoteOn(). +} + +void FluidSynth::NoteOn(uint8_t channel, uint8_t note, uint8_t velocity) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + InitChannel(channel); + int result = fluid_synth_noteon(mSynth, channel, note, velocity); + SPDLOG_TRACE("[FluidSynth] NoteOn ch={} note={} vel={} sfonts={} result={}", channel, note, velocity, + mSfontIds.size(), result); + + // Suppress SF2-author-baked LFO-to-pitch on the voices we just started. + // fluid_voice_gen_set() writes the generator's `val` field directly (the SF2 + // zone value), and final = val + mod + nrpn, so zeroing val drops the SF2's + // contribution. Per-voice patching is the only public path that works, since + // the channel-wide set_gen is additive and set_gen2 isn't in the public API. 
+ fluid_voice_t* voices[256]; + fluid_synth_get_voicelist(mSynth, voices, 256, -1); + for (int i = 0; i < 256 && voices[i] != nullptr; ++i) { + if (fluid_voice_get_channel(voices[i]) != channel) + continue; + if (!fluid_voice_is_playing(voices[i])) + continue; + fluid_voice_gen_set(voices[i], GEN_VIBLFOTOPITCH, 0.0f); + fluid_voice_gen_set(voices[i], GEN_MODLFOTOPITCH, 0.0f); + fluid_voice_update_param(voices[i], GEN_VIBLFOTOPITCH); + fluid_voice_update_param(voices[i], GEN_MODLFOTOPITCH); + } +} + +void FluidSynth::NoteOff(uint8_t channel, uint8_t note) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + fluid_synth_noteoff(mSynth, channel, note); +} + +void FluidSynth::ProgramChange(uint8_t channel, uint16_t preset) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + InitChannel(channel); + + int bank = (preset >> 8) & 0xFF; + int program = preset & 0xFF; + + SPDLOG_TRACE("[FluidSynth] ProgramChange ch={} bank={} program={}", channel, bank, program); + + if (bank == 128) { + fluid_synth_set_channel_type(mSynth, channel, CHANNEL_TYPE_DRUM); + fluid_synth_bank_select(mSynth, channel, 128); + } else { + fluid_synth_set_channel_type(mSynth, channel, CHANNEL_TYPE_MELODIC); + fluid_synth_bank_select(mSynth, channel, bank); + } + + fluid_synth_program_change(mSynth, channel, program); +} + +bool FluidSynth::ProgramSelect(uint8_t channel, int sfontId, uint16_t bank, uint16_t program) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return false; + InitChannel(channel); + + // Verify the sfontId is one we loaded — fluid_synth_program_select + // would also reject an unknown id but its log goes through + // FluidSynth's own logger rather than ours; pre-check so we can + // emit our SPDLOG path uniformly. 
+ bool known = false; + for (int id : mSfontIds) { + if (id == sfontId) { + known = true; + break; + } + } + if (!known) { + SPDLOG_TRACE("[FluidSynth] ProgramSelect ch={} sfontId={} not loaded; rejecting pin", channel, sfontId); + return false; + } + + // Set drum/melodic type before the select — bank 128 is the GM + // percussion convention and FluidSynth's voice allocator branches + // on channel type, not on the bank we're selecting into. + if (bank == 128) { + fluid_synth_set_channel_type(mSynth, channel, CHANNEL_TYPE_DRUM); + } else { + fluid_synth_set_channel_type(mSynth, channel, CHANNEL_TYPE_MELODIC); + } + + int result = fluid_synth_program_select(mSynth, channel, static_cast(sfontId), + static_cast(bank), static_cast(program)); + if (result != FLUID_OK) { + SPDLOG_TRACE("[FluidSynth] ProgramSelect ch={} sfontId={} bank={} prog={} -> FAILED", channel, sfontId, bank, + program); + return false; + } + SPDLOG_TRACE("[FluidSynth] ProgramSelect ch={} sfontId={} bank={} prog={} -> OK", channel, sfontId, bank, program); + return true; +} + +void FluidSynth::PitchBend(uint8_t channel, float semitones) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + float ratio = semitones / kPitchBendRangeSemitones; + int val = static_cast(ratio * 8192.0f) + 8192; + val = std::clamp(val, 0, 16383); + fluid_synth_pitch_bend(mSynth, channel, val); +} + +void FluidSynth::ControlChange(uint8_t channel, uint8_t cc, uint16_t value) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + fluid_synth_cc(mSynth, channel, cc, (value >> 7) & 0x7F); +} + +void FluidSynth::SetReverbParams(double roomsize, double damping, double width, double level) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + fluid_synth_set_reverb_group_roomsize(mSynth, -1, roomsize); + fluid_synth_set_reverb_group_damp(mSynth, -1, damping); + fluid_synth_set_reverb_group_width(mSynth, -1, width); + fluid_synth_set_reverb_group_level(mSynth, -1, level); + 
SPDLOG_INFO("[FluidSynth] Reverb set: roomsize={} damping={} width={} level={}", roomsize, damping, width, level); +} + +void FluidSynth::SetMasterGain(float gain) { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return; + fluid_synth_set_gain(mSynth, gain); +} + +void FluidSynth::Render(float* out, uint32_t frameCount) { + std::lock_guard lock(mSynthMutex); + if (!mSynth || mSfontIds.empty()) { + std::memset(out, 0, frameCount * 2 * sizeof(float)); + return; + } + + fluid_synth_write_float(mSynth, static_cast(frameCount), out, 0, 2, out, 1, 2); +} + +uint32_t FluidSynth::GetActiveVoiceCount() const { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return 0; + int n = fluid_synth_get_active_voice_count(mSynth); + return n < 0 ? 0u : static_cast(n); +} + +uint32_t FluidSynth::GetPolyphonyLimit() const { + std::lock_guard lock(mSynthMutex); + if (!mSynth) + return 0; + int n = fluid_synth_get_polyphony(mSynth); + return n < 0 ? 0u : static_cast(n); +} + +} // namespace Ship +#endif // ENABLE_FLUIDSYNTH diff --git a/src/ship/audio/MidiSynthManager.cpp b/src/ship/audio/MidiSynthManager.cpp new file mode 100644 index 000000000..37ba49d85 --- /dev/null +++ b/src/ship/audio/MidiSynthManager.cpp @@ -0,0 +1,20 @@ +#include "ship/audio/MidiSynthManager.h" + +namespace Ship { + +MidiSynthManager& MidiSynthManager::Instance() { + static MidiSynthManager sInstance; + return sInstance; +} + +void MidiSynthManager::SetSynth(std::shared_ptr synth) { + std::lock_guard lock(mMutex); + mSynth = std::move(synth); +} + +std::shared_ptr MidiSynthManager::GetActiveSynth() { + std::lock_guard lock(mMutex); + return mSynth; +} + +} // namespace Ship diff --git a/src/ship/audio/SDLAudioPlayer.cpp b/src/ship/audio/SDLAudioPlayer.cpp index d9f1e44d6..0f7d62711 100644 --- a/src/ship/audio/SDLAudioPlayer.cpp +++ b/src/ship/audio/SDLAudioPlayer.cpp @@ -32,12 +32,12 @@ bool SDLAudioPlayer::DoInit() { SDL_AudioSpec want, have; SDL_zero(want); want.freq = this->GetSampleRate(); - 
want.format = AUDIO_S16SYS; - want.channels = mNumChannels; - want.samples = this->GetSampleLength(); - want.callback = NULL; + want.format = this->IsUsingFloatPipeline() ? AUDIO_F32SYS : AUDIO_S16SYS; + want.channels = this->GetNumOutputChannels(); + want.samples = GetSampleLength(); + want.callback = nullptr; - mDevice = SDL_OpenAudioDevice(NULL, 0, &want, &have, 0); + mDevice = SDL_OpenAudioDevice(nullptr, 0, &want, &have, 0); if (mDevice == 0) { SPDLOG_ERROR("SDL_OpenAudio error: {}", SDL_GetError()); return false; @@ -50,7 +50,8 @@ bool SDLAudioPlayer::DoInit() { } int SDLAudioPlayer::Buffered() { - return SDL_GetQueuedAudioSize(mDevice) / (sizeof(int16_t) * mNumChannels); + const size_t sampleSize = this->IsUsingFloatPipeline() ? sizeof(float) : sizeof(int16_t); + return SDL_GetQueuedAudioSize(mDevice) / (sampleSize * mNumChannels); } void SDLAudioPlayer::DoPlay(const uint8_t* buf, size_t len) { diff --git a/src/ship/audio/SoundMatrixDecoder.cpp b/src/ship/audio/SoundMatrixDecoder.cpp index 7eeeffe1b..e6d049f40 100644 --- a/src/ship/audio/SoundMatrixDecoder.cpp +++ b/src/ship/audio/SoundMatrixDecoder.cpp @@ -210,19 +210,15 @@ float SoundMatrixDecoder::ProcessDelay(float sample, CircularDelay& buffer) { return output; } -int16_t SoundMatrixDecoder::Saturate(float value) { - if (value > 32767.0f) { - return 32767; - } - if (value < -32768.0f) { - return -32768; - } - return static_cast(value); +float SoundMatrixDecoder::Saturate(float value) { + // Soft cap at ±1.0; upstream soft-clip handles any dramatic overshoots. 
+ if (value > 1.0f) return 1.0f; + if (value < -1.0f) return -1.0f; + return value; } -std::tuple SoundMatrixDecoder::Process(const uint8_t* buf, size_t len) { - const int16_t* stereoInput = reinterpret_cast(buf); - int samplePairs = len / (2 * sizeof(int16_t)); +std::tuple SoundMatrixDecoder::Process(const float* stereoInput, size_t frames) { + const int samplePairs = static_cast(frames); // Resize output buffer if needed size_t samplesNeeded = static_cast(samplePairs) * 6; @@ -231,8 +227,8 @@ std::tuple SoundMatrixDecoder::Process(const uint8_t* buf, } for (int i = 0; i < samplePairs; ++i) { - float inL = static_cast(stereoInput[i * 2]); - float inR = static_cast(stereoInput[i * 2 + 1]); + float inL = stereoInput[i * 2]; + float inR = stereoInput[i * 2 + 1]; // Center: sum of L+R, band-limited float ctr = (inL + inR) * Gains::gCenter; @@ -278,7 +274,38 @@ std::tuple SoundMatrixDecoder::Process(const uint8_t* buf, mSurroundBuffer[i * 6 + 5] = Saturate(surrR); } - return { reinterpret_cast(mSurroundBuffer.data()), samplePairs * 6 * sizeof(int16_t) }; + return { mSurroundBuffer.data(), samplePairs }; +} + +// --------------------------------------------------------------------------- +// Legacy s16 overload — wraps the float Process with conversions so +// existing libultraship consumers see byte-exact behaviour. 
+// --------------------------------------------------------------------------- + +std::tuple SoundMatrixDecoder::Process(const uint8_t* buf, size_t len) { + const int16_t* stereoInput = reinterpret_cast(buf); + const size_t samplePairs = len / (2 * sizeof(int16_t)); + + std::vector stereoF(samplePairs * 2); + constexpr float kS16ToFloat = 1.0f / 32768.0f; + for (size_t i = 0; i < samplePairs * 2; i++) { + stereoF[i] = static_cast(stereoInput[i]) * kS16ToFloat; + } + + const auto [surroundOut, surroundFrames] = Process(stereoF.data(), samplePairs); + + const size_t surroundSamples = static_cast(surroundFrames) * 6; + if (mSurroundBufferS16.size() < surroundSamples) { + mSurroundBufferS16.resize(surroundSamples); + } + for (size_t i = 0; i < surroundSamples; i++) { + float v = surroundOut[i] * 32768.0f; // Exact inverse of kS16ToFloat (power of two), so a unity-gain sample maps back to the same s16 value; the clamps below still cap +1.0 at 32767. + if (v > 32767.0f) v = 32767.0f; + if (v < -32768.0f) v = -32768.0f; + mSurroundBufferS16[i] = static_cast(v); + } + return { reinterpret_cast(mSurroundBufferS16.data()), + static_cast(surroundSamples * sizeof(int16_t)) }; +} } // namespace Ship diff --git a/src/ship/audio/WasapiAudioPlayer.cpp b/src/ship/audio/WasapiAudioPlayer.cpp index 34d782253..f572beae3 100644 --- a/src/ship/audio/WasapiAudioPlayer.cpp +++ b/src/ship/audio/WasapiAudioPlayer.cpp @@ -32,11 +32,16 @@ bool WasapiAudioPlayer::SetupStream() { // Use GetNumOutputChannels() to determine stereo vs surround mNumChannels = this->GetNumOutputChannels(); + // Device format mirrors the pipeline mode: 32-bit IEEE float for the + // HD path, 16-bit signed integer for the legacy s16 path. + const bool useFloat = this->IsUsingFloatPipeline(); + const WORD formatTag = useFloat ? WAVE_FORMAT_IEEE_FLOAT : WAVE_FORMAT_PCM; + const WORD bitsPerSample = useFloat ? 
32 : 16; if (mNumChannels == 2) { WAVEFORMATEX desired; - desired.wFormatTag = WAVE_FORMAT_PCM; + desired.wFormatTag = formatTag; desired.nChannels = mNumChannels; // Stereo audio - desired.wBitsPerSample = 16; // 16-bit audio + desired.wBitsPerSample = bitsPerSample; desired.nSamplesPerSec = this->GetSampleRate(); desired.nBlockAlign = desired.nChannels * desired.wBitsPerSample / 8; desired.nAvgBytesPerSec = desired.nSamplesPerSec * desired.nBlockAlign; @@ -46,18 +51,18 @@ bool WasapiAudioPlayer::SetupStream() { AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, 2000000, 0, &desired, nullptr)); } else if (mNumChannels == 6) { - // 5.1 surround (6 channels) + // 5.1 surround (6 channels) — sub-format mirrors the mode. WAVEFORMATEXTENSIBLE desired; desired.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; - desired.Format.nChannels = mNumChannels; // 6 channels for 5.1 audio - desired.Format.wBitsPerSample = 16; // 16-bit audio + desired.Format.nChannels = mNumChannels; + desired.Format.wBitsPerSample = bitsPerSample; desired.Format.nSamplesPerSec = this->GetSampleRate(); desired.Format.nBlockAlign = desired.Format.nChannels * desired.Format.wBitsPerSample / 8; desired.Format.nAvgBytesPerSec = desired.Format.nSamplesPerSec * desired.Format.nBlockAlign; desired.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); desired.dwChannelMask = KSAUDIO_SPEAKER_5POINT1; - desired.Samples.wValidBitsPerSample = 16; - desired.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + desired.Samples.wValidBitsPerSample = bitsPerSample; + desired.SubFormat = useFloat ? 
KSDATAFORMAT_SUBTYPE_IEEE_FLOAT : KSDATAFORMAT_SUBTYPE_PCM; ThrowIfFailed(mClient->Initialize( AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, @@ -129,7 +134,8 @@ void WasapiAudioPlayer::DoPlay(const uint8_t* buf, size_t len) { } } try { - UINT32 frames = len / (mNumChannels * sizeof(int16_t)); + const size_t sampleSize = this->IsUsingFloatPipeline() ? sizeof(float) : sizeof(int16_t); + UINT32 frames = len / (mNumChannels * sampleSize); UINT32 padding; ThrowIfFailed(mClient->GetCurrentPadding(&padding)); @@ -143,7 +149,7 @@ void WasapiAudioPlayer::DoPlay(const uint8_t* buf, size_t len) { BYTE* data; ThrowIfFailed(mRenderClient->GetBuffer(frames, &data)); - memcpy(data, buf, frames * mNumChannels * sizeof(int16_t)); + memcpy(data, buf, frames * mNumChannels * sampleSize); ThrowIfFailed(mRenderClient->ReleaseBuffer(frames, 0)); if (!mStarted && padding + frames > 1500) {