[SOLVED] Get actual sounds listened by one or more listeners

david_ragazzi · June 3, 2017, 1:33am

To whom it may concern… I finally managed to get the spectrum of the actual 3D sound reaching each ear! To do this I had to use the FMOD library directly through pyfmodex (https://github.com/tyrylu/pyfmodex/tree/master/pyfmodex) - a very nice set of Python bindings for the FMOD library.

The code below is an example that shows, in real time, the 3D panning of a sound passing in front of your head from left to right and then from right to left. You will notice that as the sound moves to the left side, the bars of the spectrum chart representing the right ear gradually disappear, because the volume in that ear is getting low.

I tested with only one listener, since the FMOD documentation says that some DSP effects are disabled when using multiple listeners to avoid confusion (https://www.fmod.org/docs/content/generated/overview/3dsound.html):

Split screen / multiple listeners

In some games, there may be a split screen mode. When it comes to audio, this means that FMOD Studio has to know about having more than 1 listener on the screen at once. This is easily handled via System::set3DNumListeners and System::set3DListenerAttributes.

If you have 2 player split screen, then for each ‘camera’ or ‘listener’ simply call System::set3DListenerAttributes with 0 as the listener number of the first camera, and 1 for the listener number of the second camera. System::set3DNumListeners would be set to 2.

When using the low-level, 3D channels have the following behaviour:
- It turns off all doppler. This is because one listener might be going towards the sound, and another listener might be going away from the sound. To avoid confusion, the doppler is simply turned off.
- All audio is mono. If to one listener the sound should be coming out of the left speaker, and to another listener it should be coming out of the right speaker, there will be a conflict, and more confusion, so all sounds are simply panned to the middle. This removes confusion.
- Each sound is played only once as it would with a single player game, saving voice and cpu resources. This means the sound’s effective audibility is determined by the closest listener to the sound. This makes sense as the sound should be the loudest to the nearest listener. Any listeners that are further away wouldn’t have any impact on the volume at this point.

The code to get left and right spectrums (you must have PyQt or PySide):

import sys
import pyfmodex
from pyfmodex.constants import FMOD_SOFTWARE, FMOD_LOOP_NORMAL, FMOD_3D
from PyQt5 import QtWidgets, QtGui, QtCore

# Channel offsets passed to get_spectrum() to select which output channel
# (left or right ear) is analysed.
LEFT_CHANNEL = 0
RIGHT_CHANNEL = 1

# FFT window type passed to get_spectrum(). Presumably mirrors the value of
# FMOD's FMOD_DSP_FFT_WINDOW_RECT enum member (rectangular window) - confirm
# against the FMOD headers for the library version in use.
FMOD_DSP_FFT_WINDOW_RECT = 0


class FrequencyAnalysis(QtWidgets.QWidget):
    """Widget that paints the spectrum of the left and right output channels.

    On every paint the widget asks the FMOD system for the spectrum of each
    channel and draws one bar chart per channel side by side: left channel in
    the left half of the widget, right channel in the right half.

    Keyboard controls:
        P - call ``TogglePause()`` on the ``sound`` object
        N - toggle normalization of the bars to the loudest bin of the frame
        1 - halve the FFT sample size (never below 64)
        2 - double the FFT sample size (never above 8192)
    """

    def __init__(self, fmod, sound):
        """Store the FMOD handles and build the window.

        Args:
            fmod: Object providing ``get_spectrum(size, channel, window)``.
            sound: Object on which ``TogglePause()`` is called when P is
                pressed.
        """
        QtWidgets.QWidget.__init__(self)

        # Assign all state BEFORE init_ui(): init_ui() calls show(), and the
        # paint/key handlers read these attributes.
        self.fmod = fmod
        self.sound = sound
        self.enable_normalize = False
        self.sample_size = 64

        self.init_ui()

    def init_ui(self):
        """Create the child label and layout, size the window and show it."""
        self.picture = QtWidgets.QLabel(self)
        self.picture.setScaledContents(True)

        layout = QtWidgets.QHBoxLayout()
        layout.setContentsMargins(0, 0, 0, 0)
        layout.addWidget(self.picture, 0, QtCore.Qt.AlignCenter)

        self.setLayout(layout)
        self.setWindowTitle("FMOD 3D Frequency Analysis")
        self.setSizePolicy(QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed))
        self.resize(1600, 350)
        self.show()

    def keyPressEvent(self, event):
        """Handle the P / N / 1 / 2 keyboard shortcuts."""
        key = event.key()
        print(key)

        if key == QtCore.Qt.Key_P:
            # Toggle pause.
            # NOTE(review): TogglePause is not defined anywhere in this file;
            # confirm that the object passed as `sound` actually provides it.
            self.sound.TogglePause()
        elif key == QtCore.Qt.Key_N:
            # Toggle normalization
            self.enable_normalize = not self.enable_normalize
        elif key == QtCore.Qt.Key_1:
            # Decrease FFT sample size. Floor division keeps the size an int:
            # it is later used as a range() bound and as an element count.
            self.sample_size = max(self.sample_size // 2, 64)
        elif key == QtCore.Qt.Key_2:
            # Increase FFT sample size
            self.sample_size = min(self.sample_size * 2, 8192)

    def paintEvent(self, event):
        """Repaint the help text and both per-ear spectrum charts."""
        qp = QtGui.QPainter()
        qp.begin(self)
        try:
            qp.fillRect(self.rect(), QtCore.Qt.black)

            # Find frequency range of each array item
            hz_range = (44100 / 2) / float(self.sample_size)

            # Header text. The point size is an int: QFont's size argument is
            # integral and newer bindings reject a float.
            qp.setPen(QtCore.Qt.white)
            qp.setFont(QtGui.QFont("Verdana", 8))
            qp.drawText(10, 10, "Press P to toggle pause, N to toggle normalize, 1 and 2 to adjust FFT size")
            qp.drawText(10, 30, "Sample size: " + str(self.sample_size) + "  -  Range per sample: " + str(hz_range) + "Hz")

            # Each ear gets one half of the widget. Integer division keeps
            # every coordinate handed to Qt an int.
            half_width = self.rect().width() // 2

            # Draw the spectrums perceived by each ear
            self._draw_spectrum(qp, "LEFT EAR", LEFT_CHANNEL, 0, half_width)
            self._draw_spectrum(qp, "RIGHT EAR", RIGHT_CHANNEL, half_width, half_width)
        finally:
            # Always close the painter, even if drawing raised.
            qp.end()

    def _draw_spectrum(self, qp, title, channel, start_x, width):
        """Draw the labelled bar chart for one output channel.

        Args:
            qp: Active QPainter with pen and font already configured.
            title: Caption drawn above the chart.
            channel: Channel offset forwarded to ``get_spectrum``.
            start_x: X pixel (int) where this chart's area begins.
            width: Width in pixels (int) of this chart's area.
        """
        # Get spectrum for the channel
        spec = self.fmod.get_spectrum(self.sample_size, channel, FMOD_DSP_FFT_WINDOW_RECT)

        # Find max volume
        max_vol = max(spec)

        # Normalize so the loudest bin of this frame reaches full height
        if self.enable_normalize and max_vol != 0:
            spec = [level / float(max_vol) for level in spec]

        qp.drawText(start_x + 10, 70, title)
        # Show the level with three decimals (the former
        # str(max_vol).format("0.000") applied no formatting at all).
        qp.drawText(start_x + 10, 80, "Max vol this frame: %.3f" % max_vol)

        height = self.rect().height()

        # VU bars. Integer arithmetic throughout; sample_size never drops
        # below 64, so the divisor is at least 1.
        block_gap = 4 // (self.sample_size // 64)
        block_width = int((float(width) * 0.8) / float(self.sample_size) - block_gap)
        block_max_height = 220

        # NOTE(review): the loop stops one short of sample_size, so the last
        # bin is never drawn - confirm whether that is intended.
        for b in range(self.sample_size - 1):
            # Left X, baseline Y, bar width, bar height (negative to draw upwards)
            rect = QtCore.QRect(start_x + int(width * 0.1 + (block_width + block_gap) * b),
                                height - 20,
                                block_width,
                                int(-block_max_height * spec[b]))
            # Diagonal gradient from top-left to bottom-right
            gradient = QtGui.QLinearGradient(rect.topLeft(), rect.bottomRight())
            gradient.setColorAt(0, QtCore.Qt.green)
            gradient.setColorAt(1, QtCore.Qt.red)
            qp.fillRect(rect, gradient)


def main():
    """Run the demo: play a looping 3D sound and sweep it past the listener.

    Initializes FMOD, plays ``sine.wav`` as a looping 3D sound, opens the
    FrequencyAnalysis window and, on every timer tick, moves the sound one
    unit along the X axis between -30 and 30, reversing at either end.
    Exits the process with the Qt event loop's return code.
    """

    def change_listener(listener):
        # Move the active listener to the given (x, y, z) position and let
        # FMOD pick up the change.
        current_listener.position = listener
        fmod.update()

    # FMOD initialization
    fmod = pyfmodex.System()
    fmod.init()

    try:
        # Load the sound
        sound1 = fmod.create_sound("sine.wav", mode=FMOD_LOOP_NORMAL | FMOD_3D | FMOD_SOFTWARE)

        # Play the sound
        channel = sound1.play()
        channel.volume = 0.7
        channel.min_distance = 50
        channel.max_distance = 10000  # Need this for sound fall off

        # Create listeners positions
        listener1 = (0, 0, 0)
        # Alternative position; not used by default. Pass it to
        # change_listener() to listen from there instead.
        listener2 = (0, 10, 0)

        # Create a listener in the center of the scene
        current_listener = fmod.listener(id=0)
        change_listener(listener1)

        # Open the form
        app = QtWidgets.QApplication(sys.argv)
        fa = FrequencyAnalysis(fmod, sound1)

        # Walk the sound around your head. The animation state lives in a
        # local dict that tick() mutates, instead of in module-level globals.
        min_x = -30
        max_x = 30
        sound_pos = (max_x, 3, 0)
        state = {"x": min_x, "inc": 1}

        def tick():
            # Reverse direction at either end of the sweep.
            if state["x"] == min_x:
                state["inc"] = 1
            elif state["x"] == max_x:
                state["inc"] = -1
            state["x"] += state["inc"]
            channel.position = [state["x"], sound_pos[1], sound_pos[2]]
            print("Playing at %r" % str(channel.position))

            # Update FMOD
            fmod.update()
            fa.repaint()

        # Keep a reference to the timer for as long as the event loop runs.
        timer = QtCore.QTimer()
        timer.timeout.connect(tick)
        timer.start(100)  # interval in milliseconds

        exit_code = app.exec_()
    finally:
        # Free the FMOD system on every exit path (per the FMOD docs, release
        # also closes the system).
        fmod.release()

    # Propagate the event loop's return code to the shell.
    sys.exit(exit_code)

# Start the demo only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()