synchronize object (size) to sound (volume) [solved]

is it possible to synchronize the size of an object to the volume of a sound file?
(~like in this After Effects demo: youtube.com/watch?v=8aOnFGX1OxE)

it is. but it’s a bit math-heavy.
using python’s standard audio libs you can easily get the peak-to-peak value of a given section of a wav.
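
something like this works with just the standard library (a minimal sketch; it assumes a 16-bit mono wav, and the filename and chunk size are made up):

import wave
from struct import unpack

w = wave.open("your.wav", "rb")                 # 16-bit mono wav assumed
chunk = 1024                                    # frames per section
raw = w.readframes(chunk)                       # raw bytes of one section
samples = unpack("<%dh" % (len(raw) / 2), raw)  # little-endian signed shorts
p2p = max(samples) - min(samples)               # peak-to-peak value of that section
print p2p
w.close()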

if you pull in numpy you can also do things like Fourier transforms (which are useful if you want to build an equalizer).
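
for example (again just a sketch; samples is assumed to be a numpy array holding one section of the wav, rate its sample rate):

import numpy as np

spectrum = np.abs(np.fft.rfft(samples))            # amplitude per frequency bin
freqs = np.fft.rfftfreq(len(samples), 1.0 / rate)  # frequency of each bin in Hz
bands = np.array_split(spectrum, 8)                # split into 8 equalizer-style bands
levels = [band.sum() for band in bands]            # one level per band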

i have a small sample lying around which analyzes audio data and creates tracks and places objects based on that. it’s quite a mess, but you may find it useful. i’ll upload it if you’re interested.

→ this would be very nice, thx :)

it’s a bit of a mess because this was only intended for internal experiments. what you’re interested in is the freqArray in AudioData. the analyzeBurst function may also be of interest for you, since the Fourier transforms are done there.
feel free to do whatever you see fit with that code. just don’t blame me if anything doesn’t work as expected.

#import direct.directbase.DirectStart
from panda3d.core import MovieAudio, Filename, PNMImage, Texture, NurbsCurveEvaluator, Vec4
from struct import unpack

from numpy import fft
import numpy as np

class AudioData():
    def __init__(self,avrLength=10):
        """ contains p2pArray (peak values), freqArray (peak frequencyes) , bassArray (bass-amplitude), 
        most intresting are:
        avrP2pArray(smoothed out p2pArray) , bassPeakArray (contains bass-detection) and beatArray (contains beat-detection)
        """
        self.p2pArray =[] #interesting
        self.freqArray=[] #somewhat interesting
        self.bassArray=[]
        
        self.avrP2pArray=[] #rather interesting, useful for lots of stuff.
        self.avrFreqArray=[]
        self.avrBassLevel=[]
        self.reallySmoothP2pArray=[]
        
        self.bassPeakArray=[]
        self.beatArray=[]  # really interesting (shows you the beats)

        self.rawdata=[]
        
        self.avrLength=avrLength
        
    def printData(self):
        print self.p2pArray,"\n",self.freqArray,"\n",self.bassArray,"\n",self.avrP2pArray,"\n",self.bassPeakArray,self.avrBassLevel,self.beatArray
        
    def addDataSet(self,dataset):
        if len(dataset)>4:
            print "FUBAR! wrong dataset-length"
        self.p2pArray.append(dataset[0])
        self.freqArray.append(dataset[1])
        self.bassArray.append(dataset[2])
        if len(dataset)==4:
            self.rawdata.append(dataset[3])
    
    def smooth(self,Input,avrLength=None):
        """ moving average over a window of +-avrLength samples, with shrinking
        windows at both ends so the output has the same length as the input """
        if not avrLength:
            avrLength=self.avrLength
        Output=[]
        for i in range(avrLength):                       # leading edge: growing window
            Output.append(np.average(Input[0:i+1]))
        for i in range(avrLength,len(Input)-avrLength):  # centered window
            Output.append(np.average(Input[i-avrLength:i+avrLength]))
        for i in range(avrLength):                       # trailing edge: shrinking window
            Output.append(np.average(Input[len(Input)-avrLength+i:len(Input)]))
        return Output
        
    def calcLevels(self):
        self.avrP2pArray  = self.smooth(self.p2pArray)
        self.avrFreqArray = self.smooth(self.freqArray,10)
        self.avrBassLevel = self.smooth(self.bassArray,40)
        abpclevel = np.amax(self.avrBassLevel)*0.15 # minimal amplitude threshold: 15% of the loudest average bass level
        smoothbass=self.smooth(self.bassArray,3)
        #####################                 <                 bass above average                  >   & < above minimal amplitude        >
        self.bassPeakArray =  np.logical_and( np.greater(smoothbass,np.multiply(self.avrBassLevel,1.1)) , np.greater(smoothbass,abpclevel) )
        self.beatArray    = np.greater(self.p2pArray,np.multiply(self.avrP2pArray,1.2)) # beat = momentary peak well above the local average
        self.reallySmoothP2pArray = self.smooth(self.p2pArray,50)
        
        
class MultiChannelAudioData():
    """
    just a class to store the analyzed audio data since lists in lists in lists are so crappy
    """
    def __init__(self,myAudioCursor):
        self.myAudioCursor = myAudioCursor
        self.length=self.myAudioCursor.length()
        self.burstlen = int(self.myAudioCursor.audioRate()*0.02) # 20ms of samples per burst -> 50 bursts per second
        self.numFrames = int(self.length*self.myAudioCursor.audioRate())
        self.numChannels = self.myAudioCursor.audioChannels()
        self.numBursts= int(self.numFrames/self.burstlen)
        self.Channels = [] # one AudioData instance per audio channel
        for i in range(self.numChannels):
            self.Channels.append(AudioData())
            
    def analyzeBurst(self,inArray,ssp):
        """ returns an array containing different kinds of information on the waveform you throw in.
        requires an array as input, and the samples per second. for now that's
        [peak amplitude, index of the loudest frequency bin, bass amplitude, full spectrum].
        given the main class defaults (20ms bursts, so 50Hz per bin) the frequency index can be
        multiplied by 50 and you have the Hz number.
        """
        outarray= fft.rfft(inArray)[:len(inArray)/2]  #rfft already returns only the non-negative half of the spectrum; the slice just trims it to the same length as freq below
        freq= fft.fftfreq(len(inArray),1./ssp)[:len(inArray)/2] #contains the frequency of each bin in Hz
        outarray = np.abs(outarray) # this array contains the amplitude for each frequency in the array above.
        #print freq,outarray
        peak=np.ptp(inArray) # peak-to-peak amplitude of the raw waveform
        bass= np.sum(outarray[1:3]) # bins 1 and 2, that makes the frequencies 50 and 100 Hz
        freqpeak= np.argmax(outarray[10:50])+10 # loudest bin between 500 and 2500 Hz (+10 offsets back into the full spectrum)
        return [peak,freqpeak,bass,outarray]
    
    def analyze(self):
        """ analyze the audio-data. results are written into the self.Channels list, containing one AudioData instance for each channel.
        """
        for i in range( self.numBursts ):
            rawdata=self.myAudioCursor.readSamples(self.burstlen) #read the raw data, signed little-endian shorts
            rawdata = unpack( "<"+("h"*self.burstlen*self.numChannels),rawdata ) #unpack the raw data into a tuple of ints
            for j in range(self.numChannels):
                data= np.array(rawdata)[j::self.numChannels]/float(1<<15)  #de-interleave the channels and normalize the 16-bit samples to roughly -1..1
                analyzedData = self.analyzeBurst(data,self.myAudioCursor.audioRate())
                self.Channels[j].addDataSet(analyzedData)
        for j in self.Channels:
            j.calcLevels()
            #j.printData()

infile="your.mp3"

myAudio = MovieAudio("mytest")
myAudioCursor = myAudio.get(Filename(infile)).open() 
aData = MultiChannelAudioData(myAudioCursor)
aData.analyze()
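
to actually make an object's size follow the volume you can then, every frame, look up the burst that matches the current playback position and scale a node with the smoothed peak value. a rough sketch (not tested; the smiley model, the scale mapping and the use of channel 0 are just my choices, not part of the sample above):

import direct.directbase.DirectStart
from direct.task import Task

model = loader.loadModel("smiley")
model.reparentTo(render)

sound = loader.loadSfx(infile)  # play the same file we just analyzed
sound.play()

levels = aData.Channels[0].avrP2pArray
maxlevel = max(levels)

def syncScale(task):
    burst = int(sound.getTime()*50)  # 20ms bursts -> 50 bursts per second
    if burst < len(levels):
        model.setScale(1.+2.*levels[burst]/maxlevel)  # size follows volume
    return Task.cont

taskMgr.add(syncScale, "syncScale")
run()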

thx, i’m using this now:

import numpy
import pyaudio
import analyse

# Initialize PyAudio
pyaud = pyaudio.PyAudio()

# Open input stream, 16-bit mono at 44100 Hz
# On my system, device 2 is a USB microphone, your number may differ.
stream = pyaud.open(
    format = pyaudio.paInt16,
    channels = 1,
    rate = 44100,
    input_device_index = 2,
    input = True)

while True:
    # Read raw microphone data
    rawsamps = stream.read(1024)
    # Convert raw data to NumPy array
    samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
    # Show the volume and pitch
    print analyse.loudness(samps), analyse.musical_detect_pitch(samps)
   

→ to use it in panda3d, it works perfectly if you call it only every x frames, due to performance…
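
roughly like this, for example (a sketch, not tested; the task replaces the while True loop above, which would block panda3d's main loop. it assumes you already have a model NodePath, and the frame interval and loudness-to-scale mapping are just examples):

from direct.task import Task

FRAME_SKIP = 5   # only poll the microphone every 5th frame
counter = [0]

def micScale(task):
    counter[0] += 1
    if counter[0] % FRAME_SKIP == 0:
        rawsamps = stream.read(1024)
        samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
        loud = analyse.loudness(samps)         # dB, roughly -40 (quiet) to 0 (loud)
        model.setScale(max(0.1, 1.+loud/20.))  # map loudness to object size
    return Task.cont

taskMgr.add(micScale, "micScale")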