I’m getting performance issues when using too many BulletGhostNode objects.
I did some tests and got the following results:
50 objects - up to 1ms max per simulation (do_physics)
100 - 5ms
150 - 10ms
200 - 300ms
250 - 1500ms
I’m running physics for tests at 60Hz with the following params: do_physics(dt, 60, 1/60)
The shape is BulletBoxShape.
It would be nice if you published the code so that we can test it.
import time
import sys
from direct.showbase.ShowBase import ShowBase
from panda3d.bullet import (
BulletBoxShape, BulletDebugNode, BulletGhostNode,
BulletWorld, ZUp)
from panda3d.core import load_prc_file_data, NodePath, Vec3
class Demo(ShowBase):
    """Interactive benchmark that times ``BulletWorld.do_physics``.

    One Bullet world with a visible debug node is filled with 250
    ``BulletGhostNode`` objects (none of them is given a position), and a
    task prints how long each physics update took, in milliseconds.
    """

    def __init__(self):
        # Apply the PRC overrides before ShowBase is initialised.
        load_prc_file_data('', (
            '\n'
            'win-size 1280 720\n'
            'show-frame-rate-meter t\n'
        ))
        super().__init__()
        self.accept('escape', sys.exit)
        self.cam.set_y(-5)

        # Timestamp of the previous update; None until the first update.
        self._t = None
        self._bworlds = []
        self._bdebug = []
        self._ghosts = []

        for i in range(1):  # number of worlds
            debug_np = NodePath(BulletDebugNode('Debug #{}'.format(i)))
            debug_np.reparent_to(self.render)
            debug_np.show()

            world = BulletWorld()
            world.set_gravity(Vec3(0, 0, 3))
            world.set_debug_node(debug_np.node())
            self._bworlds.append(world)
            self._bdebug.append(debug_np)

            for j in range(250):  # number of ghosts per world
                shape = BulletBoxShape(1)
                ghost = BulletGhostNode('Ghost #{}'.format(j))
                ghost.add_shape(shape)
                # ghost.set_static(True)
                world.attach_ghost(ghost)
                self._ghosts.append(ghost)

        self.task_mgr.add(self._update, '_update')

    def _update(self, task):
        """Step every world by the elapsed time and print the cost.

        ``time.perf_counter`` is used instead of ``time.time`` because it
        is monotonic and has the highest available resolution, which
        matters when the measured interval is a few milliseconds.
        """
        now = time.perf_counter()
        if self._t is None:
            # First update: there is no previous timestamp, so dt is 0.
            self._t = now
        dt = now - self._t
        self._t = now

        started = time.perf_counter()
        for world in self._bworlds:
            world.do_physics(
                dt,
                60,       # max num of substeps
                1 / 60)   # 60Hz
        elapsed_ms = (time.perf_counter() - started) * 1000
        print('PHY {:0.6f} ms'.format(elapsed_ms))
        return task.again
# Start the demo only when this file is executed as a script.
if __name__ == '__main__':
    demo = Demo()
    demo.run()
I don’t think you need to artificially limit the step frequency in Bullet. If you use:
globalClock.getDt()
then limiting it doesn’t make any sense; you can also limit the frame rate:
clock-mode limited
clock-frame-rate 60
Although this also doesn’t make sense when you have syncing enabled.
from direct.showbase.ShowBase import ShowBase
from panda3d.bullet import BulletBoxShape, BulletDebugNode, BulletGhostNode, BulletWorld, ZUp
from panda3d.core import NodePath, Vec3
class Demo(ShowBase):
    """Minimal variant: one world with 250 ghosts, stepped by the frame dt."""

    def __init__(self):
        super(Demo, self).__init__()
        self.w = BulletWorld()
        self.w.set_gravity(Vec3(0, 0, 3))
        for index in range(250):
            ghost = BulletGhostNode('Ghost #{}'.format(index))
            box = BulletBoxShape(1)
            ghost.add_shape(box)
            self.w.attach_ghost(ghost)
        self.task_mgr.add(self._update, '_update')

    def _update(self, task):
        """Advance the world by the time reported by ``globalClock``."""
        elapsed = globalClock.getDt()
        self.w.do_physics(elapsed)
        return task.again
# Create the demo and call its run() method.
demo = Demo()
demo.run()
For me it shows 55 frames per second.
That would be a dynamic step, but I need static substeps.
I have updated the sample so that it does not depend on the frame rate, because I want to calculate the physics part only.
import builtins
import time
import sys
from direct.showbase.ShowBase import ShowBase
from panda3d.bullet import (
BulletBoxShape, BulletDebugNode, BulletGhostNode,
BulletWorld, ZUp)
from panda3d.core import load_prc_file_data, NodePath, Vec3
class Demo(ShowBase):
    """Benchmark that steps Bullet by a fixed amount on every update.

    One Bullet world with a visible debug node is filled with 250
    ``BulletGhostNode`` objects. Each update simulates a constant 1/30 s
    (instead of the measured frame time) and prints how long the call
    took, in milliseconds.
    """

    def __init__(self):
        # Apply the PRC overrides before ShowBase is initialised.
        load_prc_file_data('', (
            '\n'
            'win-size 1280 720\n'
            'show-frame-rate-meter t\n'
        ))
        super().__init__()
        self.accept('escape', sys.exit)
        self.cam.set_y(-5)

        # No longer read by _update (the step is fixed); kept as before.
        self._t = 0
        self._bworlds = []
        self._bdebug = []
        self._ghosts = []

        for i in range(1):  # number of worlds
            debug_np = NodePath(BulletDebugNode('Debug #{}'.format(i)))
            debug_np.reparent_to(self.render)
            debug_np.show()

            world = BulletWorld()
            world.set_gravity(Vec3(0, 0, 3))
            world.set_debug_node(debug_np.node())
            self._bworlds.append(world)
            self._bdebug.append(debug_np)

            for j in range(250):  # number of ghosts per world
                shape = BulletBoxShape(1)
                ghost = BulletGhostNode('Ghost #{}'.format(j))
                ghost.add_shape(shape)
                # ghost.set_static(True)
                world.attach_ghost(ghost)
                self._ghosts.append(ghost)

        self.task_mgr.add(self._update, '_update')

    def _update(self, task):
        """Run one fixed-size physics update per world and print the cost.

        The elapsed frame time is deliberately not used: only the cost of
        the physics call is of interest here. ``time.perf_counter`` is
        used for the measurement because it is monotonic and has the
        highest available resolution.
        """
        started = time.perf_counter()
        for world in self._bworlds:
            world.do_physics(
                1 / 30,     # 30Hz with steps
                120 // 30,
                1 / 120)    # internally 120Hz with substeps
        elapsed_ms = (time.perf_counter() - started) * 1000
        print('PHY {:0.6f} ms'.format(elapsed_ms))
        return task.again
# Start the demo only when this file is executed as a script.
if __name__ == '__main__':
    demo = Demo()
    demo.run()
I’m getting 85ms on 1 world and 250 ghosts or 9ms on 10 worlds with 25 ghosts per world.
In my opinion that is still slow, given that those objects are not moving.
Hmm, the very fact of creating an instance of ShowBase binds you to the frame rate, since a task added to task_mgr is called once per frame.
Perhaps you should not use ShowBase. Just write a loop in Python.
Thanks for the idea about simplifying the tests.
I made a pure Panda3D/Bullet test without ShowBase (but it doesn’t change the results anyway):
import time
from panda3d.bullet import BulletBoxShape, BulletGhostNode, BulletWorld, ZUp
from panda3d.core import Vec3
def main(num_worlds=1, num_ghosts=250, num_tests=10):
    """Time ``BulletWorld.do_physics`` without ShowBase or a window.

    Args:
        num_worlds: Number of Bullet worlds to create.
        num_ghosts: Number of ghost nodes attached to each world.
        num_tests: Number of timed physics updates to run.

    Each test simulates 1/30 s in every world and prints the total time
    the calls took, in milliseconds. ``time.perf_counter`` is used for
    the measurement because it is monotonic and has the highest
    available resolution.
    """
    bworlds = []
    ghosts = []  # every created ghost is also recorded here
    for _ in range(num_worlds):
        world = BulletWorld()
        world.set_gravity(Vec3(0, 0, 3))
        bworlds.append(world)
        for j in range(num_ghosts):
            shape = BulletBoxShape(1)
            ghost = BulletGhostNode('Ghost #{}'.format(j))
            ghost.add_shape(shape)
            # ghost.set_static(True)
            world.attach_ghost(ghost)
            ghosts.append(ghost)

    for _ in range(num_tests):
        started = time.perf_counter()
        for world in bworlds:
            world.do_physics(
                1 / 30,     # 30Hz with steps
                120 // 30,
                1 / 120)    # internally 120Hz with substeps
        elapsed_ms = (time.perf_counter() - started) * 1000
        print('PHY {:0.6f} ms'.format(elapsed_ms))
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
To be honest, I don’t understand: you knowingly set a large time for the calculation.
A step of 0.03 s is slow in itself. You have to wait while the physics engine performs the calculations for that amount of time.
import time
from panda3d.bullet import BulletBoxShape, BulletGhostNode, BulletWorld, ZUp
from panda3d.core import Vec3
def main(num_worlds=1, num_ghosts=250, num_tests=10):
    """Time ``BulletWorld.do_physics`` with a smaller dt of 1/100 s.

    Args:
        num_worlds: Number of Bullet worlds to create.
        num_ghosts: Number of ghost nodes attached to each world.
        num_tests: Number of timed physics updates to run.

    Each test simulates 1/100 s in every world and prints the total time
    the calls took, in milliseconds. ``time.perf_counter`` is used for
    the measurement because it is monotonic and has the highest
    available resolution.
    """
    bworlds = []
    ghosts = []  # every created ghost is also recorded here
    for _ in range(num_worlds):
        world = BulletWorld()
        world.set_gravity(Vec3(0, 0, 3))
        bworlds.append(world)
        for j in range(num_ghosts):
            shape = BulletBoxShape(1)
            ghost = BulletGhostNode('Ghost #{}'.format(j))
            ghost.add_shape(shape)
            # ghost.set_static(True)
            world.attach_ghost(ghost)
            ghosts.append(ghost)

    for _ in range(num_tests):
        started = time.perf_counter()
        for world in bworlds:
            world.do_physics(1 / 100, 120 // 30, 1 / 120)
        elapsed_ms = (time.perf_counter() - started) * 1000
        print('PHY {:0.6f} ms'.format(elapsed_ms))
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
PHY 33.231735 ms
PHY 18.352270 ms
PHY 17.855883 ms
PHY 17.883301 ms
PHY 17.855406 ms
PHY 34.688473 ms
PHY 17.857313 ms
PHY 17.855167 ms
PHY 17.856121 ms
PHY 17.856121 ms
I think you just need to reduce the dt.
What do you mean? That I can’t calculate an object’s future position?
If the object moves with some constant speed, you just need to multiply it with time to get the distance. You don’t have to wait all this time. Simple case: distance = speed * time.
Of course, you do not have to wait in real time, only for the time the calculation takes.
For example, if you tell Bullet to calculate 1 second of simulation:
w.do_physics(1, 120 // 30, 1 / 120)
PHY 85.780382 ms
PHY 68.943977 ms
PHY 68.943739 ms
PHY 68.475246 ms
PHY 68.419456 ms
PHY 67.951918 ms
PHY 68.476200 ms
PHY 68.420887 ms
PHY 68.945169 ms
PHY 68.447590 ms
Or:
w.do_physics(0.01, 120 // 30, 1 / 120)
PHY 33.230543 ms
PHY 17.856121 ms
PHY 17.387867 ms
PHY 17.333269 ms
PHY 17.358780 ms
PHY 34.720421 ms
PHY 17.854929 ms
PHY 17.360687 ms
PHY 17.360926 ms
PHY 17.856121 ms
Or:
w.do_physics(0.001, 120 // 30, 1 / 120)
PHY 0.494719 ms
PHY 0.526667 ms
PHY 0.495434 ms
PHY 0.495195 ms
PHY 0.495195 ms
PHY 0.498295 ms
PHY 0.495195 ms
PHY 0.494480 ms
PHY 33.204794 ms
PHY 0.495434 ms
This time depends on the complexity of the calculations that Bullet must perform.
This is actually wrong, because it skips most of the substeps.
It should be:
w.do_physics(
1 / 1, # 1Hz with steps
120 // 1,
1 / 120) # internally 120Hz with substeps
Which simplifies to:
w.do_physics(1, 120, 1 / 120)
Which means a calculation of 1 second using 120 substeps
Well, I’ll remember that; I had never thought about the substeps.
As far as I know, these are additional passes that improve the accuracy of the calculations, and they take time.
I did some research and tests and figured out that the problem is related to overlapping callbacks. It also depends heavily on the number of nodes:
For 2 nodes I have 2 pairs (AB, BA).
3 nodes - 6 pairs (AB, AC, BA, BC, CA, CB).
So the dependency is not linear: n overlapping nodes produce n(n − 1) pairs.
I have tried to set a group mask and disable collisions inside the group, but that doesn’t work.
The only way to solve it is to randomize the nodes’ positions so that the nodes don’t overlap. I haven’t found a way to disable the callbacks yet.
Updated example:
import time
from panda3d.bullet import BulletBoxShape, BulletGhostNode, BulletWorld, ZUp
from panda3d.core import CollideMask, NodePath, Vec3
def main(num_worlds=1, num_ghosts=250, num_tests=10):
    """Time ``BulletWorld.do_physics`` with ghosts placed in group 20.

    Args:
        num_worlds: Number of Bullet worlds to create.
        num_ghosts: Number of ghost nodes attached to each world.
        num_tests: Number of timed physics updates to run.

    Collisions between members of group 20 are switched off on every
    world and each ghost gets bit 20 as its into-collide mask.

    NOTE: as the follow-up to this example explains, the group flag only
    helps once ``bullet-filter-algorithm groups-mask`` is set in the PRC
    configuration; this function does not set it.

    ``time.perf_counter`` is used for the measurement because it is
    monotonic and has the highest available resolution.
    """
    bworlds = []
    ghosts = []  # every created ghost NodePath is also recorded here
    for _ in range(num_worlds):
        world = BulletWorld()
        world.set_gravity(Vec3(0, 0, 3))
        world.set_group_collision_flag(20, 20, False)
        bworlds.append(world)
        for j in range(num_ghosts):
            shape = BulletBoxShape(0.1)
            ghost_np = NodePath(BulletGhostNode('Ghost #{}'.format(j)))
            # ghost_np.set_x(j)
            ghost_np.node().add_shape(shape)
            ghost_np.node().set_into_collide_mask(CollideMask.bit(20))
            world.attach_ghost(ghost_np.node())
            ghosts.append(ghost_np)

    for _ in range(num_tests):
        started = time.perf_counter()
        for world in bworlds:
            world.do_physics(
                1 / 30,     # 30Hz with steps
                120 // 30,
                1 / 120)    # internally 120Hz with substeps
        elapsed_ms = (time.perf_counter() - started) * 1000
        print('PHY {:0.6f} ms'.format(elapsed_ms))
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
Looks like I forgot to enable group masks.
Solved by:
# Select the "groups-mask" Bullet filter algorithm; this is the step that
# was missing when the group collision flag appeared to have no effect.
# NOTE(review): presumably this has to run before the BulletWorld is
# created — confirm.
load_prc_file_data('', '''
bullet-filter-algorithm groups-mask
''')