Multiple cameras with offscreen rendering

Hi all,

I’m trying to use Panda3D to simulate a scene (for now just a box on the ground), and render the scene from multiple cameras. The application does not need any onscreen visualization, and ideally I’d do all of this with offscreen buffers.

I’ve gotten this far (see code below), but I’m getting the error `ValueError: cannot reshape array of size 0 into shape (512,512,4)`, because `data` in the `render_images` function is empty.

Could someone help me get this working, so that `render_images` returns an image (as an array) for each camera in the ring?

import numpy as np
import cv2
from direct.showbase.ShowBase import ShowBase
from typing import List


class BoxSceneSimulator(ShowBase):
  """Simulates a scene with a box and a ring of offscreen cameras."""

  def __init__(self, x: float, y: float, r: float, n_cameras: int):
    """Sets up a scene with a cube and a ring of cameras.

    :param x: the x-coordinate of the box.
    :param y: the y-coordinate of the box.
    :param r: the side-length of the box.
    :param n_cameras: the number of cameras, evenly spaced in a ring (with
                      radius 10, height 3) around the center of the scene,
                      facing the center of the scene.
    """
    ShowBase.__init__(self)
    # Local import keeps this fix self-contained: these names are needed for
    # the RAM-copy render-texture setup below.
    from panda3d.core import Texture, GraphicsOutput

    self.setBackgroundColor(1, 1, 1)  # set white background

    # place box in scene
    box = self.loader.loadModel("models/box")
    box.reparentTo(self.render)
    box.setScale(r)
    box.setPos(x - r / 2, y - r / 2, 0)
    self.box = box

    # set up camera ring: evenly spaced ring w/ radius=10, height=3
    # and pointing to center of scene
    self.buffers = []
    self.textures = []
    # Keep references to the cameras so they are not garbage collected.
    self.cameras = []
    camera_radius = 10
    camera_height = 3
    for i in range(n_cameras):
      buffer = self.win.makeTextureBuffer(f'Camera {i}', 512, 512)
      # BUG FIX: the texture created by makeTextureBuffer lives on the GPU
      # only, so getRamImage() on it returns an empty blob (the observed
      # "cannot reshape array of size 0" error). Attach a texture in
      # RTMCopyRam mode so every rendered frame is copied into system RAM.
      texture = Texture()
      buffer.addRenderTexture(texture, GraphicsOutput.RTMCopyRam)
      buffer.setSort(-100)  # render offscreen buffers before the main window
      camera = self.makeCamera(buffer)
      camera.reparentTo(self.render)

      camera_angle = i * (2 * np.pi / n_cameras)
      camera.setPos(
        camera_radius * np.sin(camera_angle),
        -camera_radius * np.cos(camera_angle),
        camera_height
      )
      camera.lookAt(self.render)

      self.buffers.append(buffer)
      self.textures.append(texture)
      self.cameras.append(camera)

  def render_images(self) -> List[np.ndarray]:
    """Render the scene from all cameras in the ring and return the images.

    :return: a `list` of `np.ndarray`s that correspond to each image.
    """
    self.graphicsEngine.renderFrame()
    images = []
    # Read from the RTMCopyRam textures attached in __init__, not from
    # buffer.getTexture() (which returns the GPU-only texture at slot 0).
    for tex in self.textures:
      data = tex.getRamImage()
      image = np.frombuffer(data, np.uint8)
      image = image.reshape(tex.getYSize(), tex.getXSize(), tex.getNumComponents())
      # Panda3D stores the RAM image bottom-up; flip to top-down order.
      image = np.flipud(image)
      images.append(image)
    return images
  

if __name__ == '__main__':
  x, y, r = 1, 1, 3
  n_cameras = 3
  sim = BoxSceneSimulator(x, y, r, n_cameras)
  observations = sim.render_images()
  for i, image in enumerate(observations):
    cv2.imshow(f'Observation from camera {i}', image)
  # BUG FIX: without waitKey the HighGUI event loop never runs, so the
  # imshow windows would never actually be drawn on screen.
  cv2.waitKey(0)
  cv2.destroyAllWindows()

As far as I know, you need to call graphicsEngine.renderFrame twice before the first frame becomes available to be read.

Hmm… I tried adding a few calls to graphicsEngine.renderFrame() (instead of just one) in render_images but I’m getting the same error.

Below is the code that works for me

# NOTE(review): `props`, `win_prop`, `flags`, and `base` are assumed to be
# defined earlier in the poster's script (a FrameBufferProperties, a
# WindowProperties, GraphicsPipe flags, and the ShowBase instance) — they are
# not shown in this snippet.
buffer = base.graphicsEngine.make_output(base.pipe, "stereo buffer", -100,
                                         props,
                                         win_prop, flags, base.win.getGsg(),
                                         base.win)

# Attach a texture in copy-to-RAM mode so that getRamImage() at the bottom
# returns actual pixel data rather than an empty blob.
texture = Texture()
buffer.addRenderTexture(texture, GraphicsOutput.RTM_copy_ram)

# A separate scene graph root for the offscreen camera.
render = NodePath('stereo')
cam = base.makeCamera(buffer)
lens = PerspectiveLens()
lens.setFov(54.611362)
lens.setFocalLength(1.37795276)

# Stereo lens parameters; infinite convergence makes the view axes parallel.
lens.set_interocular_distance(50)
lens.set_convergence_distance(float('inf'))

cam.node().setLens(lens)
cam.node().setScene(render)
# Route the buffer's output through a display region driven by this camera.
dp = buffer.makeDisplayRegion()
dp.setCamera(cam)

base.camera.set_pos(100, -500, 0)
cube = base.loader.loadModel('misc/rgbCube')
cube.reparent_to(render)
cube.set_scale(100)
cube.set_z(100)

base.graphicsEngine.renderFrame()
# Zero-copy view of the RAM image, reshaped to (height, width, channels);
# height is 1024 here, presumably because the stereo buffer stacks two
# 512-pixel views — TODO confirm against the window properties used above.
image = np.asarray(memoryview(texture.getRamImage())).reshape(1024, 512, 3)

Hello. I’ve noticed you don’t keep a reference to your camera(s). Maybe storing them as an instance variable will fix the problem.

Okay, thanks for the suggestions. I think I’ve figured it out – posting the code below in case it’s useful to anyone else.

import numpy as np
from direct.showbase.ShowBase import ShowBase
from panda3d.core import FrameBufferProperties, WindowProperties
from panda3d.core import Texture, PerspectiveLens
from typing import List
import matplotlib.pyplot as plt
from panda3d.core import ConfigVariableString
from sensors.cameras import CameraIntrinsics


ConfigVariableString('background-color', '1.0 1.0 1.0 0.0')  # sets background to white


class SceneSimulator(ShowBase):
  """Simulates a scene with image and depth cameras."""

  # Negative sort orders render the offscreen buffers before the main window.
  IMAGE_CAMERA_RENDER_ORDER = -2
  DEPTH_CAMERA_RENDER_ORDER = -2

  def __init__(self):
    """Simulate an empty scene with no cameras.

    Note that cameras can be added with later calls to `add_image_camera` and
    `add_depth_camera`.
    """
    ShowBase.__init__(self)
    self._image_buffers = []
    self._image_cameras = []
    self._depth_buffers = []
    self._depth_cameras = []

  def render_frame(self):
    """Render one frame, refreshing the RAM copy of every camera's texture."""
    self.graphics_engine.render_frame()

  def add_image_camera(self, intrinsics: CameraIntrinsics, pos, hpr, name=None):
    """Add an offscreen image camera to the scene.

    :param intrinsics: camera intrinsics providing the image size and field
                       of view.
    :param pos: (x, y, z) world position of the camera.
    :param hpr: (heading, pitch, roll) orientation of the camera, in degrees.
    :param name: optional label used in the buffer/camera debug names.
    """
    # BUG FIX: GraphicsPipe and GraphicsOutput were referenced below without
    # ever being imported, raising NameError on the first call. Import them
    # locally so this method is self-contained.
    from panda3d.core import GraphicsPipe, GraphicsOutput

    # set up texture and graphics buffer
    window_props = WindowProperties.size(*intrinsics.get_size())
    frame_buffer_props = FrameBufferProperties()
    buffer = self.graphicsEngine.make_output(self.pipe,
      f'Image Buffer [{name}]',
      self.IMAGE_CAMERA_RENDER_ORDER,
      frame_buffer_props,
      window_props,
      GraphicsPipe.BFRefuseWindow,    # don't open a window
      self.win.getGsg(),
      self.win
    )
    # RTMCopyRam copies each rendered frame into system RAM so that
    # tex.getRamImage() in get_images() returns actual pixel data.
    texture = Texture()
    buffer.addRenderTexture(texture, GraphicsOutput.RTMCopyRam)

    # set up lens according to camera intrinsics
    lens = PerspectiveLens()
    lens.set_film_size(*intrinsics.get_size())
    lens.set_fov(*np.rad2deg(intrinsics.get_fov()))

    camera = self.makeCamera(buffer, lens=lens, camName=f'Image Camera [{name}]')
    camera.reparentTo(self.render)
    camera.setPos(*pos)
    camera.setHpr(*hpr)

    # Keep references so the buffer and camera are not garbage collected.
    self._image_buffers.append(buffer)
    self._image_cameras.append(camera)

  def get_images(self) -> List[np.ndarray]:
    """Get the images from each image camera after the most recent rendering.

    Note that `self.render_frame()` must be called separately.

    :return: a `list` of `np.ndarray`s, one per image camera, each of shape
             (height, width, components).
    """
    images = []
    for buffer in self._image_buffers:
      tex = buffer.getTexture()
      data = tex.getRamImage()
      image = np.frombuffer(data, np.uint8).reshape(
        tex.getYSize(), tex.getXSize(), tex.getNumComponents())
      # Panda3D stores the RAM image bottom-up; flip to top-down order.
      image = np.flipud(image)
      images.append(image)
    return images


if __name__ == '__main__':
  sim = SceneSimulator()

  # Build a ring of cameras around the origin, all aimed at the scene center.
  n_cameras = 3
  camera_radius = 6
  camera_height = 4
  intrinsics = CameraIntrinsics.from_size_and_fov((1920, 1080), (np.pi/6, np.pi/6))
  angle_step = 2 * np.pi / n_cameras
  for cam_idx in range(n_cameras):
    theta = cam_idx * angle_step
    cam_pos = (
      camera_radius * np.sin(theta),
      -camera_radius * np.cos(theta),
      camera_height,
    )
    sim.add_image_camera(intrinsics, cam_pos, (0, 0, 0), name=str(cam_idx))
    sim._image_cameras[cam_idx].lookAt(sim.render)

  # Drop a unit cube into the scene, centered at the origin on the ground.
  x, y, r = 0, 0, 1
  box = sim.loader.loadModel("models/box")
  box.reparentTo(sim.render)
  box.setScale(r)
  box.setPos(x - r / 2, y - r / 2, 0)
  sim.box = box

  # Render once, then pull the images from every camera and display them.
  sim.render_frame()
  observations = sim.get_images()

  for obs in observations:
    plt.imshow(obs)
    plt.show()
2 Likes

thank you for posting the final code, it was super useful!

1 Like