Getting camera image directly

Rajarshi_Banerjee · December 28, 2018, 6:13am

How do you get the camera image directly into a numpy array rather than having a window display it?

rdb · December 29, 2018, 8:39pm

You can use offscreen window mode to avoid having a window show up.

You can attach a texture to the window using base.win.addRenderTexture, and then you can use RTMCopyRam mode (which copies the result of the render to RAM every frame) or RTMTriggeredCopyRam (which will copy it every time after you call base.win.triggerCopy().)

Then, you can extract the RAM image of this texture using tex.getRamImage() or tex.getRamImageAs("RGBA") in order to obtain it as something you can then convert easily into a numpy array.

Let us know if you need specific help with any of the steps!

volmen3 · October 7, 2020, 3:05pm

Hey rdb!

Could you please elaborate on your answer a bit? Maybe with a code sample…

I have a similar problem as the topic creator: Currently I have a Panda3D-window which displays models that I load. And now I would like to get the window content at every frame/scene as a matrix of pixel values for further analysis.

Here is the code that I use:

    # Texture that the window's colour output is attached to.
    self.texture = Texture()
    # RTM_copy_ram asks for the render result to be copied to RAM every frame.
    self.win.addRenderTexture(tex=self.texture,
                              mode=GraphicsOutput.RTM_copy_ram,
                              bitplane=GraphicsOutput.RTP_color)
    # The RAM image is read back right away; this snippet renders no frame
    # between attaching the texture and reading it.
    imgs = self.texture.getRamImageAs('RGBA')

which produces the following error:

:gobj(error): Couldn’t find an uncompressed RAM image!

If I use getRamImage() instead of getRamImageAs(), imgs is just zero.

Thanks in advance!

rdb · October 8, 2020, 7:47am

You first need to render a frame with the texture attached before data will appear in the texture. You can do that using base.win.graphicsEngine.renderFrame().

JohnCinaaa · November 27, 2021, 3:10pm

I succeeded!
I added this to the constructor (__init__) of MyApp(ShowBase):

# Texture that receives a copy of the window's colour buffer.
self.texture = Texture()
# RTMCopyRam: the render result is copied into the texture's RAM image every frame.
base.win.addRenderTexture(self.texture, GraphicsOutput.RTMCopyRam, GraphicsOutput.RTPColor)
# Run getImagebytes as a task named "GetBytes" with sort value 50.
self.taskMgr.add(self.getImagebytes, "GetBytes", sort=50)

Then added this function to MyApp(ShowBase):

   def getImagebytes(self, task):
        """Show the latest rendered frame in an OpenCV window.

        Reads the RAM image of ``self.texture``, views it as a
        ``(height, width, channels)`` uint8 numpy array, flips it vertically
        and displays it with ``cv2.imshow``.

        Args:
            task: The task object passed in by the task manager (unused).

        Returns:
            ``Task.cont`` so that the task manager calls this again next frame.
        """
        print("is texture : ", self.texture.mightHaveRamImage())
        # The texture only holds data once a frame has been rendered with it
        # attached; reshaping an empty buffer would raise ValueError.
        if not self.texture.hasRamImage():
            return Task.cont
        t = time.time()
        print("size : ", self.win.size)
        # Take the dimensions from the texture itself rather than from the
        # window, so the shape always matches the buffer that was copied.
        width = self.texture.getXSize()
        height = self.texture.getYSize()
        channels = self.texture.getNumComponents()
        data = self.texture.getRamImage().getData()
        print("texture : ", len(data))
        # NOTE(review): assumes one byte per component -- confirm if a
        # non-8-bit framebuffer is ever used.
        img = np.frombuffer(data, dtype=np.uint8)
        img = np.reshape(img, (height, width, channels))
        # Flip around the horizontal axis; presumably the RAM image is stored
        # bottom row first.
        img = cv2.flip(img, 0)
        print("img.shape : ", img.shape)
        cv2.imshow('t', img)
        # waitKey gives OpenCV's GUI a chance to actually draw the window.
        cv2.waitKey(1)
        print("delay : ", time.time() - t)
        return Task.cont

Don’t forget this import at the beginning:

from panda3d.core import Texture, GraphicsOutput

and in my case :

import cv2

Full code:

from math import pi, sin, cos

from direct.showbase.ShowBase import ShowBase
from direct.task import Task
from direct.actor.Actor import Actor
from direct.interval.IntervalGlobal import Sequence
from panda3d.core import Point3
from twisted.internet.task import LoopingCall
import time
from panda3d.core import Texture
#from panda3d.core import RTM_Copy_Ram
from panda3d.core import FrameBufferProperties, GraphicsOutput
import numpy as np
import cv2
import imutils


class MyApp(ShowBase):
    """Panda3D demo scene that also shows every frame in an OpenCV window.

    A texture is attached to the main window in ``RTMCopyRam`` mode so that
    the rendered colour buffer is copied to RAM.  A task then turns that RAM
    image into a numpy array and displays it with ``cv2.imshow``.
    """

    def __init__(self):
        ShowBase.__init__(self)

        # Disable the camera trackball controls.
        self.disableMouse()

        # Attach a texture to the window; with RTMCopyRam the render result
        # is copied into the texture's RAM image.
        self.texture = Texture()
        self.win.addRenderTexture(self.texture,
                                  GraphicsOutput.RTMCopyRam,
                                  GraphicsOutput.RTPColor)

        # Load the environment model.
        self.scene = self.loader.loadModel("models/environment")
        # Reparent the model to render.
        self.scene.reparentTo(self.render)
        # Apply scale and position transforms on the model.
        self.scene.setScale(0.25, 0.25, 0.25)
        self.scene.setPos(-8, 42, 0)

        # Add the spinCameraTask procedure to the task manager.
        self.taskMgr.add(self.spinCameraTask, "SpinCameraTask")

        # Add the frame-grabbing task.  It is registered exactly once:
        # adding it twice would grab and display every frame twice.
        self.taskMgr.add(self.getImagebytes, "GetBytes", sort=50)

        # Load and transform the panda actor.
        self.pandaActor = Actor("models/panda-model",
                                {"walk": "models/panda-walk4"})
        self.pandaActor.setScale(0.005, 0.005, 0.005)
        self.pandaActor.reparentTo(self.render)
        # Loop its animation.
        self.pandaActor.loop("walk")

        # Create the four lerp intervals needed for the panda to
        # walk back and forth.
        posInterval1 = self.pandaActor.posInterval(13,
                                                   Point3(0, -10, 0),
                                                   startPos=Point3(0, 10, 0))
        posInterval2 = self.pandaActor.posInterval(13,
                                                   Point3(0, 10, 0),
                                                   startPos=Point3(0, -10, 0))
        hprInterval1 = self.pandaActor.hprInterval(3,
                                                   Point3(180, 0, 0),
                                                   startHpr=Point3(0, 0, 0))
        hprInterval2 = self.pandaActor.hprInterval(3,
                                                   Point3(0, 0, 0),
                                                   startHpr=Point3(180, 0, 0))

        # Create and play the sequence that coordinates the intervals.
        self.pandaPace = Sequence(posInterval1, hprInterval1,
                                  posInterval2, hprInterval2,
                                  name="pandaPace")
        self.pandaPace.loop()

    def spinCameraTask(self, task):
        """Hold the camera at a fixed pose every frame.

        Despite its name, this task does not orbit the camera: it pins it at
        position (0, 0, 3) with heading/pitch/roll (0, 0, 0).

        Args:
            task: The task object passed in by the task manager (unused).

        Returns:
            ``Task.cont`` so that the task keeps running.
        """
        self.camera.setPos(0, 0, 3)
        self.camera.setHpr(0, 0, 0)
        return Task.cont

    def getImagebytes(self, task):
        """Show the latest rendered frame in an OpenCV window.

        Reads the RAM image of ``self.texture``, views it as a
        ``(height, width, channels)`` uint8 numpy array, flips it vertically
        and displays it with ``cv2.imshow``.

        Args:
            task: The task object passed in by the task manager (unused).

        Returns:
            ``Task.cont`` so that the task manager calls this again next frame.
        """
        print("is texture : ", self.texture.mightHaveRamImage())
        # The texture only holds data once a frame has been rendered with it
        # attached; reshaping an empty buffer would raise ValueError.
        if not self.texture.hasRamImage():
            return Task.cont
        t = time.time()
        print("size : ", self.win.size)
        # Take the dimensions from the texture itself rather than from the
        # window, so the shape always matches the buffer that was copied.
        width = self.texture.getXSize()
        height = self.texture.getYSize()
        channels = self.texture.getNumComponents()
        data = self.texture.getRamImage().getData()
        print("texture : ", len(data))
        # NOTE(review): assumes one byte per component -- confirm if a
        # non-8-bit framebuffer is ever used.
        img = np.frombuffer(data, dtype=np.uint8)
        img = np.reshape(img, (height, width, channels))
        # Flip around the horizontal axis; presumably the RAM image is stored
        # bottom row first.
        img = cv2.flip(img, 0)
        print("img.shape : ", img.shape)
        cv2.imshow('t', img)
        # waitKey gives OpenCV's GUI a chance to actually draw the window.
        cv2.waitKey(1)
        print("delay : ", time.time() - t)
        return Task.cont

# Build the application, then hand control to its run() method.
app = MyApp()
app.run()