Extreme Network Lag (TCP 2 Clients)

I’m developing a small game for use in one of the Georgia Tech research labs. The purpose of the game is to train people to use a brain-computer interface, so accuracy is important. Because accuracy is so important, I decided to use TCP so that packets wouldn’t be dropped. I recently got the code up to supporting two clients, each of whom controls one axis of a ball (X and Y axis).

It works fine when using one client (regardless of which player slot that client is in), but when using two clients the game introduces some horrendous lag. The lag continues to get worse the longer the connections are both open, and watching the data being sent and received by client and server, it seems like the server is not sending the data back out quickly enough, and is instead just storing it or some such. At some points, the server even stops generating new data and just sits there for several seconds while the clients continue to stream data to the server.

I don’t think the issue is due to using TCP instead of UDP, but it may be. It also may be that I need to implement threading, though I don’t have any experience with it so I’m really hoping that’s not the case.

client.py

import direct.directbase.DirectStart
from pandac.PandaModules import * 
from direct.distributed.PyDatagram import PyDatagram 
from direct.distributed.PyDatagramIterator import PyDatagramIterator 
from direct.task import Task 
from direct.actor import Actor
import random, sys, os, math
from direct.showbase.DirectObject import DirectObject

class World(DirectObject):
	"""Game client for the two-player ball game.

	Opens a TCP connection to the server, sends this player's keyboard
	input from the "moveTask" task, and moves the local ball to whatever
	position the server sends back in "movement" datagrams.
	"""

	def __init__(self):
		"""Connect to the server, build the scene and register the tasks."""
		#setup the server connection protocols
		ADDRESS = "127.0.0.1"
		PORT = 1248
		TIMEOUT = 3000 #3 seconds in milliseconds

		#setup the connection managers
		self.cManager = QueuedConnectionManager()
		self.cReader = QueuedConnectionReader(self.cManager, 0)
		self.cWriter = ConnectionWriter(self.cManager, 0)
		self.activeConnections = []

		#connect to server
		print("Attempting to Connect.")
		connection = self.cManager.openTCPClientConnection(ADDRESS, PORT, TIMEOUT)
		# Stays 0 until the server answers the request with a "PlayerNum" datagram.
		self.playerNum = 0
		if connection:
			self.cReader.addConnection(connection)
			self.activeConnections.append(connection)
			request = PyDatagram()
			request.addString("PlayerNumRequest")
			self._sendToServer(request)
			print("Connected!")
		else:
			# Make the failure visible instead of silently running offline.
			print("Connection failed.")

		#Load the first environment model
		self.environ = loader.loadModel("models/environment")
		self.environ.reparentTo(render)
		self.environ.setScale(0.25,0.25,0.25)
		self.environ.setPos(0,0,0)

		#Load the player ball actor
		self.playerOneBall = Actor.Actor("BrainGame/playerOneBall",{})
		self.playerOneBall.setScale(.05,.05,.05)
		self.playerOneBall.reparentTo(render)
		self.playerOneBall.setPos(0,0,0)

		# Camera: parented to the ball and looking down at it from above
		base.cam.reparentTo(self.playerOneBall)
		base.cam.setPos(0,0,2000)
		base.cam.lookAt(self.playerOneBall)
		base.disableMouse()

		# Keys
		self.controlMap = {"left":0, "right":0, "forward":0, "backward":0}
		self.accept("escape", sys.exit)
		self.accept("w", self.setControl, ["forward",1])
		self.accept("s", self.setControl, ["backward",1])
		self.accept("w-up", self.setControl, ["forward",0])
		self.accept("s-up", self.setControl, ["backward",0])

		taskMgr.add(self.move,"moveTask")
		taskMgr.add(self.tskReaderPolling, "poll the connection reader")

	def _sendToServer(self, datagram):
		"""Send datagram on every open connection (none if the connect failed)."""
		for conn in self.activeConnections:
			self.cWriter.send(datagram, conn)

	def setControl(self, key, value):
		"""Record the pressed (1) / released (0) state of a control key."""
		self.controlMap[key] = value

	def move(self, task):
		"""Send the current input (-1, 0 or 1) to the server and re-aim the camera.

		"forward" wins over "backward" when both are held; with neither held
		a 0 is sent.
		"""
		if self.controlMap["forward"] != 0:
			direction = 1
		elif self.controlMap["backward"] != 0:
			direction = -1
		else:
			direction = 0

		datagram = PyDatagram()
		datagram.addString("Input")
		datagram.addInt32(self.playerNum)
		datagram.addFloat64(direction)
		self._sendToServer(datagram)

		base.cam.setPos(0,0,2000)
		base.cam.lookAt(self.playerOneBall)

		return task.cont

	def tskReaderPolling(self, taskdata):
		"""Process every datagram that is waiting on the connection reader.

		All queued datagrams are drained on each call. Reading only one per
		call lets the queue grow whenever the server sends more than one
		datagram between calls, so the displayed position falls further and
		further behind.
		"""
		while self.cReader.dataAvailable():
			datagram = NetDatagram()
			if self.cReader.getData(datagram):
				self.processDatagram(datagram)

		return Task.cont

	def processDatagram(self, datagram):
		"""Handle one datagram from the server.

		"PlayerNum" stores the assigned player number; "movement" carries
		the ball's x and y, which are applied while keeping the current z.
		Any other message type is ignored.
		"""
		iterator = PyDatagramIterator(datagram)
		msgType = iterator.getString()
		if msgType == "PlayerNum":
			self.playerNum = iterator.getInt32()
		elif msgType == "movement":
			x = iterator.getFloat64()
			y = iterator.getFloat64()
			self.playerOneBall.setPos(x, y, self.playerOneBall.getZ())
	
# Build the client: World.__init__ connects to the server and registers its tasks.
w = World()

# run() is not defined in this file; presumably it is the main-loop entry
# point made available by importing DirectStart -- confirm.
run()

server.py

# from pandac.PandaModules import loadPrcFileData 
# loadPrcFileData("", "window-type none")
# the above commands will open the server with no window

import direct.directbase.DirectStart
from pandac.PandaModules import *
from direct.distributed.PyDatagram import PyDatagram 
from direct.distributed.PyDatagramIterator import PyDatagramIterator 
from direct.task import Task 
from direct.actor import Actor

# TCP port the server listens on, and the backlog value handed to
# openTCPServerRendezvous below.
PORT = 1248
BACKLOG = 8

# Networking objects shared by the polling tasks and processDatagram.
cManager = QueuedConnectionManager()
cListener = QueuedConnectionListener(cManager, 0)
cReader = QueuedConnectionReader(cManager, 0)
cWriter = ConnectionWriter(cManager, 0)
# Every accepted client connection, in the order they were accepted.
activeConnections = []

# Open the listening socket and let the listener watch it for new clients.
tcpSocket = cManager.openTCPServerRendezvous(PORT, BACKLOG)
cListener.addConnection(tcpSocket)

# Server-side copy of the ball; its position is what gets sent to clients.
playerOneBall = Actor.Actor("BrainGame/playerOneBall",{})
playerOneBall.setScale(.05,.05,.05)
playerOneBall.reparentTo(render)
playerOneBall.setPos(0,0,0)
# NOTE(review): playerOnePos is not read anywhere in the visible code.
playerOnePos = playerOneBall.getPos()
# Moved under `hidden` right after being attached to `render` --
# presumably so the server never draws the ball; confirm.
playerOneBall.reparentTo(hidden)

# Per-axis velocities, stored as plain Python attributes on the actor and
# updated by processDatagram.
playerOneBall.yVelocity = 0
playerOneBall.xVelocity = 0

def tskListenerPolling(taskdata):
	"""Accept at most one pending client connection per call.

	An accepted connection is appended to activeConnections and handed to
	cReader so its datagrams get polled. Always returns Task.again.
	"""
	if not cListener.newConnectionAvailable():
		return Task.again

	print("Connection Request Received.")
	# Output holders that getNewConnection fills in.
	rendezvous = PointerToConnection()
	netAddress = NetAddress()
	connPointer = PointerToConnection()
	if cListener.getNewConnection(rendezvous, netAddress, connPointer):
		accepted = connPointer.p()
		activeConnections.append(accepted)
		cReader.addConnection(accepted)
		print("Connection Accepted.")

	return Task.again
	
def _stepVelocity(velocity, output):
	"""Return the velocity of one axis after applying an input of -1, 0 or 1.

	-1 and 1 change the velocity by one in that direction, then by one more
	if the result is still positive; 0 moves the velocity one step towards
	zero.

	NOTE(review): the extra step for output == 1 is also conditioned on
	velocity > 0, exactly as the original code did. The asymmetry with the
	-1 case looks like a copy-paste slip (< 0 may have been intended);
	behaviour is kept unchanged until that is confirmed.
	"""
	if output == -1:
		velocity -= 1
		if velocity > 0:
			velocity -= 1
	elif output == 1:
		velocity += 1
		if velocity > 0:
			velocity += 1
	elif velocity > 0:
		velocity -= 1
	elif velocity < 0:
		velocity += 1
	return velocity

def _broadcastPosition():
	"""Send a "movement" datagram with the ball's x and y to every client."""
	retDatagram = PyDatagram()
	retDatagram.addString("movement")
	retDatagram.addFloat64(playerOneBall.getX())
	retDatagram.addFloat64(playerOneBall.getY())
	for connection in activeConnections:
		cWriter.send(retDatagram, connection)

def processDatagram(datagram):
	"""Handle one datagram received from a client.

	"PlayerNumRequest" is answered with a "PlayerNum" datagram holding the
	current number of connections. The reply goes to the connection the
	request arrived on; sending it to the most recently accepted connection
	reaches the wrong client when a second client connects before the first
	request has been processed.

	"Input" carries the player number and an input of -1, 0 or 1. Odd
	player numbers drive the Y axis, even ones the X axis. The ball is
	moved and the new position is sent to every client; any other input
	value is ignored. Other message types are ignored as well.

	NOTE(review): one broadcast per received input means each client gets
	one datagram per connected client per frame, so clients must drain
	their receive queue completely on every poll.
	"""
	dataSource = datagram.getConnection()
	iterator = PyDatagramIterator(datagram)
	msgType = iterator.getString()

	if msgType == "PlayerNumRequest":
		print("Player Number Requested")
		print(len(activeConnections))
		retDatagram = PyDatagram()
		retDatagram.addString("PlayerNum")
		retDatagram.addInt32(len(activeConnections))
		cWriter.send(retDatagram, dataSource)
		return

	if msgType != "Input":
		return

	player = iterator.getInt32()
	output = iterator.getFloat64()
	if output not in (-1, 0, 1):
		return

	if player % 2 != 0:
		playerOneBall.yVelocity = _stepVelocity(playerOneBall.yVelocity, output)
		playerOneBall.setY(playerOneBall, playerOneBall.yVelocity * globalClock.getDt())
	else:
		playerOneBall.xVelocity = _stepVelocity(playerOneBall.xVelocity, output)
		playerOneBall.setX(playerOneBall, playerOneBall.xVelocity * globalClock.getDt())

	_broadcastPosition()
	
def tskReaderPolling(taskdata):
	"""Hand every queued datagram to processDatagram, then continue the task.

	The whole queue is drained on each call. Handling a single datagram per
	call cannot keep up once two clients each send one datagram per frame:
	the backlog grows steadily and the game lags further and further behind.
	"""
	while cReader.dataAvailable():
		datagram = NetDatagram()
		if cReader.getData(datagram):
			processDatagram(datagram)

	return Task.cont
	

# Register the two polling tasks, then start the main loop.
# NOTE(review): the third positional argument (-39 / -38) is presumably the
# task sort/priority value, which would make the listener run before the
# reader -- confirm against the Panda3D version in use.
taskMgr.add(tskListenerPolling, "Poll the connection listener.", -39)
taskMgr.add(tskReaderPolling, "Poll the connection reader.", -38)
run()

That sounds kind of like normal TCP behavior to me. You can tweak the parameters to improve this, but when low-latency is critical, you’re generally better off with UDP.

There is a feature in Panda to hold up TCP packets for a period of time to reduce TCP overhead, but it’s not enabled by default (it’s controlled by the collect-tcp variable). So assuming you’re not turning this on inadvertently, the TCP delays you’re seeing are probably in your operating system, not in Panda.

David

Well, I was wondering if it could be due to socket blocking. I’m not sure if that’s enabled or disabled by default, but I’ve seen a couple people who’ve been using UDP having this same issue, and their solution had something to do with socket blocking.

Well, there is the Nagle/delayed-ACK issue, but it only introduces a time overhead of around 100 ms (depending on your OS settings etc.). Further reading: stuartcheshire.org/papers/NagleDelayedAck/
This should not cause the server to stall, but there are some scenarios which can cause a stall, depending on the applied settings:
-the server sends data and a client does not empty its recv buffer => first the client’s recv buffer, then the server’s send buffer fills up, causing send to block
-the server tries to recv and there is no data to read
So I would guess this is not the fault of the OS and/or TCP…

Thanks for the replies guys.

After asking some of the CS faculty here and a bunch of other folks I’m more and more convinced that my problem is threading, as unless threading is built into panda’s networking this program is not threaded.

Which means I’m going to spend a while learning about threading. Woo!

I’m still open to more suggestions if anyone sees anything else.

Did you already try to reduce this problem to a “pure” network app? Maybe you will find the reason why it does not work, or you will obtain a basic “block-free” network app which you can extend step by step to find the reason.

This was originally built up from a pure network app that had clients sending in random float values and the server sending a list of the values back out to each client. There did seem to be a little lag when I did that, but it was within acceptable range. Perhaps though, I just wasn’t able to visualize it enough.

Edit: I ran some tests on the base network app I created for this. There is a significant level of lag that I didn’t uncover before due to a commented out test case (go me!). So at least I know that the problem is definitely somewhere in the networking, not in the game code.

Can you provide this test?

Right now the files are on my other computer, but it’s been brought to my attention that panda’s networking stuff is already threaded (or at least, somewhat) as long as you use the QueuedConnection[manager/reader/etc.] I’m not sure if this is true, but the explanation made enough sense that I’m willing to believe it for now.

It’s also been brought to my attention that the problem may be that I am taking in a datagram and sending one out in the same task, rather than having separate tasks for receiving and sending datagrams. Changing this will certainly cut down some of the lag, as it will change the structure to [client1…clientN send packets -> server sends packet to each client] rather than what it is now, which is [client1 sends, server sends to each, client2 sends, server sends to each…]

I plan to fix that later today, and I’ll try to get back to post the results here once I do.

Okay, problem is solved. Here is the simplified network code that is now working. (Object rendering and whatnot is all turned off, there are no models, but the network should be the same.)

The problem was actually that I was sending out datagrams in the processDatagram function. So each time the server received a packet, it sent out one per client. This has been solved by making a different task for sending outputs, which means it’s sending an output once per tick. It also means that the incoming data buffer is flushed each time it sends the output, meaning that outputs can’t get queued up and bog down the connection.

server.py

import direct.directbase.DirectStart
from pandac.PandaModules import *
from direct.distributed.PyDatagram import PyDatagram 
from direct.distributed.PyDatagramIterator import PyDatagramIterator 
from direct.task import Task 

class point:
	"""Plain mutable holder for an x/y pair; both start at 0."""

	def __init__(self):
		# Start at the origin.
		self.x, self.y = 0, 0

# TCP port the server listens on, and the backlog value handed to
# openTCPServerRendezvous below.
PORT = 1248
BACKLOG = 8

# Latest value received from each client: processDatagram writes it and
# tskReturnData sends it back out.
position = point()
# Networking objects shared by the tasks below.
cManager = QueuedConnectionManager()
cListener = QueuedConnectionListener(cManager, 0)
cReader = QueuedConnectionReader(cManager, 0)
cWriter = ConnectionWriter(cManager, 0)
# Accepted client connections in accept order; processDatagram maps index 0
# to y and index 1 to x.
activeConnections = []

# Open the listening socket and let the listener watch it for new clients.
tcpSocket = cManager.openTCPServerRendezvous(PORT, BACKLOG)
cListener.addConnection(tcpSocket)

def tskListenerPolling(taskdata):
	"""Check for a pending client and accept it if there is one.

	The accepted connection is recorded in activeConnections and added to
	cReader. The task always reschedules itself with Task.again.
	"""
	pending = cListener.newConnectionAvailable()
	if pending:
		print("Connection Request Received.")
		# Holders that getNewConnection fills in on success.
		rendezvous = PointerToConnection()
		netAddress = NetAddress()
		holder = PointerToConnection()
		accepted = cListener.getNewConnection(rendezvous, netAddress, holder)
		if accepted:
			client = holder.p()
			activeConnections.append(client)
			cReader.addConnection(client)
			print("Connection Accepted.")
	return Task.again

def processDatagram(datagram):
	"""Store the float carried by datagram on the axis owned by its sender.

	The first accepted connection sets position.y, the second sets
	position.x; datagrams from any other connection change nothing.
	"""
	sender = datagram.getConnection()
	value = PyDatagramIterator(datagram).getFloat64()

	if sender == activeConnections[0]:
		position.y = value
		return
	# Only indexed once the sender is known not to be the first connection.
	if sender == activeConnections[1]:
		position.x = value

def tskReaderPolling(taskdata):
	"""Empty the reader's queue, passing each datagram to processDatagram."""
	while True:
		if not cReader.dataAvailable():
			break
		incoming = NetDatagram()
		if cReader.getData(incoming):
			processDatagram(incoming)

	return Task.cont

def tskReturnData(taskdata):
	"""Print the current position and send it (y first, then x) to all clients.

	Sending from this task, rather than from processDatagram, decouples the
	number of outgoing datagrams from the number of incoming ones.
	"""
	print(position.x)
	print(position.y)

	outgoing = PyDatagram()
	# The client reads y before x, so the order here matters.
	outgoing.addFloat64(position.y)
	outgoing.addFloat64(position.x)

	# Iterating an empty list is a no-op, so no separate emptiness check.
	for client in activeConnections:
		cWriter.send(outgoing, client)
	return Task.cont

# Register the listener, reader and sender tasks, then start the main loop.
# NOTE(review): the third positional argument (-39 / -38 / -37) is presumably
# the task sort/priority value, giving the order accept -> read -> send --
# confirm against the Panda3D version in use.
taskMgr.add(tskListenerPolling, "Poll the connection listener.", -39)
taskMgr.add(tskReaderPolling, "Poll the connection reader.", -38)
taskMgr.add(tskReturnData, "Return Data to the client", -37)
run()

client.py

import direct.directbase.DirectStart

from pandac.PandaModules import * 
from direct.distributed.PyDatagram import PyDatagram 
from direct.distributed.PyDatagramIterator import PyDatagramIterator 
from direct.task import Task 
from random import random

# Server address and port, plus the timeout passed to
# openTCPClientConnection below.
ADDRESS = "127.0.0.1"
PORT = 1248
TIMEOUT = 3000 #3 seconds in milliseconds
# NOTE(review): BACKLOG is not used anywhere in the visible client code.
BACKLOG = 8

# Networking objects shared by the tasks below.
cManager = QueuedConnectionManager()
cReader = QueuedConnectionReader(cManager, 0)
cWriter = ConnectionWriter(cManager, 0)
# Holds the server connection once it is open; stays empty if the connect fails.
activeConnections =[]

print "Attempting to Connect."
connection = cManager.openTCPClientConnection(ADDRESS, PORT, TIMEOUT)
# A falsy result is skipped silently: the tasks still run, but sendOutput
# then has no connection to send on.
if connection:
	cReader.addConnection(connection)
	activeConnections.append(connection)
	print "Connected!"
	
def sendOutput(taskdata):
	"""Send one random float in [0, 10) to the server on every open connection.

	Returns Task.again so the task is scheduled again.
	"""
	outgoing = PyDatagram()
	outgoing.addFloat64(random() * 10)

	# Nothing is sent when the connection attempt failed (empty list).
	for server in activeConnections:
		cWriter.send(outgoing, server)

	return Task.again

def tskReaderPolling(taskdata):
	"""Drain the reader: every waiting datagram goes to processDatagram."""
	more = cReader.dataAvailable()
	while more:
		received = NetDatagram()
		if cReader.getData(received):
			processDatagram(received)
		more = cReader.dataAvailable()

	return Task.cont

def processDatagram(datagram):
	"""Print the two floats in a server datagram: y first, then x."""
	reader = PyDatagramIterator(datagram)
	# The server packs y before x; read them in the same order.
	for _ in ("y", "x"):
		print(reader.getFloat64())

# Register the sender and reader tasks, then start the main loop.
# NOTE(review): the third positional argument (-39 / -38) is presumably the
# task sort/priority value, which would make sending run before reading --
# confirm against the Panda3D version in use.
taskMgr.add(sendOutput, "Send the Output to the Server", -39)
taskMgr.add(tskReaderPolling, "Poll the connection reader.", -38)
run()

Great! That post was useful!
I had exactly the same problem; I applied what you suggested, and now I have no more lag problems.

Thank you for posting your resolution!