parallel processing

:slight_smile: So now I have written my first parallel processing application. Of course it makes no sense, with only two objects, to compute the distance twice :smiley:. But I know exactly how I will use this in smashBALL :smiley: — or, more generally, in my raster-based particle system :smiley:

I'm just curious: a few hours ago I painted my objects from inside the threads and it worked, but I have no idea what I did and I can't reproduce it. Now, when I try it, my application crashes after a few paints. That's why I'm now using a task to store the values that were processed in parallel. I think it's related to main_loop. I think I asked this a long time ago (for Python), but how can I stop main_loop? Or do I need to synchronize the global clock with my threads? Maybe that could fix this too.
Anyway, I can store my data in a Vector3; it won't make a difference, it's just a bit more work to change my code.

I'm very proud of myself! :slight_smile:

greetz
logans run

// PARALLEL PROCESSING -> parallel processing of two distances. This only demonstrates how to process in parallel; of course there is no need to compute the distance twice! ;)
// The idea came up as a way to improve the performance of the raster particles in smashBALL: now I'm able to split my particle calculation up!
// It should improve the performance by up to 30 % or more! :D :D :D I hope so, but I'm sure it does :D
// drh 2010

#include "pandaFramework.h"
#include "pandaSystem.h"
#include "graphicsEngine.h" 
#include "process.h" 
#include "windows.h"

// Panda3D framework object; opened and closed in main().
PandaFramework framework;
// Window created by windowOPEN().
WindowFramework *window;
// NOTE(review): engine and globalClock are not referenced by any code visible in this listing.
GraphicsEngine *engine;
// Global task manager; main() registers mainTASK on it.
PT(AsyncTaskManager) taskMgr = AsyncTaskManager::get_global_ptr();
PT(ClockObject) globalClock = ClockObject::get_global_clock();
// camera is filled by getCAMERA(); model[] holds the two nodes loaded by loadMODEL().
NodePath camera,model[2];

// Start positions of the two objects. Each worker thread moves one of them
// along x; mainTASK copies the x values onto the models.
LVecBase3f pos1 (0.8,0.0,0.0),pos2 (-0.8,0.0,0.0);

// Parallel processing state.
// dis / dis1: last distance measured by worker 1 / worker 2.
float dis,dis1;
// Both worker loops run while this is non-zero; nothing visible here clears it.
int isACTIVE = 1; 

// Worker thread entry points, started with _beginthread in main().
void parallelDISTANCEcalc1 (void *);
void parallelDISTANCEcalc2 (void *);


// Opens the application window and stores it in the global `window`.
// The window is 500x500, undecorated, placed at screen position (500,10),
// and the clear color of its first active display region is set to opaque black.
void windowOPEN() {
	// Stack object instead of `new`: the properties are only read by
	// open_window(), so a heap allocation that is never deleted just leaks.
	WindowProperties props;
	props.set_size(500,500);
	props.set_origin(500,10);
	props.set_undecorated(true);
	window = framework.open_window(props,0);
	window->get_graphics_window()->get_active_display_region(0)->set_clear_color(Colorf(0,0,0,1));
}
// stor camera
void getCAMERA() {
	camera = window->get_camera_group(); 
}
// load model
void loadMODEL() {
	for (int i=0;i<2;i++) {
		model[i] = window->load_model(framework.get_models(),"smiley");
		model[i].set_scale(.1);
		model[i].reparent_to(window->get_aspect_2d());
	}
}

//parallel processing
void parallelDISTANCEcalc1 (void *) {
	while (isACTIVE) {
		LVecBase3f disX = (pos1-pos2);
		dis = disX.length();
		if (dis < .1) {
			pos1.set_x(pos1.get_x());
		}
		else {
			pos1.set_x(pos1.get_x()-.005);
		}
		Sleep(10); 
	}
}

void parallelDISTANCEcalc2 (void *) {
	while (isACTIVE) {
		
		LVecBase3f disX = (pos1-pos2);
		dis1 = disX.length();
		if (dis1 < .1) {
			pos2.set_x(pos2.get_x());
		}
		else {
			pos2.set_x(pos2.get_x()+.005);
		}
		Sleep(10);
	}
}

// Task callback: copies the x coordinates maintained by the two worker
// threads (pos1, pos2) onto the two models. Always returns DS_cont so the
// task manager keeps calling it. `task` and `data` are not used.
AsyncTask::DoneStatus mainTASK(GenericAsyncTask* task, void* data) {
	const float firstX = pos1.get_x();
	model[0].set_x(firstX);
	const float secondX = pos2.get_x();
	model[1].set_x(secondX);
	return AsyncTask::DS_cont;
}

int main(int argc, char *argv[]) {
	framework.open_framework(argc, argv);
    windowOPEN();
	getCAMERA();
	loadMODEL();

	_beginthread(parallelDISTANCEcalc1, 0, NULL); 
	_beginthread(parallelDISTANCEcalc2, 0, NULL); 

	taskMgr->add(new GenericAsyncTask("main task", &mainTASK, (void*) NULL));

	framework.main_loop();
    framework.close_framework();
    return (0);
} 

Here is another example of parallel processing, where a specific CPU is assigned to do the calculation. My laptop only has a dual core, which is why I assigned both threads to the second CPU. This thing is still freaking me out a bit, but I'm starting to understand parallel processing. Once I've got it working on the CPU, I will try CUDA :slight_smile: :smiley: So far I have watched some documentaries about it; CUDA is exactly how I think multiprocessing should work, with building blocks and grids in 2D and 3D (like a Rubik's Cube) :slight_smile: :smiley: :smiley: :smiley:

// PARALLEL PROCESSING -> parallel processing of two distances. This only demonstrates how to process in parallel; of course there is no need to compute the distance twice! ;)
// The idea came up as a way to improve the performance of the raster particles in smashBALL: now I'm able to split my particle calculation up!
// It should improve the performance by up to 30 % or more! :D :D :D I hope so, but I'm sure it does :D
// dirk-r. hochegger 2010

#include "pandaFramework.h"
#include "pandaSystem.h"
#include "graphicsEngine.h"
#include "conio.h" 
#include "process.h" 
#include "windows.h"
#include "rigidBodyCombiner.h"

// Panda3D framework object; opened and closed in main().
PandaFramework framework;
// Window created by windowOPEN().
WindowFramework *window;
// NOTE(review): engine, globalClock and testNODE are not referenced by any code visible in this listing.
GraphicsEngine *engine;
// Combiner node that all particle models are parented under (see loadMODEL()).
PT(RigidBodyCombiner) rbc = new RigidBodyCombiner("rbc");
// Global task manager; main() registers mainTASK on it.
PT(AsyncTaskManager) taskMgr = AsyncTaskManager::get_global_ptr();
PT(ClockObject) globalClock = ClockObject::get_global_clock();
// camera: set by getCAMERA(); model[]: one node per particle; rigNode: NodePath wrapping rbc.
NodePath camera,model[500],testNODE[500],rigNode;

// Per-particle velocity: the worker threads add to it, setVELO() divides it
// by 10 and applies it.
float veloX[500];
float veloY[500];
// Number of particles created by loadMODEL() and scanned by the workers.
int particleCOUNT = 500;
// pos: current particle positions (initialised in loadMODEL()).
LVecBase3f pos[1000];
// posX / posX1: positions collected by worker 1 (x > 0) / worker 2 (x < 0)
// during the current scan.
LVecBase3f posX[1000];
LVecBase3f posX1[1000];

// Parallel processing state.
// Thread handles returned by CreateThread in main().
// NOTE(review): thread3 is never assigned in the visible code.
static HANDLE thread1; 
static HANDLE thread2; 
static HANDLE thread3; 

// NOTE(review): dis and dis1 are unused here; the workers declare their own local `dis`.
float dis,dis1;
// Both worker loops run while this is non-zero; nothing visible here clears it.
int isACTIVE = 1; 
// id / id1: number of entries currently stored in posX / posX1.
int id1;
// posXid / posXid1: particle index (into pos, veloX, veloY) of each posX / posX1 entry.
int posXid[1000];
int posXid1[1000];
int id;
// i / i1: scan index of worker 1 / worker 2 into pos.
int i,i1;


// Opens the application window and stores it in the global `window`.
// The window is 500x500, undecorated, placed at screen position (500,10),
// and the clear color of its first active display region is set to opaque black.
void windowOPEN() {
	// Stack object instead of `new`: the properties are only read by
	// open_window(), so a heap allocation that is never deleted just leaks.
	WindowProperties props;
	props.set_size(500,500);
	props.set_origin(500,10);
	props.set_undecorated(true);
	window = framework.open_window(props,0);
	window->get_graphics_window()->get_active_display_region(0)->set_clear_color(Colorf(0,0,0,1));
}
// stor camera
void getCAMERA() {
	camera = window->get_camera_group(); 
}
// load model
void loadMODEL() {
	rigNode = NodePath(rbc);
	rigNode.reparent_to(window->get_aspect_2d());
	float x = -.5;
	float y = .5;
	int gridID = 0;
	for (int i=0;i<particleCOUNT;i++) {
		pos[i].set_x(x);
		pos[i].set_y(0.0);
		pos[i].set_z(y);
		model[i] = window->load_model(framework.get_models(),"box");
		model[i].set_scale(.02);
		model[i].reparent_to(rigNode);
		model[i].set_x(pos[i].get_x());
		model[i].set_z(pos[i].get_z());
		x+= .05;
		gridID++;
		if (gridID > 19) {
			x = -.5;
			y -=.05;
			gridID = 0;
		}
	}
	rbc->collect();
}

// Applies the accumulated velocities to every particle.
// Each velocity component is first divided by 10, then added to the
// particle's position; both the model node and pos[] receive the new x/z.
void setVELO() {
	int n = 0;
	while (n < particleCOUNT) {
		veloX[n] /= 10;
		veloY[n] /= 10;
		const float movedX = pos[n].get_x()+veloX[n];
		const float movedZ = pos[n].get_z()+veloY[n];
		model[n].set_x(movedX);
		model[n].set_z(movedZ);
		pos[n].set_x(movedX);
		pos[n].set_z(movedZ);
		++n;
	}
}

//parallel processing
// Worker thread 1: handles the particles whose x position is > 0.
//
// While isACTIVE is non-zero it scans pos[] one particle per loop iteration.
// A particle with x > 0 is appended to posX (its index goes to posXid), and
// after each step every pair in the collected list is checked: if two
// distinct positions are closer than 0.08, a quarter of their x/z offset is
// added to the velocity of the second particle of the pair. When the scan
// reaches particleCOUNT, both the scan index and the list are reset.
//
// pvoid is unused. Returns 0 once isACTIVE is cleared.
//
// NOTE(review): pos, veloX and veloY are also accessed by setVELO() without
// any synchronization visible in this file — confirm this is acceptable.
DWORD WINAPI parallelDISTANCEcalc1( LPVOID pvoid ) {
	i = 0;
	while (isACTIVE) {
		// Scan indices 0..particleCOUNT-1. Incrementing before use (as the
		// code did previously) skipped particle 0 and read pos[particleCOUNT].
		if (pos[i].get_x()>0) {
			posX[id] = pos[i];
			posXid[id] = i;
			id+=1;
		}
		for (int x=0;x<id;x++) {
			for (int xx=0;xx<id;xx++) {
				// Compare entries of this thread's own list (posX). The old
				// check looked at posX1, which belongs to the other worker.
				if (posX[xx] != posX[x]) {
					LVecBase3f disX = (posX[x]-posX[xx]);
					float pairDistance = disX.length();
					if (pairDistance < .08) {
						float high = posX[xx].get_z()-posX[x].get_z();
						float base = posX[xx].get_x()-posX[x].get_x();
						veloX[posXid[xx]] += base/4;
						veloY[posXid[xx]] += high/4;
					}
				}
			}
		}

		i++;
		if (i>=particleCOUNT) {
			// Full sweep done: start over with an empty list.
			i=0;
			id = 0;
		}
	}
	// A thread procedure declared DWORD must return a value.
	return 0;
}

// Worker thread 2: handles the particles whose x position is < 0.
//
// While isACTIVE is non-zero it scans pos[] one particle per loop iteration.
// A particle with x < 0 is appended to posX1 (its index goes to posXid1), and
// after each step every pair in the collected list is checked: if two
// distinct positions are closer than 0.08, a quarter of their x/z offset is
// added to the velocity of the second particle of the pair. When the scan
// reaches particleCOUNT, both the scan index and the list are reset.
//
// pvoid is unused. Returns 0 once isACTIVE is cleared.
//
// NOTE(review): pos, veloX and veloY are also accessed by setVELO() without
// any synchronization visible in this file — confirm this is acceptable.
DWORD WINAPI parallelDISTANCEcalc2( LPVOID pvoid ) {
	i1 = 0;
	while (isACTIVE) {
		// Scan indices 0..particleCOUNT-1. Incrementing before use (as the
		// code did previously) skipped particle 0 and read pos[particleCOUNT].
		if (pos[i1].get_x()<0) {
			posX1[id1] = pos[i1];
			posXid1[id1] = i1;
			id1+=1;
		}
		for (int x=0;x<id1;x++) {
			for (int xx=0;xx<id1;xx++) {
				// Skip identical positions (this includes x == xx).
				if (posX1[xx] != posX1[x]) {
					LVecBase3f disX = (posX1[x]-posX1[xx]);
					float pairDistance = disX.length();
					if (pairDistance < .08) {
						float high = posX1[xx].get_z()-posX1[x].get_z();
						float base = posX1[xx].get_x()-posX1[x].get_x();
						veloX[posXid1[xx]] += base/4;
						veloY[posXid1[xx]] += high/4;
					}
				}
			}
		}

		i1++;
		if (i1>=particleCOUNT) {
			// Full sweep done: start over with an empty list.
			i1=0;
			id1 = 0;
		}
	}
	// A thread procedure declared DWORD must return a value.
	return 0;
}

// Task callback that picks up the values produced by the worker threads:
// it applies the accumulated velocities via setVELO() and always asks the
// task manager to run it again. `task` and `data` are not used.
AsyncTask::DoneStatus mainTASK(GenericAsyncTask* task, void* data) {
	const AsyncTask::DoneStatus keepRunning = AsyncTask::DS_cont;
	setVELO();
	return keepRunning;
}

int main(int argc, char *argv[]) {
	framework.open_framework(argc, argv);
    windowOPEN();
	getCAMERA();
	loadMODEL();

	DWORD threadPARAMETER1 = 1; 
    DWORD threadID1;
	thread1 = CreateThread (NULL, 0, parallelDISTANCEcalc1, 
                     &threadPARAMETER1, 0, &threadID1); 
    SetThreadAffinityMask(thread1, 2); // assign the distance processing onto the second cpu

	DWORD threadPARAMETER2 = 2; 
    DWORD threadID2;
	thread2 = CreateThread (NULL, 0, parallelDISTANCEcalc2, 
                     &threadPARAMETER2, 0, &threadID2); 
    SetThreadAffinityMask(thread2, 2); // assign the distance processing onto the second cpu

	taskMgr->add(new GenericAsyncTask("main task", &mainTASK, (void*) NULL));

	framework.main_loop();
    framework.close_framework();
    return (0);
} 

That's called the CPU heart wave :slight_smile:

// PARALLEL PROCESSING -> parallel processing of two distances. This only demonstrates how to process in parallel; of course there is no need to compute the distance twice! ;)
// The idea came up as a way to improve the performance of the raster particles in smashBALL: now I'm able to split my particle calculation up!
// It should improve the performance by up to 30 % or more! :D :D :D I hope so, but I'm sure it does :D
// dirk-r. hochegger 2010

#include "pandaFramework.h"
#include "pandaSystem.h"
#include "graphicsEngine.h"
#include "conio.h" 
#include "process.h" 
#include "windows.h"
#include "rigidBodyCombiner.h"

// Panda3D framework object; opened and closed in main().
PandaFramework framework;
// Window created by windowOPEN().
WindowFramework *window;
// NOTE(review): engine, globalClock and testNODE are not referenced by any code visible in this listing.
GraphicsEngine *engine;
// Combiner node that all particle models are parented under (see loadMODEL()).
PT(RigidBodyCombiner) rbc = new RigidBodyCombiner("rbc");
// Global task manager; main() registers mainTASK on it.
PT(AsyncTaskManager) taskMgr = AsyncTaskManager::get_global_ptr();
PT(ClockObject) globalClock = ClockObject::get_global_clock();
// camera: set by getCAMERA(); model[]: one node per particle; rigNode: NodePath wrapping rbc.
NodePath camera,model[1000],testNODE[1000],rigNode;

// Per-particle velocity: the worker threads add to it, setVELO() divides it
// by 10 and applies it to the models.
float veloX[1000];
float veloY[1000];
// Number of particles created by loadMODEL() and scanned by the workers.
int particleCOUNT = 1000;
// pos: particle positions (initialised in loadMODEL()).
LVecBase3f pos[10000];
// posX / posX1: positions collected by worker 1 (x > 0) / worker 2 (x < 0)
// during the current scan.
LVecBase3f posX[10000];
LVecBase3f posX1[10000];

// Parallel processing state.
// Thread handles returned by CreateThread in main().
// NOTE(review): thread3 is never assigned in the visible code.
static HANDLE thread1; 
static HANDLE thread2; 
static HANDLE thread3; 

// NOTE(review): dis and dis1 are unused here; the workers declare their own local `dis`.
float dis,dis1;
// Both worker loops run while this is non-zero; nothing visible here clears it.
int isACTIVE = 1; 
// id / id1: number of entries currently stored in posX / posX1.
int id1;
// posXid / posXid1: particle index (into pos, veloX, veloY) of each posX / posX1 entry.
int posXid[10000];
int posXid1[10000];
int id;
// i / i1: scan index of worker 1 / worker 2 into pos.
int i,i1;


// Opens the application window and stores it in the global `window`.
// The window is 500x800, undecorated, placed at screen position (500,10),
// and the clear color of its first active display region is set to opaque black.
void windowOPEN() {
	// Stack object instead of `new`: the properties are only read by
	// open_window(), so a heap allocation that is never deleted just leaks.
	WindowProperties props;
	props.set_size(500,800);
	props.set_origin(500,10);
	props.set_undecorated(true);
	window = framework.open_window(props,0);
	window->get_graphics_window()->get_active_display_region(0)->set_clear_color(Colorf(0,0,0,1));
}
// stor camera
void getCAMERA() {
	camera = window->get_camera_group(); 
}
// load model
void loadMODEL() {
	rigNode = NodePath(rbc);
	rigNode.reparent_to(window->get_aspect_2d());
	float x = -.2;
	float y = .6;
	int gridID = 0;
	for (int i=0;i<particleCOUNT;i++) {
		pos[i].set_x(x);
		pos[i].set_y(0.0);
		pos[i].set_z(y);
		model[i] = window->load_model(framework.get_models(),"box");
		model[i].set_scale(.01);
		model[i].reparent_to(rigNode);
		model[i].set_x(pos[i].get_x());
		model[i].set_z(pos[i].get_z());
		x+= .02;
		gridID++;
		if (gridID > 19) {
			x = -.2;
			y -=.02;
			gridID = 0;
		}
	}
	rbc->collect();
}

// Applies the accumulated velocities to every particle model.
// Each velocity component is first divided by 10; the model is then placed
// at the particle's stored position plus that velocity. pos[] itself is
// left untouched (the position write-back is disabled in this version).
void setVELO() {
	int n = 0;
	while (n < particleCOUNT) {
		veloX[n] /= 10;
		veloY[n] /= 10;
		const float shownX = pos[n].get_x()+veloX[n];
		model[n].set_x(shownX);
		const float shownZ = pos[n].get_z()+veloY[n];
		model[n].set_z(shownZ);
		++n;
	}
}

//parallel processing
// Worker thread 1: handles the particles whose x position is > 0.
//
// While isACTIVE is non-zero it scans pos[] one particle per loop iteration.
// A particle with x > 0 is appended to posX (its index goes to posXid), and
// after each step every pair in the collected list is checked: if two
// distinct positions are closer than 0.08, a quarter of their x/z offset is
// added to the velocity of the second particle of the pair. When the scan
// reaches particleCOUNT, both the scan index and the list are reset.
//
// pvoid is unused. Returns 0 once isACTIVE is cleared.
//
// NOTE(review): veloX and veloY are also modified by setVELO() without any
// synchronization visible in this file — confirm this is acceptable.
DWORD WINAPI parallelDISTANCEcalc1( LPVOID pvoid ) {
	i = 0;
	while (isACTIVE) {
		// Scan indices 0..particleCOUNT-1. Incrementing before use (as the
		// code did previously) skipped particle 0 and read pos[particleCOUNT].
		if (pos[i].get_x()>0) {
			posX[id] = pos[i];
			posXid[id] = i;
			id+=1;
		}
		for (int x=0;x<id;x++) {
			for (int xx=0;xx<id;xx++) {
				// Compare entries of this thread's own list (posX). The old
				// check looked at posX1, which belongs to the other worker.
				if (posX[xx] != posX[x]) {
					LVecBase3f disX = (posX[x]-posX[xx]);
					float pairDistance = disX.length();
					if (pairDistance < .08) {
						float high = posX[xx].get_z()-posX[x].get_z();
						float base = posX[xx].get_x()-posX[x].get_x();
						veloX[posXid[xx]] += base/4;
						veloY[posXid[xx]] += high/4;
					}
				}
			}
		}

		i++;
		if (i>=particleCOUNT) {
			// Full sweep done: start over with an empty list.
			i=0;
			id = 0;
		}
	}
	// A thread procedure declared DWORD must return a value.
	return 0;
}

// Worker thread 2: handles the particles whose x position is < 0.
//
// While isACTIVE is non-zero it scans pos[] one particle per loop iteration.
// A particle with x < 0 is appended to posX1 (its index goes to posXid1), and
// after each step every pair in the collected list is checked: if two
// distinct positions are closer than 0.08, a quarter of their x/z offset is
// added to the velocity of the second particle of the pair. When the scan
// reaches particleCOUNT, both the scan index and the list are reset.
//
// pvoid is unused. Returns 0 once isACTIVE is cleared.
//
// NOTE(review): veloX and veloY are also modified by setVELO() without any
// synchronization visible in this file — confirm this is acceptable.
DWORD WINAPI parallelDISTANCEcalc2( LPVOID pvoid ) {
	i1 = 0;
	while (isACTIVE) {
		// Scan indices 0..particleCOUNT-1. Incrementing before use (as the
		// code did previously) skipped particle 0 and read pos[particleCOUNT].
		if (pos[i1].get_x()<0) {
			posX1[id1] = pos[i1];
			posXid1[id1] = i1;
			id1+=1;
		}
		for (int x=0;x<id1;x++) {
			for (int xx=0;xx<id1;xx++) {
				// Skip identical positions (this includes x == xx).
				if (posX1[xx] != posX1[x]) {
					LVecBase3f disX = (posX1[x]-posX1[xx]);
					float pairDistance = disX.length();
					if (pairDistance < .08) {
						float high = posX1[xx].get_z()-posX1[x].get_z();
						float base = posX1[xx].get_x()-posX1[x].get_x();
						veloX[posXid1[xx]] += base/4;
						veloY[posXid1[xx]] += high/4;
					}
				}
			}
		}

		i1++;
		if (i1>=particleCOUNT) {
			// Full sweep done: start over with an empty list.
			i1=0;
			id1 = 0;
		}
	}
	// A thread procedure declared DWORD must return a value.
	return 0;
}

// Task callback that picks up the values produced by the worker threads:
// it applies the accumulated velocities via setVELO() and always asks the
// task manager to run it again. `task` and `data` are not used.
AsyncTask::DoneStatus mainTASK(GenericAsyncTask* task, void* data) {
	const AsyncTask::DoneStatus keepRunning = AsyncTask::DS_cont;
	setVELO();
	return keepRunning;
}

int main(int argc, char *argv[]) {
	framework.open_framework(argc, argv);
    windowOPEN();
	getCAMERA();
	loadMODEL();

	DWORD threadPARAMETER1 = 1; 
    DWORD threadID1;
	thread1 = CreateThread (NULL, 0, parallelDISTANCEcalc1, 
                     &threadPARAMETER1, 0, &threadID1); 
    SetThreadAffinityMask(thread1, 2); // assign the distance processing onto the second cpu

	DWORD threadPARAMETER2 = 2;
    DWORD threadID2;
	thread2 = CreateThread (NULL, 0, parallelDISTANCEcalc2, 
                     &threadPARAMETER2, 0, &threadID2); 
    SetThreadAffinityMask(thread2, 2); // assign the distance processing onto the second cpu

	taskMgr->add(new GenericAsyncTask("main task", &mainTASK, (void*) NULL));

	framework.main_loop();
    framework.close_framework();
    return (0);
}