CIS565-Fall-2015 · likangning93 · Sep 2, 2015 · Sep 2, 2015 · Sep 2, 2015 · Sep 2, 2015
diff --git a/.~lock.performance.ods# b/.~lock.performance.ods#
@@ -0,0 +1 @@
+,DESKTOP-AI5KJKM/Kangning Li,DESKTOP-AI5KJKM,06.09.2015 23:06,file:///C:/Users/Kangning%20Li/AppData/Roaming/LibreOffice/4;
diff --git a/Project1-Part1/src/kernel.cu b/Project1-Part1/src/kernel.cu
@@ -131,6 +131,14 @@ void Nbody::initSimulation(int N) {
     cudaThreadSynchronize();
 }
 
+/** Frees the device buffers dev_pos, dev_vel and dev_acc and nulls the pointers, so calling this twice is harmless. */
+void Nbody::tearDown() {
+	cudaFree(dev_pos);
+	cudaFree(dev_vel);
+	cudaFree(dev_acc);
+	// cudaFree(NULL) performs no operation, so resetting the pointers makes a repeated tearDown() safe.
+	dev_pos = dev_vel = dev_acc = NULL;
+}
 
 /******************
  * copyPlanetsToVBO *
@@ -169,6 +177,19 @@ void Nbody::copyPlanetsToVBO(float *vbodptr) {
  * stepSimulation *
  ******************/
 
+/**
+ * Helper: acceleration at my_pos due to a single other mass at other_pos (G * other_mass / r^2, directed toward it).
+ */
+__device__ glm::vec3 single_point_acceleration(glm::vec3 my_pos, glm::vec3 other_pos, float other_mass) {
+	glm::vec3 offset = other_pos - my_pos; // computed once and reused for both the distance and the direction
+	float r = glm::length(offset);
+	if (r < 0.00001f) { // (near-)coincident points: return zero instead of dividing by ~0
+		return glm::vec3(0, 0, 0);
+	}
+	float scale = G * other_mass / (r * r * r); // offset / r is the unit direction, so this yields magnitude G*M/r^2
+	return offset * scale;
+}
+
 /**
  * Compute the acceleration on a body at `my_pos` due to the `N` bodies in the array `other_planets`.
  */
@@ -189,8 +210,20 @@ __device__  glm::vec3 accelerate(int N, int iSelf, glm::vec3 this_planet, const
     //    * G is the universal gravitational constant (already defined for you)
     //    * M is the mass of the other object
     //    * r is the distance between this object and the other object
-
-    return glm::vec3(0.0f);
+	// Running sum of the pulls from the other planets; every one is weighted with the same planetMass.
+	glm::vec3 contributions = glm::vec3(0, 0, 0);
+	// The sum is accumulated with a plain loop inside the calling thread.
+	// Alternative noted by the author: compute the pairwise terms on separate threads, at a possible memory cost.
+	// The entry at index iSelf is skipped so a body contributes nothing to itself.
+	for (int i = 0; i < N; i++) {
+		if (i == iSelf) {
+			// This entry is the body itself: no contribution.
+			continue;
+		}
+		contributions += single_point_acceleration(this_planet, other_planets[i], planetMass);
+	}
+	glm::vec3 star_contribution = single_point_acceleration(this_planet, glm::vec3(0, 0, 0), starMass); // plus a mass starMass placed at the origin
+	return star_contribution + contributions;
 }
 
 /**
@@ -201,6 +234,11 @@ __global__ void kernUpdateAcc(int N, float dt, const glm::vec3 *pos, glm::vec3 *
     // TODO: implement updateAccArray.
     // This function body runs once on each CUDA thread.
     // To avoid race conditions, each instance should only write ONE value to `acc`!
+	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+	if (tid < N) { // threads whose index falls past the last body do nothing
+		// Each thread writes only its own acc[tid]; accelerate() receives the whole pos array.
+		acc[tid] = accelerate(N, tid, pos[tid], pos);
+	}
 }
 
 /**
@@ -209,6 +247,12 @@ __global__ void kernUpdateAcc(int N, float dt, const glm::vec3 *pos, glm::vec3 *
  */
 __global__ void kernUpdateVelPos(int N, float dt, glm::vec3 *pos, glm::vec3 *vel, const glm::vec3 *acc) {
     // TODO: implement updateVelocityPosition
+	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+	if (tid < N) { // threads whose index falls past the last body do nothing
+		// Position is advanced with the velocity from before this step; velocity is updated afterwards.
+		pos[tid] += vel[tid] * dt + 0.5f * acc[tid] * dt * dt;
+		vel[tid] += acc[tid] * dt;
+	}
 }
 
 /**
@@ -217,4 +261,7 @@ __global__ void kernUpdateVelPos(int N, float dt, glm::vec3 *pos, glm::vec3 *vel
 void Nbody::stepSimulation(float dt) {
     // TODO: Using the CUDA kernels you wrote above, write a function that
     // calls the kernels to perform a full simulation step.
+	dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize); // integer ceil-division: enough blocks for one thread per body, exact where float(numObjects) would round
+	kernUpdateAcc <<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_acc); // fills dev_acc from the current positions
+	kernUpdateVelPos <<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_vel, dev_acc); // then advances dev_pos and dev_vel using dev_acc
 }
diff --git a/Project1-Part1/src/kernel.h b/Project1-Part1/src/kernel.h
@@ -7,6 +7,7 @@
 
 namespace Nbody {
 void initSimulation(int N);
+void tearDown(); // frees the device buffers dev_pos, dev_vel and dev_acc (defined in kernel.cu)
 void stepSimulation(float dt);
 void copyPlanetsToVBO(float *vbodptr);
 }
diff --git a/Project1-Part1/src/main.cpp b/Project1-Part1/src/main.cpp
@@ -12,7 +12,7 @@
 // Configuration
 // ================
 
-#define VISUALIZE 1
+#define VISUALIZE 0
 
 const int N_FOR_VIS = 5000;
 const float DT = 0.2f;
@@ -25,6 +25,7 @@ int main(int argc, char* argv[]) {
 
     if (init(argc, argv)) {
         mainLoop();
+		Nbody::tearDown();
         return 0;
     } else {
         return 1;
@@ -236,9 +237,9 @@ void mainLoop() {
 
         glUseProgram(0);
         glBindVertexArray(0);
-#endif
 
         glfwSwapBuffers(window);
+#endif
     }
     glfwDestroyWindow(window);
     glfwTerminate();

diff --git a/Project1-Part2/CMakeLists.txt b/Project1-Part2/CMakeLists.txt
@@ -0,0 +1,86 @@
+cmake_minimum_required(VERSION 3.1)  # 3.1 is the first release that honors CMAKE_CXX_STANDARD, which this file sets
+
+project(cis565_hw1_matrices)
+
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+
+# Set up include and lib paths
+set(EXTERNAL "external")
+include_directories("${EXTERNAL}/include")
+include_directories("${EXTERNAL}/src")
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/osx")
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/linux" "/usr/lib64")
+elseif(WIN32)
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/win")
+endif()
+link_directories(${EXTERNAL_LIB_PATH})
+list(APPEND CMAKE_LIBRARY_PATH "${EXTERNAL_LIB_PATH}")
+
+# Find and set up core dependency libs
+
+set(GLFW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLFW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+find_library(GLFW_LIBRARY "glfw3" HINTS "${GLFW_LIBRARY_DIR}")
+
+set(GLEW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLEW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+add_definitions(-DGLEW_STATIC)
+find_package(GLEW)
+
+find_package(OpenGL)
+
+set(CORELIBS
+    "${GLFW_LIBRARY}"
+    "${OPENGL_LIBRARY}"
+    "${GLEW_LIBRARY}"
+    )
+
+# Enable C++11 for host code
+set(CMAKE_CXX_STANDARD 11)
+# Debug info for device (-G) and host (-g) code. NOTE(review): this is appended for every build type, and nvcc's -G also turns off device-code optimization — confirm that is intended for Release/timing runs.
+list(APPEND CUDA_NVCC_FLAGS -G -g)
+
+# OSX-specific hacks/fixes
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    list(APPEND CORELIBS "-framework IOKit")
+    list(APPEND CORELIBS "-framework Cocoa")
+    list(APPEND CORELIBS "-framework CoreVideo")
+endif()
+
+# Linux-specific hacks/fixes. CMAKE_EXE_LINKER_FLAGS is a space-separated string, so extend it with set(); list(APPEND) would insert a ';' whenever it is non-empty.
+if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lX11 -lXxf86vm -lXrandr -lpthread -lXi")
+endif()
+
+# Crucial magic for CUDA linking
+find_package(Threads REQUIRED)
+find_package(CUDA REQUIRED)
+
+set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)
+set(CUDA_SEPARABLE_COMPILATION ON)
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+endif()
+
+add_subdirectory(src)
+
+cuda_add_executable(${CMAKE_PROJECT_NAME}
+    "src/main.hpp"
+    "src/main.cpp"
+    )
+
+target_link_libraries(${CMAKE_PROJECT_NAME}
+    src
+    ${CORELIBS}
+    )
+
+add_custom_command(
+    TARGET ${CMAKE_PROJECT_NAME}
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_directory
+        ${CMAKE_SOURCE_DIR}/shaders
+        ${CMAKE_BINARY_DIR}/shaders
+    )
diff --git a/Project1-Part2/GNUmakefile b/Project1-Part2/GNUmakefile
@@ -0,0 +1,38 @@
+# Locate cmake: prefer the one on PATH, then fall back to two fixed install locations.
+CMAKE_ALT1 := /usr/local/bin/cmake
+CMAKE_ALT2 := /Applications/CMake.app/Contents/bin/cmake
+CMAKE := $(shell \
+	which cmake 2>/dev/null || \
+	([ -e ${CMAKE_ALT1} ] && echo "${CMAKE_ALT1}") || \
+	([ -e ${CMAKE_ALT2} ] && echo "${CMAKE_ALT2}") \
+	)
+
+all: RelWithDebugInfo
+
+
+# Each configuration target configures ./build for that CMake build type and then builds it.
+# $(MAKE) (rather than a bare `make`) lets flags such as -j reach the sub-make.
+Debug: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && $(MAKE))
+
+MinSizeRel: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && $(MAKE))
+
+Release: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && $(MAKE))
+
+# The target keeps its existing name, but CMake spells this build type RelWithDebInfo;
+# passing $@ here would select a build type CMake does not know.
+RelWithDebugInfo: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=RelWithDebInfo .. && $(MAKE))
+
+
+# Create the out-of-source build directory if it does not exist yet.
+build:
+	mkdir -p build
+
+# Best effort: succeed quietly even when ./build was never configured.
+clean:
+	((cd build && $(MAKE) clean) 2>/dev/null || true)
+
+.PHONY: all Debug MinSizeRel Release RelWithDebugInfo clean