diff --git a/CMakeLists.txt b/CMakeLists.txt index d3d976c..41dc21e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") endif() include_directories(.) -#add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction +add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction add_subdirectory(src) cuda_add_executable(${CMAKE_PROJECT_NAME} @@ -82,7 +82,7 @@ cuda_add_executable(${CMAKE_PROJECT_NAME} target_link_libraries(${CMAKE_PROJECT_NAME} src - #stream_compaction # TODO: uncomment if using your stream compaction + stream_compaction # TODO: uncomment if using your stream compaction ${CORELIBS} ) diff --git a/README.md b/README.md index 110697c..a004c32 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,156 @@ CUDA Path Tracer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) - -### (TODO: Your README) - -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +* Angelina Risi + * [LinkedIn](https://www.linkedin.com/in/angelina-risi) + * [Twitter](https://twitter.com/Angelina_Risi) +* Tested on: Windows 10, i7-6700HQ @ 2.60GHz 8GB, GTX 960M 4096MB (Personal Laptop) + + + + + +## Features + +### Stream Compaction + +Included with the code are methods for stream compaction using shared memory modified from a previous project, tested and confirmed to function when correctly used with the pathtracer. While I had attempted to use the Thrust implementation, there was some error in calculating the new number of paths such that it never changed, so it was just dead weight. Stream compaction reorders an array such that non-zero elements (in this case un-terminated ray paths) are at the beginning, and returns the number of these elements. 
This allows us to reduce the number of paths to check for intersections and the number of paths to shade, and keeps them contiguous in memory. + +### Depth of Field + +![Depth of Field](img/big_DOF.2018-09-30_02-33-09z.770samp.png) + +Depth of field (DOF) is the effect in which objects further away from the point where the eye is focused become blurrier and less well-defined than those closer to the focal point. This is done by jittering the cast rays' origins within an "aperture" radius and recalculating the direction toward the originally aimed focal point. This method causes greater distortion of the ray from the target point the further it is from the focal point. +To customize depth-of-field effects there are two main variables: the focal length and aperture radius. Currently the focal length, which determines the focal point of each ray, is determined in code as the distance between the camera position and "Look-At" point, while the aperture radius is defined as a constant (located in path_helpers.h). Increasing or decreasing the focal length moves the curved "focal plane" while changing the aperture size affects the range of jitter, making the image more or less blurry outside of the focal plane. This process is best described by the image below: + +![DOF](https://github.com/pjcozzi/Articles/raw/master/CIS565/GitHubRepo/Figures/dof.png) + +Due to the nature of this feature, it cannot be properly implemented while caching the first bounce (described in the next section). +This feature adds a little more overhead in generating new rays from the camera at the beginning of each iteration by adding more instructions per thread and memory access. This feature is toggleable from a defined boolean in path_helpers.h. A sequential implementation would be quite inefficient even for this seemingly small task versus parallel implementations. 
Since the code itself is short, few further GPU optimizations come to mind, but perhaps speeding up memory access, such as through using shared memory, would be useful. +In a hypothetical CPU implementation, the instructions would essentially be the same, but done sequentially in a loop. This means the CPU would need to generate twice as many random numbers as there are pixels, in sequence, and apply a pair of them to each ray and recalculate the direction. + +#### First-Bounce Caching + +First-Bounce caching is the storage of the first set of intersections between the rays from the camera and the scene. Instead of recalculating the first bounce every iteration, we copy the values from the first iteration into memory and reload them at the beginning of each iteration afterwards. The DOF effect requires randomization of the initial ray, therefore it is not compatible with this caching method, as it would only randomize the first cast and leave the image distorted. This problem can be seen in the following image, even after 373 iterations: + +This method has little overhead, only requiring a memory copy after the first intersection test, and then replaces subsequent first intersection tests with a memory copy, which should greatly reduce the computation time in the first bounce of each iteration. Since there are some randomized factors involved in shading and bouncing the rays, we can't store the path itself after the first bounce (which would further save on computation). + +### Materials + +#### Diffuse + +Diffuse materials are opaque, and due to surface particles scatter light in a "random" direction in a hemisphere around the surface normal of the point intersected. The color of the surface is determined by multiplying together the colors of each material the ray bounced from. This causes some color bleed from nearby objects, as seen in the example image, in which the red and green walls contribute color to the sphere. 
+ +#### Absorbancy + +Material absorbancy is modelled in refractive or subsurface scattering materials using an exponential function of path length through the material and material absorbancy. Since I did not implement imperfect specular I reused the materials' SPECEX parameter as the absorbance coefficient. Multiplying $e^{-aL}$ (where $a$ is the absorbance coefficient and $L$ is the path length through the material) by the accumulated path color models portions of the light energy being absorbed by the material instead of completely passing through. (insert pictures with and without absorbancy) + +#### Perfect Specular (Reflective) + +![Perfect Reflection](img/REFLECT.2018-09-27_15-13-55z.163samp.png) + +Perfectly reflective surfaces reflect the incident rays perfectly around the surface normals of the object. In code, the specular color is sampled and the reflected direction calculated from the incident and normal vectors. This creates a mirror-like effect on the object surface. Since the specular color is sampled, the reflection is tinted by this color. A white specular would act as a perfect mirror, while other colors act more like a smooth perfectly reflective colored metal. +An imperfect specular material would still reflect, but with some randomness around the perfect reflection vector, much like the diffuse case. This would better model rough reflective surfaces. + +#### Perfect Refractive + +![Perfect Refraction](img/refraction.2018-09-30_23-21-46z.501samp.png) + +Refractive materials transmit light through them, but if the refractive index of the material differs from that of the surrounding material (in this case air) the transmitted ray is "bent" at a new angle determined by the ratio of refractive indices and the angle of the incident ray with the surface normal. The result is a distorted view through the object of the objects behind or around it. 
+ +![Perfect Refraction](img/test.2018-10-01_00-09-16z.339samp.png) + +#### Imperfect Specular + +![Imperfect Specular](img/cornell.2018-10-01_03-19-26z.279samp.png) + +As previously mentioned, we can model imperfect reflection by randomly generating a ray in the hemisphere around the normal and then weighting it toward the ideal reflection vector with some exponential factor. The same specular exponent used in absorbance is reused here, with the reflection weighted by $1 - e^{-a}$ and the random diffuse vector weighted by $e^{-a}$. The effect is similar to a rougher metal. + +#### Imperfect Refractive (Partially reflective/Fresnel Effects) + +![Imperfect Refraction](img/ReflectRefractTest684sample.png) + +Materials with both reflective and refractive elements can be modelled by calculating the portions of the incident ray reflected and transmitted and then choosing which path to pursue using this as a probability. The coloring of the material is determined by adding the portion reflected multiplied by specular color to the portion refracted multiplied by diffuse color. The proportions of reflected versus transmitted rays were calculated using the Schlick Approximation. + +#### Subsurface Scattering + +![Subsurface Scattering](img/subsurface.2018-09-30_21-51-35z.602samp.png) + +Subsurface scattering is a material effect in which light diffuses into a material and is scattered by particles in the material. This results in a "milkiness" of the material, such as in candle wax or skin. The absorbancy and "scatter length" of the material are important in determining the color in this case. Currently the program uses a fixed defined scatter length, the average length between "scatter" events in the material, but in real materials this variable would depend on the material's crystal structure and components. 
A smaller scatter length means more scattering events on average through the material, so fewer photons would make it through in the number of bounces allowed, and there would be more light attenuation from the absorbancy of the material due to the increased total path through the material. +Potentially one could stop decrementing the allowed bounce count on scattering events inside the material to allow more accuracy, but this would increase compute time per iteration and make it more variable. +This image made it possible for me to wrap my head around the process of how it physically works: +![Subsurface Scatter](https://i.stack.imgur.com/tOp55.jpg) +Source: [Stack Exchange](https://computergraphics.stackexchange.com/questions/5214/a-recent-approach-for-subsurface-scattering) (I did not reference the actual code) + +Since the ray is scattering randomly in a sphere around the scatter point, it was necessary to add a function to calculate this as opposed to just a hemisphere as in the diffuse case. + +We can compare different scatter length effects from the images below. In order, they use scattering lengths of 0.01, 0.05, 0.1, and 0.5: + + + +## Toggles, Constants, and Changes + +As mentioned previously, the specular exponent parameter for materials in the scene files was reused as an absorbance coefficient per unit length. This may not have been its intended purpose, but that is how this program uses it for convenience. +CMakeLists.txt was edited to include my stream compaction files in the stream compaction folder. +There are several "toggles" defined in code for certain features. +In path_helpers.h: + * STREAM_COMPACT + * CACHE_FIRST_BOUNCE + * MATERIAL_SORT + * DEPTH_OF_FIELD +In interactions.h: + * SUBSURFACE + +Each is self-explanatory in functioning as a boolean toggling these features on and off. +Additionally, we have several defined constants for use with these features, which are briefly defined in their respective sections. 
+In path_helpers.h: + * COMPACT_BLOCK (block size for stream compaction) + * APERTURE_RADIUS +In interactions.h: + * PENETRATE_DEPTH (this is for moving the rays through a surface in or out of a material) + * SCATTER_LENGTH + + +## Comparison With CPU Implementation + + In this section we'll discuss the advantages of parallel GPU implementation of the features over a hypothetical equivalent sequential CPU implementation. + +The depth of field is currently implemented in the generateRayFromCamera kernel function, which initializes the rays and their paths. It is a rather short set of instructions, but involves generating two random numbers and applying them, repeated for each ray. While trivial for a single ray, repeating this once per pixel in every iteration becomes very non-trivial in terms of computational overhead when performed serially. + +First-bounce caching only requires memory copies. On a CPU, which has memory caching and generally faster clock speeds, this may be faster. However, since our other features are on the GPU it is faster to copy device-to-device rather than host-to-device and device-to-host. + +Shading processes are almost always going to be faster in parallel GPU processing. Applying material properties to an object can be computationally expensive and must be performed for each pixel being shaded. While the computations themselves may be performed faster (for a single pixel) on the CPU versus the GPU, the same cannot be said for shading on the order of millions of pixels in series as opposed to staggered in parallel. Likewise, applying an absorbancy factor is not very computationally expensive; the problem is simply repeating this calculation for millions of pixels. + +The next issue with CPU implementation would be ray compaction for reduced computation. Stream compaction is proven to be much faster for larger array sizes when computed in parallel (on the GPU). 
Furthermore, this would require being more dynamic in the loop iteration size or some recursion when implementing on the CPU as the point is to reduce the number of rays needed to be shaded as they terminate. + + +## Performance Analysis + +### Stream Compaction + +![SC Analysis](img/compaction.png) + +The number of paths left unterminated was measured over a single iteration using the "diffuse" scene. The linear slope of the plot using a logarithmic scale shows an exponential decrease in active paths as the depth increases. This means each level deeper a sizeable factor of paths either reached the light or the void outside the scene. With that many fewer rays to process, much fewer warps are required so we can assume a proportionally exponential increase in speed for each higher depth level to process when using compaction, minus the overhead of the compaction process. This also shows the diminishing returns in adding more depth levels, as 90% of the rays terminate by the 11th depth level in this example case. + +#### Open vs Closed Scenes + +![Open vs Closed](img/closed_vs_open.PNG) + +In an "open" scene, many rays will bounce into the void and thus terminate. However, when the scene is closed, those rays will instead bounce off another wall and continue to be active. While the number of active rays will still considerably reduce due to eventually reaching the light, this is nowhere near the path count attenuation seen in an open scene. In such cases, the stream compaction becomes much less efficient. If the stream compaction were to take longer than the memory access and extra warps from those rays, it would be wasteful. + +### First-Bounce Caching + + + +Left: With First Bounce Cache, Right: Without First-Bounce Cache + +To determine the performance of first-bounce caching, I estimated the run-time for 100 iterations with and without this feature enabled. 
Running a performance analysis on the pathtracer with first-bounce caching took approximately 61.48 seconds, and without it took 67.86 seconds. That seems to mean approximately 63.8 ms per iteration were saved by caching the first bounce. The analysis shows that there were 99 fewer calls to the computeIntersections kernel, as expected from skipping this once per iteration after the first. The analysis shows 552.3 ms difference in total time in that kernel, less than what would be estimated from just the run time difference, and the other kernels taking the bulk of the CUDA run time are around the same time with and without. Thus, most of the difference in total is probably due to fluctuations in CPU time, as this is not the only process running on my machine. Taking the difference between the computeIntersections runtimes gives an average of 5.52 ms less time per iteration in this kernel. + +### Material Sort + + + +A similar analysis was done for the program without sorting by material type. The purpose of sorting materials is to decrease time branching in the shader by ensuring rays intersecting the same materials are near each other in the array, so when the shader runs only the warps that straddle a boundary between material types will branch. Sorting materials took about 3.986 seconds total (including radix sort) over 100 iterations in the previous analysis. Shading took 2.013 seconds and computeIntersections 4.492 seconds. Surprisingly, the computeIntersections kernel takes longer (probably since sorting put rays intersecting the same object near each other), 6.310 seconds, and unsurprisingly shading took slightly longer, 2.250 seconds, when sorting was disabled. This adds up to a net overhead of 1.931 seconds from sorting by material. With a more complicated shader or scene perhaps this sort will prove beneficial, but with the current implementation it has been detrimental to the pathtracing speed. 
+ + diff --git a/img/DIFFUSE.2018-09-25_14-43-29z.313samp.png b/img/DIFFUSE.2018-09-25_14-43-29z.313samp.png new file mode 100644 index 0000000..23b7207 Binary files /dev/null and b/img/DIFFUSE.2018-09-25_14-43-29z.313samp.png differ diff --git a/img/DOFwFBC.2018-09-30_21-37-10z.373samp.png b/img/DOFwFBC.2018-09-30_21-37-10z.373samp.png new file mode 100644 index 0000000..1ea4cb7 Binary files /dev/null and b/img/DOFwFBC.2018-09-30_21-37-10z.373samp.png differ diff --git a/img/REFLECT.2018-09-27_15-13-55z.163samp.png b/img/REFLECT.2018-09-27_15-13-55z.163samp.png new file mode 100644 index 0000000..3d62434 Binary files /dev/null and b/img/REFLECT.2018-09-27_15-13-55z.163samp.png differ diff --git a/img/ReflectRefractTest684sample.png b/img/ReflectRefractTest684sample.png new file mode 100644 index 0000000..adef06b Binary files /dev/null and b/img/ReflectRefractTest684sample.png differ diff --git a/img/SampleScene1.2018-09-30_22-56-15z.574samp.png b/img/SampleScene1.2018-09-30_22-56-15z.574samp.png new file mode 100644 index 0000000..6e9002b Binary files /dev/null and b/img/SampleScene1.2018-09-30_22-56-15z.574samp.png differ diff --git a/img/big_DOF.2018-09-30_02-33-09z.770samp.png b/img/big_DOF.2018-09-30_02-33-09z.770samp.png new file mode 100644 index 0000000..95b4df6 Binary files /dev/null and b/img/big_DOF.2018-09-30_02-33-09z.770samp.png differ diff --git a/img/closed_vs_open.PNG b/img/closed_vs_open.PNG new file mode 100644 index 0000000..e27bfbd Binary files /dev/null and b/img/closed_vs_open.PNG differ diff --git a/img/compaction.png b/img/compaction.png new file mode 100644 index 0000000..12e9605 Binary files /dev/null and b/img/compaction.png differ diff --git a/img/cornell.2018-10-01_03-19-26z.279samp.png b/img/cornell.2018-10-01_03-19-26z.279samp.png new file mode 100644 index 0000000..f5afd7e Binary files /dev/null and b/img/cornell.2018-10-01_03-19-26z.279samp.png differ diff --git a/img/refraction.2018-09-27_22-44-44z.124samp.png 
b/img/refraction.2018-09-27_22-44-44z.124samp.png new file mode 100644 index 0000000..13f5f0b Binary files /dev/null and b/img/refraction.2018-09-27_22-44-44z.124samp.png differ diff --git a/img/refraction.2018-09-30_23-21-46z.501samp.png b/img/refraction.2018-09-30_23-21-46z.501samp.png new file mode 100644 index 0000000..3847e1f Binary files /dev/null and b/img/refraction.2018-09-30_23-21-46z.501samp.png differ diff --git a/img/refraction.2018-09-30_23-40-39z.3700samp.png b/img/refraction.2018-09-30_23-40-39z.3700samp.png new file mode 100644 index 0000000..4a7eb55 Binary files /dev/null and b/img/refraction.2018-09-30_23-40-39z.3700samp.png differ diff --git a/img/subsurface.2018-09-30_21-51-35z.602samp.png b/img/subsurface.2018-09-30_21-51-35z.602samp.png new file mode 100644 index 0000000..0d10830 Binary files /dev/null and b/img/subsurface.2018-09-30_21-51-35z.602samp.png differ diff --git a/img/subsurface.2018-09-30_22-21-57z.96samp_len0.01.png b/img/subsurface.2018-09-30_22-21-57z.96samp_len0.01.png new file mode 100644 index 0000000..e6ae97f Binary files /dev/null and b/img/subsurface.2018-09-30_22-21-57z.96samp_len0.01.png differ diff --git a/img/subsurface.2018-09-30_22-23-11z.147samp_len0.1.png b/img/subsurface.2018-09-30_22-23-11z.147samp_len0.1.png new file mode 100644 index 0000000..b68b395 Binary files /dev/null and b/img/subsurface.2018-09-30_22-23-11z.147samp_len0.1.png differ diff --git a/img/subsurface.2018-09-30_22-24-55z.128samp_len0.5.png b/img/subsurface.2018-09-30_22-24-55z.128samp_len0.5.png new file mode 100644 index 0000000..57dec15 Binary files /dev/null and b/img/subsurface.2018-09-30_22-24-55z.128samp_len0.5.png differ diff --git a/img/subsurface.2018-09-30_22-26-31z.54samp_len0.05.png b/img/subsurface.2018-09-30_22-26-31z.54samp_len0.05.png new file mode 100644 index 0000000..56224dd Binary files /dev/null and b/img/subsurface.2018-09-30_22-26-31z.54samp_len0.05.png differ diff --git a/img/test.2018-09-29_02-12-24z.83samp.png 
b/img/test.2018-09-29_02-12-24z.83samp.png new file mode 100644 index 0000000..a947855 Binary files /dev/null and b/img/test.2018-09-29_02-12-24z.83samp.png differ diff --git a/img/test.2018-10-01_00-09-16z.339samp.png b/img/test.2018-10-01_00-09-16z.339samp.png new file mode 100644 index 0000000..93c20e3 Binary files /dev/null and b/img/test.2018-10-01_00-09-16z.339samp.png differ diff --git a/img/wFBC.PNG b/img/wFBC.PNG new file mode 100644 index 0000000..f487f30 Binary files /dev/null and b/img/wFBC.PNG differ diff --git a/img/wFBC1_2.PNG b/img/wFBC1_2.PNG new file mode 100644 index 0000000..d58f475 Binary files /dev/null and b/img/wFBC1_2.PNG differ diff --git a/img/wFBC2.PNG b/img/wFBC2.PNG new file mode 100644 index 0000000..d896cd1 Binary files /dev/null and b/img/wFBC2.PNG differ diff --git a/img/wFBC2_2.PNG b/img/wFBC2_2.PNG new file mode 100644 index 0000000..7b22ef1 Binary files /dev/null and b/img/wFBC2_2.PNG differ diff --git a/img/wMaterialSort2.PNG b/img/wMaterialSort2.PNG new file mode 100644 index 0000000..1e25817 Binary files /dev/null and b/img/wMaterialSort2.PNG differ diff --git a/img/woFBC.PNG b/img/woFBC.PNG new file mode 100644 index 0000000..90cbda6 Binary files /dev/null and b/img/woFBC.PNG differ diff --git a/img/woFBC2.PNG b/img/woFBC2.PNG new file mode 100644 index 0000000..62ed044 Binary files /dev/null and b/img/woFBC2.PNG differ diff --git a/img/woMaterialSort.PNG b/img/woMaterialSort.PNG new file mode 100644 index 0000000..2828257 Binary files /dev/null and b/img/woMaterialSort.PNG differ diff --git a/img/woMaterialSort2.PNG b/img/woMaterialSort2.PNG new file mode 100644 index 0000000..fce32dd Binary files /dev/null and b/img/woMaterialSort2.PNG differ diff --git a/scenes/SampleScene.txt b/scenes/SampleScene.txt new file mode 100644 index 0000000..efdeb79 --- /dev/null +++ b/scenes/SampleScene.txt @@ -0,0 +1,171 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + 
+// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Perfect Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Subsurface Blue +MATERIAL 6 +RGB .9 .9 .98 +SPECEX 0.02 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Reflective/Refractive Purple, Green spec +MATERIAL 7 +RGB .98 0 .98 +SPECEX 0.02 +SPECRGB 0 0.98 0 +REFL 1 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE SampleScene1 +EYE 0 5 10 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Reflective Sphere +OBJECT 6 +sphere +material 4 +TRANS -3 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Refractive Sphere +OBJECT 7 +sphere +material 5 +TRANS 3 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Reflect/Refractive Sphere +OBJECT 8 +sphere +material 7 +TRANS 0 8 -2 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Subsurface Scatter Sphere +OBJECT 9 +sphere +material 6 +TRANS 0 2 0 +ROTAT 0 0 0 +SCALE 3 3 3 \ No newline at end of file diff --git a/scenes/SampleSceneCubes.txt b/scenes/SampleSceneCubes.txt new 
file mode 100644 index 0000000..4cfbcb3 --- /dev/null +++ b/scenes/SampleSceneCubes.txt @@ -0,0 +1,171 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Perfect Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Subsurface Blue +MATERIAL 6 +RGB .9 .9 .98 +SPECEX 0.02 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Reflective/Refractive Purple, Green spec +MATERIAL 7 +RGB .98 0 .98 +SPECEX 0.02 +SPECRGB 0 0.98 0 +REFL 1 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE SampleScene1 +EYE 0 5 10 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Reflective +OBJECT 6 +cube +material 4 +TRANS -3 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Refractive +OBJECT 7 +cube +material 5 +TRANS 3 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Reflect/Refractive +OBJECT 8 +cube +material 7 +TRANS 0 8 -2 +ROTAT 0 0 0 +SCALE 3 3 3 + +// 
Subsurface Scatter +OBJECT 9 +cube +material 6 +TRANS 0 2 0 +ROTAT 0 0 0 +SCALE 3 3 3 \ No newline at end of file diff --git a/scenes/big.txt b/scenes/big.txt new file mode 100644 index 0000000..d0096ff --- /dev/null +++ b/scenes/big.txt @@ -0,0 +1,177 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Refractive White +MATERIAL 5 +RGB 1 1 0 +SPECEX 0 +SPECRGB 1 0 1 +REFL 1 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Sub Blue +MATERIAL 6 +RGB 0.5 0.5 1 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 10 +FILE big_DOF +EYE 0.0 5 9 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 20 0 +ROTAT 0 0 0 +SCALE 10 .3 10 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 20 .01 20 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 20 0 +ROTAT 0 0 90 +SCALE .01 20 20 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 10 -10 +ROTAT 0 90 0 +SCALE .01 20 20 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -10 10 0 +ROTAT 0 0 0 +SCALE .01 20 20 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 10 10 0 +ROTAT 0 0 0 +SCALE .01 20 20 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -8 5 -2 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere +OBJECT 7 +sphere +material 5 +TRANS 8 5 -2 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere +OBJECT 8 +sphere +material 2 +TRANS 0 5 0 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere +OBJECT 9 +sphere 
+material 6 +TRANS -4 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere +OBJECT 10 +sphere +material 6 +TRANS 4 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Front Wall +//OBJECT 11 +//cube +//material 1 +//TRANS 0 10 10 +//ROTAT 0 -90 0 +//SCALE .01 20 20 \ No newline at end of file diff --git a/scenes/cornell.txt b/scenes/cornell.txt index 83ff820..4db6f05 100644 --- a/scenes/cornell.txt +++ b/scenes/cornell.txt @@ -41,7 +41,7 @@ EMITTANCE 0 // Specular white MATERIAL 4 RGB .98 .98 .98 -SPECEX 0 +SPECEX 0.5 SPECRGB .98 .98 .98 REFL 1 REFR 0 diff --git a/scenes/diffuse.txt b/scenes/diffuse.txt new file mode 100644 index 0000000..965026c --- /dev/null +++ b/scenes/diffuse.txt @@ -0,0 +1,117 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 1 +DEPTH 50 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 1 +TRANS -1 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 
diff --git a/scenes/reflect_cube.txt b/scenes/reflect_cube.txt new file mode 100644 index 0000000..5a4279c --- /dev/null +++ b/scenes/reflect_cube.txt @@ -0,0 +1,117 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Cube +OBJECT 6 +cube +material 4 +TRANS -1 4 -1 +ROTAT 0 0.25 0 +SCALE 3 3 3 diff --git a/scenes/refraction.txt b/scenes/refraction.txt new file mode 100644 index 0000000..c59db06 --- /dev/null +++ b/scenes/refraction.txt @@ -0,0 +1,135 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// 
Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Refractive White +MATERIAL 5 +RGB 0.98 0.98 0.98 +SPECEX 0.03 +SPECRGB 1 0 1 +REFL 0 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE refraction +EYE 0.0 5 2 +LOOKAT 2 4 -1 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +//OBJECT 6 +//sphere +//material 4 +//TRANS -2 4 -1 +//ROTAT 0 0 0 +//SCALE 3 3 3 + +// Sphere +OBJECT 6 +sphere +material 5 +TRANS 2 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/refraction2.txt b/scenes/refraction2.txt new file mode 100644 index 0000000..417aba7 --- /dev/null +++ b/scenes/refraction2.txt @@ -0,0 +1,127 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Refractive White +MATERIAL 5 +RGB 1 1 0 +SPECEX 0 +SPECRGB .98 .98 
.98 +REFL 0 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE refraction2 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 5 +TRANS -1 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/subsurface.txt b/scenes/subsurface.txt new file mode 100644 index 0000000..864c324 --- /dev/null +++ b/scenes/subsurface.txt @@ -0,0 +1,179 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Cream +MATERIAL 5 +RGB 1 1 0.9 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Red +MATERIAL 6 +RGB 0.98 0.8 0.8 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Blue +MATERIAL 7 +RGB 0.9 0.9 0.98 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE refraction +EYE 0.0 5 2 +LOOKAT 2 4 -1 +UP 0 1 
0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// subsurface scatter test +OBJECT 6 +cube +material 7 +TRANS 2 2.75 -1 +ROTAT 0 0 0 +SCALE 3 5.5 3 + +// subsurface scatter test +OBJECT 7 +cube +material 7 +TRANS 1 6 -2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// subsurface scatter test +OBJECT 8 +cube +material 7 +TRANS 3 6 -2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// subsurface scatter test +OBJECT 9 +cube +material 7 +TRANS 1 6.5 -2 +ROTAT 0 0 0 +SCALE 0.5 0.5 0.5 + +// subsurface scatter test +OBJECT 10 +sphere +material 6 +TRANS -2 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/subsurface2.txt b/scenes/subsurface2.txt new file mode 100644 index 0000000..63bfc23 --- /dev/null +++ b/scenes/subsurface2.txt @@ -0,0 +1,177 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Cream +MATERIAL 5 +RGB 1 1 0.9 +SPECEX 0.02 +SPECRGB 1 1 0.9 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Red +MATERIAL 6 +RGB 0.98 0.8 0.8 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 
+REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 10 +FILE subsurface +EYE -1 6 5 +LOOKAT 0 6 -2 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 50 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 50 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 50 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -25 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 25 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// subsurface scatter test +OBJECT 6 +cube +material 5 +TRANS 2 2.75 -1 +ROTAT 0 0 0 +SCALE 3 5.5 3 + +// subsurface scatter test +OBJECT 7 +cube +material 5 +TRANS 1 6 -2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// subsurface scatter test +OBJECT 8 +cube +material 5 +TRANS 3 6 -2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// subsurface scatter test +OBJECT 9 +cube +material 5 +TRANS 1 6.5 -2 +ROTAT 0 0 0 +SCALE 0.5 0.5 0.5 + +// subsurface scatter test +OBJECT 10 +cube +material 6 +TRANS 0 7 0 +ROTAT -45 0 0 +SCALE 3 0.25 3 + +// subsurface scatter test +OBJECT 11 +sphere +material 6 +TRANS -2 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/subsurface3.txt b/scenes/subsurface3.txt new file mode 100644 index 0000000..5b7368f --- /dev/null +++ b/scenes/subsurface3.txt @@ -0,0 +1,153 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 
+REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Refractive White +MATERIAL 5 +RGB 1 1 0 +SPECEX 0 +SPECRGB 1 0 1 +REFL 1 +REFR 1 +REFRIOR 2 +EMITTANCE 0 + +// Sub Blue +MATERIAL 6 +RGB 0.8 0.8 1 +SPECEX 0.02 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE sub3 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 20 0 +ROTAT 0 0 0 +SCALE 10 .3 10 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 20 .01 20 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 20 0 +ROTAT 0 0 90 +SCALE .01 20 20 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 10 -10 +ROTAT 0 90 0 +SCALE .01 20 20 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -10 10 0 +ROTAT 0 0 0 +SCALE .01 20 20 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 10 10 0 +ROTAT 0 0 0 +SCALE .01 20 20 + +// Sphere +OBJECT 6 +sphere +material 6 +TRANS -8 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere +OBJECT 7 +sphere +material 6 +TRANS 8 5 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Rect. 
Prism +OBJECT 8 +cube +material 6 +TRANS 0 5 0 +ROTAT 0 30 0 +SCALE 3 5 3 diff --git a/scenes/subsurface4.txt b/scenes/subsurface4.txt new file mode 100644 index 0000000..789dc28 --- /dev/null +++ b/scenes/subsurface4.txt @@ -0,0 +1,193 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Cream +MATERIAL 5 +RGB 1 1 0.9 +SPECEX 0.02 +SPECRGB 1 1 0.9 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Subsurface Red +MATERIAL 6 +RGB 0.98 0.8 0.8 +SPECEX 0.05 +SPECRGB 1 0 1 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 10 +FILE subsurface +EYE -1 6 5 +LOOKAT 0 6 -2 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 50 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 50 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 50 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -25 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 25 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// subsurface scatter test +OBJECT 6 +cube +material 5 +TRANS 2 2.75 -1 +ROTAT 0 0 0 +SCALE 3 5.5 3 + +// subsurface scatter test +OBJECT 7 +cube +material 5 +TRANS 1 6 -2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// subsurface scatter test +OBJECT 8 +cube +material 5 +TRANS 3 6 -2 +ROTAT 0 0 0 
+SCALE 1 1 1 + +// subsurface scatter test +OBJECT 9 +cube +material 5 +TRANS 1 6.5 -2 +ROTAT 0 0 0 +SCALE 0.5 0.5 0.5 + +// subsurface scatter test +OBJECT 10 +cube +material 6 +TRANS 0 7 0 +ROTAT -45 0 0 +SCALE 3 0.25 3 + +// subsurface scatter test +OBJECT 11 +sphere +material 6 +TRANS -2 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Ceiling light +OBJECT 12 +cube +material 0 +TRANS -10 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Ceiling light +OBJECT 13 +cube +material 0 +TRANS 10 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 diff --git a/scenes/test.txt b/scenes/test.txt new file mode 100644 index 0000000..a711a50 --- /dev/null +++ b/scenes/test.txt @@ -0,0 +1,187 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Refractive Yellow w/ Purple Spec +MATERIAL 5 +RGB 1 1 0 +SPECEX 0 +SPECRGB 1 0 1 +REFL 1 +REFR 1 +REFRIOR 1.5 +EMITTANCE 0 + +// Refractive BLUE +MATERIAL 6 +RGB 0.8 0.8 0.98 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 0 +REFR 1 +REFRIOR 3 +EMITTANCE 0 + +// Refractive RED +MATERIAL 7 +RGB 0.98 0.8 0.8 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 0 +REFR 1 +REFRIOR 3 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 100 +DEPTH 8 +FILE test +EYE 0.0 5 -10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 -20 +ROTAT 0 0 0 +SCALE 10 .01 50 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 -20 +ROTAT 0 0 90 +SCALE .01 10 50 + 
+// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -45 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 -20 +ROTAT 0 0 0 +SCALE .01 10 50 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 -20 +ROTAT 0 0 0 +SCALE .01 10 50 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -3 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Rect prism +OBJECT 7 +cube +material 5 +TRANS 3 2.5 0 +ROTAT 0.5 0 0.5 +SCALE 3 5 3 + +// Cube +OBJECT 8 +cube +material 6 +TRANS 0 6 3 +ROTAT 0 0 0 +SCALE 3 3 3 + +// "Front" wall +OBJECT 9 +cube +material 1 +TRANS 0 5 5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Sphere +OBJECT 10 +sphere +material 7 +TRANS -1 5 -5 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Cube +OBJECT 11 +cube +material 4 +TRANS 0 2 3 +ROTAT 0 0 0 +SCALE 3 3 3 \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a1cb3fb..13dfc17 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,6 +15,8 @@ set(SOURCE_FILES "preview.cpp" "utilities.cpp" "utilities.h" + "shader.h" + "path_helpers.h" ) cuda_add_library(src diff --git a/src/interactions.h b/src/interactions.h index 5ce3628..ac1905f 100644 --- a/src/interactions.h +++ b/src/interactions.h @@ -2,11 +2,59 @@ #include "intersections.h" +#define SUBSURFACE 1 +#define PENETRATE_DEPTH 0.0002f +#define SCATTER_LENGTH 0.05f // average length between scatters + // CHECKITOUT /** * Computes a cosine-weighted random direction in a hemisphere. * Used for diffuse lighting. 
*/ +__device__ +glm::vec3 calculateIdealReflect(glm::vec3 normal, glm::vec3 incident) { // mirrors incident about normal + glm::vec3 reflected = incident - 2 * glm::dot(incident, normal) * normal; + return reflected; +} + +__device__ +glm::vec3 calculateIdealRefract(glm::vec3 normal, glm::vec3 incident, float n) { // n: index ratio used in Snell's law (sin_t = sin_i / n) + // cos of angle between incident vector and normal + float cos_i = fabs(glm::dot(incident, normal)); + // sine of angle squared + float sin_i2 = 1 - cos_i * cos_i; + + // squared cosine of transmitted angle; negative means total internal reflection + float cos_t2 = 1 - sin_i2 / (n * n); + // no transmitted ray exists then, so mirror the ray instead of taking sqrt of a negative value (NaN) + if (cos_t2 < 0) return calculateIdealReflect(normal, incident); + return (incident / n) + ((cos_i / n) - sqrt(cos_t2)) * normal; +} + +__device__ +glm::vec3 calculateRandomDirectionInSphere(thrust::default_random_engine &rng) { + // unit vector from spherical angles: phi uniform in [0, 2*pi), cos(theta) = sign(a) * sqrt(|a|) for a uniform in [-1, 1] + thrust::uniform_real_distribution<float> u01(0, 1); + float phi = u01(rng) * TWO_PI; + + float a = u01(rng) * 2 - 1; // make distribution -1 to 1 + float costheta; + if (a < 0) { + costheta = -sqrt(-a); + } + else costheta = sqrt(a); + + float sintheta = sqrt(1 - costheta * costheta); // sin(theta) + + glm::vec3 rand_direction; + + rand_direction.x = sintheta * cos(phi); + rand_direction.y = sintheta * sin(phi); + rand_direction.z = costheta; + + return rand_direction; +} + __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere( glm::vec3 normal, thrust::default_random_engine &rng) { @@ -66,14 +114,166 @@ glm::vec3 calculateRandomDirectionInHemisphere( * * You may need to change the parameter list for your purposes! 
*/ -__host__ __device__ + +__device__ void shadeDiffuse(PathSegment & path, Material m, thrust::default_random_engine rng, glm::vec3 normal) { + // select if diffuse material + path.color *= m.color; + path.ray.direction = calculateRandomDirectionInHemisphere(normal, rng); +} + +__device__ void shadeReflective(PathSegment & path, Material m, glm::vec3 normal) { + // select if reflective material + path.color *= m.specular.color; + path.ray.direction = calculateIdealReflect(normal, path.ray.direction); + +} + +__device__ void shadeImperfectSpec(PathSegment & path, Material m, glm::vec3 normal, thrust::default_random_engine rng) { + glm::vec3 reflection = calculateIdealReflect(normal, path.ray.direction); + glm::vec3 diffuse = calculateRandomDirectionInHemisphere(normal, rng); + float spec_factor = exp(-m.specular.exponent); // 1 at exponent 0 (fully diffuse), tends to 0 as the exponent grows (mirror-like) + path.ray.direction = glm::normalize(diffuse * spec_factor + reflection * (1 - spec_factor)); + path.color *= m.color * spec_factor + m.specular.color * (1 - spec_factor); // multiply into the accumulated color, using the same weights as the direction blend +} + +__device__ float schlickApprox(float n, glm::vec3 normal, glm::vec3 incident) { + float dot = fabs(glm::dot(normal, incident)); + float r0 = pow((1 - n) / (1 + n), 2); + float R = r0 + (1 - r0) * pow((1 - dot), 5); + return R; +} + +__device__ void shadeReflectRefract(PathSegment & path, Material material, glm::vec3 intersect, glm::vec3 normal, thrust::default_random_engine rng) { + Ray & ray = path.ray; + + float length = glm::length(intersect - ray.origin); + ray.origin = intersect; + + float n = material.indexOfRefraction; + if (path.inside == true) { + n = 1 / n; + path.color *= exp(-length * material.specular.exponent); + } + float R = schlickApprox(n, normal, ray.direction); + float T = 1 - R; + glm::vec3 incident = ray.direction; + + thrust::uniform_real_distribution<float> u01(0, 1); + if (u01(rng) > T) { + ray.direction = calculateIdealReflect(normal, incident); + } + else { + ray.origin += (glm::normalize(ray.direction) - glm::normalize(normal)) * PENETRATE_DEPTH; // march 
ray through surface + ray.direction = calculateIdealRefract(normal, incident, n); // get new refracted direction + } + path.color *= material.specular.color * R + material.color * T; // tint by the R / T weighted mix of specular and base color +} + +__device__ void shadeRefractive(PathSegment & path, Material material, glm::vec3 intersect, glm::vec3 normal, thrust::default_random_engine rng) { + Ray & ray = path.ray; + + float n = material.indexOfRefraction; + if (path.inside == true) n = 1 / n; // path is flagged inside: use the reciprocal index + glm::vec3 incident = ray.direction; + + float length = glm::length(intersect - ray.origin); // distance from the previous ray origin to this hit + ray.origin = intersect; + + ray.origin += (glm::normalize(ray.direction) - glm::normalize(normal)) * PENETRATE_DEPTH; // march ray through surface + ray.direction = calculateIdealRefract(normal, incident, n); // get new refracted direction + + // specular exponent doubles as the absorption coefficient: attenuate by exp(-distance * exponent). NOTE(review): applied even when path.inside is false, unlike shadeReflectRefract - confirm intended + path.color *= material.color * exp(-length * material.specular.exponent); +} + +__device__ void shadeSubsurface(PathSegment & path, Material material, glm::vec3 intersect, glm::vec3 normal, thrust::default_random_engine rng) { + Ray & ray = path.ray; + + // enter material if not inside + if (path.inside == false) { + ray.origin = intersect; + ray.origin += (glm::normalize(ray.direction) - glm::normalize(normal)) * PENETRATE_DEPTH; // march ray through surface + ray.direction = calculateRandomDirectionInHemisphere(-normal, rng); // hemisphere diffuse around neg. 
normal + path.color *= material.color; // mix material color into ray + } + // else we need bounce the ray after some scattering distance + else { + float length = glm::length(intersect - ray.origin); // distance from the ray origin to the hit point + thrust::random::normal_distribution<float> dist(SCATTER_LENGTH, SCATTER_LENGTH / 3.0f); // mean SCATTER_LENGTH, std. dev. one third of it + float scatter = dist(rng); // distance along ray we do scatter + if (scatter <= 0 || scatter >= length) { + // if we don't scatter we go out of the material and diffuse + ray.origin = intersect; + ray.origin += (glm::normalize(ray.direction) - glm::normalize(normal)) * PENETRATE_DEPTH; // march ray through surface + ray.direction = calculateRandomDirectionInHemisphere(-normal, rng); // hemisphere diffuse around neg. normal + path.color *= material.color * exp(-length * material.specular.exponent); // mix material color into ray w/ some absorbancy + } + else { + // scatter in sphere around scatter point + ray.origin = getPointOnRay(ray, scatter); // point ray travels to scatter point + // generate new direction + ray.direction = calculateRandomDirectionInSphere(rng); + // color ray + path.color *= material.color * exp(-scatter * material.specular.exponent); // mix material color into ray w/ some absorbancy + } + + + } + +} + + +__device__ void scatterRay( - PathSegment & pathSegment, + PathSegment & path, glm::vec3 intersect, - glm::vec3 normal, + glm::vec3 normal, const Material &m, - thrust::default_random_engine &rng) { + thrust::default_random_engine rng + ) { // TODO: implement this. // A basic implementation of pure-diffuse shading will just call the // calculateRandomDirectionInHemisphere defined above. 
+ + //thrust::uniform_real_distribution u01(0, 1); + //path.ray.origin = intersect; + + + path.remainingBounces--; // every shading call uses up one bounce + + // if the material has emittance (light), multiply in the light factor + if (m.emittance > 0.0f) { + + path.color *= m.color * m.emittance; + + // terminate path + path.remainingBounces = 0; + return; + } + + + if (m.hasRefractive && m.hasReflective) { + // shader where both reflective and refraction elements accounted for + shadeReflectRefract(path, m, intersect, normal, rng); + } + else if (m.hasRefractive) { + // pure refractive shader + shadeRefractive(path, m, intersect, normal, rng); + } + else if (m.hasReflective) { + path.ray.origin = intersect; + // reflective shader: blended diffuse/mirror when the specular exponent is positive, perfect mirror otherwise + if (m.specular.exponent > 0) shadeImperfectSpec(path, m, normal, rng); + else shadeReflective(path, m, normal); + } + else if (SUBSURFACE && m.specular.exponent > 0) { + // subsurface scattering: neither reflective nor refractive, but with a positive specular exponent + shadeSubsurface(path, m, intersect, normal, rng); + } + else { + path.ray.origin = intersect; + // generic diffuse shading + shadeDiffuse(path, m, rng, normal); + } + } diff --git a/src/intersections.h b/src/intersections.h index 6f23872..5843ddb 100644 --- a/src/intersections.h +++ b/src/intersections.h @@ -42,11 +42,11 @@ __host__ __device__ glm::vec3 multiplyMV(glm::mat4 m, glm::vec4 v) { * * @param intersectionPoint Output parameter for point of intersection. * @param normal Output parameter for surface normal. - * @param outside Output param for whether the ray came from outside. + * @param inside Output param for whether the ray came from inside. * @return Ray parameter `t` value. -1 if no intersection. 
*/ __host__ __device__ float boxIntersectionTest(Geom box, Ray r, - glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &outside) { + glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &inside) { Ray q; q.origin = multiplyMV(box.inverseTransform, glm::vec4(r.origin , 1.0f)); q.direction = glm::normalize(multiplyMV(box.inverseTransform, glm::vec4(r.direction, 0.0f))); @@ -76,11 +76,11 @@ __host__ __device__ float boxIntersectionTest(Geom box, Ray r, } if (tmax >= tmin && tmax > 0) { - outside = true; + inside = false; if (tmin <= 0) { tmin = tmax; tmin_n = tmax_n; - outside = false; + inside = true; } intersectionPoint = multiplyMV(box.transform, glm::vec4(getPointOnRay(q, tmin), 1.0f)); normal = glm::normalize(multiplyMV(box.transform, glm::vec4(tmin_n, 0.0f))); @@ -96,11 +96,11 @@ __host__ __device__ float boxIntersectionTest(Geom box, Ray r, * * @param intersectionPoint Output parameter for point of intersection. * @param normal Output parameter for surface normal. - * @param outside Output param for whether the ray came from outside. + * @param inside Output param for whether the ray came from inside. * @return Ray parameter `t` value. -1 if no intersection. 
*/ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r, - glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &outside) { + glm::vec3 &intersectionPoint, glm::vec3 &normal, bool &inside) { float radius = .5; glm::vec3 ro = multiplyMV(sphere.inverseTransform, glm::vec4(r.origin, 1.0f)); @@ -126,17 +126,17 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r, return -1; } else if (t1 > 0 && t2 > 0) { t = min(t1, t2); - outside = true; + inside = false; } else { t = max(t1, t2); - outside = false; + inside = true; } glm::vec3 objspaceIntersection = getPointOnRay(rt, t); intersectionPoint = multiplyMV(sphere.transform, glm::vec4(objspaceIntersection, 1.f)); normal = glm::normalize(multiplyMV(sphere.invTranspose, glm::vec4(objspaceIntersection, 0.f))); - if (!outside) { + if (inside) { normal = -normal; } diff --git a/src/main.cpp b/src/main.cpp index fe8e85e..42fb57d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ #include "main.h" #include "preview.h" #include +//#include "stream_compaction/stream_compact.h" // test static std::string startTimeString; @@ -31,175 +32,178 @@ int height; //------------------------------- int main(int argc, char** argv) { - startTimeString = currentTimeString(); + //scanTest(); // test - if (argc < 2) { - printf("Usage: %s SCENEFILE.txt\n", argv[0]); - return 1; - } + startTimeString = currentTimeString(); - const char *sceneFile = argv[1]; + if (argc < 2) { + printf("Usage: %s SCENEFILE.txt\n", argv[0]); + return 1; + } - // Load scene file - scene = new Scene(sceneFile); + const char *sceneFile = argv[1]; - // Set up camera stuff from loaded path tracer settings - iteration = 0; - renderState = &scene->state; - Camera &cam = renderState->camera; - width = cam.resolution.x; - height = cam.resolution.y; + // Load scene file + scene = new Scene(sceneFile); - glm::vec3 view = cam.view; - glm::vec3 up = cam.up; - glm::vec3 right = glm::cross(view, up); - up = glm::cross(right, view); + // Set 
up camera stuff from loaded path tracer settings + iteration = 0; + renderState = &scene->state; + Camera &cam = renderState->camera; + width = cam.resolution.x; + height = cam.resolution.y; - cameraPosition = cam.position; + glm::vec3 view = cam.view; + glm::vec3 up = cam.up; + glm::vec3 right = glm::cross(view, up); + up = glm::cross(right, view); - // compute phi (horizontal) and theta (vertical) relative 3D axis - // so, (0 0 1) is forward, (0 1 0) is up - glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); - glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); - phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); - theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); - ogLookAt = cam.lookAt; - zoom = glm::length(cam.position - ogLookAt); + cameraPosition = cam.position; - // Initialize CUDA and GL components - init(); + // compute phi (horizontal) and theta (vertical) relative 3D axis + // so, (0 0 1) is forward, (0 1 0) is up + glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); + glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); + phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); + theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); + ogLookAt = cam.lookAt; + zoom = glm::length(cam.position - ogLookAt); - // GLFW main loop - mainLoop(); + // Initialize CUDA and GL components + init(); - return 0; + // GLFW main loop + mainLoop(); + + return 0; } void saveImage() { - float samples = iteration; - // output image file - image img(width, height); - - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - int index = x + (y * width); - glm::vec3 pix = renderState->image[index]; - img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); - } - } - - std::string filename = renderState->imageName; - std::ostringstream ss; - ss << filename << "." << startTimeString << "." 
<< samples << "samp"; - filename = ss.str(); - - // CHECKITOUT - img.savePNG(filename); - //img.saveHDR(filename); // Save a Radiance HDR file + float samples = iteration; + // output image file + image img(width, height); + + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + int index = x + (y * width); + glm::vec3 pix = renderState->image[index]; + img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); + } + } + + std::string filename = renderState->imageName; + std::ostringstream ss; + ss << filename << "." << startTimeString << "." << samples << "samp"; + filename = ss.str(); + + // CHECKITOUT + img.savePNG(filename); + //img.saveHDR(filename); // Save a Radiance HDR file } void runCuda() { - if (camchanged) { - iteration = 0; - Camera &cam = renderState->camera; - cameraPosition.x = zoom * sin(phi) * sin(theta); - cameraPosition.y = zoom * cos(theta); - cameraPosition.z = zoom * cos(phi) * sin(theta); - - cam.view = -glm::normalize(cameraPosition); - glm::vec3 v = cam.view; - glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); - glm::vec3 r = glm::cross(v, u); - cam.up = glm::cross(r, v); - cam.right = r; - - cam.position = cameraPosition; - cameraPosition += cam.lookAt; - cam.position = cameraPosition; - camchanged = false; - } - - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer - - if (iteration == 0) { - pathtraceFree(); - pathtraceInit(scene); - } - - if (iteration < renderState->iterations) { - uchar4 *pbo_dptr = NULL; - iteration++; - cudaGLMapBufferObject((void**)&pbo_dptr, pbo); - - // execute the kernel - int frame = 0; - pathtrace(pbo_dptr, frame, iteration); - - // unmap buffer object - cudaGLUnmapBufferObject(pbo); - } else { - saveImage(); - pathtraceFree(); - cudaDeviceReset(); - exit(EXIT_SUCCESS); - } + if (camchanged) { + iteration = 0; + Camera &cam = renderState->camera; + cameraPosition.x = zoom * sin(phi) * sin(theta); + cameraPosition.y = zoom * cos(theta); + cameraPosition.z = zoom * cos(phi) * sin(theta); + + cam.view = -glm::normalize(cameraPosition); + glm::vec3 v = cam.view; + glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); + glm::vec3 r = glm::cross(v, u); + cam.up = glm::cross(r, v); + cam.right = r; + + cam.position = cameraPosition; + cameraPosition += cam.lookAt; + cam.position = cameraPosition; + camchanged = false; + } + + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer + + if (iteration == 0) { + pathtraceFree(); + pathtraceInit(scene); + } + + if (iteration < renderState->iterations) { + uchar4 *pbo_dptr = NULL; + iteration++; + cudaGLMapBufferObject((void**)&pbo_dptr, pbo); + + // execute the kernel + int frame = 0; + pathtrace(pbo_dptr, frame, iteration); + + // unmap buffer object + cudaGLUnmapBufferObject(pbo); + } + else { + saveImage(); + pathtraceFree(); + cudaDeviceReset(); + exit(EXIT_SUCCESS); + } } void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods) { - if (action == GLFW_PRESS) { - switch (key) { - case GLFW_KEY_ESCAPE: - saveImage(); - glfwSetWindowShouldClose(window, GL_TRUE); - break; - case GLFW_KEY_S: - saveImage(); - break; - case GLFW_KEY_SPACE: - camchanged = true; - renderState = &scene->state; - Camera &cam = renderState->camera; - cam.lookAt = ogLookAt; - break; - } - } + if (action == GLFW_PRESS) { + switch (key) { + case GLFW_KEY_ESCAPE: + saveImage(); + glfwSetWindowShouldClose(window, GL_TRUE); + break; + case GLFW_KEY_S: + saveImage(); + break; + case GLFW_KEY_SPACE: + camchanged = true; + renderState = &scene->state; + Camera &cam = renderState->camera; + cam.lookAt = ogLookAt; + break; + } + } } void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods) { - leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); - rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); - middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); + leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); + rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); + middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); } void mousePositionCallback(GLFWwindow* window, double xpos, double ypos) { - if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into 
window causes re-start - if (leftMousePressed) { - // compute new camera parameters - phi -= (xpos - lastX) / width; - theta -= (ypos - lastY) / height; - theta = std::fmax(0.001f, std::fmin(theta, PI)); - camchanged = true; - } - else if (rightMousePressed) { - zoom += (ypos - lastY) / height; - zoom = std::fmax(0.1f, zoom); - camchanged = true; - } - else if (middleMousePressed) { - renderState = &scene->state; - Camera &cam = renderState->camera; - glm::vec3 forward = cam.view; - forward.y = 0.0f; - forward = glm::normalize(forward); - glm::vec3 right = cam.right; - right.y = 0.0f; - right = glm::normalize(right); - - cam.lookAt -= (float) (xpos - lastX) * right * 0.01f; - cam.lookAt += (float) (ypos - lastY) * forward * 0.01f; - camchanged = true; - } - lastX = xpos; - lastY = ypos; + if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into window causes re-start + if (leftMousePressed) { + // compute new camera parameters + phi -= (xpos - lastX) / width; + theta -= (ypos - lastY) / height; + theta = std::fmax(0.001f, std::fmin(theta, PI)); + camchanged = true; + } + else if (rightMousePressed) { + zoom += (ypos - lastY) / height; + zoom = std::fmax(0.1f, zoom); + camchanged = true; + } + else if (middleMousePressed) { + renderState = &scene->state; + Camera &cam = renderState->camera; + glm::vec3 forward = cam.view; + forward.y = 0.0f; + forward = glm::normalize(forward); + glm::vec3 right = cam.right; + right.y = 0.0f; + right = glm::normalize(right); + + cam.lookAt -= (float)(xpos - lastX) * right * 0.01f; + cam.lookAt += (float)(ypos - lastY) * forward * 0.01f; + camchanged = true; + } + lastX = xpos; + lastY = ypos; } diff --git a/src/path_helpers.h b/src/path_helpers.h new file mode 100644 index 0000000..bac1925 --- /dev/null +++ b/src/path_helpers.h @@ -0,0 +1,183 @@ +#pragma once + +#define COMPACT_BLOCK 512 + +#define STREAM_COMPACT 1 +#define CACHE_FIRST_BOUNCE 1 // whether to store first intersection (not compatible with DOF) 
+#define MATERIAL_SORT 1 // Whether or not to sort rays by material index
+#define DEPTH_OF_FIELD 0
+
+#define APERTURE_RADIUS 0.5f
+
+
+// Predicate: true while a path still has bounces left.
+struct isBouncy
+{
+    isBouncy() {};
+    __host__ __device__
+    bool operator()(const PathSegment& path)
+    {
+        return (path.remainingBounces > 0);
+    }
+};
+
+// Builds a thrust RNG whose seed mixes the iteration, the element index and
+// the bounce depth through utilhash.
+__host__ __device__
+thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
+    int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
+    return thrust::default_random_engine(h);
+}
+
+// Fills dev_val with the 0/1 flags consumed by the scan: 1 for a path that
+// still has bounces remaining, 0 otherwise. Both values are written
+// explicitly because the caller's buffer comes straight from cudaMalloc and
+// is never cleared.
+__global__ void kernSCsetup(int n, int* dev_val, PathSegment* paths) {
+    int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+    if (index >= n) return;
+    dev_val[index] = (paths[index].remainingBounces > 0) ? 1 : 0;
+}
+
+// Destination index for an element whose flag is 0:
+// f_arr[i] = i - t_arr[i] + totTrue.
+__global__ void kernComputeFalseIdx(int n, int totTrue, int* f_arr, int* t_arr) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+
+    f_arr[index] = index - t_arr[index] + totTrue;
+}
+
+// Scatter: flagged elements go to t_idx[i], all others to f_idx[i].
+__global__ void kernSortRays(int n, int* bools, int* t_idx, int* f_idx, PathSegment* input, PathSegment* output) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+
+    if (bools[index] == 1) output[t_idx[index]] = input[index];
+    else output[f_idx[index]] = input[index];
+}
+
+// Reorders the n paths in dev_paths so that those with remaining bounces
+// come first, and returns how many of them there are.
+// NOTE(review): the total (last scan value + last flag) and the scatter
+// indices assume gpuScan produces an exclusive prefix sum - confirm against
+// stream_compact.cu.
+int compactRays(int n, PathSegment *dev_paths) {
+    // Nothing to do for an empty range; this also avoids a zero-block launch
+    // and the dev_scan + n - 1 read below.
+    if (n <= 0) return 0;
+
+    int num_blocks = (n + COMPACT_BLOCK - 1) / COMPACT_BLOCK;
+    dim3 fullBlocksPerGrid(num_blocks);
+
+    int *dev_map;
+    int *dev_scan;
+    int *dev_false;
+
+    PathSegment* dev_out;
+
+    cudaMalloc((void**)&dev_map, n * sizeof(int));
+    cudaMalloc((void**)&dev_scan, n * sizeof(int));
+    cudaMalloc((void**)&dev_false, n * sizeof(int));
+    cudaMalloc((void**)&dev_out, n * sizeof(PathSegment));
+    checkCUDAError("compact malloc fail!");
+
+    // dev_out needs no memset: the scatter below writes every one of its n
+    // slots (each input lands at either its true or its false index).
+
+    // map
+    kernSCsetup <<<fullBlocksPerGrid, COMPACT_BLOCK>>> (n, dev_map, dev_paths);
+    checkCUDAError("compact setup fail!");
+
+    // scan
+    gpuScan(n, dev_scan, dev_map);
+    checkCUDAError("scanning fail!");
+
+    // calc. num of elements
+    int r1;
+    int r2;
+
+    cudaMemcpy(&r1, dev_scan + n - 1, sizeof(int), cudaMemcpyDeviceToHost);
+    cudaMemcpy(&r2, dev_map + n - 1, sizeof(int), cudaMemcpyDeviceToHost);
+    checkCUDAError("memcpy fail!");
+
+    // get false indices (kind of like radix sort)
+    kernComputeFalseIdx <<<fullBlocksPerGrid, COMPACT_BLOCK>>> (n, r1 + r2, dev_false, dev_scan);
+
+    // scatter
+    kernSortRays <<<fullBlocksPerGrid, COMPACT_BLOCK>>> (n, dev_map, dev_scan, dev_false, dev_paths, dev_out);
+    checkCUDAError("compact scatter fail!");
+
+    // copy output
+    cudaMemcpy(dev_paths, dev_out, n * sizeof(PathSegment), cudaMemcpyDeviceToDevice);
+
+    // free memory
+    cudaFree(dev_map);
+    cudaFree(dev_scan);
+    cudaFree(dev_false);
+    cudaFree(dev_out);
+    checkCUDAError("free fail!");
+
+    return (r1 + r2);
+
+}
+
+// Records, per element, its own index and the material id of its intersection.
+__global__ void kernMaterialMap(int n, ShadeableIntersection* intersects, int* materials, int* idx) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+
+    idx[index] = index;
+    materials[index] = (intersects[index]).materialId;
+}
+
+// Gathers paths and intersections into the order given by indices.
+__global__ void kernSortMaterial(int n, int* indices, PathSegment* paths, ShadeableIntersection* intersects, PathSegment* path_sort, ShadeableIntersection* inter_sort) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+
+    int idx = indices[index];
+
+    inter_sort[index] = intersects[idx];
+    path_sort[index] = paths[idx];
+}
+
+// Reorders the first n paths and their intersections, in place, so that
+// entries are grouped by intersection material id.
+void sortRaysMaterial(int n, PathSegment* paths, ShadeableIntersection* intersects) {
+    // An empty range needs no sorting and would otherwise launch zero blocks.
+    if (n <= 0) return;
+
+    int num_blocks = (n + COMPACT_BLOCK - 1) / COMPACT_BLOCK;
+    dim3 fullBlocksPerGrid(num_blocks);
+
+    int* dev_mat;
+    int* dev_idx;
+
+    ShadeableIntersection* inter_sort;
+    PathSegment* path_sort;
+
+    cudaMalloc((void**)&dev_mat, n * sizeof(int));
+    cudaMalloc((void**)&dev_idx, n * sizeof(int));
+    cudaMalloc((void**)&path_sort, n * sizeof(PathSegment));
+    cudaMalloc((void**)&inter_sort, n * sizeof(ShadeableIntersection));
+
+    // create material map
+    kernMaterialMap <<<fullBlocksPerGrid, COMPACT_BLOCK>>> (n, intersects, dev_mat, dev_idx);
+
+    // use as keys for sort
+    thrust::device_ptr<int> dev_thrust_keys = thrust::device_ptr<int>(dev_mat);
+
+    thrust::device_ptr<int> dev_thrust_vals = thrust::device_ptr<int>(dev_idx);
+
+    thrust::sort_by_key(dev_thrust_keys, dev_thrust_keys + n, dev_thrust_vals);
+
+    // sort
+    kernSortMaterial <<<fullBlocksPerGrid, COMPACT_BLOCK>>> (n, dev_idx, paths, intersects, path_sort, inter_sort);
+
+    // copy sorted vals
+    cudaMemcpy(paths, path_sort, n * sizeof(PathSegment), cudaMemcpyDeviceToDevice);
+    cudaMemcpy(intersects, inter_sort, n * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice);
+
+
+    cudaFree(dev_mat);
+    cudaFree(dev_idx);
+    cudaFree(path_sort);
+    cudaFree(inter_sort);
+
+}
+
+// Depth-of-field jitter: keeps each ray aimed at the point focalLength along
+// its original direction while offsetting the origin's x and y by uniform
+// random amounts in [-APERTURE_RADIUS, APERTURE_RADIUS).
+// cam is not used by the body; it is kept so the signature stays unchanged.
+__global__ void kernDOF(int num_paths, int iter, Camera cam, float focalLength, PathSegment* paths) {
+    // Calc. Thread Index
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx >= num_paths) return; // valid indices are 0 .. num_paths - 1
+
+    Ray & ray = paths[idx].ray;
+
+    glm::vec3 aim = glm::normalize(ray.direction) * focalLength + ray.origin;
+
+    // Set up RNG (separate engines, differently seeded, for the x and y offsets)
+    thrust::default_random_engine rngX = makeSeededRandomEngine(iter, num_paths - idx, 0);
+    thrust::default_random_engine rngY = makeSeededRandomEngine(iter, idx, 1);
+    thrust::uniform_real_distribution<float> u(-APERTURE_RADIUS, APERTURE_RADIUS);
+
+    ray.origin.x += u(rngX);
+    ray.origin.y += u(rngY);
+
+    ray.direction = glm::normalize(aim - ray.origin);
+
+}
\ No newline at end of file
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index c1ec122..0e6504c 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -2,8 +2,11 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
+#include
 
 #include "sceneStructs.h"
 #include "scene.h"
@@ -13,6 +16,9 @@
 #include "pathtrace.h"
 #include "intersections.h"
 #include "interactions.h"
+#include "stream_compaction/stream_compact.h"
+#include "path_helpers.h"
+#include "shader.h"
 
 #define ERRORCHECK 1
 
@@ -20,51 +26,51 @@
#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) void checkCUDAErrorFn(const char *msg, const char *file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } -__host__ __device__ -thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); -} +//__host__ __device__ +//thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { +// int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); +// return thrust::default_random_engine(h); +//} //Kernel that writes the image to the OpenGL PBO directly. 
__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)(pix.x / iter * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y / iter * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z / iter * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } static Scene * hst_scene = NULL; @@ -73,42 +79,48 @@ static Geom * dev_geoms = NULL; static Material * dev_materials = NULL; static PathSegment * dev_paths = NULL; static ShadeableIntersection * dev_intersections = NULL; + + // TODO: static variables for device memory, any extra info you need, etc // ... 
+ShadeableIntersection* first_bounce; void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + hst_scene = scene; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - // TODO: initialize any extra device memeory you need + // TODO: initialize any extra device memeory you need + cudaMalloc(&first_bounce, pixelcount * sizeof(ShadeableIntersection)); - checkCUDAError("pathtraceInit"); + checkCUDAError("pathtraceInit"); } void 
pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + // TODO: clean up any extra device memory you created + + cudaFree(first_bounce); + + checkCUDAError("pathtraceFree"); } /** @@ -119,27 +131,44 @@ void pathtraceFree() { * motion blur - jitter rays "in time" * lens effect - jitter ray origin positions based on a lens */ -__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments) +__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments, float focalLength) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; - if (x < cam.resolution.x && y < cam.resolution.y) { - int index = x + (y * cam.resolution.x); - PathSegment & segment = pathSegments[index]; + if (x >= cam.resolution.x || y >= cam.resolution.y) return; + + int index = x + (y * cam.resolution.x); + + PathSegment & segment = pathSegments[index]; - segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.ray.origin = cam.position; + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); - // TODO: implement antialiasing by jittering the ray - segment.ray.direction = glm::normalize(cam.view - - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); + // TODO: implement antialiasing by jittering the ray + segment.ray.direction = glm::normalize(cam.view + - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)y - 
(float)cam.resolution.y * 0.5f) + ); - segment.pixelIndex = index; - segment.remainingBounces = traceDepth; + // depth of field + if (DEPTH_OF_FIELD) { + glm::vec3 aim = glm::normalize(segment.ray.direction) * focalLength + segment.ray.origin; + + // Set up RNG + thrust::default_random_engine rngX = makeSeededRandomEngine(iter, cam.resolution.x * cam.resolution.y - index, 0); + thrust::default_random_engine rngY = makeSeededRandomEngine(iter, index, 1); + thrust::uniform_real_distribution u(-APERTURE_RADIUS, APERTURE_RADIUS); + + segment.ray.origin.x += u(rngX); + segment.ray.origin.y += u(rngY); + + segment.ray.direction = glm::normalize(aim - segment.ray.origin); } + + segment.pixelIndex = index; + segment.remainingBounces = traceDepth; + segment.inside = false; } // TODO: @@ -153,20 +182,21 @@ __global__ void computeIntersections( , Geom * geoms , int geoms_size , ShadeableIntersection * intersections - ) +) { int path_index = blockIdx.x * blockDim.x + threadIdx.x; if (path_index < num_paths) { PathSegment pathSegment = pathSegments[path_index]; + if (pathSegment.remainingBounces == 0) return; float t; glm::vec3 intersect_point; glm::vec3 normal; float t_min = FLT_MAX; int hit_geom_index = -1; - bool outside = true; + bool & inside = pathSegment.inside; glm::vec3 tmp_intersect; glm::vec3 tmp_normal; @@ -179,11 +209,11 @@ __global__ void computeIntersections( if (geom.type == CUBE) { - t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, inside); } else if (geom.type == SPHERE) { - t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, inside); } // TODO: add more intersection tests here... triangle? metaball? CSG? 
@@ -209,7 +239,10 @@ __global__ void computeIntersections( intersections[path_index].materialId = geoms[hit_geom_index].materialid; intersections[path_index].surfaceNormal = normal; } + + pathSegments[path_index].inside = inside; } + } // LOOK: "fake" shader demonstrating what you might do with the info in @@ -221,48 +254,49 @@ __global__ void computeIntersections( // Note that this shader does NOT do a BSDF evaluation! // Your shaders should handle that - this can allow techniques such as // bump mapping. -__global__ void shadeFakeMaterial ( - int iter - , int num_paths +__global__ void shadeFakeMaterial( + int iter + , int num_paths , ShadeableIntersection * shadeableIntersections , PathSegment * pathSegments , Material * materials - ) +) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - if (intersection.t > 0.0f) { // if the intersection exists... - // Set up the RNG - // LOOK: this is how you use thrust's RNG! Please look at - // makeSeededRandomEngine as well. - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); - thrust::uniform_real_distribution u01(0, 1); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - pathSegments[idx].color *= (materialColor * material.emittance); - } - // Otherwise, do some pseudo-lighting computation. This is actually more - // like what you would expect from shading in a rasterizer like OpenGL. - // TODO: replace this! 
you should be able to start with basically a one-liner - else { - float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); - pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; - pathSegments[idx].color *= u01(rng); // apply some noise because why not - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. - } else { - pathSegments[idx].color = glm::vec3(0.0f); - } - } + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + ShadeableIntersection intersection = shadeableIntersections[idx]; + if (intersection.t > 0.0f) { // if the intersection exists... + // Set up the RNG + // LOOK: this is how you use thrust's RNG! Please look at + // makeSeededRandomEngine as well. + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); + thrust::uniform_real_distribution u01(0, 1); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + pathSegments[idx].color *= (materialColor * material.emittance); + } + // Otherwise, do some pseudo-lighting computation. This is actually more + // like what you would expect from shading in a rasterizer like OpenGL. + // TODO: replace this! 
you should be able to start with basically a one-liner + else { + float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); + pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; + pathSegments[idx].color *= u01(rng); // apply some noise because why not + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. + } + else { + pathSegments[idx].color = glm::vec3(0.0f); + } + } } // Add the current iteration's output to the overall image @@ -282,51 +316,52 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati * of memory management */ void pathtrace(uchar4 *pbo, int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + const int traceDepth = hst_scene->state.traceDepth; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // 1D block for path tracing const int blockSize1d = 128; - /////////////////////////////////////////////////////////////////////////// - - // Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. 
- // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. - // * Color is attenuated (multiplied) by reflections off of any object - // * TODO: Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * TODO: Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally, add this iteration's results to the image. This has been done - // for you. - - // TODO: perform one iteration of path tracing - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); + /////////////////////////////////////////////////////////////////////////// + + // Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. + // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * For each depth: + // * Compute an intersection in the scene for each path ray. 
+ // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. + // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * TODO: Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * TODO: Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally, add this iteration's results to the image. This has been done + // for you. 
+ + // TODO: perform one iteration of path tracing + + float focalLength = glm::length(cam.lookAt - cam.position); + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths, focalLength); checkCUDAError("generate camera ray"); int depth = 0; @@ -335,59 +370,87 @@ void pathtrace(uchar4 *pbo, int frame, int iter) { // --- PathSegment Tracing Stage --- // Shoot ray into scene, bounce between objects, push shading chunks + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + + int new_num_paths = num_paths; + + thrust::device_vector dev_thrust_paths = thrust::device_vector(dev_paths, dev_path_end); + auto dev_thrust_end = dev_thrust_paths.end(); - bool iterationComplete = false; + bool iterationComplete = false; while (!iterationComplete) { - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections - ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); - depth++; - - - // TODO: - // --- Shading Stage --- - // Shade path segments based on intersections and generate new rays by - // evaluating the BSDF. - // Start off with just a big kernel that handles all the different - // materials you have in the scenefile. - // TODO: compare between directly shading the path segments and shading - // path segments that have been reshuffled to be contiguous in memory. - - shadeFakeMaterial<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = true; // TODO: should be based off stream compaction results. 
+ // tracing + if (CACHE_FIRST_BOUNCE && iter > 1 && depth == 0) { + cudaMemcpy(dev_intersections, first_bounce, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + else { + computeIntersections << > > ( + depth + , new_num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + } + + // cache first bounce + if (CACHE_FIRST_BOUNCE && iter == 1 && depth == 0) { + cudaMemcpy(first_bounce, dev_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + + depth++; + + // TODO: + // --- Shading Stage --- + // Shade path segments based on intersections and generate new rays by + // evaluating the BSDF. + // Start off with just a big kernel that handles all the different + // materials you have in the scenefile. + // TODO: compare between directly shading the path segments and shading + // path segments that have been reshuffled to be contiguous in memory. + if (MATERIAL_SORT) sortRaysMaterial(new_num_paths, dev_paths, dev_intersections); + + //kernShadeGeneric<<< numblocksPathSegmentTracing, blockSize1d >>>(iter, new_num_paths, depth, dev_intersections, dev_paths, dev_materials); + kernShadeMaterials << > > (iter, new_num_paths, depth, dev_intersections, dev_paths, dev_materials); + checkCUDAError("Shader Failed!"); + + + // my compaction + if (STREAM_COMPACT) new_num_paths = compactRays(num_paths, dev_paths); + + // thrust compaction, turns out not working? 
+ //if (STREAM_COMPACT) { + //dev_thrust_end = thrust::partition(dev_thrust_paths.begin(), dev_thrust_paths.end(), isBouncy()); + //new_num_paths = dev_thrust_end - dev_thrust_paths.begin(); + //} + //printf("Depth: %i, Num_paths: %i\n", depth, new_num_paths); + + numblocksPathSegmentTracing = (new_num_paths + blockSize1d - 1) / blockSize1d; + + // if no more bounces iteration done + if (depth >= traceDepth || new_num_paths == 0) + iterationComplete = true; // TODO: should be based off stream compaction results. } - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + finalGather << > > (num_paths, dev_image, dev_paths); - /////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////// - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - checkCUDAError("pathtrace"); + checkCUDAError("pathtrace"); } diff --git a/src/sceneStructs.h b/src/sceneStructs.h index b38b820..c2f5669 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -64,6 +64,8 @@ struct PathSegment { glm::vec3 color; int pixelIndex; int remainingBounces; + + bool inside; }; // Use with a corresponding PathSegment to do: diff --git a/src/shader.h b/src/shader.h new file mode 100644 index 0000000..f5189b8 --- 
/dev/null
+++ b/src/shader.h
@@ -0,0 +1,90 @@
+#pragma once
+
+// Shading kernels for the path tracer: one thread shades one path segment.
+// NOTE(review): PathSegment, ShadeableIntersection, Material, getPointOnRay,
+// scatterRay and makeSeededRandomEngine are not declared in this header, so it
+// presumably has to be included after their declarations -- confirm.
+
+/**
+ * Shade every live path using the intersection computed for it.
+ *
+ * Paths with no bounces left are skipped. A path whose intersection has
+ * t == -1 is coloured black and terminated; any other path gets its hit
+ * point and material looked up and is updated in place by scatterRay().
+ *
+ * @param iter                   Passed to makeSeededRandomEngine().
+ * @param num_paths              Number of paths to shade.
+ * @param depth                  Passed to makeSeededRandomEngine().
+ * @param shadeableIntersections Intersection for each path, same index.
+ * @param pathSegments           Paths to shade; modified in place.
+ * @param materials              Material table indexed by materialId.
+ */
+__global__ void kernShadeMaterials(int iter, int num_paths, int depth,
+        ShadeableIntersection *shadeableIntersections,
+        PathSegment *pathSegments, Material *materials) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    // num_paths is a count, so idx == num_paths is already out of range.
+    if (idx >= num_paths) return;
+
+    PathSegment &path = pathSegments[idx]; // the ray we are checking
+    if (path.remainingBounces < 1) return; // abort if no need to shade
+
+    // Set up RNG
+    thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, depth);
+
+    // the calculated intersection of this ray with the scene objects
+    ShadeableIntersection intersect = shadeableIntersections[idx];
+    if (intersect.t == -1) {
+        // if no intersection, black
+        path.color = glm::vec3(0.0f);
+
+        // terminate path
+        path.remainingBounces = 0;
+    }
+    else {
+        glm::vec3 intersectPoint = getPointOnRay(path.ray, intersect.t);
+        Material material = materials[intersect.materialId]; // load material
+        glm::vec3 normal = intersect.surfaceNormal;
+
+        scatterRay(path, intersectPoint, normal, material, rng);
+    }
+}
+
+/**
+ * Basic naive shading/ray gen.
+ *
+ * Performs the same steps as kernShadeMaterials and takes the same
+ * parameters: skip finished paths, blacken and terminate paths whose
+ * intersection has t == -1, and hand every other path to scatterRay().
+ */
+__global__ void kernShadeGeneric(int iter, int num_paths, int depth,
+        ShadeableIntersection *shadeableIntersections,
+        PathSegment *pathSegments, Material *materials) {
+    // Calc. Thread Index
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    // num_paths is a count, so idx == num_paths is already out of range.
+    if (idx >= num_paths) return;
+
+    PathSegment &path = pathSegments[idx]; // the ray we are checking
+    if (path.remainingBounces < 1) return;
+
+    // Set up RNG
+    thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, depth);
+
+    // check for intersection
+    ShadeableIntersection intersect = shadeableIntersections[idx];
+    if (intersect.t == -1) {
+        // if no intersection, black
+        path.color = glm::vec3(0.0f);
+
+        // terminate path
+        path.remainingBounces = 0;
+    }
+    else {
+        Material material = materials[intersect.materialId]; // load material
+
+        // bsdf stuff
+        glm::vec3 intersectPoint = getPointOnRay(path.ray, intersect.t);
+        scatterRay(path, intersectPoint, intersect.surfaceNormal, material, rng);
+    }
+}
diff --git a/stream_compaction/CMakeLists.txt b/stream_compaction/CMakeLists.txt
index ac358c9..a59011c 100644
--- a/stream_compaction/CMakeLists.txt
+++ b/stream_compaction/CMakeLists.txt
@@ -1,4 +1,6 @@
 set(SOURCE_FILES
+    "stream_compact.h"
+    "stream_compact.cu"
     )
 
 cuda_add_library(stream_compaction
diff --git a/stream_compaction/stream_compact.cu b/stream_compaction/stream_compact.cu
new file mode 100644
index 0000000..928c7ea
--- /dev/null
+++ b/stream_compaction/stream_compact.cu
@@ -0,0 +1,313 @@
+#include "stream_compact.h"
+
+/**
+ * Write 1 to bools[i] when idata[i] is non-zero and 0 otherwise.
+ *
+ * @param n     Number of elements.
+ * @param bools Output flags.
+ * @param idata Input values.
+ */
+__global__ void kernMapToBoolean(int n, int *bools, const int *idata) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+    bools[index] = (idata[index] != 0) ? 1 : 0;
+}
+
+/**
+ * Copy idata[i] to odata[indices[i]] for every i with bools[i] == 1.
+ *
+ * @param n       Number of elements.
+ * @param odata   Compacted output.
+ * @param idata   Values to compact.
+ * @param bools   Keep flags (1 = keep).
+ * @param indices Destination slot of each kept element.
+ */
+__global__ void kernScatter(int n, int *odata,
+        const int *idata, const int *bools, const int *indices) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+    if (index >= n) return;
+    if (bools[index] == 1) odata[indices[index]] = idata[index];
+}
+
+/**
+ * One level of the global-memory up-sweep.
+ *
+ * Thread i >= 1 adds buff[i * offset2 - 1 - offset1] into buff[i * offset2 - 1].
+ *
+ * @param n       Length of buff.
+ * @param offset1 Distance between the two combined elements.
+ * @param offset2 Spacing between updated elements; must be positive.
+ * @param buff    Buffer updated in place.
+ */
+__global__ void kernScanDataUpSweep(int n, int offset1, int offset2, int* buff) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+
+    // Same range as 0 <= index * offset2 - 1 < n, but written as a division so
+    // that surplus threads cannot overflow the multiplication.
+    if (n < 1 || index < 1 || index > n / offset2) return;
+
+    int access = index * offset2 - 1;
+    buff[access] += buff[access - offset1];
+}
+
+/**
+ * One level of the global-memory down-sweep.
+ *
+ * Thread i >= 1 swaps buff[i * offset2 - 1] into the slot offset1 to its left
+ * and adds that slot's previous value to buff[i * offset2 - 1].
+ *
+ * @param n       Length of buff.
+ * @param offset1 Distance between the two combined elements.
+ * @param offset2 Spacing between updated elements; must be positive.
+ * @param buff    Buffer updated in place.
+ */
+__global__ void kernScanDataDownSweep(int n, int offset1, int offset2, int* buff) {
+    int index = (blockDim.x * blockIdx.x) + threadIdx.x;
+
+    // Same range as 0 <= index * offset2 - 1 < n, without the overflow risk.
+    if (n < 1 || index < 1 || index > n / offset2) return;
+
+    int access = index * offset2 - 1;
+    int temp = buff[access - offset1];
+    buff[access - offset1] = buff[access];
+    buff[access] += temp;
+}
+
+/**
+ * Exclusive scan of one blockSize-wide tile per block, in shared memory.
+ *
+ * Expects blockSize threads per block and a power-of-two blockSize. Elements
+ * at index >= n are read as 0.
+ *
+ * @param n    Number of elements in in and out.
+ * @param in   Input values.
+ * @param out  Receives the per-tile exclusive scan; gpuScan passes in == out.
+ * @param sums If not NULL, sums[blockIdx.x] receives the total of the tile.
+ */
+__global__ void kernScanDataShared(int n, int* in, int* out, int* sums) {
+    // Padded so the largest conflict-free index still fits in the buffer.
+    __shared__ int sBuf[blockSize + CONFLICT_FREE_OFFSET(blockSize - 1)];
+
+    int tx = threadIdx.x;
+    int index = (blockDim.x * blockIdx.x) + tx;
+
+    int off_tx = tx + CONFLICT_FREE_OFFSET(tx);
+
+    // copy used vals to shared mem
+    sBuf[off_tx] = (index < n) ? in[index] : 0;
+    __syncthreads(); // avoid mem issues
+
+    // Upsweep. Bounds are tested on the logical index, before padding.
+    for (int offset = 1; offset < blockSize; offset *= 2) {
+        int access = (2 * offset * (tx + 1)) - 1;
+        if (access < blockSize) {
+            int a2 = access - offset;
+            sBuf[access + CONFLICT_FREE_OFFSET(access)] +=
+                sBuf[a2 + CONFLICT_FREE_OFFSET(a2)];
+        }
+        __syncthreads(); // avoid mem issues
+    }
+
+    // The last slot now holds the tile total: export it, then clear it so the
+    // downsweep yields an exclusive scan.
+    if (tx == blockSize - 1) {
+        if (sums != NULL) sums[blockIdx.x] = sBuf[off_tx];
+        sBuf[off_tx] = 0;
+    }
+    __syncthreads(); // avoid mem issues
+
+    // Downsweep
+    for (int offset = blockSize / 2; offset >= 1; offset /= 2) {
+        int access = (2 * offset * (tx + 1)) - 1;
+        if (access < blockSize) {
+            int a2 = access - offset;
+            int left = a2 + CONFLICT_FREE_OFFSET(a2);
+            int right = access + CONFLICT_FREE_OFFSET(access);
+            int temp = sBuf[left];    // store left child
+            sBuf[left] = sBuf[right]; // swap
+            sBuf[right] += temp;      // add
+        }
+        __syncthreads(); // avoid mem issues
+    }
+
+    // write to dev memory
+    if (index < n) {
+        out[index] = sBuf[off_tx];
+    }
+}
+
+/**
+ * Add sums[blockIdx.x] to every element handled by the block.
+ *
+ * @param n    Number of elements in in.
+ * @param in   Per-tile scan results, updated in place.
+ * @param sums Scanned tile totals, one entry per block.
+ */
+__global__ void kernStitch(int n, int* in, int* sums) {
+    int bx = blockIdx.x;
+    int index = (blockDim.x * bx) + threadIdx.x;
+
+    if (bx == 0) return;
+    if (index >= n) return;
+    in[index] += sums[bx];
+}
+
+/**
+ * Exclusive prefix sum of n ints, computed on the device.
+ *
+ * @param n       Number of elements; nothing is done when n < 1.
+ * @param dev_out Device buffer receiving the scan.
+ * @param dev_in  Device buffer holding the input.
+ */
+void gpuScan(int n, int* dev_out, int* dev_in) {
+    if (n < 1) return;
+
+    // set up shared mem scan
+    int num_blocks = 1 + (n - 1) / blockSize; // number of blocks n elements fit into
+    int limit = ilog2ceil(num_blocks);
+    int sum_size = 1 << limit; // size of block sum array for scanning
+
+    dim3 fullBlocksPerGrid(num_blocks); // blocks in grid to start with
+
+    int* dev_sums; // sums, from first blockwise scan
+    cudaMalloc((void**)&dev_sums, sum_size * sizeof(int));
+    // Zero the padding past num_blocks so the global up-sweep never adds garbage.
+    cudaMemset(dev_sums, 0, sum_size * sizeof(int));
+    checkCUDAError("scan block sums alloc fail!");
+
+    cudaMemset(dev_out, 0, n * sizeof(int)); // initialize output buffer
+    checkCUDAError("initializing shared mem scan data buff fail!");
+
+    // shared mem scan blocks of data
+    kernScanDataShared<<<fullBlocksPerGrid, blockSize>>>(n, dev_in, dev_out, dev_sums);
+    checkCUDAError("shared mem scan fail!");
+
+    if (num_blocks > 1) {
+        // scan sums
+        if (num_blocks <= blockSize) {
+            // can use faster shared mem scan
+            fullBlocksPerGrid.x = 1 + (num_blocks - 1) / blockSize;
+            kernScanDataShared<<<fullBlocksPerGrid, blockSize>>>(num_blocks, dev_sums, dev_sums, NULL);
+            checkCUDAError("sum shared scan fail!");
+        }
+        else {
+            // use global memory scan (easier)
+
+            // UpSweep
+            for (int d = 1; d <= limit; d++) {
+                int offset1 = 1 << (d - 1);
+                int offset2 = 1 << d;
+                int num_threads = sum_size / offset2;
+                fullBlocksPerGrid.x = 1 + num_threads / blockSize;
+                kernScanDataUpSweep<<<fullBlocksPerGrid, blockSize>>>(sum_size, offset1, offset2, dev_sums);
+                checkCUDAError("upsweep fail!");
+            }
+
+            // DownSweep: clear the root first so the result is exclusive
+            cudaMemset(dev_sums + sum_size - 1, 0, sizeof(int));
+            for (int d = limit; d >= 1; d--) {
+                int offset1 = 1 << (d - 1);
+                int offset2 = 1 << d;
+                int num_threads = sum_size / offset2;
+                fullBlocksPerGrid.x = 1 + num_threads / blockSize;
+                kernScanDataDownSweep<<<fullBlocksPerGrid, blockSize>>>(sum_size, offset1, offset2, dev_sums);
+                checkCUDAError("downsweep fail!");
+            }
+        }
+
+        // stitch together blocks
+        fullBlocksPerGrid.x = num_blocks;
+        kernStitch<<<fullBlocksPerGrid, blockSize>>>(n, dev_out, dev_sums);
+        checkCUDAError("shared mem scan stitch fail!");
+    }
+
+    cudaFree(dev_sums);
+}
+
+/**
+ * Compact dev_in2 by the non-zero entries of dev_in1.
+ *
+ * for pathtrace:
+ *   dev_in1 is the # of bounces remaining array
+ *   dev_in2 is the pixel index
+ *   dev_out is then the sorted pixel indices
+ *
+ * @param n Number of elements in each buffer.
+ * @return  # members w/ bounces remaining; 0 when n < 1.
+ */
+int intCompact(int n, int* dev_out, int* dev_in1, int *dev_in2) {
+    // dev_scan + n - 1 below would point in front of the buffer for n < 1.
+    if (n < 1) return 0;
+
+    int* dev_map;
+    int* dev_scan;
+
+    int num_blocks = 1 + (n - 1) / blockSize;
+    dim3 fullBlocksPerGrid(num_blocks);
+
+    cudaMalloc((void**)&dev_map, n * sizeof(int));
+    cudaMalloc((void**)&dev_scan, n * sizeof(int));
+    checkCUDAError("compact malloc fail!");
+
+    cudaMemset(dev_out, 0, n * sizeof(int));
+
+    // map
+    kernMapToBoolean<<<fullBlocksPerGrid, blockSize>>>(n, dev_map, dev_in1);
+    checkCUDAError("shared mem compact bool mapping fail!");
+
+    // scan
+    gpuScan(n, dev_scan, dev_map);
+
+    // scatter
+    kernScatter<<<fullBlocksPerGrid, blockSize>>>(n, dev_out, dev_in2, dev_map, dev_scan);
+    checkCUDAError("shared mem compact scatter fail!");
+
+    // calc. num of elements: the exclusive scan leaves out the last flag, so
+    // it is added back in.
+    int r1 = 0;
+    int r2 = 0;
+
+    cudaMemcpy(&r1, dev_scan + n - 1, sizeof(int), cudaMemcpyDeviceToHost);
+    cudaMemcpy(&r2, dev_map + n - 1, sizeof(int), cudaMemcpyDeviceToHost);
+    checkCUDAError("compact count copy fail!");
+
+    // free memory
+    cudaFree(dev_map);
+    cudaFree(dev_scan);
+
+    return (r1 + r2);
+}
+
+#define TEST_SIZE 15
+/**
+ * Scan the values 0..14 on the device and print the result.
+ */
+void scanTest() {
+    int test_in[TEST_SIZE] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14};
+    int test_out[TEST_SIZE] = { 0 };
+
+    int* dev_in;
+    int* dev_out;
+
+    cudaMalloc((void**)&dev_in, TEST_SIZE * sizeof(int));
+    cudaMalloc((void**)&dev_out, TEST_SIZE * sizeof(int));
+    checkCUDAError("test malloc fail!");
+
+    cudaMemcpy(dev_in, test_in, TEST_SIZE * sizeof(int), cudaMemcpyHostToDevice);
+    checkCUDAError("test in copy fail!");
+
+    gpuScan(TEST_SIZE, dev_out, dev_in);
+
+    cudaMemcpy(test_out, dev_out, TEST_SIZE * sizeof(int), cudaMemcpyDeviceToHost);
+    checkCUDAError("test out copy fail!");
+
+    printf("\nTest scan: { ");
+    for (int i = 0; i < TEST_SIZE; i++) {
+        printf("%i ", test_out[i]);
+    }
+    printf(" }\n");
+
+    cudaFree(dev_in);
+    cudaFree(dev_out);
+    checkCUDAError("test free fail!");
+}
diff --git a/stream_compaction/stream_compact.h b/stream_compaction/stream_compact.h
new file mode 100644
index 0000000..84ecddf
--- /dev/null
+++ b/stream_compaction/stream_compact.h
@@ -0,0 +1,68 @@
+#pragma once
+
+// NOTE(review): the include targets were missing from the reviewed copy of this
+// header; the eight names below are a reconstruction -- confirm before merging.
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <algorithm>
+#include <chrono>
+#include <stdexcept>
+
+#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
+
+// Threads per block for the launches in stream_compact.cu. kernScanDataShared
+// requires this to be a power of two.
+#define blockSize 512
+
+// for reducing bank conflicts
+#define NUM_BANKS 32
+#define LOG_NUM_BANKS 5
+// One padding slot per NUM_BANKS elements. The shift count is LOG_NUM_BANKS,
+// not NUM_BANKS: a shift of 32 or more is undefined for a 32-bit int.
+#define CONFLICT_FREE_OFFSET(n) ((n) >> LOG_NUM_BANKS)
+
+/**
+* Check for CUDA errors; print and exit if there was a problem.
+*/
+void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1);
+
+/**
+* floor(log2(x)) for x >= 1. Returns 0 for x < 1; with the usual arithmetic
+* right shift a negative x would otherwise never leave the loop.
+*/
+inline int ilog2(int x) {
+    if (x < 1) return 0;
+    int lg = 0;
+    while (x >>= 1) {
+        ++lg;
+    }
+    return lg;
+}
+
+/**
+* ceil(log2(x)) for x >= 1. Returns 0 for x < 1.
+*/
+inline int ilog2ceil(int x) {
+    return x <= 1 ? 0 : ilog2(x - 1) + 1;
+}
+
+__global__ void kernMapToBoolean(int n, int *bools, const int *idata);
+
+__global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices);
+
+__global__ void kernScanDataShared(int n, int* in, int* out, int* sums);
+
+__global__ void kernScanDataUpSweep(int n, int offset1, int offset2, int* buff);
+__global__ void kernScanDataDownSweep(int n, int offset1, int offset2, int* buff);
+
+__global__ void kernStitch(int n, int* in, int* sums);
+
+void gpuScan(int n, int* dev_out, int* dev_in);
+int intCompact(int n, int* dev_out, int* dev_in1, int *dev_in2);
+
+void scanTest();