From 35b0098b9915e101536e8da5c51b0f4b36df672d Mon Sep 17 00:00:00 2001 From: Mario Juric Date: Mon, 17 May 2010 12:21:02 -0400 Subject: [PATCH] Use lowest extinction for flux-limit calculation As extinction can change within the finite width of the beam, find and use the smallest extinction within the voxel to test whether it's fainter than the flux limit. --- galaxy.kdevelop | 40 +++--- src/Makefile.am | 2 +- src/galfast.cpp | 3 + src/modules/FeH_gpu.cu.h | 5 +- src/modules/FeH_host.cpp | 5 +- src/modules/module_lib.h | 3 + src/pipeline.cpp | 1 + src/pipeline.h | 1 + src/skygen/os_skygen.cpp | 248 +++++++++++++++++++++++++++++------- src/skygen/skyconfig_impl.h | 46 ++++--- src/skygen/skygen.cu.h | 104 +++++++++++---- src/skygen/skygen.h | 9 +- 12 files changed, 352 insertions(+), 115 deletions(-) diff --git a/galaxy.kdevelop b/galaxy.kdevelop index 0b0a1e2..922ebd1 100644 --- a/galaxy.kdevelop +++ b/galaxy.kdevelop @@ -17,19 +17,19 @@ galaxy - + src/galfast.x - optimized + debug /home/mjuric/projects/galaxy/debug/src/galfast.x true custom /home/mjuric/projects/galaxy/tests/os_photometry/ - + catalog cmd.conf true @@ -60,7 +60,7 @@ catalog cmd.conf - /home/mjuric/projects/galaxy/workspace/milkyway-1.0/ + /home/mjuric/projects/galaxy/workspace/milkyway-1.0.1 true false false @@ -81,14 +81,14 @@ kdevg77options -O2 -Wstrict-aliasing -fno-strict-aliasing --with-cuda --enable-cuda-devemuX --with-sm=/usr/local --with-libpeyton=$HOME/projects/libpeyton --prefix=$HOME/projects/galaxy/workspace/staging --with-cfitsio-include=/usr/include/cfitsio --with-libpeyton-libdir=$HOME/projects/libpeyton/lib-optimized - + -DDEBUGMODE=0 -rdynamic -L/opt/cudatools/2.3/lib - - - - - + + + + + @@ -98,12 +98,12 @@ kdevgppoptions kdevg77options -O0 -g3 -Wno-deprecated - + -DDEBUGMODE=1 -rdynamic -L/opt/cudatools/2.3/lib - - - + + + -O0 -g3 -Wno-deprecated -O0 -g3 -Wno-deprecated @@ -134,7 +134,7 @@ false 8 false - + 0 true @@ -206,7 +206,7 @@ - + set m_,_ theValue @@ -232,11 +232,11 @@ postprocess postprocess.conf skygen.conf - + libtool --mode=execute - - - + + + true false false diff --git a/src/Makefile.am b/src/Makefile.am index 8e006b5..371c998 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -31,7 +31,7 @@ SUBDIRS = common gpc modules skygen # explicit dependency for CUDA sources include ./$(DEPDIR)/kernels_gpu.Po -noinst_HEADERS = simulate_gpu.cu.h sph_polygon.h pipeline.h +noinst_HEADERS = pipeline.h simulate_gpu.cu.h sph_polygon.h galfast_x_SOURCES = footprint.cpp galfast.cpp kernels_cpu.cpp kernels_gpu.cu \ pipeline.cpp tests.cpp galfast_x_LDADD = $(top_builddir)/src/skygen/libskygen.la \ diff --git a/src/galfast.cpp b/src/galfast.cpp index 9ef69fc..4a23bbe 100644 --- a/src/galfast.cpp +++ b/src/galfast.cpp @@ -22,6 +22,8 @@ #include "gpu.h" #include "io.h" +#include "gpulog/gpulog.h" +void init_logs(); // defined in os_skygen() #include #include @@ -160,6 +162,7 @@ try MLOG(verb1) << "Error initializing GPU acceleration. Aborting."; return -1; } + init_logs(); if(cmd == "util cudaquery") { diff --git a/src/modules/FeH_gpu.cu.h b/src/modules/FeH_gpu.cu.h index a1b0e1d..0c11b43 100644 --- a/src/modules/FeH_gpu.cu.h +++ b/src/modules/FeH_gpu.cu.h @@ -45,16 +45,19 @@ KERNEL( os_FeH_kernel( otable_ks ks, os_FeH_data par, gpu_rng_t rng, cint_t::gpu_t comp, + cint_t::gpu_t hidden, cfloat_t::gpu_t XYZ, cfloat_t::gpu_t FeH), os_FeH_kernel, - (ks, par, rng, comp, XYZ, FeH) + (ks, par, rng, comp, hidden, XYZ, FeH) ) { uint32_t tid = threadID(); rng.load(tid); for(uint32_t row = ks.row_begin(); row < ks.row_end(); row++) { + if(hidden(row)) { continue; } + int cmp = comp(row); if(par.comp_thin.isset(cmp) || par.comp_thick.isset(cmp)) { diff --git a/src/modules/FeH_host.cpp b/src/modules/FeH_host.cpp index 2bad590..3f676a9 100644 --- a/src/modules/FeH_host.cpp +++ b/src/modules/FeH_host.cpp @@ -56,7 +56,7 @@ class os_FeH : public osink, os_FeH_data }; extern "C" opipeline_stage *create_module_feh() { return new os_FeH(); } // Factory; called by opipeline_stage::create() -DECLARE_KERNEL(os_FeH_kernel(otable_ks ks, os_FeH_data par, gpu_rng_t rng, cint_t::gpu_t comp, cfloat_t::gpu_t XYZ, cfloat_t::gpu_t FeH)) +DECLARE_KERNEL(os_FeH_kernel(otable_ks ks, os_FeH_data par, gpu_rng_t rng, cint_t::gpu_t comp, cint_t::gpu_t hidden, cfloat_t::gpu_t XYZ, cfloat_t::gpu_t FeH)) size_t os_FeH::process(otable &in, size_t begin, size_t end, rng_t &rng) { // ASSUMPTIONS: @@ -66,13 +66,14 @@ size_t os_FeH::process(otable &in, size_t begin, size_t end, rng_t &rng) // fetch prerequisites cint_t &comp = in.col("comp"); + cint_t &hidden= in.col("hidden"); cfloat_t &XYZ = in.col("XYZ"); cfloat_t &FeH = in.col("FeH"); comp_thin = icomp_thin; comp_thick = icomp_thick; comp_halo = icomp_halo; - CALL_KERNEL(os_FeH_kernel, otable_ks(begin, end), *this, rng, comp, XYZ, FeH); + CALL_KERNEL(os_FeH_kernel, otable_ks(begin, end), *this, rng, comp, hidden, XYZ, FeH); return nextlink->process(in, begin, end, rng); } diff --git a/src/modules/module_lib.h b/src/modules/module_lib.h index 6dbd60d..8427b8c 100644 --- a/src/modules/module_lib.h +++ b/src/modules/module_lib.h @@ -104,6 +104,9 @@ return ret; }; + __device__ __host__ double lon() const { return atan2(sl, cl); } + __device__ __host__ double lat() const { return atan2(sb, cb); } + direction() {} direction(double l_, double b_) // l,b are in radians : cl(cos(l_)), cb(cos(b_)), sl(sin(l_)), sb(sin(b_)) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 78e4194..4ee8192 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -1246,6 +1246,7 @@ void generate_catalog(int seed, size_t maxstars, size_t nstars, const std::set &sky); @@ -104,6 +104,7 @@ class os_skygen : public osource bool dryrun; // whether to stop after computing the expected number of stars float nstars; // the mean number of stars to generate (if nstars=0, the number will be determined by the model) cuxTexture ext_north, ext_south; // north/south extinction maps + cuxTexture ext_beam; // maps of minimum extinction for each pixel (coords are x==pixelIndex/2048, y=pixelIndex%2048, y==DM) std::string denMapPrefix; // HACK: dump the starcount density of each component into a file named denMapPrefix.$comp.txt @@ -114,11 +115,11 @@ class os_skygen : public osource virtual const std::string &type() const { static std::string s("input"); return s; } protected: - const os_clipper &load_footprints(const std::string &footprints, float dx, opipeline &pipe); skygenInterface *create_kernel_for_model(const std::string &model); - int load_models(skygenParams &sc, const std::string &model_cfg_list, const os_clipper &clipper); + void load_footprints(skygenParams &sc, std::vector &skypixels, const std::string &footprints, float dx, opipeline &pipe); + int load_models(skygenParams &sc, const std::string &model_cfg_list, const std::vector &skypixels); void load_skyPixelizationConfig(float &dx, skygenParams &sc, const Config &cfg); - void load_extinction_maps(const std::string &econf); + void load_extinction_maps(std::vector &skypixels, const skygenParams &sc, const std::string &econf); }; extern "C" opipeline_stage *create_module_skygen() { return new os_skygen(); } // Factory; called by opipeline_stage::create() @@ -235,7 +236,7 @@ partitioned_skymap *make_skymap(Radians dx, gpc_polygon sky) // defined in footprint.cpp std::pair load_footprints(const std::vector &footstr, const peyton::math::lambert &proj, Radians equatorSamplingScale); -const os_clipper &os_skygen::load_footprints(const std::string &footprints, float dx, opipeline &pipe) +void os_skygen::load_footprints(skygenParams &sc, std::vector &skypixels, const std::string &footprints, float dx, opipeline &pipe) { std::vector footstr; split(footstr, footprints); @@ -256,7 +257,88 @@ const os_clipper &os_skygen::load_footprints(const std::string &footprints, floa gpc_free_polygon(&sky.first); gpc_free_polygon(&sky.second); - return clipper; + // projections the pixels are bound to + std::vector > lb0; // projection poles + int nproj = clipper.getProjections(lb0); + assert(nproj == 2); + for(int i=0; i != lb0.size(); i++) + { + sc.proj[i].init(lb0[i].first, lb0[i].second); + } + + // prepare the skypixels to be processed by subsequently loaded models + std::vector pix; + sc.npixels = clipper.getPixelCenters(pix); + skypixels.resize(sc.npixels); + Radians pixdx = 0; + FOR(0, sc.npixels) + { + os_clipper::pixel &p = pix[i]; + float coveredFraction = p.coveredArea / p.pixelArea; + pixdx = sqrt(p.pixelArea); + skypixels[i] = pencilBeam(p.l, p.b, p.X, p.Y, p.projIdx, pixdx, coveredFraction, -1); + } + + MLOG(verb1) << "Footprint: Tiled with " << sc.npixels << " " << deg(pixdx) << "x" << deg(pixdx) << " deg^2 pixels"; +} + +void find_extinction_minima(cuxTexture &ext_beam, const pencilBeam &p, const cuxTexture &tex, const ::lambert &proj) +{ + int xidx = p.extIdx / 2048; + int yidx = p.extIdx % 2048; + + // Find minimum extinctions along a pencil beam p + // It is assumed that ext_binned has already been correctly sized, and + // the values initialized + ASSERT(ext_beam.depth() == tex.depth()); + ASSERT(xidx*2048 + yidx < ext_beam.width()*ext_beam.height()); + + // bottom left corner of this pixel assuming the pixel is square + // and aligned with the coordinates of the given projection + float x, y; + proj.project(x, y, p); + + // find all texture pixels that may overlap this sky pixel. These are + // those satisfying the worst-case condition that the distance between the centers is + // leq. sqrt(2)*(dx+dx'). The 1.0001 is there to avoid numerical precision problems. + const afloat2 &tcx = tex.coords[0]; + const afloat2 &tcy = tex.coords[1]; + double dr = 1.0001/2.*sqrt(2)*(p.dx + std::max(1./tcx.y, 1./tcy.y)); + int xi = (int)((x - dr - tcx.x) * tcx.y); xi = std::max(0, std::min(xi -1, (int)tex.width())); + int yi = (int)((y - dr - tcy.x) * tcy.y); yi = std::max(0, std::min(yi -1, (int)tex.height())); + int xie = (int)((x + dr - tcx.x) * tcx.y); xie = std::max(0, std::min(xie+2, (int)tex.width())); + int yie = (int)((y + dr - tcy.x) * tcy.y); yie = std::max(0, std::min(yie+2, (int)tex.height())); + + // for every distance pixel in the beam, find the minimum extinction one pixel + // closer. This is the worst-case scenario when bilinear interpolation + // during texture sampling is taken into account. + for(int x = xi; x != xie; x++) + { + if(x < 0 || x >= tex.width()) { continue; } + for(int y = yi; y != yie; y++) + { + if(y < 0 || y >= tex.width()) { continue; } + + // detect empty pixels outside our hemisphere + double Amax = tex(x, y, tex.depth()-1); + if(Amax == 0) + { + double xx = ((double)x)/tcx.y + tcx.x; + double yy = ((double)y)/tcy.y + tcy.x; + double r2 = xx*xx + yy*yy; + if(r2 > 2) { continue; } + } + + for(int z = 0; z < tex.depth(); z++) + { + float &bAr = ext_beam(xidx, yidx, z); + int zz = z-1; + float Ar = zz < 0 ? 0.f : tex(x, y, zz); + + bAr = std::min(bAr, Ar); + } + } + } } typedef skygenInterface *(*modelFactory_t)(); @@ -291,28 +373,8 @@ struct aux_kernel_sorter } }; -int os_skygen::load_models(skygenParams &sc, const std::string &model_cfg_list, const os_clipper &clipper) +int os_skygen::load_models(skygenParams &sc, const std::string &model_cfg_list, const std::vector &skypixels) { - // projections the pixels are bound to - std::vector > lb0; // projection poles - int nproj = clipper.getProjections(lb0); - assert(nproj == 2); - for(int i=0; i != lb0.size(); i++) - { - sc.proj[i].init(lb0[i].first, lb0[i].second); - } - - // prepare the skypixels to be processed by subsequently loaded models - std::vector pix; - sc.npixels = clipper.getPixelCenters(pix); - std::vector skypixels(sc.npixels); - FOR(0, sc.npixels) - { - os_clipper::pixel &p = pix[i]; - float coveredFraction = p.coveredArea / p.pixelArea; - skypixels[i] = pencilBeam(p.l, p.b, p.X, p.Y, p.projIdx, sqrt(p.pixelArea), coveredFraction); - } - // Load density model configurations, instantiate // and configure the correct skygenHost kernels std::vector models; @@ -498,6 +560,22 @@ cuxTexture load_extinction_map(const std::string &fn, Radians &l0, Rad fits_close_file(fptr, &status); FITS_ERRCHECK("Error closing extinction map file.", status); + // sanity check the map -- extinction always has to increase in the DM direction + for(int x = 0; x != img.width(); x++) + for(int y = 0; y != img.height(); y++) + { + float Aprev = 0.; + for(int z = 0; z != img.depth(); z++) + { + float A = img(x, y, z); + if(Aprev > A) + { + MLOG(verb1) << "INVALID EXTINCTION MAP at (x,y,DM)=(" << x << "," << y << "," << z << ") : Ar=" << A << " < " << Aprev << "\n"; + } + Aprev = A; + } + } + return cuxTexture(img, tc); } #endif @@ -612,11 +690,13 @@ extern "C" void resample_texture(const std::string &outfn, const std::string &te /** Load north and south extinction textures based on the configuration found in econf. + + Create a per-pencil-beam extinction maps for the loaded skypixels. */ -void os_skygen::load_extinction_maps(const std::string &econf) +void os_skygen::load_extinction_maps(std::vector &skypixels, const skygenParams &sc, const std::string &econf) { cuxTexture zeroExtinctionTex = load_constant_texture_3D(0., -2., 2., -2., 2., -2., 2.); - ext_south = ext_north = zeroExtinctionTex; + ext_south = ext_north = ext_beam = zeroExtinctionTex; // zero extinction if extinction unspecified if(econf.empty()) { return; } @@ -678,6 +758,62 @@ void os_skygen::load_extinction_maps(const std::string &econf) MLOG(verb2) << "Ext. scale factors: " << nscale << " " << sscale << "\n"; MLOG(verb2) << "Extinction north: " << northfn << " [ X x Y x DM = " << ext_north.width() << " x " << ext_north.height() << " x " << ext_north.depth() << "] [min, max = " << nmin << ", " << nmax << "]\n"; MLOG(verb2) << "Extinction south: " << southfn << " [ X x Y x DM = " << ext_south.width() << " x " << ext_south.height() << " x " << ext_south.depth() << "] [min, max = " << smin << ", " << smax << "]\n"; + + // prepare binned texture lookups for the pixels + // NOTE: assumes the extinction maps have already been loaded + if(!northfn.empty() && !southfn.empty()) + { + ASSERT(ext_north.depth() == ext_south.depth()); + } + + // n(pencilBeams) x n(DM_pixels) + // We're packing this as a 3D texture, because CUDA can't take a 2D texture wider than 64k + // 2048 is the maximum dimension size for a 3D texture (which is why we're using it) + int xdim = sc.npixels / 2048; + if(sc.npixels % 2048) { xdim++; } + int ydim = std::min(sc.npixels, 2048); + ext_beam = cuxTexture(xdim, ydim, ext_north.depth(), texcoord_from_range(0, xdim, 0, xdim), texcoord_from_range(0, ydim, 0, ydim), ext_north.coords[2]); + + FOREACH(ext_beam) { *i = std::numeric_limits::infinity(); } + FOR(0, sc.npixels) + { + pencilBeam &p = skypixels[i]; + p.extIdx = i; + if(!northfn.empty()) + find_extinction_minima(ext_beam, p, ext_north, sc.proj[0]); + if(!southfn.empty()) + find_extinction_minima(ext_beam, p, ext_south, sc.proj[1]); + + // some simple sanity checks + float bArPrev = 0.; + int xidx = p.extIdx / 2048; + int yidx = p.extIdx % 2048; +// std::cerr << deg(p.lon()) << " " << deg(p.lat()) << ": "; + for(int z = 0; z != ext_beam.depth(); z++) + { + float &bAr = ext_beam(xidx, yidx, z); +// std::cerr << bAr << " "; + assert(bArPrev <= bAr); + bArPrev = bAr; + } +// std::cerr << "\n"; + } + +#if 0 + FILE *fp = fopen("out.txt", "w"); + //out.write((char*)&ext_binned(0, 0), ext_binned.width()*ext_binned.height()*sizeof(float)); + for(int i = 0; i != ext_binned.height(); i++) + { + fprintf(fp, "%8d: ", i); + for(int j = 0; j != ext_binned.width(); j++) + { + fprintf(fp, "%.10f ", ext_binned(j, i)); + } + fprintf(fp, "\n"); + } + fclose(fp); +#endif + #endif // #else !HAVE_LIBCFITSIO } @@ -690,19 +826,20 @@ bool os_skygen::construct(const Config &cfg, otable &t, opipeline &pipe) // output file for sky densities cfg.get(denMapPrefix, "skyCountsMapPrefix", ""); - // load extinction volume maps - load_extinction_maps(cfg["extmaps"]); - // load sky pixelization and prepare the table for output skygenParams sc; float dx; load_skyPixelizationConfig(dx, sc, cfg); // load footprints and construct the clipper - const os_clipper &clipper = load_footprints(cfg.get("foot"), dx, pipe); + std::vector skypixels; + load_footprints(sc, skypixels, cfg.get("foot"), dx, pipe); + + // load extinction volume maps and prepare the textures + load_extinction_maps(skypixels, sc, cfg["extmaps"]); - // load models - load_models(sc, cfg.get("model"), clipper); + // load the models + load_models(sc, cfg.get("model"), skypixels); // prepare the table for output t.use_column("lb"); @@ -713,6 +850,7 @@ bool os_skygen::construct(const Config &cfg, otable &t, opipeline &pipe) t.use_column("XYZ"); t.use_column("comp"); t.use_column("DM"); + t.use_column("hidden"); std::string bandName = cfg.get("band"), @@ -727,6 +865,7 @@ bool os_skygen::construct(const Config &cfg, otable &t, opipeline &pipe) DECLARE_TEXTURE(ext_north, float, 3, cudaReadModeElementType); DECLARE_TEXTURE(ext_south, float, 3, cudaReadModeElementType); +DECLARE_TEXTURE(ext_beam, float, 3, cudaReadModeElementType); size_t os_skygen::run(otable &in, rng_t &rng) { @@ -735,6 +874,7 @@ size_t os_skygen::run(otable &in, rng_t &rng) cuxTextureBinder tb_north(::ext_north, ext_north); cuxTextureBinder tb_south(::ext_south, ext_south); + cuxTextureBinder tb_binned(::ext_beam, ext_beam); double nstarsExpected = 0; FOREACH(kernels) @@ -790,9 +930,7 @@ size_t os_skygen::run(otable &in, rng_t &rng) swatch.addTime(runtime); } - double sigma = (starsGenerated - nstarsExpected) / sqrt(nstarsExpected); - char sigmas[50]; sprintf(sigmas, "%4.1f", sigma); - MLOG(verb1) << "Total sources: " << starsGenerated << " (" << sigmas << " sigma from expectation value)."; + MLOG(verb1) << "Total : " << starsGenerated << " stars written out.\n"; return starsGenerated; } @@ -865,7 +1003,7 @@ int os_clipper::getPixelCenters(std::vector &pix) const size_t os_clipper::process(otable &in, size_t begin, size_t end, rng_t &rng) { swatch.start(); - +#if 1 // fetch prerequisites cint_t::host_t pIdx = in.col("projIdx"); cfloat_t::host_t projXY = in.col("projXY"); @@ -875,6 +1013,8 @@ size_t os_clipper::process(otable &in, size_t begin, size_t end, rng_t &rng) int nstars[2] = { 0, 0 }; for(size_t row=begin; row < end; row++) { + if(hidden(row)) { continue; } + // clip everything outside the footprint polygon int projIdx = pIdx(row); nstars[projIdx]++; @@ -883,13 +1023,6 @@ size_t os_clipper::process(otable &in, size_t begin, size_t end, rng_t &rng) x = projXY(row, 0); y = projXY(row, 1); - // immediately reject if in the southern hemisphere (for this projection) - if(sqr(x) + sqr(y) > 2.) - { - hidden(row) = 1; - continue; - } - partitioned_skymap *skymap = hemispheres[projIdx].sky; std::pair XY; XY.first = (int)((x - skymap->x0) / skymap->dx); @@ -903,13 +1036,32 @@ size_t os_clipper::process(otable &in, size_t begin, size_t end, rng_t &rng) gpc_vertex vtmp = { x, y }; bool infoot = in_polygon(vtmp, poly); - hidden(row) = !infoot; + if(!infoot) { hidden(row) = 1; } } DLOG(verb1) << "nstars north: " << nstars[0]; DLOG(verb1) << "nstars south: " << nstars[1]; - +#endif swatch.stop(); return nextlink->process(in, begin, end, rng); } + +#include "../src/gpulog/gpulog.h" +#include "../src/gpulog/lprintf.h" +extern gpulog::host_log hlog; + +extern "C" void flush_logs() +{ + copy(hlog, "dlog", gpulog::LOG_DEVCLEAR); + replay_printf(std::cerr, hlog); +} + +void init_logs() +{ + // log memory allocation + int host_buffer_size, device_buffer_size; + host_buffer_size = device_buffer_size = 10*1024*1024; + hlog.alloc(host_buffer_size); + gpulog::alloc_device_log("dlog", device_buffer_size); +} diff --git a/src/skygen/skyconfig_impl.h b/src/skygen/skyconfig_impl.h index 3402203..328726a 100644 --- a/src/skygen/skyconfig_impl.h +++ b/src/skygen/skyconfig_impl.h @@ -39,6 +39,7 @@ struct star_comp cint_t::host_t comp; cfloat_t::host_t M, Am, AmInf; cfloat_t::host_t DM; + cint_t::host_t hidden; star_comp(otable &in) { @@ -51,15 +52,18 @@ struct star_comp Am = in.col("Am"); AmInf = in.col("AmInf"); DM = in.col("DM"); + hidden = in.col("hidden"); } bool operator()(const size_t a, const size_t b) const // less semantics { - return lb(a, 0) < lb(b, 0) || lb(a, 0) == lb(b, 0) && ( + return hidden(a)< hidden(b) || hidden(a)== hidden(b) && ( // need to sort by hidden first, as other fields may not even be set if hidden is + lb(a, 0) < lb(b, 0) || lb(a, 0) == lb(b, 0) && ( lb(a, 1) < lb(b, 1) || lb(a, 1) == lb(b, 1) && ( DM(a) < DM(b) || DM(a) == DM(b) && ( - M(a) < M(b) - ))); + M(a) < M(b) || M(a) == M(b) && ( + Am(a) < Am(b) + ))))); } }; @@ -413,8 +417,9 @@ double skygenHost::integrateCounts(float &runtime, const char *denmapPrefix) ss << "<--" << pow10f(this->lrho0+(this->nhistbins-0.5)*this->dlrho); MLOG(verb2) << ss.str(); - MLOG(verb1) << "Comp. " << componentMap.compID(this->model.component()) << " starcount : " << std::setprecision(9) << this->nstarsExpected - << " (" << this->nstarsExpectedToGenerate << " in pixelized area)"; + MLOG(verb1) << "Comp. " << componentMap.compID(this->model.component()) << " counts : " << std::setprecision(9) + << this->nstarsExpectedToGenerate << " to generate, " + << this->nstarsExpected << " within footprint."; swatch.stop(); runtime = swatch.getTime(); @@ -435,8 +440,9 @@ size_t skygenHost::drawSources(otable &in, osink *nextlink, float &runtime) this->output_table_capacity = in.capacity(); this->ks.alloc(this->nthreads); - uint64_t total = 0; - do + uint64_t totalGenerated = 0, totalStored = 0; + bool generated_all = false; + while(!generated_all) { // setup output destination this->stars.lb = in.col("lb"); @@ -448,6 +454,7 @@ size_t skygenHost::drawSources(otable &in, osink *nextlink, float &runtime) this->stars.comp = in.col("comp"); this->stars.M = in.col("absmag"); this->stars.DM = in.col("DM"); + this->stars.hidden = in.col("hidden"); // call the generation kernel upload(true, 0, this->npixels); @@ -455,6 +462,7 @@ size_t skygenHost::drawSources(otable &in, osink *nextlink, float &runtime) download(true, 0, this->npixels); in.set_size(this->stars_generated); // truncate the table to the number of rows that were actually generated + generated_all = this->stars_generated < this->stopstars; { // @@ -483,6 +491,7 @@ size_t skygenHost::drawSources(otable &in, osink *nextlink, float &runtime) SWAP(comp); SWAP(M); SWAP(DM); + SWAP(hidden); std::swap(h2a[hi], h2a[hj]); std::swap(a2h[i], a2h[j]); @@ -498,23 +507,26 @@ size_t skygenHost::drawSources(otable &in, osink *nextlink, float &runtime) swatch.stop(); if(in.size()) { - total += nextlink->process(in, 0, in.size(), *cpurng); + totalGenerated += in.size(); + totalStored += nextlink->process(in, 0, in.size(), *cpurng); } swatch.start(); - - double pctdone = 100. * total / this->nstarsExpected; - char pcts[50]; sprintf(pcts, "%.0f", pctdone); - MLOG(verb1) << "Comp. "<< componentMap.compID(this->model.component()) << " progress: " << pcts << "% done."; - } while(this->stars_generated >= this->stopstars); + if(!generated_all) + { + double pctdone = 100. * totalGenerated / this->nstarsExpectedToGenerate; + char pcts[50]; sprintf(pcts, "%.0f", pctdone); + MLOG(verb1) << "Comp. "<< componentMap.compID(this->model.component()) << " progress: " << pcts << "% done."; + } + }; - double sigma = (total - this->nstarsExpected) / sqrt(this->nstarsExpected); - char sigmas[50]; sprintf(sigmas, "% 4.1f", sigma); - MLOG(verb1) << "Comp. "<< componentMap.compID(this->model.component()) << " completed: " << total << " stars, " << sigmas << " sigma from input model mean (" << this->nstarsExpected << ")."; + double sigma = (totalGenerated - this->nstarsExpectedToGenerate) / sqrt(this->nstarsExpectedToGenerate); + char sigmas[50]; sprintf(sigmas, "%.1f", sigma); + MLOG(verb1) << "Comp. "<< componentMap.compID(this->model.component()) << " completed: " << totalStored << " stars (" << totalGenerated << " generated, " << sigmas << " sigma from " << this->nstarsExpectedToGenerate << ")."; swatch.stop(); runtime = swatch.getTime(); DLOG(verb1) << "run() runtime: " << runtime << "s"; - return total; + return totalStored; } diff --git a/src/skygen/skygen.cu.h b/src/skygen/skygen.cu.h index 365be0a..9c675b1 100644 --- a/src/skygen/skygen.cu.h +++ b/src/skygen/skygen.cu.h @@ -30,6 +30,14 @@ #include #include #include "skygen.h" +#include "../gpulog/gpulog.h" +#include "../gpulog/lprintf.h" + +#if __CUDACC__ + #define LPRINTF(...) lprintf(dlog, __VA_ARGS__) +#else + #define LPRINTF(...) printf(__VA_ARGS__) +#endif #include @@ -38,10 +46,20 @@ using namespace cudacc; __constant__ gpuRng::constant rng; // GPU RNG __constant__ lambert proj[2]; // Projection definitions for the two hemispheres +#if __CUDACC__ +// The assumption is all CUDA code will be concatenated/included and compiled +// as a single source file (thus avoiding the creation of duplicate copies of +// hlog and dlog) +gpulog::host_log hlog; +__constant__ gpulog::device_log dlog; +#endif +extern "C" void flush_logs(); // GPU debug logs + /********************************************************/ DEFINE_TEXTURE(ext_north, float, 3, cudaReadModeElementType, false, cudaFilterModeLinear, cudaAddressModeClamp); DEFINE_TEXTURE(ext_south, float, 3, cudaReadModeElementType, false, cudaFilterModeLinear, cudaAddressModeClamp); +DEFINE_TEXTURE(ext_beam, float, 3, cudaReadModeElementType, false, cudaFilterModePoint, cudaAddressModeClamp); ////////////////////////////////////////////////////////////////////////////////////////////////////////// // 3D texture resampler, used mainly to debug extinction maps. Should be moved to a separate file. @@ -189,7 +207,6 @@ __device__ float sampleExtinction(int projIdx, float X, float Y, float DM) Am = TEX3D(ext_north, X, Y, DM); else Am = TEX3D(ext_south, X, Y, DM); - return Am; } @@ -206,7 +223,10 @@ __device__ float3 skygenGPU::compute_pos(float &D, float &Am, float M, const float DM = m - M; D = powf(10.f, 0.2f*DM + 1.f); - Am = sampleExtinction(pix.projIdx, pix.X, pix.Y, DM); + + int xidx = pix.extIdx / 2048; + int yidx = pix.extIdx % 2048; + Am = TEX3D(ext_beam, xidx, yidx, DM); return pix.xyz(D); } @@ -347,37 +367,30 @@ __device__ bool skygenGPU::advance(int &ilb, int &i, int &j, pencilBeam &dir, stars. If there isn't, ndraw will be != 0 upon return. */ template -__device__ void skygenGPU::draw_stars(int &ndraw, const float &M, const int &im, const pencilBeam &pix) const +__device__ void skygenGPU::draw_stars(int &ndraw, const float &M, const int &im, const pencilBeam &pix, float AmMin) const { if(!ndraw) { return; } int idx = atomicAdd(nstars.ptr, ndraw); - for(; ndraw && idx < stopstars; idx++) + for(; ndraw && idx < stopstars; idx++, ndraw--) { - // Draw the distance and absolute magnitude - float Mtmp, mtmp, DM; - stars.M(idx) = Mtmp = M + dM*(rng.uniform() - 0.5f); - mtmp = m0 + dm*(im + rng.uniform() - 0.5f); - DM = mtmp - Mtmp; - stars.DM(idx) = DM; - float D = powf(10, 0.2f*DM + 1.f); - // Draw the position within the pixel float x = pix.X, y = pix.Y; x += pix.dx*(rng.uniform() - 0.5f); y += pix.dx*(rng.uniform() - 0.5f); + + // stop right away if we're beyond the boundaries of the projection. + if(x*x + y*y > 2.f) + { + stars.hidden(idx) = 1; + continue; + } + stars.projIdx(idx) = pix.projIdx; stars.projXY(idx, 0) = x; stars.projXY(idx, 1) = y; - // Draw extinction - float Am0, Am1; - stars.Am(idx) = Am0 = sampleExtinction(pix.projIdx, x, y, DM); - Am1 = sampleExtinction(pix.projIdx, x, y, 100); - stars.AmInf(idx) = Am1 > Am0 ? Am1 : Am0; // work around the situation where due to trilinear interp. in _all_ dimensions AmInf can come out being slightly smaller than Am - // FIXME: implement a proper (possibly nontrivial) fix for this, some day - // Transform projected coordinates to (l,b), in degrees double l, b; proj[pix.projIdx].deproject(l, b, x, y); @@ -389,6 +402,14 @@ __device__ void skygenGPU::draw_stars(int &ndraw, const float &M, const int & stars.lb(idx, 0) = l; stars.lb(idx, 1) = b; + // Draw the distance and absolute magnitude + float Mtmp, mtmp, DM; + stars.M(idx) = Mtmp = M + dM*(rng.uniform() - 0.5f); + mtmp = m0 + dm*(im + rng.uniform() - 0.5f); + DM = mtmp - Mtmp; + stars.DM(idx) = DM; + float D = powf(10, 0.2f*DM + 1.f); + // Compute and store the 3D position (XYZ) float3 pos = dir2.xyz(D); stars.XYZ(idx, 0) = pos.x; @@ -399,7 +420,42 @@ __device__ void skygenGPU::draw_stars(int &ndraw, const float &M, const int & //stars.comp(idx) = model.component(pos.x, pos.y, pos.z, Mtmp, rng); stars.comp(idx) = model.component(); - ndraw--; + // Draw extinction + float Am0, Am1; + stars.Am(idx) = Am0 = sampleExtinction(pix.projIdx, x, y, DM); + Am1 = sampleExtinction(pix.projIdx, x, y, 100); + stars.AmInf(idx) = Am1 > Am0 ? Am1 : Am0; // work around the situation where due to trilinear interp. in _all_ dimensions AmInf can come out being slightly smaller than Am + // FIXME: implement a proper (possibly nontrivial) fix for this, some day + + // hide the star if the magnitude is beyond the flux limit + if(mtmp + Am0 > m1) + { + stars.hidden(idx) = 1; + } + else + { + stars.hidden(idx) = 0; + } + + if(AmMin > Am0) + { +#if __CUDACC__ + float2 &tcx = ext_northTC[0]; + float2 &tcy = ext_northTC[1]; + float2 &tcz = ext_northTC[2]; + float xi = (x - tcx.x) * tcx.y + 0.5f; + float yi = (y - tcy.x) * tcy.y + 0.5f; + float zi = (DM - tcz.x) * tcz.y + 0.5f; + int extx = pix.extIdx / 2048; + int exty = pix.extIdx % 2048; + LPRINTF("%f > %f : (%f, %f, %f) -> (%f, %f, %f)!\n", AmMin, Am0, x, y, DM, xi, yi, zi); + LPRINTF(" l,b=%.10f %.10f\n", l, b); + LPRINTF(" beam=%f beamIdx=%d\n", TEX3D(ext_beam, extx, exty, 10000), pix.extIdx); + LPRINTF(" idx=%d ndraw=%d\n", idx, ndraw); + LPRINTF(" projIdx=%d\n", pix.projIdx); +#endif + stars.AmInf(idx) = -AmMin; + } } } @@ -445,7 +501,7 @@ __device__ void skygenGPU::kernel() const if(ndraw) { float M = M1 - iM*dM; - draw_stars(ndraw, M, im, pix); + draw_stars(ndraw, M, im, pix, Am); } } @@ -491,7 +547,7 @@ __device__ void skygenGPU::kernel() const if(ilb >= npixels) { break; } // We moved to a new distance bin. Recompute and reset. - pos = compute_pos(D, Am, M, im, pix); + pos = compute_pos(D, Am, M, im, pix); model.setpos(ms, pos.x, pos.y, pos.z); } @@ -518,7 +574,7 @@ __device__ void skygenGPU::kernel() const ndraw = rng.poisson(rho); } - draw_stars(ndraw, M, im, pix); + draw_stars(ndraw, M, im, pix, Am); } else { @@ -600,6 +656,8 @@ void skygenHost::compute(bool draw) #endif cuxErrCheck( cudaThreadSynchronize() ); kernelRunSwatch.stop(); + + flush_logs(); } #include "model_J08.h" diff --git a/src/skygen/skygen.h b/src/skygen/skygen.h index 7f721ae..46472da 100644 --- a/src/skygen/skygen.h +++ b/src/skygen/skygen.h @@ -94,9 +94,11 @@ struct ALIGN(16) pencilBeam : public direction int projIdx; // index of the lambert projection in which this pixel was constructed float X, Y; // lambert coordinates of this pencil beam (in projIdx projection) + int extIdx; // index into per-beam extinction texture + pencilBeam() {} - pencilBeam(Radians l_, Radians b_, float X_, float Y_, int projIdx_, float dx_, float coveredFraction_) - : direction(l_, b_), X(X_), Y(Y_), projIdx(projIdx_), dx(dx_), dA(dx_*dx_), coveredFraction(coveredFraction_) + pencilBeam(Radians l_, Radians b_, float X_, float Y_, int projIdx_, float dx_, float coveredFraction_, int extIdx_) + : direction(l_, b_), X(X_), Y(Y_), projIdx(projIdx_), dx(dx_), dA(dx_*dx_), coveredFraction(coveredFraction_), extIdx(extIdx_) { } }; @@ -171,6 +173,7 @@ struct ALIGN(16) ocolumns cint_t::gpu_t projIdx; cfloat_t::gpu_t projXY, DM, M, XYZ, Am, AmInf; cint_t::gpu_t comp; + cint_t::gpu_t hidden; }; // @@ -286,7 +289,7 @@ struct ALIGN(16) skygenGPU : public skygenParams template __device__ void kernel() const; __device__ float3 compute_pos(float &D, float &Am, float M, const int im, const pencilBeam &dir) const; __device__ bool advance(int &ilb, int &i, int &j, pencilBeam &pix, const int x, const int y) const; - __device__ void draw_stars(int &ndraw, const float &M, const int &im, const pencilBeam &pix) const; + __device__ void draw_stars(int &ndraw, const float &M, const int &im, const pencilBeam &pix, float AmMin) const; }; //