From 03e4a65554df5d3ccad985c0156b2cd832af85d7 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Wed, 6 Nov 2024 18:34:16 +0100 Subject: [PATCH 1/8] added GPU support for thermalize BC, introduced erfinv_v3 as a DP approximation of the inverse function, replaced a few pow ; validated on nvidia GPU, to be validated on AMD GPUs --- src/ParticleBC/BoundaryConditionType.cpp | 518 +++++++++++------- src/ParticleBC/BoundaryConditionType.h | 131 ++++- src/Tools/userFunctions.cpp | 93 ++++ src/Tools/userFunctions.h | 1 + ...gpu_validate_tst1d_03_thermal_expansion.py | 26 + 5 files changed, 582 insertions(+), 187 deletions(-) create mode 100755 validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 55579a2c7..ea117ae7e 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -132,12 +132,19 @@ void reflect_particle_wall( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel deviceptr(position,momentum, invgf) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum, invgf ) + #pragma omp teams distribute parallel for +#endif for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. 
+ momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION charge[ ipart ] = 0; cell_keys[ipart] = -1; @@ -343,7 +350,7 @@ void remove_photon_inf( Species *species, int imin, int imax, int direction, dou int* cell_keys = species->particles->getPtrCellKeys(); for (int ipart=imin ; ipartparticles->getPtrCellKeys(); for (int ipart=imin ; ipart= limit_sup) { - double momentumNorm = sqrt( pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double momentumNorm = sqrt( momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( momentumNorm ); // energy lost REDUCTION charge[ ipart ] = 0; cell_keys[ipart] = -1; @@ -373,42 +380,62 @@ void remove_photon_sup( Species *species, int imin, int imax, int direction, dou void stop_particle_inf( Species *species, int imin, int imax, int direction, double limit_inf, double /*dt*/, std::vector &/*invgf*/, Random * /*rand*/, double &energy_change ) { - energy_change = 0; + double change_in_energy = 0.0; double* position = species->particles->getPtrPosition(direction); double* momentum_x = species->particles->getPtrMomentum(0); double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight) + #pragma acc loop gang worker vector reduction(+ : change_in_energy) +#else + #pragma omp simd 
reduction(+ : change_in_energy) +#endif for (int ipart=imin ; ipart &/*invgf*/, Random * /*rand*/, double &energy_change ) { - energy_change = 0; + double change_in_energy = 0.0; double* position = species->particles->getPtrPosition(direction); double* momentum_x = species->particles->getPtrMomentum(0); double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight) + #pragma acc loop gang worker vector reduction(+ : change_in_energy) +#else + #pragma omp simd reduction(+ : change_in_energy) +#endif for (int ipart=imin ; ipart= limit_sup) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change = weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION + double LorentzFactor = sqrt( 1. 
+ momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + change_in_energy += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION position[ ipart ] = 2.*limit_sup - position[ ipart ]; momentum_x[ ipart ] = 0.; momentum_y[ ipart ] = 0.; momentum_z[ ipart ] = 0.; } } + energy_change = change_in_energy; } void stop_particle_wall( Species *species, int imin, int imax, int direction, double wall_position, double dt, std::vector &invgf, Random * /*rand*/, double &energy_change ) @@ -424,7 +451,7 @@ void stop_particle_wall( Species *species, int imin, int imax, int direction, do double particle_position = position[ipart]; double particle_position_old = particle_position - dt*invgf[ipart]*momentum[ipart]; if ( ( wall_position-particle_position_old )*( wall_position-particle_position )<0 ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION position[ ipart ] = 2.*wall_position - position[ ipart ]; momentum_x[ ipart ] = 0.; @@ -447,7 +474,7 @@ void stop_particle_AM( Species *species, int imin, int imax, int /*direction*/, for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - double LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + double LorentzFactor = sqrt( 1. 
+ momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( LorentzFactor-1.0 ); // energy lost REDUCTION double distance_to_axis = sqrt( distance2ToAxis ); // limit_pos = 2*limit_pos @@ -479,95 +506,153 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state = rand->xorshift32_state; +#endif + double change_in_energy = 0.0; + double thermal_momentum = species->thermal_momentum_[direction]; + double thermal_momentum1; + double thermal_momentum2; + double v0 = species->thermal_velocity_[0]; + if (nDim>1) { + thermal_momentum1 = species->thermal_momentum_[(direction+1)%nDim]; + if (nDim>2) { + thermal_momentum2 = species->thermal_momentum_[(direction+2)%nDim]; + } + } + double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz; + // mean-velocity + vx = -species->thermal_boundary_velocity_[0]; + vy = -species->thermal_boundary_velocity_[1]; + vz = -species->thermal_boundary_velocity_[2]; + v2 = vx*vx + vy*vy + vz*vz; + if( v2>0. 
) { + g = 1.0/sqrt( 1.0-v2 ); + gm1 = g - 1.0; + // compute the different component of the Matrix block of the Lorentz transformation + Lxx = 1.0 + gm1 * vx*vx/v2; + Lyy = 1.0 + gm1 * vy*vy/v2; + Lzz = 1.0 + gm1 * vz*vz/v2; + Lxy = gm1 * vx*vy/v2; + Lxz = gm1 * vx*vz/v2; + Lyz = gm1 * vy*vz/v2; + } - energy_change = 0; - for (int ipart=imin ; ipart3.0*species->thermal_velocity_[0] ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT - - // velocity of the particle after thermalization/reflection - //for (int i=0; inDim_fields; i++) { - - // change of velocity in the direction normal to the reflection plane - double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); - momentum[ ipart ] = sign_vel * species->thermal_momentum_[direction] * std::sqrt( -std::log( 1.0-rand->uniform1() ) ); - - // change of momentum in the direction(s) along the reflection plane - if (nDim>1) { - momentumRefl_2D[ipart] = species->thermal_momentum_[(direction+1)%nDim] * perp_rand( rand ); - if (nDim>2) { - momentumRefl_3D[ipart] = species->thermal_momentum_[(direction+2)%nDim] * perp_rand( rand ); - } - } - - // Adding the mean velocity (using relativistic composition) - double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz, gp, px, py, pz; - // mean-velocity - vx = -species->thermal_boundary_velocity_[0]; - vy = -species->thermal_boundary_velocity_[1]; - vz = -species->thermal_boundary_velocity_[2]; - v2 = vx*vx + vy*vy + vz*vz; - if( v2>0. 
) { - - g = 1.0/sqrt( 1.0-v2 ); - gm1 = g - 1.0; - - // compute the different component of the Matrix block of the Lorentz transformation - Lxx = 1.0 + gm1 * vx*vx/v2; - Lyy = 1.0 + gm1 * vy*vy/v2; - Lzz = 1.0 + gm1 * vz*vz/v2; - Lxy = gm1 * vx*vy/v2; - Lxz = gm1 * vx*vz/v2; - Lyz = gm1 * vy*vz/v2; - - // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; - py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; - pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; - momentum_x[ ipart ] = px; - momentum_y[ ipart ] = py; - momentum_z[ ipart ] = pz; - - }//ENDif vel != 0 - - } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT - momentum[ ipart ] = -momentum[ ipart ]; - - }// endif on v vs. thermal_velocity_ - - // position of the particle after reflection - position[ ipart ] = 2.*limit_inf - position[ ipart ]; - - // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); - - - /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs - // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute thread_limit(32) parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent 
deviceptr(position, momentum, momentumRefl_2D, momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) +#else + #pragma omp simd reduction(+ : change_in_energy) +#endif + for (int ichunk = imin/32 ; ichunk < imax/32 ; ichunk++ ) { - if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { - // nrj computed during diagnostics - species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); - species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); - } - else { - stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state_local = xorshift32_state + ichunk; + uint32_t xorshift32_state_array[32]; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop seq +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp single +#endif + // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] avec le state local + for( int i = 0; i < 32; ++i ){ + xorshift32_state_array[i] = xorshift32(xorshift32_state_local); + } +#endif + // boucle sur les particules de ce chunk qui utilise xorshift32_state_array[i] + int istart = ichunk==(imin/32) ? imin%32 : 0; + int iend = ichunk==(imax/32) ? 
imax%32 : 32; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp simd +#endif + for( int i = istart; i < iend ; ++i ){ + int ipart = ichunk * 32 + i; + if ( position[ ipart ] < limit_inf) { + // checking the particle's velocity compared to the thermal one + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; + double LorentzFactor = sqrt( 1.+p2 ); + double v = sqrt( p2 )/LorentzFactor; + + // energy before thermalization + double initial_energy = LorentzFactor - 1.0; + // Apply bcs depending on the particle velocity + // -------------------------------------------- + if( v > 3.0 * v0) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT + + // velocity of the particle after thermalization/reflection + //for (int i=0; inDim_fields; i++) { + // change of velocity in the direction normal to the reflection plane + double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); + #if defined( SMILEI_ACCELERATOR_GPU ) + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - uniform1(xorshift32_state_array[i]) ) ); + #else + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); + #endif + + // change of momentum in the direction(s) along the reflection plane + if (nDim>1) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand_gpu_v3(xorshift32_state_array[i]); + #else + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); + #endif + if (nDim>2) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand_gpu_v3(xorshift32_state_array[i]); + #else + momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); + #endif + } + } + // Adding the mean velocity (using relativistic composition) + double gp, px, py, pz; + if( v2>0. 
) { + // Lorentz transformation of the momentum + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; + py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; + pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; + momentum_x[ ipart ] = px; + momentum_y[ ipart ] = py; + momentum_z[ ipart ] = pz; + }//ENDif vel != 0 + } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT + momentum[ ipart ] = -momentum[ ipart ]; + + }// endif on v vs. thermal_velocity_ + + // position of the particle after reflection + position[ ipart ] = 2.*limit_inf - position[ ipart ]; + + // energy lost during thermalization + LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + energy_change += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + + + // HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs + // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); + + //if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { + // nrj computed during diagnostics + //species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); + //species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); + //} + //else { + //stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + //} + } - */ } } + energy_change = change_in_energy; +#if defined( SMILEI_ACCELERATOR_GPU ) + xorshift32_state += 32; + rand->xorshift32_state = 
xorshift32_state; +#endif } void thermalize_particle_sup( Species *species, int imin, int imax, int direction, double limit_sup, double /*dt*/, std::vector &/*invgf*/, Random * rand, double &energy_change ) @@ -581,95 +666,156 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio double* momentum_y = species->particles->getPtrMomentum(1); double* momentum_z = species->particles->getPtrMomentum(2); double* weight = species->particles->getPtrWeight(); - - energy_change = 0; - for (int ipart=imin ; ipart= limit_sup) { - // checking the particle's velocity compared to the thermal one - double p2 = pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ); - double LorentzFactor = sqrt( 1.+p2 ); - double v = sqrt( p2 )/LorentzFactor; - - // energy before thermalization - double initial_energy = LorentzFactor-1.0; - - // Apply bcs depending on the particle velocity - // -------------------------------------------- - if( v>3.0*species->thermal_velocity_[0] ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT - - // velocity of the particle after thermalization/reflection - //for (int i=0; inDim_fields; i++) { - - // change of velocity in the direction normal to the reflection plane - double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); - momentum[ ipart ] = sign_vel * species->thermal_momentum_[direction] * std::sqrt( -std::log( 1.0-rand->uniform1() ) ); - - // change of momentum in the direction(s) along the reflection plane - if (nDim>1) { - momentumRefl_2D[ ipart ] = species->thermal_momentum_[(direction+1)%nDim] * perp_rand( rand ); - if (nDim>2) { - momentumRefl_3D[ ipart ] = species->thermal_momentum_[(direction+2)%nDim] * perp_rand( rand ); +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state = rand->xorshift32_state; +#endif + double change_in_energy = 0.0; + double thermal_momentum = species->thermal_momentum_[direction]; + double thermal_momentum1; + double 
thermal_momentum2; + double v0 = species->thermal_velocity_[0]; + if (nDim>1) { + thermal_momentum1 = species->thermal_momentum_[(direction+1)%nDim]; + if (nDim>2) { + thermal_momentum2 = species->thermal_momentum_[(direction+2)%nDim]; + } + } + double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz; + // mean-velocity + vx = -species->thermal_boundary_velocity_[0]; + vy = -species->thermal_boundary_velocity_[1]; + vz = -species->thermal_boundary_velocity_[2]; + v2 = vx*vx + vy*vy + vz*vz; + if( v2>0. ) { + g = 1.0/sqrt( 1.0-v2 ); + gm1 = g - 1.0; + // compute the different component of the Matrix block of the Lorentz transformation + Lxx = 1.0 + gm1 * vx*vx/v2; + Lyy = 1.0 + gm1 * vy*vy/v2; + Lzz = 1.0 + gm1 * vz*vz/v2; + Lxy = gm1 * vx*vy/v2; + Lxz = gm1 * vx*vz/v2; + Lyz = gm1 * vy*vz/v2; + } +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) + #pragma omp teams distribute thread_limit(32) parallel for reduction( + : change_in_energy ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent deviceptr(position, momentum, momentumRefl_2D, momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) +#else + #pragma omp simd reduction(+ : change_in_energy) +#endif + for (int ichunk = imin/32 ; ichunk < imax/32 ; ichunk++ ) { + +#if defined( SMILEI_ACCELERATOR_GPU ) + uint32_t xorshift32_state_local = xorshift32_state + ichunk; + uint32_t xorshift32_state_array[32]; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop seq +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp single +#endif + // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] 
avec le state local + for( int i = 0; i < 32; ++i ){ + xorshift32_state_array[i] = xorshift32(xorshift32_state_local); + } +#endif + int istart = ichunk==(imin/32) ? imin%32 : 0; + int iend = ichunk==(imax/32) ? imax%32 : 32; +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc loop vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp simd +#endif + for( int i = istart; i < iend ; ++i ){ + int ipart = ichunk * 32 + i; + + if ( position[ ipart ] >= limit_sup) { + // checking the particle's velocity compared to the thermal one + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; + double LorentzFactor = sqrt( 1.+p2 ); + double v = sqrt( p2 )/LorentzFactor; + + // energy before thermalization + double initial_energy = LorentzFactor - 1.0; + + // Apply bcs depending on the particle velocity + // -------------------------------------------- + if( v > 3.0 * v0 ) { //IF VELOCITY > 3*THERMAL VELOCITY THEN THERMALIZE IT + + // velocity of the particle after thermalization/reflection + //for (int i=0; inDim_fields; i++) { + + // change of velocity in the direction normal to the reflection plane + double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); + #if defined( SMILEI_ACCELERATOR_GPU ) + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - uniform1(xorshift32_state_array[i]) ) ); + #else + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); + #endif + + // change of momentum in the direction(s) along the reflection plane + if (nDim>1) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand_gpu_v3(xorshift32_state_array[i]); + #else + momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); + #endif + if (nDim>2) { + #if defined( SMILEI_ACCELERATOR_GPU ) + momentumRefl_3D[ ipart ] = thermal_momentum2 * 
perp_rand_gpu_v3(xorshift32_state_array[i]); + #else + momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); + #endif + } } - } - // Adding the mean velocity (using relativistic composition) - double vx, vy, vz, v2, g, gm1, Lxx, Lyy, Lzz, Lxy, Lxz, Lyz, gp, px, py, pz; - // mean-velocity - vx = -species->thermal_boundary_velocity_[0]; - vy = -species->thermal_boundary_velocity_[1]; - vz = -species->thermal_boundary_velocity_[2]; - v2 = vx*vx + vy*vy + vz*vz; - if( v2>0. ) { - - g = 1.0/sqrt( 1.0-v2 ); - gm1 = g - 1.0; - - // compute the different component of the Matrix block of the Lorentz transformation - Lxx = 1.0 + gm1 * vx*vx/v2; - Lyy = 1.0 + gm1 * vy*vy/v2; - Lzz = 1.0 + gm1 * vz*vz/v2; - Lxy = gm1 * vx*vy/v2; - Lxz = gm1 * vx*vz/v2; - Lyz = gm1 * vy*vz/v2; - - // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; - py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; - pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; - momentum_x[ ipart ] = px; - momentum_y[ ipart ] = py; - momentum_z[ ipart ] = pz; - - }//ENDif vel != 0 - - } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT - momentum[ ipart ] = -momentum[ ipart ]; - - }// endif on v vs. 
thermal_velocity_ - - // position of the particle after reflection - position[ ipart ] = 2.*limit_sup - position[ ipart ]; - - // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); - energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); - - - /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs - // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); - - if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { - // nrj computed during diagnostics - species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); - species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); - } - else { - stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + // Adding the mean velocity (using relativistic composition) + double gp, px, py, pz; + // mean-velocity + if( v2>0. ) { + // Lorentz transformation of the momentum + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; + py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; + pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; + momentum_x[ ipart ] = px; + momentum_y[ ipart ] = py; + momentum_z[ ipart ] = pz; + }//ENDif vel != 0 + + } else { // IF VELOCITY < 3*THERMAL SIMPLY REFLECT IT + momentum[ ipart ] = -momentum[ ipart ]; + + }// endif on v vs. 
thermal_velocity_ + + // position of the particle after reflection + position[ ipart ] = 2.*limit_sup - position[ ipart ]; + + // energy lost during thermalization + LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); + energy_change += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + + /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs + // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); + + if ( ( species->particles->position(1,ipart) >= val_min ) && ( species->particles->position(1,ipart) <= val_max ) ) { + // nrj computed during diagnostics + species->particles->position(direction, ipart) = limit_pos - species->particles->position(direction, ipart); + species->particles->momentum(direction, ipart) = sqrt(params.thermal_velocity_[direction]) * tabFcts.erfinv( rand->uniform() ); + } + else { + stop_particle( species->particles, ipart, direction, limit_pos, params, energy_change ); + } + */ } - */ } } + energy_change = change_in_energy; +#if defined( SMILEI_ACCELERATOR_GPU ) + xorshift32_state += 32; + rand->xorshift32_state = xorshift32_state; +#endif } @@ -691,7 +837,7 @@ void thermalize_particle_wall( Species *species, int imin, int imax, int directi double particle_position_old = particle_position - dt*invgf[ipart]*species->particles->Momentum[direction][ipart]; if ( ( wall_position-particle_position_old )*( wall_position-particle_position )<0 ) { // checking the particle's velocity compared to the thermal one - double p2 = pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ); + double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; double LorentzFactor = sqrt( 1.+p2 ); double v = sqrt( p2 )/LorentzFactor; @@ -739,7 +885,7 @@ void 
thermalize_particle_wall( Species *species, int imin, int imax, int directi Lyz = gm1 * vy*vz/v2; // Lorentz transformation of the momentum - gp = sqrt( 1.0 + pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + gp = sqrt( 1.0 + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); px = -gp*g*vx + Lxx * momentum_x[ ipart ] + Lxy * momentum_y[ ipart ] + Lxz * momentum_z[ ipart ]; py = -gp*g*vy + Lxy * momentum_x[ ipart ] + Lyy * momentum_y[ ipart ] + Lyz * momentum_z[ ipart ]; pz = -gp*g*vz + Lxz * momentum_x[ ipart ] + Lyz * momentum_y[ ipart ] + Lzz * momentum_z[ ipart ]; @@ -758,7 +904,7 @@ void thermalize_particle_wall( Species *species, int imin, int imax, int directi position[ ipart ] = 2.*wall_position - position[ ipart ]; // energy lost during thermalization - LorentzFactor = sqrt( 1.+pow( momentum_x[ipart], 2 )+pow( momentum_y[ipart], 2 )+pow( momentum_z[ipart], 2 ) ); + LorentzFactor = sqrt( 1. 
+ momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); energy_change += weight[ ipart ]*( initial_energy - LorentzFactor+1.0 ); diff --git a/src/ParticleBC/BoundaryConditionType.h b/src/ParticleBC/BoundaryConditionType.h index 3a89e9758..137438eb7 100755 --- a/src/ParticleBC/BoundaryConditionType.h +++ b/src/ParticleBC/BoundaryConditionType.h @@ -15,15 +15,144 @@ #include "Params.h" #include "tabulatedFunctions.h" #include "userFunctions.h" +#include "Random.h" inline double perp_rand( Random * rand ) { - double a = userFunctions::erfinv( rand->uniform1() ); + double a = userFunctions::erfinv( rand->uniform1() ); // to be switched to erfinv 3 if( rand->cointoss() ) { a *= -1.; } return a; } + +/*inline double perp_rand_gpu( Random * rand ) { + double a = userFunctions::erfinv_v3( rand->uniform1() ); + if( rand->cointoss() ) { + a *= -1.; + } + return a; +}*/ +/** + * copied from erfinv_DP_1.cu by Prof. Mike Giles. + * https://people.maths.ox.ac.uk/gilesm/ + * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/ + * + * Original code is written for CUDA. + * Mutsuo Saito modified original code for C++. 
+ */ +inline double erfinv_v3(double x) +{ + double w, p; + double sign; + if (x > 0) { + sign = 1.0; + } else { + sign = -1.0; + x = abs(x); + } + w = - log((1.0-x)*(1.0+x)); + + if ( w < 6.250000 ) { + w = w - 3.125000; + p = -3.6444120640178196996e-21; + p = -1.685059138182016589e-19 + p*w; + p = 1.2858480715256400167e-18 + p*w; + p = 1.115787767802518096e-17 + p*w; + p = -1.333171662854620906e-16 + p*w; + p = 2.0972767875968561637e-17 + p*w; + p = 6.6376381343583238325e-15 + p*w; + p = -4.0545662729752068639e-14 + p*w; + p = -8.1519341976054721522e-14 + p*w; + p = 2.6335093153082322977e-12 + p*w; + p = -1.2975133253453532498e-11 + p*w; + p = -5.4154120542946279317e-11 + p*w; + p = 1.051212273321532285e-09 + p*w; + p = -4.1126339803469836976e-09 + p*w; + p = -2.9070369957882005086e-08 + p*w; + p = 4.2347877827932403518e-07 + p*w; + p = -1.3654692000834678645e-06 + p*w; + p = -1.3882523362786468719e-05 + p*w; + p = 0.0001867342080340571352 + p*w; + p = -0.00074070253416626697512 + p*w; + p = -0.0060336708714301490533 + p*w; + p = 0.24015818242558961693 + p*w; + p = 1.6536545626831027356 + p*w; + } + else if ( w < 16.000000 ) { + w = sqrt(w) - 3.250000; + p = 2.2137376921775787049e-09; + p = 9.0756561938885390979e-08 + p*w; + p = -2.7517406297064545428e-07 + p*w; + p = 1.8239629214389227755e-08 + p*w; + p = 1.5027403968909827627e-06 + p*w; + p = -4.013867526981545969e-06 + p*w; + p = 2.9234449089955446044e-06 + p*w; + p = 1.2475304481671778723e-05 + p*w; + p = -4.7318229009055733981e-05 + p*w; + p = 6.8284851459573175448e-05 + p*w; + p = 2.4031110387097893999e-05 + p*w; + p = -0.0003550375203628474796 + p*w; + p = 0.00095328937973738049703 + p*w; + p = -0.0016882755560235047313 + p*w; + p = 0.0024914420961078508066 + p*w; + p = -0.0037512085075692412107 + p*w; + p = 0.005370914553590063617 + p*w; + p = 1.0052589676941592334 + p*w; + p = 3.0838856104922207635 + p*w; + } + else { + w = sqrt(w) - 5.000000; + p = -2.7109920616438573243e-11; + p = 
-2.5556418169965252055e-10 + p*w; + p = 1.5076572693500548083e-09 + p*w; + p = -3.7894654401267369937e-09 + p*w; + p = 7.6157012080783393804e-09 + p*w; + p = -1.4960026627149240478e-08 + p*w; + p = 2.9147953450901080826e-08 + p*w; + p = -6.7711997758452339498e-08 + p*w; + p = 2.2900482228026654717e-07 + p*w; + p = -9.9298272942317002539e-07 + p*w; + p = 4.5260625972231537039e-06 + p*w; + p = -1.9681778105531670567e-05 + p*w; + p = 7.5995277030017761139e-05 + p*w; + p = -0.00021503011930044477347 + p*w; + p = -0.00013871931833623122026 + p*w; + p = 1.0103004648645343977 + p*w; + p = 4.8499064014085844221 + p*w; + } + return sign * p * x; +} + +inline uint32_t xorshift32(uint32_t xorshift32_state) +{ + // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" + xorshift32_state ^= xorshift32_state << 13; + xorshift32_state ^= xorshift32_state >> 17; + xorshift32_state ^= xorshift32_state << 5; + return xorshift32_state; +} + +static constexpr double xorshift32_invmax1 = (1.-1e-11)/4294967296.; + +inline double uniform1(uint32_t xorshift32_state) { + return xorshift32(xorshift32_state) * xorshift32_invmax1; +} + +inline double perp_rand_gpu_v3(uint32_t xorshift32_state) { + double a = erfinv_v3( uniform1(xorshift32_state) ); //userFunctions:: + // technically we could also use the erfinv() function from CUDA, it would require compiling with -cuda though ... 
+ // the study showed the gap in perf for BC thermal was not worth the added dependency + if( xorshift32(xorshift32_state) & 1 ) { // rand->cointoss() + a *= -1.; + } + return a; +} +//*/ + + + + void internal_inf( Species *species, int imin, int imax, int direction, double limit_inf, double dt, std::vector &invgf, Random * rand, double &energy_change ); void internal_sup( Species *species, int imin, int imax, int direction, double limit_sup, double dt, std::vector &invgf, Random * rand, double &energy_change ); diff --git a/src/Tools/userFunctions.cpp b/src/Tools/userFunctions.cpp index 2e540b322..9cea0703e 100755 --- a/src/Tools/userFunctions.cpp +++ b/src/Tools/userFunctions.cpp @@ -4,6 +4,8 @@ #include "Params.h" + +// This function is to be deleted after complete validation of the transition to erfinv_v3 //! inverse error function is taken from NIST double userFunctions::erfinv( double x ) { @@ -68,6 +70,97 @@ double userFunctions::erfinv2( double x ) return p*x; } +/** + * copied from erfinv_DP_1.cu by Prof. Mike Giles. + * https://people.maths.ox.ac.uk/gilesm/ + * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/ + * + * Original code is written for CUDA. + * Mutsuo Saito modified original code for C++. 
+ */ +double userFunctions::erfinv_v3(double x) +{ + double w, p; + double sign; + if (x > 0) { + sign = 1.0; + } else { + sign = -1.0; + x = abs(x); + } + w = - log((1.0-x)*(1.0+x)); + + if ( w < 6.250000 ) { + w = w - 3.125000; + p = -3.6444120640178196996e-21; + p = -1.685059138182016589e-19 + p*w; + p = 1.2858480715256400167e-18 + p*w; + p = 1.115787767802518096e-17 + p*w; + p = -1.333171662854620906e-16 + p*w; + p = 2.0972767875968561637e-17 + p*w; + p = 6.6376381343583238325e-15 + p*w; + p = -4.0545662729752068639e-14 + p*w; + p = -8.1519341976054721522e-14 + p*w; + p = 2.6335093153082322977e-12 + p*w; + p = -1.2975133253453532498e-11 + p*w; + p = -5.4154120542946279317e-11 + p*w; + p = 1.051212273321532285e-09 + p*w; + p = -4.1126339803469836976e-09 + p*w; + p = -2.9070369957882005086e-08 + p*w; + p = 4.2347877827932403518e-07 + p*w; + p = -1.3654692000834678645e-06 + p*w; + p = -1.3882523362786468719e-05 + p*w; + p = 0.0001867342080340571352 + p*w; + p = -0.00074070253416626697512 + p*w; + p = -0.0060336708714301490533 + p*w; + p = 0.24015818242558961693 + p*w; + p = 1.6536545626831027356 + p*w; + } + else if ( w < 16.000000 ) { + w = sqrt(w) - 3.250000; + p = 2.2137376921775787049e-09; + p = 9.0756561938885390979e-08 + p*w; + p = -2.7517406297064545428e-07 + p*w; + p = 1.8239629214389227755e-08 + p*w; + p = 1.5027403968909827627e-06 + p*w; + p = -4.013867526981545969e-06 + p*w; + p = 2.9234449089955446044e-06 + p*w; + p = 1.2475304481671778723e-05 + p*w; + p = -4.7318229009055733981e-05 + p*w; + p = 6.8284851459573175448e-05 + p*w; + p = 2.4031110387097893999e-05 + p*w; + p = -0.0003550375203628474796 + p*w; + p = 0.00095328937973738049703 + p*w; + p = -0.0016882755560235047313 + p*w; + p = 0.0024914420961078508066 + p*w; + p = -0.0037512085075692412107 + p*w; + p = 0.005370914553590063617 + p*w; + p = 1.0052589676941592334 + p*w; + p = 3.0838856104922207635 + p*w; + } + else { + w = sqrt(w) - 5.000000; + p = -2.7109920616438573243e-11; + p = 
-2.5556418169965252055e-10 + p*w; + p = 1.5076572693500548083e-09 + p*w; + p = -3.7894654401267369937e-09 + p*w; + p = 7.6157012080783393804e-09 + p*w; + p = -1.4960026627149240478e-08 + p*w; + p = 2.9147953450901080826e-08 + p*w; + p = -6.7711997758452339498e-08 + p*w; + p = 2.2900482228026654717e-07 + p*w; + p = -9.9298272942317002539e-07 + p*w; + p = 4.5260625972231537039e-06 + p*w; + p = -1.9681778105531670567e-05 + p*w; + p = 7.5995277030017761139e-05 + p*w; + p = -0.00021503011930044477347 + p*w; + p = -0.00013871931833623122026 + p*w; + p = 1.0103004648645343977 + p*w; + p = 4.8499064014085844221 + p*w; + } + return sign * p * x; +} + // ---------------------------------------------------------------------------- //! \brief Distribute equally the load into chunk of an array //! and return the number of elements for the specified chunk number. diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h index d9525723d..fd83f89d8 100755 --- a/src/Tools/userFunctions.h +++ b/src/Tools/userFunctions.h @@ -13,6 +13,7 @@ class userFunctions static double erfinv( double x ); static double erfinv2( double x ); + static inline double erfinv_v3( double x ); //! 
Load repartition in 1d between MPI processes static void distributeArray( int rank, diff --git a/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py b/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py new file mode 100755 index 000000000..767d21ad4 --- /dev/null +++ b/validation/analyses/gpu_validate_tst1d_03_thermal_expansion.py @@ -0,0 +1,26 @@ +import os, re, numpy as np +import happi +from scipy.signal import butter, filtfilt +b, a = butter(5, 0.2, btype='low', analog=False) + + +#S = happi.Open("./restart*", verbose=False) + +S = happi.Open(verbose=False) + +eon_spectrum = S.ParticleBinning.Diag2().get() +ekin = eon_spectrum["ekin"] +eon_spectrum = np.mean(eon_spectrum["data"], axis=0) +eon_spectrum_filt = filtfilt(b, a, eon_spectrum) +# # theory +# Te = S.namelist.Species["eon"].temperature[0] +# factor = S.namelist.Species["eon"].number_density.xplateau / S.namelist.Main.grid_length[0] +# theoretical_spectrum = factor*2./Te * (ekin/np.pi/Te)**0.5 * np.exp(-ekin/Te) +# plt.plot(ekin, eon_spectrum_filt, '.-') +# plt.plot(ekin, theoretical_spectrum, '-') +Validate("Electron spectrum", eon_spectrum_filt, 20. 
) + + +rho = S.Field.Field0.Rho_ion(timesteps=11800).getData()[0] +rho_filt = filtfilt(b, a, rho) +Validate("Final ion profile", rho_filt[::10], 0.15) From 71ee7e03bf2ce5a1ee117d7c3fd335c0a90c3335 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Wed, 6 Nov 2024 21:14:54 +0100 Subject: [PATCH 2/8] CPU fix for thermal BC, started removing more pow() calls after making sure CPU performance was not impacted / increased --- src/ParticleBC/BoundaryConditionType.cpp | 27 ++++++++----- src/Species/SpeciesMetrics.cpp | 49 +++++++----------------- src/Species/SpeciesV.cpp | 6 +-- 3 files changed, 34 insertions(+), 48 deletions(-) diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index ea117ae7e..3e1fb3333 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -540,13 +540,15 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) - #pragma omp teams distribute thread_limit(32) parallel for reduction( + : change_in_energy ) + #pragma omp teams distribute thread_limit(32) reduction( + : change_in_energy ) #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent deviceptr(position, momentum, momentumRefl_2D, momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) #else #pragma omp simd reduction(+ : change_in_energy) + for (int ipart = imin ; ipart < imax ; ++ipart ) { #endif - for (int ichunk = imin/32 ; ichunk < imax/32 ; ichunk++ ) { +#if defined( SMILEI_ACCELERATOR_GPU) + for (int ichunk = imin/32 ; ichunk < imax/32 ; ++ichunk ) { #if defined( SMILEI_ACCELERATOR_GPU ) uint32_t xorshift32_state_local = xorshift32_state + ichunk; @@ -567,10 +569,11 @@ void 
thermalize_particle_inf( Species *species, int imin, int imax, int directio #if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) - #pragma omp simd + #pragma omp parallel for #endif for( int i = istart; i < iend ; ++i ){ int ipart = ichunk * 32 + i; +#endif if ( position[ ipart ] < limit_inf) { // checking the particle's velocity compared to the thermal one double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; @@ -630,7 +633,7 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio // energy lost during thermalization LorentzFactor = sqrt( 1. + momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); - energy_change += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + change_in_energy += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); // HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs @@ -647,7 +650,9 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio } } +#if defined( SMILEI_ACCELERATOR_GPU ) } +#endif energy_change = change_in_energy; #if defined( SMILEI_ACCELERATOR_GPU ) xorshift32_state += 32; @@ -699,13 +704,15 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio } #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target is_device_ptr( position, momentum, momentumRefl_2D, momentumRefl_3D, momentum_x, momentum_y, momentum_z, weight ) map( tofrom : change_in_energy ) - #pragma omp teams distribute thread_limit(32) parallel for reduction( + : change_in_energy ) + #pragma omp teams distribute thread_limit(32) reduction( + : change_in_energy ) #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel loop gang vector_length(32) reduction(+ : change_in_energy) independent deviceptr(position, momentum, momentumRefl_2D, 
momentumRefl_3D,momentum_x,momentum_y,momentum_z,weight) #else #pragma omp simd reduction(+ : change_in_energy) + for (int ipart = imin ; ipart < imax ; ++ipart ) { #endif - for (int ichunk = imin/32 ; ichunk < imax/32 ; ichunk++ ) { +#if defined( SMILEI_ACCELERATOR_GPU) + for (int ichunk = imin/32 ; ichunk < imax/32 ; ++ichunk ) { #if defined( SMILEI_ACCELERATOR_GPU ) uint32_t xorshift32_state_local = xorshift32_state + ichunk; @@ -725,11 +732,11 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio #if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) - #pragma omp simd + #pragma omp parallel for #endif for( int i = istart; i < iend ; ++i ){ int ipart = ichunk * 32 + i; - +#endif if ( position[ ipart ] >= limit_sup) { // checking the particle's velocity compared to the thermal one double p2 = momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart]; @@ -794,7 +801,7 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio // energy lost during thermalization LorentzFactor = sqrt( 1. 
+ momentum_x[ipart] * momentum_x[ipart] + momentum_y[ipart] * momentum_y[ipart] + momentum_z[ipart] * momentum_z[ipart] ); - energy_change += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); + change_in_energy += weight[ ipart ] * ( initial_energy - LorentzFactor + 1.0 ); /* HERE IS AN ATTEMPT TO INTRODUCE A SPACE DEPENDENCE ON THE BCs // double val_min(params.dens_profile.vacuum_length[1]), val_max(params.dens_profile.vacuum_length[1]+params.dens_profile.length_params_y[0]); @@ -810,7 +817,9 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio */ } } +#if defined( SMILEI_ACCELERATOR_GPU ) } +#endif energy_change = change_in_energy; #if defined( SMILEI_ACCELERATOR_GPU ) xorshift32_state += 32; diff --git a/src/Species/SpeciesMetrics.cpp b/src/Species/SpeciesMetrics.cpp index 0cb9c5332..f9c531e58 100755 --- a/src/Species/SpeciesMetrics.cpp +++ b/src/Species/SpeciesMetrics.cpp @@ -113,11 +113,8 @@ void SpeciesMetrics::get_computation_time( const std::vector &count, /*#pragma omp declare simd double SpeciesMetrics::get_particle_computation_time_vectorization(const double log_particle_number) { - return -7.983397022180499e-05 * pow(log_particle_number,4) - -1.220834603123080e-02 * pow(log_particle_number,3) -+ 2.262009704511124e-01 * pow(log_particle_number,2) - -1.346529777726451e+00 * log_particle_number -+ 3.053068997965275e+00; + return 3.053068997965275e+00 + log_particle_number * (-1.346529777726451e+00 + log_particle_number * (2.262009704511124e-01 + + log_particle_number * ( -1.220834603123080e-02 - 7.983397022180499e-05 * log_particle_number ) ) ) ; };*/ //! 
Evaluate the time necessary to compute `particle_number` particles @@ -127,46 +124,28 @@ float SpeciesMetrics::get_particle_computation_time_vectorization( const float l { // Cascade lake 6248 (Ex: Jean Zay) #if defined __INTEL_CASCADELAKE_6248 - return -3.878426186471072e-03 * pow(log_particle_number,4) - + 3.143999691029673e-02 * pow(log_particle_number,3) - + 6.520005335065826e-02 * pow(log_particle_number,2) - -1.103410559576951e+00 * log_particle_number - + 2.851575999756124e+00; + return 2.851575999756124e+00 + log_particle_number * ( -1.103410559576951e+00 + log_particle_number * ( 6.520005335065826e-02 + + log_particle_number * ( 3.143999691029673e-02 - 3.878426186471072e-03 * log_particle_number ) ) ) ; // Skylake 8168 (Ex: Irene Joliot-Curie) #elif defined __INTEL_SKYLAKE_8168 - return -5.500324176161280e-03 * pow( log_particle_number, 4 ) - + 5.302690106220765e-02 * pow( log_particle_number, 3 ) - -2.390999177899332e-02 * pow( log_particle_number, 2 ) - -1.018178658950980e+00 * log_particle_number - + 2.873965603217334e+00; + return 2.873965603217334e+00 + log_particle_number * ( -1.018178658950980e+00 + log_particle_number * ( -2.390999177899332e-02 + + log_particle_number * ( 5.302690106220765e-02 - 5.500324176161280e-03 * log_particle_number ) ) ) ; // Knight Landings Intel Xeon Phi 7250 (Ex: Frioul) #elif defined __INTEL_KNL_7250 - return + 9.287025545185804e-03 * pow( log_particle_number, 4 ) - -1.252595460426959e-01 * pow( log_particle_number, 3 ) - + 6.609030611761257e-01 * pow( log_particle_number, 2 ) - -1.948861281215199e+00 * log_particle_number - + 3.391615458521049e+00; + return 3.391615458521049e+00 + log_particle_number * ( -1.948861281215199e+00 + log_particle_number * ( 6.609030611761257e-01 + + log_particle_number * ( -1.252595460426959e-01 + 9.287025545185804e-03 * log_particle_number ) ) ) ; // Broadwell Intel Xeon E5-2697 v4 (Ex: Tornado) #elif defined __INTEL_BDW_E5_2697_V4 - return -4.732086199743545e-03 * pow( log_particle_number, 
4 ) - + 3.249709067117774e-02 * pow( log_particle_number, 3 ) - + 1.940828611778672e-01 * pow( log_particle_number, 2 ) - -2.010116307618810e+00 * log_particle_number - + 4.661824411143119e+00; + return 4.661824411143119e+00 + log_particle_number * ( -2.010116307618810e+00 + log_particle_number * (1.940828611778672e-01 + + log_particle_number * ( 3.249709067117774e-02 - 4.732086199743545e-03 * log_particle_number ) ) ) ; // Haswell Intel Xeon E5-2680 v3 (Ex: Jureca) #elif defined __INTEL_HSW_E5_2680_v3 - return -4.127980207551420e-03 * pow( log_particle_number, 4 ) - + 3.688297004269906e-02 * pow( log_particle_number, 3 ) - + 3.666171703120181e-02 * pow( log_particle_number, 2 ) - -1.066920754145127e+00 * log_particle_number - + 2.893485213852858e+00; + return 2.893485213852858e+00 + log_particle_number * ( -1.066920754145127e+00 + log_particle_number * ( 3.666171703120181e-02 + + log_particle_number * ( 3.688297004269906e-02 - 4.127980207551420e-03 * log_particle_number ) ) ) ; // General fit #else - return -1.760649180606238e-03 * pow(log_particle_number,4) - + 8.410553824987992e-03 * pow(log_particle_number,3) - + 1.447576003168199e-01 * pow(log_particle_number,2) - -1.192593070397785e+00 * log_particle_number - + 2.855507642982689e+00; + return 2.855507642982689e+00 + log_particle_number * ( -1.192593070397785e+00 + log_particle_number * ( 1.447576003168199e-01 + + log_particle_number * ( 8.410553824987992e-03 - 1.760649180606238e-03 * log_particle_number ) ) ) ; #endif }; diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 4a4199b63..cb173963d 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -1758,7 +1758,7 @@ void SpeciesV::mergeParticles( double time_dual ) // for (unsigned int ip = 0; ip < (unsigned int)(particles->last_index.back()) ; ip++) { // weight_before += particles->weight(ip); - // energy_before += sqrt(1 + pow(particles->momentum(0,ip),2) + pow(particles->momentum(1,ip),2) + 
pow(particles->momentum(2,ip),2)); + // energy_before += sqrt(1 + particles->momentum(0,ip) * particles->momentum(0,ip) + particles->momentum(1,ip) * particles->momentum(1,ip) + particles->momentum(2,ip) * particles->momentum(2,ip)); // } // For each cell, we apply independently the merging process @@ -1787,9 +1787,7 @@ void SpeciesV::mergeParticles( double time_dual ) // for (unsigned int ip = 0; ip < (unsigned int)(particles->last_index.back()) ; ip++) { // weight_after += particles->weight(ip); - // energy_after += sqrt(1 + pow(particles->momentum(0,ip),2) - // + pow(particles->momentum(1,ip),2) - // + pow(particles->momentum(2,ip),2)); + // energy_after += sqrt(1 + particles->momentum(0,ip) * particles->momentum(0,ip) + particles->momentum(1,ip) * particles->momentum(1,ip) + particles->momentum(2,ip) * particles->momentum(2,ip)); // } // // if (weight_before != weight_after) { From c177cbf42ee6d93661095a02a089af80dd3835a8 Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Fri, 8 Nov 2024 01:28:50 +0100 Subject: [PATCH 3/8] removed most pow() possible in smilei --- src/Collisions/BinaryProcesses.cpp | 6 +- src/Collisions/CollisionalIonization.cpp | 2 +- src/Collisions/Collisions.cpp | 2 +- src/Diagnostic/DiagnosticScreen.cpp | 25 +- src/Diagnostic/Histogram.h | 254 +++++++++--------- .../MF_Solver2D_Bouchard.cpp | 6 +- src/ElectroMagnSolver/MF_Solver2D_Lehe.cpp | 6 +- .../MF_Solver3D_Bouchard.cpp | 6 +- src/ElectroMagnSolver/MF_Solver3D_Lehe.cpp | 8 +- src/ElectroMagnSolver/MF_SolverAM_Lehe.cpp | 6 +- .../PML_Solver2D_Bouchard.cpp | 6 +- .../PML_Solver2D_Envelope.cpp | 68 ++--- .../PML_Solver3D_Bouchard.cpp | 6 +- .../PML_Solver3D_Envelope.cpp | 124 ++++----- .../PML_SolverAM_Envelope.cpp | 116 ++++---- ...PML_SolverAM_EnvelopeReducedDispersion.cpp | 96 +++---- src/Interpolator/Interpolator1D2Order.cpp | 2 +- src/Interpolator/Interpolator1D2Order.h | 2 +- src/Ionization/IonizationTunnel.cpp | 14 +- .../IonizationTunnelEnvelopeAveraged.cpp | 19 +- 
.../MultiphotonBreitWheeler.cpp | 6 +- .../MultiphotonBreitWheeler.h | 8 +- .../MultiphotonBreitWheelerTables.cpp | 2 +- src/Params/Params.cpp | 2 +- src/ParticleBC/BoundaryConditionType.h | 11 - src/Particles/ParticleCreator.cpp | 6 +- src/Particles/Particles.h | 4 +- src/Patch/Patch.cpp | 10 +- src/Pusher/PusherHigueraCary.cpp | 3 +- src/Radiation/Radiation.cpp | 2 +- src/Radiation/Radiation.h | 8 +- .../RadiationDiagRadiationSpectrum.cpp | 2 +- src/Radiation/RadiationTables.h | 5 +- src/Radiation/RadiationTools.h | 28 +- src/Tools/tabulatedFunctions.cpp | 2 +- 35 files changed, 433 insertions(+), 440 deletions(-) diff --git a/src/Collisions/BinaryProcesses.cpp b/src/Collisions/BinaryProcesses.cpp index 107d12c8f..344ff05ba 100644 --- a/src/Collisions/BinaryProcesses.cpp +++ b/src/Collisions/BinaryProcesses.cpp @@ -162,7 +162,7 @@ void BinaryProcesses::calculate_debye_length( Params ¶ms, Patch *patch ) // compute debye length squared in code units patch->debye_length_squared[ibin] = 1./inv_D2; // apply lower limit to the debye length (minimum interatomic distance) - double rmin2 = pow( coeff*density_max, -2./3. ); + double rmin2 = 1.0 / cbrt( coeff*density_max * coeff*density_max ) ; if( patch->debye_length_squared[ibin] < rmin2 ) { patch->debye_length_squared[ibin] = rmin2; } @@ -292,8 +292,8 @@ void BinaryProcesses::apply( Params ¶ms, Patch *patch, int itime, vectormomentum( 0, ie ) *= pr; pe->momentum( 1, ie ) *= pr; pe->momentum( 2, ie ) *= pr; diff --git a/src/Collisions/Collisions.cpp b/src/Collisions/Collisions.cpp index f38b1b414..41618959b 100755 --- a/src/Collisions/Collisions.cpp +++ b/src/Collisions/Collisions.cpp @@ -22,7 +22,7 @@ Collisions::Collisions( coeff1_ = 4.046650232e-21*params.reference_angular_frequency_SI; // h*omega/(2*me*c^2) coeff2_ = 2.817940327e-15*params.reference_angular_frequency_SI/299792458.; // re omega / c coeff3_ = coeff2_ * coulomb_log_factor_; - coeff4_ = pow( 3.*coeff2_, -1./3. 
); + coeff4_ = 1.0 / cbrt( 3.*coeff2_); } diff --git a/src/Diagnostic/DiagnosticScreen.cpp b/src/Diagnostic/DiagnosticScreen.cpp index 92b95e1ac..f98b2fd64 100755 --- a/src/Diagnostic/DiagnosticScreen.cpp +++ b/src/Diagnostic/DiagnosticScreen.cpp @@ -75,7 +75,7 @@ DiagnosticScreen::DiagnosticScreen( if( params.nDim_particle > 1 ) { screen_vector_a[0] = -screen_unitvector[1]; screen_vector_a[1] = screen_unitvector[0]; - double norm = sqrt( pow( screen_vector_a[0], 2 ) + pow( screen_vector_a[1], 2 ) ); + double norm = sqrt( screen_vector_a[0] * screen_vector_a[0] + screen_vector_a[1] * screen_vector_a[1] ); if( norm < 1.e-8 ) { screen_vector_a[0] = 0.; screen_vector_a[1] = 1.; @@ -132,7 +132,7 @@ DiagnosticScreen::DiagnosticScreen( ERROR( errorPrefix << ": axis `theta` not available for `" << screen_shape << "` screen" ); } for( idim=0; idimcenter_[idim] - screen_point[idim], 2 ); + distance_to_center += ( patch->center_[idim] - screen_point[idim] ) * ( patch->center_[idim] - screen_point[idim] ); } distance_to_center = sqrt( distance_to_center ); if( abs( screen_vectornorm - distance_to_center ) > patch->radius ) { @@ -196,10 +196,10 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) } else if( screen_type == 2 ) { // cylinder double distance_to_axis = 0.; for( unsigned int idim=0; idimcenter_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] - -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] - , 2 ); + + distance_to_axis += ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] + -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] ) * ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim] + -( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] ); } distance_to_axis = sqrt( 
distance_to_axis ); if( abs( screen_vectornorm - distance_to_axis ) > patch->radius ) { @@ -260,8 +260,9 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) double side_old = 0.; double dtg = dt / s->particles->LorentzFactor( ipart ); for( unsigned int idim=0; idimparticles->Position[idim][ipart] - screen_point[idim], 2 ); - side_old += pow( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim], 2 ); + side += ( s->particles->Position[idim][ipart] - screen_point[idim] ) * ( s->particles->Position[idim][ipart] - screen_point[idim] ); + side_old += ( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) * + ( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) ; } side = screen_vectornorm-sqrt( side ); side_old = screen_vectornorm-sqrt( side_old ); @@ -284,10 +285,12 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow ) for( unsigned int idim=0; idimparticles->Position[(idim+1)%ndim][ipart] - screen_point[(idim+1)%ndim]; double u2 = s->particles->Position[(idim+2)%ndim][ipart] - screen_point[(idim+2)%ndim]; - side += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 ); + side += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) * + ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ); u1 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart]; u2 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart]; - side_old += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 ); + side_old += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) * + ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ); } side = r2 - side; side_old = r2 - side_old; diff --git a/src/Diagnostic/Histogram.h 
b/src/Diagnostic/Histogram.h index 9ab9cbab3..71c50f4ff 100755 --- a/src/Diagnostic/Histogram.h +++ b/src/Diagnostic/Histogram.h @@ -318,9 +318,9 @@ class HistogramAxis_p : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = s->mass_ * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = s->mass_ * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -329,9 +329,9 @@ class HistogramAxis_p : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -347,9 +347,9 @@ class HistogramAxis_gamma : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -358,9 +358,9 @@ class HistogramAxis_gamma : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -376,9 +376,9 @@ class HistogramAxis_ekin : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = s->mass_ * ( sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) - 1. ); + array[ipart] = s->mass_ * ( sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) - 1. ); } } // Photons @@ -387,9 +387,9 @@ class HistogramAxis_ekin : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -406,9 +406,9 @@ class HistogramAxis_vx : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[0][ipart] - / sqrt( 1. 
+ pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -418,9 +418,9 @@ class HistogramAxis_vx : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -437,9 +437,9 @@ class HistogramAxis_vy : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -449,9 +449,9 @@ class HistogramAxis_vy : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -468,9 +468,9 @@ class HistogramAxis_vz : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -480,9 +480,9 @@ class HistogramAxis_vz : public HistogramAxis continue; } array[ipart] = s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -496,9 +496,9 @@ class HistogramAxis_v : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = pow( 1. 
+ 1./( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ), -0.5 ); + array[ipart] = 1.0 / sqrt( 1. + 1./( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } }; }; @@ -513,11 +513,11 @@ class HistogramAxis_vperp2 : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = ( pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) - ) / ( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] + ) / ( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -526,11 +526,11 @@ class HistogramAxis_vperp2 : public HistogramAxis if( index[ipart]<0 ) { continue; } - array[ipart] = ( pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) - ) / ( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + array[ipart] = ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] + ) / ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -647,9 +647,9 @@ class Histogram_jx : public 
Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[0][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -660,9 +660,9 @@ class Histogram_jx : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -681,9 +681,9 @@ class Histogram_jy : public Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -694,9 +694,9 @@ class Histogram_jy : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -715,9 +715,9 @@ class Histogram_jz : public Histogram } array[ipart] = s->particles->Weight[ipart] * ( double )( s->particles->Charge[ipart] ) * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -728,9 +728,9 @@ class Histogram_jz : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -748,9 +748,9 @@ class Histogram_ekin : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * ( sqrt( 1. 
+ pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) - 1. ); + * ( sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) - 1. ); } } // Photons @@ -760,9 +760,9 @@ class Histogram_ekin : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * ( sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) ); + * ( sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } } }; @@ -808,9 +808,9 @@ class Histogram_p : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -820,9 +820,9 @@ class Histogram_p : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -918,10 +918,10 @@ class Histogram_pressure_xx : public Histogram continue; } 
array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[0][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] ) + / sqrt( 1. + s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -931,10 +931,10 @@ class Histogram_pressure_xx : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[0][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -952,10 +952,10 @@ class Histogram_pressure_yy : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[1][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] ) + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -965,10 +965,10 @@ class Histogram_pressure_yy : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[1][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -986,10 +986,10 @@ class Histogram_pressure_zz : public Histogram continue; } array[ipart] = s->mass_ * s->particles->Weight[ipart] - * pow( s->particles->Momentum[2][ipart], 2 ) - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -999,10 +999,10 @@ class Histogram_pressure_zz : public Histogram continue; } array[ipart] = s->particles->Weight[ipart] - * pow( s->particles->Momentum[2][ipart], 2 ) - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + * ( s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1022,9 +1022,9 @@ class Histogram_pressure_xy : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[1][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1036,9 +1036,9 @@ class Histogram_pressure_xy : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[1][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1058,9 +1058,9 @@ class Histogram_pressure_xz : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1072,9 +1072,9 @@ class Histogram_pressure_xz : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1094,9 +1094,9 @@ class Histogram_pressure_yz : public Histogram array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] * s->particles->Momentum[2][ipart] - / sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } // Photons @@ -1108,9 +1108,9 @@ class Histogram_pressure_yz : public Histogram array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[1][ipart] * s->particles->Momentum[2][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; @@ -1129,9 +1129,9 @@ class Histogram_ekin_vx : public Histogram } array[ipart] = s->mass_ * s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - * ( 1. - 1./sqrt( 1. + pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ) ); + * ( 1. - 1./sqrt( 1. 
+ s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ) ); } } // Photons @@ -1142,9 +1142,9 @@ class Histogram_ekin_vx : public Histogram } array[ipart] = s->particles->Weight[ipart] * s->particles->Momentum[0][ipart] - / sqrt( pow( s->particles->Momentum[0][ipart], 2 ) - + pow( s->particles->Momentum[1][ipart], 2 ) - + pow( s->particles->Momentum[2][ipart], 2 ) ); + / sqrt( s->particles->Momentum[0][ipart] * s->particles->Momentum[0][ipart] + + s->particles->Momentum[1][ipart] * s->particles->Momentum[1][ipart] + + s->particles->Momentum[2][ipart] * s->particles->Momentum[2][ipart] ); } } }; diff --git a/src/ElectroMagnSolver/MF_Solver2D_Bouchard.cpp b/src/ElectroMagnSolver/MF_Solver2D_Bouchard.cpp index a0e9b876b..eff43788a 100755 --- a/src/ElectroMagnSolver/MF_Solver2D_Bouchard.cpp +++ b/src/ElectroMagnSolver/MF_Solver2D_Bouchard.cpp @@ -26,9 +26,9 @@ MF_Solver2D_Bouchard::MF_Solver2D_Bouchard(Params ¶ms) // On the axes v_phi^max = 1.01c and is below c @ 0.54 kxdx/pi // So there could existe a numerical cherenkov emission at this point // On the diagonal v_phi^max = 1.01c and is below c @ 0.85 sqrt((kxdx)^2+(kydy)^2) - double delta = 0.1222*(1-pow(2.,2))/4. ; - double beta = -0.1727*(1-0.5*pow(2.,2)-4.*delta)/4. ; - double alpha = 1-2.*beta-3.*delta; + double delta = -0.09165000000000001;//0.1222*(1-pow (2.,2))/4. ; + double beta = 0.0273470450;//-0.1727*(1-0.5*pow (2.,2)-4.*delta)/4. 
; + double alpha = 1.22025591; //1-2.*beta-3.*delta; beta_xy = beta; beta_yx = beta; diff --git a/src/ElectroMagnSolver/MF_Solver2D_Lehe.cpp b/src/ElectroMagnSolver/MF_Solver2D_Lehe.cpp index 049fcaaea..8c92e6ec1 100755 --- a/src/ElectroMagnSolver/MF_Solver2D_Lehe.cpp +++ b/src/ElectroMagnSolver/MF_Solver2D_Lehe.cpp @@ -13,9 +13,9 @@ MF_Solver2D_Lehe::MF_Solver2D_Lehe( Params ¶ms ) dx = params.cell_length[0]; dy = params.cell_length[1]; - beta_yx = 1./8.; - beta_xy = pow( dx/dy, 2 )/8.; - delta_x = ( 1./4. )*( 1.-pow( sin( M_PI*dt_ov_dx/2. )/dt_ov_dx, 2 ) ); + beta_yx = 0.125; + beta_xy = dx*dx/(dy*dy)*0.125; + delta_x = 0.25*( 1.-( sin( M_PI*dt_ov_dx*0.5 )/dt_ov_dx) * ( sin( M_PI*dt_ov_dx*0.5 )/dt_ov_dx)); alpha_y = 1.-2.*beta_yx; alpha_x = 1.-2.*beta_xy-3.*delta_x; diff --git a/src/ElectroMagnSolver/MF_Solver3D_Bouchard.cpp b/src/ElectroMagnSolver/MF_Solver3D_Bouchard.cpp index 6fca97804..20143690f 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Bouchard.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Bouchard.cpp @@ -29,9 +29,9 @@ MF_Solver3D_Bouchard::MF_Solver3D_Bouchard( Params ¶ms ) WARNING( "Bouchard solver requires dx/dt = 2 (Magical Timestep)" ); } - double delta = 0.1222*(1-pow(2.,2))/4. ; - double beta = -0.1727*(1-0.5*pow(2.,2)-4.*delta)/4. ; - double alpha = 1-4.*beta-3.*delta ; + double delta = -0.0916500000000000;//0.1222*(1-pow (2.,2))/4. ; + double beta = 0.027347044999999997;//-0.1727*(1-0.5*pow (2.,2)-4.*delta)/4. 
; + double alpha = 1.16556182; //1-4.*beta-3.*delta ; delta_x = delta ; delta_y = delta ; diff --git a/src/ElectroMagnSolver/MF_Solver3D_Lehe.cpp b/src/ElectroMagnSolver/MF_Solver3D_Lehe.cpp index 9806a6bf9..558248d01 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Lehe.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Lehe.cpp @@ -14,10 +14,10 @@ MF_Solver3D_Lehe::MF_Solver3D_Lehe( Params ¶ms ) dy = params.cell_length[1]; dz = params.cell_length[2]; - beta_yx = 1./8.; // = beta_zx as well but we define and use only 1 variable - beta_xy = pow( dx/dy, 2 )/8.; - beta_xz = pow( dx/dz, 2 )/8.; - delta_x = ( 1./4. )*( 1.-pow( sin( M_PI*dt_ov_dx/2. )/dt_ov_dx, 2 ) ); + beta_yx = 0.125; // = beta_zx as well but we define and use only 1 variable + beta_xy = dx*dx/(dy*dy)*0.125; + beta_xz = dx*dx/(dz*dz)*0.125; + delta_x = 0.25*( 1.-( sin( M_PI*dt_ov_dx*0.5 )/dt_ov_dx ) * ( sin( M_PI*dt_ov_dx*0.5 )/dt_ov_dx )); alpha_y = 1. - 2.*beta_yx; // = alpha_z as well but we define and use only 1 variable alpha_x = 1. - 2.*beta_xy - 2.*beta_xz - 3.*delta_x ; diff --git a/src/ElectroMagnSolver/MF_SolverAM_Lehe.cpp b/src/ElectroMagnSolver/MF_SolverAM_Lehe.cpp index 33d2cc712..6ed034f3a 100644 --- a/src/ElectroMagnSolver/MF_SolverAM_Lehe.cpp +++ b/src/ElectroMagnSolver/MF_SolverAM_Lehe.cpp @@ -10,9 +10,9 @@ MF_SolverAM_Lehe::MF_SolverAM_Lehe( Params ¶ms ) : SolverAM( params ) { - beta_rl = 1./4.; - beta_tl = 1./4.; - delta_l = ( 1./4. )*( 1.-pow( sin( M_PI*dt_ov_dl/2. )/dt_ov_dl, 2 ) ); + beta_rl = 0.25; + beta_tl = 0.25; + delta_l = 0.25 * ( 1.- ( sin( M_PI * dt_ov_dl * 0.5 )/dt_ov_dl )*( sin( M_PI*dt_ov_dl * 0.5 )/dt_ov_dl ) ); alpha_r = 1. - 2.*beta_rl; alpha_t = 1. 
- 2.*beta_tl; diff --git a/src/ElectroMagnSolver/PML_Solver2D_Bouchard.cpp b/src/ElectroMagnSolver/PML_Solver2D_Bouchard.cpp index ac07fb047..5c498a68d 100644 --- a/src/ElectroMagnSolver/PML_Solver2D_Bouchard.cpp +++ b/src/ElectroMagnSolver/PML_Solver2D_Bouchard.cpp @@ -29,9 +29,9 @@ PML_Solver2D_Bouchard::PML_Solver2D_Bouchard(Params ¶ms): // On the axes v_phi^max = 1.01c and is below c @ 0.54 kxdx/pi // So there could existe a numerical cherenkov emission at this point // On the diagonal v_phi^max = 1.01c and is below c @ 0.85 sqrt((kxdx)^2+(kydy)^2) - double delta = 0.1222*(1-pow(2.,2))/4. ; - double beta = -0.1727*(1-0.5*pow(2.,2)-4.*delta)/4. ; - double alpha = 1-2.*beta-3.*delta; + double delta = -0.0916500000000000;//0.1222*(1-pow (2.,2))/4. ; + double beta = 0.027347045;//-0.1727*(1-0.5*pow (2.,2)-4.*delta)/4. ; + double alpha = 1.22025591; //1-2.*beta-3.*delta; beta_xy = beta; beta_yx = beta; diff --git a/src/ElectroMagnSolver/PML_Solver2D_Envelope.cpp b/src/ElectroMagnSolver/PML_Solver2D_Envelope.cpp index 7eb9c8810..5d31f5b2e 100644 --- a/src/ElectroMagnSolver/PML_Solver2D_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_Solver2D_Envelope.cpp @@ -482,19 +482,19 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_x_pml )( i, j ) = +kappa_prime_x_p[i]*sigma_x_p[i] ; ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - sigma_prime_x_p[i]*kappa_x_p[i] ; - ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - alpha_prime_x_p[i]*pow(kappa_x_p[i],2) ; - ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) * -1. * pow(sigma_x_p[i],2) * dA_over_dx_fdtd / pow(kappa_x_p[i],4) ; + ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - alpha_prime_x_p[i]*kappa_x_p[i]*kappa_x_p[i] ; + ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) * -1. 
* sigma_x_p[i]*sigma_x_p[i] * dA_over_dx_fdtd / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_np1_x_pml )( i, j ) ; ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_nm1_x_pml )( i, j ) ; //( *u3_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0*0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_np1_x_pml )( i, j ) ; //( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0*0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0*0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_nm1_x_pml )( i, j ) ; // 2. update u2 - ( *u2_np1_x_pml )( i, j ) = -1.*(2.*sigma_prime_x_p[i]*kappa_x_p[i]+pow(kappa_x_p[i],2)*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx_fdtd ; + ( *u2_np1_x_pml )( i, j ) = -1.*(2.*sigma_prime_x_p[i]*kappa_x_p[i]+kappa_x_p[i]*kappa_x_p[i]*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx_fdtd ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2_fdtd ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) * sigma_x_p[i] ; - ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - pow(kappa_x_p[i],3)*0.5*( ( *u3_np1_x_pml )( i, j ) + ( *u3_nm1_x_pml )( i, j ) ) ; - ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) / pow(kappa_x_p[i],4) ; + ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u3_np1_x_pml )( i, j ) + ( *u3_nm1_x_pml )( i, j ) ) ; + ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_x_pml )( 
i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u2_np1_x_pml )( i, j ) ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u2_nm1_x_pml )( i, j ) ; @@ -503,9 +503,9 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 3. update u1 ( *u1_np1_x_pml )( i, j ) = -1.*( 3*kappa_prime_x_p[i]*sigma_x_p[i] - sigma_prime_x_p[i]*kappa_x_p[i] ) * dA_over_dx_fdtd ; ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2_fdtd ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*i1*k0*sigma_x_p[i]*pow(kappa_x_p[i],2) * dA_over_dx_fdtd ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) - pow(kappa_x_p[i],3)*0.5*( ( *u2_np1_x_pml )( i, j ) + ( *u2_nm1_x_pml )( i, j ) ) ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) / pow(kappa_x_p[i],4) ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*i1*k0*sigma_x_p[i]*kappa_x_p[i]*kappa_x_p[i] * dA_over_dx_fdtd ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u2_np1_x_pml )( i, j ) + ( *u2_nm1_x_pml )( i, j ) ) ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u1_np1_x_pml )( i, j ) ; ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u1_nm1_x_pml )( i, j ) ; @@ -515,11 +515,11 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : 
Correction/source terms - source_term_x = ( kappa_x_p[i] - pow(kappa_x_p[i],3) )*d2A_over_dx2_fdtd ; + source_term_x = ( kappa_x_p[i] - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) )*d2A_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_x_p[i]*dA_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_x_p[i],2) - 2.*i1*k0*pow(kappa_x_p[i],3) ) * dA_over_dx_fdtd; - source_term_x = source_term_x + pow(kappa_x_p[i],3)*0.5*( ( *u1_np1_x_pml )( i, j ) + ( *u1_nm1_x_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_x_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_x_p[i]*kappa_x_p[i] - 2.*i1*k0*(kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ) * dA_over_dx_fdtd; + source_term_x = source_term_x + (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u1_np1_x_pml )( i, j ) + ( *u1_nm1_x_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // ---- // source_term_y = ( 1. - pow(c_yx_kappa*kappa_x_p[i],2) )*d2A_over_dy2 ; // source_term_y = source_term_y - pow(c_yx_kappa*kappa_x_p[i],2)*0.5*( ( *u1_np1_y_pml )( i, j ) + ( *u1_nm1_y_pml )( i, j ) ) ; @@ -613,19 +613,19 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_x_pml )( i, j ) = +kappa_prime_x_p[i]*sigma_x_p[i] ; ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - sigma_prime_x_p[i]*kappa_x_p[i] ; - ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - alpha_prime_x_p[i]*pow(kappa_x_p[i],2) ; - ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) * -1. * pow(sigma_x_p[i],2) * dA_over_dx_fdtd / pow(kappa_x_p[i],4) ; + ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) - alpha_prime_x_p[i]*kappa_x_p[i]*kappa_x_p[i] ; + ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) * -1. 
* sigma_x_p[i]*sigma_x_p[i] * dA_over_dx_fdtd / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_np1_x_pml )( i, j ) ; ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_nm1_x_pml )( i, j ) ; // ( *u3_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0*0. + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_np1_x_pml )( i, j ) ; // ( *u3_np1_x_pml )( i, j ) = ( *u3_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0*0. + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0*0. + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u3_nm1_x_pml )( i, j ) ; // 2. update u2 - ( *u2_np1_x_pml )( i, j ) = -1.*(2.*sigma_prime_x_p[i]*kappa_x_p[i]+pow(kappa_x_p[i],2)*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx_fdtd ; + ( *u2_np1_x_pml )( i, j ) = -1.*(2.*sigma_prime_x_p[i]*kappa_x_p[i]+kappa_x_p[i]*kappa_x_p[i]*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx_fdtd ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2_fdtd ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) * sigma_x_p[i] ; - ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - pow(kappa_x_p[i],3)*0.5*( ( *u3_np1_x_pml )( i, j ) + ( *u3_nm1_x_pml )( i, j ) ) ; - ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) / pow(kappa_x_p[i],4) ; + ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u3_np1_x_pml )( i, j ) + ( *u3_nm1_x_pml )( i, j ) ) ; + ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave (
*u2_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u2_np1_x_pml )( i, j ) ; ( *u2_np1_x_pml )( i, j ) = ( *u2_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u2_nm1_x_pml )( i, j ) ; @@ -634,9 +634,9 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 3. update u1 ( *u1_np1_x_pml )( i, j ) = -1.*( 3*kappa_prime_x_p[i]*sigma_x_p[i] - sigma_prime_x_p[i]*kappa_x_p[i] ) * dA_over_dx_fdtd ; ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2_fdtd ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*i1*k0*sigma_x_p[i]*pow(kappa_x_p[i],2) * dA_over_dx_fdtd ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) - pow(kappa_x_p[i],3)*0.5*( ( *u2_np1_x_pml )( i, j ) + ( *u2_nm1_x_pml )( i, j ) ) ; - ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) / pow(kappa_x_p[i],4) ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + 2.*i1*k0*sigma_x_p[i]*kappa_x_p[i]*kappa_x_p[i] * dA_over_dx_fdtd ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u2_np1_x_pml )( i, j ) + ( *u2_nm1_x_pml )( i, j ) ) ; + ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_x_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u1_np1_x_pml )( i, j ) ; ( *u1_np1_x_pml )( i, j ) = ( *u1_np1_x_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )/( 2.-dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) )*( *u1_nm1_x_pml )( i, j ) ; @@ -647,19 +647,19 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Y-PML ------ ( *u3_np1_y_pml )( i, j ) = 
+kappa_prime_y_p[j]*sigma_y_p[j] ; ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) - sigma_prime_y_p[j]*kappa_y_p[j] ; - ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) - alpha_prime_y_p[j]*pow(kappa_y_p[j],2) ; - ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) * -1. * pow(sigma_y_p[j],2) * dA_over_dy / pow(kappa_y_p[j],4) ; + ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) - alpha_prime_y_p[j]*kappa_y_p[j]*kappa_y_p[j] ; + ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) * -1. * sigma_y_p[j]*sigma_y_p[j] * dA_over_dy / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_y_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u3_np1_y_pml )( i, j ) ; ( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )/( 2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u3_nm1_y_pml )( i, j ) ; //( *u3_np1_y_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0*0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u3_np1_y_pml )( i, j ) ; //( *u3_np1_y_pml )( i, j ) = ( *u3_np1_y_pml )( i, j ) + ( 2.+dt*(i1*k0*0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )/( 2.-dt*(i1*k0*0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u3_nm1_y_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_y_pml )( i, j ) = -1.*(2.*sigma_prime_y_p[j]*kappa_y_p[j]+pow(kappa_y_p[j],2)*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; + ( *u2_np1_y_pml )( i, j ) = -1.*(2.*sigma_prime_y_p[j]*kappa_y_p[j]+kappa_y_p[j]*kappa_y_p[j]*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) - sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) * sigma_y_p[j] ; - ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) - pow(kappa_y_p[j],3)*0.5*( ( *u3_np1_y_pml )( i, j ) + ( *u3_nm1_y_pml )( i, j ) ) ; - ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) / pow(kappa_y_p[j],4) ; + ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u3_np1_y_pml )( i, j ) + ( *u3_nm1_y_pml )( i, j ) ) ; + ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_y_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u2_np1_y_pml )( i, j ) ; ( *u2_np1_y_pml )( i, j ) = ( *u2_np1_y_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )/( 2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u2_nm1_y_pml )( i, j ) ; @@ -668,8 +668,8 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 3. 
update u1 ( *u1_np1_y_pml )( i, j ) = -1.*( 3*kappa_prime_y_p[j]*sigma_y_p[j] - sigma_prime_y_p[j]*kappa_y_p[j] ) * dA_over_dy ; ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) + 2.*sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; - ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) - pow(kappa_y_p[j],3)*0.5*( ( *u2_np1_y_pml )( i, j ) + ( *u2_nm1_y_pml )( i, j ) ) ; - ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) / pow(kappa_y_p[j],4) ; + ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u2_np1_y_pml )( i, j ) + ( *u2_nm1_y_pml )( i, j ) ) ; + ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_y_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u1_np1_y_pml )( i, j ) ; ( *u1_np1_y_pml )( i, j ) = ( *u1_np1_y_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )/( 2.-dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) )*( *u1_nm1_y_pml )( i, j ) ; @@ -679,16 +679,16 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_x_p[i] - pow(kappa_x_p[i],3) )*d2A_over_dx2_fdtd ; + source_term_x = ( kappa_x_p[i] - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) )*d2A_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_x_p[i]*dA_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_x_p[i],2) - 2.*i1*k0*pow(kappa_x_p[i],3) ) * dA_over_dx_fdtd; - source_term_x = source_term_x + pow(kappa_x_p[i],3)*0.5*( ( *u1_np1_x_pml )( i, j ) + ( *u1_nm1_x_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_x_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_x_p[i]*kappa_x_p[i] - 
2.*i1*k0*(kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ) * dA_over_dx_fdtd; + source_term_x = source_term_x + (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u1_np1_x_pml )( i, j ) + ( *u1_nm1_x_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // ---- - source_term_y = ( kappa_y_p[j] - pow(kappa_y_p[j],3) )*d2A_over_dy2 ; + source_term_y = ( kappa_y_p[j] - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) )*d2A_over_dy2 ; source_term_y = source_term_y - kappa_prime_y_p[j]*dA_over_dy ; - source_term_y = source_term_y + pow(kappa_y_p[j],3)*0.5*( ( *u1_np1_y_pml )( i, j ) + ( *u1_nm1_y_pml )( i, j ) ) ; - source_term_y = dt*dt*source_term_y / pow(kappa_y_p[j],3) ; + source_term_y = source_term_y + (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u1_np1_y_pml )( i, j ) + ( *u1_nm1_y_pml )( i, j ) ) ; + source_term_y = dt*dt*source_term_y / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // ---- ( *A_np1_pml )( i, j ) = 1.*source_term_x + 1.*source_term_y - dt*dt*( *Chi_n_pml )( i, j )*( *A_n_pml )( i, j ) ; // ( *A_np1_pml )( i, j ) = 0; diff --git a/src/ElectroMagnSolver/PML_Solver3D_Bouchard.cpp b/src/ElectroMagnSolver/PML_Solver3D_Bouchard.cpp index 6cea7ff5b..5b4aa4ed7 100644 --- a/src/ElectroMagnSolver/PML_Solver3D_Bouchard.cpp +++ b/src/ElectroMagnSolver/PML_Solver3D_Bouchard.cpp @@ -29,9 +29,9 @@ PML_Solver3D_Bouchard::PML_Solver3D_Bouchard( Params ¶ms ): WARNING( "Bouchard solver requires dx/dt = 2 (Magical Timestep)" ); } - double delta = 0.1222*(1-pow(2.,2))/4. ; - double beta = -0.1727*(1-0.5*pow(2.,2)-4.*delta)/4. ; - double alpha = 1-4.*beta-3.*delta ; + double delta = -0.0916500000000000;//0.1222*(1-pow (2.,2))/4. ; + double beta = 0.027347045;//-0.1727*(1-0.5*pow (2.,2)-4.*delta)/4. 
; + double alpha = 1.16556182;//1-4.*beta-3.*delta ; delta_x = delta ; delta_y = delta ; diff --git a/src/ElectroMagnSolver/PML_Solver3D_Envelope.cpp b/src/ElectroMagnSolver/PML_Solver3D_Envelope.cpp index 6b43e09c3..3dca14b03 100644 --- a/src/ElectroMagnSolver/PML_Solver3D_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_Solver3D_Envelope.cpp @@ -566,27 +566,27 @@ void PML_Solver3D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_x_pml )( i, j, k ) = -kappa_prime_x_p[i]*sigma_x_p[i] ; ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + sigma_prime_x_p[i]*kappa_x_p[i] ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*pow(kappa_x_p[i],2) ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * pow(sigma_x_p[i],2) * dA_over_dx / pow(kappa_x_p[i],4) ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*kappa_x_p[i]*kappa_x_p[i] ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * sigma_x_p[i]*sigma_x_p[i] * dA_over_dx / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_x_pml )( i, j, k ) = ( ( *u3_np1_x_pml )( i, j, k ) - ( *u3_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; //( *u3_np1_x_pml )( i, j, k ) = ( ( *u3_np1_x_pml )( i, j, k ) - ( *u3_nm1_x_pml )( i, j, k )*( 1. + 1.0*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 1.0*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 2. 
update u2 - ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+pow(kappa_x_p[i],2)*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; + ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+kappa_x_p[i]*kappa_x_p[i]*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) + sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) * sigma_x_p[i] ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; - //( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*( *u3_np1_x_pml )( i, j, k ) ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; + //( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*( *u3_np1_x_pml )( i, j, k ) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_x_pml )( i, j, k ) = ( ( *u2_np1_x_pml )( i, j, k ) - ( *u2_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; //( *u2_np1_x_pml )( i, j, k ) = ( ( *u2_np1_x_pml )( i, j, k ) - ( *u2_nm1_x_pml )( i, j, k )*( 1. + 1.0*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 1.0*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 3. 
update u1 ( *u1_np1_x_pml )( i, j, k ) = ( sigma_prime_x_p[i]*kappa_x_p[i] - 3*kappa_prime_x_p[i]*sigma_x_p[i] ) * dA_over_dx ; ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) + 2.*sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; - //( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*( *u2_np1_x_pml )( i, j, k ) ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; + //( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*( *u2_np1_x_pml )( i, j, k ) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_x_pml )( i, j, k ) = ( ( *u1_np1_x_pml )( i, j, k ) - ( *u1_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; //( *u1_np1_x_pml )( i, j, k ) = ( ( *u1_np1_x_pml )( i, j, k ) - ( *u1_nm1_x_pml )( i, j, k )*( 1. + 1.0*dt*( i1*k0 + alpha_x_p[i] + sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 1.0*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. 
) ; @@ -594,10 +594,10 @@ void PML_Solver3D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_x_p[i] - pow(kappa_x_p[i],3) )*d2A_over_dx2 ; + source_term_x = ( kappa_x_p[i] - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) )*d2A_over_dx2 ; source_term_x = source_term_x - kappa_prime_x_p[i]*dA_over_dx ; - source_term_x = source_term_x - pow(kappa_x_p[i],3)*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_x_p[i],3) ; + source_term_x = source_term_x - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // ---- ( *A_np1_pml )( i, j, k ) = 1.*source_term_x - dt*dt*( *A_n_pml )( i, j, k )*( *Chi_n_pml )(i, j, k) ; //( *A_np1_pml )( i, j, k ) = 0; @@ -682,60 +682,60 @@ void PML_Solver3D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_x_pml )( i, j, k ) = -kappa_prime_x_p[i]*sigma_x_p[i] ; ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + sigma_prime_x_p[i]*kappa_x_p[i] ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*pow(kappa_x_p[i],2) ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * pow(sigma_x_p[i],2) * dA_over_dx / pow(kappa_x_p[i],4) ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*kappa_x_p[i]*kappa_x_p[i] ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * sigma_x_p[i]*sigma_x_p[i] * dA_over_dx / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_x_pml )( i, j, k ) = ( ( *u3_np1_x_pml )( i, j, k ) - ( *u3_nm1_x_pml )( i, j, k )*( 1. 
+ 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 2. update u2 - ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+pow(kappa_x_p[i],2)*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; + ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+kappa_x_p[i]*kappa_x_p[i]*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) + sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) * sigma_x_p[i] ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_x_pml )( i, j, k ) = ( ( *u2_np1_x_pml )( i, j, k ) - ( *u2_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 3. 
update u1 ( *u1_np1_x_pml )( i, j, k ) = ( sigma_prime_x_p[i]*kappa_x_p[i] - 3*kappa_prime_x_p[i]*sigma_x_p[i] ) * dA_over_dx ; ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) + 2.*sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_x_pml )( i, j, k ) = ( ( *u1_np1_x_pml )( i, j, k ) - ( *u1_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 1. update u3 ( *u3_np1_y_pml )( i, j, k ) = -kappa_prime_y_p[j]*sigma_y_p[j] ; ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + sigma_prime_y_p[j]*kappa_y_p[j] ; - ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + alpha_prime_y_p[j]*pow(kappa_y_p[j],2) ; - ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) * pow(sigma_y_p[j],2) * dA_over_dy / pow(kappa_y_p[j],4) ; + ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + alpha_prime_y_p[j]*kappa_y_p[j]*kappa_y_p[j] ; + ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) * sigma_y_p[j]*sigma_y_p[j] * dA_over_dy / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_y_pml )( i, j, k ) = ( ( *u3_np1_y_pml )( i, j, k ) - ( *u3_nm1_y_pml )( i, j, k )*( 1. 
+ 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. ) ; // 2. update u2 - ( *u2_np1_y_pml )( i, j, k ) = (2.*sigma_prime_y_p[j]*kappa_y_p[j]+pow(kappa_y_p[j],2)*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; + ( *u2_np1_y_pml )( i, j, k ) = (2.*sigma_prime_y_p[j]*kappa_y_p[j]+kappa_y_p[j]*kappa_y_p[j]*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) + sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) * sigma_y_p[j] ; - ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) - pow(kappa_y_p[j],3)*0.5*( ( *u3_np1_y_pml )( i, j, k ) + ( *u3_nm1_y_pml )( i, j, k ) ) ; - ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) / pow(kappa_y_p[j],4) ; + ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u3_np1_y_pml )( i, j, k ) + ( *u3_nm1_y_pml )( i, j, k ) ) ; + ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_y_pml )( i, j, k ) = ( ( *u2_np1_y_pml )( i, j, k ) - ( *u2_nm1_y_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. ) ; // 3. 
update u1 ( *u1_np1_y_pml )( i, j, k ) = ( sigma_prime_y_p[j]*kappa_y_p[j] - 3*kappa_prime_y_p[j]*sigma_y_p[j] ) * dA_over_dy ; ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) + 2.*sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; - ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) - pow(kappa_y_p[j],3)*0.5*( ( *u2_np1_y_pml )( i, j, k ) + ( *u2_nm1_y_pml )( i, j, k ) ) ; - ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) / pow(kappa_y_p[j],4) ; + ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u2_np1_y_pml )( i, j, k ) + ( *u2_nm1_y_pml )( i, j, k ) ) ; + ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_y_pml )( i, j, k ) = ( ( *u1_np1_y_pml )( i, j, k ) - ( *u1_nm1_y_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. 
) ; // ---- // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_x_p[i] - pow(kappa_x_p[i],3) )*d2A_over_dx2 ; + source_term_x = ( kappa_x_p[i] - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) )*d2A_over_dx2 ; source_term_x = source_term_x - kappa_prime_x_p[i]*dA_over_dx ; - source_term_x = source_term_x - pow(kappa_x_p[i],3)*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_x_p[i],3) ; + source_term_x = source_term_x - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // ---- - source_term_y = ( kappa_y_p[j] - pow(kappa_y_p[j],3) )*d2A_over_dy2 ; + source_term_y = ( kappa_y_p[j] - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) )*d2A_over_dy2 ; source_term_y = source_term_y - kappa_prime_y_p[j]*dA_over_dy ; - source_term_y = source_term_y - pow(kappa_y_p[j],3)*0.5*( ( *u1_np1_y_pml )( i, j, k ) + ( *u1_nm1_y_pml )( i, j, k ) ) ; - source_term_y = dt*dt*source_term_y / pow(kappa_y_p[j],3) ; + source_term_y = source_term_y - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u1_np1_y_pml )( i, j, k ) + ( *u1_nm1_y_pml )( i, j, k ) ) ; + source_term_y = dt*dt*source_term_y / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // ---- ( *A_np1_pml )( i, j, k ) = 1.*source_term_x + 1.*source_term_y - dt*dt*( *A_n_pml )( i, j, k )*( *Chi_n_pml )(i, j, k) ; // ( *A_np1_pml )( i, j, k ) = 0; @@ -823,87 +823,87 @@ void PML_Solver3D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. 
update u3 ( *u3_np1_x_pml )( i, j, k ) = -kappa_prime_x_p[i]*sigma_x_p[i] ; ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + sigma_prime_x_p[i]*kappa_x_p[i] ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*pow(kappa_x_p[i],2) ; - ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * pow(sigma_x_p[i],2) * dA_over_dx / pow(kappa_x_p[i],4) ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) + alpha_prime_x_p[i]*kappa_x_p[i]*kappa_x_p[i] ; + ( *u3_np1_x_pml )( i, j, k ) = ( *u3_np1_x_pml )( i, j, k ) * sigma_x_p[i]*sigma_x_p[i] * dA_over_dx / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_x_pml )( i, j, k ) = ( ( *u3_np1_x_pml )( i, j, k ) - ( *u3_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 2. 
update u2 - ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+pow(kappa_x_p[i],2)*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; + ( *u2_np1_x_pml )( i, j, k ) = (2.*sigma_prime_x_p[i]*kappa_x_p[i]+kappa_x_p[i]*kappa_x_p[i]*alpha_prime_x_p[i]-3.*kappa_prime_x_p[i]*sigma_x_p[i])*dA_over_dx ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) + sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) * sigma_x_p[i] ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; - ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u3_np1_x_pml )( i, j, k ) + ( *u3_nm1_x_pml )( i, j, k ) ) ; + ( *u2_np1_x_pml )( i, j, k ) = ( *u2_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_x_pml )( i, j, k ) = ( ( *u2_np1_x_pml )( i, j, k ) - ( *u2_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 3. 
update u1 ( *u1_np1_x_pml )( i, j, k ) = ( sigma_prime_x_p[i]*kappa_x_p[i] - 3*kappa_prime_x_p[i]*sigma_x_p[i] ) * dA_over_dx ; ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) + 2.*sigma_x_p[i]*kappa_x_p[i]*d2A_over_dx2 ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - pow(kappa_x_p[i],3)*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; - ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / pow(kappa_x_p[i],4) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u2_np1_x_pml )( i, j, k ) + ( *u2_nm1_x_pml )( i, j, k ) ) ; + ( *u1_np1_x_pml )( i, j, k ) = ( *u1_np1_x_pml )( i, j, k ) / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_x_pml )( i, j, k ) = ( ( *u1_np1_x_pml )( i, j, k ) - ( *u1_nm1_x_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_x_p[i]+sigma_x_p[i]/kappa_x_p[i]) - 1. ) ; // 1. update u3 ( *u3_np1_y_pml )( i, j, k ) = -kappa_prime_y_p[j]*sigma_y_p[j] ; ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + sigma_prime_y_p[j]*kappa_y_p[j] ; - ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + alpha_prime_y_p[j]*pow(kappa_y_p[j],2) ; - ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) * pow(sigma_y_p[j],2) * dA_over_dy / pow(kappa_y_p[j],4) ; + ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) + alpha_prime_y_p[j]*kappa_y_p[j]*kappa_y_p[j] ; + ( *u3_np1_y_pml )( i, j, k ) = ( *u3_np1_y_pml )( i, j, k ) * sigma_y_p[j]*sigma_y_p[j] * dA_over_dy / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_y_pml )( i, j, k ) = ( ( *u3_np1_y_pml )( i, j, k ) - ( *u3_nm1_y_pml )( i, j, k )*( 1. 
+ 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. ) ; // 2. update u2 - ( *u2_np1_y_pml )( i, j, k ) = (2.*sigma_prime_y_p[j]*kappa_y_p[j]+pow(kappa_y_p[j],2)*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; + ( *u2_np1_y_pml )( i, j, k ) = (2.*sigma_prime_y_p[j]*kappa_y_p[j]+kappa_y_p[j]*kappa_y_p[j]*alpha_prime_y_p[j]-3.*kappa_prime_y_p[j]*sigma_y_p[j])*dA_over_dy ; ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) + sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) * sigma_y_p[j] ; - ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) - pow(kappa_y_p[j],3)*0.5*( ( *u3_np1_y_pml )( i, j, k ) + ( *u3_nm1_y_pml )( i, j, k ) ) ; - ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) / pow(kappa_y_p[j],4) ; + ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u3_np1_y_pml )( i, j, k ) + ( *u3_nm1_y_pml )( i, j, k ) ) ; + ( *u2_np1_y_pml )( i, j, k ) = ( *u2_np1_y_pml )( i, j, k ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_y_pml )( i, j, k ) = ( ( *u2_np1_y_pml )( i, j, k ) - ( *u2_nm1_y_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. ) ; // 3. 
update u1 ( *u1_np1_y_pml )( i, j, k ) = ( sigma_prime_y_p[j]*kappa_y_p[j] - 3*kappa_prime_y_p[j]*sigma_y_p[j] ) * dA_over_dy ; ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) + 2.*sigma_y_p[j]*kappa_y_p[j]*d2A_over_dy2 ; - ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) - pow(kappa_y_p[j],3)*0.5*( ( *u2_np1_y_pml )( i, j, k ) + ( *u2_nm1_y_pml )( i, j, k ) ) ; - ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) / pow(kappa_y_p[j],4) ; + ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u2_np1_y_pml )( i, j, k ) + ( *u2_nm1_y_pml )( i, j, k ) ) ; + ( *u1_np1_y_pml )( i, j, k ) = ( *u1_np1_y_pml )( i, j, k ) / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_y_pml )( i, j, k ) = ( ( *u1_np1_y_pml )( i, j, k ) - ( *u1_nm1_y_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_y_p[j]+sigma_y_p[j]/kappa_y_p[j]) - 1. ) ; // 1. update u3 ( *u3_np1_z_pml )( i, j, k ) = -kappa_prime_z_p[k]*sigma_z_p[k] ; ( *u3_np1_z_pml )( i, j, k ) = ( *u3_np1_z_pml )( i, j, k ) + sigma_prime_z_p[k]*kappa_z_p[k] ; - ( *u3_np1_z_pml )( i, j, k ) = ( *u3_np1_z_pml )( i, j, k ) + alpha_prime_z_p[k]*pow(kappa_z_p[k],2) ; - ( *u3_np1_z_pml )( i, j, k ) = ( *u3_np1_z_pml )( i, j, k ) * pow(sigma_z_p[k],2) * dA_over_dz / pow(kappa_z_p[k],4) ; + ( *u3_np1_z_pml )( i, j, k ) = ( *u3_np1_z_pml )( i, j, k ) + alpha_prime_z_p[k]*kappa_z_p[k]*kappa_z_p[k] ; + ( *u3_np1_z_pml )( i, j, k ) = ( *u3_np1_z_pml )( i, j, k ) * sigma_z_p[k]*sigma_z_p[k] * dA_over_dz / (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_z_pml )( i, j, k ) = ( ( *u3_np1_z_pml )( i, j, k ) - ( *u3_nm1_z_pml )( i, j, k )*( 1. 
+ 0.5*dt*( i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k]) - 1. ) ; // 2. update u2 - ( *u2_np1_z_pml )( i, j, k ) = (2.*sigma_prime_z_p[k]*kappa_z_p[k]+pow(kappa_z_p[k],2)*alpha_prime_z_p[k]-3.*kappa_prime_z_p[k]*sigma_z_p[k])*dA_over_dz ; + ( *u2_np1_z_pml )( i, j, k ) = (2.*sigma_prime_z_p[k]*kappa_z_p[k]+kappa_z_p[k]*kappa_z_p[k]*alpha_prime_z_p[k]-3.*kappa_prime_z_p[k]*sigma_z_p[k])*dA_over_dz ; ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) + sigma_z_p[k]*kappa_z_p[k]*d2A_over_dz2 ; ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) * sigma_z_p[k] ; - ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) - pow(kappa_z_p[k],3)*0.5*( ( *u3_np1_z_pml )( i, j, k ) + ( *u3_nm1_z_pml )( i, j, k ) ) ; - ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) / pow(kappa_z_p[k],4) ; + ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) - (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k])*0.5*( ( *u3_np1_z_pml )( i, j, k ) + ( *u3_nm1_z_pml )( i, j, k ) ) ; + ( *u2_np1_z_pml )( i, j, k ) = ( *u2_np1_z_pml )( i, j, k ) / (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_z_pml )( i, j, k ) = ( ( *u2_np1_z_pml )( i, j, k ) - ( *u2_nm1_z_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k]) - 1. ) ; // 3. 
update u1 ( *u1_np1_z_pml )( i, j, k ) = ( sigma_prime_z_p[k]*kappa_z_p[k] - 3*kappa_prime_z_p[k]*sigma_z_p[k] ) * dA_over_dz ; ( *u1_np1_z_pml )( i, j, k ) = ( *u1_np1_z_pml )( i, j, k ) + 2.*sigma_z_p[k]*kappa_z_p[k]*d2A_over_dz2 ; - ( *u1_np1_z_pml )( i, j, k ) = ( *u1_np1_z_pml )( i, j, k ) - pow(kappa_z_p[k],3)*0.5*( ( *u2_np1_z_pml )( i, j, k ) + ( *u2_nm1_z_pml )( i, j, k ) ) ; - ( *u1_np1_z_pml )( i, j, k ) = ( *u1_np1_z_pml )( i, j, k ) / pow(kappa_z_p[k],4) ; + ( *u1_np1_z_pml )( i, j, k ) = ( *u1_np1_z_pml )( i, j, k ) - (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k])*0.5*( ( *u2_np1_z_pml )( i, j, k ) + ( *u2_nm1_z_pml )( i, j, k ) ) ; + ( *u1_np1_z_pml )( i, j, k ) = ( *u1_np1_z_pml )( i, j, k ) / (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_z_pml )( i, j, k ) = ( ( *u1_np1_z_pml )( i, j, k ) - ( *u1_nm1_z_pml )( i, j, k )*( 1. + 0.5*dt*( i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_z_p[k]+sigma_z_p[k]/kappa_z_p[k]) - 1. 
) ; // ---- // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_x_p[i] - pow(kappa_x_p[i],3) )*d2A_over_dx2 ; + source_term_x = ( kappa_x_p[i] - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) )*d2A_over_dx2 ; source_term_x = source_term_x - kappa_prime_x_p[i]*dA_over_dx ; - source_term_x = source_term_x - pow(kappa_x_p[i],3)*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_x_p[i],3) ; + source_term_x = source_term_x - (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i])*0.5*( ( *u1_np1_x_pml )( i, j, k ) + ( *u1_nm1_x_pml )( i, j, k ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_x_p[i]*kappa_x_p[i]*kappa_x_p[i]) ; // ---- - source_term_y = ( kappa_y_p[j] - pow(kappa_y_p[j],3) )*d2A_over_dy2 ; + source_term_y = ( kappa_y_p[j] - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) )*d2A_over_dy2 ; source_term_y = source_term_y - kappa_prime_y_p[j]*dA_over_dy ; - source_term_y = source_term_y - pow(kappa_y_p[j],3)*0.5*( ( *u1_np1_y_pml )( i, j, k ) + ( *u1_nm1_y_pml )( i, j, k ) ) ; - source_term_y = dt*dt*source_term_y / pow(kappa_y_p[j],3) ; + source_term_y = source_term_y - (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j])*0.5*( ( *u1_np1_y_pml )( i, j, k ) + ( *u1_nm1_y_pml )( i, j, k ) ) ; + source_term_y = dt*dt*source_term_y / (kappa_y_p[j]*kappa_y_p[j]*kappa_y_p[j]) ; // ---- - source_term_z = ( kappa_z_p[k] - pow(kappa_z_p[k],3) )*d2A_over_dz2 ; + source_term_z = ( kappa_z_p[k] - (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]) )*d2A_over_dz2 ; source_term_z = source_term_z - kappa_prime_z_p[k]*dA_over_dz ; - source_term_z = source_term_z - pow(kappa_z_p[k],3)*0.5*( ( *u1_np1_z_pml )( i, j, k ) + ( *u1_nm1_z_pml )( i, j, k ) ) ; - source_term_z = dt*dt*source_term_z / pow(kappa_z_p[k],3) ; + source_term_z = source_term_z - (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k])*0.5*( ( *u1_np1_z_pml )( i, j, k ) + ( *u1_nm1_z_pml )( i, j, k ) ) ; + source_term_z = 
dt*dt*source_term_z / (kappa_z_p[k]*kappa_z_p[k]*kappa_z_p[k]) ; // ---- ( *A_np1_pml )( i, j, k ) = 1.*source_term_x + 1.*source_term_y + 1.*source_term_z - dt*dt*( *A_n_pml )( i, j, k )*( *Chi_n_pml )(i, j, k) ; // ( *A_np1_pml )( i, j, k ) = 0; diff --git a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp index d8c65645a..9362bc824 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp @@ -430,26 +430,26 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * pow(sigma_l_p[i],2) * dG_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * sigma_l_p[i]*sigma_l_p[i] * dG_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dG_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2 ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dG_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dG_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -457,11 +457,11 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2G_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2G_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dG_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dG_over_dx_fdtd; - source_term_x = source_term_x - 
pow(kappa_l_p[i],3)*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dG_over_dx_fdtd; + source_term_x = source_term_x - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // Test ADE Scheme // ( *G_np1_pml )( i, j ) = 0. ; ( *G_np1_pml )( i, j ) = 1.*source_term_x - dt*dt*( *G_n_pml )( i, j )*( *Chi_n_pml )(i, j) ; @@ -506,26 +506,26 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * pow(sigma_l_p[i],2) * dA_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * sigma_l_p[i]*sigma_l_p[i] * dA_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dA_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dA_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2A_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dA_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2A_over_dx2 ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dA_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dA_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -533,11 +533,11 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2A_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2A_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dA_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dA_over_dx_fdtd; - source_term_x = source_term_x - 
pow(kappa_l_p[i],3)*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dA_over_dx_fdtd; + source_term_x = source_term_x - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // ( *A_np1_pml )( i, j ) = 0. ; // 4.b Envelope FDTD with intermediate variable ( *A_np1_pml )( i, j ) = 1.*source_term_x - dt*dt*( *A_n_pml )( i, j )*( *Chi_n_pml )(i, j) ; @@ -552,33 +552,33 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // // // // // 1. update u3 // // // // ( *u3_np1_l_pml )( i, j ) = -kappa_prime_l_p[i]*sigma_l_p[i] ; // // // // ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + sigma_prime_l_p[i]*kappa_l_p[i] ; - // // // // ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - // // // // ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * pow(sigma_l_p[i],2) * dG_over_dx / pow(kappa_l_p[i],4) ; + // // // // ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + // // // // ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * sigma_l_p[i]*sigma_l_p[i] * dG_over_dx / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // // // // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave // // // // ( *u3_np1_l_pml )( i, j ) = ( ( *u3_np1_l_pml )( i, j ) - ( *u3_nm1_l_pml )( i, j )*( 1. + 0.5*dt*( i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i]) - 1. ) ; // // // // // 2. 
update u2 - // // // // ( *u2_np1_l_pml )( i, j ) = (2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx ; + // // // // ( *u2_np1_l_pml )( i, j ) = (2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx ; // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2 ; // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + // // // // ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // // // // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave // // // // ( *u2_np1_l_pml )( i, j ) = ( ( *u2_np1_l_pml )( i, j ) - ( *u2_nm1_l_pml )( i, j )*( 1. + 0.5*dt*( i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) ) / dt ) * dt / ( 0.5*dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i]) - 1. ) ; // // // // // 3. 
update u1 // // // // ( *u1_np1_l_pml )( i, j ) = ( sigma_prime_l_p[i]*kappa_l_p[i] - 3*kappa_prime_l_p[i]*sigma_l_p[i] ) * dG_over_dx ; // // // // ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2 ; - // // // // ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - // // // // ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + // // // // ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + // // // // ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // // // // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave // // // // ( *u1_np1_l_pml )( i, j ) = ( ( *u1_np1_l_pml )( i, j ) - ( *u1_nm1_l_pml )( i, j )*( 1. + 0.5*dt*( i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) ) / dt ) *dt / ( 0.5*dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i]) - 1. 
) ; // // // // // ---- // // // // // Envelop udpate with correction/source terms // // // // // ---- // // // // // 4.a update A : Correction/source terms - // // // // source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2G_over_dx2 ; + // // // // source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2G_over_dx2 ; // // // // source_term_x = source_term_x - kappa_prime_l_p[i]*dG_over_dx ; - // // // // source_term_x = source_term_x - pow(kappa_l_p[i],3)*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - // // // // source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + // // // // source_term_x = source_term_x - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + // // // // source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // // // // Test ADE Scheme // // // // ( *G_np1_pml )( i, j ) = 0. ; // // // // ( *G_np1_pml )( i, j ) = 1.*source_term_x ; @@ -671,26 +671,26 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. 
update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * pow(sigma_l_p[i],2) * dG_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * sigma_l_p[i]*sigma_l_p[i] * dG_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dG_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dG_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dG_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -700,26 +700,26 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // 1. 
update u3 ( *u3_np1_r_pml )( i, j ) = +kappa_prime_r_p[j]*sigma_r_p[j] ; ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - sigma_prime_r_p[j]*kappa_r_p[j] ; - ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - alpha_prime_r_p[j]*pow(kappa_r_p[j],2) ; - ( *u3_np1_r_pml )( i, j ) = - ( *u3_np1_r_pml )( i, j ) * pow(sigma_r_p[j],2) * dG_over_dy / pow(kappa_r_p[j],4) ; + ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - alpha_prime_r_p[j]*kappa_r_p[j]*kappa_r_p[j] ; + ( *u3_np1_r_pml )( i, j ) = - ( *u3_np1_r_pml )( i, j ) * sigma_r_p[j]*sigma_r_p[j] * dG_over_dy / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u3_np1_r_pml )( i, j ) ; ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u3_nm1_r_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_r_pml )( i, j ) = -1.*(2.*sigma_prime_r_p[j]*kappa_r_p[j]+pow(kappa_r_p[j],2)*alpha_prime_r_p[j]-3.*kappa_prime_r_p[j]*sigma_r_p[j])*dG_over_dy ; + ( *u2_np1_r_pml )( i, j ) = -1.*(2.*sigma_prime_r_p[j]*kappa_r_p[j]+kappa_r_p[j]*kappa_r_p[j]*alpha_prime_r_p[j]-3.*kappa_prime_r_p[j]*sigma_r_p[j])*dG_over_dy ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - sigma_r_p[j]*kappa_r_p[j]*d2G_over_dy2 ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) * sigma_r_p[j] ; - ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - pow(kappa_r_p[j],3)*0.5*( ( *u3_np1_r_pml )( i, j ) + ( *u3_nm1_r_pml )( i, j ) ) ; - ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) / pow(kappa_r_p[j],4) ; + ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u3_np1_r_pml )( i, j ) + ( *u3_nm1_r_pml )( i, j ) ) ; + ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u2_np1_r_pml )( i, j ) ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u2_nm1_r_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_r_pml )( i, j ) = -1.*( 3*kappa_prime_r_p[j]*sigma_r_p[j] - sigma_prime_r_p[j]*kappa_r_p[j] ) * dG_over_dy ; ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) + 2.*sigma_r_p[j]*kappa_r_p[j]*d2G_over_dy2 ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - sigma_r_p[j]*pow(kappa_r_p[j],2) * dA_over_dy ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - pow(kappa_r_p[j],3)*0.5*( ( *u2_np1_r_pml )( i, j ) + ( *u2_nm1_r_pml )( i, j ) ) ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) / pow(kappa_r_p[j],4) ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - sigma_r_p[j]*kappa_r_p[j]*kappa_r_p[j] * dA_over_dy ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u2_np1_r_pml )( i, j ) + ( *u2_nm1_r_pml )( i, j ) ) ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u1_np1_r_pml )( i, j ) ; ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u1_nm1_r_pml )( i, j ) ; @@ -727,17 +727,17 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2G_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2G_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dG_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dG_over_dx_fdtd; - source_term_x = source_term_x + pow(kappa_l_p[i],3)*0.5*( ( 
*u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dG_over_dx_fdtd; + source_term_x = source_term_x + (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; - source_term_y = ( kappa_r_p[j] - pow(kappa_r_p[j],3) )*d2G_over_dy2 ; + source_term_y = ( kappa_r_p[j] - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) )*d2G_over_dy2 ; source_term_y = source_term_y - kappa_prime_r_p[j]*dG_over_dy ; - source_term_y = source_term_y + ( pow(kappa_r_p[j],3) - pow(kappa_r_p[j],2) )*dA_over_dy ; - source_term_y = source_term_y + pow(kappa_r_p[j],3)*0.5*( ( *u1_np1_r_pml )( i, j ) + ( *u1_nm1_r_pml )( i, j ) ) ; - source_term_y = dt*dt*source_term_y / pow(kappa_r_p[j],3) ; + source_term_y = source_term_y + ( (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) - kappa_r_p[j]*kappa_r_p[j] )*dA_over_dy ; + source_term_y = source_term_y + (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u1_np1_r_pml )( i, j ) + ( *u1_nm1_r_pml )( i, j ) ) ; + source_term_y = dt*dt*source_term_y / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // ---- // Test ADE Scheme // ( *G_np1_pml )( i, j ) = 0 ; // No decay diff --git a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp index c2a5c4087..0a11858c2 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp @@ -435,26 +435,26 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // 1. 
update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * pow(sigma_l_p[i],2) * dG_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * sigma_l_p[i]*sigma_l_p[i] * dG_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dG_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2 ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dG_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dG_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -462,11 +462,11 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2G_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2G_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dG_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dG_over_dx_fdtd; - source_term_x = source_term_x - 
pow(kappa_l_p[i],3)*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dG_over_dx_fdtd; + source_term_x = source_term_x - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // // Test ADE Scheme // ( *G_np1_pml )( i, j ) = 0. ; ( *G_np1_pml )( i, j ) = 1.*source_term_x - dt*dt*( *G_n_pml )( i, j )*( *Chi_n_pml )(i, j) ; @@ -516,26 +516,26 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // 1. update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * pow(sigma_l_p[i],2) * dA_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) * -1. * sigma_l_p[i]*sigma_l_p[i] * dA_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dA_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dA_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2A_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dA_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2A_over_dx2 ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dA_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dA_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -543,11 +543,11 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2A_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2A_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dA_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dA_over_dx_fdtd; - source_term_x = source_term_x - 
pow(kappa_l_p[i],3)*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dA_over_dx_fdtd; + source_term_x = source_term_x - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // ( *A_np1_pml )( i, j ) = 0. ; // 4.b Envelope FDTD with intermediate variable ( *A_np1_pml )( i, j ) = 1.*source_term_x - dt*dt*( *A_n_pml )( i, j )*( *Chi_n_pml )(i, j) ; @@ -627,26 +627,26 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // 1. update u3 ( *u3_np1_l_pml )( i, j ) = +kappa_prime_l_p[i]*sigma_l_p[i] ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - sigma_prime_l_p[i]*kappa_l_p[i] ; - ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*pow(kappa_l_p[i],2) ; - ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * pow(sigma_l_p[i],2) * dG_over_dx_fdtd / pow(kappa_l_p[i],4) ; + ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) - alpha_prime_l_p[i]*kappa_l_p[i]*kappa_l_p[i] ; + ( *u3_np1_l_pml )( i, j ) = - ( *u3_np1_l_pml )( i, j ) * sigma_l_p[i]*sigma_l_p[i] * dG_over_dx_fdtd / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_np1_l_pml )( i, j ) ; ( *u3_np1_l_pml )( i, j ) = ( *u3_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u3_nm1_l_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+pow(kappa_l_p[i],2)*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; + ( *u2_np1_l_pml )( i, j ) = -1*(2.*sigma_prime_l_p[i]*kappa_l_p[i]+kappa_l_p[i]*kappa_l_p[i]*alpha_prime_l_p[i]-3.*kappa_prime_l_p[i]*sigma_l_p[i])*dG_over_dx_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) * sigma_l_p[i] ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; - ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u3_np1_l_pml )( i, j ) + ( *u3_nm1_l_pml )( i, j ) ) ; + ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_np1_l_pml )( i, j ) ; ( *u2_np1_l_pml )( i, j ) = ( *u2_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u2_nm1_l_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_l_pml )( i, j ) = -1.*( 3*kappa_prime_l_p[i]*sigma_l_p[i] - sigma_prime_l_p[i]*kappa_l_p[i] ) * dG_over_dx_fdtd ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*sigma_l_p[i]*kappa_l_p[i]*d2G_over_dx2_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*pow(kappa_l_p[i],2) * dG_over_dx_fdtd ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - pow(kappa_l_p[i],3)*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; - ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / pow(kappa_l_p[i],4) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + 2.*i1*k0*sigma_l_p[i]*kappa_l_p[i]*kappa_l_p[i] * dG_over_dx_fdtd ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u2_np1_l_pml )( i, j ) + ( *u2_nm1_l_pml )( i, j ) ) ; + ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_l_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_np1_l_pml )( i, j ) ; ( *u1_np1_l_pml )( i, j ) = ( *u1_np1_l_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )/( 2.-dt*(i1*k0 + alpha_l_p[i]+sigma_l_p[i]/kappa_l_p[i] ) )*( *u1_nm1_l_pml )( i, j ) ; @@ -656,26 +656,26 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // 1. 
update u3 ( *u3_np1_r_pml )( i, j ) = +kappa_prime_r_p[j]*sigma_r_p[j] ; ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - sigma_prime_r_p[j]*kappa_r_p[j] ; - ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - alpha_prime_r_p[j]*pow(kappa_r_p[j],2) ; - ( *u3_np1_r_pml )( i, j ) = - ( *u3_np1_r_pml )( i, j ) * pow(sigma_r_p[j],2) * dG_over_dy / pow(kappa_r_p[j],4) ; + ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) - alpha_prime_r_p[j]*kappa_r_p[j]*kappa_r_p[j] ; + ( *u3_np1_r_pml )( i, j ) = - ( *u3_np1_r_pml )( i, j ) * pow(sigma_r_p[j],2) * dG_over_dy / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u3 : Be carefull, u3 has to be considered like an envelop * a carrier wave ( *u3_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u3_np1_r_pml )( i, j ) ; ( *u3_np1_r_pml )( i, j ) = ( *u3_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u3_nm1_r_pml )( i, j ) ; // 2. 
update u2 - ( *u2_np1_r_pml )( i, j ) = -1.*(2.*sigma_prime_r_p[j]*kappa_r_p[j]+pow(kappa_r_p[j],2)*alpha_prime_r_p[j]-3.*kappa_prime_r_p[j]*sigma_r_p[j])*dG_over_dy ; + ( *u2_np1_r_pml )( i, j ) = -1.*(2.*sigma_prime_r_p[j]*kappa_r_p[j]+kappa_r_p[j]*kappa_r_p[j]*alpha_prime_r_p[j]-3.*kappa_prime_r_p[j]*sigma_r_p[j])*dG_over_dy ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - sigma_r_p[j]*kappa_r_p[j]*d2G_over_dy2 ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) * sigma_r_p[j] ; - ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - pow(kappa_r_p[j],3)*0.5*( ( *u3_np1_r_pml )( i, j ) + ( *u3_nm1_r_pml )( i, j ) ) ; - ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) / pow(kappa_r_p[j],4) ; + ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u3_np1_r_pml )( i, j ) + ( *u3_nm1_r_pml )( i, j ) ) ; + ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u2 : Be carefull, u2 has to be considered like an envelop * a carrier wave ( *u2_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u2_np1_r_pml )( i, j ) ; ( *u2_np1_r_pml )( i, j ) = ( *u2_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u2_nm1_r_pml )( i, j ) ; // 3. 
update u1 ( *u1_np1_r_pml )( i, j ) = -1.*( 3*kappa_prime_r_p[j]*sigma_r_p[j] - sigma_prime_r_p[j]*kappa_r_p[j] ) * dG_over_dy ; ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) + 2.*sigma_r_p[j]*kappa_r_p[j]*d2G_over_dy2 ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - sigma_r_p[j]*pow(kappa_r_p[j],2) * dA_over_dy ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - pow(kappa_r_p[j],3)*0.5*( ( *u2_np1_r_pml )( i, j ) + ( *u2_nm1_r_pml )( i, j ) ) ; - ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) / pow(kappa_r_p[j],4) ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - sigma_r_p[j]*kappa_r_p[j]*kappa_r_p[j] * dA_over_dy ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u2_np1_r_pml )( i, j ) + ( *u2_nm1_r_pml )( i, j ) ) ; + ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // time operation on u1 : Be carefull, u1 has to be considered like an envelop * a carrier wave ( *u1_np1_r_pml )( i, j ) = (2.*dt)/(2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u1_np1_r_pml )( i, j ) ; ( *u1_np1_r_pml )( i, j ) = ( *u1_np1_r_pml )( i, j ) + ( 2.+dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )/( 2.-dt*(i1*k0 + alpha_r_p[j]+sigma_r_p[j]/kappa_r_p[j] ) )*( *u1_nm1_r_pml )( i, j ) ; @@ -683,17 +683,17 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // Envelop udpate with correction/source terms // ---- // 4.a update A : Correction/source terms - source_term_x = ( kappa_l_p[i] - pow(kappa_l_p[i],3) )*d2G_over_dx2_fdtd ; + source_term_x = ( kappa_l_p[i] - (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) )*d2G_over_dx2_fdtd ; source_term_x = source_term_x - kappa_prime_l_p[i]*dG_over_dx_fdtd ; - source_term_x = source_term_x + ( 2.*i1*k0*pow(kappa_l_p[i],2) - 2.*i1*k0*pow(kappa_l_p[i],3) ) * dG_over_dx_fdtd; - source_term_x = source_term_x + pow(kappa_l_p[i],3)*0.5*( ( 
*u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; - source_term_x = dt*dt*source_term_x / pow(kappa_l_p[i],3) ; + source_term_x = source_term_x + ( 2.*i1*k0*kappa_l_p[i]*kappa_l_p[i] - 2.*i1*k0*(kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ) * dG_over_dx_fdtd; + source_term_x = source_term_x + (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i])*0.5*( ( *u1_np1_l_pml )( i, j ) + ( *u1_nm1_l_pml )( i, j ) ) ; + source_term_x = dt*dt*source_term_x / (kappa_l_p[i]*kappa_l_p[i]*kappa_l_p[i]) ; - source_term_y = ( kappa_r_p[j] - pow(kappa_r_p[j],3) )*d2G_over_dy2 ; + source_term_y = ( kappa_r_p[j] - (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) )*d2G_over_dy2 ; source_term_y = source_term_y - kappa_prime_r_p[j]*dG_over_dy ; - source_term_y = source_term_y + ( pow(kappa_r_p[j],3) - pow(kappa_r_p[j],2) )*dA_over_dy ; - source_term_y = source_term_y + pow(kappa_r_p[j],3)*0.5*( ( *u1_np1_r_pml )( i, j ) + ( *u1_nm1_r_pml )( i, j ) ) ; - source_term_y = dt*dt*source_term_y / pow(kappa_r_p[j],3) ; + source_term_y = source_term_y + ( (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) - kappa_r_p[j]*kappa_r_p[j] )*dA_over_dy ; + source_term_y = source_term_y + (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j])*0.5*( ( *u1_np1_r_pml )( i, j ) + ( *u1_nm1_r_pml )( i, j ) ) ; + source_term_y = dt*dt*source_term_y / (kappa_r_p[j]*kappa_r_p[j]*kappa_r_p[j]) ; // ---- // Test ADE Scheme // ( *G_np1_pml )( i, j ) = 0 ; // No decay diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index f85e735de..a252bda1e 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -596,7 +596,7 @@ void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Pa // Primal idx_p[0] = round( xpn ); // index of the central point delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node - delta2 = pow( delta_p[0], 2 ); // square of the normalized distance to the central node + delta2 = delta_p[0] * 
delta_p[0]; // square of the normalized distance to the central node // 2nd order interpolation on 3 nodes coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 ); diff --git a/src/Interpolator/Interpolator1D2Order.h b/src/Interpolator/Interpolator1D2Order.h index 44e6651d4..a4f62e945 100755 --- a/src/Interpolator/Interpolator1D2Order.h +++ b/src/Interpolator/Interpolator1D2Order.h @@ -69,7 +69,7 @@ class Interpolator1D2Order final : public Interpolator1D coeffxd[2] = 0.5 * ( delta2 + delta + 0.25 ); delta = xpn - static_cast( idx_p[0] ); - delta2 = delta * delta; // pow( delta_p[0], 2 ); // square of the normalized distance to the central node + delta2 = delta * delta; // pow ( delta_p[0], 2 ); // square of the normalized distance to the central node delta_p[0] = delta; // normalized distance to the central node coeffxp[0] = 0.5 * ( delta2 - delta_p[0] + 0.25 ); diff --git a/src/Ionization/IonizationTunnel.cpp b/src/Ionization/IonizationTunnel.cpp index abadcc5bd..e94ec51a4 100755 --- a/src/Ionization/IonizationTunnel.cpp +++ b/src/Ionization/IonizationTunnel.cpp @@ -39,7 +39,7 @@ IonizationTunnel::IonizationTunnel( Params ¶ms, Species *species ) : Ionizat double cst = ( ( double )Z+1.0 ) * sqrt( 2.0/Potential[Z] ); alpha_tunnel[Z] = cst-1.0; beta_tunnel[Z] = pow( 2, alpha_tunnel[Z] ) * ( 8.*Azimuthal_quantum_number[Z]+4.0 ) / ( cst*tgamma( cst ) ) * Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow( 2.0*Potential[Z], 1.5 ); + gamma_tunnel[Z] = 2.0 * sqrt( 2.0*Potential[Z] * 2.0*Potential[Z] * 2.0*Potential[Z] ); } DEBUG( "Finished Creating the Tunnel Ionizaton class" ); @@ -73,9 +73,9 @@ void IonizationTunnel::operator()( Particles *particles, unsigned int ipart_min, } // Absolute value of the electric field normalized in atomic units - E = EC_to_au * sqrt( pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 ) ); + E = EC_to_au * sqrt( (*( Ex+ipart-ipart_ref )) * (*( Ex+ipart-ipart_ref )) + + (*( Ey+ipart-ipart_ref 
)) * (*( Ey+ipart-ipart_ref )) + + (*( Ez+ipart-ipart_ref )) * (*( Ez+ipart-ipart_ref )) ); if( E<1e-10 ) { continue; } @@ -208,9 +208,9 @@ void IonizationTunnel::ionizationTunnelWithTasks( Particles *particles, unsigned } // Absolute value of the electric field normalized in atomic units - E = EC_to_au * sqrt( pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 ) ); + E = EC_to_au * sqrt( (*( Ex+ipart-ipart_ref )) * (*( Ex+ipart-ipart_ref )) + + (*( Ey+ipart-ipart_ref )) * (*( Ey+ipart-ipart_ref )) + + (*( Ez+ipart-ipart_ref )) * (*( Ez+ipart-ipart_ref )) ); if( E<1e-10 ) { continue; } diff --git a/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp b/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp index f53ec02f2..b3993a736 100644 --- a/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp +++ b/src/Ionization/IonizationTunnelEnvelopeAveraged.cpp @@ -40,8 +40,8 @@ IonizationTunnelEnvelopeAveraged::IonizationTunnelEnvelopeAveraged( Params ¶ double cst = ( ( double )Z+1.0 ) * sqrt( 2.0/Potential[Z] ); alpha_tunnel[Z] = cst-1.0; // 2(n^*)-1 beta_tunnel[Z] = pow( 2, alpha_tunnel[Z] ) * ( 8.*Azimuthal_quantum_number[Z]+4.0 ) / ( cst*tgamma( cst ) ) * Potential[Z] * au_to_w0; - gamma_tunnel[Z] = 2.0 * pow( 2.0*Potential[Z], 1.5 ); // 2*(2I_p)^{3/2} - Ip_times2_to_minus3ov4[Z] = pow(2.*Potential[Z],-0.75); // (2I_p)^{-3/4} + gamma_tunnel[Z] = 2.0 * sqrt( 2.0*Potential[Z] * 2.0*Potential[Z] * 2.0*Potential[Z] ); // 2*(2I_p)^{3/2} + Ip_times2_to_minus3ov4[Z] = 1.0 / sqrt(sqrt((2.*Potential[Z] * 2.*Potential[Z] * 2.*Potential[Z]))); // (2I_p)^{-3/4} } ellipticity = params.envelope_ellipticity; @@ -82,13 +82,14 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Absolute value of the electric field |E_plasma| (from the plasma) normalized in atomic units - E_sq = pow(EC_to_au,2) * (pow( *( Ex+ipart-ipart_ref ), 2 ) - +pow( *( Ey+ipart-ipart_ref ), 2 ) - +pow( *( Ez+ipart-ipart_ref ), 2 
) ); + E_sq = (EC_to_au * EC_to_au) * ( ( *( Ex+ipart-ipart_ref ) ) * ( *( Ex+ipart-ipart_ref ) ) + + ( *( Ey+ipart-ipart_ref ) ) * ( *( Ey+ipart-ipart_ref ) ) + + ( *( Ez+ipart-ipart_ref ) ) * ( *( Ez+ipart-ipart_ref ) ) ); // Laser envelope electric field normalized in atomic units, using both transverse and longitudinal components: // |E_envelope|^2 = |Env_E|^2 + |Env_Ex|^2 - EnvE_sq = pow(EC_to_au,2)*( pow( *( E_env+ipart-ipart_ref ), 2 ) ) + pow(EC_to_au,2)*( pow( *( Ex_env+ipart-ipart_ref ), 2 ) ); + EnvE_sq = (EC_to_au * EC_to_au) * ( *( E_env+ipart-ipart_ref )) * ( *( E_env+ipart-ipart_ref ) ) + (EC_to_au * EC_to_au) * + ( *( Ex_env+ipart-ipart_ref ) ) * ( *( Ex_env+ipart-ipart_ref ) ); // Effective electric field for ionization: // |E| = sqrt(|E_plasma|^2+|E_envelope|^2) @@ -109,7 +110,7 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Corrections on averaged ionization rate given by the polarization ellipticity if( ellipticity==0. ){ // linear polarization - coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/delta*2.,0.5); + coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/delta*2.); } else if( ellipticity==1. ){ // circular polarization coeff_ellipticity_in_ionization_rate = 1.; // for circular polarization, the ionization rate is unchanged } @@ -145,8 +146,8 @@ void IonizationTunnelEnvelopeAveraged::envelopeIonization( Particles *particles, // Corrections on averaged ionization rate given by the polarization ellipticity if( ellipticity==0. ){ // linear polarization - //coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/(gamma_tunnel[newZ-1]*invE)*2.,0.5); - coeff_ellipticity_in_ionization_rate = pow((3./M_PI)/delta*2.,0.5); + //coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/(gamma_tunnel[newZ-1]*invE)*2.); + coeff_ellipticity_in_ionization_rate = sqrt((3./M_PI)/delta*2.); } else if( ellipticity==1. 
){ // circular polarization coeff_ellipticity_in_ionization_rate = 1.; // for circular polarization, the ionization rate is unchanged } diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 8136f36ff..98c316337 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -448,7 +448,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) { // Momentum - const double p = std::sqrt( std::pow( 1.+pair_chi[0]*inv_chiph_gammaph, 2 )-1 ); + const double p = std::sqrt( ( 1.+pair_chi[0]*inv_chiph_gammaph)*( 1.+pair_chi[0]*inv_chiph_gammaph) - 1 ); pair0_momentum_x[ipair] = p*ux; pair0_momentum_y[ipair] = p*uy; pair0_momentum_z[ipair] = p*uz; @@ -511,7 +511,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) { // Momentum - const double p = std::sqrt( std::pow( 1.+pair_chi[1]*inv_chiph_gammaph, 2 )-1 ); + const double p = std::sqrt( ( 1.+pair_chi[1]*inv_chiph_gammaph) * ( 1.+pair_chi[1]*inv_chiph_gammaph) - 1 ); pair1_momentum_x[ipair] = p*ux; pair1_momentum_y[ipair] = p*uy; pair1_momentum_z[ipair] = p*uz; @@ -569,7 +569,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, for( int idNew=nparticles-mBW_pair_creation_sampling_[k]; idNew= T_.size_-1 ) { ichiph = T_.size_-2; - dNBWdt = 2.067731275227008*std::pow( photon_chi, 5.0/3.0 ); + dNBWdt = 2.067731275227008 * cbrt(photon_chi*photon_chi*photon_chi*photon_chi*photon_chi); } else { // Upper and lower values for linear interpolation const double logchiphm = ichiph*T_.delta_ + T_.log10_min_; diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index 9128a5c11..d19af4e81 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ 
-1839,7 +1839,7 @@ void Params::multiple_decompose_3D() // Number of domain in 3D // Decomposition in 2 times, X and larger side double tmp = (double)(number_of_patches[0]*number_of_patches[0]) / (double)(number_of_patches[1]*number_of_patches[2]); - number_of_region[0] = min( sz, max(1, (int) pow( (double)sz*tmp, 1./3. ) ) ); + number_of_region[0] = min( sz, max(1, (int) (cbrt (sz*tmp)) ) ); int rest = (int)(sz / number_of_region[0]); while ( (int)number_of_region[0]*rest != sz ) { diff --git a/src/ParticleBC/BoundaryConditionType.h b/src/ParticleBC/BoundaryConditionType.h index 137438eb7..25038e8f1 100755 --- a/src/ParticleBC/BoundaryConditionType.h +++ b/src/ParticleBC/BoundaryConditionType.h @@ -25,14 +25,6 @@ inline double perp_rand( Random * rand ) { return a; } - -/*inline double perp_rand_gpu( Random * rand ) { - double a = userFunctions::erfinv_v3( rand->uniform1() ); - if( rand->cointoss() ) { - a *= -1.; - } - return a; -}*/ /** * copied from erfinv_DP_1.cu by Prof. Mike Giles. 
* https://people.maths.ox.ac.uk/gilesm/ @@ -148,9 +140,6 @@ inline double perp_rand_gpu_v3(uint32_t xorshift32_state) { } return a; } -//*/ - - void internal_inf( Species *species, int imin, int imax, int direction, double limit_inf, double dt, std::vector &invgf, Random * rand, double &energy_change ); diff --git a/src/Particles/ParticleCreator.cpp b/src/Particles/ParticleCreator.cpp index 270d0fee1..dc12e6089 100644 --- a/src/Particles/ParticleCreator.cpp +++ b/src/Particles/ParticleCreator.cpp @@ -843,7 +843,7 @@ void ParticleCreator::createMomentum( std::string momentum_initialization, for( unsigned int p=iPart; puniform2() ); double theta = rand->uniform_2pi(); - double psm = sqrt( pow( 1.0+energies[p-iPart], 2 )-1.0 ); + double psm = sqrt( ( 1.0 + energies[p-iPart]) * ( 1.0 + energies[p-iPart]) - 1.0 ); particles->momentum( 0, p ) = psm*cos( theta )*sin( phi ); particles->momentum( 1, p ) = psm*sin( theta )*sin( phi ); @@ -1018,7 +1018,7 @@ std::vector ParticleCreator::maxwellJuttner( Species * species, unsigned // Calculate the inverse of F lnlnU = log( -log( U ) ); if( lnlnU>2. ) { - invF = 3.*sqrt( M_PI )/4. * pow( U, 2./3. ); + invF = 3.*sqrt( M_PI )/4. * cbrt( U*U ); } else if( lnlnU<-19. ) { invF = 1.; } else { @@ -1047,7 +1047,7 @@ std::vector ParticleCreator::maxwellJuttner( Species * species, unsigned // Calculate the inverse of H at the point log(1.-U) + H0 lnU = log( -log( 1.-U ) - H0 ); if( lnU<-26. ) { - invH = pow( -6.*U, 1./3. ); + invH = cbrt( -6.*U ); } else if( lnU>12. ) { invH = -U + 11.35 * pow( -U, 0.06 ); } else { diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 20b9c2ea6..f1b327f3c 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -310,7 +310,7 @@ class Particles //! 
Method used to get the Particle Lorentz factor inline double LorentzFactor( unsigned int ipart ) { - return sqrt( 1.+pow( momentum( 0, ipart ), 2 )+pow( momentum( 1, ipart ), 2 )+pow( momentum( 2, ipart ), 2 ) ); + return sqrt( 1. + momentum( 0, ipart ) * momentum( 0, ipart ) + momentum( 1, ipart ) * momentum( 1, ipart ) + momentum( 2, ipart ) * momentum( 2, ipart ) ); } //! Method used to get the inverse Particle Lorentz factor @@ -322,7 +322,7 @@ class Particles //! Method used to get the momentum norm which is also the normalized photon energy inline double momentumNorm( unsigned int ipart ) { - return sqrt( pow( momentum( 0, ipart ), 2 )+pow( momentum( 1, ipart ), 2 )+pow( momentum( 2, ipart ), 2 ) ); + return sqrt( momentum( 0, ipart ) * momentum( 0, ipart ) + momentum( 1, ipart ) * momentum( 1, ipart ) + momentum( 2, ipart ) * momentum( 2, ipart ) ); } void resetIds() diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 1a96ab654..b24877bcd 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -149,10 +149,10 @@ void Patch::initStep3( Params ¶ms, SmileiMPI *smpi, unsigned int n_moved ) min_local_[i] = ( Pcoordinates[i] )*( params.patch_size_[i]*params.cell_length[i] ); max_local_[i] = ( Pcoordinates[i]+1 )*( params.patch_size_[i]*params.cell_length[i] ); cell_starting_global_index[i] += Pcoordinates[i]*params.patch_size_[i]; - cell_starting_global_index_noGC[i] = Pcoordinates[i]*params.patch_size_[i]; + cell_starting_global_index_noGC[i] = Pcoordinates[i]*params.patch_size_[i]; cell_starting_global_index[i] -= params.oversize[i]; center_[i] = ( min_local_[i]+max_local_[i] )*0.5; - radius += pow( max_local_[i] - center_[i] + params.cell_length[i], 2 ); + radius += ( max_local_[i] - center_[i] + params.cell_length[i] ) * ( max_local_[i] - center_[i] + params.cell_length[i] ); } radius = sqrt( radius ); @@ -327,9 +327,9 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d min_local_[iDim] = 
params.offset_map[iDim][ijk[iDim]] * params.cell_length[iDim]; max_local_[iDim] = (params.offset_map[iDim][ijk[iDim]]+params.region_size_[iDim]) * params.cell_length[iDim]; center_[iDim] = ( min_local_[iDim]+max_local_[iDim] )*0.5; - radius += pow( max_local_[iDim] - center_[iDim] + params.cell_length[iDim], 2 ); + radius += ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim]) * ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim]); cell_starting_global_index[iDim] = params.offset_map[iDim][ijk[iDim]]; - cell_starting_global_index_noGC[iDim] = params.offset_map[iDim][ijk[iDim]]; + cell_starting_global_index_noGC[iDim] = params.offset_map[iDim][ijk[iDim]]; // Neighbor before if( ijk[iDim] > 0 ) { unsigned int IJK[3] = { ijk[0], ijk[1], ijk[2] }; @@ -396,7 +396,7 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d max_local_[iDim] = params.global_size_[iDim]*params.cell_length[iDim]; center_[iDim] = ( min_local_[iDim]+max_local_[iDim] )*0.5; - radius += pow( max_local_[iDim] - center_[iDim] + params.cell_length[iDim], 2 ); + radius += ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim] ) * ( max_local_[iDim] - center_[iDim] + params.cell_length[iDim] ); cell_starting_global_index[iDim] = -oversize[iDim]; diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index c85189fff..456624f40 100755 --- a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -117,10 +117,11 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i // beta**2 const double beta2 = Tx*Tx + Ty*Ty + Tz*Tz; + const double Tum = Tx*umx + Ty*umy + Tz*umz; // Equivalent of 1/\gamma_{new} in the paper const double local_invgf = 1./std::sqrt( 0.5*( gfm2 - beta2 + - std::sqrt( (gfm2 - beta2)*(gfm2 - beta2) + 4.0*( beta2 + std::pow( Tx*umx + Ty*umy + Tz*umz, 2 ) ) ) ) ); + std::sqrt( (gfm2 - beta2)*(gfm2 - beta2) + 4.0*( beta2 + Tum * Tum ) ) ) ); // Rotation in the magnetic 
field Tx *= local_invgf; diff --git a/src/Radiation/Radiation.cpp b/src/Radiation/Radiation.cpp index 19ea3f648..e9791dc2a 100755 --- a/src/Radiation/Radiation.cpp +++ b/src/Radiation/Radiation.cpp @@ -83,7 +83,7 @@ void Radiation::computeParticlesChi( Particles &particles, double charge_over_mass2; // 1/mass^2 - double one_over_mass_square = pow( one_over_mass_, 2. ); + double one_over_mass_square = one_over_mass_ * one_over_mass_; // Temporary Lorentz factor double gamma; diff --git a/src/Radiation/Radiation.h b/src/Radiation/Radiation.h index 0755d0f3e..d1b1ee979 100755 --- a/src/Radiation/Radiation.h +++ b/src/Radiation/Radiation.h @@ -76,10 +76,10 @@ class Radiation { return std::fabs( charge_over_mass2 )*inv_norm_E_Schwinger_ - * std::sqrt( std::fabs( std::pow( Ex*px + Ey*py + Ez*pz, 2 ) - - std::pow( gamma*Ex - By*pz + Bz*py, 2 ) - - std::pow( gamma*Ey - Bz*px + Bx*pz, 2 ) - - std::pow( gamma*Ez - Bx*py + By*px, 2 ) ) ); + * std::sqrt( std::fabs( (Ex*px + Ey*py + Ez*pz) * (Ex*px + Ey*py + Ez*pz) + - (gamma*Ex - By*pz + Bz*py) * (gamma*Ex - By*pz + Bz*py) + - (gamma*Ey - Bz*px + Bx*pz) * (gamma*Ey - Bz*px + Bx*pz) + - (gamma*Ez - Bx*py + By*px) * (gamma*Ez - Bx*py + By*px) ) ); }; //! 
Computation of the quantum parameter for the given diff --git a/src/Radiation/RadiationDiagRadiationSpectrum.cpp b/src/Radiation/RadiationDiagRadiationSpectrum.cpp index 32ab07205..5138dfe06 100644 --- a/src/Radiation/RadiationDiagRadiationSpectrum.cpp +++ b/src/Radiation/RadiationDiagRadiationSpectrum.cpp @@ -72,7 +72,7 @@ void RadiationDiagRadiationSpectrum::operator() ( double charge_over_mass2; // 1/mass^2 - const double one_over_mass_2 = std::pow(one_over_mass_,2.); + const double one_over_mass_2 = one_over_mass_ * one_over_mass_; // Temporary Lorentz factor double gamma; diff --git a/src/Radiation/RadiationTables.h b/src/Radiation/RadiationTables.h index 77bcac8e2..77a47f07f 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -131,8 +131,9 @@ class RadiationTables //#pragma omp declare simd static inline double __attribute__((always_inline)) computeRidgersFit( double particle_chi ) { - return std::pow( 1.0 + 4.8*( 1.0+particle_chi )*std::log( 1.0 + 1.7*particle_chi ) - + 2.44*particle_chi*particle_chi, -2.0/3.0 ); + double a = 1.0 + 4.8 * ( 1.0 + particle_chi )*std::log( 1.0 + 1.7 * particle_chi ) + + 2.44 * particle_chi * particle_chi; + return 1.0 / std::cbrt( a * a ); }; //! Get of the classical continuous radiated energy during dt diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 1746c894e..21966e899 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -94,10 +94,7 @@ class RadiationTools { const double chi2 = particle_chi * particle_chi; const double chi3 = chi2 * particle_chi; return chi3*1.9846415503393384 - *std::pow( - 1.0 + (1. + 4.528*particle_chi)*std::log(1.+12.29*particle_chi) + 4.632*chi2 - ,-7./6. - ); + *std::pow(1.0 + (1. + 4.528*particle_chi)*std::log(1.+12.29*particle_chi) + 4.632*chi2,-7./6.); } //! 
Computation of the function g of Erber using the Ridgers @@ -107,10 +104,11 @@ class RadiationTools { #ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif - static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) + static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) // this is a duplicate of computeRidgersFit from radiationTables.h { - return std::pow(1. + 4.8*(1.0+particle_chi)*std::log(1. + 1.7*particle_chi) - + 2.44*particle_chi*particle_chi,-2./3.); + double a = 1.0 + 4.8 * ( 1.0 + particle_chi )*std::log( 1.0 + 1.7 * particle_chi ) + + 2.44 * particle_chi * particle_chi; + return 1.0 / std::cbrt( a * a ); }; // ----------------------------------------------------------------------------- @@ -122,8 +120,8 @@ class RadiationTools { #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) { - if (nu<0.1) return 2.149528241483088*std::pow(nu,-0.6666666666666667) - 1.813799364234217; - else if (nu>10) return 1.253314137315500*std::pow(nu,-0.5)*exp(-nu); + if (nu<0.1) return 2.149528241483088/std::cbrt(nu*nu) - 1.813799364234217; + else if (nu>10) return 1.253314137315500/std::sqrt(nu)*exp(-nu); else { const double lognu = std::log(nu); double lognu_power_n = lognu; @@ -142,10 +140,10 @@ class RadiationTools { return std::exp(f); - /*return exp(-1.042081355552157e-02 * pow(lognu,5) - -5.349995695960174e-02 * pow(lognu,4) - -1.570476212230771e-01 * pow(lognu,3) - -4.575331390887448e-01 * pow(lognu,2) + /*return exp(-1.042081355552157e-02 * pow (lognu,5) + -5.349995695960174e-02 * pow (lognu,4) + -1.570476212230771e-01 * pow (lognu,3) + -4.575331390887448e-01 * pow (lognu,2) -1.687909081004528e+00 * lognu -4.341018460806052e-01) ;*/ } @@ -160,8 +158,8 @@ class RadiationTools { #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) { - if (nu<0.05) return 1.074764120720013*std::pow(nu,-0.6666666666666667); - else if (nu>10) return 
1.253314137315500*std::pow(nu,-0.5)*exp(-nu); + if (nu<0.05) return 1.074764120720013/std::cbrt(nu*nu) ; + else if (nu>10) return 1.253314137315500/std::sqrt(nu)*exp(-nu); else { const double lognu = std::log(nu); double lognu_power_n = lognu; diff --git a/src/Tools/tabulatedFunctions.cpp b/src/Tools/tabulatedFunctions.cpp index e31506cea..adc7a58ff 100755 --- a/src/Tools/tabulatedFunctions.cpp +++ b/src/Tools/tabulatedFunctions.cpp @@ -67,7 +67,7 @@ double erfinv::call( double x ) double val = 0.0; if( x <= erfinv_x_[0] ) { - val = 0.5*sqrt( pi )*x + pi/24.0 *pow( x, 3 ); + val = 0.5*sqrt( pi )*x + pi/24.0 *x*x*x; } else if( x >= erfinv_x_.back() ) { double eta = -log( sqrt( pi )*( 1.0-x ) ); double log_eta = log( eta ); From fbb3a9065153d1c4d686ddbd1368b809617f3a91 Mon Sep 17 00:00:00 2001 From: cprouveur Date: Fri, 8 Nov 2024 13:19:40 +0100 Subject: [PATCH 4/8] validation BC thermal on adastra, had to comment omp single --- src/ParticleBC/BoundaryConditionType.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 3e1fb3333..502055ec9 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -132,16 +132,17 @@ void reflect_particle_wall( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); + double* invgf_p = invgf.data(); #ifdef SMILEI_ACCELERATOR_GPU_OACC - #pragma acc parallel deviceptr(position,momentum, invgf) + #pragma acc parallel deviceptr(position,momentum,invgf_p) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) - #pragma omp target is_device_ptr( position, momentum, invgf ) + #pragma omp target is_device_ptr(position,momentum,invgf_p) #pragma omp teams distribute parallel for 
#endif for (int ipart=imin ; ipart Date: Fri, 8 Nov 2024 16:20:11 +0100 Subject: [PATCH 5/8] test CI --- src/Smilei.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 1c30dfddc..335c37bbc 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -799,7 +799,6 @@ int main( int argc, char *argv[] ) // END MAIN CODE // --------------------------------------------------------------------------------------------------------------------- - int executeTestMode( VectorPatch &vecPatches, Region ®ion, SmileiMPI *smpi, From 3a0928c60dc49bb9ce3337b83069bc0ac3bb8a2b Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Sun, 10 Nov 2024 09:48:11 +0100 Subject: [PATCH 6/8] comment message on OpenMP task --- src/Params/Params.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index d19af4e81..c3424f269 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -1537,12 +1537,12 @@ void Params::print_parallelism_params( SmileiMPI *smpi ) #else MESSAGE( 1, "OpenMP disabled" ); #endif -#ifdef _OMPTASKS - MESSAGE( 1, "OpenMP task parallelization activated"); -#else - MESSAGE( 1, "OpenMP task parallelization not activated"); -#endif - MESSAGE( "" ); +// #ifdef _OMPTASKS +// MESSAGE( 1, "OpenMP task parallelization activated"); +// #else +// MESSAGE( 1, "OpenMP task parallelization not activated"); +// #endif +// MESSAGE( "" ); ostringstream np; np << "Number of patches: " << number_of_patches[0]; From 98dccbd795248f52d0a113da412e9fbb944ac2cd Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Tue, 12 Nov 2024 15:31:19 +0100 Subject: [PATCH 7/8] removed Tools/tabulatedFunctions.* which are not used, userFunction.* is the one used ; updated the doc for GPU install with openmpi and the BC supported --- doc/Sphinx/Understand/GPU_offloading.rst | 2 + doc/Sphinx/Use/GPU_version.rst | 6 +- doc/Sphinx/Use/install_linux_GPU.rst | 20 ++++++ 
src/ParticleBC/BoundaryConditionType.cpp | 1 - src/ParticleBC/BoundaryConditionType.h | 1 - src/ParticleBC/PartBoundCond.h | 1 - src/ParticleBC/PartWall.h | 1 - src/Tools/tabulatedFunctions.cpp | 89 ------------------------ src/Tools/tabulatedFunctions.h | 64 ----------------- 9 files changed, 24 insertions(+), 161 deletions(-) delete mode 100755 src/Tools/tabulatedFunctions.cpp delete mode 100755 src/Tools/tabulatedFunctions.h diff --git a/doc/Sphinx/Understand/GPU_offloading.rst b/doc/Sphinx/Understand/GPU_offloading.rst index 6b2ea599a..d3a82c37c 100644 --- a/doc/Sphinx/Understand/GPU_offloading.rst +++ b/doc/Sphinx/Understand/GPU_offloading.rst @@ -20,6 +20,8 @@ the announced exaflopic supercomputers will include GPUs. * Cartesian geometry in 1D, 2D and in 3D , for order 2 * Diagnostics: Field, Probes, Scalar, ParticleBinning, TrackParticles * Moving Window + * Boundary conditions for Fields: Periodic, reflective and silver-muller are supported (no PML or BM) + * Boundary conditions for Particles: Periodic, Reflective, thermal, remove and stop are supported * A few key features remain to be implemented (AM geometry, ionization, PML, envelope, additional physics), but the fundamentals of the code are ported. diff --git a/doc/Sphinx/Use/GPU_version.rst b/doc/Sphinx/Use/GPU_version.rst index 2228c0ea1..c2633b1a7 100755 --- a/doc/Sphinx/Use/GPU_version.rst +++ b/doc/Sphinx/Use/GPU_version.rst @@ -16,8 +16,6 @@ This page contains the links of this documentation to compile and run SMILEI on ---- -Known issues -^^^^^^^^^^^^ +Important note: -2D and 3D runs may crash with A2000 & A6000 GPUs (used in laptops and worstations respectively, -they are not 'production GPUs' which are designed for 64 bits floating point operations ) +The biggest challenge to execute SMILEI on an accelerator is the correct installation of the openmpi library. It needs to be compiled with nvc++ after configuring (ie. 
./configure --options) with the appropriate options specific to your system diff --git a/doc/Sphinx/Use/install_linux_GPU.rst b/doc/Sphinx/Use/install_linux_GPU.rst index 197cd499b..fdd97258b 100644 --- a/doc/Sphinx/Use/install_linux_GPU.rst +++ b/doc/Sphinx/Use/install_linux_GPU.rst @@ -6,6 +6,9 @@ First, make sure you have a recent version of CMAKE, and the other libraries to compile Smilei on CPU as usual. In particular, for this example, you need GCC <= 12. +The installation protocol shown below uses the openmpi included in nvhpc. This approach often results in a segfault at runtime (note that nvidia will remove openmpi from nvhpc in the future). +The "proper" way, which is much harder, consists in installing openmpi compiled with nvhpc (an example is given further down this page). + Make a directory to store all the nvidia tools. We call it $NVDIR: .. code:: bash @@ -72,3 +75,20 @@ To run: source nvidia_env.sh smilei namelist.py + + +Here is an example of a "simple" openmpi installation. +Openmpi dependencies such as zlib, hwloc and libevent should first be compiled with nvc++. + +.. code:: bash + export cuda=PATH_TO_YOUR_NVHPC_FOLDER/Linux_x86_64/24.5/cuda + wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz + tar -xzf openmpi-4.1.5.tar.gz + cd openmpi-4.1.5 + mkdir build + cd build + CC=nvc++ CXX=nvc++ CFLAGS=-fPIC CXXFLAGS=-fPIC ../configure --with-hwloc --enable-mpirun-prefix-by-default --prefix=PATH_TO_openmpi/openmpi-4.1.5/build --enable-mpi-cxx --without-verbs --with-cuda=$cuda --disable-mpi-fortran --with-libevent=PATH_TO_libevent/libevent-2.1.12-stable/build + make -j 4 all + make install + +Because of the complexity of the configure step for openmpi, we recommend relying on your supercomputer's support team to use smilei on GPUs. 
diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 502055ec9..fb774d4db 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -9,7 +9,6 @@ #include "BoundaryConditionType.h" #include "Params.h" -#include "tabulatedFunctions.h" #include "userFunctions.h" diff --git a/src/ParticleBC/BoundaryConditionType.h b/src/ParticleBC/BoundaryConditionType.h index 25038e8f1..9795fdfe5 100755 --- a/src/ParticleBC/BoundaryConditionType.h +++ b/src/ParticleBC/BoundaryConditionType.h @@ -13,7 +13,6 @@ #include "Particles.h" #include "Species.h" #include "Params.h" -#include "tabulatedFunctions.h" #include "userFunctions.h" #include "Random.h" diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h index 7afd6ca9c..e03aaaf79 100755 --- a/src/ParticleBC/PartBoundCond.h +++ b/src/ParticleBC/PartBoundCond.h @@ -4,7 +4,6 @@ #include "Params.h" #include "Species.h" #include "Particles.h" -#include "tabulatedFunctions.h" class Patch; diff --git a/src/ParticleBC/PartWall.h b/src/ParticleBC/PartWall.h index 45aeb6fde..1f118d828 100755 --- a/src/ParticleBC/PartWall.h +++ b/src/ParticleBC/PartWall.h @@ -3,7 +3,6 @@ #include "Params.h" #include "Random.h" -#include "tabulatedFunctions.h" class Patch; class Species; diff --git a/src/Tools/tabulatedFunctions.cpp b/src/Tools/tabulatedFunctions.cpp deleted file mode 100755 index adc7a58ff..000000000 --- a/src/Tools/tabulatedFunctions.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "tabulatedFunctions.h" -#include -#include -#include -#include -#include - - -using namespace std; - -// --------------------------------------------------------------------------------------------------------------------- -// Inverse error function -// --------------------------------------------------------------------------------------------------------------------- - -// method used to load the tabulated function -// 
------------------------------------------ -void erfinv::prepare() -{ - if( erfinv_tab_.size()==0 ) { - - erfinv_tabSize_ = 1000; - erfinv_xmin_ = 0.0001; - erfinv_xmax_ = 0.9999; - erfinv_alpha_ = log( erfinv_xmax_/erfinv_xmin_ )/( double )( erfinv_tabSize_-1 ); - - erfinv_tab_.resize( erfinv_tabSize_ ); - erfinv_x_.resize( erfinv_tabSize_ ); - - // ----------------------------------- - // TABULATE THE INVERSE ERROR FUNCTION - // (using a logarithmic scale) - // ----------------------------------- - double tiny=1.e-10; // numerical parameter (~precision) - - for( unsigned int n=0; ntiny ) { - vm=0.5*( vl+vr ); - if( erfinv_x_[n]>erf( vm ) ) { - vl=vm; - } else { - vr=vm; - } - } - erfinv_tab_[n] = 0.5*( vl+vr ); - - }//n - - } else { - DEBUG( "trying to call this again!" ); - }//needLoad - -} - -// method used to compute the value for a given x (use linear interpolation on log. scale axis) -// -------------------------------------------------------------------------------------------- -double erfinv::call( double x ) -{ - - double pi = M_PI; - double val = 0.0; - - if( x <= erfinv_x_[0] ) { - val = 0.5*sqrt( pi )*x + pi/24.0 *x*x*x; - } else if( x >= erfinv_x_.back() ) { - double eta = -log( sqrt( pi )*( 1.0-x ) ); - double log_eta = log( eta ); - val = sqrt( eta - 0.5*log_eta + ( 0.25*log_eta-0.5 )/eta ); - } else { - unsigned int n = floor( log( erfinv_xmax_/( 1.0-x ) )/erfinv_alpha_ ); - double wl = ( erfinv_x_[n+1]-x )/( erfinv_x_[n+1]-erfinv_x_[n] ); - double wr = ( x-erfinv_x_[n] ) /( erfinv_x_[n+1]-erfinv_x_[n] ); - val = wl*erfinv_tab_[n] + wr*erfinv_tab_[n+1]; - } - - return val; -} - - - - - - diff --git a/src/Tools/tabulatedFunctions.h b/src/Tools/tabulatedFunctions.h deleted file mode 100755 index 161eea70d..000000000 --- a/src/Tools/tabulatedFunctions.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef TABULATEDFUNCTIONS_H -#define TABULATEDFUNCTIONS_H - -#include -#include -#include -#include -#include -#include "Tools.h" - - - -//! 
singleton class of tabulated functions - -class erfinv -{ -public: - static erfinv &instance() - { - static erfinv one_and_only_instance; // Guaranteed to be destroyed. - // Instantiated on first use. - return one_and_only_instance; - } - - //! returns inverse error function of a double x - double call( double x ); - - //! needs to be called one time before using erfinv - void prepare(); - -protected: - // creator is private for singletons - erfinv() {}; - erfinv( erfinv const & ); // avoid implementation of this - void operator=( erfinv const & ); // avoid implementation of this - -private: - - //! number of points used to sample the fct - unsigned int erfinv_tabSize_; - - //! mininum value for x - double erfinv_xmin_; - - //! maximum value for x - double erfinv_xmax_; - - //! constant used to compute the different values of x used during the sampling - double erfinv_alpha_; - - //! vector storing the values of x used to sample erfinv - std::vector erfinv_x_; - - //! vector storing the sampled values of erfinv - std::vector erfinv_tab_; - -}; - - -#endif - - - - From 8581e3a6822ba16bbdf39819176ef269972c354d Mon Sep 17 00:00:00 2001 From: "charles.prouveur" Date: Wed, 13 Nov 2024 15:19:42 +0100 Subject: [PATCH 8/8] Created namespace userFunctions and removed src/Tools/userFunctions.cpp ; created namespace random_namespace and cleaned src/ParticleBC/BoundaryConditionType.h ; the objective here is to make available functions without having to access a class object --- src/ParticleBC/BoundaryConditionType.cpp | 16 +- src/ParticleBC/BoundaryConditionType.h | 119 +-------- src/Tools/Random.h | 29 +++ src/Tools/userFunctions.cpp | 300 ----------------------- src/Tools/userFunctions.h | 276 ++++++++++++++++++--- 5 files changed, 276 insertions(+), 464 deletions(-) delete mode 100755 src/Tools/userFunctions.cpp diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index fb774d4db..aa86586cf 100755 --- 
a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -560,7 +560,7 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio #endif // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] avec le state local for( int i = 0; i < 32; ++i ){ - xorshift32_state_array[i] = xorshift32(xorshift32_state_local); + xorshift32_state_array[i] = Random_namespace::xorshift32(xorshift32_state_local); } #endif // boucle sur les particules de ce chunk qui utilise xorshift32_state_array[i] @@ -591,7 +591,7 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio // change of velocity in the direction normal to the reflection plane double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); #if defined( SMILEI_ACCELERATOR_GPU ) - momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - uniform1(xorshift32_state_array[i]) ) ); + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - Random_namespace::uniform1(xorshift32_state_array[i]) ) ); #else momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); #endif @@ -599,13 +599,13 @@ void thermalize_particle_inf( Species *species, int imin, int imax, int directio // change of momentum in the direction(s) along the reflection plane if (nDim>1) { #if defined( SMILEI_ACCELERATOR_GPU ) - momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand_gpu_v3(xorshift32_state_array[i]); + momentumRefl_2D[ ipart ] = thermal_momentum1 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); #else momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); #endif if (nDim>2) { #if defined( SMILEI_ACCELERATOR_GPU ) - momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand_gpu_v3(xorshift32_state_array[i]); + momentumRefl_3D[ ipart ] = thermal_momentum2 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); #else 
momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); #endif @@ -724,7 +724,7 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio #endif // boucle sur les particules de ce chunk pour remplir xorshift32_state_array[...] avec le state local for( int i = 0; i < 32; ++i ){ - xorshift32_state_array[i] = xorshift32(xorshift32_state_local); + xorshift32_state_array[i] = Random_namespace::xorshift32(xorshift32_state_local); } #endif int istart = ichunk==(imin/32) ? imin%32 : 0; @@ -756,7 +756,7 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio // change of velocity in the direction normal to the reflection plane double sign_vel = -momentum[ ipart ]/std::abs( momentum[ ipart ] ); #if defined( SMILEI_ACCELERATOR_GPU ) - momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - uniform1(xorshift32_state_array[i]) ) ); + momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - Random_namespace::uniform1(xorshift32_state_array[i]) ) ); #else momentum[ ipart ] = sign_vel * thermal_momentum * std::sqrt( -std::log( 1.0 - rand->uniform1() ) ); #endif @@ -764,13 +764,13 @@ void thermalize_particle_sup( Species *species, int imin, int imax, int directio // change of momentum in the direction(s) along the reflection plane if (nDim>1) { #if defined( SMILEI_ACCELERATOR_GPU ) - momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand_gpu_v3(xorshift32_state_array[i]); + momentumRefl_2D[ ipart ] = thermal_momentum1 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); #else momentumRefl_2D[ ipart ] = thermal_momentum1 * perp_rand( rand ); #endif if (nDim>2) { #if defined( SMILEI_ACCELERATOR_GPU ) - momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand_gpu_v3(xorshift32_state_array[i]); + momentumRefl_3D[ ipart ] = thermal_momentum2 * Random_namespace::perp_rand_dp(xorshift32_state_array[i]); #else momentumRefl_3D[ ipart ] = thermal_momentum2 * perp_rand( rand ); 
#endif diff --git a/src/ParticleBC/BoundaryConditionType.h b/src/ParticleBC/BoundaryConditionType.h index 9795fdfe5..c4912ed12 100755 --- a/src/ParticleBC/BoundaryConditionType.h +++ b/src/ParticleBC/BoundaryConditionType.h @@ -17,130 +17,13 @@ #include "Random.h" inline double perp_rand( Random * rand ) { - double a = userFunctions::erfinv( rand->uniform1() ); // to be switched to erfinv 3 + double a = userFunctions::erfinv_dp( rand->uniform1() ); if( rand->cointoss() ) { a *= -1.; } return a; } -/** - * copied from erfinv_DP_1.cu by Prof. Mike Giles. - * https://people.maths.ox.ac.uk/gilesm/ - * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/ - * - * Original code is written for CUDA. - * Mutsuo Saito modified original code for C++. - */ -inline double erfinv_v3(double x) -{ - double w, p; - double sign; - if (x > 0) { - sign = 1.0; - } else { - sign = -1.0; - x = abs(x); - } - w = - log((1.0-x)*(1.0+x)); - - if ( w < 6.250000 ) { - w = w - 3.125000; - p = -3.6444120640178196996e-21; - p = -1.685059138182016589e-19 + p*w; - p = 1.2858480715256400167e-18 + p*w; - p = 1.115787767802518096e-17 + p*w; - p = -1.333171662854620906e-16 + p*w; - p = 2.0972767875968561637e-17 + p*w; - p = 6.6376381343583238325e-15 + p*w; - p = -4.0545662729752068639e-14 + p*w; - p = -8.1519341976054721522e-14 + p*w; - p = 2.6335093153082322977e-12 + p*w; - p = -1.2975133253453532498e-11 + p*w; - p = -5.4154120542946279317e-11 + p*w; - p = 1.051212273321532285e-09 + p*w; - p = -4.1126339803469836976e-09 + p*w; - p = -2.9070369957882005086e-08 + p*w; - p = 4.2347877827932403518e-07 + p*w; - p = -1.3654692000834678645e-06 + p*w; - p = -1.3882523362786468719e-05 + p*w; - p = 0.0001867342080340571352 + p*w; - p = -0.00074070253416626697512 + p*w; - p = -0.0060336708714301490533 + p*w; - p = 0.24015818242558961693 + p*w; - p = 1.6536545626831027356 + p*w; - } - else if ( w < 16.000000 ) { - w = sqrt(w) - 3.250000; - p = 2.2137376921775787049e-09; - p = 9.0756561938885390979e-08 + p*w; - p = 
-2.7517406297064545428e-07 + p*w; - p = 1.8239629214389227755e-08 + p*w; - p = 1.5027403968909827627e-06 + p*w; - p = -4.013867526981545969e-06 + p*w; - p = 2.9234449089955446044e-06 + p*w; - p = 1.2475304481671778723e-05 + p*w; - p = -4.7318229009055733981e-05 + p*w; - p = 6.8284851459573175448e-05 + p*w; - p = 2.4031110387097893999e-05 + p*w; - p = -0.0003550375203628474796 + p*w; - p = 0.00095328937973738049703 + p*w; - p = -0.0016882755560235047313 + p*w; - p = 0.0024914420961078508066 + p*w; - p = -0.0037512085075692412107 + p*w; - p = 0.005370914553590063617 + p*w; - p = 1.0052589676941592334 + p*w; - p = 3.0838856104922207635 + p*w; - } - else { - w = sqrt(w) - 5.000000; - p = -2.7109920616438573243e-11; - p = -2.5556418169965252055e-10 + p*w; - p = 1.5076572693500548083e-09 + p*w; - p = -3.7894654401267369937e-09 + p*w; - p = 7.6157012080783393804e-09 + p*w; - p = -1.4960026627149240478e-08 + p*w; - p = 2.9147953450901080826e-08 + p*w; - p = -6.7711997758452339498e-08 + p*w; - p = 2.2900482228026654717e-07 + p*w; - p = -9.9298272942317002539e-07 + p*w; - p = 4.5260625972231537039e-06 + p*w; - p = -1.9681778105531670567e-05 + p*w; - p = 7.5995277030017761139e-05 + p*w; - p = -0.00021503011930044477347 + p*w; - p = -0.00013871931833623122026 + p*w; - p = 1.0103004648645343977 + p*w; - p = 4.8499064014085844221 + p*w; - } - return sign * p * x; -} - -inline uint32_t xorshift32(uint32_t xorshift32_state) -{ - // Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" - xorshift32_state ^= xorshift32_state << 13; - xorshift32_state ^= xorshift32_state >> 17; - xorshift32_state ^= xorshift32_state << 5; - return xorshift32_state; -} - -static constexpr double xorshift32_invmax1 = (1.-1e-11)/4294967296.; - -inline double uniform1(uint32_t xorshift32_state) { - return xorshift32(xorshift32_state) * xorshift32_invmax1; -} - -inline double perp_rand_gpu_v3(uint32_t xorshift32_state) { - double a = erfinv_v3( uniform1(xorshift32_state) ); //userFunctions:: - // technically we could also use the erfinv() function fron cuda, it would require compiling with -cuda though ... - // the study showed the gap in perf for BC thermal was not worth the added depend - if( xorshift32(xorshift32_state) & 1 ) { // rand->cointoss() - a *= -1.; - } - return a; -} - - void internal_inf( Species *species, int imin, int imax, int direction, double limit_inf, double dt, std::vector &invgf, Random * rand, double &energy_change ); void internal_sup( Species *species, int imin, int imax, int direction, double limit_sup, double dt, std::vector &invgf, Random * rand, double &energy_change ); diff --git a/src/Tools/Random.h b/src/Tools/Random.h index 1355de28e..c57ac2550 100755 --- a/src/Tools/Random.h +++ b/src/Tools/Random.h @@ -4,6 +4,35 @@ #include #include #include +#include "userFunctions.h" + +namespace Random_namespace // in order to use the random functions without having access to the class random +{ + + inline uint32_t xorshift32(uint32_t xorshift32_state) + { + // Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" + xorshift32_state ^= xorshift32_state << 13; + xorshift32_state ^= xorshift32_state >> 17; + xorshift32_state ^= xorshift32_state << 5; + return xorshift32_state; + } + + inline double uniform1(uint32_t xorshift32_state) { + constexpr double xorshift32_invmax1 = (1.-1e-11)/4294967296.; + return xorshift32(xorshift32_state) * xorshift32_invmax1; + } + + inline double perp_rand_dp(uint32_t xorshift32_state) { + double a = userFunctions::erfinv_dp( uniform1(xorshift32_state) ); + // technically we could also use the erfinv() function fron cuda, it would require compiling with -cuda though ... + // the study showed the gap in perf for BC thermal was not worth the added depend + if( xorshift32(xorshift32_state) & 1 ) { // rand->cointoss() + a *= -1.; + } + return a; + } +} class Random { diff --git a/src/Tools/userFunctions.cpp b/src/Tools/userFunctions.cpp deleted file mode 100755 index 9cea0703e..000000000 --- a/src/Tools/userFunctions.cpp +++ /dev/null @@ -1,300 +0,0 @@ -#include -#include -#include "userFunctions.h" - -#include "Params.h" - - -// This function is to be deleted after complete validation of the transition on erfinv3 -//! inverse error function is taken from NIST -double userFunctions::erfinv( double x ) -{ - if( x < -1. || x > 1. ) { - return std::numeric_limits::quiet_NaN(); - } - - if( x == 0 ) { - return 0; - } - - int sign_x=( x > 0? 1 : -1 ); - - double r; - if( x <= 0.686 ) { - double x2 = x * x; - r = x * ( ( ( -0.140543331 * x2 + 0.914624893 ) * x2 + -1.645349621 ) * x2 + 0.886226899 ); - r /= ( ( ( 0.012229801 * x2 + -0.329097515 ) * x2 + 1.442710462 ) * x2 + -2.118377725 ) * x2 + 1.; - } else { - double y = sqrt( -log( ( 1. - x ) / 2. ) ); - r = ( ( ( 1.641345311 * y + 3.429567803 ) * y + -1.62490649 ) * y + -1.970840454 ); - r /= ( 1.637067800 * y + 3.543889200 ) * y + 1.; - } - - r *= ( double )sign_x; - x *= ( double )sign_x; - - r -= ( erf( r ) - x ) / ( 2. 
/ sqrt( M_PI ) * exp( -r*r ) ); - - return r; -} - -//! inverse error function is taken from M.B. Giles. 'Approximating the erfinv function'. In GPU Computing Gems, volume 2, Morgan Kaufmann, 2011. -double userFunctions::erfinv2( double x ) -{ - double w, p; - w = -log( ( 1.0-x )*( 1.0+x ) ); - - if( w < 5.000000 ) { - w = w - 2.500000; - p = +2.81022636000e-08 ; - p = +3.43273939000e-07 + p*w; - p = -3.52338770000e-06 + p*w; - p = -4.39150654000e-06 + p*w; - p = +0.00021858087e+00 + p*w; - p = -0.00125372503e+00 + p*w; - p = -0.00417768164e+00 + p*w; - p = +0.24664072700e+00 + p*w; - p = +1.50140941000e+00 + p*w; - } else { - w = sqrt( w ) - 3.000000; - p = -0.000200214257 ; - p = +0.000100950558 + p*w; - p = +0.001349343220 + p*w; - p = -0.003673428440 + p*w; - p = +0.005739507730 + p*w; - p = -0.007622461300 + p*w; - p = +0.009438870470 + p*w; - p = +1.001674060000 + p*w; - p = +2.832976820000 + p*w; - } - return p*x; -} - -/** - * copied from erfinv_DP_1.cu by Prof. Mike Giles. - * https://people.maths.ox.ac.uk/gilesm/ - * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/ - * - * Original code is written for CUDA. - * Mutsuo Saito modified original code for C++. 
- */ -double userFunctions::erfinv_v3(double x) -{ - double w, p; - double sign; - if (x > 0) { - sign = 1.0; - } else { - sign = -1.0; - x = abs(x); - } - w = - log((1.0-x)*(1.0+x)); - - if ( w < 6.250000 ) { - w = w - 3.125000; - p = -3.6444120640178196996e-21; - p = -1.685059138182016589e-19 + p*w; - p = 1.2858480715256400167e-18 + p*w; - p = 1.115787767802518096e-17 + p*w; - p = -1.333171662854620906e-16 + p*w; - p = 2.0972767875968561637e-17 + p*w; - p = 6.6376381343583238325e-15 + p*w; - p = -4.0545662729752068639e-14 + p*w; - p = -8.1519341976054721522e-14 + p*w; - p = 2.6335093153082322977e-12 + p*w; - p = -1.2975133253453532498e-11 + p*w; - p = -5.4154120542946279317e-11 + p*w; - p = 1.051212273321532285e-09 + p*w; - p = -4.1126339803469836976e-09 + p*w; - p = -2.9070369957882005086e-08 + p*w; - p = 4.2347877827932403518e-07 + p*w; - p = -1.3654692000834678645e-06 + p*w; - p = -1.3882523362786468719e-05 + p*w; - p = 0.0001867342080340571352 + p*w; - p = -0.00074070253416626697512 + p*w; - p = -0.0060336708714301490533 + p*w; - p = 0.24015818242558961693 + p*w; - p = 1.6536545626831027356 + p*w; - } - else if ( w < 16.000000 ) { - w = sqrt(w) - 3.250000; - p = 2.2137376921775787049e-09; - p = 9.0756561938885390979e-08 + p*w; - p = -2.7517406297064545428e-07 + p*w; - p = 1.8239629214389227755e-08 + p*w; - p = 1.5027403968909827627e-06 + p*w; - p = -4.013867526981545969e-06 + p*w; - p = 2.9234449089955446044e-06 + p*w; - p = 1.2475304481671778723e-05 + p*w; - p = -4.7318229009055733981e-05 + p*w; - p = 6.8284851459573175448e-05 + p*w; - p = 2.4031110387097893999e-05 + p*w; - p = -0.0003550375203628474796 + p*w; - p = 0.00095328937973738049703 + p*w; - p = -0.0016882755560235047313 + p*w; - p = 0.0024914420961078508066 + p*w; - p = -0.0037512085075692412107 + p*w; - p = 0.005370914553590063617 + p*w; - p = 1.0052589676941592334 + p*w; - p = 3.0838856104922207635 + p*w; - } - else { - w = sqrt(w) - 5.000000; - p = -2.7109920616438573243e-11; - p = 
-2.5556418169965252055e-10 + p*w; - p = 1.5076572693500548083e-09 + p*w; - p = -3.7894654401267369937e-09 + p*w; - p = 7.6157012080783393804e-09 + p*w; - p = -1.4960026627149240478e-08 + p*w; - p = 2.9147953450901080826e-08 + p*w; - p = -6.7711997758452339498e-08 + p*w; - p = 2.2900482228026654717e-07 + p*w; - p = -9.9298272942317002539e-07 + p*w; - p = 4.5260625972231537039e-06 + p*w; - p = -1.9681778105531670567e-05 + p*w; - p = 7.5995277030017761139e-05 + p*w; - p = -0.00021503011930044477347 + p*w; - p = -0.00013871931833623122026 + p*w; - p = 1.0103004648645343977 + p*w; - p = 4.8499064014085844221 + p*w; - } - return sign * p * x; -} - -// ---------------------------------------------------------------------------- -//! \brief Distribute equally the load into chunk of an array -//! and return the number of elements for the specified chunk number. -// -//! \param chunk number -//! \param nb_chunks Total number of MPI tasks -//! \param nb_elems Total number of element to be distributed -//! \param imin Index of the first element for chunk -//! 
\param nb_loc_elems Number of element for chunk -// ---------------------------------------------------------------------------- -void userFunctions::distributeArray( int chunk, - int nb_chunks, - int nb_elems, - int &imin, - int &nb_loc_elems ) -{ - // If more ranks than elements, - // only a part of the processes will work - if( nb_chunks >= nb_elems ) { - if( chunk < nb_elems ) { - imin = chunk; - nb_loc_elems = 1; - } else { - imin = nb_elems; - nb_loc_elems = 0; - } - } else { - - int quotient; - int remainder; - - // Part of the load equally distributed - quotient = nb_elems/nb_chunks; - - // Remaining load to be distributed after balanced repartition - remainder = nb_elems%nb_chunks; - - if( chunk < remainder ) { - imin = chunk*quotient+chunk; - nb_loc_elems = quotient + 1; - } else { - imin = remainder + chunk*quotient; - nb_loc_elems = quotient; - } - } -} - -// ---------------------------------------------------------------------------- -//! \brief Distribute equally 1D array into chunks -//! This function returns tables of indexes and length for each chunk -// -//! \param nb_chunks Total number of chunks -//! \param nb_elems Total number of element to be distributed -//! \param imin_table Index of the first element for each chunk -//! 
\param length_table Number of element for each chunk -// ---------------------------------------------------------------------------- -void userFunctions::distributeArray( - int nb_chunks, - int nb_elems, - int *imin_table, - int *length_table ) -{ - - // If more chunks than elements, - // only a part of the processes will work - if( nb_chunks >= nb_elems ) { - #pragma omp simd - for( int chunk = 0 ; chunk < nb_elems ; chunk ++ ) { - imin_table[chunk] = chunk; - length_table[chunk] = 1; - } - #pragma omp simd - for( int chunk = nb_elems ; chunk < nb_chunks ; chunk ++ ) { - imin_table[chunk] = nb_elems; - length_table[chunk] = 0; - } - } else { - - int quotient; - int remainder; - - // Part of the load equally distributed - quotient = nb_elems/nb_chunks; - - // Remaining load to be distributed after balanced repartition - remainder = nb_elems%nb_chunks; - - #pragma omp simd - for( int chunk = 0 ; chunk < remainder ; chunk ++ ) { - imin_table[chunk] = chunk*quotient+chunk; - length_table[chunk] = quotient + 1; - } - #pragma omp simd - for( int chunk = remainder ; chunk < nb_chunks ; chunk ++ ) { - imin_table[chunk] = remainder + chunk*quotient; - length_table[chunk] = quotient; - } - } -} - - -// ---------------------------------------------------------------------------- -//! \brief This function uses a bijection algorithm in a monotonic double array -//! to find the corresponding index i so that elem is between array[i] -//! and array[i+1]. -// -//! \param array array in which to find the value -//! \param elem element to be found -//! 
#ifndef USERFUNCTIONS_H
#define USERFUNCTIONS_H

// std::log, std::sqrt and std::fabs are used by the inline functions below.
#include <cmath>

//! Small numerical helpers: approximations of the inverse error function,
//! 1D load distribution and a search in a monotonic array.
namespace userFunctions
{
    // These are free functions of a namespace: they are declared `inline`
    // (header-defined) and need no `static` qualifier.

    // ----------------------------------------------------------------------------
    //! \brief Approximation of the inverse error function.
    //! Taken from M.B. Giles, 'Approximating the erfinv function',
    //! in GPU Computing Gems, volume 2, Morgan Kaufmann, 2011.
    //
    //! The polynomial is evaluated with Horner's scheme in the variable
    //! w = -log( (1-x)*(1+x) ), with one set of coefficients for w < 5
    //! and another one (in sqrt(w)) otherwise.
    //
    //! \param x argument; the logarithm is only finite for -1 < x < 1
    //! \return p(w)*x, the approximation of erfinv(x)
    // ----------------------------------------------------------------------------
    inline double erfinv_sp( double x )
    {
        double w = -std::log( ( 1.0 - x ) * ( 1.0 + x ) );
        double p;

        if( w < 5.000000 ) {
            w = w - 2.500000;
            p = +2.81022636000e-08 ;
            p = +3.43273939000e-07 + p*w;
            p = -3.52338770000e-06 + p*w;
            p = -4.39150654000e-06 + p*w;
            p = +0.00021858087e+00 + p*w;
            p = -0.00125372503e+00 + p*w;
            p = -0.00417768164e+00 + p*w;
            p = +0.24664072700e+00 + p*w;
            p = +1.50140941000e+00 + p*w;
        } else {
            w = std::sqrt( w ) - 3.000000;
            p = -0.000200214257 ;
            p = +0.000100950558 + p*w;
            p = +0.001349343220 + p*w;
            p = -0.003673428440 + p*w;
            p = +0.005739507730 + p*w;
            p = -0.007622461300 + p*w;
            p = +0.009438870470 + p*w;
            p = +1.001674060000 + p*w;
            p = +2.832976820000 + p*w;
        }
        return p*x;
    }

    /**
     * \brief Approximation of the inverse error function using the
     * coefficients of erfinv_DP_1.cu.
     *
     * copied from erfinv_DP_1.cu by Prof. Mike Giles.
     * https://people.maths.ox.ac.uk/gilesm/
     * https://people.maths.ox.ac.uk/gilesm/codes/erfinv/
     *
     * Original code is written for CUDA.
     * Mutsuo Saito modified original code for C++.
     *
     * The function is odd: the polynomial is evaluated on |x| and the sign of
     * the argument is restored on the result. Three coefficient sets are used,
     * selected on w = -log( (1-|x|)*(1+|x|) ): w < 6.25, w < 16 and above.
     *
     * \param x argument; the logarithm is only finite for -1 < x < 1
     * \return approximation of erfinv(x)
     */
    inline double erfinv_dp( double x )
    {
        const double sign = ( x < 0.0 ) ? -1.0 : 1.0;

        // std::fabs is mandatory here: an unqualified abs() may resolve to the
        // C function int abs(int), which truncates any |x| < 1 to 0.
        x = std::fabs( x );

        double w = -std::log( ( 1.0 - x ) * ( 1.0 + x ) );
        double p;

        if( w < 6.250000 ) {
            w = w - 3.125000;
            p = -3.6444120640178196996e-21;
            p = -1.685059138182016589e-19 + p*w;
            p = 1.2858480715256400167e-18 + p*w;
            p = 1.115787767802518096e-17 + p*w;
            p = -1.333171662854620906e-16 + p*w;
            p = 2.0972767875968561637e-17 + p*w;
            p = 6.6376381343583238325e-15 + p*w;
            p = -4.0545662729752068639e-14 + p*w;
            p = -8.1519341976054721522e-14 + p*w;
            p = 2.6335093153082322977e-12 + p*w;
            p = -1.2975133253453532498e-11 + p*w;
            p = -5.4154120542946279317e-11 + p*w;
            p = 1.051212273321532285e-09 + p*w;
            p = -4.1126339803469836976e-09 + p*w;
            p = -2.9070369957882005086e-08 + p*w;
            p = 4.2347877827932403518e-07 + p*w;
            p = -1.3654692000834678645e-06 + p*w;
            p = -1.3882523362786468719e-05 + p*w;
            p = 0.0001867342080340571352 + p*w;
            p = -0.00074070253416626697512 + p*w;
            p = -0.0060336708714301490533 + p*w;
            p = 0.24015818242558961693 + p*w;
            p = 1.6536545626831027356 + p*w;
        } else if( w < 16.000000 ) {
            w = std::sqrt( w ) - 3.250000;
            p = 2.2137376921775787049e-09;
            p = 9.0756561938885390979e-08 + p*w;
            p = -2.7517406297064545428e-07 + p*w;
            p = 1.8239629214389227755e-08 + p*w;
            p = 1.5027403968909827627e-06 + p*w;
            p = -4.013867526981545969e-06 + p*w;
            p = 2.9234449089955446044e-06 + p*w;
            p = 1.2475304481671778723e-05 + p*w;
            p = -4.7318229009055733981e-05 + p*w;
            p = 6.8284851459573175448e-05 + p*w;
            p = 2.4031110387097893999e-05 + p*w;
            p = -0.0003550375203628474796 + p*w;
            p = 0.00095328937973738049703 + p*w;
            p = -0.0016882755560235047313 + p*w;
            p = 0.0024914420961078508066 + p*w;
            p = -0.0037512085075692412107 + p*w;
            p = 0.005370914553590063617 + p*w;
            p = 1.0052589676941592334 + p*w;
            p = 3.0838856104922207635 + p*w;
        } else {
            w = std::sqrt( w ) - 5.000000;
            p = -2.7109920616438573243e-11;
            p = -2.5556418169965252055e-10 + p*w;
            p = 1.5076572693500548083e-09 + p*w;
            p = -3.7894654401267369937e-09 + p*w;
            p = 7.6157012080783393804e-09 + p*w;
            p = -1.4960026627149240478e-08 + p*w;
            p = 2.9147953450901080826e-08 + p*w;
            p = -6.7711997758452339498e-08 + p*w;
            p = 2.2900482228026654717e-07 + p*w;
            p = -9.9298272942317002539e-07 + p*w;
            p = 4.5260625972231537039e-06 + p*w;
            p = -1.9681778105531670567e-05 + p*w;
            p = 7.5995277030017761139e-05 + p*w;
            p = -0.00021503011930044477347 + p*w;
            p = -0.00013871931833623122026 + p*w;
            p = 1.0103004648645343977 + p*w;
            p = 4.8499064014085844221 + p*w;
        }
        return sign * p * x;
    }

    //! Load repartition in 1d between MPI processes
    // ----------------------------------------------------------------------------
    //! \brief Distribute equally the load into chunks of an array
    //! and return the first index and the number of elements of one chunk.
    //
    //! The first (nb_elems % nb_chunks) chunks receive one extra element.
    //! When there are at least as many chunks as elements, the first nb_elems
    //! chunks receive one element each and the others are empty.
    //! nb_chunks is expected to be positive: with nb_chunks == 0 and
    //! nb_elems > 0 the quotient below would divide by zero.
    //
    //! \param chunk        chunk number
    //! \param nb_chunks    Total number of chunks (e.g. MPI tasks)
    //! \param nb_elems     Total number of elements to be distributed
    //! \param imin         [out] Index of the first element of the chunk
    //! \param nb_loc_elems [out] Number of elements of the chunk
    // ----------------------------------------------------------------------------
    inline void distributeArray( int chunk,
                                 int nb_chunks,
                                 int nb_elems,
                                 int &imin,
                                 int &nb_loc_elems )
    {
        // If more chunks than elements,
        // only a part of the processes will work
        if( nb_chunks >= nb_elems ) {
            if( chunk < nb_elems ) {
                imin         = chunk;
                nb_loc_elems = 1;
            } else {
                imin         = nb_elems;
                nb_loc_elems = 0;
            }
            return;
        }

        // Part of the load equally distributed
        const int quotient = nb_elems/nb_chunks;

        // Remaining load to be distributed after balanced repartition
        const int remainder = nb_elems%nb_chunks;

        if( chunk < remainder ) {
            // Every previous chunk already holds quotient + 1 elements
            imin         = chunk*quotient+chunk;
            nb_loc_elems = quotient + 1;
        } else {
            // The `remainder` extra elements all sit before this chunk
            imin         = remainder + chunk*quotient;
            nb_loc_elems = quotient;
        }
    }

    //! Load repartition in 1d between MPI processes.
    // ----------------------------------------------------------------------------
    //! \brief Distribute equally a 1D array into chunks.
    //! This function fills tables of first indexes and lengths for every chunk,
    //! with the same repartition rule as the single-chunk overload.
    //! nb_chunks is expected to be positive: with nb_chunks == 0 and
    //! nb_elems > 0 the quotient below would divide by zero.
    //
    //! \param nb_chunks    Total number of chunks
    //! \param nb_elems     Total number of elements to be distributed
    //! \param imin_table   [out] Index of the first element for each chunk
    //!                     (nb_chunks entries are written)
    //! \param length_table [out] Number of elements for each chunk
    //!                     (nb_chunks entries are written)
    // ----------------------------------------------------------------------------
    inline void distributeArray(
        int nb_chunks,
        int nb_elems,
        int *imin_table,
        int *length_table )
    {
        // If more chunks than elements,
        // only a part of the processes will work
        if( nb_chunks >= nb_elems ) {
            #pragma omp simd
            for( int chunk = 0 ; chunk < nb_elems ; chunk ++ ) {
                imin_table[chunk]   = chunk;
                length_table[chunk] = 1;
            }
            #pragma omp simd
            for( int chunk = nb_elems ; chunk < nb_chunks ; chunk ++ ) {
                imin_table[chunk]   = nb_elems;
                length_table[chunk] = 0;
            }
            return;
        }

        // Part of the load equally distributed
        const int quotient = nb_elems/nb_chunks;

        // Remaining load to be distributed after balanced repartition
        const int remainder = nb_elems%nb_chunks;

        #pragma omp simd
        for( int chunk = 0 ; chunk < remainder ; chunk ++ ) {
            imin_table[chunk]   = chunk*quotient+chunk;
            length_table[chunk] = quotient + 1;
        }
        #pragma omp simd
        for( int chunk = remainder ; chunk < nb_chunks ; chunk ++ ) {
            imin_table[chunk]   = remainder + chunk*quotient;
            length_table[chunk] = quotient;
        }
    }

    //! \brief This function uses a bisection algorithm in a monotonic array
    //! to find the corresponding index i so that elem is between array[i]
    //! and array[i+1].
    //
    //! The comparisons (elem >= array[imid] moves the lower bound up) are
    //! written for values stored in increasing order. The returned index lies
    //! in [0, nb_elems-2] for nb_elems >= 2; array[0] is always read, so
    //! nb_elems must be at least 1.
    //
    //! \param array    array in which to find the value
    //! \param elem     element to be found
    //! \param nb_elems number of elements
    //! \return index i of the interval [array[i], array[i+1]] selected for elem
    // NOTE(review): the line opening this preprocessor guard was not visible
    // when this block was reviewed; SMILEI_ACCELERATOR_GPU_OACC is assumed
    // from the macro used elsewhere in the code base - confirm.
#ifdef SMILEI_ACCELERATOR_GPU_OACC
    #pragma acc routine seq
#endif
    template <class T>
    inline int searchValuesInMonotonicArray( T *array,
                                             T elem,
                                             int nb_elems )
    {
        // Exact hits on either end are answered without searching
        if( elem == array[0] ) {
            return 0;
        }
        if( elem == array[nb_elems-1] ) {
            // Last valid interval is [nb_elems-2, nb_elems-1]
            return nb_elems-2;
        }

        int imin = 0;            // lower bound
        int imax = nb_elems-1;   // upper bound

        // Halve [imin, imax] until it brackets a single interval
        while( imax - imin > 1 ) {
            const int imid = ( imin + imax )/2;
            if( elem >= array[imid] ) {
                imin = imid;
            } else {
                imax = imid;
            }
        }
        return imin;
    }
}
#endif