From fb41fb966d216468cba8c46f3405d104acf8a8b5 Mon Sep 17 00:00:00 2001
From: Travis Askham <askhamwhat@gmail.com>
Date: Wed, 29 May 2024 22:14:27 -0400
Subject: [PATCH] changes behavior of chunkermatapply and building corrections
 matrix - removes dval from chunkermatapply interface for consistency with
 chunkermat - adds a warning about fmms - tries to speed up kerneval_smooth in
 the most common situation - the previous version of corrections was slow,
 this avoids some slow indexing that was happening. - this adds a faster
 lege.barywts option when nodes are known

---
 chunkie/+chnk/+quadggq/buildmattd.m   |  22 ++-
 chunkie/+chnk/+quadggq/diagbuildmat.m |  39 ++++-
 chunkie/+chnk/+quadggq/nearbuildmat.m |  15 +-
 chunkie/+lege/barywts.m               |   6 +-
 chunkie/chunkermat.m                  | 203 +++++++++-----------------
 chunkie/chunkermatapply.m             |  11 +-
 devtools/test/chunkermatapplyTest.m   |  55 ++++---
 devtools/test/mixedbcTest.m           |  41 +++---
 8 files changed, 198 insertions(+), 194 deletions(-)

diff --git a/chunkie/+chnk/+quadggq/buildmattd.m b/chunkie/+chnk/+quadggq/buildmattd.m
index ee5e01c..08cd688 100644
--- a/chunkie/+chnk/+quadggq/buildmattd.m
+++ b/chunkie/+chnk/+quadggq/buildmattd.m
@@ -1,4 +1,4 @@
-function [spmat] = buildmattd(chnkr,kern,opdims,type,auxquads,ilist)
+function [spmat] = buildmattd(chnkr,kern,opdims,type,auxquads,ilist,corrections)
 %CHNK.QUADGGQ.BUILDMAT build matrix for given kernel and chnkr 
 % description of boundary, using special quadrature for self
 % and neighbor panels.
@@ -15,6 +15,9 @@
 if (nargin <6)
     ilist = [];
 end
+if nargin < 7
+    corrections = false;
+end
 
 k = chnkr.k;
 nch = chnkr.nch;
@@ -54,6 +57,17 @@
 
 % do smooth weight for all
 %sysmat = chnk.quadnative.buildmat(chnkr,kern,opdims,1:nch,1:nch,wts);
+
+if corrections
+    wtss = chnkr.wts;
+    wtss = repmat(wtss(:).',opdims(2),1); wtss = reshape(wtss,opdims(2)*k,nch);
+    indd = kron(eye(k),true(opdims(1),opdims(2)));
+    indd = indd(:) > 0;
+else
+    wtss = [];
+    indd = [];
+end
+
 mmat = k*nch*opdims(1); nmat = k*nch*opdims(2);
 
 nnz = k*nch*opdims(1)*k*3*opdims(2);
@@ -89,7 +103,7 @@
         % skip construction if both chunks are in the "bad" chunk list
         else
             submat = chnk.quadggq.nearbuildmat(r,d,n,d2,data,ibefore,j, ...
-                kern,opdims,xs1,wts1,ainterp1kron,ainterp1);
+                kern,opdims,xs1,wts1,ainterp1kron,ainterp1,corrections,wtss);
     
             imat = 1 + (ibefore-1)*k*opdims(1);
             induse = ict:ict+nnz1-1;
@@ -106,7 +120,7 @@
         % skip construction if both chunks are in the "bad" chunk list
       else      
         submat = chnk.quadggq.nearbuildmat(r,d,n,d2,data,iafter,j, ...
-            kern,opdims,xs1,wts1,ainterp1kron,ainterp1);
+            kern,opdims,xs1,wts1,ainterp1kron,ainterp1,corrections,wtss);
         
 
         imat = 1 + (iafter-1)*k*opdims(1);
@@ -124,7 +138,7 @@
       % skip construction if the chunk is in the "bad" chunk list  
     else
       submat = chnk.quadggq.diagbuildmat(r,d,n,d2,data,j,kern,opdims,...
-          xs0,wts0,ainterps0kron,ainterps0);
+          xs0,wts0,ainterps0kron,ainterps0,corrections,wtss,indd);
 
       imat = 1 + (j-1)*k*opdims(1);
       
diff --git a/chunkie/+chnk/+quadggq/diagbuildmat.m b/chunkie/+chnk/+quadggq/diagbuildmat.m
index b8ceabf..bf1c49b 100644
--- a/chunkie/+chnk/+quadggq/diagbuildmat.m
+++ b/chunkie/+chnk/+quadggq/diagbuildmat.m
@@ -1,5 +1,5 @@
 function submat = diagbuildmat(r,d,n,d2,data,i,fkern,opdims,...
-			 xs0,whts0,ainterps0kron,ainterps0)
+			 xs0,whts0,ainterps0kron,ainterps0,corrections,wtss,indd)
 %CHNK.QUADGGQ.DIAGBUILDMAT                  
 
 % grab specific boundary data
@@ -11,6 +11,14 @@
 else
     dd = data(:,:,i);
 end
+
+if nargin < 13
+    corrections = false;
+    wtss = [];
+    indd = [];
+end
+
+
 % interpolate boundary info
 
 % get relevant coefficients
@@ -23,13 +31,13 @@
 d2fine = cell(k,1);
 dsdt = cell(k,1);
 
-for i = 1:k
-    rfine{i} = (ainterps0{i}*(rs.')).';
-    dfine{i} = (ainterps0{i}*(ds.')).';
-    d2fine{i} = (ainterps0{i}*(d2s.')).';
-    dfinenrm = sqrt(sum(dfine{i}.^2,1));
-    nfine{i} = [dfine{i}(2,:); -dfine{i}(1,:)]./dfinenrm;
-    dsdt{i} = (dfinenrm(:)).*whts0{i};
+for j = 1:k
+    rfine{j} = (ainterps0{j}*(rs.')).';
+    dfine{j} = (ainterps0{j}*(ds.')).';
+    d2fine{j} = (ainterps0{j}*(d2s.')).';
+    dfinenrm = sqrt(sum(dfine{j}.^2,1));
+    nfine{j} = [dfine{j}(2,:); -dfine{j}(1,:)]./dfinenrm;
+    dsdt{j} = (dfinenrm(:)).*whts0{j};
 end
 
 srcinfo = [];
@@ -54,6 +62,21 @@
     smatbigi*ainterps0kron{j};
 end
 
+if corrections
+    srcinfo.r = rs;
+    srcinfo.d = ds;
+    srcinfo.d2 = d2s;
+    srcinfo.n = ns;
+
+    targinfo.r = rs; targinfo.d = ds; targinfo.d2 = d2s; targinfo.n = ns;
+    targinfo.data = dd;
+
+    sc = fkern(srcinfo,targinfo);
+    sc(indd) = 0;
+    
+    wtsi = wtss(:,i);
+    submat = submat - sc.*(wtsi(:).');
+
 end
    
 
diff --git a/chunkie/+chnk/+quadggq/nearbuildmat.m b/chunkie/+chnk/+quadggq/nearbuildmat.m
index 08502d1..8b5527e 100644
--- a/chunkie/+chnk/+quadggq/nearbuildmat.m
+++ b/chunkie/+chnk/+quadggq/nearbuildmat.m
@@ -1,5 +1,5 @@
 function submat = nearbuildmat(r,d,n,d2,data,i,j,fkern,opdims,...
-				      xs1,whts1,ainterp1kron,ainterp1)
+				      xs1,whts1,ainterp1kron,ainterp1,corrections,wtss)
 %CHNKR.QUADGGQ.NEARBUILDMAT
 
 % grab specific boundary data
@@ -12,6 +12,12 @@
 else
     dd = data(:,:,i);
 end
+
+if nargin < 14
+    corrections = false;
+    wtss = [];
+end
+
 % interpolate boundary info
 
 % get relevant coefficients
@@ -73,5 +79,12 @@
 smatbig = fkern(srcinfo,targinfo);
 submat = smatbig*diag(dsdtndim2)*ainterp1kron;
 
+if corrections
+    srcinfo = []; srcinfo.r = rs; srcinfo.d = ds; 
+    srcinfo.d2 = d2s; srcinfo.n = ns; 
+
+    wtsj = wtss(:,j);
+    submat = submat - fkern(srcinfo,targinfo).*(wtsj(:).');
+
  end
    
diff --git a/chunkie/+lege/barywts.m b/chunkie/+lege/barywts.m
index a9109a4..22c3732 100644
--- a/chunkie/+lege/barywts.m
+++ b/chunkie/+lege/barywts.m
@@ -1,4 +1,4 @@
-function w = barywts(k)
+function w = barywts(k,x)
 %LEGE.BARYWTS
 %
 % returns the weights for barycentric Lagrange interpolation
@@ -16,7 +16,9 @@
 %
 % where x(j) is the jth Legendre node of order k
 
-x = lege.exps(k);
+if nargin < 2
+    x = lege.exps(k);
+end
 
 xx = bsxfun(@minus,x,x.');
 xx(1:k+1:end) = 1;
diff --git a/chunkie/chunkermat.m b/chunkie/chunkermat.m
index fd6d5a8..ddf3e70 100644
--- a/chunkie/chunkermat.m
+++ b/chunkie/chunkermat.m
@@ -282,7 +282,7 @@
             if adaptive_correction
                 flag = flagnear(chnkrj,chnkri.r(:,:));
                 sysmat_tmp_adap = chunkermat_adap(chnkrj,ftmp,opdims, ...
-                    chnkri,flag,opts);
+                    chnkri,flag,opts,corrections);
                 if (~nonsmoothonly)
                     [isys,jsys,vsys] = find(sysmat_tmp_adap);
                     ijsys = isys + (jsys-1)*size(sysmat_tmp_adap,1);
@@ -385,7 +385,7 @@
             end
         end
         if nonsmoothonly
-            sysmat_tmp = chnk.quadggq.buildmattd(chnkr,ftmp,opdims,type,auxquads,jlist);
+            sysmat_tmp = chnk.quadggq.buildmattd(chnkr,ftmp,opdims,type,auxquads,jlist,corrections);
         else
             sysmat_tmp = chnk.quadggq.buildmat(chnkr,ftmp,opdims,type,auxquads,jlist);
         end
@@ -417,7 +417,7 @@
         end
         
         sysmat_tmp_adap = chunkermat_adap(chnkr, ftmp, opdims, chnkr, ...
-           flag,opts);
+           flag,opts,corrections);
 
         [isys,jsys,vsys] = find(sysmat_tmp_adap);
 
@@ -450,10 +450,9 @@
 if(icgrph && isrcip)
     [sbclmat,sbcrmat,lvmat,rvmat,u] = chnk.rcip.shiftedlegbasismats(k);
     nch_all = horzcat(chnkobj.echnks.nch);
+    npt_all = horzcat(chnkobj.echnks.npt);
     [~,nv] = size(chnkobj.verts);
     ngl = chnkrs(1).k;
-    glxs = lege.exps(k);
-    
     
     for ivert=1:nv
         clist = chnkobj.vstruc{ivert}{1};
@@ -482,17 +481,19 @@
             break
         end
         starind = zeros(1,2*ngl*ndim*nedge);
+        corinds = cell(nedge,1);
         for i=1:nedge
             i1 = (i-1)*2*ngl*ndim+1;
             i2 = i*2*ngl*ndim;
             if(isstart(i))
                 starind(i1:i2) = irowlocs(clist(i))+(1:2*ngl*ndim)-1;
+                corinds{i} = 1:2*ngl;
             else
                 starind(i1:i2) = irowlocs(clist(i)+1)-fliplr(0:2*ngl*ndim-1)-1;
+                corinds{i} = (npt_all(clist(i))-2*ngl + 1):npt_all(clist(i));
             end
         end
         
-
         [Pbc,PWbc,starL,circL,starS,circS,ilist,starL1,circL1] = ...
             chnk.rcip.setup(ngl,ndim,nedge,isstart);
         optsrcip = opts;
@@ -508,6 +509,51 @@
             
             sysmat(starind,starind) = sysmat_tmp;
         else
+
+            if corrections
+                cormat = zeros(size(sysmat_tmp));
+                jstart = 1;
+                for jj = 1:nedge
+                    jch = clist(jj);
+                    srcinfo = [];
+                    jinds = corinds{jj};
+                    srcinfo.r = chnkrs(jch).r(:,jinds);
+                    srcinfo.d = chnkrs(jch).d(:,jinds);
+                    srcinfo.d2 = chnkrs(jch).d2(:,jinds);
+                    srcinfo.n = chnkrs(jch).n(:,jinds);
+                    wtsj = chnkrs(jch).wts(jinds);
+                    op2 = opdims_mat(2,1,jch);
+                    wtsj = repmat(wtsj(:).',op2,1);
+                    wtsj = wtsj(:);
+                    jflat = jstart:(jstart-1+length(jinds)*op2);
+                    jstart = jstart+length(jinds)*op2;
+                    istart = 1;
+                    for ii = 1:nedge
+                        ich = clist(ii);
+                        iinds = corinds{ii};
+                        targinfo = [];
+                        targinfo.r = chnkrs(ich).r(:,iinds);
+                        targinfo.d = chnkrs(ich).d(:,iinds);
+                        targinfo.d2 = chnkrs(ich).d2(:,iinds);
+                        targinfo.n = chnkrs(ich).n(:,iinds);
+                        op1 = opdims_mat(1,ich,1);
+                        iflat = istart:(istart-1+length(iinds)*op1);
+                        istart = istart + length(iinds)*op1;
+                        if size(kern) == 1
+                            ktmp = kern.eval;
+                        else
+                            ktmp = kern(ich,jch).eval;
+                        end
+                        submat = ktmp(srcinfo,targinfo).*(wtsj(:).');
+                        if (ii == jj)
+                            submat(kron(eye(length(iinds)),ones(op1,op2)) > 0) = 0;
+                        end
+                        cormat(iflat,jflat) = submat;
+                    end
+                end
+                sysmat_tmp = sysmat_tmp - cormat;
+            end
+                       
             [jind,iind] = meshgrid(starind);
             
             isysmat = [isysmat;iind(:)];
@@ -532,132 +578,6 @@
 end
 
 
-
-if (corrections)
-    % subtract out the smooth quadrature rule in the points computed using
-    % a special quadrature rule
-    sys_loc = sysmat;
-    iinds = isysmat(idx);
-    jinds = jsysmat(idx);
-    icorinds = [];
-    jcorinds = [];
-    vcors = [];
-
-    iselfinds = [];
-    jselfinds = [];
-
-    for ichkr = 1:nchunkers
-        for i = 1:chnkrs(ichkr).npt
-            targinfo = [];
-	        targinfo.r  = chnkrs(ichkr).r(:,i); 
-            targinfo.d  = chnkrs(ichkr).d(:,i); 
-	        targinfo.d2 = chnkrs(ichkr).d2(:,i); 
-            targinfo.n  = chnkrs(ichkr).n(:,i);
-    
-            icortmp = (i-1)*opdims_mat(1,ichkr,1) ...
-                        +(1:opdims_mat(1,ichkr,1)) + irowlocs(ichkr) - 1;
-            jsrc = [];
-            for l = 1:opdims_mat(1,ichkr,1)
-                jsrc = [jsrc;jinds(iinds == icortmp(l))];
-            end
-        
-            jchnks = unique(idcolchnk(:,jsrc)','rows')';
-            jchnkrs = unique(jchnks(1,:));
-            if (size(kern) == 1)
-                srcinfo = []; 
-                srcinfo.r  = [];
-                srcinfo.d  = [];
-                srcinfo.d2 = [];
-                srcinfo.n  = [];
-                srcw = [];
-                for jchkr = jchnkrs
-                    jchnklgth = opdims_mat(2,1,jchkr)*chnkrs(jchkr).k;
-        
-                    jcortmp = jchnklgth*(jchnks(2,jchnks(1,:)==jchkr)-1)...
-                            + ((1:jchnklgth)')+icollocs(jchkr)-1;
-                    jcortmp = jcortmp(:); % global density j indices
-        
-                    icortmp2 = repmat(icortmp(:),1,length(jcortmp));
-                    icorinds = [icorinds; icortmp2(:)];
-
-                    jcortmp2 = repmat(jcortmp,1,opdims_mat(1,ichkr,1))';
-                    jcorinds = [jcorinds; jcortmp2(:)];
-                    
-                    jloc = chnkrs(jchkr).k*(jchnks(2,jchnks(1,:)==jchkr)-1) + ...
-                        ((1:chnkrs(jchkr).k)'); % local pt j indices
-                    jloc = jloc(:)';
-                    srcinfo.r  = [srcinfo.r, chnkrs(jchkr).r(:,jloc)]; 
-                    srcinfo.d  = [srcinfo.d, chnkrs(jchkr).d(:,jloc)]; 
-                    srcinfo.d2 = [srcinfo.d2,chnkrs(jchkr).d2(:,jloc)]; 
-                    srcinfo.n  = [srcinfo.n, chnkrs(jchkr).n(:,jloc)];
-        
-                    srcwj = repmat(chnkrs(jchkr).wts(jloc), ...
-                        opdims_mat(2,1,jchkr),1);
-                    srcw = [srcw, srcwj(:)'];
-                end
-                vcorvals = kern.eval(srcinfo, targinfo).*srcw;
-                vcors = [vcors; vcorvals(:)];
-            else
-                for jchkr = jchnkrs
-                    jchnklgth = opdims_mat(2,1,jchkr)*chnkrs(jchkr).k;
-        
-                    jcortmp = jchnklgth*(jchnks(2,jchnks(1,:)==jchkr)-1) + ...
-                        ((1:jchnklgth)')+icollocs(jchkr)-1;
-                    jcortmp = jcortmp(:); % global density j indices
-        
-                    icortmp2 = repmat(icortmp(:),1,length(jcortmp));
-                    icorinds = [icorinds; icortmp2(:)];
-
-                    jcortmp2 = repmat(jcortmp,1,opdims_mat(1,ichkr,1))';
-                    jcorinds = [jcorinds; jcortmp2(:)];
-                    
-                    jloc = chnkrs(jchkr).k*(jchnks(2,jchnks(1,:)==jchkr)-1) + ...
-                        ((1:chnkrs(jchkr).k)'); % local pt j indices
-                    jloc = jloc(:)';
-        
-                    srcinfo = []; 
-                    srcinfo.r  = chnkrs(jchkr).r(:,jloc); 
-                    srcinfo.d  = chnkrs(jchkr).d(:,jloc); 
-                    srcinfo.d2 = chnkrs(jchkr).d2(:,jloc); 
-                    srcinfo.n  = chnkrs(jchkr).n(:,jloc);
-
-                    srcw = repmat(chnkrs(jchkr).wts(jloc), ...
-                        opdims_mat(2,1,jchkr),1);
-                    srcw = srcw(:)';
-                    
-                    vcorvals = kern(ichkr,jchkr).eval(srcinfo, targinfo) ...
-                        .*srcw;
-                    vcors = [vcors; vcorvals(:)];
-                end
-            end
-        end
-
-        % identify self interactions to be set to zero
-        ids = 1:lchunks(ichkr);
-        jds = ids;
-
-        opdims = opdims_mat(:,ichkr,ichkr);
-        islf = repmat((1:opdims(1))',opdims(2),1) + opdims(1)*(ids-1) ...
-            + irowlocs(ichkr)-1;
-        tmp = repmat((1:opdims(2)),opdims(1),1); 
-        jslf = tmp(:) + opdims(2)*(jds-1)+ icollocs(ichkr)-1;
-
-        iselfinds = [iselfinds; islf(:)];
-        jselfinds = [jselfinds; jslf(:)];
-
-    end
-    icorinds = icorinds(:);
-    jcorinds = jcorinds(:);
-    vcors = vcors(:);
-
-    vcormat = sparse(icorinds,jcorinds,vcors,nrows,ncols);
-
-    linsp = iselfinds + (jselfinds-1)*nrows;
-    vcormat(linsp) = 0; % set self interactions to zero to avoid NaNs
-
-    sysmat = sys_loc-vcormat;
-end
-
 if (nargout >1) 
 	varargout{1} = opts;
 end  
@@ -665,7 +585,7 @@
 end
 
 function mat = chunkermat_adap(chnkr,kern,opdims, ...
-			       chnkrt,flag,opts)
+			       chnkrt,flag,opts,corrections)
 
   if isa(kern,'kernel')
     kernev = kernel.eval;
@@ -685,6 +605,9 @@
 if nargin < 6
     opts = [];
 end
+if nargin < 7
+    corrections = false;
+end
 
 targs = chnkrt.r(:,:); targn = chnkrt.n(:,:); 
 targd = chnkrt.d(:,:); targd2 = chnkrt.d2(:,:);
@@ -704,13 +627,15 @@
 istart = 1;
 
 [t,w] = lege.exps(2*k+1);
-ct = lege.exps(k);
-bw = lege.barywts(k);
+ct = chnkr.tstor;
+bw = lege.barywts(k,ct);
 r = chnkr.r;
 d = chnkr.d;
 n = chnkr.n;
 d2 = chnkr.d2;
 
+wtss = chnkr.wts;
+
 for i = 1:nch
     jmat = 1 + (i-1)*k*opdims(2);
     jmatend = i*k*opdims(2);
@@ -719,6 +644,14 @@
     mat1 =  chnk.adapgausswts(r,d,n,d2,ct,bw,i,targs(:,ji), ...
                 targd(:,ji),targn(:,ji),targd2(:,ji),kernev,opdims,t,w,opts);
             
+    if corrections
+        targinfo = []; targinfo.r = targs(:,ji); targinfo.d = targd(:,ji);
+        targinfo.n = targn(:,ji); targinfo.d2 = targd2(:,ji);
+        srcinfo = []; srcinfo.r = r(:,:,i); srcinfo.d = d(:,:,i); 
+        srcinfo.n = n(:,:,i); srcinfo.d2 = d2(:,:,i);
+        wtsi = wtss(:,i); wtsi = repmat(wtsi(:).',opdims(2),1);
+        mat1 = mat1 - kernev(srcinfo,targinfo).*(wtsi(:).');
+    end
     js1 = jmat:jmatend;
     js1 = repmat( (js1(:)).',opdims(1)*numel(ji),1);
             
diff --git a/chunkie/chunkermatapply.m b/chunkie/chunkermatapply.m
index 517c260..1a04e13 100644
--- a/chunkie/chunkermatapply.m
+++ b/chunkie/chunkermatapply.m
@@ -158,6 +158,7 @@
 lchunks    = zeros(nchunkers,1);
 
 %TODO: figure out a way to avoid this nchunkers^2 loop
+fmmall = true;
 for i=1:nchunkers
     
     targinfo = [];
@@ -175,15 +176,24 @@
 
         if (size(kern) == 1)
             ftemp = kern.eval(srcinfo,targinfo);
+            fmmall = fmmall && ~isempty(kern.fmm);
         else
             ktmp = kern(i,j).eval;
             ftemp = ktmp(srcinfo,targinfo);
+            fmmall = fmmall && ~isempty(kern(i,j).fmm);
         end   
         opdims = size(ftemp);
         opdims_mat(:,i,j) = opdims;
     end
 end    
 
+if ~fmmall
+    msg = "chunkermatapply: this routine only recommended if fmm" + ...
+        " is defined for all relevant kernels. Consider forming the dense matrix" + ...
+        " or using chunkerflam instead";
+    warning(msg);
+end
+
 irowlocs = zeros(nchunkers+1,1);
 icollocs = zeros(nchunkers+1,1);
 
@@ -194,7 +204,6 @@
    irowlocs(i+1) = irowlocs(i) + lchunks(i)*opdims_mat(1,i,1);
 end
 
-
 % apply local corrections and diagonal scaling
 u = cormat*dens;
 
diff --git a/devtools/test/chunkermatapplyTest.m b/devtools/test/chunkermatapplyTest.m
index 4894d37..bbb2f6e 100644
--- a/devtools/test/chunkermatapplyTest.m
+++ b/devtools/test/chunkermatapplyTest.m
@@ -38,11 +38,19 @@
 dens = fkernsrc.fmm(1e-12,srcinfo,targinfo,strengths);
 
 udense = sys*dens;
-start = tic; cormat = chunkermat(chnkr,fkern,struct("corrections",true)); 
+start = tic; cormat = chunkermat(chnkr,fkern,struct("corrections",true));
 toc(start)
 sysapply = @(sigma) -0.5*sigma + chunkermatapply(chnkr,fkern,sigma,cormat);
 start = tic; u = sysapply(dens); t1 = toc(start);
 
+e1 = zeros(chnkr.npt,1); e1(1) = 1;
+sys11 = sysapply(e1);
+
+smthmat = chnk.quadnative.buildmat(chnkr,fkern.eval,[1,1]);
+smthmat(eye(chnkr.npt) > 0) = 0;
+
+nnz(abs(sys-(cormat+smthmat-0.5*eye(chnkr.npt))) > 1e-11)
+
 fprintf('%5.2e s : time for matrix free apply\n',t1)
 relerr = norm(udense-u)/norm(udense);
 fprintf('relative apply error %5.2e\n',relerr);
@@ -55,6 +63,7 @@
 fprintf('relative solve error %5.2e\n',relerr);
 assert(relerr < 1e-13)
 
+%%
 % vector valued chunker test
 nregions = 2;
 ks = [1.1;2.1]*30;
@@ -97,14 +106,15 @@
 fprintf('relative apply error %5.2e\n',relerr);
 assert(relerr < 1e-13)
 
-sol1 = sys\bdry_data;
-sol2 = gmres(sysapply, bdry_data, [], 1e-12, 1000);
-
-relerr = norm(sol1-sol2)/norm(sol1);
-fprintf('relative solve error %5.2e\n',relerr);
-assert(relerr < 1e-10)
-
+% too slow... (transmission helper kernels don't have fmm)
+% sol1 = sys\bdry_data;
+% sol2 = gmres(sysapply, bdry_data, [], 1e-12, 1000);
+% 
+% relerr = norm(sol1-sol2)/norm(sol1);
+% fprintf('relative solve error %5.2e\n',relerr);
+% assert(relerr < 1e-10)
 
+%%
 % setup chunkgraph 
 
 nverts = 3; 
@@ -151,7 +161,10 @@
 dens = fkernsrc.fmm(1e-12,srcinfo,targinfo,strengths);
 
 udense = sys*dens;
-cormat = chunkermat(cgrph,fkern,struct("corrections",true));
+start = tic; cormat = chunkermat(cgrph,fkern,struct("corrections",true));
+t1 = toc(start);
+fprintf('%5.2e s : time to form corrections matrix\n',t1)
+
 sysapply = @(sigma) sigma + chunkermatapply(cgrph,fkern,sigma,cormat);
 
 start = tic; u = sysapply(dens); t1 = toc(start);
@@ -161,12 +174,12 @@
 fprintf('relative apply error %5.2e\n',relerr);
 assert(relerr < 1e-13)
 
-sol1 = sys\dens;
-
-sol2 = gmres(sysapply, dens, [], 1e-14, 100);
-relerr = norm(sol1-sol2)/norm(sol1);
-fprintf('relative solve error %5.2e\n',relerr);
-assert(relerr < 1e-13)
+% sol1 = sys\dens;
+% 
+% sol2 = gmres(sysapply, dens, [], 1e-14, 100);
+% relerr = norm(sol1-sol2)/norm(sol1);
+% fprintf('relative solve error %5.2e\n',relerr);
+% assert(relerr < 1e-13)
 
 % vectorvalued chunkgraph test
 nregions = 2;
@@ -208,12 +221,12 @@
 fprintf('relative apply error %5.2e\n',relerr);
 assert(relerr < 1e-13)
 
-sol1 = sys\bdry_data;
-sol2 = gmres(sysapply, bdry_data, [], 1e-12, 200);
-
-relerr = norm(sol1-sol2)/norm(sol1);
-fprintf('relative solve error %5.2e\n',relerr);
-assert(relerr < 1e-10)
+% sol1 = sys\bdry_data;
+% sol2 = gmres(sysapply, bdry_data, [], 1e-12, 200);
+% 
+% relerr = norm(sol1-sol2)/norm(sol1);
+% fprintf('relative solve error %5.2e\n',relerr);
+% assert(relerr < 1e-10)
 
 
 function [r,d,d2] = sinearc(t,amp,frq)
diff --git a/devtools/test/mixedbcTest.m b/devtools/test/mixedbcTest.m
index 11b13eb..06cfd66 100644
--- a/devtools/test/mixedbcTest.m
+++ b/devtools/test/mixedbcTest.m
@@ -74,23 +74,18 @@
 
 sol1 = sys\bdrydata;
 
-cormat = chunkermat(cgrph,kerns,struct("corrections",true));
-sysapply = @(sigma) chunkermatapply(cgrph,kerns,1,sigma,cormat,struct("forcefmm",true));
-sysapply2 = @(sigma) chunkermatapply(cgrph,kerns,dval,sigma,cormat);
-
-sol2 = gmres(sysapply,bdrydata,100,1e-6,100);
-
-norm(sol1-sol2)
+start = tic; cormat = chunkermat(cgrph,kerns,struct("corrections",true));
+t1 = toc(start);
+fprintf('%5.2e s : time to build corrections matrix\n',t1)
+sysapply = @(sigma) sigma + chunkermatapply(cgrph,kerns,sigma,cormat,struct("forcefmm",true));
 
 xapply1 = sys*bdrydata;
-xapply2 = sysapply(bdrydata);
-xapply3 = sysapply2(bdrydata);
+start = tic; xapply2 = sysapply(bdrydata); t1 = toc(start);
+fprintf('%5.2e s : time to do matrix-free apply\n',t1)
+
 relerr = norm(xapply1-xapply2)/norm(xapply1);
 fprintf('relative matrix free apply error %5.2e\n',relerr);
 assert(relerr < 1e-10)
-relerr = norm(xapply1-xapply3)/norm(xapply1);
-fprintf('relative matrix free apply error %5.2e\n',relerr);
-assert(relerr < 1e-10)
 
 rmin = min(cgrph.r(:,:)'); rmax = max(cgrph.r(:,:)');
 xl = rmax(1)-rmin(1);
@@ -103,7 +98,7 @@
 targets(1,:) = xxtarg(:); targets(2,:) = yytarg(:);
 
 start = tic; in1 = chunkerinterior(merge(cgrph.echnks(edir)),{xtarg,ytarg});
-in2 = chunkerinterior(merge(cgrph.echnks(eneu)),{xtarg,ytarg}); 
+in2 = chunkerinterior(reverse(merge(cgrph.echnks(eneu))),{xtarg,ytarg}); 
 t1 = toc(start);
 in = in1 & ~in2;
 out = ~in;
@@ -160,6 +155,7 @@
 fprintf('relative field error %5.2e\n',relerr);
 assert(relerr < 1e-4)
 
+%
 % dirichlet and transmission test
 nregions = 2;
 ks = [1.1;2.1]*30;
@@ -172,12 +168,13 @@
     -chnk.helm2d.kern(ks(1),s,t,'dprime')];
 kerns(eneu,edir) = (kerntmp);
 
-kerntmp = @(s,t) -chnk.helm2d.kern(ks(1),s,t,'trans_rep',[-1,-1]);
+trepcf = [-1,1];
+kerntmp = @(s,t) chnk.helm2d.kern(ks(1),s,t,'trans_rep',[-1,1]);
 kerns(edir,eneu) = (kerntmp);
 
-cc1 = [-1,-1;1,1];
+cc1 = [-1,1;-1,1];
 cc2 = cc1;
-kerntmp = @(s,t) -(chnk.helm2d.kern(ks(1),s,t,'all',cc1)- ...
+kerntmp = @(s,t) (chnk.helm2d.kern(ks(1),s,t,'all',cc1)- ...
                  chnk.helm2d.kern(ks(2),s,t,'all',cc2));
 kerns(eneu,eneu) = (kerntmp);
 
@@ -201,13 +198,13 @@
 targinfo.sources = merge(cgrph.echnks(eneu)).r(:,:); 
 targinfo.n = merge(cgrph.echnks(eneu)).n(:,:);
 U = hfmm2d(1e-12,ks(1),srcinfo,0,targinfo.sources,2);
-bdrydatat = [U.pottarg; -sum(targinfo.n.*U.gradtarg,1)];
+bdrydatat = [U.pottarg; sum(targinfo.n.*U.gradtarg,1)];
 bdrydata = [bdrydatad;bdrydatat(:)];
 
 sol1 = sys\bdrydata;
 
 cormat = chunkermat(cgrph,kerns,struct("corrections",true));
-sysapply = @(sigma) chunkermatapply(cgrph,kerns,1,sigma,cormat);
+sysapply = @(sigma) sigma + chunkermatapply(cgrph,kerns,sigma,cormat);
 
 xapply1 = sys*bdrydata;
 xapply2 = sysapply(bdrydata);
@@ -216,18 +213,18 @@
 assert(relerr < 1e-10)
 
 start = tic;
-fkernd = 2*kernel('helm', 'd', ks(1));
+fkernd = -2*kernel('helm', 'd', ks(1));
 iddir = 1:merge(cgrph.echnks(edir)).npt;
 uscat1 = chunkerkerneval(merge(cgrph.echnks(edir)),fkernd,sol1(iddir), ...
     targets(:,in));
 
-kerntmp = @(s,t) -chnk.helm2d.kern(ks(1),s,t,'trans_rep',[1,1]);
+kerntmp = @(s,t) chnk.helm2d.kern(ks(1),s,t,'trans_rep',trepcf);
 fkern  = kernel(kerntmp);
 idneu = (1:2*merge(cgrph.echnks(eneu)).npt) + merge(cgrph.echnks(edir)).npt;
 uscat1 = uscat1 - chunkerkerneval(merge(cgrph.echnks(eneu)),fkern, ...
     sol1(idneu),targets(:,in));
 
-kerntmp = @(s,t) -chnk.helm2d.kern(ks(2),s,t,'trans_rep',[1,1]);
+kerntmp = @(s,t) chnk.helm2d.kern(ks(2),s,t,'trans_rep',trepcf);
 fkern  = kernel(kerntmp);
 uscat2 = chunkerkerneval(merge(cgrph.echnks(eneu)),fkern,sol1(idneu), ...
     targets(:,in2)); 
@@ -237,7 +234,7 @@
 fkernsrc = kernel('helm','s',ks(1));
 targinfo = []; targinfo.r = targets(:,in); 
 uin1 = fkernsrc.fmm(1e-12,srcinfo,targinfo,charges);
-utot1 = uscat1(:)-uin1(:);
+utot1 = uscat1(:)+uin1(:);
 utot2 = uscat2(:);
 
 figure(3)