diff --git a/examples/hfmm2d_example.make b/examples/hfmm2d_example.make
index f92fddd..e94e2ef 100644
--- a/examples/hfmm2d_example.make
+++ b/examples/hfmm2d_example.make
@@ -1,7 +1,7 @@
 OS = osx
 
-#HOST = gcc
-HOST = gcc-openmp
+HOST = gcc
+#HOST = gcc-openmp
 #HOST = intel
 #HOST = intel-openmp
 
@@ -32,7 +32,7 @@ endif
 
 ifeq ($(HOST),gcc)
     FC=gfortran -L${LDFMM} 
-    FFLAGS=-fPIC -O3 -funroll-loops -march=native  
+    FFLAGS=-fPIC -O3 -funroll-loops -march=native -std=legacy 
 endif
 
 ifeq ($(HOST),gcc-openmp)
diff --git a/make.inc.windows.mingw b/make.inc.windows.mingw
index 71e54b2..d2c4e49 100644
--- a/make.inc.windows.mingw
+++ b/make.inc.windows.mingw
@@ -4,7 +4,7 @@
 # OpenMP:   default enabled unless specified
 #
 
-FFLAGS= -fPIC -O3 -funroll-loops 
+FFLAGS= -fPIC -O3 -funroll-loops -std=legacy 
 
 DYNAMICLIB = $(LIBNAME).dll
 LIMPLIB = $(LIBNAME)_dll.lib
diff --git a/makefile b/makefile
index 6b32e54..b9d6899 100644
--- a/makefile
+++ b/makefile
@@ -272,7 +272,7 @@ test/bhfmm2d:
 #python
 python: $(STATICLIB)
 	cd python && \
-	FMM_FLIBS='$(LIBS) $(OMPFLAGS)' FMM_FFLAGS='$(FFLAGS)' $(PYTHON) -m pip install -e .
+	FMM_FLIBS='$(LIBS) $(OMPFLAGS)' FMM_FFLAGS='$(FFLAGS)' $(PYTHON) -m pip install . --verbose
 
 python-dist: $(STATICLIB)
 	cd python && \
diff --git a/python/fmm2dpy/fmm2d.py b/python/fmm2dpy/fmm2d.py
index bd8f86f..5d9b977 100644
--- a/python/fmm2dpy/fmm2d.py
+++ b/python/fmm2dpy/fmm2d.py
@@ -983,13 +983,14 @@ def bhfmm2d(*,eps,sources,charges=None,dipoles=None,
 
       .. math:: 
 
-          u(x) = \sum_{j=1}^{N} c_{j} * log\(\|x-x_{j}\|\) + 
-          \overline{c}_{j} (x-x_{j})/(\overline{x-x_{j}) + d_{j,1}/(x-x_{j}) + 
-          d_{j,2}/(\overline{x-x_{j}}) - 
-          \overline{d_{j,1}} (x-x_{j})/(\overline{x-x_{j}})^2\, ,
+          u(x) = \sum_{j=1}^{N} 2 c_{j,1} * log\(\|x-x_{j}\|\) + 
+          c_{j,2} (x-x_{j})/(\overline{x-x_{j}) + d_{j,1}/(x-x_{j}) + 
+          d_{j,3}/(\overline{x-x_{j}}) + 
+          d_{j,2} (x-x_{j})/(\overline{x-x_{j}})^2\, ,
 
-      where $c_{j}$ are the charge densities, $d_{j,1}$, $d_{j,2}$ are the dipole strengths,
-      and $x_{j}$ are the source locations.
+      where $c_{j,1}$, $c_{j,2}$ are the charge densities, $d_{j,1}$, $d_{j,2}$, 
+      $d_{j,3}$ are the dipole strengths, and $x_{j}$ are the 
+      source locations.
 
       When $x=x_{m}$, the term corresponding to $x_{m}$ is dropped from the
       sum
@@ -1001,10 +1002,10 @@ def bhfmm2d(*,eps,sources,charges=None,dipoles=None,
 
         sources: float(2,n)   
                source locations (x_{j})
-        charges: complex(nd,n) or complex(n)
-               charge densities (c_{j})
-        dipoles: complex(nd,2,n) or complex(2,n)
-               dipole densities (d_{j,1}, d_{j,2})
+        charges: complex(nd,2,n) or complex(2,n)
+               charge densities (c_{j,1}, c_{j,2})
+        dipoles: complex(nd,3,n) or complex(3,n)
+               dipole densities (d_{j,1}, d_{j,2}, d_{j,3})
         targets: float(2,nt)
                 target locations (x)
         pg:  integer
@@ -1043,27 +1044,28 @@ def bhfmm2d(*,eps,sources,charges=None,dipoles=None,
         print("Nothing to compute, set either pg or pgt to non-zero")
         return out
     if charges is not None:
-        if nd == 1:
-            assert charges.shape[0] == ns, "Charges must be same length as second dimension of sources"
-            charges = charges.reshape(1,ns)
+        if nd == 1 and ns>1:
+            assert charges.shape[0] == 2 and charges.shape[1] == ns, "Charges must be of shape [2, number of source]"
+        if nd == 1 and ns==1:
+            assert charges.shape[0] == 2, "Charges must of shape [2, number of sources]"
         if nd>1:
-            assert charges.shape[0] == nd and charges.shape[1]==ns, "Charges must be of shape [nd,ns] where nd is number of densities, and ns is number of sources" 
+            assert charges.shape[0] == nd and charges.shape[1] == 2 and charges.shape[2]==ns, "Charges must be of shape [nd,2,ns] where nd is number of densities, and ns is number of sources" 
         ifcharge = 1
-        charges = charges.reshape([nd,ns])
+        charges = charges.reshape([nd,2,ns])
     else:
-       charges = np.zeros([nd,ns],dtype='complex') 
+       charges = np.zeros([nd,2,ns],dtype='complex') 
 
     if(dipoles is not None):
         if nd == 1 and ns>1:
-            assert dipoles.shape[0] == 2 and dipoles.shape[1] == ns, "Dipole must of shape [2, number of sources]"
+            assert dipoles.shape[0] == 3 and dipoles.shape[1] == ns, "Dipole must of shape [3, number of sources]"
         if nd == 1 and ns==1:
-            assert dipoles.shape[0] == 2, "Dipole must of shape [2, number of sources]"
+            assert dipoles.shape[0] == 3, "Dipole must of shape [3, number of sources]"
         if nd>1:
-            assert dipoles.shape[0] == nd and dipoles.shape[1] == 2 and dipoles.shape[2]==ns, "Dipole must of shape [nd,2, number of sources]"
-        dipoles = dipoles.reshape([nd,2,ns])
+            assert dipoles.shape[0] == nd and dipoles.shape[1] == 3 and dipoles.shape[2]==ns, "Dipole must of shape [nd,2, number of sources]"
+        dipoles = dipoles.reshape([nd,3,ns])
         ifdipole = 1
     else:
-        dipoles = np.zeros([nd,2,ns],dtype='complex')
+        dipoles = np.zeros([nd,3,ns],dtype='complex')
     if(targets is not None):
         assert targets.shape[0] == 2, "The first dimension of targets must be 2"
         iftarg = 1
@@ -1079,11 +1081,11 @@ def bhfmm2d(*,eps,sources,charges=None,dipoles=None,
         if(pgt>0):
             out.pottarg = out.pottarg.reshape(nt,)
         if(pgt==2):
-            out.gradtarg = out.gradtarg.reshape(2,nt)
+            out.gradtarg = out.gradtarg.reshape(3,nt)
         if(pg>0):
             out.pot = out.pot.reshape(ns,)
         if(pg==2):
-            out.grad = out.grad.reshape(2,ns)
+            out.grad = out.grad.reshape(3,ns)
 
     if(pg<2):
         out.grad = None
@@ -1574,7 +1576,6 @@ def c2ddir(*,sources,targets,charges=None,dipstr=None,
 
 def bh2ddir(*,sources,targets,charges=None,dipoles=None,
           pgt=0,nd=1,thresh=1e-16):
-
     r"""
       This subroutine computes the N-body biharmonic interactions 
       in two dimensions where the interaction kernel is related to the
@@ -1583,13 +1584,14 @@ def bh2ddir(*,sources,targets,charges=None,dipoles=None,
 
       .. math:: 
 
-          u(x) = \sum_{j=1}^{N} c_{j} * log\(\|x-x_{j}\|\) + 
-          \overline{c}_{j} (x-x_{j})/(\overline{x-x_{j}) + d_{j,1}/(x-x_{j}) - 
-          d_{j,2}/(\overline{x-x_{j}}) - 
-          \overline{d_{j,1}} (x-x_{j})/(\overline{x-x_{j}})^2\, ,
+          u(x) = \sum_{j=1}^{N} 2 c_{j,1} * log\(\|x-x_{j}\|\) + 
+          c_{j,2} (x-x_{j})/(\overline{x-x_{j}) + d_{j,1}/(x-x_{j}) + 
+          d_{j,3}/(\overline{x-x_{j}}) + 
+          d_{j,2} (x-x_{j})/(\overline{x-x_{j}})^2\, ,
 
-      where $c_{j}$ are the charge densities, $d_{j,1}$, $d_{j,2}$ are the dipole strengths,
-      and $x_{j}$ are the source locations.
+      where $c_{j,1}$, $c_{j,2}$ are the charge densities, $d_{j,1}$, $d_{j,2}$, 
+      $d_{j,3}$ are the dipole strengths, and $x_{j}$ are the 
+      source locations.
 
       When $x=x_{m}$, the term corresponding to $x_{m}$ is dropped from the
       sum
@@ -1601,10 +1603,10 @@ def bh2ddir(*,sources,targets,charges=None,dipoles=None,
 
         sources: float(2,n)   
                source locations (x_{j})
-        charges: complex(nd,n) or complex(n)
-               charge densities (c_{j})
-        dipoles: complex(nd,2,n) or complex(2,n)
-               dipole densities (d_{j,1}, d_{j,2})
+        charges: complex(nd,2,n) or complex(2,n)
+               charge densities (c_{j,1}, c_{j,2})
+        dipoles: complex(nd,3,n) or complex(3,n)
+               dipole densities (d_{j,1}, d_{j,2}, d_{j,3})
         targets: float(2,nt)
                 target locations (x)
         pgt:  integer
@@ -1614,7 +1616,6 @@ def bh2ddir(*,sources,targets,charges=None,dipoles=None,
         
         nd:   integer
                number of densities
-        thresh: contribution of source x_i, at location x ignored if |x-x_i|<=thresh
 
       Returns:
         out.pottarg: potential at target locations if requested
@@ -1640,27 +1641,28 @@ def bh2ddir(*,sources,targets,charges=None,dipoles=None,
     ifdipole = 0
     iftarg = 0
     if charges is not None:
-        if nd == 1:
-            assert charges.shape[0] == ns, "Charges must be same length as second dimension of sources"
-            charges = charges.reshape(1,ns)
+        if nd == 1 and ns>1:
+            assert charges.shape[0] == 2 and charges.shape[1] == ns, "Charges must be of shape [2, number of source]"
+        if nd == 1 and ns==1:
+            assert charges.shape[0] == 2, "Charges must of shape [2, number of sources]"
         if nd>1:
-            assert charges.shape[0] == nd and charges.shape[1]==ns, "Charges must be of shape [nd,ns] where nd is number of densities, and ns is number of sources" 
-        charges = charges.reshape([nd,ns])
+            assert charges.shape[0] == nd and charges.shape[1] == 2 and charges.shape[2]==ns, "Charges must be of shape [nd,2,ns] where nd is number of densities, and ns is number of sources" 
         ifcharge = 1
+        charges = charges.reshape([nd,2,ns])
     else:
-       charges = np.zeros([nd,ns],dtype='complex') 
+       charges = np.zeros([nd,2,ns],dtype='complex') 
 
     if(dipoles is not None):
         if nd == 1 and ns>1:
-            assert dipoles.shape[0] == 2 and dipoles.shape[1] == ns, "Dipole must of shape [2, number of sources]"
+            assert dipoles.shape[0] == 3 and dipoles.shape[1] == ns, "Dipole must of shape [3, number of sources]"
         if nd == 1 and ns==1:
-            assert dipoles.shape[0] == 2, "Dipole must of shape [2, number of sources]"
+            assert dipoles.shape[0] == 3, "Dipole must of shape [3, number of sources]"
         if nd>1:
-            assert dipoles.shape[0] == nd and dipoles.shape[1] == 2 and dipoles.shape[2]==ns, "Dipole must of shape [nd,2, number of sources]"
-        dipoles = dipoles.reshape([nd,2,ns])
+            assert dipoles.shape[0] == nd and dipoles.shape[1] == 3 and dipoles.shape[2]==ns, "Dipole must of shape [nd,2, number of sources]"
+        dipoles = dipoles.reshape([nd,3,ns])
         ifdipole = 1
     else:
-        dipoles = np.zeros([nd,2,ns],dtype='complex')
+        dipoles = np.zeros([nd,3,ns],dtype='complex')
 
     assert targets.shape[0] == 2, "The first dimension of targets must be 2"
     nt = targets.shape[1]
@@ -1682,13 +1684,13 @@ def bh2ddir(*,sources,targets,charges=None,dipoles=None,
         if(pgt>0):
             out.pottarg = out.pottarg.reshape(nt,)
         if(pgt==2):
-            out.gradtarg = out.gradtarg.reshape(2,nt,)
+            out.gradtarg = out.gradtarg.reshape(3,nt,)
 
     return out
 
 
 
-def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0):
+def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0,bh=0):
     r = 0
     err = 0
     if(nd == 1):
@@ -1700,9 +1702,12 @@ def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0):
                 r = r+la.norm(outex.pot.real[0:ntest])**2
                 err = err+la.norm(outex.pot.real[0:ntest]-out.pot.real[0:ntest])**2
         if(pg >= 2):
-            if(cauchy==0):
+            if(cauchy==0 and bh == 0):
                 g = out.grad[:,0:ntest].reshape(2*ntest,)
                 gex = outex.grad[:,0:ntest].reshape(2*ntest,)
+            elif(cauchy==0 and bh==1):
+                g = out.grad[:,0:ntest].reshape(3*ntest,)
+                gex = outex.grad[:,0:ntest].reshape(3*ntest,)
             else:
                 g = out.grad[0:ntest].reshape(ntest,)
                 gex = outex.grad[0:ntest].reshape(ntest,)
@@ -1718,16 +1723,19 @@ def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0):
             r = r + la.norm(hhex)**2
             err = err + la.norm(hhex-h)**2
         if(pgt > 0):
-            if(cauchy==0):
+            if(cauchy==0 and bh == 0):
                 r = r+la.norm(outex.pottarg[0:ntest])**2
                 err = err+la.norm(outex.pottarg[0:ntest]-out.pottarg[0:ntest])**2
             else:
                 r = r+la.norm(outex.pottarg.real[0:ntest])**2
                 err = err+la.norm(outex.pottarg.real[0:ntest]-out.pottarg.real[0:ntest])**2
         if(pgt >= 2):
-            if(cauchy==0):
+            if(cauchy==0 and bh==0):
                 g = out.gradtarg[:,0:ntest].reshape(2*ntest,)
                 gex = outex.gradtarg[:,0:ntest].reshape(2*ntest,)
+            elif(cauchy==0 and bh==1):
+                g = out.gradtarg[:,0:ntest].reshape(3*ntest,)
+                gex = outex.gradtarg[:,0:ntest].reshape(3*ntest,)
             else:
                 g = out.gradtarg[0:ntest].reshape(ntest,)
                 gex = outex.gradtarg[0:ntest].reshape(ntest,)
@@ -1753,9 +1761,12 @@ def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0):
             r = r+la.norm(pex)**2
             err = err+la.norm(p-pex)**2
         if(pg >= 2):
-            if(cauchy==0):
+            if(cauchy==0 and bh==0):
                 g = out.grad[:,:,0:ntest].reshape(2*nd*ntest,)
                 gex = outex.grad[:,:,0:ntest].reshape(2*nd*ntest,)
+            elif(cauchy==0 and bh==1):
+                g = out.grad[:,:,0:ntest].reshape(3*nd*ntest,)
+                gex = outex.grad[:,:,0:ntest].reshape(3*nd*ntest,)
             else:
                 g = out.grad[:,0:ntest].reshape(nd*ntest,)
                 gex = outex.grad[:,0:ntest].reshape(nd*ntest,)
@@ -1780,9 +1791,12 @@ def comperr(*,ntest,out,outex,pg=0,pgt=0,nd=1,cauchy=0):
             r = r+la.norm(pex)**2
             err = err+la.norm(p-pex)**2
         if(pgt >= 2):
-            if(cauchy==0):
+            if(cauchy==0 and bh==0):
                 g = out.gradtarg[:,:,0:ntest].reshape(2*nd*ntest,)
                 gex = outex.gradtarg[:,:,0:ntest].reshape(2*nd*ntest,)
+            elif(cauchy==0 and bh==1):
+                g = out.gradtarg[:,:,0:ntest].reshape(3*nd*ntest,)
+                gex = outex.gradtarg[:,:,0:ntest].reshape(3*nd*ntest,)
             else:
                 g = out.gradtarg[:,0:ntest].reshape(nd*ntest,)
                 gex = outex.gradtarg[:,0:ntest].reshape(nd*ntest,)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 5223779..0832368 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["numpy"]
+requires = ["setuptools", "numpy"]
 build-backend = "setuptools.build_meta"
diff --git a/python/test/test_bhfmm.py b/python/test/test_bhfmm.py
index f16f91e..f5532a2 100755
--- a/python/test/test_bhfmm.py
+++ b/python/test/test_bhfmm.py
@@ -26,8 +26,8 @@ def test_bhfmm():
     ttmp = targ[:,0:ntest]
     eps = 10**(-5)
 
-    charges = np.random.uniform(0,1,n) + 0j
-    dipoles = np.random.uniform(0,1,(2,n))+1j*np.random.uniform(0,1,(2,n))
+    charges = np.random.uniform(0,1,(2,n))+1j*np.random.uniform(0,1,(2,n))
+    dipoles = np.random.uniform(0,1,(3,n))+1j*np.random.uniform(0,1,(3,n))
 
     outex=fmm.Output()
 
@@ -35,7 +35,7 @@ def test_bhfmm():
     out=fmm.bhfmm2d(eps=eps,sources=sources,charges=charges,pg=1)
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,charges=charges,pgt=1)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -47,7 +47,7 @@ def test_bhfmm():
     out=fmm.bhfmm2d(eps=eps,sources=sources,dipoles=dipoles,pg=1)
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,dipoles=dipoles,pgt=1)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,bh=1)
 
 
     if(err<eps):
@@ -62,7 +62,7 @@ def test_bhfmm():
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,charges=charges, \
         dipoles=dipoles,pgt=1)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -76,7 +76,7 @@ def test_bhfmm():
         pgt=2)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,bh=1)
 
 
     if(err<eps):
@@ -92,7 +92,7 @@ def test_bhfmm():
         pgt=2)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -106,7 +106,7 @@ def test_bhfmm():
        dipoles=dipoles,pgt=2)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,bh=1)
 
 
     if(err<eps):
@@ -117,7 +117,7 @@ def test_bhfmm():
     itest=itest+1
     out=fmm.bhfmm2d(eps=eps,sources=sources,targets=targ,charges=charges,pgt=1)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=1)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -131,7 +131,7 @@ def test_bhfmm():
       
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,\
         dipoles=dipoles,pgt=1)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -146,7 +146,7 @@ def test_bhfmm():
     out2=fmm.bh2ddir(sources=sources,targets=ttmp, \
         charges=charges, \
         dipoles=dipoles,pgt=1)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,bh=1)
     
     if(err<eps):
         testres[itest] = 1
@@ -156,7 +156,7 @@ def test_bhfmm():
     itest = itest + 1
     out=fmm.bhfmm2d(eps=eps,sources=sources,targets=targ,charges=charges,pgt=2)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=2)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -169,7 +169,7 @@ def test_bhfmm():
     dipoles=dipoles,pgt=2)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,\
     dipoles=dipoles,pgt=2)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -181,7 +181,7 @@ def test_bhfmm():
         dipoles=dipoles,pgt=2)
     out2 =fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,\
         dipoles=dipoles,pgt=2)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,bh=1)
 
 
     if(err<eps):
@@ -197,7 +197,7 @@ def test_bhfmm():
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=1)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -215,7 +215,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=1)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -234,7 +234,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=1)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -251,7 +251,7 @@ def test_bhfmm():
           pgt=2)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -269,7 +269,7 @@ def test_bhfmm():
           pgt=2)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -287,7 +287,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=2)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -297,14 +297,14 @@ def test_bhfmm():
 
 
     nd = 2
-    charges = np.random.uniform(0,1,(nd,n))
-    dipoles = np.random.uniform(0,1,(nd,2,n))
+    charges = np.random.uniform(0,1,(nd,2,n))+1j*np.random.uniform(0,1,(nd,2,n))
+    dipoles = np.random.uniform(0,1,(nd,3,n))+1j*np.random.uniform(0,1,(nd,3,n))
 
     itest = itest+1
     out=fmm.bhfmm2d(eps=eps,sources=sources,charges=charges,pg=1,nd=nd)
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,charges=charges,pgt=1,nd=nd)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -316,7 +316,7 @@ def test_bhfmm():
     out=fmm.bhfmm2d(eps=eps,sources=sources,dipoles=dipoles,pg=1,nd=nd)
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,dipoles=dipoles,pgt=1,nd=nd)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd,bh=1)
 
 
     if(err<eps):
@@ -331,7 +331,7 @@ def test_bhfmm():
     out2 = fmm.bh2ddir(sources=sources,targets=stmp,charges=charges, \
         dipoles=dipoles,pgt=1,nd=nd)
     out2.pot = out2.pottarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -345,7 +345,7 @@ def test_bhfmm():
         pgt=2,nd=nd)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd,bh=1)
 
 
     if(err<eps):
@@ -361,7 +361,7 @@ def test_bhfmm():
         pgt=2,nd=nd)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -375,7 +375,7 @@ def test_bhfmm():
        dipoles=dipoles,pgt=2,nd=nd)
     out2.pot = out2.pottarg
     out2.grad = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pg=2,nd=nd,bh=1)
 
 
     if(err<eps):
@@ -387,7 +387,7 @@ def test_bhfmm():
     itest=itest+1
     out=fmm.bhfmm2d(eps=eps,sources=sources,targets=targ,charges=charges,pgt=1,nd=nd)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=1,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -401,7 +401,7 @@ def test_bhfmm():
       
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,\
         dipoles=dipoles,pgt=1,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -416,7 +416,7 @@ def test_bhfmm():
     out2=fmm.bh2ddir(sources=sources,targets=ttmp, \
         charges=charges, \
         dipoles=dipoles,pgt=1,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=1,nd=nd,bh=1)
     
     if(err<eps):
         testres[itest] = 1
@@ -426,7 +426,7 @@ def test_bhfmm():
     itest = itest + 1
     out=fmm.bhfmm2d(eps=eps,sources=sources,targets=targ,charges=charges,pgt=2,nd=nd)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=2,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -439,7 +439,7 @@ def test_bhfmm():
     dipoles=dipoles,pgt=2,nd=nd)
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,\
     dipoles=dipoles,pgt=2,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -451,7 +451,7 @@ def test_bhfmm():
         dipoles=dipoles,pgt=2,nd=nd)
     out2 =fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,\
         dipoles=dipoles,pgt=2,nd=nd)
-    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=out2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -466,7 +466,7 @@ def test_bhfmm():
     out2=fmm.bh2ddir(sources=sources,targets=ttmp,charges=charges,pgt=1,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -484,7 +484,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=1,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -503,7 +503,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=1,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=1,pgt=1,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -520,7 +520,7 @@ def test_bhfmm():
           pgt=2,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -538,7 +538,7 @@ def test_bhfmm():
           pgt=2,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
@@ -556,7 +556,7 @@ def test_bhfmm():
           dipoles=dipoles,pgt=2,nd=nd)
     outex.pottarg = out2.pottarg
     outex.gradtarg = out2.gradtarg
-    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd)
+    err = fmm.comperr(ntest=ntest,out=out,outex=outex,pg=2,pgt=2,nd=nd,bh=1)
 
     if(err<eps):
         testres[itest] = 1
diff --git a/src/biharmonic/bhfmm2d.f b/src/biharmonic/bhfmm2d.f
index 8df21dd..a552451 100644
--- a/src/biharmonic/bhfmm2d.f
+++ b/src/biharmonic/bhfmm2d.f
@@ -37,79 +37,93 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 c       in C^2
 c     * In the legacy interface, dipoles were input as two
 c       complex arrays dip1 and dip2, as opposed to a single
-c       complex array (nd,2,n) where n is the number of sources.
+c       complex array (nd,3,n) where n is the number of sources.
 c       Similarly, the gradients in the legacy interface
 c       were two complex arrays, grada, and gradaa; while
-c       in the new interface, the gradient is a (nd,2,n) 
+c       in the new interface, the gradient is a (nd,3,n) 
 c       complex array where (nd,1,n) is the derivative with
-c       respect to z and the (nd,2,n) component is the derivative
+c       respect to z projection onto z component, 
+c       the (nd,2,n) component is the derivative with respect to z
+c       projection onto zbar component
+c       and the (nd,3,n) component is the derivative
 c       with respect to z_bar
 c     * In this subroutine and the rest the terms potential
 c       and velocity are interchangably used
 c     
 c
-c    vel(z) = \sum charge_j *\log|z-z_j| + 
-c                (charge_j)_bar (z-z_j)/(z-z_j)_bar+
-c                dip1/(z-z_j) + dip2/(z-z_j)_bar-
-c                dip1_bar (z-z_j)/(z-z_j)^2_bar
+c    vel(z) = \sum charge1_j *\log|z-z_j| + 
+c                (charge2_j) (z-z_j)/(z-z_j)_bar+
+c                dip1_j/(z-z_j) + dip2_j/(z-z_j)_bar-
+c                dip3_j (z-z_j)/(z-z_j)^2_bar
 c
-c    The velocity for stokes is related to the goursat functions
-c    as vel = \phi(z) + z (d/dz (\phi))_bar+\psi(z)
+c    For the stokes case, if charge2_j = charge1_j_bar, and dip3 =
+c    dip1_bar, then the velocity is related to the goursat functions
+c    as vel = \phi(z) + z (d/dz (\phi))_bar+\psi(z), where
 c 
-c   The goursat functions are given by
+c    The goursat functions are given by
 c
-c   \phi(z) = \sum charge_j log(z-z_j)+
+c    \phi(z) = \sum charge_j log(z-z_j)+
 c                     dip1/(z-z_j)
 c
-c   \psi(z) = \sum dippar2_bar/(z-z_j)+
+c    \psi(z) = \sum dippar2_bar/(z-z_j)+
 c               z_j_bar dippar1/(z-z_j)^2 +
 c
 c    In all of the sums above, the terms for which |z-z_{j}| <= 2^-51|b|
 c    where |b| is the size of the root box will be ignored.
 c
-c   Gradient here has
-c    
+c    grad(:,1,:) = d/dz vel(z)_proj{z} = 
+c                     charge1_j/(z-z_j) - dip1_j/(z-z_j)^2
+c    grad(:,2,:) = d/dz vel(z)_proj{zbar} = 
+c                     charge2_j/(z-z_j)_bar - dip3_j/(z-z_j)^2_bar
+c    grad(:,3,:) = d/dzbar vel(z) = 
+c                    -charge2_j (z-z_j)/(z-z_j)^2_bar - dip2_j
+c                             (z-z_j)^2_bar +
+c                     2dip3_j(z-z_j)/(z-z_j)^3_bar
 c
 c   INPUT PARAMETERS:
-c   nd            : number of expansions
-c   eps           : FMM precision requested
-c   ns            : number of sources
-c   sources(2,ns) : source locations
-c   ifcharge      : flag for including charge interactions
-c                   charge interactions included if ifcharge =1
-c                   not included otherwise
-c   charge(nd,ns)    : charge strengths
-c                   charge(j,i) is the charge strength of the
-c                   jth charge density at source location i
-c   ifdipole      : flag for including dipole interactions
-c                   dipole interactions included if ifcharge =1
-c                   not included otherwise
-c   dip(nd,2,ns)  : dipole strengths
-c                   dip(j,1,i) is dip1 for the jth dipole density
-c                   at source location i, and dip(j,2,i) is dip2
-c                   for the jth dipole density at source location i
-c   iper          : flag for periodic implmentations. Currently unused
-c   ifpgh         : flag for computing pot/grad/hess
-c                   ifpgh = 1, only potential is computed
-c                   ifpgh = 2, potential and gradient are computed
-c                   ifpgh = 3, potential, gradient, and hessian 
-c                   are computed (hessians not supported currently)
-c   nt            : number of targets
-c   targ(2,nt)    : target locations
-c   ifpghtarg     : flag for computing pottarg/gradtarg/hesstarg
-c                   ifpghtarg = 1, only potential is computed at targets
-c                   ifpghtarg = 2, potential and gradient are 
-c                   computed at targets
-c                   ifpghtarg = 3, potential, gradient, and hessian are 
-c                   computed at targets (hessians not supported
+c   nd              : number of expansions
+c   eps             : FMM precision requested
+c   ns              : number of sources
+c   sources(2,ns)   : source locations
+c   ifcharge        : flag for including charge interactions
+c                     charge interactions included if ifcharge =1
+c                     not included otherwise
+c   charge(nd,2,ns) : charge strengths
+c                     charge(j,1,i) is the charge1 strength of the
+c                     jth charge density at source location i,
+c                     and charge2(j,1,i) is the charge2 strength
+c   ifdipole        : flag for including dipole interactions
+c                     dipole interactions included if ifcharge =1
+c                     not included otherwise
+c   dip(nd,3,ns)    : dipole strengths
+c                     dip(j,1,i) is dip1 for the jth dipole density
+c                     at source location i, dip(j,2,i) is dip2
+c                     for the jth dipole density at source location i,
+c                     and dip(j,3,i) is dip3
+c   iper            : flag for periodic implmentations. Currently unused
+c   ifpgh           : flag for computing pot/grad/hess
+c                     ifpgh = 1, only potential is computed
+c                     ifpgh = 2, potential and gradient are computed
+c                     ifpgh = 3, potential, gradient, and hessian 
+c                     are computed (hessians not supported currently)
+c   nt              : number of targets
+c   targ(2,nt)      : target locations
+c   ifpghtarg       : flag for computing pottarg/gradtarg/hesstarg
+c                     ifpghtarg = 1, only potential is computed at targets
+c                     ifpghtarg = 2, potential and gradient are 
+c                     computed at targets
+c                     ifpghtarg = 3, potential, gradient, and hessian are 
+c                     computed at targets (hessians not supported
 c                     currently)
 c
 c   OUTPUT PARAMETERS
 c   pot(nd,*)       : velocity at the source locations
-c   grad(nd,2,*)    : gradients (d/dz),d/dzbar at the source locations
+c   grad(nd,3,*)    : gradients (d/dz)_proj{z}, 
+c                     (d/dz)_proj{zbar}, d/dzbar at the source locations
 c   hess(nd,3,*)    : hessian  at the source locations (currently unused)
 c   pottarg(nd,*)   : potential at the target locations
-c   gradtarg(nd,2,*): gradient (d/dz), d/dzbar at the target locations
+c   gradtarg(nd,3,*): gradient (d/dz)_proj{z}, 
+c                     (d/dz)_proj{zbar}, d/dzbar at the target locations
 c   hesstarg(nd,3,*): hessian at the target locations (currently unused)
 c
 
@@ -123,10 +137,10 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
       real *8 eps
       integer ns,nt
       real *8 sources(2,ns),targ(2,nt)
-      complex *16 charge(nd,ns),dip(nd,2,ns)
+      complex *16 charge(nd,2,ns),dip(nd,3,ns)
 
-      complex *16 pot(nd,ns),grad(nd,2,ns), hess(nd,3,ns)
-      complex *16 pottarg(nd,nt),gradtarg(nd,2,nt)
+      complex *16 pot(nd,ns),grad(nd,3,ns), hess(nd,3,ns)
+      complex *16 pottarg(nd,nt),gradtarg(nd,3,nt)
       complex *16 hesstarg(nd,3,nt)
 
 c
@@ -150,7 +164,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
       integer, allocatable :: itarg(:),itargse(:,:),iexpcse(:,:)
       real *8, allocatable :: sourcesort(:,:)
       real *8, allocatable :: targsort(:,:)
-      complex *16, allocatable :: chargesort(:,:),dipsort(:,:,:)
+      complex *16, allocatable :: chargesort(:,:,:),dipsort(:,:,:)
       complex *16, allocatable :: potsort(:,:),gradsort(:,:,:),
      1                             hesssort(:,:,:)
       complex *16, allocatable :: pottargsort(:,:),gradtargsort(:,:,:),
@@ -240,37 +254,37 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 
 
       if(ifcharge.eq.1.and.ifdipole.eq.0) then
-        allocate(chargesort(nd,ns),dipsort(nd,2,1))
+        allocate(chargesort(nd,2,ns),dipsort(nd,3,1))
       endif
       if(ifcharge.eq.0.and.ifdipole.eq.1) then
-        allocate(chargesort(nd,1),dipsort(nd,2,ns))
+        allocate(chargesort(nd,2,1),dipsort(nd,3,ns))
       endif
       if(ifcharge.eq.1.and.ifdipole.eq.1) then
-        allocate(chargesort(nd,ns),dipsort(nd,2,ns))
+        allocate(chargesort(nd,2,ns),dipsort(nd,3,ns))
       endif
 
       if(ifpgh.eq.1) then
-        allocate(potsort(nd,ns),gradsort(nd,2,1),hesssort(nd,3,1))
+        allocate(potsort(nd,ns),gradsort(nd,3,1),hesssort(nd,3,1))
       else if(ifpgh.eq.2) then
-        allocate(potsort(nd,ns),gradsort(nd,2,ns),hesssort(nd,3,1))
+        allocate(potsort(nd,ns),gradsort(nd,3,ns),hesssort(nd,3,1))
       else if(ifpgh.eq.3) then
-        allocate(potsort(nd,ns),gradsort(nd,2,ns),hesssort(nd,3,ns))
+        allocate(potsort(nd,ns),gradsort(nd,3,ns),hesssort(nd,3,ns))
       else
-        allocate(potsort(nd,1),gradsort(nd,2,1),hesssort(nd,3,1))
+        allocate(potsort(nd,1),gradsort(nd,3,1),hesssort(nd,3,1))
       endif
 
       
       if(ifpghtarg.eq.1) then
-        allocate(pottargsort(nd,nt),gradtargsort(nd,2,1),
+        allocate(pottargsort(nd,nt),gradtargsort(nd,3,1),
      1     hesstargsort(nd,3,1))
       else if(ifpghtarg.eq.2) then
-        allocate(pottargsort(nd,nt),gradtargsort(nd,2,nt),
+        allocate(pottargsort(nd,nt),gradtargsort(nd,3,nt),
      1      hesstargsort(nd,3,1))
       else if(ifpghtarg.eq.3) then
-        allocate(pottargsort(nd,nt),gradtargsort(nd,2,nt),
+        allocate(pottargsort(nd,nt),gradtargsort(nd,3,nt),
      1     hesstargsort(nd,3,nt))
       else
-        allocate(pottargsort(nd,1),gradtargsort(nd,2,1),
+        allocate(pottargsort(nd,1),gradtargsort(nd,3,1),
      1     hesstargsort(nd,3,1))
       endif
       
@@ -293,6 +307,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
             potsort(idim,i) = 0
             gradsort(idim,1,i) = 0
             gradsort(idim,2,i) = 0
+            gradsort(idim,3,i) = 0
           enddo
         enddo
       endif
@@ -303,6 +318,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
             potsort(idim,i) = 0
             gradsort(idim,1,i) = 0
             gradsort(idim,2,i) = 0
+            gradsort(idim,3,i) = 0
             hesssort(idim,1,i) = 0
             hesssort(idim,2,i) = 0
             hesssort(idim,3,i) = 0
@@ -325,6 +341,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
             pottargsort(idim,i) = 0
             gradtargsort(idim,1,i) = 0
             gradtargsort(idim,2,i) = 0
+            gradtargsort(idim,3,i) = 0
           enddo
         enddo
       endif
@@ -335,6 +352,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
             pottargsort(idim,i) = 0
             gradtargsort(idim,1,i) = 0
             gradtargsort(idim,2,i) = 0
+            gradtargsort(idim,3,i) = 0
             hesstargsort(idim,1,i) = 0
             hesstargsort(idim,2,i) = 0
             hesstargsort(idim,3,i) = 0
@@ -362,7 +380,7 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 
 c       
 c     Multipole and local expansions will be held in workspace
-c     in locations pointed to by array iaddr(3,nboxes).
+c     in locations pointed to by array iaddr(2,nboxes).
 c
 c     iiaddr is pointer to iaddr array, itself contained in workspace.
 c     imptemp is pointer for single expansion (dimensioned by nmax)
@@ -379,9 +397,9 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 c
       call dreorderf(2,ns,sources,sourcesort,isrc)
       if(ifcharge.eq.1) 
-     1     call dreorderf(2*nd,ns,charge,chargesort,isrc)
+     1     call dreorderf(4*nd,ns,charge,chargesort,isrc)
       if(ifdipole.eq.1) then
-         call dreorderf(4*nd,ns,dip,dipsort,isrc)
+         call dreorderf(6*nd,ns,dip,dipsort,isrc)
       endif
 
 c
@@ -441,12 +459,12 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 
       if(ifpgh.eq.2) then
         call dreorderi(2*nd,ns,potsort,pot,isrc)
-        call dreorderi(4*nd,ns,gradsort,grad,isrc)
+        call dreorderi(6*nd,ns,gradsort,grad,isrc)
       endif
 
       if(ifpgh.eq.3) then
         call dreorderi(2*nd,ns,potsort,pot,isrc)
-        call dreorderi(4*nd,ns,gradsort,grad,isrc)
+        call dreorderi(6*nd,ns,gradsort,grad,isrc)
         call dreorderi(6*nd,ns,hesssort,hess,isrc)
       endif
 
@@ -456,12 +474,12 @@ subroutine bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
 
       if(ifpghtarg.eq.2) then
         call dreorderi(2*nd,nt,pottargsort,pottarg,itarg)
-        call dreorderi(4*nd,nt,gradtargsort,gradtarg,itarg)
+        call dreorderi(6*nd,nt,gradtargsort,gradtarg,itarg)
       endif
 
       if(ifpghtarg.eq.3) then
         call dreorderi(2*nd,nt,pottargsort,pottarg,itarg)
-        call dreorderi(4*nd,nt,gradtargsort,gradtarg,itarg)
+        call dreorderi(6*nd,nt,gradtargsort,gradtarg,itarg)
         call dreorderi(6*nd,nt,hesstargsort,hesstarg,itarg)
       endif
 
@@ -491,159 +509,122 @@ subroutine bhfmm2dmain(nd,eps,
 c   Generalized biharmonic FMM in R^2: evaluate all pairwise particle
 c   interactions (ignoring self-interaction). 
 c   
-c   bhfmm2d: charge and dipstr are complex valued, z are complex numbers.
+c   bhfmm2d: chargesort and dipsort are complex valued,
+c   z are complex numbers.
 c
-c   bhfmm2d: charge and dip1,dip2 are complex valued,
-c   z are complex numbers. The complex velocity for stokes/
-c   displacement for stokes/elasticity problem take the general form 
-c
-c   Stokes:
-c   vel(z_i) = \sum_{j \neq i} charge_j *\log|z_i-z_j| + 
-c                (charge_j)_bar (z_i-z_j)/(z_i-z_j)_bar+
-c                dip1/(z_i-z_j) + dip2/(z_i-z_j)_bar-
-c                dip1_bar (z_i-z_j)/(z_i-z_j)^2_bar
-c
-c   The velocity for stokes is related to the goursat functions
-c   as vel = \phi(z) + z (d/dz (\phi))_bar+\psi(z)
-c 
-c   The goursat functions are given by
-c
-c   \phi(z_i) = \sum_{j\neq i} charge_j log(z_i-z_j)+
-c                     dip1/(z_i-z_j)
-c
-c   \psi(z_i) = \sum_{j\neq i} dippar2_bar/(z_i-z_j)+
-c               z_j_bar dippar1/(z_i-z_j)^2 +
-c               charge_j_bar log(z_i-z_j)- z_j_bar charge_j/(z_i-z_j)
-c
-c   For stokes and elasticity problems the following derivatives are
-c   relevant to physical quantities such as pressure and vorticity 
-c   for stokes and stresses for elasticity
-c
-c   1) Analytic gradient (grada) 
-c      =  \frac{\partial \phi(z_i)}{\partial z}
-c
-c   2) Anti analytic gradient (gradaa)
-c       = z \frac{\partial^2 \phi_(z_i)} {\partial^2 z}_bar +
-c         \frac{\partial \psi_(z_i)} {\partial z}
+c   See documentation above for definitions of velocity 
 c 
 c
 c   All the source/target/expansion center related quantities
 c   are assumed to be tree-sorted
 c
 c-----------------------------------------------------------------------
-c   INPUT PARAMETERS:
-c
-c   nd:   number of charge densities
-c
-c   eps:  FMM precision requested
-c
-c   nsource:     integer:  number of sources
-c   sourcesort: real *8 (2,ns):  source locations
-c
-c   ifcharge:  charge computation flag
-c              ifcharge = 1   =>  include charge contribution
-c                                     otherwise do not
-c   chargesort: complex *16 (nsource): charge strengths
-c
-c   ifdipole:  dipole computation flag
-c              ifdipole = 1   =>  include dipole contribution
-c                                     otherwise do not
-c   dipsort: complex *16 (nsource): dipole strengths
-c   ntarget: integer:  number of targets
-c   targetsort: real *8 (2,ntarget):  target locations
-c   nexpc: number of expansion centers
-c   expcsort: real *8 (2,nexpc): expansion center locations
-c   iaddr: (2,nboxes): pointer in rmlexp where multipole
-c                      and local expansions for each
-c                      box is stored
-c                      iaddr(1,ibox) is the
-c                      starting index in rmlexp for the 
-c                      multipole expansion of ibox
-c                      and iaddr(2,ibox) is the
-c                      starting index in rmlexp
-c                      for the local expansion of ibox
-c  mptemp: (lmptmp): temporary multipole/local expansion
-c                        (may not be needed in new setting)
-c  lmptmp: length of temporary expansion
-c   
-c
-c
-c   itree    in: integer (ltree)
-c             This array contains all the information
-c             about the tree
-c             Refer to pts_tree2d.f
-c
-c   ltree    in: integer
-c            length of tree
-c
-c    iptr in: integer(8)
-c             iptr is a collection of pointers 
-c             which points to where different elements 
-c             of the tree are stored in the itree array
-c
-c     ndiv    in: integer
-c             Max number of chunks per box
-c
-c     nlevels in: integer
-c             number of levels in the tree
-c
-c     
-c     nboxes  in: integer
-c             number of boxes in the tree
-c
-c     boxsize in: real*8 (0:nlevels)
-c             boxsize(i) is the size of the box from end to end
-c             at level i
-c     iper    in: integer
-c             flag for periodic implementation
-c
-c     centers in: real *8(2,nboxes)
-c                 array containing the centers of all the boxes
-c
-c     isrcse in: integer(2,nboxes)
-c               starting and ending location of sources in ibox
-c                in sorted list of sources
-c
-c     itargse in: integer(2,nboxes)
-c               starting and ending location of targets in ibox
-c                in sorted list of sources
-c
-c     iexpcse in: integer(2,nboxes)
-c               starting and ending location of expansion centers
-c               in ibox in sorted list of sources
-c
-c     nterms: (0:nlevels) length of multipole and local expansions
-c              at various levels
-c     ntj     in: integer
-c             order of the output expansions
-c
-c     ifpgh  in: integer
-c             flag for evaluating potential/gradients/hessians 
-c             at sources.
-c             ifpgh = 1, only potentials will be evaluated
-c             ifpgh = 2, potentials/gradients will be evaluated
-c             ifpgh = 3, potentials/gradients/hessians will be evaluated
-c
-c     ifpghtarg  in: integer
-c             flag for evaluating potential/gradients/hessians 
-c             at targets.
-c             ifpghtarg = 1, only potentials will be evaluated
-c             ifpghtarg = 2, potentials/gradients will be evaluated
-c             ifpghtarg = 3, potentials/gradients/hessians will be evaluated
-c
-c   OUTPUT
-c
-c   Expansions at the targets
-c   jexps : coeffs for local expansion
-c   scj: scaling parameter for the expansions
-c
-c   pot: potential at the source locations
-c   grad: gradient at the source locations (d/dz,d/dzbar)
-c   hess: hessian at the source locations
-c  
-c   pottarg: potential at the target locations
-c   gradatarg: gradient at the target locations (d/dz,d/dzbar)
-c   hesstarg: hessian at the target locations
+c  INPUT PARAMETERS:
+c    - nd: integer
+c        number of charge densities
+c    - eps:  real *8
+c        FMM precision requested
+c    - nsource: integer
+c        number of sources
+c    - sourcesort: real *8 (2,nsource)
+c        tree sorted source locations
+c    - ifcharge: integer
+c        charge computation flag
+c        ifcharge = 1 => include charge contribution
+c                           otherwise do not
+c    - chargesort: complex *16 (nd,2,nsource): 
+c        charge strengths
+c    - ifdipole:  integer
+c        dipole computation flag
+c        ifdipole = 1 => include dipole contribution
+c                            otherwise do not
+c    - dipsort: complex *16 (nd,3,nsource): 
+c        dipole strengths
+c    - ntarget: integer
+c        number of targets
+c    - targetsort: real *8 (2,ntarget)
+c        target locations
+c    - nexpc: integer
+c        number of expansion centers
+c    - expcsort: real *8 (2,nexpc)
+c        expansion center locations
+c    - iaddr: integer (2,nboxes)
+c        pointer in rmlexp where multipole and local expansions for 
+c        each box is stored
+c        * iaddr(1,ibox) is the starting index in rmlexp for the 
+c          multipole expansion of ibox, and
+c        * iaddr(2,ibox) is the starting index in rmlexp for the
+c          local expansion of ibox
+c    - mptemp: complex *16(lmptmp): 
+c        temporary multipole/local expansion
+c        (may not be needed in new setting)
+c    - lmptmp: integer
+c        length of temporary expansion
+c    - itree: integer (ltree)
+c        This array contains all the information about the tree
+c        See to pts_tree2d.f for documentation
+c    - ltree: integer
+c        length of itree
+c    - iptr: integer(8)
+c         iptr is a collection of pointers 
+c         which points to where different elements 
+c         of the tree are stored in the itree array
+c    - ndiv: integer
+c        Max number of points per box
+c    - nlevels: integer
+c        number of levels in the tree
+c    - nboxes: integer
+c        number of boxes in the tree
+c    - boxsize: real*8 (0:nlevels)
+c        boxsize(i) is the size of the box from end to end
+c        at level i
+c    - iper: integer
+c        flag for periodic implementation
+c    - centers: real *8(2,nboxes)
+c        array containing the centers of all the boxes
+c    - isrcse: integer(2,nboxes)
+c        starting and ending location of sources in ibox
+c        in sorted list of sources
+c    - itargse: integer(2,nboxes)
+c        starting and ending location of targets in ibox
+c        in sorted list of sources
+c    - iexpcse: integer(2,nboxes)
+c        starting and ending location of expansion centers
+c        in ibox in sorted list of sources
+c    - nterms: integer(0:nlevels) 
+c        length of multipole and local expansions at various levels
+c    - ntj: integer
+c        order of the output expansions (unused)
+c    - ifpgh: integer
+c        flag for evaluating potential/gradients/hessians at sources.
+c        * ifpgh = 1, only potentials will be evaluated
+c        * ifpgh = 2, potentials/gradients will be evaluated
+c        * ifpgh = 3, potentials/gradients/hessians will be evaluated
+c    - ifpghtarg: integer
+c        flag for evaluating potential/gradients/hessians at targets.
+c        * ifpghtarg = 1, only potentials will be evaluated
+c        * ifpghtarg = 2, potentials/gradients will be evaluated
+c        * ifpghtarg = 3, potentials/gradients/hessians will be evaluated
+c
+c  OUTPUT PARAMETERS:
+c    - pot: complex *16 (nd,nsource)
+c        potential at the source locations
+c    - grad: complex *16 (nd,3,nsource)
+c        gradient at the source locations (d/dz_proj(z),
+c        d/dz_proj(zbar),d/dzbar)
+c    - hess: complex *16 (nd,3,nsource)
+c        currently unused
+c    - pottarg: complex *16 (nd,ntarget)
+c        potential at the target locations
+c    - gradtarg: complex *16 (nd,3,ntarget)
+c        gradient at the target locations (d/dz_proj(z),
+c        d/dz_proj(zbar),d/dzbar)
+c    - hesstarg: complex *16 (nd,3,ntarget)
+c        currently unused
+c    - jexps: complex *16 (nd,5,0:ntj,nexpc)
+c        currently unused
+c    - scjsort: real *8 (nexpc)
+c        currently unused
 c------------------------------------------------------------------
 
       implicit none
@@ -661,8 +642,8 @@ subroutine bhfmm2dmain(nd,eps,
 
       real *8 sourcesort(2,nsource)
 
-      complex *16 chargesort(nd,*)
-      complex *16 dipsort(nd,2,*)
+      complex *16 chargesort(nd,2,*)
+      complex *16 dipsort(nd,3,*)
 
       real *8 targetsort(2,ntarget)
       complex *16 jsort(nd,5,0:ntj,*)
@@ -670,11 +651,11 @@ subroutine bhfmm2dmain(nd,eps,
       real *8 expcsort(2,*)
 
       complex *16 pot(nd,*)
-      complex *16 grad(nd,2,*)
+      complex *16 grad(nd,3,*)
       complex *16 hess(nd,3,*)
 
       complex *16 pottarg(nd,*)
-      complex *16 gradtarg(nd,2,*)
+      complex *16 gradtarg(nd,3,*)
       complex *16 hesstarg(nd,3,*)
 
       integer iaddr(2,nboxes),lmptmp
@@ -835,7 +816,7 @@ subroutine bhfmm2dmain(nd,eps,
 c              Check if current box is a leaf box            
              if(nchild.eq.0.and.npts.gt.0) then
                  call bh2dformmpc(nd,rscales(ilev),
-     1             sourcesort(1,istart),npts,chargesort(1,istart),
+     1             sourcesort(1,istart),npts,chargesort(1,1,istart),
      2             centers(1,ibox),nterms(ilev),
      3             rmlexp(iaddr(1,ibox)))
              endif
@@ -875,7 +856,7 @@ subroutine bhfmm2dmain(nd,eps,
 c             Check if current box is a leaf box            
              if(nchild.eq.0.and.npts.gt.0) then
                 call bh2dformmpcd(nd,rscales(ilev),
-     1             sourcesort(1,istart),npts,chargesort(1,istart),
+     1             sourcesort(1,istart),npts,chargesort(1,1,istart),
      2             dipsort(1,1,istart),
      3             centers(1,ibox),
      4             nterms(ilev),rmlexp(iaddr(1,ibox))) 
@@ -926,7 +907,7 @@ subroutine bhfmm2dmain(nd,eps,
                     
                     call bh2dformtac(nd,rscales(ilev),
      1                   sourcesort(1,istart),npts,
-     2                   chargesort(1,istart),centers(1,ibox),
+     2                   chargesort(1,1,istart),centers(1,ibox),
      3                   nterms(ilev),rmlexp(iaddr(2,ibox)))
                  enddo
               endif
@@ -1003,7 +984,7 @@ subroutine bhfmm2dmain(nd,eps,
                     
                     call bh2dformtacd(nd,rscales(ilev),
      1                   sourcesort(1,istart),npts,
-     2                   chargesort(1,istart),dipsort(1,1,istart),
+     2                   chargesort(1,1,istart),dipsort(1,1,istart),
      3                   centers(1,ibox),
      3                   nterms(ilev),rmlexp(iaddr(2,ibox)))
                  enddo
@@ -1391,7 +1372,7 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
 c                  The expansion due to charges will be included
 c                  if ifcharge == 1
 c
-c     charge       in: complex *16
+c     charge       in: complex *16 (nd,2,ns)
 c                  Charge at the source locations
 c
 c     ifdipole     in: Integer
@@ -1399,7 +1380,7 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
 c                 The expansion due to dipoles will be included
 c                 if ifdipole == 1
 c
-c     dip         in: complex *16(2,ns)
+c     dip         in: complex *16(nd,3,ns)
 c                 dipole strengths at the source locations
 c
 c     targ        in: real *8(2,nt)
@@ -1422,7 +1403,7 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
 c
 c   Updated velocity and gradients at the targets
 c   pot : potential at the targets
-c   grad: gradient at the targets (d/dz,d/dzbar)
+c   grad: gradient at the targets (d/dz_projz, d/dz_projzbar,d/dzbar)
 c   hess: Hessian at the targets
 c-------------------------------------------------------               
         implicit none
@@ -1435,14 +1416,14 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
 
 
         real *8 source(2,*)
-        complex *16 charge(nd,*),dip(nd,2,*)
+        complex *16 charge(nd,2,*),dip(nd,3,*)
 
         integer ifpgh
         real *8 targ(2,*),thresh
         
 c
         complex *16 pot(nd,*)
-        complex *16 grad(nd,2,*)
+        complex *16 grad(nd,3,*)
         complex *16 hess(nd,3,*)
 
 c
@@ -1451,12 +1432,12 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
         if(ifcharge.eq.1.and.ifdipole.eq.0) then
           if(ifpgh.eq.1) then
              call bh2d_directcp(nd,source(1,istart),ns,
-     1         charge(1,istart),targ(1,jstart),nt,pot(1,jstart),thresh)
+     1        charge(1,1,istart),targ(1,jstart),nt,pot(1,jstart),thresh)
           endif
 
           if(ifpgh.eq.2) then
             call bh2d_directcg(nd,source(1,istart),ns,
-     1         charge(1,istart),targ(1,jstart),nt,pot(1,jstart),
+     1         charge(1,1,istart),targ(1,jstart),nt,pot(1,jstart),
      1         grad(1,1,jstart),thresh)
           endif
         endif
@@ -1479,13 +1460,13 @@ subroutine bhfmm2dpart_direct(nd,istart,iend,jstart,jend,
         if(ifcharge.eq.1.and.ifdipole.eq.1) then
           if(ifpgh.eq.1) then
                call bh2d_directcdp(nd,source(1,istart),ns,
-     1            charge(1,istart),dip(1,1,istart),
+     1            charge(1,1,istart),dip(1,1,istart),
      2            targ(1,jstart),nt,pot(1,jstart),thresh)
           endif
 
           if(ifpgh.eq.2) then
                call bh2d_directcdg(nd,source(1,istart),ns,
-     1            charge(1,istart),dip(1,1,istart),
+     1            charge(1,1,istart),dip(1,1,istart),
      2            targ(1,jstart),nt,pot(1,jstart),
      2            grad(1,1,jstart),
      2            thresh)
diff --git a/src/biharmonic/bhfmm2dwrap.f b/src/biharmonic/bhfmm2dwrap.f
index 256c659..91158b1 100644
--- a/src/biharmonic/bhfmm2dwrap.f
+++ b/src/biharmonic/bhfmm2dwrap.f
@@ -30,10 +30,10 @@ subroutine bhfmm2dwrap_guru(nd,eps,ns,sources,ifcharge,charge,
 c       and velocity are interchangably used
 c     
 c
-c    vel(z) = \sum charge_j *\log|z-z_j| + 
-c                (charge_j)_bar (z-z_j)/(z-z_j)_bar+
-c                dip1/(z-z_j) + dip2/(z-z_j)_bar-
-c                dip1_bar (z-z_j)/(z-z_j)^2_bar
+c    vel(z) = \sum charge1_j 2*\log|z-z_j| + 
+c                (charge2_j) (z-z_j)/(z-z_j)_bar+
+c                dip1/(z-z_j) + dip3/(z-z_j)_bar+
+c                dip2 (z-z_j)/(z-z_j)^2_bar
 c
 c    The velocity for stokes is related to the goursat functions
 c    as vel = \phi(z) + z (d/dz (\phi))_bar+\psi(z)
@@ -60,16 +60,20 @@ subroutine bhfmm2dwrap_guru(nd,eps,ns,sources,ifcharge,charge,
 c   ifcharge      : flag for including charge interactions
 c                   charge interactions included if ifcharge =1
 c                   not included otherwise
-c   charge(nd,ns)    : charge strengths
-c                   charge(j,i) is the charge strength of the
+c   charge(nd,2,ns) : charge strengths
+c                   charge(j,1,i) is the charge1 strength of the
+c                   jth charge density at source location i, and
+c                   charge(j,2,i) is the charge2 stregth of the 
 c                   jth charge density at source location i
 c   ifdipole      : flag for including dipole interactions
 c                   dipole interactions included if ifcharge =1
 c                   not included otherwise
-c   dip(nd,2,ns)  : dipole strengths
+c   dip(nd,3,ns)  : dipole strengths
 c                   dip(j,1,i) is dip1 for the jth dipole density
-c                   at source location i, and dip(j,2,i) is dip2
-c                   for the jth dipole density at source location i
+c                   at source location i, dip(j,2,i) is dip2
+c                   for the jth dipole density at source location i, and
+c                   dip(j,3,i) is dip3 for the jth dipole density
+c                   at source locaation i
 c   iper          : flag for periodic implmentations. Currently unused
 c   ifpgh         : flag for computing pot/grad/hess
 c                   ifpgh = 1, only potential is computed
@@ -88,10 +92,10 @@ subroutine bhfmm2dwrap_guru(nd,eps,ns,sources,ifcharge,charge,
 c
 c   OUTPUT PARAMETERS
 c   pot(nd,*)       : velocity at the source locations
-c   grad(nd,2,*)    : gradients (d/dz),d/dzbar at the source locations
+c   grad(nd,3,*)    : gradients (d/dz)_projz, (d/dz)_projzbar, d/dzbar at the source locations
 c   hess(nd,3,*)    : hessian  at the source locations (currently unused)
 c   pottarg(nd,*)   : potential at the target locations
-c   gradtarg(nd,2,*): gradient (d/dz), d/dzbar at the target locations
+c   gradtarg(nd,3,*): gradient (d/dz)_projz, (d/dz)_projzbar, d/dzbar at the target locations
 c   hesstarg(nd,3,*): hessian at the target locations (currently unused)
 c
 
@@ -107,10 +111,10 @@ subroutine bhfmm2dwrap_guru(nd,eps,ns,sources,ifcharge,charge,
       integer ifcharge,ifdipole
       integer iper,ier,ifpgh,ifpghtarg
       real *8 sources(2,ns),targ(2,nt)
-      complex *16 charge(nd,ns),dip(nd,2,ns)
+      complex *16 charge(nd,2,ns),dip(nd,3,ns)
 
-      complex *16 pot(nd,ns),grad(nd,2,ns), hess(nd,3,ns)
-      complex *16 pottarg(nd,nt),gradtarg(nd,2,nt)
+      complex *16 pot(nd,ns),grad(nd,3,ns), hess(nd,3,ns)
+      complex *16 pottarg(nd,nt),gradtarg(nd,3,nt)
       complex *16 hesstarg(nd,3,nt)
 
       call bhfmm2d(nd,eps,ns,sources,ifcharge,charge,
diff --git a/src/biharmonic/bhkernels2d.f b/src/biharmonic/bhkernels2d.f
index 6699299..e49dde6 100644
--- a/src/biharmonic/bhkernels2d.f
+++ b/src/biharmonic/bhkernels2d.f
@@ -1,4 +1,24 @@
 c
+c      In this file, the subroutines compute the potential, and
+c      gradients due to a collection of biharmonic charges and dipoles  
+c
+c      The complex velocity at zt = xt + i yt 
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
+c
+c
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
+c            
+c      The complex velocity due to a dipole with complex parameters
+c      d1,d2,d3 is given by
+c
+c
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
+c
+c      Analytic component of the gradient grada_z = d/dz(vel(z))_proj(z)
+c      Analytic component of the gradient grada_zbar = d/dz(vel(z))_proj(zbar)
+c
+c      Anti analytic component of the gradient (gradaa) = d/dzbar (vel(z))) 
 c
 c
 c
@@ -12,40 +32,12 @@ subroutine bh2d_directcp(nd,sources,ns,charges,targ,nt,vel,
 c      This subroutine INCREMENTS the complex velocity VEL 
 c      at the targets at targ(2) due to a collection of ns
 c      charges at sources(2,ns)
-c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
-c      charges      : charge strength
+c      charges(2,ns): charge strength
 c      targ         : target location
 c      nt           : number of targets
 c--------------------------------------------------------------------
@@ -58,7 +50,7 @@ subroutine bh2d_directcp(nd,sources,ns,charges,targ,nt,vel,
       integer ns
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 charges(nd,ns)
+      complex *16 charges(nd,2,ns)
       complex *16 zdis1,zdis2
       complex *16 eye
 
@@ -71,8 +63,9 @@ subroutine bh2d_directcp(nd,sources,ns,charges,targ,nt,vel,
            if(abs(zdis).le.thresh) goto 1111
            zdis1 = 1.0d0/zdis
            do idim=1,nd
-             vel(idim,j)=vel(idim,j)+2*charges(idim,i)*log(cdabs(zdis))+
-     1                     dconjg(charges(idim,i)*zdis1)*zdis
+             vel(idim,j)=vel(idim,j)+
+     1              2*charges(idim,1,i)*log(cdabs(zdis))+
+     1              charges(idim,2,i)*dconjg(zdis1)*zdis
            enddo
  1111 continue
          enddo
@@ -92,49 +85,22 @@ subroutine bh2d_directcg(nd,sources,ns,charges,
 cf2py intent(out) vel,grad
 c********************************************************************
 c      This subroutine INCREMENTS the complex velocity VEL and its
-c      gradients GRADA, GRADAA, at the
+c      gradients GRADA_Z, GRADA_ZBAR, GRADAA, at the
 c      targets at targ(2) due to a collection of ns
 c      charges at sources(2,ns)
 c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
-c      charges      : charge strength
+c      charges(2,ns): charge strength
 c      targ         : target location
 c      nt           : number of targets
 c--------------------------------------------------------------------
 c      OUTPUT
 c      vel          : Complex velocity at target
-c      grad         : Complex gradient (d/dz, d/dzbar)
+c      grad         : Complex gradient 
 c
 c--------------------------------------------------------------------
 
@@ -142,9 +108,9 @@ subroutine bh2d_directcg(nd,sources,ns,charges,
       integer ns
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 charges(nd,ns)
+      complex *16 charges(nd,2,ns)
       complex *16 zdis1,zdis2
-      complex *16 grad(nd,2,nt),eye
+      complex *16 grad(nd,3,nt),eye
 
       eye = dcmplx(0,1.0d0)
       do j=1,nt
@@ -156,13 +122,19 @@ subroutine bh2d_directcg(nd,sources,ns,charges,
            zdis1 = 1.0d0/zdis
            zdis2=zdis1**2
            do idim=1,nd
-             vel(idim,j)=vel(idim,j)+2*charges(idim,i)*log(cdabs(zdis))+
-     1                     dconjg(charges(idim,i)*zdis1)*zdis
+             vel(idim,j)=vel(idim,j)+
+     1               2*charges(idim,1,i)*log(cdabs(zdis))+
+     1               charges(idim,2,i)*dconjg(zdis1)*zdis
      
-             grad(idim,1,j)=grad(idim,1,j)+charges(idim,i)*zdis1
-             grad(idim,2,j)=grad(idim,2,j)+charges(idim,i)*dconjg(zdis1)
-             grad(idim,2,j)=grad(idim,2,j)-
-     1          dconjg(charges(idim,i)*zdis2)*zdis
+             grad(idim,1,j)=grad(idim,1,j) + charges(idim,1,i)*zdis1
+             
+             grad(idim,2,j)=grad(idim,2,j) +
+     1           charges(idim,2,i)*dconjg(zdis1)
+
+             grad(idim,3,j)=grad(idim,3,j) + 
+     1           charges(idim,1,i)*dconjg(zdis1)
+             grad(idim,3,j)=grad(idim,3,j) - 
+     1          charges(idim,2,i)*dconjg(zdis2)*zdis
           enddo
  1111 continue
         enddo
@@ -187,41 +159,12 @@ subroutine bh2d_directdp(nd,sources,ns,dippar,
 c      at the
 c      targets at targ(2) due to a collection of ns
 c      dipoles at sources(2,ns)
-c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
-c      dippar       : dipole parameters (corresponding to c2,c3 in
-c                     above expression)
+c      dippar(3,ns) : dipole parameters 
 c      targ         : target location
 c      nt           : number of targets
 c--------------------------------------------------------------------
@@ -234,7 +177,7 @@ subroutine bh2d_directdp(nd,sources,ns,dippar,
       integer ns,nt
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 dippar(nd,2,ns)
+      complex *16 dippar(nd,3,ns)
       complex *16 zdis1,zdis2
       complex *16 eye
 
@@ -249,8 +192,8 @@ subroutine bh2d_directdp(nd,sources,ns,dippar,
            zdis2=zdis1**2
            do idim=1,nd
              vel(idim,j)=vel(idim,j)+dippar(idim,1,i)*zdis1 + 
-     1         dippar(idim,2,i)*dconjg(zdis1)
-             vel(idim,j)=vel(idim,j)-dconjg(dippar(idim,1,i)*zdis2)*zdis
+     1         dippar(idim,3,i)*dconjg(zdis1)
+             vel(idim,j)=vel(idim,j)+dippar(idim,2,i)*dconjg(zdis2)*zdis
           enddo
  1111 continue
         enddo
@@ -270,50 +213,21 @@ subroutine bh2d_directdg(nd,sources,ns,dippar,
 cf2py intent(out) vel,grad
 c********************************************************************
 c      This subroutine INCREMENTS the complex velocity VEL and its
-c      gradients GRADA, GRADAA, at the
+c      gradients GRADA_Z, GRADA_ZBAR, GRADAA, at the
 c      targets at targ(2) due to a collection of ns
 c      dipoles at sources(2,ns)
-c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
-c      dippar       : dipole parameters (corresponding to c2,c3 in
-c                     above expression)
+c      dippar(3,ns) : dipole parameters 
 c      targ         : target location
 c      nt           : number of targets
 c--------------------------------------------------------------------
 c      OUTPUT
 c      vel          : Complex velocity at target
-c      grad         : Complex gradient (d/dz, d/dzbar)
+c      grad         : Complex gradient 
 c
 c--------------------------------------------------------------------
 
@@ -321,9 +235,9 @@ subroutine bh2d_directdg(nd,sources,ns,dippar,
       integer ns,nt
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 dippar(nd,2,ns)
+      complex *16 dippar(nd,3,ns)
       complex *16 zdis1,zdis2
-      complex *16 grad(nd,2,nt),eye
+      complex *16 grad(nd,3,nt),eye
 
 
       eye = dcmplx(0.0d0,1.0d0)
@@ -337,13 +251,17 @@ subroutine bh2d_directdg(nd,sources,ns,dippar,
            zdis2=zdis1**2
            do idim=1,nd
              vel(idim,j)=vel(idim,j)+dippar(idim,1,i)*zdis1 + 
-     1         dippar(idim,2,i)*dconjg(zdis1)
-             vel(idim,j)=vel(idim,j)-dconjg(dippar(idim,1,i)*zdis2)*zdis
+     1         dippar(idim,3,i)*dconjg(zdis1)
+             vel(idim,j)=vel(idim,j)+dippar(idim,2,i)*dconjg(zdis2)*zdis
              grad(idim,1,j)=grad(idim,1,j)-dippar(idim,1,i)*(zdis2)
-             grad(idim,2,j)=grad(idim,2,j)-
-     1           dippar(idim,2,i)*dconjg(zdis2)
+              
              grad(idim,2,j)=grad(idim,2,j)+
-     1           2*dconjg(dippar(idim,1,i)*zdis2*zdis1)*zdis
+     1           dippar(idim,2,i)*dconjg(zdis2)
+
+             grad(idim,3,j)=grad(idim,3,j)-
+     1           dippar(idim,3,i)*dconjg(zdis2)
+             grad(idim,3,j)=grad(idim,3,j)-
+     1           2*dippar(idim,2,i)*dconjg(zdis2*zdis1)*zdis
           enddo
  1111 continue
         enddo
@@ -365,42 +283,13 @@ subroutine bh2d_directcdp(nd,sources,ns,charges,dippar,
 c      at the
 c      targets at targ(2) due to a collection of ns
 c      charges and dipoles at sources(2,ns)
-c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
 c      charges      : charge strength
-c      dippar       : dipole parameters (corresponding to c2,c3 in
-c                     above expression)
+c      dippar       : dipole parameters 
 c      targ         : target location
 c      nt           : number of targets
 c--------------------------------------------------------------------
@@ -413,7 +302,7 @@ subroutine bh2d_directcdp(nd,sources,ns,charges,dippar,
       integer ns
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 charges(nd,ns),dippar(nd,2,ns)
+      complex *16 charges(nd,2,ns),dippar(nd,3,ns)
       complex *16 zdis1,zdis2
       complex *16 eye
 
@@ -427,12 +316,13 @@ subroutine bh2d_directcdp(nd,sources,ns,charges,dippar,
            zdis1 = 1.0d0/zdis
            zdis2=zdis1**2
            do idim=1,nd
-             vel(idim,j)=vel(idim,j)+2*charges(idim,i)*log(cdabs(zdis))+
-     1                     dconjg(charges(idim,i)*zdis1)*zdis
+             vel(idim,j)=vel(idim,j)+
+     1            2*charges(idim,1,i)*log(cdabs(zdis))+
+     1            charges(idim,2,i)*dconjg(zdis1)*zdis
      
              vel(idim,j)=vel(idim,j)+dippar(idim,1,i)*zdis1 + 
-     1         dippar(idim,2,i)*dconjg(zdis1)
-             vel(idim,j)=vel(idim,j)-dconjg(dippar(idim,1,i)*zdis2)*zdis
+     1         dippar(idim,3,i)*dconjg(zdis1)
+             vel(idim,j)=vel(idim,j)+dippar(idim,2,i)*dconjg(zdis2)*zdis
           enddo
  1111 continue
         enddo
@@ -455,47 +345,18 @@ subroutine bh2d_directcdg(nd,sources,ns,charges,dippar,
 c      gradients GRADA, GRADAA, at the
 c      targets at targ(2) due to a collection of ns
 c      charges and dipoles at sources(2,ns)
-c      
-c      In this subroutine the following rescaled versions of charges and
-c      dipoles are used to compute the velocity field due to them
-c
-c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
-c
-c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
-c            
-c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
-c
-c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
-c
-c      The corresponding goursat functions (\phi \psi) for derivation 
-c      of the complex velocity=\phi + z d/dz (\phi)_bar + \psi_bar
-c
-c      \phi(z) = c1 log (z-zs) + d1/(z-zs)
-c      \psi(z) = d2_bar/(z-zs) + zs_bar d1/(z-zs)^2 + c1_bar log(z-zs)-
-c               zs_bar c1 /(z-zs)
-c
-c      Analytic component of the gradient (grada)= d/dz (\phi(z))
-c
-c      Anti analytic component of the gradient (gradaa) = 
-c      z (d^2/ dz^2 (\phi))_bar + (d/dz (\psi))_bar
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      nd           : number of densities
 c      sources(2,ns): location of the sources
 c      ns           : number of sources
 c      charges      : charge strength
-c      dippar       : dipole parameters (corresponding to c2,c3 in
-c                     above expression)
+c      dippar       : dipole parameters 
 c      targ         : target location
 c--------------------------------------------------------------------
 c      OUTPUT
 c      vel          : Complex velocity at target
-c      grad         : Complex gradient (d/dz, d/dzbar)
+c      grad         : Complex gradient 
 c
 c--------------------------------------------------------------------
 
@@ -503,9 +364,9 @@ subroutine bh2d_directcdg(nd,sources,ns,charges,dippar,
       integer ns,nt
       real *8 sources(2,ns), targ(2,nt)
       complex *16 vel(nd,nt),zs,zt,zdis
-      complex *16 charges(nd,ns),dippar(nd,2,ns)
+      complex *16 charges(nd,2,ns),dippar(nd,3,ns)
       complex *16 zdis1,zdis2
-      complex *16 grad(nd,2,nt),eye
+      complex *16 grad(nd,3,nt),eye
 
       eye = dcmplx(0,1.0d0)
       do j=1,nt
@@ -517,21 +378,30 @@ subroutine bh2d_directcdg(nd,sources,ns,charges,dippar,
            zdis1 = 1.0d0/zdis
            zdis2=zdis1**2
            do idim=1,nd
-             vel(idim,j)=vel(idim,j)+2*charges(idim,i)*log(cdabs(zdis))+
-     1                     dconjg(charges(idim,i)*zdis1)*zdis
+             vel(idim,j)=vel(idim,j)+
+     1           2*charges(idim,1,i)*log(cdabs(zdis))+
+     1           charges(idim,2,i)*dconjg(zdis1)*zdis
      
              vel(idim,j)=vel(idim,j)+dippar(idim,1,i)*zdis1 + 
-     1         dippar(idim,2,i)*dconjg(zdis1)
-             vel(idim,j)=vel(idim,j)-dconjg(dippar(idim,1,i)*zdis2)*zdis
-             grad(idim,1,j)=grad(idim,1,j)+charges(idim,i)*zdis1
+     1         dippar(idim,3,i)*dconjg(zdis1)
+             vel(idim,j)=vel(idim,j)+dippar(idim,2,i)*dconjg(zdis2)*zdis
+
+             grad(idim,1,j)=grad(idim,1,j)+charges(idim,1,i)*zdis1
              grad(idim,1,j)=grad(idim,1,j)-dippar(idim,1,i)*(zdis2)
-             grad(idim,2,j)=grad(idim,2,j)+charges(idim,i)*dconjg(zdis1)
-             grad(idim,2,j)=grad(idim,2,j)-
-     1          dconjg(charges(idim,i)*zdis2)*zdis
-             grad(idim,2,j)=grad(idim,2,j)-
-     1          dippar(idim,2,i)*dconjg(zdis2)
+
+             grad(idim,2,j)=grad(idim,2,j) +
+     1           charges(idim,2,i)*dconjg(zdis1)
              grad(idim,2,j)=grad(idim,2,j)+
-     1         2*dconjg(dippar(idim,1,i)*zdis2*zdis1)*zdis
+     1           dippar(idim,2,i)*dconjg(zdis2)
+
+             grad(idim,3,j)=grad(idim,3,j)+
+     1          charges(idim,1,i)*dconjg(zdis1)
+             grad(idim,3,j)=grad(idim,3,j)-
+     1          charges(idim,2,i)*dconjg(zdis2)*zdis
+             grad(idim,3,j)=grad(idim,3,j)-
+     1          dippar(idim,3,i)*dconjg(zdis2)
+             grad(idim,3,j)=grad(idim,3,j)-
+     1         2*dippar(idim,2,i)*dconjg(zdis2*zdis1)*zdis
           enddo
  1111 continue
         enddo
diff --git a/src/biharmonic/bhrouts2d.f b/src/biharmonic/bhrouts2d.f
index beef111..e2277e5 100644
--- a/src/biharmonic/bhrouts2d.f
+++ b/src/biharmonic/bhrouts2d.f
@@ -5,51 +5,47 @@
 c-----------------------------------------------------------------
 c     Expansion formation subroutines
 c
-c     bh2dformmpc_vec: creates multipole expansion (outgoing) due to 
+c     bh2dformmpc: creates multipole expansion (outgoing) due to 
 c                     a collection of charge sources
-c     bh2dformmpd_vec: creates multipole expansion (outgoing) due to 
+c     bh2dformmpd: creates multipole expansion (outgoing) due to 
 c                     a collection of dipole sources
-c     bh2dformmpcd_vec: creates multipole expansion (outgoing) due to 
+c     bh2dformmpcd: creates multipole expansion (outgoing) due to 
 c                     a collection of charge and dipole sources
-c     bh2dformta_vec: creates local expansion (incoming) due to 
+c     bh2dformta: creates local expansion (incoming) due to 
 c                     a collection of charge sources
-c     bh2dformtad_vec: creates multipole expansion (incoming) due to 
+c     bh2dformtad: creates multipole expansion (incoming) due to 
 c                     a collection of dipole sources
-c     bh2dformtacd_vec: creates multipole expansion (incoming) due to 
+c     bh2dformtacd: creates multipole expansion (incoming) due to 
 c                     a collection of charge and dipole sources
 c
 c------------------------------------------------------------------     
 c     Multipole and local translation operators
 c
-c     bh2dmpmp_vec: Converts multipole expansion to a multipole expansion
-c     bh2dmploc_vec: Converts multipole expansion to local expansion
-c     bh2dlocloc_vec: converts local expansion to local expansion
+c     bh2dmpmp: Converts multipole expansion to a multipole expansion
+c     bh2dmploc: Converts multipole expansion to local expansion
+c     bh2dlocloc: converts local expansion to local expansion
 c
 c---------------------------------------------------------------------
 c     Evaluation subroutines
 c 
-c     bh2dmpevalp_vec: Computes the complex velocity due to a 
+c     bh2dmpevalp: Computes the complex velocity due to a 
 c                      multipole expansion at a collection of targets
-c     bh2dmpevalg_vec: Computes the complex velocity and its gradients 
+c     bh2dmpevalg: Computes the complex velocity and its gradients 
 c                      due to a multipole expansion at a collection 
 c                      of targets
-c     bh2dtaevalp_vec: Computes the complex velocity due to a 
+c     bh2dtaevalp: Computes the complex velocity due to a 
 c                      local expansion at a collection of targets
-c     bh2dtaevalg_vec: Computes the complex velocity and its gradients 
+c     bh2dtaevalg: Computes the complex velocity and its gradients 
 c                      due to a local expansion at a collection 
 c                      of targets
 c----------------------------------------------------------------------
 c     Utility subroutines
 c     
-c     bh2dzero_vec: utility to intialize multipole/local coefficients to
+c     bh2dzero: utility to intialize multipole/local coefficients to
 c                   zero
 c    
 c*********************************************************************
 c
-c
-c
-c
-c
 c
 
       subroutine bh2dmpevalp(nd,rscale,center,mpole,nterms,
@@ -69,14 +65,6 @@ subroutine bh2dmpevalp(nd,rscale,center,mpole,nterms,
 c          + i Re(mp5(0) log(z) + \sum mpole(5,k)/z^k)
 c                                  k
 c      z = (ztarg - center)/rscale
-c
-c      grada = gradient(analytic comoponent of vel)
-c
-c      gradaa = gradient(all but analytic component of vel)
-c             = gradient(anti analytic + z times anti analytic 
-c                         component of vel)
-c     NOTE: The subroutine will work if nterms<1000, as 1000, is
-c     a hardwired number for computing zpow in the computation
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      rscale        : scaling parameter
@@ -102,7 +90,7 @@ subroutine bh2dmpevalp(nd,rscale,center,mpole,nterms,
       complex *16 mpole(nd,5,0:nterms)
       complex *16 vel(nd,ntarg)
       complex *16 ztemp,ztemp1
-      complex *16 zdisinv,zpow(1:1000)
+      complex *16 zdisinv,zpow(1:nterms+5)
 
 
       nmax = nterms+3
@@ -164,13 +152,19 @@ subroutine bh2dmpevalg(nd,rscale,center,mpole,nterms,
 c                                  k
 c      z = (ztarg - center)/rscale
 c
-c      grada = gradient(analytic comoponent of vel)
+c      note that we use log(|z|) = 1/2(log(z) + log(zbar))
 c
+c      grada_z = gradient (analytic comoponent of vel, projected onto z
+c              component), i.e.
+c              terms involving mpole1, mpole4, mpole5 and ignoring z \sum_k
+c              mpole(3,k)/z_bar^k
+c      grada_zbar = gradient (analytic component of grad vel, projected onto
+c              zbar component), i.e.
+c              terms involving mpole3 and ignoring mpole1 terms
 c      gradaa = gradient(all but analytic component of vel)
 c             = gradient(anti analytic + z times anti analytic 
 c                         component of vel)
-c     NOTE: The subroutine will work if nterms<1000, as 1000, is
-c     a hardwired number for computing zpow in the computation
+c
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      rscale        : scaling parameter
@@ -187,7 +181,7 @@ subroutine bh2dmpevalg(nd,rscale,center,mpole,nterms,
 c---------------------------------------------------------------------
 c     OUTPUT
 c     vel - Complex velocity
-c     grad(1:2) - gradient of analytic component  (d/dz and d/dzbar)
+c     grad(1:3) - gradients (grada_z, grada_zbar, gradaa) 
 c*********************************************************************
 
       implicit real *8 (a-h,o-z)
@@ -195,9 +189,9 @@ subroutine bh2dmpevalg(nd,rscale,center,mpole,nterms,
       real *8 rscale,center(2),ztarg(2,ntarg)
       complex *16 zc,zt,zdis,eye
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 vel(nd,ntarg),grad(nd,2,ntarg)
+      complex *16 vel(nd,ntarg),grad(nd,3,ntarg)
       complex *16 ztemp,ztemp1
-      complex *16 zdisinv,zpow(1:1000)
+      complex *16 zdisinv,zpow(1:nterms+5)
 
 
       nmax = nterms+3
@@ -218,7 +212,7 @@ subroutine bh2dmpevalg(nd,rscale,center,mpole,nterms,
      1        (mpole(idim,4,0)+eye*mpole(idim,5,0))*log(cdabs(zdis))
           grad(idim,1,k) = grad(idim,1,k) + 
      1        0.5d0*(mpole(idim,4,0)+eye*mpole(idim,5,0))*zpow(1)*rinv
-          grad(idim,2,k)= grad(idim,2,k) + 
+          grad(idim,3,k)= grad(idim,3,k) + 
      1       0.5d0*(mpole(idim,4,0)+eye*mpole(idim,5,0))*
      2       dconjg(zpow(1))*rinv
         enddo
@@ -232,19 +226,24 @@ subroutine bh2dmpevalg(nd,rscale,center,mpole,nterms,
             vel(idim,k) = vel(idim,k) + 
      1        dreal(mpole(idim,4,i)*zpow(i))+
      2         eye*dreal(mpole(idim,5,i)*zpow(i))
+
             grad(idim,1,k) = grad(idim,1,k) - 
      1         mpole(idim,1,i)*zpow(i+1)*i*rinv
             grad(idim,1,k) = grad(idim,1,k) - 
      1         0.5*mpole(idim,4,i)*i*zpow(i+1)*rinv
             grad(idim,1,k) = grad(idim,1,k) - 
      1         eye*0.5*mpole(idim,5,i)*i*zpow(i+1)*rinv
-            grad(idim,2,k) = grad(idim,2,k) - 
+            
+            grad(idim,2,k) = grad(idim,2,k) + 
+     1           mpole(idim,3,i)*dconjg(zpow(i))
+
+            grad(idim,3,k) = grad(idim,3,k) - 
      1         mpole(idim,2,i)*dconjg(zpow(i+1))*i*rinv
-            grad(idim,2,k) = grad(idim,2,k) - 
+            grad(idim,3,k) = grad(idim,3,k) - 
      1         zdis*mpole(idim,3,i)*dconjg(zpow(i+1))*i*rinv
-            grad(idim,2,k) = grad(idim,2,k) - 
+            grad(idim,3,k) = grad(idim,3,k) - 
      1         dconjg(0.5*mpole(idim,4,i)*i*zpow(i+1))*rinv
-            grad(idim,2,k) = grad(idim,2,k) + 
+            grad(idim,3,k) = grad(idim,3,k) + 
      1         dconjg(eye*0.5*mpole(idim,5,i)*i*zpow(i+1))*rinv
           enddo
         enddo
@@ -277,8 +276,6 @@ subroutine bh2dtaevalp(nd,rscale,center,mpole,nterms,
 c          + i Re( \sum mpole(5,k) z^k)
 c                    k
 c      z = (ztarg - center)/rscale
-c     NOTE: The subroutine will work if nterms<1000, as 1000, is
-c     a hardwired number for computing zpow in the computation
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      rscale        : scaling parameter
@@ -303,7 +300,7 @@ subroutine bh2dtaevalp(nd,rscale,center,mpole,nterms,
       complex *16 zc,zt,zdis,eye
       complex *16 mpole(nd,5,0:nterms)
       complex *16 vel(nd,ntarg)
-      complex *16 ztemp,ztemp1,zpow(0:1000)
+      complex *16 ztemp,ztemp1,zpow(0:nterms)
 
       rinv=1.0d0/rscale
       zc = dcmplx(center(1),center(2))
@@ -357,8 +354,20 @@ subroutine bh2dtaevalg(nd,rscale,center,mpole,nterms,
 c          + i Re( \sum mpole(5,k) z^k)
 c                    k
 c      z = (ztarg - center)/rscale
-c     NOTE: The subroutine will work if nterms<1000, as 1000, is
-c     a hardwired number for computing zpow in the computation
+c
+c      note that we use log(|z|) = 1/2(log(z) + log(zbar))
+c
+c      grada_z = gradient (analytic comoponent of vel, projected onto z
+c              component), i.e.
+c              terms involving mpole1, mpole4, mpole5 and ignoring z \sum_k
+c              mpole(3,k)/z_bar^k
+c      grada_zbar = gradient (analytic component of grad vel, projected onto
+c              zbar component), i.e.
+c              terms involving mpole3 and ignoring mpole1 terms
+c      gradaa = gradient(all but analytic component of vel)
+c             = gradient(anti analytic + z times anti analytic 
+c                         component of vel)
+c
 c----------------------------------------------------------------------
 c      INPUT parameters
 c      rscale        : scaling parameter
@@ -375,7 +384,7 @@ subroutine bh2dtaevalg(nd,rscale,center,mpole,nterms,
 c---------------------------------------------------------------------
 c     OUTPUT
 c     vel - Complex velocity
-c     grad - gradient (d/dz, d/dzbar) 
+c     grad - gradient (grada_z, grada_zbar, gradaa) 
 c*********************************************************************
 
       implicit real *8 (a-h,o-z)
@@ -383,8 +392,8 @@ subroutine bh2dtaevalg(nd,rscale,center,mpole,nterms,
       real *8 rscale,center(2),ztarg(2,ntarg)
       complex *16 zc,zt,zdis,eye
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 vel(nd,ntarg),grad(nd,2,ntarg)
-      complex *16 ztemp,ztemp1,zpow(0:1000)
+      complex *16 vel(nd,ntarg),grad(nd,3,ntarg)
+      complex *16 ztemp,ztemp1,zpow(0:nterms)
 
       rinv=1.0d0/rscale
       zc = dcmplx(center(1),center(2))
@@ -408,8 +417,13 @@ subroutine bh2dtaevalg(nd,rscale,center,mpole,nterms,
             vel(idim,k) = vel(idim,k) + 
      1          dreal(mpole(idim,4,i)*zpow(i)) + 
      2          eye*dreal(mpole(idim,5,i)*zpow(i))
+
           enddo
         enddo
+
+        do idim=1,nd
+          grad(idim,2,k) = grad(idim,2,k) + mpole(idim,3,0)
+        enddo
         do i=1,nterms
           do idim=1,nd
             grad(idim,1,k) = grad(idim,1,k) + 
@@ -418,13 +432,17 @@ subroutine bh2dtaevalg(nd,rscale,center,mpole,nterms,
      1         0.5*mpole(idim,4,i)*i*zpow(i-1)*rinv
             grad(idim,1,k) = grad(idim,1,k) + 
      1         eye*0.5*mpole(idim,5,i)*i*zpow(i-1)*rinv
+
             grad(idim,2,k) = grad(idim,2,k) + 
+     1         mpole(idim,3,i)*dconjg(zpow(i))
+
+            grad(idim,3,k) = grad(idim,3,k) + 
      1          mpole(idim,2,i)*dconjg(zpow(i-1))*i*rinv
-            grad(idim,2,k) = grad(idim,2,k) + 
+            grad(idim,3,k) = grad(idim,3,k) + 
      1          mpole(idim,3,i)*dconjg(zpow(i-1))*i*zdis*rinv
-            grad(idim,2,k) = grad(idim,2,k) + 
+            grad(idim,3,k) = grad(idim,3,k) + 
      1         dconjg(0.5*mpole(idim,4,i)*i*zpow(i-1))*rinv
-            grad(idim,2,k) = grad(idim,2,k) - 
+            grad(idim,3,k) = grad(idim,3,k) - 
      1         dconjg(eye*0.5*mpole(idim,5,i)*i*zpow(i-1))*rinv
           enddo
         enddo
@@ -444,17 +462,17 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge (c1,c2) at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
+c      d1,d2,d3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mpole(1,k)/z^k + \sum mpole(2,k)/z_bar^k +
@@ -470,7 +488,7 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
 c      nd            : number of densities
 c      rscale        : scaling parameter
 c      sources(2,ns) : coordinates of sources
-c      dip(2,ns)    : dipole parameters (corresponding to c2,c3 above)
+c      dip(3,ns)    : dipole parameters (corresponding to c2,c3 above)
 c      ns           : number of sources
 c      center(2)    : expansion center
 c      nterms       : Order of multipole expansion
@@ -487,7 +505,7 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 dip(nd,2,ns)
+      complex *16 dip(nd,3,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
       complex *16 ztempc2
@@ -506,9 +524,9 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
          if(abs(zdis).le.1.0d-16) then
             do idim=1,nd
                 mpole(idim,1,1) = mpole(idim,1,1) + dip(idim,1,i)*rinv
-                mpole(idim,2,1) = mpole(idim,2,1) + dip(idim,2,i)*rinv
-                mpole(idim,3,2) = mpole(idim,3,2) - 
-     1              dconjg(dip(idim,1,i))*rinv**2
+                mpole(idim,2,1) = mpole(idim,2,1) + dip(idim,3,i)*rinv
+                mpole(idim,3,2) = mpole(idim,3,2) + 
+     1              dip(idim,2,i)*rinv**2
             enddo
          endif
 
@@ -517,16 +535,16 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
             ztempc2=ztempc*ztempc
             do j=1,nterms
                do idim=1,nd
-                  zt2 = dip(idim,2,i)*ztempc
-                  zt3 = dconjg(dip(idim,1,i))*ztempc2
+                  zt2 = dip(idim,2,i)*ztempc2
+                  zt3 = dip(idim,3,i)*ztempc
 c           Expansion corresponding to d1/z-zs
                   mpole(idim,1,j)=mpole(idim,1,j)+
      1               dip(idim,1,i)*zdis/ztemp
-c           Expansion corresponding to d2/(z_bar - zs_bar)
-                  mpole(idim,2,j)=mpole(idim,2,j)+zt2*zdisc
-c           Expansion corresponding to -d1_bar(z-zs)/(z_bar - zs_bar)^2
-                  mpole(idim,2,j)=mpole(idim,2,j)+zt3*(j-1)*zdisc*ztemp
-                  mpole(idim,3,j)=mpole(idim,3,j)-zt3*(j-1)*zdisc
+c           Expansion corresponding to d2(z-zs)/(z_bar - zs_bar)^2
+                  mpole(idim,2,j)=mpole(idim,2,j)-zt2*(j-1)*zdisc*ztemp
+                  mpole(idim,3,j)=mpole(idim,3,j)+zt2*(j-1)*zdisc
+c           Expansion corresponding to d3/(z_bar - zs_bar)
+                  mpole(idim,2,j)=mpole(idim,2,j)+zt3*zdisc
                enddo 
                zdis=zdis*ztemp*rinv
                zdisc=zdisc/ztempc*rinv
@@ -538,8 +556,6 @@ subroutine bh2dformmpd(nd,rscale,sources,ns,dip,
       end
 c
 c
-c
-c
 c
       subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
      1           center,nterms,mpole)
@@ -548,17 +564,17 @@ subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
+c      d1,d2,d3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mpole(1,k)/z^k + \sum mpole(2,k)/z_bar^k +
@@ -571,10 +587,10 @@ subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
 c                                       k
 c------------------------------------------------------------------
 c     INPUT parameters
-c      nd            : number of densities
-c      rscale        : scaling parameter
-c      sources(2,ns) : coordinates of sources
-c      c1           : charge strength
+c      nd           : number of densities
+c      rscale       : scaling parameter
+c      sources(2,ns): coordinates of sources
+c      c1(2,ns)     : charge strength
 c      ns           : number of sources
 c      center(2)    : expansion center
 c      nterms       : Order of multipole expansion
@@ -591,7 +607,7 @@ subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 c1(nd,ns)
+      complex *16 c1(nd,2,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
       complex *16 ztempc2
@@ -609,11 +625,11 @@ subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
 
          if(abs(zdis).le.1.0d-16) then
             do idim=1,nd
-                mpole(idim,4,0) = mpole(idim,4,0) + real(2*c1(idim,i))
-                mpole(idim,5,0) = mpole(idim,5,0) + imag(2*c1(idim,i))
-c       Expansion corresponding to c1_bar z-zs/(z_bar - zs_bar)
+                mpole(idim,4,0) = mpole(idim,4,0) + real(2*c1(idim,1,i))
+                mpole(idim,5,0) = mpole(idim,5,0) + imag(2*c1(idim,1,i))
+c       Expansion corresponding to c2 z-zs/(z_bar - zs_bar)
                 mpole(idim,3,1) = mpole(idim,3,1) + 
-     1             dconjg(c1(idim,i))*rinv
+     1             c1(idim,2,i)*rinv
             enddo
          endif
 
@@ -621,18 +637,18 @@ subroutine bh2dformmpc(nd,rscale,sources,ns,c1,
             ztempc=1.0d0/dconjg(ztemp)
             ztempc2=ztempc*ztempc
             do idim=1,nd
-               mpole(idim,4,0)=mpole(idim,4,0)+dreal(2*c1(idim,i))
-               mpole(idim,5,0)=mpole(idim,5,0)+dimag(2*c1(idim,i))
+               mpole(idim,4,0)=mpole(idim,4,0)+dreal(2*c1(idim,1,i))
+               mpole(idim,5,0)=mpole(idim,5,0)+dimag(2*c1(idim,1,i))
             enddo
             do j=1,nterms
                do idim=1,nd
-                  zt1 = dconjg(c1(idim,i))*ztempc
+                  zt1 = c1(idim,2,i)*ztempc
 c          Expansion corresponding to 2 c1 log |z-zs|
                  mpole(idim,4,j)=mpole(idim,4,j)-
-     1               dreal(2*c1(idim,i))*zdis/j
+     1               dreal(2*c1(idim,1,i))*zdis/j
                  mpole(idim,5,j)=mpole(idim,5,j)-
-     1               dimag(2*c1(idim,i))*zdis/j
-c          Expansion corresponding to c1_bar z-zs/(z_bar - zs_bar)
+     1               dimag(2*c1(idim,1,i))*zdis/j
+c          Expansion corresponding to c2 z-zs/(z_bar - zs_bar)
                  mpole(idim,2,j)=mpole(idim,2,j)-zt1*zdisc*ztemp
                  mpole(idim,3,j)=mpole(idim,3,j)+zt1*zdisc
                enddo
@@ -658,17 +674,17 @@ subroutine bh2dformmpcd(nd,rscale,sources,ns,c1,dip,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
 c      c2,c3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mpole(1,k)/z^k + \sum mpole(2,k)/z_bar^k +
@@ -681,11 +697,11 @@ subroutine bh2dformmpcd(nd,rscale,sources,ns,c1,dip,
 c                                       k
 c------------------------------------------------------------------
 c     INPUT parameters
-c      nd            : number of densities
-c      rscale        : scaling parameter
-c      sources(2,ns) : coordinates of sources
-c      c1           : charge strength
-c      dip(2,ns)    : dipole parameters (corresponding to c2,c3 above)
+c      nd           : number of densities
+c      rscale       : scaling parameter
+c      sources(2,ns): coordinates of sources
+c      c1(2,ns)     : charge strength
+c      dip(3,ns)    : dipole parameters (corresponding to c2,c3 above)
 c      ns           : number of sources
 c      center(2)    : expansion center
 c      nterms       : Order of multipole expansion
@@ -702,7 +718,7 @@ subroutine bh2dformmpcd(nd,rscale,sources,ns,c1,dip,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 c1(nd,ns),dip(nd,2,ns)
+      complex *16 c1(nd,2,ns),dip(nd,3,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
       complex *16 ztempc2
@@ -720,15 +736,15 @@ subroutine bh2dformmpcd(nd,rscale,sources,ns,c1,dip,
 
          if(abs(zdis).le.1.0d-16) then
             do idim=1,nd
-                mpole(idim,4,0) = mpole(idim,4,0) + real(2*c1(idim,i))
-                mpole(idim,5,0) = mpole(idim,5,0) + imag(2*c1(idim,i))
-c       Expansion corresponding to c1_bar z-zs/(z_bar - zs_bar)
+                mpole(idim,4,0) = mpole(idim,4,0) + real(2*c1(idim,1,i))
+                mpole(idim,5,0) = mpole(idim,5,0) + imag(2*c1(idim,1,i))
+c       Expansion corresponding to c2 z-zs/(z_bar - zs_bar)
                 mpole(idim,3,1) = mpole(idim,3,1) + 
-     1             dconjg(c1(idim,i))*rinv
+     1             c1(idim,2,i)*rinv
                 mpole(idim,1,1) = mpole(idim,1,1) + dip(idim,1,i)*rinv
-                mpole(idim,2,1) = mpole(idim,2,1) + dip(idim,2,i)*rinv
-                mpole(idim,3,2) = mpole(idim,3,2) - 
-     1              dconjg(dip(idim,1,i))*rinv**2
+                mpole(idim,2,1) = mpole(idim,2,1) + dip(idim,3,i)*rinv
+                mpole(idim,3,2) = mpole(idim,3,2) + 
+     1              dip(idim,2,i)*rinv**2
             enddo
          endif
 
@@ -736,33 +752,33 @@ subroutine bh2dformmpcd(nd,rscale,sources,ns,c1,dip,
             ztempc=1.0d0/dconjg(ztemp)
             ztempc2=ztempc*ztempc
             do idim=1,nd
-               mpole(idim,4,0)=mpole(idim,4,0)+dreal(2*c1(idim,i))
-               mpole(idim,5,0)=mpole(idim,5,0)+dimag(2*c1(idim,i))
+               mpole(idim,4,0)=mpole(idim,4,0)+dreal(2*c1(idim,1,i))
+               mpole(idim,5,0)=mpole(idim,5,0)+dimag(2*c1(idim,1,i))
             enddo
             do j=1,nterms
                do idim=1,nd
-                  zt1 = dconjg(c1(idim,i))*ztempc
+                  zt1 = c1(idim,2,i)*ztempc
 c          Expansion corresponding to 2 c1 log |z-zs|
                  mpole(idim,4,j)=mpole(idim,4,j)-
-     1               dreal(2*c1(idim,i))*zdis/j
+     1               dreal(2*c1(idim,1,i))*zdis/j
                  mpole(idim,5,j)=mpole(idim,5,j)-
-     1               dimag(2*c1(idim,i))*zdis/j
-c          Expansion corresponding to c1_bar z-zs/(z_bar - zs_bar)
+     1               dimag(2*c1(idim,1,i))*zdis/j
+c          Expansion corresponding to c2 z-zs/(z_bar - zs_bar)
                  mpole(idim,2,j)=mpole(idim,2,j)-zt1*zdisc*ztemp
                  mpole(idim,3,j)=mpole(idim,3,j)+zt1*zdisc
                enddo
 
                do idim=1,nd
-                  zt2 = dip(idim,2,i)*ztempc
-                  zt3 = dconjg(dip(idim,1,i))*ztempc2
+                  zt2 = dip(idim,2,i)*ztempc2
+                  zt3 = dip(idim,3,i)*ztempc
 c           Expansion corresponding to d1/z-zs
                   mpole(idim,1,j)=mpole(idim,1,j)+
      1               dip(idim,1,i)*zdis/ztemp
-c           Expansion corresponding to d2/(z_bar - zs_bar)
-                  mpole(idim,2,j)=mpole(idim,2,j)+zt2*zdisc
-c           Expansion corresponding to -d1_bar(z-zs)/(z_bar - zs_bar)^2
-                  mpole(idim,2,j)=mpole(idim,2,j)+zt3*(j-1)*zdisc*ztemp
-                  mpole(idim,3,j)=mpole(idim,3,j)-zt3*(j-1)*zdisc
+c           Expansion corresponding to d2(z-zs)/(z_bar - zs_bar)^2
+                  mpole(idim,2,j)=mpole(idim,2,j)-zt2*(j-1)*zdisc*ztemp
+                  mpole(idim,3,j)=mpole(idim,3,j)+zt2*(j-1)*zdisc
+c           Expansion corresponding to d3/(z_bar - zs_bar)
+                  mpole(idim,2,j)=mpole(idim,2,j)+zt3*zdisc
                enddo 
                zdis=zdis*ztemp*rinv
                zdisc=zdisc/ztempc*rinv
@@ -786,17 +802,17 @@ subroutine bh2dformtac(nd,rscale,sources,ns,c1,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
+c      d1,d2,d3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mp1(k) z^k + \sum mp2(k) z_bar^k +
@@ -812,7 +828,7 @@ subroutine bh2dformtac(nd,rscale,sources,ns,c1,
 c
 c      rscale        : scaling parameter
 c      sources(2,ns) : coordinates of sources
-c      c1           : charge strength
+c      c1(2,ns)      : charge strength
 c      ns            : number of sources
 c      center(2)     : expansion center
 c      nterms        : Order of multipole expansion
@@ -829,7 +845,7 @@ subroutine bh2dformtac(nd,rscale,sources,ns,c1,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 c1(nd,ns)
+      complex *16 c1(nd,2,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
 
@@ -845,19 +861,19 @@ subroutine bh2dformtac(nd,rscale,sources,ns,c1,
             do idim=1,nd
               if(j.eq.0) then
                  mpole(idim,4,j)=mpole(idim,4,j) + 
-     1               dreal(2*c1(idim,i))*log(cdabs(1.0d0/ztemp))
+     1               dreal(2*c1(idim,1,i))*log(cdabs(1.0d0/ztemp))
                  mpole(idim,5,j)=mpole(idim,5,j) + 
-     1               dimag(2*c1(idim,i))*log(cdabs(1.0d0/ztemp))
+     1               dimag(2*c1(idim,1,i))*log(cdabs(1.0d0/ztemp))
               else
                 mpole(idim,4,j)=mpole(idim,4,j) - 
-     1              dreal(2*c1(idim,i))*zdis/j
+     1              dreal(2*c1(idim,1,i))*zdis/j
                 mpole(idim,5,j)=mpole(idim,5,j) - 
-     1              dimag(2*c1(idim,i))*zdis/j
+     1              dimag(2*c1(idim,1,i))*zdis/j
               endif
               mpole(idim,2,j)=mpole(idim,2,j) + 
-     1            dconjg(c1(idim,i))*zdisc*ztempc/ztemp
+     1            c1(idim,2,i)*zdisc*ztempc/ztemp
               mpole(idim,3,j)=mpole(idim,3,j) - 
-     1           dconjg(c1(idim,i))*zdisc*ztempc
+     1           c1(idim,2,i)*zdisc*ztempc
  
             enddo
             zdis=zdis*ztemp*rscale
@@ -880,17 +896,17 @@ subroutine bh2dformtad(nd,rscale,sources,ns,dip,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
+c      d1,d2,d3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mp1(k) z^k + \sum mp2(k) z_bar^k +
@@ -906,7 +922,7 @@ subroutine bh2dformtad(nd,rscale,sources,ns,dip,
 c
 c      rscale        : scaling parameter
 c      sources(2,ns) : coordinates of sources
-c      dip(2,ns)    : dipole parameters (corresponding to c2,c3 above)
+c      dip(3,n s)    : dipole parameters (corresponding to c2,c3 above)
 c      ns            : number of sources
 c      center(2)     : expansion center
 c      nterms        : Order of multipole expansion
@@ -923,7 +939,7 @@ subroutine bh2dformtad(nd,rscale,sources,ns,dip,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 dip(nd,2,ns)
+      complex *16 dip(nd,3,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
 
@@ -938,11 +954,11 @@ subroutine bh2dformtad(nd,rscale,sources,ns,dip,
          do j=0,nterms
             do idim=1,nd
               mpole(idim,1,j)=mpole(idim,1,j)-dip(idim,1,i)*zdis*ztemp
-              mpole(idim,2,j)=mpole(idim,2,j)-dip(idim,2,i)*zdisc*ztempc
-              mpole(idim,2,j)=mpole(idim,2,j)+
-     1             dconjg(dip(idim,1,i))*(j+1)*zdisc*ztempc*ztempc/ztemp
-              mpole(idim,3,j)=mpole(idim,3,j) - 
-     1            dconjg(dip(idim,1,i))*(j+1)*zdisc*ztempc*ztempc
+              mpole(idim,2,j)=mpole(idim,2,j)-dip(idim,3,i)*zdisc*ztempc
+              mpole(idim,2,j)=mpole(idim,2,j)-
+     1             dip(idim,2,i)*(j+1)*zdisc*ztempc*ztempc/ztemp
+              mpole(idim,3,j)=mpole(idim,3,j) + 
+     1            dip(idim,2,i)*(j+1)*zdisc*ztempc*ztempc
             
             enddo
             zdis=zdis*ztemp*rscale
@@ -965,17 +981,17 @@ subroutine bh2dformtacd(nd,rscale,sources,ns,c1,dip,
 c     the "center" due to ns sources located at sources(2,*)
 c
 c      The complex velocity at zt = xt + i yt 
-c      due to a charge a complex charge c1 at zs = xs+i ys is given by
+c      due to a charge a complex charge c1,c2 at zs = xs+i ys is given by
 c
 c
-c      vel = 2*c1 log|zt-zs| + c1_bar (zt-zs)/(zt_bar - zs_bar)
+c      vel = 2*c1 log|zt-zs| + c2 (zt-zs)/(zt_bar - zs_bar)
 c            
 c      The complex velocity due to a dipole with complex parameters
-c      c2,c3 is given by
+c      d1,d2,d3 is given by
 c
 c
-c      vel = c2/(zt-zs) - c2_bar (zt-zs)/(zt_bar-zs_bar)^2 + 
-c             c3/(zt_bar - zs_bar)
+c      vel = d1/(zt-zs) + d2 (zt-zs)/(zt_bar-zs_bar)^2 + 
+c             d3/(zt_bar - zs_bar)
 c
 c     The multipole expansion has 5 set of coefficients
 c     vel = \sum mp1(k) z^k + \sum mp2(k) z_bar^k +
@@ -989,14 +1005,13 @@ subroutine bh2dformtacd(nd,rscale,sources,ns,c1,dip,
 c------------------------------------------------------------------
 c     INPUT parameters
 c
-c      rscale        : scaling parameter
-c      sources(2,ns) : coordinates of sources
-c      c1           : charge strength
-c      d1           : dipole parameter 1 (corresponding to c2)
-c      d2           : dipole parameter 2 (corresponding to c3)
-c      ns            : number of sources
-c      center(2)     : expansion center
-c      nterms        : Order of multipole expansion
+c      rscale       : scaling parameter
+c      sources(2,ns): coordinates of sources
+c      c1(2,ns)     : charge strength
+c      d1(3,ns)     : dipole strenghts
+c      ns           : number of sources
+c      center(2)    : expansion center
+c      nterms       : Order of multipole expansion
 c------------------------------------------------------------------
 c      OUTPUT
 c      mpole         : coeffs for local expansion
@@ -1010,7 +1025,7 @@ subroutine bh2dformtacd(nd,rscale,sources,ns,c1,dip,
       implicit real *8 (a-h,o-z)
       integer ns,nterms
       complex *16 mpole(nd,5,0:nterms)
-      complex *16 c1(nd,ns),dip(nd,2,ns)
+      complex *16 c1(nd,2,ns),dip(nd,3,ns)
       real *8 sources(2,ns),center(2),rscale
       complex *16 zc,z1,z2,zs,zdis,ztemp,zdisc,ztempc
 
@@ -1026,26 +1041,26 @@ subroutine bh2dformtacd(nd,rscale,sources,ns,c1,dip,
             do idim=1,nd
               if(j.eq.0) then
                  mpole(idim,4,j)=mpole(idim,4,j) + 
-     1               dreal(2*c1(idim,i))*log(cdabs(1.0d0/ztemp))
+     1               dreal(2*c1(idim,1,i))*log(cdabs(1.0d0/ztemp))
                  mpole(idim,5,j)=mpole(idim,5,j) + 
-     1               dimag(2*c1(idim,i))*log(cdabs(1.0d0/ztemp))
+     1               dimag(2*c1(idim,1,i))*log(cdabs(1.0d0/ztemp))
               else
                 mpole(idim,4,j)=mpole(idim,4,j) - 
-     1              dreal(2*c1(idim,i))*zdis/j
+     1              dreal(2*c1(idim,1,i))*zdis/j
                 mpole(idim,5,j)=mpole(idim,5,j) - 
-     1              dimag(2*c1(idim,i))*zdis/j
+     1              dimag(2*c1(idim,1,i))*zdis/j
               endif
               mpole(idim,2,j)=mpole(idim,2,j) + 
-     1            dconjg(c1(idim,i))*zdisc*ztempc/ztemp
+     1            c1(idim,2,i)*zdisc*ztempc/ztemp
               mpole(idim,3,j)=mpole(idim,3,j) - 
-     1           dconjg(c1(idim,i))*zdisc*ztempc
+     1           c1(idim,2,i)*zdisc*ztempc
  
               mpole(idim,1,j)=mpole(idim,1,j)-dip(idim,1,i)*zdis*ztemp
-              mpole(idim,2,j)=mpole(idim,2,j)-dip(idim,2,i)*zdisc*ztempc
-              mpole(idim,2,j)=mpole(idim,2,j)+
-     1             dconjg(dip(idim,1,i))*(j+1)*zdisc*ztempc*ztempc/ztemp
-              mpole(idim,3,j)=mpole(idim,3,j) - 
-     1            dconjg(dip(idim,1,i))*(j+1)*zdisc*ztempc*ztempc
+              mpole(idim,2,j)=mpole(idim,2,j)-dip(idim,3,i)*zdisc*ztempc
+              mpole(idim,2,j)=mpole(idim,2,j)-
+     1             dip(idim,2,i)*(j+1)*zdisc*ztempc*ztempc/ztemp
+              mpole(idim,3,j)=mpole(idim,3,j) + 
+     1            dip(idim,2,i)*(j+1)*zdisc*ztempc*ztempc
             
             enddo
             zdis=zdis*ztemp*rscale
diff --git a/src/common/cdjseval2d.f b/src/common/cdjseval2d.f
index 4877f5b..e81c0b9 100644
--- a/src/common/cdjseval2d.f
+++ b/src/common/cdjseval2d.f
@@ -94,14 +94,14 @@ subroutine jbessel2d(nterms,z,rscale,fjs,ifder,fjder)
          fjs(0) = done
          do i = 1, nterms
             fjs(i) = zero
-	 enddo
+      enddo
 c
-	 if (ifder.eq.1) then
-	    do i=0,nterms
-	       fjder(i)=zero
-	    enddo
-	    fjder(1)=done/(2*rscale)
-	 endif
+      if (ifder.eq.1) then
+        do i=0,nterms
+          fjder(i)=zero
+        enddo
+        fjder(1)=done/(2*rscale)
+      endif
 c
          RETURN
       endif
@@ -148,7 +148,7 @@ subroutine jbessel2d(nterms,z,rscale,fjs,ifder,fjder)
       fjtemp(ntop)=zero
       fjtemp(ntop-1)=done
       do i=ntop-1,1,-1
-	 dcoef=2*i
+         dcoef=2*i
          ztmp=dcoef*zinv*fjtemp(i)-fjtemp(i+1)
          fjtemp(i-1)=ztmp
 c
diff --git a/src/helmholtz/helmrouts2d.f b/src/helmholtz/helmrouts2d.f
index d894ca5..2522443 100644
--- a/src/helmholtz/helmrouts2d.f
+++ b/src/helmholtz/helmrouts2d.f
@@ -1245,10 +1245,12 @@ subroutine h2dmploc(nd,zk,rscale1,center1,hexp,nterms1,
 C---------------------------------------------------------------------
       integer nterms1,nterms2,nterms,i,j,ifder,ii,nd
       real *8 rscale1,rscale2,r,theta,center1(2),center2(2),zdiff(2)
-      real *8 rsi,rsj,rsi2,rsj2,pi,done,rs12
+      real *8 rsi,rsj,rsi2,rsj2,pi,done,rs12,rfac
+      real *8, allocatable :: rs2pow(:)
       complex *16 zk,hexp(nd,-nterms1:nterms1),jexp(nd,-nterms2:nterms2)
       complex *16 z,ima, zmul,zinv,ztemp1,ztemp2
       complex *16, allocatable :: hval(:), hder(:), htemp(:)
+      complex *16, allocatable :: jexptemp(:,:)
       data ima/(0.0d0,1.0d0)/
 c
       done=1
@@ -1261,6 +1263,7 @@ subroutine h2dmploc(nd,zk,rscale1,center1,hexp,nterms1,
       allocate(hval(0:nterms+5))
       allocate(hder(0:nterms+5))
       allocate(htemp(-nterms-5:nterms+5))
+      allocate(jexptemp(nd,-nterms2:nterms2))
 c
       zdiff(1)=center2(1)-center1(1)
       zdiff(2)=center2(2)-center1(2)
@@ -1285,50 +1288,76 @@ subroutine h2dmploc(nd,zk,rscale1,center1,hexp,nterms1,
          ztemp1= ztemp1*zmul
          ztemp2=-ztemp2*zinv
       enddo
+
+      do i=-nterms2,nterms2
+        do ii=1,nd
+          jexptemp(ii,i) = 0
+        enddo
+      enddo
+
+      allocate(rs2pow(nterms2))
+      rfac = rscale1**2
+      rs2pow(1) = rfac
+      do i=2,nterms2
+        rs2pow(i) = rs2pow(i-1)*rfac
+      enddo
+
+      
 c
       do ii=1,nd
-         jexp(ii,0) = jexp(ii,0) + hexp(ii,0)*htemp(0)
+         jexptemp(ii,0) = jexptemp(ii,0) + hexp(ii,0)*htemp(0)
       enddo
       do j = 1,nterms1
         do ii=1,nd
-         jexp(ii,0) = jexp(ii,0)+(hexp(ii,+j)*htemp(-j))
-         jexp(ii,0) = jexp(ii,0)+(hexp(ii,-j)*htemp(+j))
+         jexptemp(ii,0) = jexptemp(ii,0)+(hexp(ii,+j)*htemp(-j))
+         jexptemp(ii,0) = jexptemp(ii,0)+(hexp(ii,-j)*htemp(+j))
         enddo
       enddo
 c
-      rsi=rscale1
-      rsi2=rscale1**2
-      rs12 = rscale2/rscale1
       do i = 1,nterms2
          do ii=1,nd
-            jexp(ii,i) = jexp(ii,i) + hexp(ii,0)*htemp(i)*rs12
-            jexp(ii,-i) = jexp(ii,-i) + hexp(ii,0)*htemp(-i)*rs12
+            jexptemp(ii,i) = jexptemp(ii,i) + hexp(ii,0)*htemp(i)
+            jexptemp(ii,-i) = jexptemp(ii,-i) + hexp(ii,0)*htemp(-i)
          enddo
-         rsj=rscale1
-         rsj2=rscale1**2
          do j = 1,min(nterms1,i)
             do ii=1,nd
-              jexp(ii,i) = jexp(ii,i)+(hexp(ii,+j)*htemp(i-j))*rsj2*rs12
-               jexp(ii,i) = jexp(ii,i)+(hexp(ii,-j)*htemp(i+j))*rs12
-               jexp(ii,-i) = jexp(ii,-i)+(hexp(ii,+j)*htemp(-i-j))*rs12
-               jexp(ii,-i) = jexp(ii,-i)+(hexp(ii,-j)*htemp(-i+j))*rsj2*
-     1            rs12
+              jexptemp(ii,i) = jexptemp(ii,i)+
+     1            (hexp(ii,+j)*htemp(i-j))*rs2pow(j)
+               jexptemp(ii,i) = jexptemp(ii,i)+
+     1             (hexp(ii,-j)*htemp(i+j))
+               jexptemp(ii,-i) = jexptemp(ii,-i)+
+     1             (hexp(ii,+j)*htemp(-i-j))
+               jexptemp(ii,-i) = jexptemp(ii,-i)+
+     1             (hexp(ii,-j)*htemp(-i+j))*rs2pow(j)
             enddo
-            rsj=rsj*rscale1
-            rsj2=rsj2*rscale1**2
          enddo
          do j = i+1,nterms1
             do ii=1,nd
-              jexp(ii,i) = jexp(ii,i)+(hexp(ii,+j)*htemp(i-j))*rsi2*rs12
-               jexp(ii,i) = jexp(ii,i)+(hexp(ii,-j)*htemp(i+j))*rs12
-               jexp(ii,-i) = jexp(ii,-i)+(hexp(ii,+j)*htemp(-i-j))*rs12
-               jexp(ii,-i) = jexp(ii,-i)+(hexp(ii,-j)*htemp(-i+j))*rsi2*
-     1            rs12
+              jexptemp(ii,i) = jexptemp(ii,i)+
+     1             (hexp(ii,+j)*htemp(i-j))*rs2pow(i)
+               jexptemp(ii,i) = jexptemp(ii,i)+
+     1              (hexp(ii,-j)*htemp(i+j))
+               jexptemp(ii,-i) = jexptemp(ii,-i)+
+     1              (hexp(ii,+j)*htemp(-i-j))
+               jexptemp(ii,-i) = jexptemp(ii,-i)+
+     1               (hexp(ii,-j)*htemp(-i+j))*rs2pow(i)
             enddo
          enddo
-         rsi=rsi*rscale1
-         rsi2=rsi2*rscale1**2
-         rs12 = rs12*rscale2/rscale1
+      enddo
+
+      rfac = rscale2/rscale1
+      rs12 = rfac 
+
+      do ii=1,nd
+        jexp(ii,0) = jexp(ii,0) + jexptemp(ii,0)
+      enddo
+
+      do i=1,nterms2
+        do ii=1,nd
+           jexp(ii,i) = jexp(ii,i) + jexptemp(ii,i)*rs12
+           jexp(ii,-i) = jexp(ii,-i) + jexptemp(ii,-i)*rs12
+        enddo
+        rs12 = rs12*rfac
       enddo
 c
 c      rsi=rscale2/rscale1
diff --git a/src/helmholtz/hfmm2dwrap.f b/src/helmholtz/hfmm2dwrap.f
index 89d3469..9ca7608 100644
--- a/src/helmholtz/hfmm2dwrap.f
+++ b/src/helmholtz/hfmm2dwrap.f
@@ -73,8 +73,8 @@ subroutine hfmm2d_s_c_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -141,8 +141,8 @@ subroutine hfmm2d_s_c_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -211,8 +211,8 @@ subroutine hfmm2d_s_c_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -279,7 +279,7 @@ subroutine hfmm2d_s_d_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -347,7 +347,7 @@ subroutine hfmm2d_s_d_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -418,7 +418,7 @@ subroutine hfmm2d_s_d_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(1)
@@ -691,8 +691,8 @@ subroutine hfmm2d_t_c_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       complex *16 pot(1)
       complex *16 grad(2),gradtarg(2)
       complex *16 hess(3),hesstarg(3)
@@ -756,8 +756,8 @@ subroutine hfmm2d_t_c_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       complex *16 pot(1)
       complex *16 grad(2)
       complex *16 hess(3),hesstarg(3)
@@ -825,8 +825,8 @@ subroutine hfmm2d_t_c_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       complex *16 pot(1)
       complex *16 grad(2)
       complex *16 hess(3)
@@ -891,7 +891,7 @@ subroutine hfmm2d_t_d_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 pot(1)
       complex *16 grad(2),gradtarg(2)
       complex *16 hess(3),hesstarg(3)
@@ -957,7 +957,7 @@ subroutine hfmm2d_t_d_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 pot(1)
       complex *16 grad(2)
       complex *16 hess(3),hesstarg(3)
@@ -1027,7 +1027,7 @@ subroutine hfmm2d_t_d_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 pot(1)
       complex *16 grad(2)
       complex *16 hess(3)
@@ -1300,8 +1300,8 @@ subroutine hfmm2d_st_c_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       complex *16 grad(2),gradtarg(2)
       complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole,iper
@@ -1367,8 +1367,8 @@ subroutine hfmm2d_st_c_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
@@ -1438,8 +1438,8 @@ subroutine hfmm2d_st_c_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      real *8 dipvec(2,1)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
       integer nd
@@ -1503,7 +1503,7 @@ subroutine hfmm2d_st_d_p(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 grad(2),gradtarg(2)
       complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole,iper
@@ -1571,7 +1571,7 @@ subroutine hfmm2d_st_d_g(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
@@ -1643,7 +1643,7 @@ subroutine hfmm2d_st_d_h(eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
       integer nd
diff --git a/src/helmholtz/hfmm2dwrap_vec.f b/src/helmholtz/hfmm2dwrap_vec.f
index 84c425a..cae2b1d 100644
--- a/src/helmholtz/hfmm2dwrap_vec.f
+++ b/src/helmholtz/hfmm2dwrap_vec.f
@@ -40,8 +40,8 @@ subroutine hfmm2d_s_c_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -105,8 +105,8 @@ subroutine hfmm2d_s_c_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -174,8 +174,8 @@ subroutine hfmm2d_s_c_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -241,7 +241,7 @@ subroutine hfmm2d_s_d_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -307,7 +307,7 @@ subroutine hfmm2d_s_d_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -377,7 +377,7 @@ subroutine hfmm2d_s_d_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       integer nt
       real *8 targ(2)
       complex *16 pottarg(nd,1)
@@ -645,8 +645,8 @@ subroutine hfmm2d_t_c_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       complex *16 pot(nd)
       complex *16 grad(nd,2),gradtarg(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
@@ -708,8 +708,8 @@ subroutine hfmm2d_t_c_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       complex *16 pot(nd,1)
       complex *16 grad(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
@@ -776,8 +776,8 @@ subroutine hfmm2d_t_c_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       complex *16 pot(nd,1)
       complex *16 grad(nd,2)
       complex *16 hess(nd,3)
@@ -841,7 +841,7 @@ subroutine hfmm2d_t_d_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       complex *16 pot(nd,1)
       complex *16 grad(nd,2),gradtarg(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
@@ -905,7 +905,7 @@ subroutine hfmm2d_t_d_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       complex *16 pot(nd,1)
       complex *16 grad(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
@@ -974,7 +974,7 @@ subroutine hfmm2d_t_d_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       complex *16 pot(nd,1)
       complex *16 grad(nd,2)
       complex *16 hess(nd,3)
@@ -1234,8 +1234,8 @@ subroutine hfmm2d_st_c_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       complex *16 grad(nd,2),gradtarg(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
       integer ifcharge,ifdipole,iper
@@ -1299,8 +1299,8 @@ subroutine hfmm2d_st_c_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       complex *16 hess(nd,3),hesstarg(nd,3)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
@@ -1369,8 +1369,8 @@ subroutine hfmm2d_st_c_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr(nd)
-      real *8 dipvec(nd,2)
+      complex *16 dipstr(nd,1)
+      real *8 dipvec(nd,2,1)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
 
@@ -1433,7 +1433,7 @@ subroutine hfmm2d_st_d_p_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       complex *16 grad(nd,2),gradtarg(nd,2)
       complex *16 hess(nd,3),hesstarg(nd,3)
       integer ifcharge,ifdipole,iper
@@ -1499,7 +1499,7 @@ subroutine hfmm2d_st_d_g_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       complex *16 hess(nd,3),hesstarg(nd,3)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
@@ -1570,7 +1570,7 @@ subroutine hfmm2d_st_d_h_vec(nd,eps,zk,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge(nd)
+      complex *16 charge(nd,1)
       integer ifcharge,ifdipole,iper
       integer ifpgh,ifpghtarg
 
diff --git a/src/laplace/lapkernels2d.f b/src/laplace/lapkernels2d.f
index 4217293..c797253 100644
--- a/src/laplace/lapkernels2d.f
+++ b/src/laplace/lapkernels2d.f
@@ -41,7 +41,7 @@ subroutine l2d_directcp(nd,sources,ns,charge,
 c     pot(nd)   (complex *16)      : potential is incremented
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
-      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr,r
+      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr
       real *8 thresh,rtmp,thresh2
       complex *16 pot(nd,nt)
       complex *16 charge(nd,ns)
@@ -111,7 +111,7 @@ subroutine l2d_directcg(nd,sources,ns,charge,targ,nt,pot,
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh,rtmp,thresh2
+      real *8 xdiff,ydiff,rr,thresh,rtmp,thresh2
       complex *16 pot(nd,nt),grad(nd,2,nt)
       real *8 dx,dy
       complex *16 charge(nd,ns)
@@ -190,9 +190,9 @@ subroutine l2d_directch(nd,sources,ns,charge,targ,nt,
 c     grad(nd,2,nt)  (complex *16)      : gradient is incremented
 c     hess(nd,3,nt)  (complex *16)      : Hessian is incremented
 c---------------------------------------------------------------------
-      integer i,ns,ifexpon,ii,nd,j,nt
+      integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh,thresh2
+      real *8 xdiff,ydiff,rr,thresh,thresh2
       complex *16 pot(nd,nt),grad(nd,2,nt),hess(nd,3,nt)
       real *8 rtmp, xdiff2, ydiff2
       real *8 rr2, dx, dy, dxx, dxy, dyy
@@ -277,7 +277,7 @@ subroutine l2d_directdp(nd,sources,ns,dipstr,dipvec,
 c     pot(nd,nt)   (complex *16)      : potential is incremented
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
-      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr,r
+      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr
       real *8 thresh,thresh2,p1,p2
       complex *16 pot(nd,nt)
       complex *16 dipstr(nd,ns)
@@ -353,7 +353,7 @@ subroutine l2d_directdg(nd,sources,ns,dipstr,dipvec,targ,nt,pot,
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh,rtmp
+      real *8 xdiff,ydiff,rr,thresh
       complex *16 pot(nd,nt),grad(nd,2,nt)
       complex *16 dipstr(nd,ns),d1,d2
       real *8 dx1,dx2,dy1,dy2
@@ -447,11 +447,10 @@ subroutine l2d_directdh(nd,sources,ns,dipstr,dipvec,targ,nt,
 c     grad(nd,2,nt)  (complex *16)      : gradient is incremented
 c     hess(nd,3,nt)  (complex *16)      : Hessian is incremented
 c---------------------------------------------------------------------
-      integer i,ns,ifexpon,ii,nd,j,nt
+      integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh
+      real *8 xdiff,ydiff,rr,thresh
       complex *16 pot(nd,nt),grad(nd,2,nt),hess(nd,3,nt)
-      real *8 rtmp
       complex *16 dipstr(nd,ns),d1,d2
       real *8 dipvec(nd,2,ns),dx1,dx2,dy1,dy2
       real *8 xdiff2,ydiff2,p1,p2,dxx1,dxx2,dxy1,dxy2,dyy1,dyy2
@@ -551,12 +550,12 @@ subroutine l2d_directcdp(nd,sources,ns,charge,
 c     pot(nd,nt)   (complex *16)      : potential is incremented
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
-      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr,r
+      real *8 sources(2,ns),targ(2,nt),xdiff,ydiff,rr
       real *8 thresh,rtmp,thresh2
       complex *16 pot(nd,nt)
       real *8 dipvec(nd,2,ns)
       complex *16 charge(nd,ns),dipstr(nd,ns)
-      real *8 xdiff2,ydiff2,p1,p2
+      real *8 p1,p2
 
       thresh2 = thresh*thresh
 c
@@ -632,13 +631,13 @@ subroutine l2d_directcdg(nd,sources,ns,charge,dipstr,dipvec,
 c---------------------------------------------------------------------
       integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh,rtmp,thresh2
+      real *8 xdiff,ydiff,rr,thresh,rtmp,thresh2
       complex *16 pot(nd,nt),grad(nd,2,nt)
       complex *16 charge(nd,ns),dipstr(nd,ns)
       complex *16 d1,d2
       real *8 dipvec(nd,2,ns)
-      real *8 xdiff2,ydiff2,p1,p2,dxx1,dxx2,dxy1,dxy2,dyy1,dyy2
-      real *8 rr2,rr3,dx,dy,dx1,dx2,dy1,dy2
+      real *8 xdiff2,ydiff2,p1,p2
+      real *8 rr2,dx,dy,dx1,dx2,dy1,dy2
 
       thresh2 = thresh*thresh
 c
@@ -738,9 +737,9 @@ subroutine l2d_directcdh(nd,sources,ns,charge,dipstr,dipvec,
 c     grad(nd,2,nt)  (complex *16)      : gradient is incremented
 c     hess(nd,3,nt)  (complex *16)      : Hessian is incremented
 c---------------------------------------------------------------------
-      integer i,ns,ifexpon,ii,nd,j,nt
+      integer i,ns,ii,nd,j,nt
       real *8 sources(2,ns),targ(2,nt)
-      real *8 xdiff,ydiff,rr,r,thresh,thresh2
+      real *8 xdiff,ydiff,rr,thresh,thresh2
       complex *16 pot(nd,nt),grad(nd,2,nt),hess(nd,3,nt)
       real *8 rtmp
       complex *16 charge(nd,ns),dipstr(nd,ns),d1,d2
diff --git a/src/laplace/lfmm2dwrap.f b/src/laplace/lfmm2dwrap.f
index 97d22f5..bde9cc9 100644
--- a/src/laplace/lfmm2dwrap.f
+++ b/src/laplace/lfmm2dwrap.f
@@ -59,10 +59,10 @@ subroutine lfmm2d_s_c_p(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
+      real *8 dipvec(2,1)
       real *8 targ(2)
       complex *16 pottarg(1)
       integer nt
@@ -121,9 +121,9 @@ subroutine lfmm2d_s_c_g(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 dipstr
-      complex *16 hess,hesstarg
-      real *8 dipvec(2)
+      complex *16 dipstr(1)
+      complex *16 hess(3),hesstarg(3)
+      real *8 dipvec(2,1)
       real *8 targ(2)
       complex *16 pottarg(1),gradtarg(2)
       integer nt
@@ -188,7 +188,7 @@ subroutine lfmm2d_s_c_h(eps,ns,sources,
       complex *16 dipstr
       real *8 targ(2)
       complex *16 pottarg(1),gradtarg(2),hesstarg(3)
-      real *8 dipvec(2)
+      real *8 dipvec(2,1)
       integer nt
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -247,9 +247,9 @@ subroutine lfmm2d_s_d_p(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 charge(1)
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       real *8 targ(2)
       complex *16 pottarg(1)
       integer nt
@@ -310,8 +310,8 @@ subroutine lfmm2d_s_d_g(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
-      complex *16 hess,hesstarg
+      complex *16 charge(1)
+      complex *16 hess(3),hesstarg(3)
       real *8 targ(2)
       complex *16 pottarg(1),gradtarg(2)
       integer nt
@@ -375,7 +375,7 @@ subroutine lfmm2d_s_d_h(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       real *8 targ(2)
       complex *16 pottarg(1),gradtarg(2),hesstarg(3)
       integer nt
@@ -436,8 +436,8 @@ subroutine lfmm2d_s_cd_p(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       real *8 targ(2)
       complex *16 pottarg(1)
       integer nt
@@ -498,7 +498,7 @@ subroutine lfmm2d_s_cd_g(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       real *8 targ(2)
       complex *16 pottarg(1),gradtarg(2)
       integer nt
@@ -625,9 +625,9 @@ subroutine lfmm2d_t_c_p(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 dipstr
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
-      real *8 dipvec(2)
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
+      real *8 dipvec(2,1)
       complex *16 pot(1)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -685,9 +685,9 @@ subroutine lfmm2d_t_c_g(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 dipstr
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       complex *16 pot(1),grad(2)
-      real *8 dipvec(2)
+      real *8 dipvec(2,1)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
       integer nd,iper
@@ -748,7 +748,7 @@ subroutine lfmm2d_t_c_h(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 dipstr
-      real *8 dipvec(2)
+      real *8 dipvec(2,1)
       complex *16 pot(1),grad(2),hess(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -807,9 +807,9 @@ subroutine lfmm2d_t_d_p(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 charge(1)
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       complex *16 pot(1)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -868,8 +868,8 @@ subroutine lfmm2d_t_d_g(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
-      complex *16 hess,hesstarg
+      complex *16 charge(1)
+      complex *16 hess(3),hesstarg(3)
       complex *16 pot(1),grad(2)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -932,7 +932,7 @@ subroutine lfmm2d_t_d_h(eps,ns,sources,
 c
 cc     temporary variables
 c
-      complex *16 charge
+      complex *16 charge(1)
       complex *16 pot(1),grad(2),hess(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -991,8 +991,8 @@ subroutine lfmm2d_t_cd_p(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       complex *16 pot(1)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -1051,7 +1051,7 @@ subroutine lfmm2d_t_cd_g(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       complex *16 pot(1),grad(2)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
@@ -1176,9 +1176,9 @@ subroutine lfmm2d_st_c_p(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 dipstr
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
-      real *8 dipvec(2)
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
+      real *8 dipvec(2,1)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
 
@@ -1231,7 +1231,7 @@ subroutine lfmm2d_st_c_g(eps,ns,sources,
       integer ns,nt,ier
       real *8 sources(2,ns),targ(2,nt)
       complex *16 charge(ns)
-      real *8 dipvec(2)
+      real *8 dipvec(2,1)
       complex *16 pot(ns),grad(2,ns)
       complex *16 pottarg(nt),gradtarg(2,nt)
 
@@ -1239,7 +1239,7 @@ subroutine lfmm2d_st_c_g(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 dipstr
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
       integer nd,iper
@@ -1297,7 +1297,7 @@ subroutine lfmm2d_st_c_h(eps,ns,sources,
       integer ns,nt,ier
       real *8 sources(2,ns),targ(2,nt)
       complex *16 charge(ns)
-      real *8 dipvec(2)
+      real *8 dipvec(2,1)
       complex *16 pot(ns),grad(2,ns),hess(3,ns)
       complex *16 pottarg(nt),gradtarg(2,nt),hesstarg(3,nt)
 
@@ -1365,8 +1365,8 @@ subroutine lfmm2d_st_d_p(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 charge
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
 
@@ -1428,7 +1428,7 @@ subroutine lfmm2d_st_d_g(eps,ns,sources,
 cc     temporary variables
 c
       complex *16 charge
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
       integer nd,iper
@@ -1554,8 +1554,8 @@ subroutine lfmm2d_st_cd_p(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 grad,gradtarg
-      complex *16 hess,hesstarg
+      complex *16 grad(2),gradtarg(2)
+      complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
 
@@ -1616,7 +1616,7 @@ subroutine lfmm2d_st_cd_g(eps,ns,sources,charge,
 c
 cc     temporary variables
 c
-      complex *16 hess,hesstarg
+      complex *16 hess(3),hesstarg(3)
       integer ifcharge,ifdipole
       integer ifpgh,ifpghtarg
       integer nd,iper
diff --git a/src/stokes/stfmm2d.f b/src/stokes/stfmm2d.f
index a2212fa..744e9bc 100644
--- a/src/stokes/stfmm2d.f
+++ b/src/stokes/stfmm2d.f
@@ -167,7 +167,7 @@ subroutine stfmm2d(nd, eps,
 
 
 c     local
-      complex *16, allocatable :: charge(:,:),dip(:,:,:)
+      complex *16, allocatable :: charge(:,:,:),dip(:,:,:)
       complex *16, allocatable :: potl(:,:),gradl(:,:,:),zsum(:)
       complex *16, allocatable :: pottargl(:,:),gradtargl(:,:,:)
       
@@ -189,9 +189,9 @@ subroutine stfmm2d(nd, eps,
 
 c     allocate necessary arrays
       
-      allocate(charge(nd,nsource),dip(nd,2,nsource),
+      allocate(charge(nd,2,nsource),dip(nd,3,nsource),
      1     potl(nd,nsource),pottargl(nd,ntarg),
-     2     gradl(nd,2,nsource),gradtargl(nd,2,ntarg),stat=ier)
+     2     gradl(nd,3,nsource),gradtargl(nd,3,ntarg),stat=ier)
       allocate(zsum(nd))
       if(ier .ne. 0) then
          print *, "In stfmm2d: cannot allocate Laplace call storage"
@@ -209,15 +209,18 @@ subroutine stfmm2d(nd, eps,
 ccc$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,zd1,zd2) REDUCTION(+:zsum)
       do i = 1,nsource
          do j = 1,nd
-           charge(j,i) = 0
+           charge(j,1,i) = 0
+           charge(j,2,i) = 0
            dip(j,1,i) = 0
            dip(j,2,i) = 0
+           dip(j,3,i) = 0
          enddo
 
          if(ifstoklet.eq.1) then
            do j=1,nd
-             charge(j,i) = (-ima*stoklet(j,1,i) + stoklet(j,2,i))/4
-             zsum(j) = zsum(j) - charge(j,i)
+             charge(j,1,i) = (-ima*stoklet(j,1,i) + stoklet(j,2,i))/4
+             charge(j,2,i) = dconjg(charge(j,1,i))
+             zsum(j) = zsum(j) - charge(j,1,i)
            enddo
          endif
 
@@ -226,7 +229,8 @@ subroutine stfmm2d(nd, eps,
              zd1 = -(-strslet(j,2,i) + ima*strslet(j,1,i))/2
              zd2 =  (-strsvec(j,2,i) + ima*strsvec(j,1,i))
              dip(j,1,i) = -ima*zd1*zd2 
-             dip(j,2,i) =  ima*(zd1*dconjg(zd2) + dconjg(zd1)*zd2) 
+             dip(j,2,i) = -dconjg(dip(j,1,i))
+             dip(j,3,i) =  ima*(zd1*dconjg(zd2) + dconjg(zd1)*zd2) 
            enddo
          endif
       enddo
@@ -255,8 +259,8 @@ subroutine stfmm2d(nd, eps,
 C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j)      
         do i=1,nsource
           do j=1,nd
-            pot(j,1,i) =  imag(potl(j,i)+zsum(j)+charge(j,i)) 
-            pot(j,2,i) = -real(potl(j,i)+zsum(j)+charge(j,i)) 
+            pot(j,1,i) =  imag(potl(j,i)+zsum(j)+charge(j,1,i)) 
+            pot(j,2,i) = -real(potl(j,i)+zsum(j)+charge(j,1,i)) 
           enddo
         enddo
 C$OMP END PARALLEL DO
@@ -276,10 +280,10 @@ subroutine stfmm2d(nd, eps,
 C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j)
         do i=1,nsource
           do j=1,nd
-             grad(j,1,1,i) =  imag(gradl(j,2,i))
-             grad(j,2,2,i) = -imag(gradl(j,2,i))
-             grad(j,2,1,i) =  real(2*gradl(j,1,i)-gradl(j,2,i))
-             grad(j,1,2,i) = -real(2*gradl(j,1,i)+gradl(j,2,i))
+             grad(j,1,1,i) =  imag(gradl(j,3,i))
+             grad(j,2,2,i) = -imag(gradl(j,3,i))
+             grad(j,2,1,i) =  real(2*gradl(j,1,i)-gradl(j,3,i))
+             grad(j,1,2,i) = -real(2*gradl(j,1,i)+gradl(j,3,i))
           enddo
         enddo
 C$OMP END PARALLEL DO
@@ -313,12 +317,12 @@ subroutine stfmm2d(nd, eps,
 C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j)
         do i=1,ntarg
           do j=1,nd
-             gradtarg(j,1,1,i) =  imag(gradtargl(j,2,i))
-             gradtarg(j,2,2,i) = -imag(gradtargl(j,2,i))
+             gradtarg(j,1,1,i) =  imag(gradtargl(j,3,i))
+             gradtarg(j,2,2,i) = -imag(gradtargl(j,3,i))
              gradtarg(j,2,1,i) =  real(2*gradtargl(j,1,i)-
-     1           gradtargl(j,2,i))
+     1           gradtargl(j,3,i))
              gradtarg(j,1,2,i) = -real(2*gradtargl(j,1,i)+
-     1           gradtargl(j,2,i))
+     1           gradtargl(j,3,i))
           enddo
         enddo
 C$OMP END PARALLEL DO
diff --git a/test/biharmonic/test_bhfmm2d.f b/test/biharmonic/test_bhfmm2d.f
index 7074211..cae53a9 100644
--- a/test/biharmonic/test_bhfmm2d.f
+++ b/test/biharmonic/test_bhfmm2d.f
@@ -1,6 +1,6 @@
       implicit real *8 (a-h,o-z)
       real *8, allocatable :: sources(:,:),targ(:,:)
-      complex *16, allocatable :: charges(:),dip(:,:)
+      complex *16, allocatable :: charges(:,:),dip(:,:)
       complex *16, allocatable :: pot(:),grad(:,:),hess(:,:)
       complex *16, allocatable :: pottarg(:),gradtarg(:,:)
       complex *16, allocatable :: hesstarg(:,:)
@@ -30,19 +30,21 @@
       nsrc = 9998
       ntarg = 9999
 
-      allocate(sources(2,nsrc),charges(nsrc),dip(2,nsrc))
+      allocate(sources(2,nsrc),charges(2,nsrc),dip(3,nsrc))
       allocate(targ(2,ntarg))
-      allocate(pot(nsrc),grad(2,nsrc),hess(3,nsrc))
-      allocate(pottarg(ntarg),gradtarg(2,ntarg),hesstarg(3,ntarg))
+      allocate(pot(nsrc),grad(3,nsrc),hess(3,nsrc))
+      allocate(pottarg(ntarg),gradtarg(3,ntarg),hesstarg(3,ntarg))
 
       do i=1,nsrc
 
          sources(1,i) = hkrand(0)
          sources(2,i) = hkrand(0)
 
-         charges(i) = hkrand(0) + ima*hkrand(0) 
+         charges(1,i) = hkrand(0) + ima*hkrand(0) 
+         charges(2,i) = hkrand(0) + ima*hkrand(0) 
          dip(1,i) = hkrand(0) + ima*hkrand(0)
          dip(2,i) = hkrand(0) + ima*hkrand(0)
+         dip(3,i) = hkrand(0) + ima*hkrand(0)
       enddo
 
 
@@ -54,12 +56,12 @@
       nts = min(20,nsrc)
       ntt = min(20,ntarg)
 
-      allocate(potex(nts),gradex(2,nts),hessex(3,nts))
-      allocate(pottargex(ntt),gradtargex(2,ntt),hesstargex(3,ntt))
+      allocate(potex(nts),gradex(3,nts),hessex(3,nts))
+      allocate(pottargex(ntt),gradtargex(3,ntt),hesstargex(3,ntt))
 
       eps = 0.5d-6
 
-      ifcharge = 1
+      ifcharge = 0
       ifdipole = 1
       iper = 0
       ifpgh = 2
@@ -76,8 +78,8 @@
 c
       call dzero(potex,2*nts)
       call dzero(pottargex,2*ntt)
-      call dzero(gradex,4*nts)
-      call dzero(gradtargex,4*ntt)
+      call dzero(gradex,6*nts)
+      call dzero(gradtargex,6*ntt)
 
       thresh = 1.0d-14
 
@@ -102,8 +104,8 @@
       if(ifpghtarg.ge.1) call derr(pottargex,pottarg,2*ntt,errpt)
 
 
-      if(ifpgh.ge.2) call derr(gradex,grad,4*nts,errgs)
-      if(ifpghtarg.ge.2) call derr(gradtargex,gradtarg,4*ntt,errgt)
+      if(ifpgh.ge.2) call derr(gradex,grad,6*nts,errgs)
+      if(ifpghtarg.ge.2) call derr(gradtargex,gradtarg,6*ntt,errgt)
 
       call errprintbh(errps,errgs,errhs,errpt,errgt,
      1  errht)
diff --git a/test/biharmonic/test_bhfmm2d.make b/test/biharmonic/test_bhfmm2d.make
index 2e2aa9e..48eef08 100755
--- a/test/biharmonic/test_bhfmm2d.make
+++ b/test/biharmonic/test_bhfmm2d.make
@@ -1,7 +1,7 @@
 PROJECT = int2-bhfmm2d
 
 HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
 
 # FC - fortran compiler
 # FFLAGS - fortran compiler flags
diff --git a/test/biharmonic/test_bhrouts2d.f b/test/biharmonic/test_bhrouts2d.f
index 5bb843f..5950b5f 100644
--- a/test/biharmonic/test_bhrouts2d.f
+++ b/test/biharmonic/test_bhrouts2d.f
@@ -40,13 +40,13 @@
       complex *16 w(lw)
       complex *16 w2(-lw:lw)
 c
-      complex *16 pot(nd),grad(nd,2)
-      complex *16 opot(nd),ograd(nd,2)
+      complex *16 pot(nd),grad(nd,3)
+      complex *16 opot(nd),ograd(nd,3)
       complex *16, allocatable :: mpolecd(:,:,:)
       complex *16, allocatable :: mpole2cd(:,:,:)
       complex *16, allocatable :: localcd(:,:,:)
       complex *16, allocatable :: local2cd(:,:,:)
-      complex *16, allocatable :: charge(:,:),dip(:,:,:)
+      complex *16, allocatable :: charge(:,:,:),dip(:,:,:)
       complex *16, allocatable :: dummy(:,:)
       complex *16 eye
 c
@@ -77,10 +77,11 @@
 c       create two charge sources.
 c
       ns = 2
-      allocate(charge(nd,ns),dip(nd,2,ns),dummy(nd,ns))
+      nt = 1
+      allocate(charge(nd,2,ns),dip(nd,3,ns),dummy(nd,ns))
       call mksource(nd,ns,source,charge,dip,dummy,c0)
-      call prin2(' charge is *',charge,2*nd*ns)
-      call prin2(' dip is *',dip,4*nd*ns)
+      call prin2(' charge is *',charge,4*nd*ns)
+      call prin2(' dip is *',dip,6*nd*ns)
 c
 c
 c       create center for shifted expansions
@@ -103,17 +104,15 @@
 c       direct calculation:
 c
       np = 1
-      ng = 2
+      ng = 3
       call zero_out(nd,np,opot)
       call zero_out(nd,ng,ograd)
 
       thresh = 1.0d-16
       call bh2d_directcdg(nd,source,ns,charge,dip,
-     1       ztrg,opot,ograd,thresh)
-        call prin2('Via direct calculation, potential is*',opot,2*nd)
-c
+     1       ztrg,nt,opot,ograd,thresh)
       call prin2('Via direct calculation, potential is*',opot,2*nd)
-      call prin2('Via direct calculation, grad is*',ograd,4*nd)
+      call prin2('Via direct calculation, grad is*',ograd,6*nd)
 c
 c
 c       create h-expansion:
@@ -162,8 +161,7 @@
         call prin2('Via mpole calculation, potential is*',pot,2*nd)
         call prin2('Via mpole calculation, grad is*',grad,2*nd*ng)
 
-c
-ccc	stop
+
 ccc	goto 8000
 c    mpmp shift
 c
@@ -280,41 +278,37 @@ subroutine bherrprintvec(nd,pot,opot,grada,ograda,gradaa,ogradaa)
       subroutine mksource(nd,ns,source,charge,dip,dummy,c0)
       implicit none
       integer nd,ns
-      real* 8 dd,source(2,ns),c0(2)
-      complex* 16 dip(nd,2,ns),charge(nd,ns),dummy(nd,ns)
+      real* 8 dd,source(2,ns),c0(2),hkrand
+      complex* 16 dip(nd,3,ns),charge(nd,2,ns),dummy(nd,ns)
       complex *16 eye
+      integer i,idim
 c
       data eye/(0.0d0,1.0d0)/
 c
-      dd = 1.0d-6
+      dd = 1.0d-3
       source(1,1)=c0(1)+0.24d0
       source(2,1)=c0(2)+0.25d0
-      charge(1,1)= (1.0d0 + eye*0.2d0)
-      dummy(1,1)= 0.0d0
-      dip(1,1,1)= charge(1,1)*dd
-      dip(1,1,1)= charge(1,1)
-      dip(1,2,1)= charge(1,1)*dd*eye
-      dip(1,2,1)= charge(1,1)*eye
 
-      charge(2,1)= (1.0d0 + eye*0.2d0)
-      dummy(2,1)= 0.0d0
-      dip(2,1,1)= charge(1,1)*dd
-      dip(2,1,1)= charge(1,1)
-      dip(2,2,1) = charge(1,1)*dd
-      dip(2,2,1) = charge(1,1)
-      source(1,2)=c0(1)+0.25d0
-      source(2,2)=c0(2)-0.25d0
       source(1,2)=source(1,1)+dd
       source(2,2)=source(2,1)
-c
-      charge(1,2)= -1.0d0 + eye*0.1d0
-      charge(2,2)= -1.0d0 + eye*0.1d0
-      dummy(1,2)= 0.0d0
-      dummy(2,2)= 0.0d0
-      dip(1,1,2)= 2.0d0
-      dip(2,1,2)= 2.0d0
-      dip(1,2,2)= 2.0d0 + 3.0d0*eye
-      dip(2,2,2)= 2.0d0 + 3.0d0*eye
+
+
+      do i=3,ns
+        source(1,i) = c0(1) -0.25d0 + hkrand(0)*0.5d0
+        source(2,i) = c0(2) -0.25d0 + hkrand(0)*0.5d0
+      enddo
+
+      do i=1,ns
+         do idim=1,nd
+            dummy(idim,i) = 0 
+            charge(idim,1,i) = hkrand(0)-0.5d0 + eye*(hkrand(0)-0.5d0)
+            charge(idim,2,i) = hkrand(0)-0.5d0 + eye*(hkrand(0)-0.5d0)
+
+            dip(idim,1,i) = hkrand(0)-0.5d0 + eye*(hkrand(0)-0.5d0)
+            dip(idim,2,i) = hkrand(0)-0.5d0 + eye*(hkrand(0)-0.5d0)
+            dip(idim,3,i) = hkrand(0)-0.5d0 + eye*(hkrand(0)-0.5d0)
+         enddo
+      enddo
 
       return
       end
diff --git a/test/stokes/test_stfmm2d.make b/test/stokes/test_stfmm2d.make
index 50a654d..2165bb0 100755
--- a/test/stokes/test_stfmm2d.make
+++ b/test/stokes/test_stfmm2d.make
@@ -3,17 +3,20 @@ PROJECT = int2-stfmm2d
 HOST = gcc
 HOST = gcc-openmp
 
+arch = native
+# arch = x86-64
+
 # FC - fortran compiler
 # FFLAGS - fortran compiler flags
 
 ifeq ($(HOST),gcc)
     FC=gfortran 
-    FFLAGS=-fPIC -O3 -funroll-loops -march=native -std=legacy 
+    FFLAGS=-fPIC -O3 -funroll-loops -march=${arch} -std=legacy 
 endif
 
 ifeq ($(HOST),gcc-openmp)
     FC = gfortran 
-    FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy
+    FFLAGS=-fPIC -O3 -funroll-loops -march=${arch} -fopenmp -std=legacy
 endif
 
 # Test objects