Skip to content

Commit

Permalink
improved diagnostic plots, standardised residuals, improved computati…
Browse files Browse the repository at this point in the history
…on of standard errors
  • Loading branch information
darrenjw committed Jun 5, 2017
1 parent 4ddc9e0 commit 86d7b28
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 27 deletions.
7 changes: 1 addition & 6 deletions src/main/scala/scalaglm/Glm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,10 @@ case class Glm(y: DenseVector[Double],
*/
lazy val ri = inv(r)

/**
* Inverse of the final X'WX matrix
*/
lazy val xtwxi = ri * (ri.t)

/**
* Standard errors for the regression coefficients
*/
lazy val se = breeze.numerics.sqrt(diag(xtwxi))
lazy val se = norm(ri(*, ::))

/**
* z-statistics for the regression coefficients
Expand Down
50 changes: 34 additions & 16 deletions src/main/scala/scalaglm/Lm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,10 @@ case class Lm(y: DenseVector[Double],
*/
lazy val ri = inv(r)

/**
* The inverse of X'X (but calculated efficiently)
*/
lazy val xtxi = ri * (ri.t)

/**
* Standard errors for the regression coefficients
*/
lazy val se = breeze.numerics.sqrt(diag(xtxi)) * rse
lazy val se = norm(ri(*,::)) * rse

/**
* t-statistics for the regression coefficients
Expand Down Expand Up @@ -212,25 +207,48 @@ case class Lm(y: DenseVector[Double],
import breeze.plot._
def plots: Figure = {
val fig = Figure("Linear regression diagnostics")
val p0 = fig.subplot(2,2,0)
val p0 = fig.subplot(2,2,0) // Obs against fitted
p0 += plot(fitted,y,'+')
p0 += plot(fitted,fitted)
p0.title = "Observations against fitted values"
p0.xlabel = "Fitted value"
p0.ylabel = "Observation"
val p1 = fig.subplot(2,2,1)
p1 += plot(fitted,residuals,'+')
p1.title = "Residuals against fitted values"
val p1 = fig.subplot(2,2,1) // Studentised against fitted
p1 += plot(fitted,studentised,'+')
p1 += plot(fitted,DenseVector.fill(n)(0.0))
p1.title = "Studentised residuals against fitted values"
p1.xlabel = "Fitted value"
p1.ylabel = "Residual"
val p2 = fig.subplot(2,2,2)
p2 += breeze.plot.hist(residuals)
p2.title = "Histogram of residuals"
p2.xlabel = "Residual"
// TODO: Add a Q-Q plot..
p1.ylabel = "Studentised residual"
val p2 = fig.subplot(2,2,2) // Histogram of studentised
p2 += breeze.plot.hist(studentised)
p2.title = "Histogram of studentised residuals"
p2.xlabel = "Studentised residual"
p2.ylabel = "Frequency"
val p3 = fig.subplot(2,2,3) // Residual Q-Q plot
val sorted = studentised.toArray.sorted
val grid = linspace(1.0/n,1.0-1.0/n,n)
import breeze.stats.distributions.Gaussian
val quantiles = grid map {Gaussian(0.0,1.0).inverseCdf(_)}
p3 += plot(quantiles, sorted)
p3 += plot(quantiles, quantiles)
p3.title = "Residual Q-Q plot"
p3.xlabel = "Standard normal quantiles"
p3.ylabel = "Studentised residuals"
fig
}

/**
* Square root of the leverage vector
*/
lazy val sh = norm(q(*,::))

/**
* Vector containing the leverages (diagonal of the hat matrix)
*/
lazy val h = sh * sh

import breeze.numerics.sqrt
lazy val studentised = residuals / sqrt(1.0 - h) / rse

} // case class Lm

Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/scalaglm/Predict.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ case class PredictLm(mod: Lm, newX: DenseMatrix[Double]) extends Predict {
/**
* for internal use (probably should be marked private)
*/
lazy val rtix = nX * mod.ri
lazy val xrti = nX * mod.ri

/**
* standard errors for the predictions
*/
lazy val se = norm(rtix(*, ::)) * mod.rse
lazy val se = norm(xrti(*, ::)) * mod.rse
} // case class PredictLm


Expand Down
8 changes: 5 additions & 3 deletions src/main/scala/scalaglm/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,11 @@ object Utils {
if (i == j) {
pij += hist(mat(::,i))
pij.title = names(i)
pij.xlabel = i.toString
pij.xlabel = names(i)
} else {
pij += plot(mat(::,j),mat(::,i),'.')
pij.xlabel = j.toString
pij.ylabel = i.toString
pij.xlabel = names(j)
pij.ylabel = names(i)
}
}
}
Expand Down Expand Up @@ -183,10 +183,12 @@ object Utils {
// read the file from disk
val mat = csvread(new java.io.File(fileName))
println("Dim: " + mat.rows + " " + mat.cols)
Utils.pairs(mat, List("Freq", "Angle", "Chord", "Velo", "Thick", "Sound"))
val y = mat(::, 5) // response is the final column
val X = mat(::, 0 to 4)
val mod = Lm(y, X, List("Freq", "Angle", "Chord", "Velo", "Thick"))
mod.summary
mod.plots

// test without name list
Lm(y,X,false).summary
Expand Down
2 changes: 2 additions & 0 deletions src/test/scala/lm-tests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class LmSpec extends FlatSpec {
assert(norm(mod.p - rP) <= 0.00001)
val rF = DenseVector[Double](R.evalD1("mod$fitted.values"))
assert(norm(mod.fitted - rF) <= 0.0001)
val rStud = DenseVector[Double](R.evalD1("rstandard(mod)"))
assert(norm(mod.studentised - rStud) <= 0.0001)
val rPred = DenseVector[Double](R.evalD1("predict(mod)"))
assert(norm(mod.predict().fitted - rPred) <= 0.0001)
val rPredSe = DenseVector[Double](R.evalD1("predict(mod,se.fit=TRUE)$se.fit"))
Expand Down

0 comments on commit 86d7b28

Please sign in to comment.