Skip to content

Commit

Permalink
Merge pull request #17 from Hi-Folks/features/14-linear-regression
Browse files Browse the repository at this point in the history
Linear Regression
  • Loading branch information
roberto-butti authored Feb 22, 2022
2 parents 619c94b + 9edfb1c commit 09d0de6
Show file tree
Hide file tree
Showing 6 changed files with 653 additions and 0 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Stat class has methods to calculate an average or typical value from a populatio
- harmonicMean(): harmonic mean;
- correlation(): the Pearson’s correlation coefficient for two inputs;
- covariance(): the sample covariance of two inputs;
- linearRegression(): the slope and intercept of a simple linear regression, estimated using ordinary least squares.

#### Stat::mean( array $data )
Return the sample arithmetic mean of the array _$data_.
Expand Down Expand Up @@ -243,6 +244,27 @@ $correlation = Stat::correlation(
// -1.0
```

#### Stat::linearRegression( array $x, array $y )
Return the slope and intercept of a simple linear regression, estimated using ordinary least squares.
Simple linear regression describes the relationship between an independent variable *$x* and a dependent variable *$y* in terms of a linear function.

```php
$years = [1971, 1975, 1979, 1982, 1983];
$films_total = [1, 2, 3, 4, 5];
list($slope, $intercept) = Stat::linearRegression(
$years,
$films_total
);
// 0.31
// -610.18
```
What happens in 2022, according to the samples above?

```php
round($slope * 2022 + $intercept);
// 17.0
```

### Freq class
With the *Statistics* package you can calculate a frequency table.
A frequency table lists the frequency of various outcomes in a sample.
Expand Down
6 changes: 6 additions & 0 deletions examples/stat_methods.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@
// 1.25
$variance = Stat::variance([2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]);
// $variance: 1.3720238095238095

// Fit a line through (x, y) pairs; the result is [slope, intercept].
[$slope, $intercept] = Stat::linearRegression(
    [1971, 1975, 1979, 1982, 1983],
    [1, 2, 3, 4, 5],
);
// $slope: 0.31
// $intercept: -610.18


try {
Expand Down
43 changes: 43 additions & 0 deletions src/Stat.php
Original file line number Diff line number Diff line change
Expand Up @@ -438,4 +438,47 @@ public static function correlation(array $x, array $y): false|float

return $a / $b;
}

/**
 * Estimate the slope and intercept of a simple linear regression of $y on $x
 * using ordinary least squares.
 *
 * Closed-form solution, with n = count($x):
 *   slope     = (n * sum(x*y) - sum(x) * sum(y)) / (n * sum(x*x) - sum(x)^2)
 *   intercept = (sum(y) - slope * sum(x)) / n
 *
 * Values are paired by array key: every key of $x is looked up in $y.
 *
 * @param array<int|float> $x values of the independent variable
 * @param array<int|float> $y values of the dependent variable
 * @throws InvalidDataInputException if the 2 arrays have different sizes,
 * or if they contain fewer than 2 elements,
 * or if the elements of $x are constant (the denominator is zero)
 * @return array<int|float> two-element list: [slope, intercept]
 */
public static function linearRegression(array $x, array $y): array
{
    $countX = count($x);
    $countY = count($y);
    if ($countX !== $countY) {
        throw new InvalidDataInputException(
            'Linear regression requires that both inputs have same number of data points.'
        );
    }
    if ($countX < 2) {
        throw new InvalidDataInputException(
            'Linear regression requires at least two data points.'
        );
    }
    $sumX = array_sum($x);
    $sumY = array_sum($y);
    $sumXX = 0;
    $sumXY = 0;

    foreach ($x as $key => $value) {
        $sumXY += ($value * $y[$key]);
        $sumXX += ($value * $value);
    }
    $denominator = (($countX * $sumXX) - ($sumX * $sumX));
    // The denominator is an int for integer inputs and a float for float
    // inputs. Normalise to float before the identity check: `0.0 === 0` is
    // false, so a constant float series would otherwise skip this guard and
    // hit a DivisionByZeroError below.
    if ((float) $denominator === 0.0) {
        throw new InvalidDataInputException(
            'Linear regression, the inputs is constant.'
        );
    }
    $slope = (($countX * $sumXY) - ($sumX * $sumY)) / $denominator;
    $intercept = ($sumY - ($slope * $sumX)) / $countX;

    return [$slope, $intercept];
}
}
36 changes: 36 additions & 0 deletions tests/StatFromCsvTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php


use HiFolks\Statistics\Stat;

// Reads the income/happiness CSV fixture, checks the shape of every row and
// verifies the linear regression fitted on the income -> happiness columns.
it('parse CSV', function () {
    // Resolve the fixture relative to this test file so the test does not
    // depend on the working directory the test runner was started from.
    $handle = fopen(__DIR__ . "/data/income.data.csv", "r");
    // Fail right here if the fixture cannot be opened, instead of failing
    // later with an unrelated row-count mismatch.
    expect($handle)->not->toBeFalse();

    $row = 0;
    $x = [];
    $y = [];
    try {
        while (($data = fgetcsv($handle, 1000, ",")) !== false) {
            $num = count($data);
            expect($num)->toEqual(3);
            $row++;
            if ($row === 1) {
                // The first line is the header row: counted, but not parsed.
                continue;
            }
            $income = floatval($data[1]);
            $x[] = $income;
            $happiness = floatval($data[2]);
            $y[] = $happiness;
            expect($income)->toBeFloat();
            expect($income)->toBeGreaterThan(0);
            expect($happiness)->toBeFloat();
        }
    } finally {
        // Release the handle even when an expectation above throws.
        fclose($handle);
    }

    [$slope, $intercept] = Stat::linearRegression($x, $y);
    expect(round($slope, 5))->toEqual(0.71383);
    expect(round($intercept, 5))->toEqual(0.20427);

    // 499 lines in total, header included.
    expect($row)->toEqual(499);
});
47 changes: 47 additions & 0 deletions tests/StatTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,50 @@
)
)->toThrow(InvalidDataInputException::class);
});

// Checks slope and intercept on two small data sets that share the same x
// values, then uses the second fit to extrapolate a prediction.
it('calculates linear regression (static)', function () {
    $years = [1971, 1975, 1979, 1982, 1983];

    // First data set.
    [$slope, $intercept] = Stat::linearRegression($years, [1, 2, 3, 4, 5]);
    expect($slope)->toBeFloat()->toEqual(0.31);
    expect($intercept)->toBeFloat()->toEqual(-610.18);

    // Second data set: same x values, different y values.
    [$slope, $intercept] = Stat::linearRegression($years, [1, 2, 1, 3, 1]);
    expect($slope)->toBeFloat()->toEqual(0.05);
    expect($intercept)->toBeFloat()->toEqual(-97.3);

    // Prediction for x = 2019 using the second fit.
    $predicted = round($slope * 2019 + $intercept);
    expect($predicted)->toEqual(4);
});

// Every [x, y] pair below must be rejected with an InvalidDataInputException.
it('calculates linear regression with not valid input (static)', function () {
    $invalidInputs = [
        // a single data point
        [[3], [2]],
        // arrays of different sizes
        [[3, 3, 3, 3], [2, 1, 1, 1, 1]],
        // constant x values
        [[3, 3, 3, 3, 3], [1, 1, 1, 1, 1]],
    ];

    foreach ($invalidInputs as [$x, $y]) {
        expect(
            fn () => Stat::linearRegression($x, $y)
        )->toThrow(InvalidDataInputException::class);
    }
});
Loading

0 comments on commit 09d0de6

Please sign in to comment.