Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jyotish committed May 4, 2019
0 parents commit 5facb9f
Show file tree
Hide file tree
Showing 5 changed files with 273 additions and 0 deletions.
34 changes: 34 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# PEGASOS SVM

## Setting up

- Run `pip install -r requirements.txt` to install required packages.
- Download the Fashion-MNIST dataset files and extract them into the `data` directory.

## Running the code

The working directory should be `src`.

### Without kernel

```
python svm.py --dataset_dir ../data --iterations 10000
```

### With kernel
```
python svm.py --dataset_dir ../data --iterations 2 --kernel
```

## Bonus

### Without kernel

```
python svm-multiclass.py --dataset_dir ../data --iterations 10000
```

### With kernel
```
python svm-multiclass.py --dataset_dir ../data --iterations 2 --kernel
```
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
numpy==1.16.3
python-mnist==0.6
33 changes: 33 additions & 0 deletions src/pegasos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python

import math
import numpy as np
from random import randint

def pegasos(x, y, weights=None, iterations=2000, lam=1):
    """Train a linear SVM with the Pegasos stochastic sub-gradient method.

    Every iteration draws one training example uniformly at random, uses the
    step size ``1 / (lam * t)`` (``t`` counted from 1) and shrinks the weight
    vector by ``1 - step * lam``. When the drawn example violates the margin
    (``y * <w, x> < 1``) the example, scaled by ``step * y``, is added too.

    Args:
        x: Training samples, indexable by row; each row is a NumPy array.
        y: Labels aligned with ``x`` (the callers in this repo pass +1 / -1).
        weights: Optional initial weight vector (array or plain sequence).
            Defaults to zeros shaped like ``x[0]``.
        iterations: Number of stochastic updates to perform.
        lam: Regularisation strength; must be non-zero because the step size
            divides by it.

    Returns:
        The learned weight vector as a NumPy array.
    """
    if weights is None:
        weights = np.zeros(x[0].shape)
    else:
        # Accept plain sequences as well; a Python list would otherwise fail
        # on the scalar multiplication below.
        weights = np.asarray(weights, dtype=np.float64)

    num_samples = len(y)
    for t in range(1, iterations + 1):
        idx = randint(0, num_samples - 1)  # randint is inclusive on both ends
        step = 1 / (lam * t)
        # The margin test must use the weights from *before* this update.
        violates_margin = y[idx] * (weights @ x[idx].T) < 1
        weights = (1 - step * lam) * weights
        if violates_margin:
            weights = weights + step * y[idx] * x[idx]
    return weights

def kernelized_pegasos(x, y, kernel, weights=None, iterations=2000, lam=1):
    """Train a kernel SVM with a Pegasos-style stochastic update.

    ``weights[j]`` counts how often training example ``j`` was found to
    violate the margin. The decision value for an example ``i`` is
    ``(y[i] / lam) * sum_j weights[j] * y[j] * kernel(x[i], x[j])``; note that
    only ``1 / lam`` is applied as a scale, with no per-iteration step factor.

    Args:
        x: Training samples, indexable by row.
        y: Labels aligned with ``x`` (the callers in this repo pass +1 / -1).
        kernel: Callable ``kernel(a, b)`` returning a scalar similarity.
        weights: Optional initial coefficient vector of length ``len(y)``.
            It is updated in place and returned. Defaults to zeros.
        iterations: Number of stochastic updates to perform.
        lam: Regularisation strength; must be non-zero.

    Returns:
        The coefficient vector (one entry per training example).
    """
    num_samples = len(y)
    if weights is None:
        weights = np.zeros(num_samples)

    for _ in range(iterations):
        # randint includes its upper bound, so the last valid index is
        # num_samples - 1; using num_samples would index past the end.
        idx = randint(0, num_samples - 1)

        # Only examples with a non-zero coefficient contribute, so the kernel
        # is evaluated for those alone. Each term carries the label of the
        # support example j, matching how the coefficients are consumed at
        # prediction time (weights[j] * y[j] * kernel(...)).
        decision = sum(
            weights[j] * y[j] * kernel(x[idx], x[j])
            for j in np.flatnonzero(weights)
        )
        decision *= y[idx] / lam
        if decision < 1:
            weights[idx] += 1
    return weights

101 changes: 101 additions & 0 deletions src/svm-multiclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python

import os
import sys
import argparse
import numpy as np
from mnist import MNIST
import random

from pegasos import *

class Dataset():
    """MNIST-format train/test splits held as float64 NumPy arrays.

    All classes are kept; ``send_data`` relabels the training set for a
    one-vs-rest problem on request.
    """

    def __init__(self, data_dir, labels_to_load=None):
        """Load both splits from ``data_dir`` through the ``MNIST`` loader.

        Args:
            data_dir: Directory handed to ``MNIST``.
            labels_to_load: Stored on the instance but not otherwise used by
                this class. Defaults to a fresh ``[0, 1]`` list so instances
                never share one mutable default object.
        """
        self.labels_to_load = [0, 1] if labels_to_load is None else labels_to_load
        self.mnist_loader = MNIST(data_dir)
        print('Loading dataset...')

        self.xtrain, self.ytrain = self._as_float_arrays(
            *self.mnist_loader.load_training())
        self.xtest, self.ytest = self._as_float_arrays(
            *self.mnist_loader.load_testing())
        print('Dataset loaded')

    @staticmethod
    def _as_float_arrays(samples, labels):
        """Convert a (samples, labels) pair to float64 NumPy arrays."""
        return (np.array(samples, dtype=np.float64),
                np.array(labels, dtype=np.float64))

    def send_data(self, id):
        """Return the training set relabelled for class ``id`` versus the rest.

        Args:
            id: Class label treated as the positive class.

        Returns:
            A dict with ``'data'`` (a copy of the training samples) and
            ``'labels'`` (+1 where the training label equals ``id``, else -1).
        """
        return {
            'data': np.array(self.xtrain),
            'labels': np.where(np.asarray(self.ytrain) == id, 1, -1),
        }

def kernel_function(x, y, variance=1):
    """Gaussian (RBF) kernel: ``exp(-||x - y||^2 / (2 * variance))``.

    Args:
        x: First sample (array-like).
        y: Second sample, same shape as ``x``.
        variance: Kernel bandwidth (sigma squared). Defaults to 1, the value
            that used to be hard-coded.

    Returns:
        The kernel value; 1.0 for identical inputs, decaying towards 0 as the
        samples move apart.
    """
    squared_distance = np.linalg.norm(np.asarray(x) - np.asarray(y)) ** 2
    return np.exp(-squared_distance / (2 * variance))

def parse_arguments(argv=None):
    """Parse the command-line options of the multiclass SVM script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.

    Returns:
        The parsed namespace with ``dataset_dir``, ``iterations``, ``kernel``
        and the regularisation value. Because ``lambda`` is a Python keyword
        the value stored under that name is only reachable through
        ``getattr(args, 'lambda')``; it is therefore mirrored as ``args.lam``.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--dataset_dir', required=True,
                        help='directory containing the dataset files')
    parser.add_argument('--iterations', type=int, default=10,
                        help='number of training iterations')
    parser.add_argument('--kernel', default=False, action='store_true',
                        help='train the kernelized (RBF) variant')
    parser.add_argument('--lambda', default=1, type=float,
                        help='regularisation parameter')
    args = parser.parse_args(argv)
    args.lam = getattr(args, 'lambda')
    return args

def main():
    """Train ten one-vs-rest Pegasos classifiers and print test error/accuracy.

    One classifier is trained per class label 0-9 (linear, or kernelized when
    ``--kernel`` is given). A test sample is assigned to the class whose
    classifier yields the largest decision value.
    """
    args = parse_arguments()
    data = Dataset(args.dataset_dir)
    num_classes = 10
    # 'lambda' is a Python keyword, so the parsed value needs getattr.
    lam = getattr(args, 'lambda', 1)

    if args.kernel:
        print('Using RBF kernel')

    class_weights = []
    class_labels = []  # per-class +1/-1 training labels, needed for kernel prediction
    for k in range(num_classes):
        dataset = data.send_data(k)
        class_labels.append(dataset['labels'])
        if args.kernel:
            weights = kernelized_pegasos(
                x=dataset['data'],
                y=dataset['labels'],
                kernel=kernel_function,
                iterations=args.iterations,
                lam=lam
            )
        else:
            weights = pegasos(
                x=dataset['data'],
                y=dataset['labels'],
                iterations=args.iterations,
                lam=lam
            )
        class_weights.append(weights)

    # Testing
    if args.kernel:
        # Row k holds alpha_j * y_j for classifier k. The labels must be the
        # one-vs-rest +1/-1 labels used in training, not the raw class ids.
        coeffs = np.array(class_weights) * np.array(class_labels)
        # Training points whose coefficient is zero for every class cannot
        # influence any decision, so their kernel values are never computed.
        support = np.flatnonzero(np.any(coeffs != 0, axis=0))
        support_coeffs = coeffs[:, support]
        predicted = np.empty(len(data.ytest), dtype=int)
        for i, sample in enumerate(data.xtest):
            # Kernel values are shared by all ten classifiers: compute once.
            kernel_values = np.array(
                [kernel_function(data.xtrain[j], sample) for j in support])
            predicted[i] = (support_coeffs @ kernel_values).argmax()
    else:
        predicted = (data.xtest @ np.array(class_weights).T).argmax(axis=1)

    errors = int(np.count_nonzero(predicted != data.ytest))
    accuracy = 1 - errors/len(data.ytest)
    print('Error:', errors/len(data.ytest))
    print('Accuracy:', accuracy)


if __name__ == '__main__':
    main()
103 changes: 103 additions & 0 deletions src/svm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python

import os
import sys
import argparse
import numpy as np
from mnist import MNIST

from pegasos import *

class Dataset():
    """Two-class subset of an MNIST-format dataset with labels mapped to -1/+1."""

    def __init__(self, data_dir, labels_to_load=None):
        """Load both splits from ``data_dir`` and keep only the two chosen classes.

        Args:
            data_dir: Directory handed to the ``MNIST`` loader.
            labels_to_load: Pair ``[negative, positive]`` of original class
                labels; the first becomes -1 and the second +1. Defaults to a
                fresh ``[0, 1]`` list so instances never share one mutable
                default object.
        """
        self.labels_to_load = [0, 1] if labels_to_load is None else labels_to_load
        self.mnist_loader = MNIST(data_dir)
        print('Loading dataset...')

        self.xtrain, self.ytrain = self._load_split(self.mnist_loader.load_training)
        self.xtest, self.ytest = self._load_split(self.mnist_loader.load_testing)
        print('Dataset loaded')

    def _load_split(self, loader):
        """Call ``loader``, convert its result to float64 arrays and trim it."""
        samples, labels = loader()
        return self.trim_dataset(np.array(samples, dtype=np.float64),
                                 np.array(labels, dtype=np.float64))

    def trim_dataset(self, x, y):
        """Keep only the samples of the two selected classes.

        Args:
            x: Samples, one per row.
            y: Original class labels aligned with ``x``.

        Returns:
            ``(samples, labels)`` where ``labels`` is -1 for the first entry
            of ``self.labels_to_load`` and +1 for the second; samples of all
            other classes are dropped. Order is preserved.

        Raises:
            ValueError: If ``self.labels_to_load`` does not hold exactly two
                labels.
        """
        if len(self.labels_to_load) != 2:
            raise ValueError('labels_to_load must contain exactly two labels')
        negative_label, positive_label = self.labels_to_load

        x = np.asarray(x)
        y = np.asarray(y)
        is_negative = y == negative_label
        keep = is_negative | (y == positive_label)
        return np.array(x[keep]), np.where(is_negative[keep], -1, 1)

def kernel_function(x, y, variance=1):
    """Gaussian (RBF) kernel: ``exp(-||x - y||^2 / (2 * variance))``.

    Args:
        x: First sample (array-like).
        y: Second sample, same shape as ``x``.
        variance: Kernel bandwidth (sigma squared). Defaults to 1, the value
            that used to be hard-coded.

    Returns:
        The kernel value; 1.0 for identical inputs, decaying towards 0 as the
        samples move apart.
    """
    squared_distance = np.linalg.norm(np.asarray(x) - np.asarray(y)) ** 2
    return np.exp(-squared_distance / (2 * variance))

def parse_arguments(argv=None):
    """Parse the command-line options of the binary SVM script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.

    Returns:
        The parsed namespace with ``dataset_dir``, ``iterations``, ``kernel``
        and the regularisation value. Because ``lambda`` is a Python keyword
        the value stored under that name is only reachable through
        ``getattr(args, 'lambda')``; it is therefore mirrored as ``args.lam``.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--dataset_dir', required=True,
                        help='directory containing the dataset files')
    parser.add_argument('--iterations', type=int, default=10,
                        help='number of training iterations')
    parser.add_argument('--kernel', default=False, action='store_true',
                        help='train the kernelized (RBF) variant')
    parser.add_argument('--lambda', default=1, type=float,
                        help='regularisation parameter')
    args = parser.parse_args(argv)
    args.lam = getattr(args, 'lambda')
    return args

def kernelized_svm(args, data, max_test_samples=500):
    """Train a kernelized Pegasos SVM and return its accuracy on test data.

    Args:
        args: Parsed options; ``iterations`` and the ``lambda`` value are used.
        data: Dataset exposing ``xtrain``, ``ytrain``, ``xtest``, ``ytest``.
        max_test_samples: Evaluate only the first this-many test samples,
            since every prediction needs one kernel evaluation per support
            example. ``None`` evaluates the whole test set.

    Returns:
        The fraction of evaluated test samples that were classified correctly.
    """
    weights = kernelized_pegasos(
        x=data.xtrain,
        y=data.ytrain,
        kernel=kernel_function,
        iterations=args.iterations,
        # 'lambda' is a Python keyword, so the parsed value needs getattr.
        lam=getattr(args, 'lambda', 1)
    )

    # alpha_j * y_j per training example; zero entries never affect a
    # decision, so only the non-zero ones are visited.
    coeffs = np.asarray(weights) * np.asarray(data.ytrain)
    support = np.flatnonzero(coeffs)

    test_labels = data.ytest[:max_test_samples]
    errors = 0
    for i, label in enumerate(test_labels):
        decision = sum(
            coeffs[j] * kernel_function(data.xtrain[j], data.xtest[i])
            for j in support
        )
        prediction = -1 if decision < 0 else 1
        if prediction != label:
            errors += 1
    # Divide by the number of samples actually evaluated; dividing by the
    # size of the full test set would overstate the accuracy.
    return 1 - errors/len(test_labels)

def svm(args, data):
    """Train a linear Pegasos SVM and return its accuracy on the test set.

    Args:
        args: Parsed options; ``iterations`` and the ``lambda`` value are used.
        data: Dataset exposing ``xtrain``, ``ytrain``, ``xtest``, ``ytest``.

    Returns:
        The fraction of test samples classified correctly. A decision value
        below zero predicts -1, anything else predicts +1.
    """
    weights = pegasos(
        x=data.xtrain,
        y=data.ytrain,
        iterations=args.iterations,
        # 'lambda' is a Python keyword, so the parsed value needs getattr.
        lam=getattr(args, 'lambda', 1)
    )
    decisions = data.xtest @ weights
    predictions = np.where(decisions < 0, -1, 1)
    errors = int(np.count_nonzero(predictions != data.ytest))
    return 1 - errors/len(data.ytest)

def main():
    """Load the two-class dataset, train the selected SVM and print its accuracy."""
    args = parse_arguments()
    data = Dataset(args.dataset_dir)

    if args.kernel:
        print('Using RBF kernel')
    evaluate = kernelized_svm if args.kernel else svm
    print('Accuracy:', evaluate(args, data))


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()

0 comments on commit 5facb9f

Please sign in to comment.