
Commit

improved docstrings
Eduardo Leao authored and Eduardo Leao committed Jul 20, 2024
1 parent 0b1b696 commit 8025feb
Showing 3 changed files with 87 additions and 92 deletions.
147 changes: 72 additions & 75 deletions src/layers.ts
@@ -92,24 +92,22 @@ export class Module implements ModuleInterface {

// Standard Layers:

/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - Whether to use xavier initialization (divide by square root of first input dimension).
*/
export class Linear extends Module {
public W: Tensor;
public b: Tensor;
public has_bias: boolean;

/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - Whether to use xavier initialization (divide by square root of first input dimension).
*/
constructor(in_size: number, out_size: number, device = 'cpu', bias = true, xavier = true) {
super();
this.W = randn([in_size, out_size], true, xavier);
this.W.device = device;
this.W = randn([in_size, out_size], true, device, xavier);
this.b = zeros([out_size], true);
this.has_bias = bias;
}
@@ -128,16 +126,6 @@ export class Linear extends Module {
}
}
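For reference, the forward pass described by the Linear docstring above is a plain affine map over the last dimension, y = x·W + b. A minimal standalone sketch in plain TypeScript (number[][] arrays rather than this library's Tensor class; linearForward is a hypothetical name used only for illustration):

function linearForward(x: number[][], W: number[][], b: number[]): number[][] {
  // y[i][j] = sum_k x[i][k] * W[k][j] + b[j]
  return x.map(row =>
    b.map((bias, j) => row.reduce((acc, xk, k) => acc + xk * W[k][j], bias))
  );
}

// Example: a 1x2 input through a 2x3 weight matrix.
const y = linearForward([[1, 2]], [[1, 0, 1], [0, 1, 1]], [0.5, 0.5, 0.5]);
// y -> [[1.5, 2.5, 3.5]]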

/**
* Multi-Head Self-Attention layer (the attention sub-layer of a Transformer block).
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
*/
export class MultiHeadSelfAttention extends Module {
public Wk: Linear;
public Wq: Linear;
@@ -149,6 +137,16 @@
public softmax: Softmax;
public H: number;

/**
* Multi-Head Self-Attention layer (the attention sub-layer of a Transformer block).
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
*/
constructor(
in_size: number,
out_size: number,
@@ -221,21 +219,20 @@ export class MultiHeadSelfAttention extends Module {
}
}
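The scaled dot-product attention this layer is built around can be sketched for a single head as follows (plain arrays, illustrative only; the class above additionally projects the input into queries, keys, and values, splits them into n_heads, and, being used in a decoder, would normally apply a causal mask before the softmax — those details are omitted here):

function attention(q: number[][], k: number[][], v: number[][]): number[][] {
  const d = q[0].length; // head dimension
  return q.map(qRow => {
    // Scores: dot product of the query with every key, scaled by sqrt(d).
    const scores = k.map(kRow =>
      qRow.reduce((acc, qi, i) => acc + qi * kRow[i], 0) / Math.sqrt(d)
    );
    // Softmax over the scores (numerically stabilized).
    const max = Math.max(...scores);
    const exps = scores.map(s => Math.exp(s - max));
    const sum = exps.reduce((a, b) => a + b, 0);
    const weights = exps.map(e => e / sum);
    // Output: attention-weighted sum of the value rows.
    return v[0].map((_, j) => weights.reduce((acc, w, t) => acc + w * v[t][j], 0));
  });
}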

/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
* @param {boolean} bias - whether to include a bias term.
*/
export class FullyConnected extends Module {
public l1: Linear;
public relu: ReLU;
public l2: Linear;
public dropout: Dropout;

/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
* @param {boolean} bias - whether to include a bias term.
*/
constructor(in_size: number, out_size: number, dropout_prob = 0, device: string = 'cpu', bias: boolean = true) {
super();

@@ -259,22 +256,22 @@
}
}
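Per the docstring, this block composes two Linear layers with a ReLU in between and dropout on the output. A minimal sketch of that composition (illustrative only; linear1 and linear2 stand for the l1 and l2 forwards, and dropout is omitted for clarity):

function fullyConnectedForward(
  x: number[][],
  linear1: (h: number[][]) => number[][],
  linear2: (h: number[][]) => number[][]
): number[][] {
  const hidden = linear1(x).map(row => row.map(v => Math.max(0, v))); // ReLU
  return linear2(hidden);
}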

/**
* Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
*/
export class Block extends Module {
public att: MultiHeadSelfAttention;
public ln1: LayerNorm;
public fcc: FullyConnected;
public ln2: LayerNorm;

/**
* Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
* @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
*/
constructor(
in_size: number,
out_size: number,
@@ -313,18 +310,18 @@ export class Block extends Module {
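A transformer decoder block normally wires these sub-layers together with residual connections. The exact wiring is not visible in this hunk, so the pre-norm pattern below (x = x + att(ln1(x)); x = x + fcc(ln2(x))), common in GPT-style decoders, is only an assumption:

type Layer = (x: number[][]) => number[][];

function blockForward(x: number[][], ln1: Layer, att: Layer, ln2: Layer, fcc: Layer): number[][] {
  const addResidual = (a: number[][], b: number[][]) =>
    a.map((row, i) => row.map((v, j) => v + b[i][j]));
  const x1 = addResidual(x, att(ln1(x))); // attention sub-layer + residual
  return addResidual(x1, fcc(ln2(x1)));   // feed-forward sub-layer + residual
}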

// Embedding Layers

/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class Embedding extends Module {
public E: Tensor;

/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
constructor(in_size: number, embed_size: number) {
super();
this.E = randn([in_size, embed_size], true, false);
this.E = randn([in_size, embed_size], true, 'cpu', false);
}

/**
@@ -345,18 +342,18 @@ export class Embedding extends Module {
}
}
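Conceptually the forward pass is a row lookup: each input index selects one row of the embedding matrix E. A tiny sketch (illustrative only):

function embeddingForward(idx: number[], E: number[][]): number[][] {
  return idx.map(i => E[i]); // row i of E is the vector for token index i
}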

/**
* Positional Embedding class, turns sequence positions (indexes) into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class PositionalEmbedding extends Module {
public E: Tensor;

/**
* Positional Embedding class, turns sequence positions (indexes) into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
constructor(n_timesteps: number, embed_size: number) {
super();
this.E = randn([n_timesteps, embed_size], true, false);
this.E = randn([n_timesteps, embed_size], true, 'cpu', false);
}

/**
@@ -376,10 +373,10 @@ export class PositionalEmbedding extends Module {
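Here the lookup is by position rather than by token identity: timestep t always selects row t of E. Illustrative sketch:

function positionalEmbeddingForward(seqLen: number, E: number[][]): number[][] {
  return Array.from({ length: seqLen }, (_, t) => E[t]); // row t encodes position t
}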

// Non-linearity Layers:

/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
export class ReLU extends Module {
/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
constructor() {
super();
}
@@ -413,10 +410,10 @@ export class ReLU extends Module {
}
}
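The elementwise rule from the docstring, as a one-liner:

const relu = (z: number): number => Math.max(0, z);
// [-1, 0, 2].map(relu) -> [0, 0, 2]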

/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
export class Softmax extends Module {
/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
constructor() {
super();
}
@@ -436,14 +433,14 @@
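A numerically stable softmax over one row (subtracting the max before exponentiating), matching the "distribution of values (sum=1)" description above:

function softmax(row: number[]): number[] {
  const max = Math.max(...row);
  const exps = row.map(v => Math.exp(v - max));
  const sum = exps.reduce((a, b) => a + b, 0);
  return exps.map(e => e / sum);
}
// softmax([1, 1, 1]) -> [1/3, 1/3, 1/3]; the outputs always sum to 1.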

// Regularization Layers:

/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
export class Dropout extends Module {
public p: number;

/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
constructor(drop_prob: number) {
super();
this.p = drop_prob;
@@ -473,15 +470,15 @@ export class Dropout extends Module {
}
}
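A sketch of the standard "inverted dropout" convention: zero each value with probability p and rescale the survivors by 1/(1 - p) so the expected activation is unchanged at training time. Whether this library rescales is not visible in this hunk, so treat the scaling as an assumption:

function dropoutForward(row: number[], p: number, training = true): number[] {
  if (!training || p === 0) return row.slice();
  return row.map(v => (Math.random() < p ? 0 : v / (1 - p)));
}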

/**
* Layer Norm class, usually added after other layers to normalize across all of the output.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
export class LayerNorm extends Module {
public gamma: Tensor;
public beta: Tensor;

/**
* Layer Norm class, usually added after other layers to normalize across all of the output.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
constructor(n_embed: number) {
super();
this.gamma = ones([n_embed], true);
@@ -498,10 +495,10 @@ export class LayerNorm extends Module {
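Layer normalization over the last dimension, with the learnable gamma (initialized to ones above) and beta: y = gamma * (x - mean) / sqrt(var + eps) + beta. Illustrative sketch; the eps value here is an assumption:

function layerNormForward(row: number[], gamma: number[], beta: number[], eps = 1e-5): number[] {
  const mean = row.reduce((a, b) => a + b, 0) / row.length;
  const variance = row.reduce((a, v) => a + (v - mean) ** 2, 0) / row.length;
  return row.map((v, i) => (gamma[i] * (v - mean)) / Math.sqrt(variance + eps) + beta[i]);
}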

// Loss layers:

/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
export class CrossEntropyLoss extends Module {
/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
constructor() {
super();
}
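For a single example, the loss described above is the negative log of the softmax probability assigned to the expected index. Illustrative sketch:

function crossEntropy(logits: number[], target: number): number {
  const max = Math.max(...logits);
  const exps = logits.map(v => Math.exp(v - max));
  const sum = exps.reduce((a, b) => a + b, 0);
  return -Math.log(exps[target] / sum);
}
// crossEntropy([2, 0, 0], 0) -> about 0.24: a confident correct prediction gives a small loss.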
18 changes: 9 additions & 9 deletions src/optim.ts
@@ -1,13 +1,5 @@
import { Parameter, Tensor, zeros } from "./tensor";

/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form.
* @param {number} lr - Scalar multiplying each learning step, controls speed of learning.
* @param {number} reg - Scalar controlling the strength of l2 regularization.
* @param {(number)[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to denominator to stop it from ever going to zero.
*/
export class Adam {
// Declare Adam's types:
params: (Parameter | Tensor)[];
@@ -16,7 +8,15 @@ export class Adam {
b1: number;
b2: number;
eps: number;


/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form.
* @param {number} lr - Scalar multiplying each learning step, controls speed of learning.
* @param {number} reg - Scalar controlling the strength of l2 regularization.
* @param {(number)[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to denominator to stop it from ever going to zero.
*/
constructor(
params: (Parameter | Tensor)[],
lr = 1e-3,
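For a single scalar parameter, the textbook Adam update the docstring describes looks like the sketch below. Only lr = 1e-3 is visible in this hunk; the b1, b2, and eps defaults here are the usual textbook values, not necessarily this class's, and the reg (l2) term is omitted:

function adamStep(
  w: number, grad: number, m: number, v: number, t: number,
  lr = 1e-3, b1 = 0.9, b2 = 0.999, eps = 1e-8
): { w: number; m: number; v: number } {
  const mNew = b1 * m + (1 - b1) * grad;        // first-moment estimate
  const vNew = b2 * v + (1 - b2) * grad * grad; // second-moment estimate
  const mHat = mNew / (1 - b1 ** t);            // bias correction
  const vHat = vNew / (1 - b2 ** t);
  return { w: w - (lr * mHat) / (Math.sqrt(vHat) + eps), m: mNew, v: vNew };
}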
14 changes: 6 additions & 8 deletions src/tensor.ts
@@ -398,11 +398,11 @@ export class Tensor {

// <<< Parameter class, tensor that always tracks gradients >>> //

/**
* Creates new Parameter (an instance of the Tensor class that always tracks gradients).
* @param {object} data - Iterable containing the data to be stored in the Tensor.
*/
export class Parameter extends Tensor {
/**
* Creates new Parameter (an instance of the Tensor class that always tracks gradients).
* @param {object} data - Iterable containing the data to be stored in the Tensor.
*/
constructor(data: Array<any> | number) {
super(data, true);
}
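In other words, a Parameter is simply a Tensor whose requires_grad flag is always true (the constructor passes true to super), so it is always tracked for gradients. Hypothetical usage — the shapes here are made up for illustration:

const w = new Parameter([[0.1, -0.2], [0.3, 0.4]]);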
@@ -681,8 +681,6 @@ class MatMul {
}
}
}
// ================================ NEW ================================ //
// ================================ NEW ================================ //

export class Pow {
cache: any;
@@ -1956,8 +1954,8 @@ export function rand(shape: Array<number>, requires_grad = false, device = 'cpu'
export function randn(
shape: Array<number>,
requires_grad = false,
xavier = false,
device = 'cpu'
device = 'cpu',
xavier = false
): Tensor {
return new Tensor(
_tensorInitializer(shape, () => {
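With this reordering, randn's positional arguments are now (shape, requires_grad, device, xavier), matching rand above — which is why the layers.ts call sites in this commit change from randn([...], true, xavier) to randn([...], true, device, xavier). Hypothetical usage, assuming randn is imported from src/tensor:

const W = randn([4, 8], true, 'cpu', true);    // xavier-scaled init, gradients tracked
const E = randn([100, 8], true, 'cpu', false); // plain normal init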
