diff --git a/src/layers.ts b/src/layers.ts
index b3edd24..9f80795 100644
--- a/src/layers.ts
+++ b/src/layers.ts
@@ -92,24 +92,22 @@ export class Module implements ModuleInterface {
 
 // Standard Layers:
 
-/**
- * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
- *
- * @param {number} in_size - size of the last dimention of the input array.
- * @param {number} out_size - size of the last dimention of the output array.
- * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
- * @param {boolean} bias - wether to include a bias term.
- * @param {boolean} xavier - Wether to use xavier initialization (divide by square root of first input dimension).
- */
 export class Linear extends Module {
   public W: Tensor;
   public b: Tensor;
   public has_bias: boolean;
-
+  /**
+   * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
+   *
+   * @param {number} in_size - size of the last dimension of the input array.
+   * @param {number} out_size - size of the last dimension of the output array.
+   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
+   * @param {boolean} bias - whether to include a bias term.
+   * @param {boolean} xavier - whether to use Xavier initialization (divide by the square root of the first input dimension).
+   */
   constructor(in_size: number, out_size: number, device = 'cpu', bias = true, xavier = true) {
     super();
-    this.W = randn([in_size, out_size], true, xavier);
-    this.W.device = device;
+    this.W = randn([in_size, out_size], true, device, xavier);
     this.b = zeros([out_size], true);
     this.has_bias = bias;
   }
@@ -128,16 +126,6 @@ export class Linear extends Module {
   }
 }
 
-/**
- * Full transformer Layer implementation.
- *
- * @param {number} in_size - size of the last dimention of the input array.
- * @param {number} out_size - size of the last dimention of the output array.
- * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
- * @param {number} n_timesteps - length of text sequence to be processed bt Transformer.
- * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
- * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
- */
 export class MultiHeadSelfAttention extends Module {
   public Wk: Linear;
   public Wq: Linear;
@@ -149,6 +137,16 @@ export class MultiHeadSelfAttention extends Module {
   public softmax: Softmax;
   public H: number;
 
+  /**
+   * Multi Head Self Attention layer implementation.
+   *
+   * @param {number} in_size - size of the last dimension of the input array.
+   * @param {number} out_size - size of the last dimension of the output array.
+   * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
+   * @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
+   * @param {number} dropout_prob - probability of zeroing each activation in the dropout Layer.
+   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
+   */
   constructor(
     in_size: number,
     out_size: number,
@@ -221,21 +219,20 @@ export class MultiHeadSelfAttention extends Module {
   }
 }
 
-/**
- * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
- *
- * @param {number} in_size - size of the last dimention of the input array.
- * @param {number} out_size - size of the last dimention of the output array.
- * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
- * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
- * @param {boolean} bias - wether to include a bias term.
- */
 export class FullyConnected extends Module {
   public l1: Linear;
   public relu: ReLU;
   public l2: Linear;
   public dropout: Dropout;
-
+  /**
+   * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
+   *
+   * @param {number} in_size - size of the last dimension of the input array.
+   * @param {number} out_size - size of the last dimension of the output array.
+   * @param {number} dropout_prob - probability of zeroing each activation in the dropout Layer.
+   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
+   * @param {boolean} bias - whether to include a bias term.
+   */
   constructor(in_size: number, out_size: number, dropout_prob = 0, device: string = 'cpu', bias: boolean = true) {
     super();
 
@@ -259,22 +256,22 @@ export class FullyConnected extends Module {
   }
 }
 
-/**
- * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms.
- *
- * @param {number} in_size - size of the last dimention of the input array.
- * @param {number} out_size - size of the last dimention of the output array.
- * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
- * @param {number} n_timesteps - length of text sequence to be processed bt Transformer.
- * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer.
- * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
- */
 export class Block extends Module {
   public att: MultiHeadSelfAttention;
   public ln1: LayerNorm;
   public fcc: FullyConnected;
   public ln2: LayerNorm;
 
+  /**
+   * Full transformer decoder block. Composed of Multi Head Self Attention, Fully Connected layers and Layer Norms.
+   *
+   * @param {number} in_size - size of the last dimension of the input array.
+   * @param {number} out_size - size of the last dimension of the output array.
+   * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
+   * @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
+   * @param {number} dropout_prob - probability of zeroing each activation in the dropout Layer.
+   * @param {string} device - Device to perform Tensor operations. Either "gpu" or "cpu".
+   */
   constructor(
     in_size: number,
     out_size: number,
@@ -313,18 +310,18 @@ export class Block extends Module {
 
 // Embedding Layers
 
-/**
- * Embedding class, turns indexes into vectors.
- *
- * @param {number} in_size - number of different indexes (vocabulary size).
- * @param {number} out_size - size of the embedding vector generated.
- */
 export class Embedding extends Module {
   public E: Tensor;
 
+  /**
+   * Embedding class, turns indexes into vectors.
+   *
+   * @param {number} in_size - number of different indexes (vocabulary size).
+   * @param {number} embed_size - size of the embedding vector generated.
+   */
   constructor(in_size: number, embed_size: number) {
     super();
-    this.E = randn([in_size, embed_size], true, false);
+    this.E = randn([in_size, embed_size], true, 'cpu', false);
   }
 
   /**
@@ -345,18 +342,18 @@ export class Embedding extends Module {
   }
 }
 
-/**
- * Embedding class, turns indexes into vectors.
- *
- * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
- * @param {number} embed_size - size of the embedding vector generated.
- */
 export class PositionalEmbedding extends Module {
   public E: Tensor;
 
+  /**
+   * Positional Embedding class, turns timestep indexes into vectors.
+   *
+   * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
+   * @param {number} embed_size - size of the embedding vector generated.
+   */
   constructor(n_timesteps: number, embed_size: number) {
     super();
-    this.E = randn([n_timesteps, embed_size], true, false);
+    this.E = randn([n_timesteps, embed_size], true, 'cpu', false);
  }
 
   /**
@@ -376,10 +373,10 @@ export class PositionalEmbedding extends Module {
 
 // Non-linearity Layers:
 
-/**
- * Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
- */
 export class ReLU extends Module {
+  /**
+   * Rectified Linear Unit nonlinearity. Returns z if z > 0, else 0.
+   */
   constructor() {
     super();
   }
@@ -413,10 +410,10 @@ export class ReLU extends Module {
   }
 }
 
-/**
- * Softmax nonlinearity class. Returns distribution of values (sum=1).
- */
 export class Softmax extends Module {
+  /**
+   * Softmax nonlinearity class. Returns a distribution of values (sum = 1).
+   */
   constructor() {
     super();
   }
@@ -436,14 +433,14 @@ export class Softmax extends Module {
 
 // Regularization Layers:
 
-/**
- * Dropout class, added usually after other layers, to drop values to zero with given probability
- *
- * @param {number} drop_prob - probability to drop each value in input.
- */
 export class Dropout extends Module {
   public p: number;
 
+  /**
+   * Dropout class, usually added after other layers to drop values to zero with a given probability.
+   *
+   * @param {number} drop_prob - probability to drop each value in the input.
+   */
   constructor(drop_prob: number) {
     super();
     this.p = drop_prob;
@@ -473,15 +470,15 @@ export class Dropout extends Module {
   }
 }
 
-/**
- * Layer Norm class, added usually after other layers to normalize across all of the output.
- *
- * @param {number} n_embed - size of the last dimention of the input.
- */
 export class LayerNorm extends Module {
   public gamma: Tensor;
   public beta: Tensor;
 
+  /**
+   * Layer Norm class, usually added after other layers to normalize across all of the output.
+   *
+   * @param {number} n_embed - size of the last dimension of the input.
+   */
   constructor(n_embed: number) {
     super();
     this.gamma = ones([n_embed], true);
@@ -498,10 +495,10 @@ export class LayerNorm extends Module {
 
 // Loss layers:
 
-/**
- * Cross Entropy Loss class, returns the loss given the output and the expected indexes.
- */
 export class CrossEntropyLoss extends Module {
+  /**
+   * Cross Entropy Loss class, returns the loss given the output and the expected indexes.
+   */
   constructor() {
     super();
   }
diff --git a/src/optim.ts b/src/optim.ts
index 9687d5a..37aeb90 100644
--- a/src/optim.ts
+++ b/src/optim.ts
@@ -1,13 +1,5 @@
 import { Parameter, Tensor, zeros } from "./tensor";
 
-/**
- * Adam optimizer class.
- * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form.
- * @param {number} lr - Scalar multiplying each learning step, controls speed of learning.
- * @param {number} reg - Scalar controling strength l2 regularization.
- * @param {(number)[]} betas - Two scalar floats controling how slowly the optimizer changes the "m" and "v" attributes.
- * @param {number} eps - Scalar added to denominator to stop it from ever going to zero.
- */
 export class Adam {
   // Declare Adam's types:
   params: (Parameter | Tensor)[];
@@ -16,7 +8,15 @@ export class Adam {
   b1: number;
   b2: number;
   eps: number;
-
+
+  /**
+   * Adam optimizer class.
+   * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = true) objects to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list.
+   * @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
+   * @param {number} reg - Scalar controlling the strength of L2 regularization.
+   * @param {(number)[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
+   * @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
+   */
   constructor(
     params: (Parameter | Tensor)[],
     lr = 1e-3,
diff --git a/src/tensor.ts b/src/tensor.ts
index e19bcf6..698d5e2 100644
--- a/src/tensor.ts
+++ b/src/tensor.ts
@@ -398,11 +398,11 @@ export class Tensor {
 
 // <<< Parameter class, tensor that always tracks gradients >>> //
 
-/**
- * Creates new Parameter (an instance of the Tensor class that always tracks gradients).
- * @param {object} data - Iterable containing the data to be stored in the Tensor.
- */
 export class Parameter extends Tensor {
+  /**
+   * Creates a new Parameter (an instance of the Tensor class that always tracks gradients).
+   * @param {object} data - Iterable containing the data to be stored in the Tensor.
+   */
   constructor(data: Array<any> | number) {
     super(data, true);
   }
@@ -681,8 +681,6 @@ class MatMul {
     }
   }
 }
-// ================================ NUEVO ================================ //
-// ================================ NUEVO ================================ //
 
 export class Pow {
   cache: any;
@@ -1956,8 +1954,8 @@ export function rand(shape: Array<number>, requires_grad = false, device = 'cpu'
 export function randn(
   shape: Array<number>,
   requires_grad = false,
-  xavier = false,
-  device = 'cpu'
+  device = 'cpu',
+  xavier = false
 ): Tensor {
   return new Tensor(
     _tensorInitializer(shape, () => {
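Call-site impact of the randn reorder above, as a minimal sketch only (not part of the diff; the import paths and example sizes are illustrative assumptions for code living alongside src/):

    import { randn } from "./tensor";
    import { Linear } from "./layers";

    // randn(shape, requires_grad, device, xavier): device now comes before xavier.
    const W = randn([64, 128], true, 'cpu', true);

    // Linear's public constructor (in_size, out_size, device, bias, xavier) is unchanged;
    // only its internal randn call was updated to the new argument order.
    const layer = new Linear(64, 128, 'cpu', true, true);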