diff --git a/classifier/data/triangle_pos.py b/classifier/data/triangle_pos.py
index d62c4a5..108d6d4 100644
--- a/classifier/data/triangle_pos.py
+++ b/classifier/data/triangle_pos.py
@@ -124,7 +124,7 @@ def make_sample(which_kind, which):
         else:
             kind = ConceptPosition.MORE_COMPOUND
     elif which_kind == 2 or which_kind == 4:
-        path2, extension = random_123_string(3, start_string=path, longer=True, return_extension=True, global_prefix=GLOBAL_PREFIX)
+        path2 = random_123_string(3, global_prefix=GLOBAL_PREFIX)
         file_dict2 = get_from_triangle(path2)
         sub_path, matching_entries = find_matching_entries(file_dict2, path2)
         _a, _b, _c, _d = matching_entries
@@ -143,7 +143,7 @@ def make_sample(which_kind, which):
         if which == 4:
             X = d
             d = _d
-            kind = ConceptPosition.SUMMARIZING_CONCEPT
+            kind = ConceptPosition.LESS_COMPOUND
     elif which_kind == 3 or which_kind == 5 or which_kind== 6 or which_kind ==7:
         path2, extension = random_123_string(4, min_length=2 ,start_string=path, longer=True, return_extension=True, global_prefix=GLOBAL_PREFIX)
diff --git a/classifier/data/weight_pos.py b/classifier/data/weight_pos.py
new file mode 100644
index 0000000..e9cd493
--- /dev/null
+++ b/classifier/data/weight_pos.py
@@ -0,0 +1,37 @@
+from collections import Counter
+from pprint import pprint
+
+from classifier.data.triangle_pos import data_path, yield_from_file, ConceptPosition
+from lib.json import decode, encode
+
+print ("loading")
+all_samples = []
+n = 0
+with open(data_path, 'r') as file:
+    lines = file.readlines()
+    for line in lines:
+        # Deserialize the JSON string back into a tuple
+        item = decode(line, ConceptPosition)
+        all_samples.append(item)
+        n += 1
+print ("loaded")
+c_labels = Counter([_[0][1] for _ in all_samples])
+("counted")
+print (c_labels)
+
+relative_prob = {
+    k: 1/(v/n)
+    for k, v in
+    c_labels.items()
+}
+pprint(relative_prob)
+
+a = sum(relative_prob.values())
+relative_prob = {
+    k.name: v/a
+    for k, v in
+    relative_prob.items()
+}
+pprint(relative_prob)
+
+print ([relative_prob.get(k, 0) for k in list(ConceptPosition.__members__)])
\ No newline at end of file
diff --git a/classifier/model/som.py b/classifier/model/som.py
index 792afa8..3317579 100644
--- a/classifier/model/som.py
+++ b/classifier/model/som.py
@@ -11,16 +11,16 @@ def generate_dummy_sequences(batch_size=64, seq_len=5, embedding_dim=128, num_cl
 
 class Som(nn.Module):
-    def __init__(self, embedding_dim, hidden_dim, output_dim, num_layers=1, bidirectional=True, dropout_rate=0.0):
+    def __init__(self, embedding_dim, hidden_dim, output_dim, num_layers=3, bidirectional=True, dropout_rate=0.2):
         super(Som, self).__init__()
-        self.conv1d = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
+        self.conv1d = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=5, padding=1)
         self.relu = nn.ReLU()
         self.bidirectional = bidirectional
 
         # Adjusting the hidden dimension if using a bidirectional GRU, as the outputs will be concatenated
         final_hidden_dim = hidden_dim * 2 if bidirectional else hidden_dim
 
-        self.gru = nn.GRU(
-            input_size=hidden_dim,
+        self.gru = nn.LSTM(
+            input_size=embedding_dim,
             hidden_size=hidden_dim,
             num_layers=num_layers,
             batch_first=True,
@@ -33,10 +33,10 @@ def __init__(self, embedding_dim, hidden_dim, output_dim, num_layers=1, bidirect
         self.fc = nn.Linear(final_hidden_dim, output_dim)
 
     def forward(self, x):
-        x = x.transpose(1, 2)  # Conv1D expects (batch, channels, seq_len)
-        x = self.conv1d(x)
-        x = self.relu(x)
-        x = x.transpose(1, 2)  # Back to (batch, seq_len, channels) for GRU
+        #x = x.transpose(1, 2)  # Conv1D expects (batch, channels, seq_len)
+        #x = self.conv1d(x)
+        #x = self.relu(x)
+        #x = x.transpose(1, 2)  # Back to (batch, seq_len, channels) for GRU
 
         out, _ = self.gru(x)
 
         # If using a bidirectional GRU, out will contain concatenated hidden states from both directions
@@ -96,9 +96,40 @@ def generate_sequences(data, sequence_length=5):
     output_size = 10  # Number of classes (based on the sum's decade)
     num_layers = 1  # Number of GRU layers
 
-    model = Som(input_size, hidden_size, output_size, num_layers)
+    # Parameters for the Transformer model
+    input_size = 28 * 28  # Flattened MNIST images
+    d_model = 512  # Size of the embedding
+    output_size = 10  # Number of classes (based on the sum's decade)
+    num_layers = 2  # Number of Transformer encoder layers
+    nhead = 8  # Number of heads in the multiheadattention models
+    dim_feedforward = 2048  # Size of the feedforward model in nn.TransformerEncoder
+    dropout = 0.1  # Dropout rate
+
+    # Check if GPU is available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Instantiate the model and move it to the device
+    model = TransformerModel(input_size, d_model, output_size, nhead, num_layers, dim_feedforward, dropout).to(device)
+
+    # Remaining code for data loading and transformation
+    transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(lambda x: torch.flatten(x))])
+    mnist_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
+    train_data, test_data = train_test_split(mnist_data, test_size=0.2, random_state=42)
 
-    # Model
+    # Generate sequences for training and testing
+    # Ensure that you move your tensors to the device where necessary
+    train_sequences, train_labels = generate_sequences(train_data,
+                                                       5)  # Modify this function to move tensors to the device
+    test_sequences, test_labels = generate_sequences(test_data, 5)  # Modify this function to move tensors to the device
+
+    train_sequences, train_labels = train_sequences.to(device), train_labels.to(device)
+    test_sequences, test_labels = test_sequences.to(device), test_labels.to(device)
+
+    # DataLoader
+    train_dataset = TensorDataset(train_sequences, train_labels)
+    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
+
+    # Training loop
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), lr=0.001)
 
@@ -107,10 +138,12 @@ def generate_sequences(data, sequence_length=5):
         model.train()
         total_loss = 0
         for sequences, labels in train_loader:
+            sequences, labels = sequences.to(device), labels.to(device)
            optimizer.zero_grad()
             outputs = model(sequences)
             loss = criterion(outputs, labels)
             loss.backward()
             optimizer.step()
             total_loss += loss.item()
-        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}')
\ No newline at end of file
+        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}')
+
diff --git a/classifier/models/models.yml b/classifier/models/models.yml
index f6e531d..d34ee55 100644
--- a/classifier/models/models.yml
+++ b/classifier/models/models.yml
@@ -27,7 +27,9 @@ models:
   som:
     <<: *som
     n_samples: 5
-    n_classes: 12
+    n_classes: 9
+    loss_weights: [ 0, 0.15172247835473368, 0.1476857349332255, 0.14668625042352593, 0.07449548855387905, 0.14560698498534902, 0.1154368336454986, 0.11241477773875054, 0.10595145136503747 ]
+
     # 0 - stay at higher concept
     # 1 - stay at 1
     # 2 - stay at 2
@@ -38,15 +40,18 @@ models:
     # 7 - deeper at 2
     # 8 - deeper at 3
 
-    batch_size: 64
-    batches_per_epoch: 170
+    batch_size: 256
+    batches_per_epoch: 200
     embedding_dim: 4096
     hidden_dim: 1024
+    weight_decay: 0.000000001
+    from_module: classifier.data.triangle_pos
+    f1: micro
 
     classes:
diff --git a/classifier/train/ntuple.py b/classifier/train/ntuple.py
index 4cb2e71..889048d 100644
--- a/classifier/train/ntuple.py
+++ b/classifier/train/ntuple.py
@@ -1,3 +1,4 @@
+import gc
 import logging
 import os
 
@@ -49,11 +50,22 @@ def colorized_comparison(prefix, predicted_labels, gold_labels):
     )
 
 
 def train(config_name):
+    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Empty the CUDA cache
+    #torch.cuda.empty_cache()
+
+    # Call the garbage collector to remove the objects from memory
+    gc.collect()
+
+    # Verify the memory status (optional)
+    #print(torch.cuda.memory_summary())  # Provides a summary of CUDA memory usage
 
     config = get_model_config(config_name)
-    model = get_model(config)
-    optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
-    criterion = nn.CrossEntropyLoss()
+    model = get_model(config)#.to(device)
+
+    optimizer = torch.optim.Adagrad(model.parameters(), lr=0.003, lr_decay=0.01)
+
+    criterion = nn.CrossEntropyLoss(weight=None if not config.get("loss_weights") else torch.tensor(list(config.loss_weights)))
 
     data_gen = DataGenerator(config)
 
@@ -63,7 +75,7 @@ def train(config_name):
     scheduler = CyclicLR(
         optimizer,
         mode="exp_range",
-        gamma=0.99,
+        gamma=0.999,
         base_lr=0,
         max_lr=0.006,
         step_size_up=config.batches_per_epoch * 0.7,
@@ -129,7 +141,10 @@ def train(config_name):
 
             # Backward pass and optimizer step
+
             loss.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)  # Gradient clipping
+
             optimizer.step()
 
             # Calculate F-score for training data using reshaped tensors
@@ -157,7 +172,7 @@ def train(config_name):
                 print(
                     f"Epoch {epoch + 1}, {batch=}, {loss=}, {train_fscore=:.2f} {optimizer.param_groups[0]['lr']:.2E}"
                 )
-                if train_fscore > max_fscore and train_fscore > 0.5:
+                if train_fscore > max_fscore and train_fscore > 0.7:
                     break
 
         avg_loss = total_loss / config.batches_per_epoch
diff --git a/lib/config.py b/lib/config.py
new file mode 100644
index 0000000..2b6da4d
--- /dev/null
+++ b/lib/config.py
@@ -0,0 +1,3 @@
+import os
+
+system_path = os.environ.get("SYSTEM", "../dialectics")
diff --git a/lib/git_tools.py b/lib/git_tools.py
new file mode 100644
index 0000000..3e17c40
--- /dev/null
+++ b/lib/git_tools.py
@@ -0,0 +1,31 @@
+import subprocess  # For running shell commands, 🚀🐚🚀
+                   # With Python's touch, so grand! 🌟🐍🌟
+
+# A function so neat, a treat to repeat, 🍬🎶🍬
+def check_git_config():  # Let's take a seat! 🪑🌟🪑
+    try:
+        # For email, we'll peek, with Python technique! 📧🔍📧
+        email = subprocess.check_output(
+            ["git", "config", "--global", "user.email"],
+            text=True).strip()
+        # For name, the same, in this Git game! 🎮🔍🎮
+        name = subprocess.check_output(
+            ["git", "config", "--global", "user.name"],
+            text=True).strip()
+
+        # If found around, let joy resound! 🎉✨🎉
+        if email and name:
+            print(f"Email found: {email}, 📧🌈📧\nName's around: {name}! 🌟👤🌟")
+            return True
+        else:
+            print("Some configs are missing, 🚫🤔🚫\nLet's keep on fishing! 🎣🌊🎣")
+            return False
+    except subprocess.CalledProcessError:
+        # If error's in sight, we'll set it right! 🚨🛠️🚨
+        print("Git configs not found, 🚫🔍🚫\nIn silence they're bound. 🤫🌌🤫")
+        return None
+
+
+if __name__ == "__main__":
+    # Now let's invoke, with a stroke of hope! 🌈🙏🌈
+    check_git_config()
\ No newline at end of file
diff --git a/lib/json.py b/lib/json.py
index 3cc8730..efb01bb 100644
--- a/lib/json.py
+++ b/lib/json.py
@@ -1,28 +1,27 @@
 import json
 from enum import Enum
 
-
 class EnumCodec(json.JSONEncoder):
-    def __init__(self, enum_type, *args, **kwargs):
-        self.enum_type = enum_type
-        super().__init__(*args, **kwargs)
-
     def default(self, obj):
         if isinstance(obj, Enum):
             return {"__enum__": f"{obj.__class__.__name__}.{obj.name}"}
-        return super().default(obj)
+        else:
+            return super().default(obj)
 
-    @classmethod
-    def decode(cls, enum_type):
-        def decode_enum(dct):
-            if "__enum__" in dct:
-                enum_name, member_name = dct["__enum__"].split('.')
-                if enum_name == enum_type.__name__:
-                    return enum_type[member_name]
-            return dct
-        return decode_enum
+    @staticmethod
+    def decode_enum(dct, enum_type=None):
+        if "__enum__" in dct:
+            enum_name, member_name = dct["__enum__"].split('.')
+            # Assuming enum_type is provided and matches enum_name
+            if enum_type and enum_type.__name__ == enum_name:
+                return enum_type[member_name]
+        return dct
 
-def encode(enum_instance, enum_type):
-    return json.dumps(enum_instance, cls=EnumCodec, enum_type=enum_type)
+def encode(data, enum_type=None):
+    # Convert enum keys to strings
+    if isinstance(data, dict):
+        data = {k.name if isinstance(k, Enum) else k: v for k, v in data.items()}
+    return json.dumps(data, cls=EnumCodec)
 
 def decode(json_str, enum_type):
-    return json.loads(json_str, object_hook=EnumCodec.decode(enum_type))
+    object_hook = lambda dct: EnumCodec.decode_enum(dct, enum_type=enum_type)
+    return json.loads(json_str, object_hook=object_hook)
\ No newline at end of file