Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make project usable by other Python projects: remove Git LFS and move files into an audioclip folder #7

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitattributes

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
assets/
.ipynb_checkpoints/
__pycache__/
3 changes: 0 additions & 3 deletions assets/AudioCLIP-Full-Training.pt

This file was deleted.

3 changes: 0 additions & 3 deletions assets/AudioCLIP-Partial-Training.pt

This file was deleted.

3 changes: 0 additions & 3 deletions assets/CLIP.pt

This file was deleted.

3 changes: 0 additions & 3 deletions assets/ESRNXFBSP.pt

This file was deleted.

1 change: 0 additions & 1 deletion assets/README.md

This file was deleted.

3 changes: 0 additions & 3 deletions assets/bpe_simple_vocab_16e6.txt.gz

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os as _os
import sys as _sys

from ignite_trainer.version import __version__
from audioclip.ignite_trainer.version import __version__
from ._trainer import main, run
from ._utils import load_class
from ._interfaces import AbstractNet, AbstractTransform
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
from collections import defaultdict
from collections.abc import Iterable

from ignite_trainer import _utils
from ignite_trainer import _visdom
from ignite_trainer import _interfaces
from audioclip.ignite_trainer import _utils
from audioclip.ignite_trainer import _visdom
from audioclip.ignite_trainer import _interfaces

VISDOM_HOST = 'localhost'
VISDOM_PORT = 8097
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions model/audioclip.py → audioclip/model/audioclip.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import torch
import torch.nn.functional as F

from model.clip import CLIP
from model.clip.clip import tokenize
from model.esresnet import ESResNeXtFBSP
from audioclip.model.clip import CLIP
from audioclip.model.clip.clip import tokenize
from audioclip.model.esresnet import ESResNeXtFBSP

from typing import List
from typing import Tuple
Expand Down Expand Up @@ -157,7 +157,7 @@ def forward(self,

if text is not None:
if batch_indices is None:
batch_indices = torch.arange(len(text), dtype=torch.int64, device=self.device)
batch_indices = torch.arange(len(text), dtype=self.dtype, device=self.device)

text_features = self.encode_text(text, '{}', batch_indices)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions model/clip/clip.py → audioclip/model/clip/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from tqdm import tqdm

from .model import build_model
from utils.simple_tokenizer import SimpleTokenizer as _Tokenizer
from audioclip.model.clip.model import build_model
from audioclip.utils.simple_tokenizer import SimpleTokenizer as _Tokenizer

__all__ = ["available_models", "load", "tokenize"]
_tokenizer = _Tokenizer()
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 7 additions & 5 deletions model/esresnet/base.py → audioclip/model/esresnet/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import torchvision as tv

import ignite_trainer as it
import audioclip.ignite_trainer as it

from model.esresnet import attention
from utils.transforms import scale
from audioclip.model.esresnet import attention
from audioclip.utils.transforms import scale

from typing import cast
from typing import List
Expand Down Expand Up @@ -386,9 +386,11 @@ def _forward_classifier(self, x: torch.Tensor) -> torch.Tensor:

def forward(self,
x: torch.Tensor,
y: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
y: Optional[torch.Tensor] = None,
skip_prepro=False) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:

x = self._forward_pre_processing(x)
if not skip_prepro:
x = self._forward_pre_processing(x)
x = self._forward_features(x)
x = self._forward_reduction(x)
y_pred = self._forward_classifier(x)
Expand Down
30 changes: 15 additions & 15 deletions model/esresnet/fbsp.py → audioclip/model/esresnet/fbsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

import torchvision as tv

from utils import transforms
from model.esresnet.base import _ESResNet
from model.esresnet.base import Bottleneck
from audioclip.utils import transforms
from audioclip.model.esresnet.base import _ESResNet
from audioclip.model.esresnet.base import Bottleneck

from typing import cast
from typing import List
Expand Down Expand Up @@ -135,19 +135,19 @@ def _inject_members(self):
)

def spectrogram(self, x: torch.Tensor) -> torch.Tensor:
with torch.no_grad():
frames = transforms.frame_signal(
signal=x.view(-1, x.shape[-1]),
frame_length=self.win_length,
hop_length=self.hop_length,
window=self.window
)
#with torch.no_grad():
frames = transforms.frame_signal(
signal=x.view(-1, x.shape[-1]),
frame_length=self.win_length,
hop_length=self.hop_length,
window=self.window
)

if self.n_fft > self.win_length:
pad_length = self.n_fft - self.win_length
pad_left = pad_length // 2
pad_right = pad_length - pad_left
frames = F.pad(frames, [pad_left, pad_right])
if self.n_fft > self.win_length:
pad_length = self.n_fft - self.win_length
pad_left = pad_length // 2
pad_right = pad_length - pad_left
frames = F.pad(frames, [pad_left, pad_right])

spec, ttf_weights_ = self.fbsp(frames)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import sklearn.model_selection as skms

import utils.transforms as transforms
import audioclip.utils.transforms as transforms

from typing import Any
from typing import Dict
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

@lru_cache()
def default_bpe():
return os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'assets', 'bpe_simple_vocab_16e6.txt.gz')
return os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'assets', 'bpe_simple_vocab_16e6.txt.gz')


@lru_cache()
Expand Down
2 changes: 1 addition & 1 deletion utils/transforms.py → audioclip/utils/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
import torchvision as tv

import ignite_trainer as it
import audioclip.ignite_trainer as it


def scale(old_value, old_min, old_max, new_min, new_max):
Expand Down
Loading