-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from NERC-CEH/more_data
Proof of concept of similarity search with the scivision model
- Loading branch information
Showing
58 changed files
with
1,567 additions
and
2,559 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[flake8] | ||
max-line-length=120 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
.env | ||
**/.ipynb_checkpoints/ | ||
**/__pycache__/ | ||
vectors/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""Utilities for expressing our dataset as an intake catalog""" | ||
|
||
|
||
def intake_yaml(
    test_url: str,
    catalog_url: str,
) -> str:
    """
    Return a minimal YAML template describing this dataset as an intake catalog.

    The catalog declares two sources:

    * ``test_image`` - a single test image, read with
      ``intake_xarray.image.ImageSource`` from ``test_url``
    * ``plankton`` - a CSV index of all the images, read with
      ``intake.source.csv.CSVSource`` from ``catalog_url``

    Example: plankton dataset made available through scivision, metadata
    https://raw.githubusercontent.com/alan-turing-institute/plankton-cefas-scivision/test_data_catalog/scivision.yml

    Both URLs are interpolated verbatim inside double quotes; no escaping is
    applied, so they must not themselves contain double quotes.
    """
    # NOTE(review): the nesting below (sources -> source name -> description /
    # origin / driver / args) follows the intake catalog layout used by the
    # scivision example linked above - confirm against the deployed catalog.
    # The template text starts at column 0 on purpose: indentation is
    # significant in YAML and this string is returned as-is.
    template = f"""
sources:
  test_image:
    description: Single test image from the plankton collection
    origin:
    driver: intake_xarray.image.ImageSource
    args:
      urlpath: ["{test_url}"]
      exif_tags: False
  plankton:
    description: A CSV index of all the images of plankton
    origin:
    driver: intake.source.csv.CSVSource
    args:
      urlpath: ["{catalog_url}"]
"""
    # Structure decision: `coerce_shape: [256, 256]` is deliberately left out
    # of the args above (it was present only as a commented-out option).
    return template
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
"""Thin wrapper around the s3 object store with images and metadata""" | ||
|
||
import s3fs | ||
from dotenv import load_dotenv | ||
import os | ||
|
||
# Runs at import time, before s3_endpoint() reads os.environ.
# Presumably this loads a local .env file into the environment - confirm
# against the python-dotenv documentation.
load_dotenv()
|
||
|
||
def s3_endpoint():
    """Return an ``s3fs.S3FileSystem`` handle for the object store.

    Connection settings are read from the environment on every call:

    * ``FSSPEC_S3_KEY`` / ``FSSPEC_S3_SECRET`` - credentials; each falls back
      to an empty string when unset
    * ``ENDPOINT`` - the object store endpoint URL; required

    Raises:
        KeyError: if ``ENDPOINT`` is not set in the environment.
    """
    return s3fs.S3FileSystem(
        anon=False,  # always authenticate, never anonymous access
        key=os.environ.get("FSSPEC_S3_KEY", ""),
        secret=os.environ.get("FSSPEC_S3_SECRET", ""),
        # Unlike the credentials, the endpoint has no fallback value.
        client_kwargs={"endpoint_url": os.environ["ENDPOINT"]},
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from intake_xarray import ImageSource | ||
from torch import Tensor | ||
from cyto_ml.models.scivision import prepare_image, flat_embeddings | ||
|
||
|
||
def test_embeddings(scivision_model, single_image):
    """Run one image through the model and check the flattened embedding size.

    ``scivision_model`` and ``single_image`` are pytest fixtures
    (presumably defined in the project's conftest - not visible here).
    """
    # Load the image lazily, then convert it to the model's input format.
    image = ImageSource(single_image).to_dask()
    features = scivision_model(prepare_image(image))

    assert isinstance(features, Tensor)

    embeddings = flat_embeddings(features)

    # The flat embedding must have one entry per element of the features'
    # second dimension.
    assert len(embeddings) == features.size()[1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from cyto_ml.data.vectorstore import vector_store, client | ||
import numpy as np | ||
|
||
|
||
def test_client_no_telemetry():
    """The shared vector store client must have anonymized telemetry disabled."""
    assert not client.get_settings()["anonymized_telemetry"]
|
||
|
||
def test_store():
    """Add one embedding to the default collection and read it back by id."""
    store = vector_store()  # default 'test_collection'
    record_id = "id_1"  # the store insists on a str id
    filename = "https://example.com/filename.tif"

    # Seeded generator so the test data is reproducible between runs.
    rng = np.random.default_rng(0)
    embedding = rng.random(2048).tolist()

    store.add(
        documents=[filename],  # we use image location in s3 rather than text content
        embeddings=[embedding],  # wants a list of lists
        ids=[record_id],  # wants a list of ids
    )

    record = store.get(record_id, include=["embeddings"])
    assert record
    # A bare truthiness check passes for any non-empty dict, so also confirm
    # that the record we asked for actually came back.
    assert record["ids"] == [record_id]
    assert len(record["embeddings"][0]) == 2048
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.