Skip to content

Commit

Permalink
chore: rename astra to astradb (#3458)
Browse files Browse the repository at this point in the history
DataStax wanted all references to be `astradb` instead of `astra`, as per
@erichare.

We'll also have to do the same in unstructured-ingest :)
  • Loading branch information
potter-potter authored Aug 5, 2024
1 parent 7e88744 commit 59ec642
Show file tree
Hide file tree
Showing 31 changed files with 123 additions and 112 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## 0.15.2-dev0

### Enhancements

### Features


### Fixes

* **Renames Astra to Astra DB** Conforms with DataStax internal naming conventions.

## 0.15.1

### Enhancements
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ include requirements/huggingface.in

# Ingest extras
include requirements/ingest/airtable.in
include requirements/ingest/astra.in
include requirements/ingest/astradb.in
include requirements/ingest/azure-cognitive-search.in
include requirements/ingest/azure.in
include requirements/ingest/biomed.in
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,9 @@ install-ingest-mongodb:
install-ingest-databricks-volumes:
python3 -m pip install -r requirements/ingest/databricks-volumes.txt

.PHONY: install-ingest-astra
install-ingest-astra:
python3 -m pip install -r requirements/ingest/astra.txt
.PHONY: install-ingest-astradb
install-ingest-astradb:
python3 -m pip install -r requirements/ingest/astradb.txt

.PHONY: install-ingest-clarifai
install-ingest-clarifai:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./ingest/astra.in
# pip-compile ./ingest/astradb.in
#
anyio==3.7.1
# via
# -c ./ingest/../base.txt
# -c ./ingest/../deps/constraints.txt
# httpx
astrapy==1.4.0
# via -r ./ingest/astra.in
# via -r ./ingest/astradb.in
bson==0.5.10
# via astrapy
cassandra-driver==3.29.1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def load_requirements(file_list: Optional[Union[str, List[str]]] = None) -> List
"xlsx": xlsx_reqs,
# Extra requirements for data connectors
"airtable": load_requirements("requirements/ingest/airtable.in"),
"astra": load_requirements("requirements/ingest/astra.in"),
"astradb": load_requirements("requirements/ingest/astradb.in"),
"azure": load_requirements("requirements/ingest/azure.in"),
"azure-cognitive-search": load_requirements(
"requirements/ingest/azure-cognitive-search.in",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e
SRC_PATH=$(dirname "$(realpath "$0")")
SCRIPT_DIR=$(dirname "$SRC_PATH")
cd "$SCRIPT_DIR"/.. || exit 1
OUTPUT_FOLDER_NAME=astra-dest
OUTPUT_FOLDER_NAME=astradb-dest
OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
Expand All @@ -21,7 +21,7 @@ if [ -z "$ASTRA_DB_API_ENDPOINT" ]; then
fi

RANDOM_SUFFIX=$((RANDOM % 100000 + 1))
COLLECTION_NAME="astra_test_output_$RANDOM_SUFFIX"
COLLECTION_NAME="astradb_test_output_$RANDOM_SUFFIX"
EMBEDDING_DIMENSION=384

# shellcheck disable=SC1091
Expand All @@ -31,7 +31,7 @@ function cleanup() {
cleanup_dir "$OUTPUT_DIR"
cleanup_dir "$WORK_DIR"

python "$SCRIPT_DIR"/python/test-ingest-astra-output.py \
python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" down
Expand All @@ -51,14 +51,14 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
--chunk-max-characters 1500 \
--chunk-multipage-sections \
--embedding-provider "langchain-huggingface" \
astra \
astradb \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" \
--embedding-dimension "$EMBEDDING_DIMENSION" \
--requested-indexing-policy '{"deny": ["metadata"]}'

python "$SCRIPT_DIR"/python/test-ingest-astra-output.py \
python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" check
2 changes: 1 addition & 1 deletion test_unstructured_ingest/dest/mongodb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ trap cleanup EXIT

# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library,
# which is incompatible with the bson installed from pypi. bson is installed as part of the
# astra dependencies.
# astradb dependencies.
# ref: https://pymongo.readthedocs.io/en/stable/installation.html
pip uninstall -y bson pymongo
make install-ingest-mongodb
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_client(token, api_endpoint, collection_name) -> AstraDB:
return astra_db, astra_db_collection


@click.group(name="astra-ingest")
@click.group(name="astradb-ingest")
@click.option("--token", type=str)
@click.option("--api-endpoint", type=str)
@click.option("--collection-name", type=str, default="collection_test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e
SRC_PATH=$(dirname "$(realpath "$0")")
SCRIPT_DIR=$(dirname "$SRC_PATH")
cd "$SCRIPT_DIR"/.. || exit 1
OUTPUT_FOLDER_NAME=astra
OUTPUT_FOLDER_NAME=astradb
OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME
Expand All @@ -23,7 +23,7 @@ fi
COLLECTION_NAME="ingest_test_src"

PYTHONPATH=. ./unstructured/ingest/main.py \
astra \
astradb \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" \
Expand Down
2 changes: 1 addition & 1 deletion test_unstructured_ingest/src/mongodb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fi

# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library,
# which is incompatible with the bson installed from pypi. bson is installed as part of the
# astra dependencies.
# astradb dependencies.
# ref: https://pymongo.readthedocs.io/en/stable/installation.html
pip uninstall -y bson pymongo
make install-ingest-mongodb
Expand Down
2 changes: 1 addition & 1 deletion test_unstructured_ingest/test-ingest-dest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cd "$SCRIPT_DIR"/.. || exit 1
export OMP_THREAD_LIMIT=1

all_tests=(
'astra.sh'
'astradb.sh'
'azure.sh'
'azure-cognitive-search.sh'
'box.sh'
Expand Down
2 changes: 1 addition & 1 deletion test_unstructured_ingest/test-ingest-src.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export OMP_THREAD_LIMIT=1
all_tests=(
's3.sh'
's3-minio.sh'
'astra.sh'
'astradb.sh'
'azure.sh'
'biomed-api.sh'
'biomed-path.sh'
Expand Down
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.15.1" # pragma: no cover
__version__ = "0.15.2-dev0" # pragma: no cover
8 changes: 4 additions & 4 deletions unstructured/ingest/cli/cmds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from unstructured.ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd

from .airtable import get_base_src_cmd as airtable_base_src_cmd
from .astra import get_base_dest_cmd as astra_base_dest_cmd
from .astra import get_base_src_cmd as astra_base_src_cmd
from .astradb import get_base_dest_cmd as astradb_base_dest_cmd
from .astradb import get_base_src_cmd as astradb_base_src_cmd
from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd
from .biomed import get_base_src_cmd as biomed_base_src_cmd
from .chroma import get_base_dest_cmd as chroma_base_dest_cmd
Expand Down Expand Up @@ -63,7 +63,7 @@

base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [
airtable_base_src_cmd,
astra_base_src_cmd,
astradb_base_src_cmd,
azure_base_src_cmd,
biomed_base_src_cmd,
box_base_src_cmd,
Expand Down Expand Up @@ -106,7 +106,7 @@
)

base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [
astra_base_dest_cmd,
astradb_base_dest_cmd,
azure_base_dest_cmd,
box_base_dest_cmd,
chroma_base_dest_cmd,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import click

from unstructured.ingest.cli.interfaces import CliConfig, Dict
from unstructured.ingest.connector.astra import AstraWriteConfig, SimpleAstraConfig
from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig


@dataclass
class AstraCliConfig(SimpleAstraConfig, CliConfig):
class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
Expand Down Expand Up @@ -48,7 +48,7 @@ def get_cli_options() -> t.List[click.Option]:


@dataclass
class AstraCliWriteConfig(AstraWriteConfig, CliConfig):
class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
Expand Down Expand Up @@ -81,8 +81,8 @@ def get_base_src_cmd():
from unstructured.ingest.cli.base.src import BaseSrcCmd

cmd_cls = BaseSrcCmd(
cmd_name="astra",
cli_config=AstraCliConfig,
cmd_name="astradb",
cli_config=AstraDBCliConfig,
)
return cmd_cls

Expand All @@ -91,9 +91,9 @@ def get_base_dest_cmd():
from unstructured.ingest.cli.base.dest import BaseDestCmd

cmd_cls = BaseDestCmd(
cmd_name="astra",
cli_config=AstraCliConfig,
additional_cli_options=[AstraCliWriteConfig],
write_config=AstraWriteConfig,
cmd_name="astradb",
cli_config=AstraDBCliConfig,
additional_cli_options=[AstraDBCliWriteConfig],
write_config=AstraDBWriteConfig,
)
return cmd_cls
Loading

0 comments on commit 59ec642

Please sign in to comment.