diff --git a/.github/workflows/ci_pipe.yml b/.github/workflows/ci_pipe.yml
index 4226827473..96b2e561d7 100644
--- a/.github/workflows/ci_pipe.yml
+++ b/.github/workflows/ci_pipe.yml
@@ -137,7 +137,8 @@ jobs:
   test:
     name: Test
     runs-on: linux-amd64-gpu-v100-latest-1
-    timeout-minutes: 60
+    # Consider lowering this back down to 60 minutes per https://github.com/nv-morpheus/Morpheus/issues/1948
+    timeout-minutes: 90
     container:
       credentials:
         username: '$oauthtoken'
diff --git a/examples/gnn_fraud_detection_pipeline/stages/model.py b/examples/gnn_fraud_detection_pipeline/stages/model.py
index c3c8c9a8f6..253409b203 100644
--- a/examples/gnn_fraud_detection_pipeline/stages/model.py
+++ b/examples/gnn_fraud_detection_pipeline/stages/model.py
@@ -407,7 +407,7 @@ def load_model(model_dir: str,
                       n_layers=hyperparameters['n_layers'],
                       embedding_size=hyperparameters['embedding_size'],
                       target=hyperparameters['target_node']).to(device)
-    model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt')))
+    model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt'), weights_only=False))

     return model, graph, hyperparameters
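Note on the `model.py` change above: PyTorch 2.6 flipped the default of `torch.load` to `weights_only=True`, which refuses to unpickle arbitrary Python objects and breaks loading checkpoints like this one. Passing `weights_only=False` restores the legacy behavior and is only appropriate for trusted checkpoints, since unpickling can execute code. A minimal, self-contained sketch of the distinction (the `model.pt` here is a throwaway file created by the example, not the pipeline's real checkpoint):

```python
import torch

# Create a tiny checkpoint so the example stands alone.
torch.save({"w": torch.zeros(2, 2)}, "model.pt")

# On PyTorch >= 2.6 the default is weights_only=True, which only allows
# tensors and other allow-listed types. Checkpoints holding pickled
# Python objects need the explicit opt-out below -- trusted sources only.
state = torch.load("model.pt", weights_only=False)
print(state["w"].shape)  # torch.Size([2, 2])
```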
diff --git a/examples/llm/common/utils.py b/examples/llm/common/utils.py
index fd6d21451a..4e09a9de10 100644
--- a/examples/llm/common/utils.py
+++ b/examples/llm/common/utils.py
@@ -15,7 +15,7 @@
 import logging

 import pymilvus
-from langchain.embeddings import HuggingFaceEmbeddings  # pylint: disable=no-name-in-module
+from langchain_community.embeddings import HuggingFaceEmbeddings

 from morpheus_llm.llm.services.llm_service import LLMService
 from morpheus_llm.llm.services.nemo_llm_service import NeMoLLMService
diff --git a/examples/llm/vdb_upload/langchain.py b/examples/llm/vdb_upload/langchain.py
index 9b9b616bd3..d0f42b2f20 100644
--- a/examples/llm/vdb_upload/langchain.py
+++ b/examples/llm/vdb_upload/langchain.py
@@ -16,9 +16,9 @@

 # pylint: disable=no-name-in-module
 from langchain.document_loaders.rss import RSSFeedLoader
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores.milvus import Milvus
+from langchain_community.embeddings import HuggingFaceEmbeddings

 from examples.llm.vdb_upload.vdb_utils import DEFAULT_RSS_URLS
 from morpheus.utils.logging_timer import log_time
diff --git a/examples/llm/vdb_upload/module/content_extractor_module.py b/examples/llm/vdb_upload/module/content_extractor_module.py
index c02dd89718..7b2f16aa2b 100755
--- a/examples/llm/vdb_upload/module/content_extractor_module.py
+++ b/examples/llm/vdb_upload/module/content_extractor_module.py
@@ -29,7 +29,8 @@
 import pypdfium2 as libpdfium
 from docx import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from pydantic import BaseModel  # pylint: disable=no-name-in-module
+from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError
 from pydantic import field_validator
@@ -43,9 +44,7 @@ class CSVConverterSchema(BaseModel):
     chunk_overlap: int = 102  # Example default value
     chunk_size: int = 1024
     text_column_names: List[str]
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 class ContentExtractorSchema(BaseModel):
@@ -54,6 +53,7 @@ class ContentExtractorSchema(BaseModel):
     chunk_size: int = 512
     converters_meta: Dict[str, Dict] = Field(default_factory=dict)
     num_threads: int = 10
+    model_config = ConfigDict(extra='forbid')

     @field_validator('converters_meta', mode="before")
     @classmethod
@@ -66,9 +66,6 @@ def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]:
             validated_meta[key] = value
         return validated_meta

-    class Config:
-        extra = "forbid"
-

 logger = logging.getLogger(__name__)
diff --git a/examples/llm/vdb_upload/module/file_source_pipe.py b/examples/llm/vdb_upload/module/file_source_pipe.py
index b39ee23e4e..c6d2121e46 100644
--- a/examples/llm/vdb_upload/module/file_source_pipe.py
+++ b/examples/llm/vdb_upload/module/file_source_pipe.py
@@ -20,6 +20,7 @@

 import mrc
 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError

@@ -48,9 +49,7 @@ class FileSourcePipeSchema(BaseModel):
     vdb_resource_name: str
     watch: bool = False  # Flag to watch file changes
     watch_interval: float = -5.0  # Interval to watch file changes
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 FileSourcePipeLoaderFactory = ModuleLoaderFactory("file_source_pipe", "morpheus_examples_llm", FileSourcePipeSchema)
diff --git a/examples/llm/vdb_upload/module/rss_source_pipe.py b/examples/llm/vdb_upload/module/rss_source_pipe.py
index 55b309e032..e69cc55a97 100644
--- a/examples/llm/vdb_upload/module/rss_source_pipe.py
+++ b/examples/llm/vdb_upload/module/rss_source_pipe.py
@@ -20,9 +20,10 @@

 import mrc
 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError
-from pydantic import validator
+from pydantic import field_validator

 from morpheus.modules.general.monitor import MonitorLoaderFactory
 from morpheus.modules.input.rss_source import RSSSourceLoaderFactory
@@ -52,8 +53,9 @@ class RSSSourcePipeSchema(BaseModel):
     strip_markup: bool = True
     vdb_resource_name: str
     web_scraper_config: Optional[Dict[Any, Any]] = None
+    model_config = ConfigDict(extra='forbid')

-    @validator('feed_input', pre=True)
+    @field_validator('feed_input')
     def validate_feed_input(cls, to_validate):  # pylint: disable=no-self-argument
         if isinstance(to_validate, str):
             return [to_validate]
@@ -63,9 +65,6 @@ def validate_feed_input(cls, to_validate):  # pylint: disable=no-self-argument

         raise ValueError('feed_input must be a string or a list of strings')

-    class Config:
-        extra = "forbid"
-

 RSSSourcePipeLoaderFactory = ModuleLoaderFactory("rss_source_pipe", "morpheus_examples_llm", RSSSourcePipeSchema)
diff --git a/examples/llm/vdb_upload/module/schema_transform.py b/examples/llm/vdb_upload/module/schema_transform.py
index 43e86c3dd4..ae631cd615 100644
--- a/examples/llm/vdb_upload/module/schema_transform.py
+++ b/examples/llm/vdb_upload/module/schema_transform.py
@@ -20,6 +20,7 @@

 import mrc
 import mrc.core.operators as ops
 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError

@@ -39,16 +40,12 @@ class ColumnTransformSchema(BaseModel):
     dtype: str
     op_type: str
     from_: Optional[str] = Field(None, alias="from")
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 class SchemaTransformSchema(BaseModel):
     schema_transform_config: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 SchemaTransformLoaderFactory = ModuleLoaderFactory("schema_transform", "morpheus_examples_llm", SchemaTransformSchema)
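The schema edits above (and the matching ones later in this diff) are the standard pydantic v1 to v2 migration: the inner `class Config` becomes a `model_config = ConfigDict(...)` attribute, and `@validator(..., pre=True)` becomes `@field_validator(..., mode='before')`, or plain `@field_validator(...)` where pre-validation is not needed. A minimal sketch of the v2 pattern using a hypothetical `FeedSchema`, not a schema from this repo:

```python
from pydantic import BaseModel, ConfigDict, ValidationError, field_validator


class FeedSchema(BaseModel):
    # v2 replacement for `class Config: extra = "forbid"`.
    model_config = ConfigDict(extra='forbid')

    feed_input: list[str]

    @field_validator('feed_input', mode='before')
    @classmethod
    def coerce_feed_input(cls, value):
        # Runs before type coercion: normalize a bare string to a list.
        if isinstance(value, str):
            return [value]
        return value


print(FeedSchema(feed_input='https://example.com/rss').feed_input)  # ['https://example.com/rss']

try:
    FeedSchema(feed_input=[], unexpected=1)
except ValidationError as exc:
    print(exc.error_count())  # 1 -- extra='forbid' rejects `unexpected`
```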
diff --git a/examples/llm/vdb_upload/module/vdb_resource_tagging_module.py b/examples/llm/vdb_upload/module/vdb_resource_tagging_module.py
index 2e3227149a..c11c96fd8b 100644
--- a/examples/llm/vdb_upload/module/vdb_resource_tagging_module.py
+++ b/examples/llm/vdb_upload/module/vdb_resource_tagging_module.py
@@ -16,6 +16,7 @@

 import mrc
 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import ValidationError

 from morpheus.messages import ControlMessage
@@ -27,9 +28,7 @@ class VDBResourceTaggingSchema(BaseModel):
     vdb_resource_name: str
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 VDBResourceTaggingLoaderFactory = ModuleLoaderFactory("vdb_resource_tagging",
diff --git a/examples/llm/vdb_upload/module/web_scraper_module.py b/examples/llm/vdb_upload/module/web_scraper_module.py
index c361339d49..9321246931 100644
--- a/examples/llm/vdb_upload/module/web_scraper_module.py
+++ b/examples/llm/vdb_upload/module/web_scraper_module.py
@@ -22,7 +22,8 @@

 import requests_cache
 from bs4 import BeautifulSoup
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from pydantic import BaseModel  # pylint: disable=no-name-in-module
+from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import ValidationError

 import cudf
@@ -41,9 +42,7 @@ class WebScraperSchema(BaseModel):
     enable_cache: bool = False
     cache_path: str = "./.cache/http/RSSDownloadStage.sqlite"
     cache_dir: str = "./.cache/llm/rss"
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')


 WebScraperLoaderFactory = ModuleLoaderFactory("web_scraper", "morpheus_examples_llm", WebScraperSchema)
diff --git a/pyproject.toml b/pyproject.toml
index c5b01a7d3f..678c060041 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,7 +41,9 @@ filterwarnings = [

 testpaths = ["tests"]

-addopts = "--benchmark-disable"
+# Don't run the benchmarks by default, and don't search for tests in the tests/_utils directory, which would
+# trigger false alarms
+addopts = "--benchmark-disable --ignore=tests/_utils"

 asyncio_mode = "auto"
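The new `--ignore=tests/_utils` flag keeps the collector out of `tests/_utils`, whose `test_*`-named helper modules (such as `tests/_utils/test_directories.py`, also touched later in this diff) would otherwise be collected as false-positive tests. If a flag in `addopts` is undesirable, pytest supports the same exclusion from Python via a `conftest.py`; a hedged sketch of that alternative, which is not what this diff does:

```python
# conftest.py at the repository root -- hypothetical alternative to the
# --ignore flag: pytest reads this module-level name during collection
# and skips any paths matching the glob.
collect_ignore_glob = ["tests/_utils/*"]
```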
diff --git a/python/morpheus/morpheus/modules/schemas/deserialize_schema.py b/python/morpheus/morpheus/modules/schemas/deserialize_schema.py
index 4031a9e74b..d6f054e10a 100644
--- a/python/morpheus/morpheus/modules/schemas/deserialize_schema.py
+++ b/python/morpheus/morpheus/modules/schemas/deserialize_schema.py
@@ -18,6 +18,7 @@
 from typing import Optional

 from pydantic import BaseModel
+from pydantic import ConfigDict

 logger = logging.getLogger(__name__)

@@ -30,6 +31,4 @@ class DeserializeSchema(BaseModel):
     batch_size: int = 1024
     max_concurrency: int = 1
     should_log_timestamp: bool = True
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')
diff --git a/python/morpheus/morpheus/modules/schemas/multi_file_source_schema.py b/python/morpheus/morpheus/modules/schemas/multi_file_source_schema.py
index 5ce4f50b7f..e2a83043ef 100644
--- a/python/morpheus/morpheus/modules/schemas/multi_file_source_schema.py
+++ b/python/morpheus/morpheus/modules/schemas/multi_file_source_schema.py
@@ -16,6 +16,7 @@
 from typing import List

 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field

 logger = logging.getLogger(f"morpheus.{__name__}")

@@ -26,6 +27,4 @@ class MultiFileSourceSchema(BaseModel):
     watch_dir: bool = False
     watch_interval: float = 1.0
     batch_size: int = 128
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')
diff --git a/python/morpheus/morpheus/modules/schemas/rss_source_schema.py b/python/morpheus/morpheus/modules/schemas/rss_source_schema.py
index d2c6470061..0a34f958fa 100644
--- a/python/morpheus/morpheus/modules/schemas/rss_source_schema.py
+++ b/python/morpheus/morpheus/modules/schemas/rss_source_schema.py
@@ -16,6 +16,7 @@
 from typing import List

 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field

 logger = logging.getLogger(__name__)

@@ -32,6 +33,4 @@ class RSSSourceSchema(BaseModel):
     interval_sec: int = 600
     stop_after_rec: int = 0
     strip_markup: bool = True
-
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra='forbid')
diff --git a/python/morpheus/morpheus/stages/input/arxiv_source.py b/python/morpheus/morpheus/stages/input/arxiv_source.py
index 1e24fb855f..b995d3c6b8 100644
--- a/python/morpheus/morpheus/stages/input/arxiv_source.py
+++ b/python/morpheus/morpheus/stages/input/arxiv_source.py
@@ -160,7 +160,7 @@ def _generate_frames(self, subscription: mrc.Subscription):

     def _process_pages(self, pdf_path: str):
         try:
-            from langchain.document_loaders import PyPDFLoader
+            from langchain_community.document_loaders import PyPDFLoader
             from pypdf.errors import PdfStreamError
         except ImportError as exc:
             raise ImportError(IMPORT_ERROR_MESSAGE) from exc
diff --git a/python/morpheus/morpheus/stages/input/http_server_source_stage.py b/python/morpheus/morpheus/stages/input/http_server_source_stage.py
index fbcac0debf..8bf22084cf 100644
--- a/python/morpheus/morpheus/stages/input/http_server_source_stage.py
+++ b/python/morpheus/morpheus/stages/input/http_server_source_stage.py
@@ -19,6 +19,7 @@
 import time
 import typing
 from http import HTTPStatus
+from io import StringIO

 import mrc

@@ -166,7 +167,7 @@ def _parse_payload(self, payload: str) -> HttpParseResponse:
                 df = self._payload_to_df_fn(payload, self._lines)
             else:
                 # engine='cudf' is needed when lines=False to avoid using pandas
-                df = cudf.read_json(payload, lines=self._lines, engine='cudf')
+                df = cudf.read_json(StringIO(initial_value=payload), lines=self._lines, engine='cudf')
         except Exception as e:
             err_msg = "Error occurred converting HTTP payload to Dataframe"
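Wrapping the payload in `StringIO` follows the pandas 2.x convention that cudf mirrors: passing literal JSON as a string to `read_json` is deprecated in favor of path or file-like input, and the wrapper removes any ambiguity between "this string is data" and "this string is a path". A sketch with pandas standing in for cudf (the call shape is the same on the GPU side; `payload` is made up here):

```python
from io import StringIO

import pandas as pd  # cudf.read_json mirrors this API on the GPU

payload = '{"a": 1, "b": 2}\n{"a": 3, "b": 4}'

# Passing `payload` directly emits a FutureWarning on pandas >= 2.1
# (literal JSON strings are deprecated); a file-like object is explicit.
df = pd.read_json(StringIO(payload), lines=True)
print(df.shape)  # (2, 2)
```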
""" - dt_series = pd.to_datetime(df[self.input_name], infer_datetime_format=True, utc=True) + dt_series = pd.to_datetime(df[self.input_name], utc=True) dtype = self.get_pandas_dtype() if dtype == 'datetime64[ns]': diff --git a/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py b/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py index 8000dabfbc..eb65651f33 100644 --- a/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py +++ b/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py @@ -15,8 +15,9 @@ import logging from pydantic import BaseModel +from pydantic import ConfigDict from pydantic import Field -from pydantic import validator +from pydantic import field_validator from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_ids import WRITE_TO_VECTOR_DB @@ -38,18 +39,16 @@ class WriteToVDBSchema(BaseModel): service_kwargs: dict = Field(default_factory=dict) batch_size: int = 1024 write_time_interval: float = 1.0 + model_config = ConfigDict(extra='forbid') - @validator('service', pre=True) + @field_validator('service') def validate_service(cls, to_validate): # pylint: disable=no-self-argument if not to_validate: raise ValueError("Service must be a service name or a serialized instance of VectorDBService") return to_validate - @validator('default_resource_name', pre=True) + @field_validator('default_resource_name') def validate_resource_name(cls, to_validate): # pylint: disable=no-self-argument if not to_validate: raise ValueError("Resource name must not be None or Empty.") return to_validate - - class Config: - extra = "forbid" diff --git a/tests/_utils/test_directories.py b/tests/_utils/test_directories.py index 60c3b11b0e..3c133d7aa7 100644 --- a/tests/_utils/test_directories.py +++ b/tests/_utils/test_directories.py @@ -33,4 +33,3 @@ def __init__(self, tests_dir) -> None: self.tests_data_dir = os.path.join(self.tests_dir, 'tests_data') self.mock_triton_servers_dir = os.path.join(self.tests_dir, 'mock_triton_server') self.mock_rest_server = os.path.join(self.tests_dir, 'mock_rest_server') - print(self) diff --git a/tests/conftest.py b/tests/conftest.py index 2d7fef3ee3..55c3b03605 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,7 +36,6 @@ from _utils.kafka import kafka_bootstrap_servers_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_consumer_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_topics_fixture # noqa: F401 pylint:disable=unused-import -from morpheus.utils.shared_process_pool import SharedProcessPool # Don't let pylint complain about pytest fixtures # pylint: disable=redefined-outer-name,unused-argument @@ -1081,6 +1080,32 @@ def openai_fixture(fail_missing: bool): yield import_or_skip("openai", reason=OPT_DEP_SKIP_REASON.format(package="openai"), fail_missing=fail_missing) +@pytest.fixture(scope='session') +def dask_distributed(fail_missing: bool): + """ + Mark tests requiring dask.distributed + """ + yield import_or_skip("dask.distributed", + reason=OPT_DEP_SKIP_REASON.format(package="dask.distributed"), + fail_missing=fail_missing) + + +@pytest.fixture(scope='session') +def dask_cuda(fail_missing: bool): + """ + Mark tests requiring dask_cuda + """ + yield import_or_skip("dask_cuda", reason=OPT_DEP_SKIP_REASON.format(package="dask_cuda"), fail_missing=fail_missing) + + +@pytest.fixture(scope='session') +def mlflow(fail_missing: bool): + """ + Mark tests 
diff --git a/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py b/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py
index 8000dabfbc..eb65651f33 100644
--- a/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py
+++ b/python/morpheus_llm/morpheus_llm/modules/schemas/write_to_vector_db_schema.py
@@ -15,8 +15,9 @@
 import logging

 from pydantic import BaseModel
+from pydantic import ConfigDict
 from pydantic import Field
-from pydantic import validator
+from pydantic import field_validator

 from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE
 from morpheus.utils.module_ids import WRITE_TO_VECTOR_DB
@@ -38,18 +39,16 @@ class WriteToVDBSchema(BaseModel):
     service_kwargs: dict = Field(default_factory=dict)
     batch_size: int = 1024
     write_time_interval: float = 1.0
+    model_config = ConfigDict(extra='forbid')

-    @validator('service', pre=True)
+    @field_validator('service')
     def validate_service(cls, to_validate):  # pylint: disable=no-self-argument
         if not to_validate:
             raise ValueError("Service must be a service name or a serialized instance of VectorDBService")
         return to_validate

-    @validator('default_resource_name', pre=True)
+    @field_validator('default_resource_name')
     def validate_resource_name(cls, to_validate):  # pylint: disable=no-self-argument
         if not to_validate:
             raise ValueError("Resource name must not be None or Empty.")
         return to_validate
-
-    class Config:
-        extra = "forbid"
diff --git a/tests/_utils/test_directories.py b/tests/_utils/test_directories.py
index 60c3b11b0e..3c133d7aa7 100644
--- a/tests/_utils/test_directories.py
+++ b/tests/_utils/test_directories.py
@@ -33,4 +33,3 @@ def __init__(self, tests_dir) -> None:
         self.tests_data_dir = os.path.join(self.tests_dir, 'tests_data')
         self.mock_triton_servers_dir = os.path.join(self.tests_dir, 'mock_triton_server')
         self.mock_rest_server = os.path.join(self.tests_dir, 'mock_rest_server')
-        print(self)
diff --git a/tests/conftest.py b/tests/conftest.py
index 2d7fef3ee3..55c3b03605 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -36,7 +36,6 @@
 from _utils.kafka import kafka_bootstrap_servers_fixture  # noqa: F401 pylint:disable=unused-import
 from _utils.kafka import kafka_consumer_fixture  # noqa: F401 pylint:disable=unused-import
 from _utils.kafka import kafka_topics_fixture  # noqa: F401 pylint:disable=unused-import
-from morpheus.utils.shared_process_pool import SharedProcessPool

 # Don't let pylint complain about pytest fixtures
 # pylint: disable=redefined-outer-name,unused-argument
@@ -1081,6 +1080,32 @@ def openai_fixture(fail_missing: bool):
     yield import_or_skip("openai", reason=OPT_DEP_SKIP_REASON.format(package="openai"), fail_missing=fail_missing)


+@pytest.fixture(scope='session')
+def dask_distributed(fail_missing: bool):
+    """
+    Mark tests requiring dask.distributed
+    """
+    yield import_or_skip("dask.distributed",
+                         reason=OPT_DEP_SKIP_REASON.format(package="dask.distributed"),
+                         fail_missing=fail_missing)
+
+
+@pytest.fixture(scope='session')
+def dask_cuda(fail_missing: bool):
+    """
+    Mark tests requiring dask_cuda
+    """
+    yield import_or_skip("dask_cuda", reason=OPT_DEP_SKIP_REASON.format(package="dask_cuda"), fail_missing=fail_missing)
+
+
+@pytest.fixture(scope='session')
+def mlflow(fail_missing: bool):
+    """
+    Mark tests requiring mlflow
+    """
+    yield import_or_skip("mlflow", reason=OPT_DEP_SKIP_REASON.format(package="mlflow"), fail_missing=fail_missing)
+
+
 @pytest.fixture(name="langchain", scope='session')
 def langchain_fixture(fail_missing: bool):
     """
@@ -1168,6 +1193,8 @@ def mock_subscription_fixture():
 # Any tests that use the SharedProcessPool should use this fixture
 @pytest.fixture(scope="module")
 def shared_process_pool_setup_and_teardown():
+    from morpheus.utils.shared_process_pool import SharedProcessPool
+
     # Set lower CPU usage for unit test to avoid slowing down the test
     os.environ["MORPHEUS_SHARED_PROCESS_POOL_CPU_USAGE"] = "0.1"
diff --git a/tests/examples/gnn_fraud_detection_pipeline/conftest.py b/tests/examples/gnn_fraud_detection_pipeline/conftest.py
index 30176f71e4..1ab1cc7544 100644
--- a/tests/examples/gnn_fraud_detection_pipeline/conftest.py
+++ b/tests/examples/gnn_fraud_detection_pipeline/conftest.py
@@ -68,22 +68,22 @@ def seed_fn(seed=42):
     yield seed_fn


-@pytest.fixture(name="example_dir")
+@pytest.fixture(scope='session', name="example_dir")
 def example_dir_fixture():
     yield os.path.join(TEST_DIRS.examples_dir, 'gnn_fraud_detection_pipeline')


-@pytest.fixture(name="training_file")
+@pytest.fixture(scope='session', name="training_file")
 def training_file_fixture(example_dir: str):
     yield os.path.join(example_dir, 'training.csv')


-@pytest.fixture(name="model_dir")
+@pytest.fixture(scope='session', name="model_dir")
 def model_dir_fixture(example_dir: str):
     yield os.path.join(example_dir, 'model')


-@pytest.fixture(name="xgb_model")
+@pytest.fixture(scope='session', name="xgb_model")
 def xgb_model_fixture(model_dir: str):
     yield os.path.join(model_dir, 'xgb.pt')
diff --git a/tests/examples/llm/common/conftest.py b/tests/examples/llm/common/conftest.py
index 591ed21cba..3a19807f25 100644
--- a/tests/examples/llm/common/conftest.py
+++ b/tests/examples/llm/common/conftest.py
@@ -19,7 +19,6 @@
 import pytest

 from _utils import TEST_DIRS
-from _utils import import_or_skip


 @pytest.fixture(scope="function")
@@ -53,12 +52,8 @@ def import_content_extractor_module(restore_sys_path):  # pylint: disable=unused


 @pytest.fixture(name="langchain", autouse=True, scope='session')
-def langchain_fixture(fail_missing: bool):
+def langchain_fixture(langchain):
     """
     All the tests in this subdir require langchain
     """
-
-    skip_reason = ("Tests for the WebScraperStage require the langchain package to be installed, to install this run:\n"
-                   "`conda env update --solver=libmamba -n morpheus "
-                   "--file conda/environments/dev_cuda-125_arch-x86_64.yaml --prune`")
-    yield import_or_skip("langchain", reason=skip_reason, fail_missing=fail_missing)
+    yield langchain
diff --git a/tests/examples/ransomware_detection/conftest.py b/tests/examples/ransomware_detection/conftest.py
index a92786555a..7c3ca3e74e 100644
--- a/tests/examples/ransomware_detection/conftest.py
+++ b/tests/examples/ransomware_detection/conftest.py
@@ -20,7 +20,6 @@
 import yaml

 from _utils import TEST_DIRS
-from _utils import import_or_skip
 from _utils import remove_module

 # pylint: disable=redefined-outer-name
@@ -30,12 +29,12 @@
                "for details on installing these additional dependencies")


-@pytest.fixture(autouse=True, scope='session')
-def dask_distributed(fail_missing: bool):
+@pytest.fixture(name="dask_distributed", autouse=True, scope='session')
+def dask_distributed_fixture(dask_distributed):
     """
     All of the tests in this subdir requires dask.distributed
     """
-    yield import_or_skip("dask.distributed", reason=SKIP_REASON, fail_missing=fail_missing)
+    yield dask_distributed


 @pytest.fixture(name="config")
@@ -48,24 +47,29 @@ def config_fixture(config, use_cpp: bool):  # pylint: disable=unused-argument
     yield config


-@pytest.fixture(name="example_dir")
+@pytest.fixture(name="example_dir", scope='session')
 def example_dir_fixture():
     yield os.path.join(TEST_DIRS.examples_dir, 'ransomware_detection')


-@pytest.fixture(name="conf_file")
+@pytest.fixture(name="conf_file", scope='session')
 def conf_file_fixture(example_dir):
     yield os.path.join(example_dir, 'config/ransomware_detection.yaml')


-@pytest.fixture
-def rwd_conf(conf_file):
+@pytest.fixture(scope='session')
+def _rwd_conf(conf_file):
     with open(conf_file, encoding='UTF-8') as fh:
         conf = yaml.safe_load(fh)

     yield conf


+@pytest.fixture(scope='function')
+def rwd_conf(_rwd_conf):
+    yield _rwd_conf.copy()
+
+
 @pytest.fixture
 def interested_plugins():
     yield ['ldrmodules', 'threadlist', 'envars', 'vadinfo', 'handles']
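The `_rwd_conf`/`rwd_conf` split above is the usual way to combine session-scoped caching with per-test mutability: the YAML parse happens once per session, and each test receives its own copy so mutations cannot leak between tests. A self-contained sketch of the pattern with hypothetical fixture names (note the copy is shallow, so nested values would still be shared; `copy.deepcopy` is the stricter variant):

```python
import pytest


@pytest.fixture(scope='session')
def _base_conf():
    # Expensive setup (file I/O, parsing) runs once per test session.
    yield {"threshold": 0.5, "plugins": ["ldrmodules", "threadlist"]}


@pytest.fixture  # function scope: every test gets a fresh copy
def conf(_base_conf):
    yield _base_conf.copy()


def test_can_mutate(conf):
    conf["threshold"] = 0.9  # top-level mutation is invisible to other tests
    assert conf["threshold"] == 0.9
```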
diff --git a/tests/morpheus/conftest.py b/tests/morpheus/conftest.py
index 3443e3c91a..3d532d3354 100644
--- a/tests/morpheus/conftest.py
+++ b/tests/morpheus/conftest.py
@@ -21,6 +21,6 @@

 from _utils import TEST_DIRS


-@pytest.fixture(autouse=True)
+@pytest.fixture(scope='session', autouse=True)
 def morpheus_sys_path():
     sys.path.append(os.path.join(TEST_DIRS.tests_dir, "morpheus"))
diff --git a/tests/morpheus/stages/test_multi_processing_stage.py b/tests/morpheus/stages/test_multi_processing_stage.py
index f3b07ad5c6..470c27c1f8 100644
--- a/tests/morpheus/stages/test_multi_processing_stage.py
+++ b/tests/morpheus/stages/test_multi_processing_stage.py
@@ -148,6 +148,7 @@ def pandas_dataframe_generator(dataset_pandas: DatasetManager, count: int) -> Ge
         yield df


+@pytest.mark.slow
 def test_created_stage_pipe(config: Config, dataset_pandas: DatasetManager):

     config.num_threads = os.cpu_count()
@@ -176,6 +177,7 @@ def test_created_stage_pipe(config: Config, dataset_pandas: DatasetManager):
     assert df.equals(expected_df)


+@pytest.mark.slow
 def test_derived_stage_pipe(config: Config, dataset_pandas: DatasetManager):

     config.num_threads = os.cpu_count()
@@ -197,6 +199,7 @@ def test_derived_stage_pipe(config: Config, dataset_pandas: DatasetManager):
     assert_results(comp_stage.get_results())


+@pytest.mark.slow
 def test_multiple_stages_pipe(config: Config, dataset_pandas: DatasetManager):

     config.num_threads = os.cpu_count()
diff --git a/tests/morpheus/utils/test_downloader.py b/tests/morpheus/utils/test_downloader.py
index 451c6cde64..9cc8306568 100644
--- a/tests/morpheus/utils/test_downloader.py
+++ b/tests/morpheus/utils/test_downloader.py
@@ -22,20 +22,17 @@
 import morpheus.utils.downloader
 from _utils import TEST_DIRS
-from _utils import import_or_skip
 from morpheus.utils.downloader import DOWNLOAD_METHODS_MAP
 from morpheus.utils.downloader import Downloader
 from morpheus.utils.downloader import DownloadMethods


-@pytest.fixture(autouse=True, scope='session')
-def dask_distributed(fail_missing: bool):
+@pytest.fixture(name="dask_distributed", autouse=True, scope='session')
+def dask_distributed_fixture(dask_distributed):
     """
     Mark tests requiring dask.distributed
     """
-    yield import_or_skip("dask.distributed",
-                         reason="Downloader requires dask and dask.distributed",
-                         fail_missing=fail_missing)
+    yield dask_distributed


 @pytest.mark.usefixtures("restore_environ")
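The `@pytest.mark.slow` marks above let CI deselect the long-running pipeline tests (presumably part of the same effort as the `timeout-minutes` bump at the top of this diff). Marker-based selection works as sketched below, assuming a `slow` marker is registered in the project's pytest config; the test bodies here are placeholders:

```python
import time

import pytest


@pytest.mark.slow
def test_end_to_end():
    time.sleep(2)  # stand-in for an expensive pipeline run
    assert True


def test_fast_unit():
    assert 1 + 1 == 2
```

Running `pytest -m "not slow"` then skips the marked test. An unregistered marker only produces a warning by default, so declaring it (for example under `[tool.pytest.ini_options] markers`) keeps the suite warning-clean.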
diff --git a/tests/morpheus_dfp/conftest.py b/tests/morpheus_dfp/conftest.py
index c441a24e6b..4609e4ceee 100644
--- a/tests/morpheus_dfp/conftest.py
+++ b/tests/morpheus_dfp/conftest.py
@@ -21,7 +21,6 @@
 import pytest

 from _utils import TEST_DIRS
-from _utils import import_or_skip
 from _utils.dataset_manager import DatasetManager
 from morpheus.config import Config

@@ -30,28 +29,28 @@
                "Morpheus development environment.")


-@pytest.fixture(autouse=True, scope='session')
-def dask_distributed(fail_missing: bool):
+@pytest.fixture(name="dask_distributed", autouse=True, scope='session')
+def dask_distributed_fixture(dask_distributed):
     """
     Mark tests requiring dask.distributed
     """
-    yield import_or_skip("dask.distributed", reason=SKIP_REASON, fail_missing=fail_missing)
+    yield dask_distributed


-@pytest.fixture(autouse=True, scope='session')
-def dask_cuda(fail_missing: bool):
+@pytest.fixture(name="dask_cuda", autouse=True, scope='session')
+def dask_cuda_fixture(dask_cuda):
     """
-    Mark tests requiring dask.distributed
+    Mark tests requiring dask_cuda
     """
-    yield import_or_skip("dask_cuda", reason=SKIP_REASON, fail_missing=fail_missing)
+    yield dask_cuda


-@pytest.fixture(autouse=True, scope='session')
-def mlflow(fail_missing: bool):
+@pytest.fixture(name="mlflow", autouse=True, scope='session')
+def mlflow_fixture(mlflow):
     """
     Mark tests requiring mlflow
     """
-    yield import_or_skip("mlflow", reason=SKIP_REASON, fail_missing=fail_missing)
+    yield mlflow


 @pytest.fixture(name='ae_feature_cols', scope='session')
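The dfp `conftest.py` above now delegates to the shared session fixtures added to `tests/conftest.py` earlier in this diff, keeping only thin autouse wrappers so every test in the subdirectory still gets the skip-or-fail behavior without duplicating `import_or_skip` calls. The wrapper pattern in isolation, with hypothetical names and `pytest.importorskip` standing in for the repo's `import_or_skip` helper (the two snippets live in different conftest files, as the comments indicate):

```python
import pytest


# tests/conftest.py (root): the real fixture, resolved once per session.
@pytest.fixture(scope='session')
def heavy_dep():
    yield pytest.importorskip("some_optional_package")


# tests/subdir/conftest.py: override under the same name, requesting the
# parent fixture, and make it autouse so every test below this directory
# depends on it implicitly.
@pytest.fixture(name="heavy_dep", autouse=True, scope='session')
def heavy_dep_fixture(heavy_dep):
    yield heavy_dep
```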
diff --git a/tests/morpheus_llm/stages/arxiv/conftest.py b/tests/morpheus_llm/stages/arxiv/conftest.py
index 6166cb1250..bc705f9b36 100644
--- a/tests/morpheus_llm/stages/arxiv/conftest.py
+++ b/tests/morpheus_llm/stages/arxiv/conftest.py
@@ -20,10 +20,10 @@

 from _utils import import_or_skip

-SKIP_REASON = ("Tests for the arxiv_source require a number of packages not installed in the Morpheus development "
-               "environment. To install these run:\n"
-               "`conda env update --solver=libmamba -n morpheus "
-               "--file conda/environments/dev_cuda-125_arch-x86_64.yaml --prune`")
+SKIP_REASON = (
+    "Tests for the arxiv_source require a number of packages not installed in the Morpheus development "
+    "environment. To install these run:\n"
+    "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml`")


 @pytest.fixture(name="arxiv", autouse=True, scope='session')
@@ -35,11 +35,19 @@ def arxiv_fixture(fail_missing: bool):


 @pytest.fixture(name="langchain", autouse=True, scope='session')
-def langchain_fixture(fail_missing: bool):
+def langchain_fixture(langchain):
     """
     All of the tests in this subdir require langchain
     """
-    yield import_or_skip("langchain", reason=SKIP_REASON, fail_missing=fail_missing)
+    yield langchain
+
+
+@pytest.fixture(name="langchain_community", autouse=True, scope='session')
+def langchain_community_fixture(langchain_community):
+    """
+    All of the tests in this subdir require langchain_community
+    """
+    yield langchain_community


 @pytest.fixture(name="pypdf", autouse=True, scope='session')
diff --git a/tests/morpheus_llm/stages/arxiv/test_arxiv_source.py b/tests/morpheus_llm/stages/arxiv/test_arxiv_source.py
index 262ace1c37..b5bd9deb12 100644
--- a/tests/morpheus_llm/stages/arxiv/test_arxiv_source.py
+++ b/tests/morpheus_llm/stages/arxiv/test_arxiv_source.py
@@ -116,7 +116,7 @@ def test_process_pages_error(config: Config, tmp_path: str):
         stage._process_pages(bad_pdf_filename)


-@mock.patch("langchain.document_loaders.PyPDFLoader")
+@mock.patch("langchain_community.document_loaders.PyPDFLoader")
 def test_process_pages_retry(mock_pdf_loader: mock.MagicMock, config: Config, pypdf: types.ModuleType):
     call_count = 0
@@ -144,7 +144,7 @@ def mock_load():
 @pytest.mark.parametrize("chunk_size", [200, 1000])
 def test_splitting_pages(config: Config,
                          pdf_file: str,
-                         langchain: types.ModuleType,
+                         langchain_community: types.ModuleType,
                          chunk_size: int,
                          dataset_cudf: DatasetManager):
     chunk_overlap = 100
@@ -179,7 +179,7 @@ def test_splitting_pages(config: Config,
         "page_content": page_content_col, "source": source_col, "page": page_col, "type": type_col
     })

-    loader = langchain.document_loaders.PyPDFLoader(pdf_file)
+    loader = langchain_community.document_loaders.PyPDFLoader(pdf_file)
     documents = loader.load()
     assert len(documents) == 1
@@ -194,7 +194,7 @@ def test_splitting_pages(config: Config,
 def test_splitting_pages_no_chunks(config: Config,
                                    pdf_file: str,
-                                   langchain: types.ModuleType,
+                                   langchain_community: types.ModuleType,
                                    dataset_cudf: DatasetManager):
     content = "Morpheus\nunittest"
     page_content_col = [content]
@@ -206,7 +206,7 @@ def test_splitting_pages_no_chunks(config: Config,
     })

-    loader = langchain.document_loaders.PyPDFLoader(pdf_file)
+    loader = langchain_community.document_loaders.PyPDFLoader(pdf_file)
     documents = loader.load()
     assert len(documents) == 1
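All of the langchain import changes in this diff follow the same upstream split: from langchain 0.1 onward, third-party integrations (document loaders, embeddings, vector stores) live in the separately installed `langchain-community` distribution, and the old `langchain.*` paths were deprecated and later removed. The before/after in isolation (the commented usage lines are placeholders, not code from this repo):

```python
# Before (deprecated, then removed):
#   from langchain.document_loaders import PyPDFLoader
#   from langchain.embeddings import HuggingFaceEmbeddings

# After: the same classes, re-homed in langchain-community.
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings

# Construction and usage are unchanged; only the import path moved, e.g.:
#   loader = PyPDFLoader("/path/to/paper.pdf")
#   pages = loader.load()
print(PyPDFLoader.__module__, HuggingFaceEmbeddings.__module__)
```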