open-craft · pomegranited · Aug 28, 2024 · Aug 28, 2024 · Sep 5, 2024 · Sep 5, 2024
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
@@ -317,6 +317,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
  Fields.context_key,
  Fields.org,
  Fields.tags,
+ Fields.tags + "." + Fields.tags_collections,
  Fields.tags + "." + Fields.tags_taxonomy,
  Fields.tags + "." + Fields.tags_level0,
  Fields.tags + "." + Fields.tags_level1,
@@ -339,6 +340,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
  # are searchable only if at least one document in the index has a value. If we didn't list them here and,
  # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
  # these sub-fields: "Attribute `tags.level3` is not searchable."
+ Fields.tags + "." + Fields.tags_collections,
  Fields.tags + "." + Fields.tags_taxonomy,
  Fields.tags + "." + Fields.tags_level0,
  Fields.tags + "." + Fields.tags_level1,

diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -7,7 +7,9 @@
 from hashlib import blake2b
 
 from django.utils.text import slugify
+from django.core.exceptions import ObjectDoesNotExist
 from opaque_keys.edx.keys import LearningContextKey, UsageKey
+from openedx_learning.api import authoring as authoring_api
 
 from openedx.core.djangoapps.content.search.models import SearchAccess
 from openedx.core.djangoapps.content_libraries import api as lib_api
@@ -52,6 +54,7 @@ class Fields:
  tags_level1 = "level1"
  tags_level2 = "level2"
  tags_level3 = "level3"
+ tags_collections = "collections" # subfield of tags, i.e. tags.collections
  # The "content" field is a dictionary of arbitrary data, depending on the block_type.
  # It comes from each XBlock's index_dictionary() method (if present) plus some processing.
  # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
@@ -164,10 +167,11 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
 
  See the comments above on "Field.tags" for an explanation of the format.
 
- e.g. for something tagged "Difficulty: Hard" and "Location: Vancouver" this
- would return:
+ e.g. for something tagged "Difficulty: Hard" and "Location: Vancouver",
+ and in Collections 3 and 4, this would return:
  {
  "tags": {
+ "collections": [3, 4],
  "taxonomy": ["Location", "Difficulty"],
  "level0": ["Location > North America", "Difficulty > Hard"],
  "level1": ["Location > North America > Canada"],
@@ -182,21 +186,16 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
  strings in a particular format that the frontend knows how to render to
  support hierarchical refinement by tag.
  """
+ result = {}
+
  # Note that we could improve performance for indexing many components from the same library/course,
  # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the
  # tags for each component separately.
  all_tags = tagging_api.get_object_tags(str(object_id)).all()
- if not all_tags:
- # Clear out tags in the index when unselecting all tags for the block, otherwise
- # it would remain the last value if a cleared Fields.tags field is not included
- return {Fields.tags: {}}
- result = {
- Fields.tags_taxonomy: [],
- Fields.tags_level0: [],
- # ... other levels added as needed
- }
  for obj_tag in all_tags:
  # Add the taxonomy name:
+ if Fields.tags_taxonomy not in result:
+ result[Fields.tags_taxonomy] = []
  if obj_tag.taxonomy.name not in result[Fields.tags_taxonomy]:
  result[Fields.tags_taxonomy].append(obj_tag.taxonomy.name)
  # Taxonomy name plus each level of tags, in a list: # e.g. ["Location", "North America", "Canada", "Vancouver"]
@@ -220,6 +219,20 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
  if len(parts) == level + 2:
  break # We have all the levels for this tag now (e.g. parts=["Difficulty", "Hard"] -> need level0 only)
 
+ # Gather the collections associated with this object
+ collections = []
+ try:
+ component = lib_api.get_component_from_usage_key(object_id)
+ collections = authoring_api.get_entity_collections(
+ component.learning_package_id,
+ component.key,
+ ).values_list("key", flat=True)
+ except ObjectDoesNotExist:
+ log.warning(f"No component found for {object_id}")
+
+ if collections:
+ result[Fields.tags_collections] = list(collections)
+
  return {Fields.tags: result}
 
 

diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py
@@ -6,7 +6,14 @@
 
 from django.db.models.signals import post_delete
 from django.dispatch import receiver
-from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData
+from openedx_events.content_authoring.data import (
+ ContentLibraryData,
+ ContentObjectChangedData,
+ LibraryBlockData,
+ XBlockData,
+)
+from opaque_keys import InvalidKeyError
+from opaque_keys.edx.keys import UsageKey
 from openedx_events.content_authoring.signals import (
  CONTENT_LIBRARY_DELETED,
  CONTENT_LIBRARY_UPDATED,
@@ -16,9 +23,8 @@
  XBLOCK_CREATED,
  XBLOCK_DELETED,
  XBLOCK_UPDATED,
- CONTENT_OBJECT_TAGS_CHANGED,
+ CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
 )
-from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string
 
 from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
 from openedx.core.djangoapps.content.search.models import SearchAccess
@@ -139,22 +145,22 @@ def content_library_updated_handler(**kwargs) -> None:
  update_content_library_index_docs.delay(str(content_library_data.library_key))
 
 
-@receiver(CONTENT_OBJECT_TAGS_CHANGED)
+@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
 @only_if_meilisearch_enabled
-def content_object_tags_changed_handler(**kwargs) -> None:
+def content_object_associations_changed_handler(**kwargs) -> None:
  """
- Update the tags data in the index for the Content Object
+ Update the collections/tags data in the index for the Content Object
  """
- content_object_tags = kwargs.get("content_object", None)
- if not content_object_tags or not isinstance(content_object_tags, ContentObjectData):
+ content_object = kwargs.get("content_object", None)
+ if not content_object or not isinstance(content_object, ContentObjectChangedData):
  log.error("Received null or incorrect data for event")
  return
 
  try:
  # Check if valid if course or library block
- get_content_key_from_string(content_object_tags.object_id)
- except ValueError:
+ usage_key = UsageKey.from_string(str(content_object.object_id))
+ except InvalidKeyError:
  log.error("Received invalid content object id")
  return
 
- upsert_block_tags_index_docs(content_object_tags.object_id)
+ upsert_block_tags_index_docs(usage_key)
diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py
@@ -175,7 +175,7 @@ def setUp(self):
  tagging_api.add_tag_to_taxonomy(self.taxonomyB, "three")
  tagging_api.add_tag_to_taxonomy(self.taxonomyB, "four")
 
- # Create a collection:
+ # Create collections:
  self.learning_package = authoring_api.get_learning_package_by_key(self.library.key)
  with freeze_time(created_date):
  self.collection = authoring_api.create_collection(
@@ -379,11 +379,37 @@ def test_index_library_block_tags(self, mock_meilisearch):
  """
  Test indexing an Library Block with tags.
  """
+ collection1 = authoring_api.create_collection(
+ learning_package_id=self.library.learning_package.id,
+ key="COL1",
+ title="Collection 1",
+ created_by=None,
+ description="First Collection",
+ )
+
+ collection2 = authoring_api.create_collection(
+ learning_package_id=self.library.learning_package.id,
+ key="COL2",
+ title="Collection 2",
+ created_by=None,
+ description="Second Collection",
+ )
 
  # Tag XBlock (these internally call `upsert_block_tags_index_docs`)
  tagging_api.tag_object(str(self.problem1.usage_key), self.taxonomyA, ["one", "two"])
  tagging_api.tag_object(str(self.problem1.usage_key), self.taxonomyB, ["three", "four"])
 
+ # Add Problem1 to both Collections (these internally call `upsert_block_tags_index_docs`)
+ # (adding in reverse order to test sorting of collection tag))
+ for collection in (collection2, collection1):
+ library_api.update_library_collection_components(
+ self.library.key,
+ collection_key=collection.key,
+ usage_keys=[
+ self.problem1.usage_key,
+ ],
+ )
+
  # Build expected docs with tags at each stage
  doc_problem_with_tags1 = {
  "id": self.doc_problem1["id"],
@@ -399,11 +425,29 @@ def test_index_library_block_tags(self, mock_meilisearch):
  'level0': ['A > one', 'A > two', 'B > four', 'B > three']
  }
  }
+ doc_problem_with_collection2 = {
+ "id": self.doc_problem1["id"],
+ "tags": {
+ 'collections': [collection2.key],
+ 'taxonomy': ['A', 'B'],
+ 'level0': ['A > one', 'A > two', 'B > four', 'B > three']
+ }
+ }
+ doc_problem_with_collection1 = {
+ "id": self.doc_problem1["id"],
+ "tags": {
+ 'collections': [collection1.key, collection2.key],
+ 'taxonomy': ['A', 'B'],
+ 'level0': ['A > one', 'A > two', 'B > four', 'B > three']
+ }
+ }
 
  mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
  [
  call([doc_problem_with_tags1]),
  call([doc_problem_with_tags2]),
+ call([doc_problem_with_collection2]),
+ call([doc_problem_with_collection1]),
  ],
  any_order=True,
  )

diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py
@@ -8,19 +8,26 @@
 from openedx_learning.api import authoring as authoring_api
 
 from openedx.core.djangoapps.content_tagging import api as tagging_api
+from openedx.core.djangoapps.content_libraries import api as library_api
 from openedx.core.djangolib.testing.utils import skip_unless_cms
 from xmodule.modulestore.django import modulestore
 from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
 from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory
 
 try:
  # This import errors in the lms because content.search is not an installed app there.
- from ..documents import searchable_doc_for_course_block, searchable_doc_tags, searchable_doc_for_collection
+ from ..documents import (
+ searchable_doc_for_course_block,
+ searchable_doc_tags,
+ searchable_doc_for_collection,
+ searchable_doc_for_library_block,
+ )
  from ..models import SearchAccess
 except RuntimeError:
  searchable_doc_for_course_block = lambda x: x
  searchable_doc_tags = lambda x: x
  searchable_doc_for_collection = lambda x: x
+ searchable_doc_for_library_block = lambda x: x
  SearchAccess = {}
 
 
@@ -38,21 +45,52 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
  def setUpClass(cls):
  super().setUpClass()
  cls.store = modulestore()
+ cls.org = Organization.objects.create(name="edX", short_name="edX")
  cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py
  cls.toy_course_key = cls.toy_course.id
 
  # Get references to some blocks in the toy course
  cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto")
- # Create a problem in library
+ # Create a problem in course
  cls.problem_block = BlockFactory.create(
  category="problem",
  parent_location=cls.toy_course_key.make_usage_key("vertical", "vertical_test"),
  display_name='Test Problem',
  data="<problem>What is a test?<multiplechoiceresponse></multiplechoiceresponse></problem>",
  )
 
+ # Create a library and collection with a block
+ created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
+ with freeze_time(created_date):
+ cls.library = library_api.create_library(
+ org=cls.org,
+ slug="2012_Fall",
+ title="some content_library",
+ description="some description",
+ )
+ cls.collection = library_api.create_library_collection(
+ cls.library.key,
+ collection_key="TOY_COLLECTION",
+ title="Toy Collection",
+ created_by=None,
+ description="my toy collection description"
+ )
+ cls.library_block = library_api.create_library_block(
+ cls.library.key,
+ "html",
+ "text2",
+ )
+
+ # Add the problem block to the collection
+ library_api.update_library_collection_components(
+ cls.library.key,
+ collection_key="TOY_COLLECTION",
+ usage_keys=[
+ cls.library_block.usage_key,
+ ]
+ )
+
  # Create a couple taxonomies and some tags
- cls.org = Organization.objects.create(name="edX", short_name="edX")
  cls.difficulty_tags = tagging_api.create_taxonomy(name="Difficulty", orgs=[cls.org], allow_multiple=False)
  tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Easy")
  tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Normal")
@@ -69,6 +107,7 @@ def setUpClass(cls):
  tagging_api.tag_object(str(cls.problem_block.usage_key), cls.difficulty_tags, tags=["Easy"])
  tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"])
  tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])
+ tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"])
 
  @property
  def toy_course_access_id(self):
@@ -80,6 +119,16 @@ def toy_course_access_id(self):
  """
  return SearchAccess.objects.get(context_key=self.toy_course_key).id
 
+ @property
+ def library_access_id(self):
+ """
+ Returns the SearchAccess.id created for the library.
+
+ This SearchAccess object is created when documents are added to the search index, so this method must be called
+ after this step, or risk a DoesNotExist error.
+ """
+ return SearchAccess.objects.get(context_key=self.library.key).id
+
  def test_problem_block(self):
  """
  Test how a problem block gets represented in the search index
@@ -205,6 +254,56 @@ def test_video_block_untagged(self):
  # This video has no tags.
  }
 
+ def test_html_library_block(self):
+ """
+ Test how a library block gets represented in the search index
+ """
+ doc = {}
+ doc.update(searchable_doc_for_library_block(self.library_block))
+ doc.update(searchable_doc_tags(self.library_block.usage_key))
+ assert doc == {
+ "id": "lbedx2012_fallhtmltext2-4bb47d67",
+ "type": "library_block",
+ "block_type": "html",
+ "usage_key": "lb:edX:2012_Fall:html:text2",
+ "block_id": "text2",
+ "context_key": "lib:edX:2012_Fall",
+ "org": "edX",
+ "access_id": self.library_access_id,
+ "display_name": "Text",
+ "breadcrumbs": [
+ {
+ "display_name": "some content_library",
+ },
+ ],
+ "last_published": None,
+ "created": 1680674828.0,
+ "modified": 1680674828.0,
+ "content": {
+ "html_content": "",
+ },
+ "tags": {
+ "collections": ["TOY_COLLECTION"],
+ "taxonomy": ["Difficulty"],
+ "level0": ["Difficulty > Normal"],
+ },
+ }
+
+ def test_collection_with_library(self):
+ doc = searchable_doc_for_collection(self.collection)
+ assert doc == {
+ "id": "TOY_COLLECTION",
+ "type": "collection",
+ "org": "edX",
+ "display_name": "Toy Collection",
+ "description": "my toy collection description",
+ "context_key": "lib:edX:2012_Fall",
+ "access_id": self.library_access_id,
+ "breadcrumbs": [{"display_name": "some content_library"}],
+ "created": 1680674828.0,
+ "modified": 1680674828.0,
+ }
+
  def test_collection_with_no_library(self):
  created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
  with freeze_time(created_date):