Skip to content

Commit

Permalink
Group migration: additional logging (#2239)
Browse files Browse the repository at this point in the history
## Changes

This PR contains logging improvements in the group manager to help
understand (potential) causes of problems during group migration.

### Linked issues

Relates #2227.

### Functionality

- modified existing workflow: `group-migration`
  • Loading branch information
asnare authored Jul 24, 2024
1 parent 643bac6 commit 3f25ca5
Showing 1 changed file with 32 additions and 9 deletions.
41 changes: 32 additions & 9 deletions src/databricks/labs/ucx/workspace_access/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ def rename_groups(self):
workspace_groups_in_workspace = self._workspace_groups_in_workspace()
groups_to_migrate = self.get_migration_state().groups

logger.info(f"Starting to rename {len(groups_to_migrate)} groups for migration...")
for migrated_group in groups_to_migrate:
if migrated_group.name_in_account in account_groups_in_workspace:
logger.info(f"Skipping {migrated_group.name_in_account}: already in workspace")
Expand Down Expand Up @@ -472,13 +473,15 @@ def _rename_group(self, group_id: str, new_group_name: str) -> None:
def _wait_for_rename(self, group_id: str, old_group_name: str, new_group_name: str) -> None:
group = self._ws.groups.get(group_id)
if group.display_name == old_group_name:
# Rename still pending.
logger.debug(
f"Group {group_id} still has old name; still waiting for rename to take effect: {old_group_name} -> {new_group_name}"
)
raise GroupRenameIncompleteError(group_id, old_group_name, new_group_name)
if group.display_name != new_group_name:
# Group has an entirely unexpected name; something else is interfering.
msg = f"While waiting for group {group_id} rename ({old_group_name} -> {new_group_name} an unexpected name was observed: {group.display_name}"
msg = f"While waiting for group {group_id} rename ({old_group_name} -> {new_group_name}) an unexpected name was observed: {group.display_name}"
raise RuntimeError(msg)
# Normal exit; group has been renamed.
logger.debug(f"Group {group_id} rename has taken effect: {old_group_name} -> {new_group_name}")

@retried(on=[ManyError], timeout=timedelta(minutes=2))
def _wait_for_renamed_groups(self, expected_groups: Collection[tuple[str, str]]) -> None:
Expand All @@ -492,9 +495,15 @@ def _wait_for_renamed_groups(self, expected_groups: Collection[tuple[str, str]])
for group_id, expected_name in expected_groups:
found_name = found_groups.get(group_id, None)
if found_name is None:
pending_renames.append(RuntimeError(f"Missing group with id: {group_id} (renamed to {expected_name}"))
logger.warning(f"Group enumeration omits renamed group: {group_id} (renamed to {expected_name})")
pending_renames.append(RuntimeError(f"Missing group with id: {group_id} (renamed to {expected_name})"))
elif found_name != expected_name:
logger.debug(
f"Group enumeration does not yet reflect rename: {group_id} (renamed to {expected_name} but currently {found_name})"
)
pending_renames.append(GroupRenameIncompleteError(group_id, found_name, expected_name))
else:
logger.debug(f"Group enumeration reflects renamed group: {group_id} (renamed to {expected_name})")
if pending_renames:
raise ManyError(pending_renames)

Expand All @@ -503,6 +512,7 @@ def reflect_account_groups_on_workspace(self):
account_groups_in_account = self._account_groups_in_account()
account_groups_in_workspace = self._account_groups_in_workspace()
groups_to_migrate = self.get_migration_state().groups
logger.info(f"Starting to reflect {len(groups_to_migrate)} account groups into workspace for migration...")
for migrated_group in groups_to_migrate:
if migrated_group.name_in_account in account_groups_in_workspace:
logger.info(f"Skipping {migrated_group.name_in_account}: already in workspace")
Expand All @@ -523,7 +533,9 @@ def delete_original_workspace_groups(self):
tasks = []
workspace_groups_in_workspace = self._workspace_groups_in_workspace()
account_groups_in_workspace = self._account_groups_in_workspace()
for migrated_group in self.snapshot():
migrated_groups = self.snapshot()
logger.info(f"Starting to remove {len(migrated_groups)} migrated workspace groups...")
for migrated_group in migrated_groups:
if migrated_group.temporary_name not in workspace_groups_in_workspace:
logger.info(f"Skipping {migrated_group.name_in_workspace}: no longer in workspace")
continue
Expand Down Expand Up @@ -570,19 +582,25 @@ def validate_group_membership(self) -> list[dict]:
workspace_groups_in_workspace = self._workspace_groups_in_workspace()
account_groups_in_account = self._account_groups_in_account()
strategy = self._get_strategy(workspace_groups_in_workspace, account_groups_in_account)
migrated_groups = strategy.generate_migrated_groups()
migrated_groups = list(strategy.generate_migrated_groups())
mismatch_group = []
retry_on_internal_error = retried(on=[InternalError], timeout=self._verify_timeout)
get_account_group = retry_on_internal_error(self._get_account_group)
logger.info(f"Starting to validate {len(migrated_groups)} migrated workspace groups...")
for ws_group in migrated_groups:
# Users with the same display name but different email will be deduplicated!
ws_members_set = {m.get("display") for m in json.loads(ws_group.members)} if ws_group.members else set()
acc_group = get_account_group(account_groups_in_account[ws_group.name_in_account].id)
acc_group_id = account_groups_in_account[ws_group.name_in_account].id
acc_group = get_account_group(acc_group_id)
if not acc_group:
logger.debug(
f"Skipping validation; account group no longer present: {ws_group.name_in_account} (id={acc_group_id})"
)
continue # group not present anymore
acc_members_set = {a.as_dict().get("display") for a in acc_group.members} if acc_group.members else set()
set_diff = (ws_members_set - acc_members_set).union(acc_members_set - ws_members_set)
if not set_diff:
logger.debug(f"Validated group, no differences found: {ws_group.name_in_account} (id={acc_group_id})")
continue
mismatch_group.append(
{
Expand All @@ -594,9 +612,11 @@ def validate_group_membership(self) -> list[dict]:
}
)
if not mismatch_group:
logger.info("There are no groups with different membership between account and workspace")
logger.info("There are no groups with different membership between account and workspace.")
else:
logger.info("There are groups with different membership between account and workspace")
logger.info(
f"There are {len(mismatch_group)} (of {len(migrated_groups)}) groups with different membership between account and workspace."
)
return mismatch_group

def has_workspace_group(self, name):
Expand All @@ -612,6 +632,7 @@ def _workspace_groups_in_workspace(self) -> dict[str, Group]:
groups = {}
for group in self._list_workspace_groups("WorkspaceGroup", attributes):
if not group.display_name:
logger.debug(f"Ignoring workspace group without name: {group.id}")
continue
groups[group.display_name] = group
return groups
Expand All @@ -620,6 +641,7 @@ def _account_groups_in_workspace(self) -> dict[str, Group]:
groups = {}
for group in self._list_workspace_groups("Group", "id,displayName,externalId,meta"):
if not group.display_name:
logger.debug(f"Ignoring account group in workspace without name: {group.id}")
continue
groups[group.display_name] = group
return groups
Expand All @@ -628,6 +650,7 @@ def _account_groups_in_account(self) -> dict[str, Group]:
groups = {}
for group in self._list_account_groups("id,displayName,externalId"):
if not group.display_name:
logger.debug(f"Ignoring account group in without name: {group.id}")
continue
groups[group.display_name] = group
return groups
Expand Down

0 comments on commit 3f25ca5

Please sign in to comment.