From e66ea16c95e1336fabe017b36ef4ec430ec935ca Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Mon, 25 Nov 2024 17:37:38 +0100 Subject: [PATCH 1/4] Make sure create_repo respect organization privacy settings --- src/huggingface_hub/_commit_scheduler.py | 5 +++-- src/huggingface_hub/_tensorboard_logger.py | 6 +++--- src/huggingface_hub/_upload_large_folder.py | 2 +- src/huggingface_hub/fastai_utils.py | 5 +++-- src/huggingface_hub/hf_api.py | 16 ++++++++++------ src/huggingface_hub/hub_mixin.py | 5 +++-- src/huggingface_hub/keras_mixin.py | 5 +++-- tests/test_hf_api.py | 20 +++++++++++++++++++- tests/testing_constants.py | 5 +++++ 9 files changed, 50 insertions(+), 19 deletions(-) diff --git a/src/huggingface_hub/_commit_scheduler.py b/src/huggingface_hub/_commit_scheduler.py index ba0b63afc7..2c1409f003 100644 --- a/src/huggingface_hub/_commit_scheduler.py +++ b/src/huggingface_hub/_commit_scheduler.py @@ -50,7 +50,8 @@ class CommitScheduler: revision (`str`, *optional*): The revision of the repo to commit to. Defaults to `main`. private (`bool`, *optional*): - Whether to make the repo private. Defaults to `False`. This value is ignored if the repo already exist. + Whether to make the repo private. If `None` (default), will default to been public except if + the organization's default is private. This value is ignored if the repo already exist. token (`str`, *optional*): The token to use to commit to the repo. Defaults to the token saved on the machine. 
allow_patterns (`List[str]` or `str`, *optional*): @@ -106,7 +107,7 @@ def __init__( path_in_repo: Optional[str] = None, repo_type: Optional[str] = None, revision: Optional[str] = None, - private: bool = False, + private: Optional[bool] = None, token: Optional[str] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, diff --git a/src/huggingface_hub/_tensorboard_logger.py b/src/huggingface_hub/_tensorboard_logger.py index 792dc7c4df..d7526f33ca 100644 --- a/src/huggingface_hub/_tensorboard_logger.py +++ b/src/huggingface_hub/_tensorboard_logger.py @@ -75,8 +75,8 @@ class HFSummaryWriter(SummaryWriter): repo_revision (`str`, *optional*): The revision of the repo to which the logs will be pushed. Defaults to "main". repo_private (`bool`, *optional*): - Whether to create a private repo or not. Defaults to False. This argument is ignored if the repo already - exists. + Whether to create a private repo or not. If `None` (default), will default to been public except if + the organization's default is private. This argument is ignored if the repo already exists. path_in_repo (`str`, *optional*): The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/". 
repo_allow_patterns (`List[str]` or `str`, *optional*): @@ -137,7 +137,7 @@ def __init__( squash_history: bool = False, repo_type: Optional[str] = None, repo_revision: Optional[str] = None, - repo_private: bool = False, + repo_private: Optional[bool] = None, path_in_repo: Optional[str] = "tensorboard", repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*", repo_ignore_patterns: Optional[Union[List[str], str]] = None, diff --git a/src/huggingface_hub/_upload_large_folder.py b/src/huggingface_hub/_upload_large_folder.py index bb336de172..a736e7562a 100644 --- a/src/huggingface_hub/_upload_large_folder.py +++ b/src/huggingface_hub/_upload_large_folder.py @@ -52,7 +52,7 @@ def upload_large_folder_internal( *, repo_type: str, # Repo type is required! revision: Optional[str] = None, - private: bool = False, + private: Optional[bool] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, num_workers: Optional[int] = None, diff --git a/src/huggingface_hub/fastai_utils.py b/src/huggingface_hub/fastai_utils.py index 3a9bf25f44..e75eba2a8b 100644 --- a/src/huggingface_hub/fastai_utils.py +++ b/src/huggingface_hub/fastai_utils.py @@ -345,7 +345,7 @@ def push_to_hub_fastai( *, repo_id: str, commit_message: str = "Push FastAI model using huggingface_hub.", - private: bool = False, + private: Optional[bool] = None, token: Optional[str] = None, config: Optional[dict] = None, branch: Optional[str] = None, @@ -369,8 +369,9 @@ def push_to_hub_fastai( The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de'). commit_message (`str`, *optional*): Message to commit while pushing. Will default to :obj:`"add model"`. - private (`bool`, *optional*, defaults to `False`): + private (`bool`, *optional*): Whether or not the repository created should be private. 
+ If `None` (default), the repo will be public unless the organization's default is private. token (`str`, *optional*): The Hugging Face account token to use as HTTP bearer authorization for remote files. If :obj:`None`, the token will be asked by a prompt. config (`dict`, *optional*): diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 963c8fdf88..c685356627 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -3439,7 +3439,7 @@ def create_repo( repo_id: str, *, token: Union[str, bool, None] = None, - private: bool = False, + private: Optional[bool] = None, repo_type: Optional[str] = None, exist_ok: bool = False, resource_group_id: Optional[str] = None, @@ -3461,8 +3461,9 @@ def create_repo( token, which is the recommended method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). To disable authentication, pass `False`. - private (`bool`, *optional*, defaults to `False`): - Whether the model repo should be private. + private (`bool`, *optional*): + Whether the model repo should be private. If `None` (default), will default to been public except if + the organization's default is private. repo_type (`str`, *optional*): Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to a model. Default is @@ -3503,7 +3504,9 @@ def create_repo( if repo_type not in constants.REPO_TYPES: raise ValueError("Invalid repo type") - json: Dict[str, Any] = {"name": name, "organization": organization, "private": private} + json: Dict[str, Any] = {"name": name, "organization": organization} + if private is not None: + json["private"] = private if repo_type is not None: json["type"] = repo_type if repo_type == "space": @@ -5017,7 +5020,7 @@ def upload_large_folder( *, repo_type: str, # Repo type is required!
revision: Optional[str] = None, - private: bool = False, + private: Optional[bool] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, num_workers: Optional[int] = None, @@ -5045,7 +5048,8 @@ def upload_large_folder( revision (`str`, `optional`): The branch to commit to. If not provided, the `main` branch will be used. private (`bool`, `optional`): - Whether the repository should be private. Defaults to False. + Whether the repository should be private. + If `None` (default), will default to been public except if the organization's default is private. allow_patterns (`List[str]` or `str`, *optional*): If provided, only files matching at least one pattern are uploaded. ignore_patterns (`List[str]` or `str`, *optional*): diff --git a/src/huggingface_hub/hub_mixin.py b/src/huggingface_hub/hub_mixin.py index b23ef2ae41..f1277f9fbc 100644 --- a/src/huggingface_hub/hub_mixin.py +++ b/src/huggingface_hub/hub_mixin.py @@ -620,7 +620,7 @@ def push_to_hub( *, config: Optional[Union[dict, "DataclassInstance"]] = None, commit_message: str = "Push model using huggingface_hub.", - private: bool = False, + private: Optional[bool] = None, token: Optional[str] = None, branch: Optional[str] = None, create_pr: Optional[bool] = None, @@ -643,8 +643,9 @@ def push_to_hub( Model configuration specified as a key/value dictionary or a dataclass instance. commit_message (`str`, *optional*): Message to commit while pushing. - private (`bool`, *optional*, defaults to `False`): + private (`bool`, *optional*): Whether the repository created should be private. + If `None` (default), will default to been public except if the organization's default is private. token (`str`, *optional*): The token to use as HTTP bearer authorization for remote files. By default, it will use the token cached when running `huggingface-cli login`. 
diff --git a/src/huggingface_hub/keras_mixin.py b/src/huggingface_hub/keras_mixin.py index f5d9edf37a..b2a2128e66 100644 --- a/src/huggingface_hub/keras_mixin.py +++ b/src/huggingface_hub/keras_mixin.py @@ -301,7 +301,7 @@ def push_to_hub_keras( *, config: Optional[dict] = None, commit_message: str = "Push Keras model using huggingface_hub.", - private: bool = False, + private: Optional[bool] = None, api_endpoint: Optional[str] = None, token: Optional[str] = None, branch: Optional[str] = None, @@ -330,8 +330,9 @@ def push_to_hub_keras( ID of the repository to push to (example: `"username/my-model"`). commit_message (`str`, *optional*, defaults to "Add Keras model"): Message to commit while pushing. - private (`bool`, *optional*, defaults to `False`): + private (`bool`, *optional*): Whether the repository created should be private. + If `None` (default), will default to been public except if the organization's default is private. api_endpoint (`str`, *optional*): The API endpoint to use when pushing the model to the hub. token (`str`, *optional*): diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 158a32ca30..58550ba20b 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -84,7 +84,16 @@ ) from huggingface_hub.utils.endpoint_helpers import _is_emission_within_threshold -from .testing_constants import ENDPOINT_STAGING, FULL_NAME, OTHER_TOKEN, OTHER_USER, TOKEN, USER +from .testing_constants import ( + ENDPOINT_STAGING, + ENTERPRISE_ORG, + ENTERPRISE_TOKEN, + FULL_NAME, + OTHER_TOKEN, + OTHER_USER, + TOKEN, + USER, +) from .testing_utils import ( DUMMY_DATASET_ID, DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT, @@ -455,6 +464,15 @@ def test_create_repo_already_exists_but_no_write_permission(self): # Clean up self._api.delete_repo(repo_id=repo_id, token=OTHER_TOKEN) + def test_create_repo_private_by_default(self): + """Enterprise Hub allows creating private repos by default. 
Let's test that.""" + repo_id = f"{ENTERPRISE_ORG}/{repo_name()}" + self._api.create_repo(repo_id, token=ENTERPRISE_TOKEN) + info = self._api.model_info(repo_id, token=ENTERPRISE_TOKEN, expand="private") + assert info.private + + self._api.delete_repo(repo_id, token=ENTERPRISE_TOKEN) + @use_tmp_repo() def test_upload_file_create_pr(self, repo_url: RepoUrl) -> None: repo_id = repo_url.repo_id diff --git a/tests/testing_constants.py b/tests/testing_constants.py index 80a566b58e..04ef7d6768 100644 --- a/tests/testing_constants.py +++ b/tests/testing_constants.py @@ -10,6 +10,11 @@ OTHER_USER = "DVUser" OTHER_TOKEN = "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" +# Used to test enterprise features, typically creating private repos by default +ENTERPRISE_USER = "DVEnterpriseUser" +ENTERPRISE_ORG = "DVEnterpriseOrg" +ENTERPRISE_TOKEN = "hf_enterprise_user_token" + ENDPOINT_PRODUCTION = "https://huggingface.co" ENDPOINT_STAGING = "https://hub-ci.huggingface.co" From eeda6eb104ea00b649aaf50d1639a6b833f1bb4e Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Mon, 25 Nov 2024 17:57:28 +0100 Subject: [PATCH 2/4] update creds --- tests/testing_constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testing_constants.py b/tests/testing_constants.py index 04ef7d6768..ffab056415 100644 --- a/tests/testing_constants.py +++ b/tests/testing_constants.py @@ -11,9 +11,9 @@ OTHER_TOKEN = "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" # Used to test enterprise features, typically creating private repos by default -ENTERPRISE_USER = "DVEnterpriseUser" -ENTERPRISE_ORG = "DVEnterpriseOrg" -ENTERPRISE_TOKEN = "hf_enterprise_user_token" +ENTERPRISE_USER = "EnterpriseAdmin" +ENTERPRISE_ORG = "EnterpriseOrgPrivate" +ENTERPRISE_TOKEN = "hf_enterprise_admin_token" ENDPOINT_PRODUCTION = "https://huggingface.co" ENDPOINT_STAGING = "https://hub-ci.huggingface.co" From 53329b4d3d333b73899b713882aa43139087f051 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 27 Nov 2024 
11:43:05 +0100 Subject: [PATCH 3/4] fix mocked tests --- tests/test_hf_api.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 58550ba20b..f3f299655f 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -3411,7 +3411,6 @@ def test_create_space_with_hardware(self) -> None: json={ "name": self.repo_id, "organization": None, - "private": False, "type": "space", "sdk": "gradio", "hardware": "t4-medium", @@ -3432,7 +3431,6 @@ def test_create_space_with_hardware_and_sleep_time(self) -> None: json={ "name": self.repo_id, "organization": None, - "private": False, "type": "space", "sdk": "gradio", "hardware": "t4-medium", @@ -3453,7 +3451,6 @@ def test_create_space_with_storage(self) -> None: json={ "name": self.repo_id, "organization": None, - "private": False, "type": "space", "sdk": "gradio", "storageTier": "large", @@ -3480,7 +3477,6 @@ def test_create_space_with_secrets_and_variables(self) -> None: json={ "name": self.repo_id, "organization": None, - "private": False, "type": "space", "sdk": "gradio", "secrets": [ From c3ee1da5ae9159f4b9b66ce8780cc05ce4259a1a Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 27 Nov 2024 12:13:29 +0100 Subject: [PATCH 4/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Célina --- src/huggingface_hub/_commit_scheduler.py | 3 +-- src/huggingface_hub/_tensorboard_logger.py | 3 +-- src/huggingface_hub/hf_api.py | 5 ++--- src/huggingface_hub/hub_mixin.py | 2 +- src/huggingface_hub/keras_mixin.py | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/huggingface_hub/_commit_scheduler.py b/src/huggingface_hub/_commit_scheduler.py index 2c1409f003..f1f20339e7 100644 --- a/src/huggingface_hub/_commit_scheduler.py +++ b/src/huggingface_hub/_commit_scheduler.py @@ -50,8 +50,7 @@ class CommitScheduler: revision (`str`, *optional*): The revision of the repo to commit to.
Defaults to `main`. private (`bool`, *optional*): - Whether to make the repo private. If `None` (default), will default to been public except if - the organization's default is private. This value is ignored if the repo already exist. + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. token (`str`, *optional*): The token to use to commit to the repo. Defaults to the token saved on the machine. allow_patterns (`List[str]` or `str`, *optional*): diff --git a/src/huggingface_hub/_tensorboard_logger.py b/src/huggingface_hub/_tensorboard_logger.py index d7526f33ca..5e91097246 100644 --- a/src/huggingface_hub/_tensorboard_logger.py +++ b/src/huggingface_hub/_tensorboard_logger.py @@ -75,8 +75,7 @@ class HFSummaryWriter(SummaryWriter): repo_revision (`str`, *optional*): The revision of the repo to which the logs will be pushed. Defaults to "main". repo_private (`bool`, *optional*): - Whether to create a private repo or not. If `None` (default), will default to been public except if - the organization's default is private. This argument is ignored if the repo already exists. + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. path_in_repo (`str`, *optional*): The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/". repo_allow_patterns (`List[str]` or `str`, *optional*): diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index c685356627..3c1bd6a5d5 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -3462,8 +3462,7 @@ def create_repo( https://huggingface.co/docs/huggingface_hub/quick-start#authentication). To disable authentication, pass `False`. private (`bool`, *optional*): - Whether the model repo should be private. 
If `None` (default), will default to been public except if - the organization's default is private. + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. repo_type (`str`, *optional*): Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to a model. Default is @@ -5049,7 +5048,7 @@ def upload_large_folder( The branch to commit to. If not provided, the `main` branch will be used. private (`bool`, `optional`): Whether the repository should be private. - If `None` (default), will default to been public except if the organization's default is private. + If `None` (default), the repo will be public unless the organization's default is private. allow_patterns (`List[str]` or `str`, *optional*): If provided, only files matching at least one pattern are uploaded. ignore_patterns (`List[str]` or `str`, *optional*): diff --git a/src/huggingface_hub/hub_mixin.py b/src/huggingface_hub/hub_mixin.py index f1277f9fbc..cc352ff799 100644 --- a/src/huggingface_hub/hub_mixin.py +++ b/src/huggingface_hub/hub_mixin.py @@ -645,7 +645,7 @@ def push_to_hub( Message to commit while pushing. private (`bool`, *optional*): Whether the repository created should be private. - If `None` (default), will default to been public except if the organization's default is private. + If `None` (default), the repo will be public unless the organization's default is private. token (`str`, *optional*): The token to use as HTTP bearer authorization for remote files. By default, it will use the token cached when running `huggingface-cli login`. diff --git a/src/huggingface_hub/keras_mixin.py b/src/huggingface_hub/keras_mixin.py index b2a2128e66..e1c7ad503e 100644 --- a/src/huggingface_hub/keras_mixin.py +++ b/src/huggingface_hub/keras_mixin.py @@ -332,7 +332,7 @@ def push_to_hub_keras( Message to commit while pushing. 
private (`bool`, *optional*): Whether the repository created should be private. - If `None` (default), will default to been public except if the organization's default is private. + If `None` (default), the repo will be public unless the organization's default is private. api_endpoint (`str`, *optional*): The API endpoint to use when pushing the model to the hub. token (`str`, *optional*):