diff --git a/.github/workflows/ir_repos.yml b/.github/workflows/ir_repos.yml
new file mode 100644
index 0000000000..c3438df2bc
--- /dev/null
+++ b/.github/workflows/ir_repos.yml
@@ -0,0 +1,116 @@
+#file: noinspection YAMLSchemaValidation
+name: Irregular Repos
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 * * * 0'  # at minute 0 of every hour on Sundays (UTC)
+
+jobs:
+  check_irregular_repo:
+    name: Check Repos
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - 'ubuntu-latest'
+        python-version:
+          - '3.8'
+
+    steps:
+      - name: Get system version for Linux
+        if: ${{ contains(matrix.os, 'ubuntu') }}
+        shell: bash
+        run: |
+          echo "OS_NAME=Linux" >> $GITHUB_ENV
+          echo "IS_WIN=" >> $GITHUB_ENV
+          echo "IS_MAC=" >> $GITHUB_ENV
+      - name: Get system version for Windows
+        if: ${{ contains(matrix.os, 'windows') }}
+        shell: bash
+        run: |
+          echo "OS_NAME=Windows" >> $GITHUB_ENV
+          echo "IS_WIN=1" >> $GITHUB_ENV
+          echo "IS_MAC=" >> $GITHUB_ENV
+      - name: Get system version for MacOS
+        if: ${{ contains(matrix.os, 'macos') }}
+        shell: bash
+        run: |
+          echo "OS_NAME=MacOS" >> $GITHUB_ENV
+          echo "IS_WIN=" >> $GITHUB_ENV
+          echo "IS_MAC=1" >> $GITHUB_ENV
+      - name: Set environment for CPython
+        if: ${{ !contains(matrix.python-version, 'pypy') }}
+        shell: bash
+        run: |
+          echo "IS_PYPY=" >> $GITHUB_ENV
+      - name: Set environment for PyPy
+        if: ${{ contains(matrix.python-version, 'pypy') }}
+        shell: bash
+        run: |
+          echo "IS_PYPY=1" >> $GITHUB_ENV
+      - name: Checkout code
+        uses: actions/checkout@v3.3.0
+        with:
+          fetch-depth: 20
+          submodules: 'recursive'
+      - name: Set up system dependencies on Linux
+        if: ${{ env.OS_NAME == 'Linux' }}
+        shell: bash
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y tree cloc wget curl make zip
+          sudo apt-get install -y git-lfs
+          sudo apt-get install -y p7zip-full rar unrar
+      - name: Set up system dependencies on Windows
+        if: ${{ env.OS_NAME == 'Windows' }}
+        shell: bash
+        run: |
+          choco install tree cloc wget curl make zip
+          choco install 7zip winrar  # unrar should be added
+      - name: Set up system dependencies on MacOS
+        if: ${{ env.OS_NAME == 'MacOS' }}
+        run: |
+          brew install tree cloc wget curl make zip
+          brew install sevenzip
+          brew install --cask rar
+      - name: Set up python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          pip install --upgrade flake8 setuptools wheel twine
+          pip install -r requirements.txt
+          pip install -r requirements-test.txt
+      - name: Test the basic environment
+        shell: bash
+        run: |
+          python -V
+          pip --version
+          pip list
+          tree .
+          cloc hfutils
+          cloc test
+      - name: Sync irregular repositories
+        env:
+          CI: 'true'
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        shell: bash
+        run: |
+          python -m tools.irregular_repo
+      - name: Commit changes
+        id: commit
+        run: |
+          git config user.name 'narugo1992'
+          git config user.email 'narugo992@gmail.com'
+          git add -A
+          git diff-index --quiet HEAD || git commit -a -m "dev(narugo): auto sync irregular repositories $(date -R)"
+      - name: Push changes
+        uses: ad-m/github-push-action@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          branch: ${{ github.ref }}
diff --git a/.gitignore b/.gitignore
index e2e99e4fde..211e41fe16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1223,4 +1223,5 @@ fabric.properties
 *.pt
 /runs
 /YOLOv8
-.benchmarks
\ No newline at end of file
+.benchmarks
+!/hfutils/utils/irregular_repo.json
\ No newline at end of file
diff --git a/docs/source/api_doc/utils/index.rst b/docs/source/api_doc/utils/index.rst
index c3f0bb5c97..ecb408601e 100644
--- a/docs/source/api_doc/utils/index.rst
+++ b/docs/source/api_doc/utils/index.rst
@@ -11,6 +11,7 @@ hfutils.utils
     binary
     download
+    path
     tqdm_
     walk
diff --git a/docs/source/api_doc/utils/path.rst b/docs/source/api_doc/utils/path.rst
new file mode 100644
index 0000000000..4418c54259
--- /dev/null
+++ b/docs/source/api_doc/utils/path.rst
@@ -0,0 +1,36 @@
+hfutils.utils.path
+=================================
+
+.. currentmodule:: hfutils.utils.path
+
+.. automodule:: hfutils.utils.path
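+
+A quick sketch of the intended round trip; the values below mirror the cases
+exercised in ``test/utils/test_path.py`` from this change::
+
+    >>> from hfutils.utils import hf_normpath, hf_fs_path, parse_hf_fs_path
+    >>> hf_normpath('1/../2/3')
+    '2/3'
+    >>> hf_fs_path(repo_id='narugo/test_ds_repo', filename='1/2/3', revision='main')
+    'datasets/narugo/test_ds_repo@main/1/2/3'
+    >>> parse_hf_fs_path('datasets/narugo/test_ds_repo@main/1/2/3').filename
+    '1/2/3'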
+
+
+hf_normpath
+--------------------------------------------
+
+.. autofunction:: hf_normpath
+
+
+
+hf_fs_path
+--------------------------------------------
+
+.. autofunction:: hf_fs_path
+
+
+
+parse_hf_fs_path
+--------------------------------------------
+
+.. autofunction:: parse_hf_fs_path
+
+
+
+HfFileSystemPath
+--------------------------------------------
+
+.. autoclass:: HfFileSystemPath
+
+
diff --git a/hfutils/operate/base.py b/hfutils/operate/base.py
index bea4b6b7a8..85a9ae49c5 100644
--- a/hfutils/operate/base.py
+++ b/hfutils/operate/base.py
@@ -5,6 +5,8 @@
 
 from huggingface_hub import HfApi, HfFileSystem
 
+from hfutils.utils import parse_hf_fs_path
+
 RepoTypeTyping = Literal['dataset', 'model', 'space']
 REPO_TYPES = ['dataset', 'model', 'space']
 
@@ -113,12 +115,15 @@ def list_files_in_repository(repo_id: str, repo_type: RepoTypeTyping = 'dataset'
     try:
         _exist_files = [
-            os.path.relpath(file, repo_root_path)
+            parse_hf_fs_path(file).filename
             for file in hf_fs.glob(f'{repo_root_path}/**', revision=revision)
         ]
     except FileNotFoundError:
         return []
 
-    _exist_ps = sorted([(file, file.split(os.sep)) for file in _exist_files], key=lambda x: x[1])
+    if subdir and subdir != '.':
+        _exist_files = [os.path.relpath(file, subdir) for file in _exist_files]
+
+    _exist_ps = sorted([(file, file.split('/')) for file in _exist_files], key=lambda x: x[1])
     retval = []
     for i, (file, segments) in enumerate(_exist_ps):
         if i < len(_exist_ps) - 1 and segments == _exist_ps[i + 1][1][:len(segments)]:
diff --git a/hfutils/utils/__init__.py b/hfutils/utils/__init__.py
index 275591104c..7fd98e3d25 100644
--- a/hfutils/utils/__init__.py
+++ b/hfutils/utils/__init__.py
@@ -1,6 +1,6 @@
 from .binary import is_binary_file
 from .download import download_file
-from .path import hf_normpath
+from .path import hf_normpath, hf_fs_path, parse_hf_fs_path, HfFileSystemPath
 from .temp import TemporaryDirectory
 from .tqdm_ import tqdm
 from .walk import walk_files
diff --git a/hfutils/utils/irregular_repo.json b/hfutils/utils/irregular_repo.json
new file mode 100644
index 0000000000..7378355758
--- /dev/null
+++ b/hfutils/utils/irregular_repo.json
@@ -0,0 +1,662 @@
+{
+    "datasets": [
+        "acronym_identification",
+        "ade_corpus_v2",
+        "aeslc",
+        "afrikaans_ner_corpus",
+        "ag_news",
+        "air_dialogue",
+        "ajgt_twitter_ar",
+        "allegro_reviews",
+        "allocine",
+        "alt",
+        "amazon_polarity",
+        "amazon_reviews_multi",
+        "amazon_us_reviews",
+        "ambig_qa",
+        "ami",
+        "amttl",
+        "app_reviews",
+        "aqua_rat",
+        "aquamuse",
+        "ar_res_reviews",
+        "ar_sarcasm",
+        "arabic_billion_words",
+        "arabic_pos_dialect",
+        "arabic_speech_corpus",
+        "arcd",
+        "arsentd_lev",
+        "art",
+        "arxiv_dataset",
+        "ascent_kb",
+        "aslg_pc12",
+        "asnq",
+        "assin",
+        "assin2",
+        "atomic",
+        "autshumato",
+        "banking77",
+        "bbaw_egyptian",
+        "bbc_hindi_nli",
+        "bc2gm_corpus",
+        "best2009",
+        "bible_para",
+        "big_patent",
+        "billsum",
+        "bing_coronavirus_query_set",
+        "biomrc",
+        "biosses",
+        "blended_skill_talk",
+        "blog_authorship_corpus",
+        "bn_hate_speech",
+        "bnl_newspapers",
+        "bookcorpus",
+        "bookcorpusopen",
+        "bprec",
+        "break_data",
+        "brwac",
+        "bsd_ja_en",
+        "bswac",
+        "c3",
+        "c4",
+        "cail2018",
+        "caner",
+        "casino",
+        "catalonia_independence",
+        "cats_vs_dogs",
+        "cawac",
+        "cbt",
+        "cc100",
+        "ccaligned_multilingual",
+        "cdsc",
+        "cdt",
+        "cedr",
+        "cfq",
+        "chr_en",
+        "cifar10",
+        "cifar100",
+        "circa",
+        "clickbait_news_bg",
+        "climate_fever",
+        "clinc_oos",
+        "clue",
+        "cmrc2018",
+        "cmu_hinglish_dog",
+        "cnn_dailymail",
+        "coached_conv_pref",
+        "coarse_discourse",
+        "codah",
+        "code_search_net",
+        "code_x_glue_cc_clone_detection_big_clone_bench",
+        "code_x_glue_cc_clone_detection_poj104",
+        "code_x_glue_cc_cloze_testing_all",
+        "code_x_glue_cc_cloze_testing_maxmin",
+        "code_x_glue_cc_code_completion_line",
+        "code_x_glue_cc_code_completion_token",
+        "code_x_glue_cc_code_refinement",
"code_x_glue_cc_code_to_code_trans", + "code_x_glue_cc_defect_detection", + "code_x_glue_ct_code_to_text", + "code_x_glue_tc_nl_code_search_adv", + "code_x_glue_tc_text_to_code", + "code_x_glue_tt_text_to_text", + "com_qa", + "common_language", + "common_voice", + "compguesswhat", + "conceptnet5", + "conll2000", + "conll2002", + "conll2003", + "conllpp", + "consumer-finance-complaints", + "conv_ai", + "conv_ai_2", + "conv_ai_3", + "conv_questions", + "cornell_movie_dialog", + "cos_e", + "cosmos_qa", + "counter", + "covid_qa_castorini", + "covid_qa_deepset", + "covid_qa_ucsd", + "covid_tweets_japanese", + "covost2", + "cppe-5", + "craigslist_bargains", + "crawl_domain", + "crd3", + "crime_and_punish", + "crows_pairs", + "cryptonite", + "cs_restaurants", + "cuad", + "curiosity_dialogs", + "daily_dialog", + "dane", + "danish_political_comments", + "dart", + "datacommons_factcheck", + "dbrd", + "deal_or_no_dialog", + "definite_pronoun_resolution", + "dengue_filipino", + "dialog_re", + "diplomacy_detection", + "disaster_response_messages", + "discofuse", + "discovery", + "disfl_qa", + "doc2dial", + "docred", + "doqa", + "dream", + "dutch_social", + "dyk", + "e2e_nlg", + "e2e_nlg_cleaned", + "ecb", + "ecthr_cases", + "eduge", + "ehealth_kd", + "electricity_load_diagrams", + "eli5", + "eli5_category", + "emea", + "emo", + "emotone_ar", + "empathetic_dialogues", + "enriched_web_nlg", + "eraser_multi_rc", + "esnli", + "eth_py150_open", + "ethos", + "eu_regulatory_ir", + "eurlex", + "euronews", + "europa_eac_tm", + "europa_ecdc_tm", + "event2Mind", + "evidence_infer_treatment", + "factckbr", + "fake_news_english", + "fake_news_filipino", + "farsi_news", + "fashion_mnist", + "fever", + "few_rel", + "financial_phrasebank", + "finer", + "flores", + "flue", + "food101", + "fquad", + "freebase_qa", + "gap", + "gem", + "generated_reviews_enth", + "generics_kb", + "german_legal_entity_recognition", + "germaner", + "germeval_14", + "giga_fren", + "gigaword", + "glucose", + "gnad10", + "go_emotions", + "gooaq", + "google_wellformed_query", + "grail_qa", + "great_code", + "guardian_authorship", + "gutenberg_time", + "hans", + "hansards", + "hard", + "harem", + "has_part", + "hate_offensive", + "hate_speech18", + "hate_speech_filipino", + "hate_speech_pl", + "hate_speech_portuguese", + "hatexplain", + "hausa_voa_ner", + "hausa_voa_topics", + "hda_nli_hindi", + "head_qa", + "health_fact", + "hebrew_projectbenyehuda", + "hebrew_sentiment", + "hebrew_this_world", + "hind_encorp", + "hindi_discourse", + "hippocorpus", + "hkcancor", + "hlgd", + "hope_edi", + "hotpot_qa", + "hover", + "hrenwac_para", + "hrwac", + "humicroedit", + "hybrid_qa", + "hyperpartisan_news_detection", + "iapp_wiki_qa_squad", + "id_clickbait", + "id_liputan6", + "id_nergrit_corpus", + "id_newspapers_2018", + "id_panl_bppt", + "id_puisi", + "igbo_english_machine_translation", + "igbo_monolingual", + "igbo_ner", + "ilist", + "imdb_urdu_reviews", + "imppres", + "indic_glue", + "indonli", + "inquisitive_qg", + "interpress_news_category_tr", + "interpress_news_category_tr_lite", + "irc_disentangle", + "isixhosa_ner_corpus", + "isizulu_ner_corpus", + "iwslt2017", + "jeopardy", + "jnlpba", + "journalists_questions", + "kan_hope", + "kannada_news", + "kd_conv", + "kde4", + "kelm", + "kilt_tasks", + "kilt_wikipedia", + "kinnews_kirnews", + "klue", + "kor_3i4k", + "kor_hate", + "kor_ner", + "kor_nli", + "kor_nlu", + "kor_qpair", + "kor_sae", + "kor_sarcasm", + "labr", + "lama", + "large_spanish_corpus", + "laroseda", + "lc_quad", + "lener_br", + 
"liar", + "librispeech_asr", + "librispeech_lm", + "limit", + "lince", + "linnaeus", + "liveqa", + "lj_speech", + "lm1b", + "lst20", + "m_lama", + "mac_morpho", + "makhzan", + "masakhaner", + "math_dataset", + "math_qa", + "matinf", + "mbpp", + "mc4", + "mc_taco", + "md_gender_bias", + "mdd", + "med_hop", + "medal", + "medical_dialog", + "medical_questions_pairs", + "menyo20k_mt", + "meta_woz", + "metooma", + "metrec", + "miam", + "mkb", + "mkqa", + "mlqa", + "mlsum", + "mnist", + "mocha", + "moroco", + "movie_rationales", + "mrqa", + "ms_marco", + "ms_terms", + "msr_genomics_kbcomp", + "msr_sqa", + "msr_text_compression", + "msr_zhen_translation_parity", + "msra_ner", + "mt_eng_vietnamese", + "muchocine", + "multi_booked", + "multi_news", + "multi_nli_mismatch", + "multi_para_crawl", + "multi_re_qa", + "multi_woz_v22", + "multi_x_science_sum", + "multidoc2dial", + "multilingual_librispeech", + "mutual_friends", + "mwsc", + "myanmar_news", + "narrativeqa", + "narrativeqa_manual", + "natural_questions", + "ncbi_disease", + "nchlt", + "ncslgr", + "nell", + "neural_code_search", + "newsgroup", + "newsph", + "newsph_nli", + "newspop", + "newsqa", + "newsroom", + "nkjp-ner", + "nli_tr", + "nlu_evaluation_data", + "norec", + "norne", + "norwegian_ner", + "nq_open", + "nsmc", + "numer_sense", + "numeric_fused_head", + "oclar", + "offcombr", + "offenseval2020_tr", + "offenseval_dravidian", + "ofis_publik", + "ohsumed", + "ollie", + "omp", + "onestop_english", + "onestop_qa", + "open_subtitles", + "openai_humaneval", + "openslr", + "opinosis", + "orange_sum", + "oscar", + "para_crawl", + "para_pat", + "parsinlu_reading_comprehension", + "pass", + "paws-x", + "paws", + "pec", + "peoples_daily_ner", + "per_sent", + "persian_ner", + "pg19", + "php", + "pib", + "piqa", + "pn_summary", + "poem_sentiment", + "polemo2", + "poleval2019_cyberbullying", + "poleval2019_mt", + "polsum", + "polyglot_ner", + "prachathai67k", + "pragmeval", + "proto_qa", + "psc", + "ptb_text_only", + "pubmed", + "py_ast", + "qa4mre", + "qa_srl", + "qa_zre", + "qangaroo", + "qanta", + "qed", + "qed_amara", + "quac", + "quail", + "quarel", + "quora", + "quoref", + "re_dial", + "reasoning_bg", + "recipe_nlg", + "reclor", + "red_caps", + "reddit_tifu", + "refresd", + "reuters21578", + "riddle_sense", + "ro_sent", + "ro_sts", + "ro_sts_parallel", + "roman_urdu", + "ronec", + "rotten_tomatoes", + "samsum", + "sanskrit_classic", + "saudinewsnet", + "sberquad", + "scan", + "scb_mt_enth_2020", + "scene_parse_150", + "schema_guided_dstc8", + "scielo", + "scientific_papers", + "search_qa", + "sede", + "selqa", + "sem_eval_2010_task_8", + "sem_eval_2014_task_1", + "sem_eval_2018_task_1", + "sem_eval_2020_task_11", + "sent_comp", + "senti_lex", + "senti_ws", + "sentiment140", + "sepedi_ner", + "sesotho_ner_corpus", + "setimes", + "setswana_ner_corpus", + "sharc_modified", + "sick", + "silicone", + "simple_questions_v2", + "siswati_ner_corpus", + "smartdata", + "sms_spam", + "snips_built_in_intents", + "snow_simplified_japanese_corpus", + "so_stacksample", + "social_bias_frames", + "social_i_qa", + "sofc_materials_articles", + "sogou_news", + "spanish_billion_words", + "spc", + "species_800", + "speech_commands", + "squad_adversarial", + "squad_es", + "squad_it", + "squad_kor_v1", + "squad_kor_v2", + "squad_v1_pt", + "squadshifts", + "srwac", + "sst", + "story_cloze", + "stsb_mt_sv", + "style_change_detection", + "subjqa", + "super_glue", + "superb", + "svhn", + "swag", + "swahili", + "swahili_news", + "swda", + "swedish_medical_ner", + 
"swedish_ner_corpus", + "swedish_reviews", + "tab_fact", + "tamilmixsentiment", + "tanzil", + "tapaco", + "tashkeela", + "taskmaster1", + "taskmaster2", + "taskmaster3", + "tatoeba", + "ted_hrlr", + "ted_iwlst2013", + "ted_multi", + "ted_talks_iwslt", + "telugu_books", + "telugu_news", + "tep_en_fa_para", + "text2log", + "thai_toxicity_tweet", + "thainer", + "thaiqa_squad", + "thaisum", + "the_pile_books3", + "the_pile_openwebtext2", + "the_pile_stack_exchange", + "tilde_model", + "time_dial", + "times_of_india_news_headlines", + "timit_asr", + "tlc", + "tmu_gfm_dataset", + "told-br", + "totto", + "trec", + "tsac", + "ttc4900", + "tunizi", + "tuple_ie", + "turk", + "turkic_xwmt", + "turkish_movie_sentiment", + "turkish_ner", + "turkish_product_reviews", + "turkish_shrinked_ner", + "turku_ner_corpus", + "tweet_eval", + "tweet_qa", + "tweets_ar_en_parallel", + "tweets_hate_speech_detection", + "twi_text_c3", + "twi_wordsim353", + "tydiqa", + "ubuntu_dialogs_corpus", + "udhr", + "um005", + "universal_dependencies", + "universal_morphologies", + "urdu_fake_news", + "urdu_sentiment_corpus", + "vctk", + "vivos", + "web_nlg", + "web_of_science", + "web_questions", + "weibo_ner", + "wi_locness", + "wider_face", + "wiki40b", + "wiki_asp", + "wiki_atomic_edits", + "wiki_auto", + "wiki_bio", + "wiki_dpr", + "wiki_hop", + "wiki_lingua", + "wiki_movies", + "wiki_qa", + "wiki_qa_ar", + "wiki_snippets", + "wiki_source", + "wiki_split", + "wiki_summary", + "wikiann", + "wikicorpus", + "wikihow", + "wikipedia", + "wikisql", + "wikitext", + "wikitext_tl39", + "wili_2018", + "wino_bias", + "winograd_wsc", + "winogrande", + "wiqa", + "wisesight1000", + "wisesight_sentiment", + "wnut_17", + "wongnai_reviews", + "woz_dialogue", + "wrbsc", + "x_stance", + "xcopa", + "xcsr", + "xed_en_fi", + "xglue", + "xnli", + "xor_tydi_qa", + "xquad", + "xquad_r", + "xsum_factuality", + "xtreme", + "yahoo_answers_qa", + "yahoo_answers_topics", + "yelp_polarity", + "yelp_review_full", + "yoruba_bbc_topics", + "yoruba_gv_ner", + "yoruba_text_c3", + "yoruba_wordsim353", + "youtube_caption_corrections", + "zest", + "elkarhizketak", + "wikitablequestions", + "conll2012_ontonotesv5", + "monash_tsf", + "roman_urdu_hate_speech", + "adv_glue", + "metashift", + "gsm8k", + "sbu_captions", + "conceptual_captions", + "conceptual_12m", + "visual_genome", + "imagenet-1k", + "tne", + "textvqa", + "ett", + "imagenet_sketch", + "biwi_kinect_head_pose", + "enwik8", + "truthful_qa", + "bigbench", + "quickdraw", + "lccc" + ], + "models": [], + "spaces": [] +} \ No newline at end of file diff --git a/hfutils/utils/path.py b/hfutils/utils/path.py index b15adc19cf..8d4ab909f8 100644 --- a/hfutils/utils/path.py +++ b/hfutils/utils/path.py @@ -1,5 +1,144 @@ +import json import os +import re +from dataclasses import dataclass +from functools import lru_cache +from typing import Optional, Dict, Set, Literal +RepoTypeTyping = Literal['dataset', 'model', 'space'] -def hf_normpath(path): - return os.path.normpath(path).replace('\\', '/') + +def hf_normpath(path) -> str: + """ + Normalize a given path. + + :param path: The path to normalize. + :type path: Any + + :return: The normalized path. + :rtype: str + """ + return re.sub( + r'[\\/]+', '/', + os.path.relpath(os.path.normpath(os.path.join(os.sep, path)), os.sep) + ) + + +def hf_fs_path(repo_id: str, filename: str, + repo_type: RepoTypeTyping = 'dataset', revision: Optional[str] = None): + """ + Get the huggingface filesystem path. + + :param repo_id: The repository ID. 
+    """
+    return re.sub(
+        r'[\\/]+', '/',
+        os.path.relpath(os.path.normpath(os.path.join(os.sep, path)), os.sep)
+    )
+
+
+def hf_fs_path(repo_id: str, filename: str,
+               repo_type: RepoTypeTyping = 'dataset', revision: Optional[str] = None) -> str:
+    """
+    Get the huggingface filesystem path.
+
+    :param repo_id: The repository ID.
+    :type repo_id: str
+
+    :param filename: The filename.
+    :type filename: str
+
+    :param repo_type: The type of repository. (default: 'dataset')
+    :type repo_type: RepoTypeTyping
+
+    :param revision: The revision of the repository. (default: None)
+    :type revision: Optional[str]
+
+    :return: The huggingface filesystem path.
+    :rtype: str
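+
+    Example (cases mirrored from ``test/utils/test_path.py``)::
+
+        >>> hf_fs_path(repo_id='narugo/test_ds_repo', filename='1/2/3', revision='main')
+        'datasets/narugo/test_ds_repo@main/1/2/3'
+        >>> hf_fs_path(repo_id='narugo/test_ds_repo', repo_type='model', filename='1/2/3', revision='r3')
+        'narugo/test_ds_repo@r3/1/2/3'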
+ """ + origin_path = path + repo_type: RepoTypeTyping + if path.startswith('datasets/'): + repo_type = 'dataset' + path = path[len('datasets/'):] + elif path.startswith('spaces/'): + repo_type = 'space' + path = path[len('spaces/'):] + else: + repo_type = 'model' + + matching = _RE_IR_PATH.fullmatch(path) + if matching: + if matching.group('repo_id') not in _irregular_repos()[repo_type]: + matching = None + if not matching: + matching = _RE_PATH.fullmatch(path) + + if matching: + repo_id = matching.group('repo_id') + revision = matching.group('revision') or None + filename = hf_normpath(matching.group('filename') or '.') + return HfFileSystemPath(repo_id, filename, repo_type, revision) + else: + raise ValueError(f'Invalid huggingface filesystem path - {origin_path!r}.') diff --git a/requirements.txt b/requirements.txt index 1024267b78..6ca723a5ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ tqdm requests click>=7 tzlocal +natsort \ No newline at end of file diff --git a/test/operate/test_base.py b/test/operate/test_base.py index cd9d7ddf81..2b4388df70 100644 --- a/test/operate/test_base.py +++ b/test/operate/test_base.py @@ -21,6 +21,16 @@ def test_list_files_in_repository(self): assert (set(should_exists) & set(files)) == set(should_exists) assert not (set(should_not_exists) & set(files)) + def test_list_files_in_repository_revision(self): + files = list_files_in_repository( + repo_id='narugo/test_ds_repo', + repo_type='dataset', + revision='another_branch', + ) + should_exists = ['cloc.sh', 'raw_text', 'surtr_dataset.zip', 'surtr_dataset.zip_x'] + assert (set(should_exists) & set(files)) == set(should_exists) + assert not (set(should_not_exists) & set(files)) + def test_list_files_in_repository_no_ignore(self): files = list_files_in_repository('deepghs/highres_datasets', ignore_patterns=[]) should_exists = [ diff --git a/test/utils/test_path.py b/test/utils/test_path.py new file mode 100644 index 0000000000..6a4a8ba995 --- /dev/null +++ b/test/utils/test_path.py @@ -0,0 +1,86 @@ +import os.path + +import pytest + +from hfutils.utils import hf_normpath, hf_fs_path, parse_hf_fs_path, HfFileSystemPath + + +@pytest.mark.unittest +class TestUtilsPath: + def test_hf_normpath(self): + assert hf_normpath('./1/2/3') == '1/2/3' + assert hf_normpath('1/../2/3') == '2/3' + assert hf_normpath('1///3') == '1/3' + assert hf_normpath('1\\2/3') == '1/2/3' + assert hf_normpath(os.path.join('1', '..', '2', '3', '4')) == '2/3/4' + + def test_hf_fs_path(self): + assert hf_fs_path( + repo_id='narugo/test_ds_repo', + filename='1/2\\3' + ) == 'datasets/narugo/test_ds_repo/1/2/3' + assert hf_fs_path( + repo_id='narugo/test_ds_repo', + filename='1/2\\3', + revision='main', + ) == 'datasets/narugo/test_ds_repo@main/1/2/3' + assert hf_fs_path( + repo_id='narugo/test_ds_repo', + repo_type='model', + filename='1/2\\3', + revision='r3', + ) == 'narugo/test_ds_repo@r3/1/2/3' + assert hf_fs_path( + repo_id='narugo/test_ds_repo', + repo_type='space', + filename='1/2\\3', + revision='r3', + ) == 'spaces/narugo/test_ds_repo@r3/1/2/3' + + def test_parse_hf_fs_path(self): + assert parse_hf_fs_path('datasets/narugo/test_ds_repo/1/2/3') == HfFileSystemPath( + repo_id='narugo/test_ds_repo', + filename='1/2/3', + revision=None, + repo_type='dataset', + ) + assert parse_hf_fs_path('datasets/narugo/test_ds_repo@main/1/2/3') == HfFileSystemPath( + repo_id='narugo/test_ds_repo', + filename='1/2/3', + revision='main', + repo_type='dataset', + ) + assert parse_hf_fs_path('narugo/test_ds_repo@r3/1/2/3') == 
+    """
+    origin_path = path
+    repo_type: RepoTypeTyping
+    if path.startswith('datasets/'):
+        repo_type = 'dataset'
+        path = path[len('datasets/'):]
+    elif path.startswith('spaces/'):
+        repo_type = 'space'
+        path = path[len('spaces/'):]
+    else:
+        repo_type = 'model'
+
+    matching = _RE_IR_PATH.fullmatch(path)
+    if matching:
+        if matching.group('repo_id') not in _irregular_repos()[repo_type]:
+            matching = None
+    if not matching:
+        matching = _RE_PATH.fullmatch(path)
+
+    if matching:
+        repo_id = matching.group('repo_id')
+        revision = matching.group('revision') or None
+        filename = hf_normpath(matching.group('filename') or '.')
+        return HfFileSystemPath(repo_id, filename, repo_type, revision)
+    else:
+        raise ValueError(f'Invalid huggingface filesystem path - {origin_path!r}.')
diff --git a/requirements.txt b/requirements.txt
index 1024267b78..6ca723a5ba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ tqdm
 requests
 click>=7
 tzlocal
+natsort
\ No newline at end of file
diff --git a/test/operate/test_base.py b/test/operate/test_base.py
index cd9d7ddf81..2b4388df70 100644
--- a/test/operate/test_base.py
+++ b/test/operate/test_base.py
@@ -21,6 +21,16 @@ def test_list_files_in_repository(self):
         assert (set(should_exists) & set(files)) == set(should_exists)
         assert not (set(should_not_exists) & set(files))
 
+    def test_list_files_in_repository_revision(self):
+        files = list_files_in_repository(
+            repo_id='narugo/test_ds_repo',
+            repo_type='dataset',
+            revision='another_branch',
+        )
+        should_exists = ['cloc.sh', 'raw_text', 'surtr_dataset.zip', 'surtr_dataset.zip_x']
+        # assumed: '.gitattributes' is filtered out by the default ignore patterns
+        should_not_exists = ['.gitattributes']
+        assert (set(should_exists) & set(files)) == set(should_exists)
+        assert not (set(should_not_exists) & set(files))
+
     def test_list_files_in_repository_no_ignore(self):
         files = list_files_in_repository('deepghs/highres_datasets', ignore_patterns=[])
         should_exists = [
diff --git a/test/utils/test_path.py b/test/utils/test_path.py
new file mode 100644
index 0000000000..6a4a8ba995
--- /dev/null
+++ b/test/utils/test_path.py
@@ -0,0 +1,86 @@
+import os.path
+
+import pytest
+
+from hfutils.utils import hf_normpath, hf_fs_path, parse_hf_fs_path, HfFileSystemPath
+
+
+@pytest.mark.unittest
+class TestUtilsPath:
+    def test_hf_normpath(self):
+        assert hf_normpath('./1/2/3') == '1/2/3'
+        assert hf_normpath('1/../2/3') == '2/3'
+        assert hf_normpath('1///3') == '1/3'
+        assert hf_normpath('1\\2/3') == '1/2/3'
+        assert hf_normpath(os.path.join('1', '..', '2', '3', '4')) == '2/3/4'
+
+    def test_hf_fs_path(self):
+        assert hf_fs_path(
+            repo_id='narugo/test_ds_repo',
+            filename='1/2\\3'
+        ) == 'datasets/narugo/test_ds_repo/1/2/3'
+        assert hf_fs_path(
+            repo_id='narugo/test_ds_repo',
+            filename='1/2\\3',
+            revision='main',
+        ) == 'datasets/narugo/test_ds_repo@main/1/2/3'
+        assert hf_fs_path(
+            repo_id='narugo/test_ds_repo',
+            repo_type='model',
+            filename='1/2\\3',
+            revision='r3',
+        ) == 'narugo/test_ds_repo@r3/1/2/3'
+        assert hf_fs_path(
+            repo_id='narugo/test_ds_repo',
+            repo_type='space',
+            filename='1/2\\3',
+            revision='r3',
+        ) == 'spaces/narugo/test_ds_repo@r3/1/2/3'
+
+    def test_parse_hf_fs_path(self):
+        assert parse_hf_fs_path('datasets/narugo/test_ds_repo/1/2/3') == HfFileSystemPath(
+            repo_id='narugo/test_ds_repo',
+            filename='1/2/3',
+            revision=None,
+            repo_type='dataset',
+        )
+        assert parse_hf_fs_path('datasets/narugo/test_ds_repo@main/1/2/3') == HfFileSystemPath(
+            repo_id='narugo/test_ds_repo',
+            filename='1/2/3',
+            revision='main',
+            repo_type='dataset',
+        )
+        assert parse_hf_fs_path('narugo/test_ds_repo@r3/1/2/3') == HfFileSystemPath(
+            repo_id='narugo/test_ds_repo',
+            repo_type='model',
+            filename='1/2/3',
+            revision='r3',
+        )
+        assert parse_hf_fs_path('spaces/narugo/test_ds_repo@r3/1/2/3') == HfFileSystemPath(
+            repo_id='narugo/test_ds_repo',
+            repo_type='space',
+            filename='1/2/3',
+            revision='r3',
+        )
+        assert parse_hf_fs_path('datasets/imagenet-1k/classes.py') == HfFileSystemPath(
+            repo_id='imagenet-1k',
+            repo_type='dataset',
+            filename='classes.py',
+            revision=None,
+        )
+        assert parse_hf_fs_path('datasets/imagenet-1k@main/classes.py') == HfFileSystemPath(
+            repo_id='imagenet-1k',
+            repo_type='dataset',
+            filename='classes.py',
+            revision='main',
+        )
+        assert parse_hf_fs_path('datasets/narugo/test_ds_repo') == HfFileSystemPath(
+            repo_id='narugo/test_ds_repo',
+            filename='.',
+            revision=None,
+            repo_type='dataset',
+        )
+
+    def test_parse_hf_fs_path_invalid(self):
+        with pytest.raises(ValueError):
+            _ = parse_hf_fs_path('datasets/narugo/test_ds_repo@@main/classes.py')
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tools/irregular_repo.py b/tools/irregular_repo.py
new file mode 100644
index 0000000000..b94aa4e590
--- /dev/null
+++ b/tools/irregular_repo.py
@@ -0,0 +1,49 @@
+import json
+import logging
+import os.path
+
+from hbutils.string import plural_word
+from natsort import natsorted
+
+from hfutils.operate import get_hf_client
+from hfutils.utils import tqdm
+
+
+def main():
+    hf_client = get_hf_client()
+
+    logging.info('Scanning datasets')
+    ir_datasets = []
+    for item in tqdm(hf_client.list_datasets(), desc='Hf Datasets'):
+        # a regular repo id has exactly one '/' (namespace/name); anything else is irregular
+        if item.id.count('/') != 1:
+            ir_datasets.append(item.id)
+    ir_datasets = natsorted(set(ir_datasets))
+    logging.info(f'{plural_word(len(ir_datasets), "irregular dataset")} found.')
+
+    logging.info('Scanning models')
+    ir_models = []
+    for item in tqdm(hf_client.list_models(), desc='Hf Models'):
+        if item.id.count('/') != 1:
+            ir_models.append(item.id)
+    ir_models = natsorted(set(ir_models))
+    logging.info(f'{plural_word(len(ir_models), "irregular model")} found.')
+
+    logging.info('Scanning spaces')
+    ir_spaces = []
+    for item in tqdm(hf_client.list_spaces(), desc='Hf Spaces'):
+        if item.id.count('/') != 1:
+            ir_spaces.append(item.id)
+    ir_spaces = natsorted(set(ir_spaces))
+    logging.info(f'{plural_word(len(ir_spaces), "irregular space")} found.')
+
+    target = os.path.join('hfutils', 'utils', 'irregular_repo.json')
+    logging.info(f'Saving to {target!r} ...')
+    with open(target, 'w') as f:
+        json.dump({
+            'datasets': ir_datasets,
+            'models': ir_models,
+            'spaces': ir_spaces,
+        }, f, sort_keys=True, ensure_ascii=False, indent=4)
+
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    main()