1 |
skt/kobest_v1 |
skt |
30333 |
34 |
none |
ko |
monolingual |
polars |
json |
2204.04541 |
original |
cc-by-sa-4.0 |
10K<n<100K |
none |
2 |
maywell/korean_textbooks |
maywell |
2102 |
97 |
none |
ko |
none |
polars |
parquet |
2306.11644 |
none |
apache-2.0 |
1M<n<10M |
none |
3 |
beomi/KoAlpaca-v1.1a |
beomi |
1273 |
35 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
text-generation |
4 |
sean0042/KorMedMCQA |
sean0042 |
1262 |
21 |
none |
ko |
none |
polars |
parquet |
2403.01469 |
none |
cc-by-nc-2.0 |
1K<n<10K |
question-answering |
5 |
MarkrAI/KOpen-HQ-Hermes-2.5-60K |
MarkrAI |
1144 |
53 |
none |
ko |
none |
polars |
parquet |
none |
none |
mit |
10K<n<100K |
text-generation |
6 |
KorQuAD/squad_kor_v1 |
KorQuAD |
943 |
18 |
extractive-qa |
ko |
monolingual |
polars |
parquet |
1909.07005 |
original |
cc-by-nd-4.0 |
10K<n<100K |
question-answering |
7 |
MarkrAI/KoCommercial-Dataset |
MarkrAI |
615 |
125 |
none |
ko |
none |
polars |
parquet |
2107.06499 |
none |
mit |
100K<n<1M |
none |
8 |
CarrotAI/ko-instruction-dataset |
CarrotAI |
484 |
22 |
none |
ko |
none |
polars |
json |
2304.12244 |
none |
apache-2.0 |
1K<n<10K |
text-generation |
9 |
taeminlee/Ko-StrategyQA |
taeminlee |
431 |
12 |
document-retrieval |
ko |
monolingual |
polars |
json |
none |
Ko-StrategyQA |
none |
10K<n<100K |
text-retrieval |
10 |
maywell/ko_wikidata_QA |
maywell |
410 |
36 |
none |
none |
none |
polars |
csv |
none |
none |
none |
100K<n<1M |
none |
11 |
LDCC/korag |
LDCC |
333 |
6 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
text-generation |
12 |
KETI-AIR/korquad |
KETI-AIR |
310 |
1 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
13 |
smilegate-ai/kor_unsmile |
smilegate-ai |
293 |
3 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
14 |
sionic/ko-dpo-mix-7k-trl-style |
sionic |
220 |
5 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
15 |
kakaobrain/kor_nli |
kakaobrain |
214 |
16 |
multi-input-text-classification |
ko |
monolingual |
polars |
parquet |
none |
extended |
xnli |
cc-by-sa-4.0 |
100K<n<1M |
16 |
nlpai-lab/ko-triplet-v1.0 |
nlpai-lab |
168 |
7 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
17 |
ChuGyouk/AI-MO-NuminaMath-CoT-Ko |
ChuGyouk |
150 |
2 |
none |
ko |
none |
polars |
parquet |
none |
none |
cc-by-nc-4.0 |
100K<n<1M |
text-generation |
18 |
allganize/RAG-Evaluation-Dataset-KO |
allganize |
149 |
69 |
none |
ko |
none |
polars |
csv |
none |
none |
mit |
n<1K |
none |
19 |
maywell/koVast |
maywell |
138 |
20 |
none |
none |
none |
polars |
parquet |
none |
none |
other |
100K<n<1M |
none |
20 |
bebechien/korean_cake_boss |
bebechien |
124 |
1 |
none |
none |
none |
polars |
csv |
none |
none |
cc |
n<1K |
none |
21 |
squarelike/OpenOrca-gugugo-ko |
squarelike |
105 |
34 |
none |
ko |
none |
mlcroissant |
json |
2301.13688 |
none |
mit |
1M<n<10M |
text2text-generation |
22 |
msarmi9/korean-english-multitarget-ted-talks-task |
msarmi9 |
96 |
8 |
none |
ko |
multilingual |
polars |
json |
none |
none |
cc-by-nc-nd-4.0 |
100K<n<1M |
none |
23 |
kresnik/zeroth_korean |
kresnik |
93 |
6 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
24 |
jojo0217/korean_rlhf_dataset |
jojo0217 |
90 |
17 |
none |
ko |
none |
polars |
json |
none |
none |
apache-2.0 |
100K<n<1M |
text-generation |
25 |
dkoterwa/kor-sts |
dkoterwa |
90 |
2 |
none |
none |
none |
polars |
parquet |
none |
none |
cc-by-sa-4.0 |
1K<n<10K |
none |
26 |
open-llm-leaderboard-old/details_yanolja__EEVE-Korean-Instruct-2.8B-v1.0 |
open-llm-leaderboard-old |
79 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
27 |
FreedomIntelligence/alpaca-gpt4-korean |
FreedomIntelligence |
79 |
8 |
none |
none |
none |
polars |
json |
none |
none |
none |
10K<n<100K |
none |
28 |
100suping/korean_unlabeled_web_text |
100suping |
78 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1M<n<10M |
none |
29 |
unoooo/alpaca-korean |
unoooo |
77 |
0 |
none |
none |
none |
polars |
json |
none |
none |
none |
100K<n<1M |
none |
30 |
SpellOnYou/kor_sarcasm |
SpellOnYou |
76 |
5 |
none |
ko |
monolingual |
polars |
parquet |
none |
original |
mit |
1K<n<10K |
text-classification |
31 |
lcw99/wikipedia-korean-20240501 |
lcw99 |
71 |
14 |
none |
ko |
none |
polars |
parquet |
none |
none |
apache-2.0 |
100K<n<1M |
none |
32 |
HAERAE-HUB/KOREAN-SyntheticText-1.5B |
HAERAE-HUB |
70 |
10 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1M<n<10M |
none |
33 |
inmoonlight/kor_hate |
inmoonlight |
68 |
5 |
multi-label-classification |
ko |
monolingual |
none |
none |
2005.12503 |
original |
cc-by-sa-4.0 |
1K<n<10K |
text-classification |
34 |
jeanlee/kmhas_korean_hate_speech |
jeanlee |
67 |
18 |
hate-speech-detection |
ko |
monolingual |
mlcroissant |
none |
2208.10684 |
original |
cc-by-sa-4.0 |
100K<n<1M |
text-classification |
35 |
lcw99/wikipedia-korean-20221001 |
lcw99 |
67 |
5 |
none |
ko |
none |
polars |
parquet |
none |
none |
apache-2.0 |
100K<n<1M |
none |
36 |
coastral/korean-writing-style-instruct |
coastral |
66 |
2 |
none |
ko |
none |
polars |
parquet |
none |
none |
apache-2.0 |
10K<n<100K |
text-generation |
37 |
open-llm-leaderboard-old/details_quantumaikr__KoreanLM-hf |
open-llm-leaderboard-old |
66 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
38 |
open-llm-leaderboard-old/details_yanolja__EEVE-Korean-2.8B-v1.0 |
open-llm-leaderboard-old |
65 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
39 |
open-llm-leaderboard-old/details_yanolja__EEVE-Korean-Instruct-10.8B-v1.0 |
open-llm-leaderboard-old |
65 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
40 |
open-llm-leaderboard-old/details_quantumaikr__QuantumLM-llama2-70B-Korean-LoRA |
open-llm-leaderboard-old |
63 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
41 |
kyujinpy/OpenOrca-KO |
kyujinpy |
54 |
28 |
none |
ko |
none |
polars |
parquet |
2301.13688 |
none |
mit |
10K<n<100K |
text2text-generation |
42 |
Bingsu/zeroth-korean |
Bingsu |
53 |
17 |
none |
ko |
monolingual |
polars |
parquet |
none |
extended |
kresnik/zeroth_korean |
cc-by-4.0 |
10K<n<100K |
43 |
junelee/sharegpt_deepl_ko |
junelee |
52 |
60 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
44 |
werty1248/Korean-1930-Novel-Scene-Summarize |
werty1248 |
48 |
1 |
none |
ko |
none |
polars |
json |
none |
none |
mit |
10K<n<100K |
summarization |
45 |
KorQuAD/squad_kor_v2 |
KorQuAD |
44 |
12 |
extractive-qa |
ko |
monolingual |
none |
none |
none |
original |
cc-by-nd-4.0 |
10K<n<100K |
question-answering |
46 |
kakaobrain/kor_nlu |
kakaobrain |
38 |
5 |
text-scoring |
ko |
monolingual |
none |
none |
2004.03289 |
extended |
snli |
cc-by-sa-4.0 |
100K<n<1M |
47 |
kotzeje/lamini_docs.jsonl |
kotzeje |
38 |
9 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
48 |
heegyu/open-korean-instructions |
heegyu |
34 |
21 |
none |
none |
none |
mlcroissant |
json |
none |
none |
mit |
100K<n<1M |
none |
49 |
amphora/korfin-asc |
amphora |
34 |
2 |
sentiment-classification |
ko |
monolingual |
polars |
parquet |
2301.03136 |
klue |
cc-by-sa-4.0 |
1K<n<10K |
text-classification |
50 |
Ammad1Ali/Korean-conversational-dataset |
Ammad1Ali |
34 |
2 |
none |
none |
none |
polars |
csv |
none |
none |
none |
10K<n<100K |
none |
51 |
HAERAE-HUB/qarv-instruct-ko |
HAERAE-HUB |
33 |
18 |
none |
ko |
none |
polars |
parquet |
none |
none |
apache-2.0 |
10K<n<100K |
text-generation |
52 |
sepidmnorozy/Korean_sentiment |
sepidmnorozy |
30 |
4 |
none |
none |
none |
polars |
csv |
none |
none |
none |
10K<n<100K |
none |
53 |
heegyu/korean-petitions |
heegyu |
30 |
5 |
none |
none |
none |
polars |
json |
none |
none |
mit |
100K<n<1M |
none |
54 |
mncai/orca_dpo_pairs_ko |
mncai |
29 |
4 |
none |
none |
none |
polars |
json |
none |
none |
apache-2.0 |
10K<n<100K |
none |
55 |
nayohan/finance-alpaca-ko |
nayohan |
29 |
1 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
56 |
NX2411/AIhub-korean-speech-data-large |
NX2411 |
27 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
apache-2.0 |
10K<n<100K |
none |
57 |
mncai/MedGPT-5k-ko |
mncai |
26 |
9 |
none |
ko |
none |
polars |
json |
none |
none |
gpl-3.0 |
1K<n<10K |
none |
58 |
datasciathlete/open-ner-english-aihub-korean |
datasciathlete |
26 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
59 |
joonhok-exo-ai/korean_law_open_data_precedents |
joonhok-exo-ai |
24 |
15 |
none |
ko |
none |
polars |
csv |
none |
none |
openrail |
10K<n<100K |
none |
60 |
KETI-AIR/kor_corpora |
KETI-AIR |
23 |
0 |
none |
none |
none |
mlcroissant |
none |
none |
none |
none |
1M<n<10M |
none |
61 |
Laplace04/KoreanSummarizeAiHub |
Laplace04 |
20 |
4 |
none |
none |
none |
polars |
json |
none |
none |
other |
10K<n<100K |
none |
62 |
gayom/KoreaSpellingCorrection |
gayom |
18 |
3 |
none |
none |
none |
polars |
csv |
none |
none |
none |
10K<n<100K |
none |
63 |
CertifiedJoon/Korean-Instruction |
CertifiedJoon |
16 |
5 |
none |
ko |
none |
polars |
parquet |
none |
none |
cdla-permissive-2.0 |
1K<n<10K |
question-answering |
64 |
kyujinpy/KOpen-platypus |
kyujinpy |
15 |
32 |
none |
ko |
none |
polars |
parquet |
2308.07317 |
none |
cc-by-4.0 |
10K<n<100K |
none |
65 |
psyche/korean_idioms |
psyche |
14 |
4 |
none |
ko |
monolingual |
polars |
parquet |
none |
original |
none |
10K<n<100K |
text-classification |
66 |
speech31/zeroth_korean_ipa |
speech31 |
12 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
67 |
wicho/kor_sae |
wicho |
12 |
5 |
intent-classification |
ko |
monolingual |
none |
none |
1811.04231 |
original |
cc-by-sa-4.0 |
10K<n<100K |
text-classification |
68 |
songys/kor_qpair |
songys |
12 |
4 |
semantic-similarity-classification |
ko |
monolingual |
none |
none |
none |
original |
mit |
1K<n<10K |
text-classification |
69 |
nlp-kmu/kor_ner |
nlp-kmu |
12 |
6 |
named-entity-recognition |
ko |
monolingual |
none |
none |
none |
original |
mit |
1K<n<10K |
token-classification |
70 |
heegyu/korquad-chat-v1 |
heegyu |
12 |
14 |
none |
none |
none |
polars |
json |
none |
none |
mit |
1K<n<10K |
none |
71 |
LGAI-EXAONE/KoMT-Bench |
LGAI-EXAONE |
12 |
28 |
none |
ko |
none |
polars |
parquet |
2408.03541 |
none |
lgpl-3.0 |
n<1K |
question-answering |
72 |
devngho/ko_llm_annotations |
devngho |
11 |
1 |
none |
ko |
none |
polars |
parquet |
none |
blueapple8259/c4-ko-cleaned-2 |
mit |
1M<n<10M |
text-classification |
73 |
wicho/kor_3i4k |
wicho |
10 |
3 |
intent-classification |
ko |
monolingual |
polars |
parquet |
1811.04231 |
original |
cc-by-4.0 |
10K<n<100K |
text-classification |
74 |
Bingsu/laion2b_multi_korean_subset_with_image |
Bingsu |
10 |
4 |
none |
ko |
monolingual |
mlcroissant |
none |
none |
extended |
laion/laion2B-multi |
cc-by-4.0 |
100K<n<1M |
75 |
korean-corpus/namu_wiki_512_char_seg |
korean-corpus |
10 |
2 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1M<n<10M |
none |
76 |
ChuGyouk/argilla-distilabel-math-preference-dpo-korean |
ChuGyouk |
10 |
3 |
none |
ko |
none |
polars |
json |
none |
none |
apache-2.0 |
1K<n<10K |
none |
77 |
Nexdata/English-Korean_Parallel_Corpus_Data |
Nexdata |
10 |
0 |
none |
ko |
none |
mlcroissant |
imagefolder |
none |
none |
none |
n<1K |
translation |
78 |
open-llm-leaderboard-old/details_quantumaikr__llama-2-70b-fb16-korean |
open-llm-leaderboard-old |
10 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
79 |
AI-it/korean-hate-speech |
AI-it |
10 |
3 |
none |
none |
none |
mlcroissant |
text |
none |
none |
none |
1K<n<10K |
none |
80 |
Jack0508/TED2020vi_kor |
Jack0508 |
9 |
0 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
81 |
channelcorp/KoMagpie-raw |
channelcorp |
9 |
15 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
1M<n<10M |
none |
82 |
Jack0508/TED2020_kor |
Jack0508 |
9 |
1 |
none |
none |
none |
mlcroissant |
text |
none |
none |
none |
100K<n<1M |
none |
83 |
Jack0508/vi-ko-TED-txt |
Jack0508 |
8 |
0 |
none |
none |
none |
mlcroissant |
text |
none |
none |
none |
100K<n<1M |
none |
84 |
KomeijiForce/moe_girl_wiki |
KomeijiForce |
7 |
3 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
85 |
dev7halo/korean-mcfaq |
dev7halo |
7 |
3 |
none |
ko |
none |
polars |
csv |
none |
none |
apache-2.0 |
1K<n<10K |
none |
86 |
nayohan/CodeFeedback-Filtered-Instruction-ko |
nayohan |
7 |
1 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
87 |
Dogge/bluemoon-fandom-1-1-rp-cleaned-korean-tranlated |
Dogge |
6 |
3 |
none |
none |
none |
polars |
json |
none |
none |
wtfpl |
n<1K |
none |
88 |
Nexdata/Chinese-Korean_Parallel_Corpus_Data |
Nexdata |
6 |
0 |
none |
ko |
none |
mlcroissant |
imagefolder |
none |
none |
none |
n<1K |
translation |
89 |
nebchi/kor-orpo |
nebchi |
5 |
1 |
none |
ko |
none |
polars |
json |
none |
none |
none |
10K<n<100K |
none |
90 |
NomaDamas/Ko-StrategyQA |
NomaDamas |
5 |
12 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
91 |
Nexdata/Korean_Speech_Data_by_Mobile_Phone_Reading |
Nexdata |
5 |
0 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
92 |
Nexdata/Korean_Pronunciation_Dictionary |
Nexdata |
4 |
0 |
none |
ko |
none |
mlcroissant |
imagefolder |
none |
none |
none |
n<1K |
automatic-speech-recognition |
93 |
koen-47/COLUMBUS |
koen-47 |
4 |
1 |
none |
none |
none |
mlcroissant |
imagefolder |
2409.04053 |
none |
mit |
1K<n<10K |
none |
94 |
Nexdata/Korean_Spontaneous_Speech_Data |
Nexdata |
4 |
0 |
none |
ko |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
automatic-speech-recognition |
95 |
heegyu/open-korean-instructions-v20231020 |
heegyu |
4 |
2 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
96 |
trueorfalse441/korean_hate_speech_copy |
trueorfalse441 |
4 |
1 |
hate-speech-detection |
ko |
monolingual |
none |
none |
2208.10684 |
original |
cc-by-sa-4.0 |
100K<n<1M |
text-classification |
97 |
minecode/koreanstudydataset |
minecode |
4 |
2 |
none |
none |
none |
polars |
json |
none |
none |
none |
10K<n<100K |
none |
98 |
eaglewatch/Korean_Wikipedia_Dataset_for_GPT2_August_2022 |
eaglewatch |
4 |
6 |
visual-question-answering |
ko |
multilingual |
polars |
parquet |
none |
none |
apache-2.0 |
100K<n<1M |
visual-question-answering |
99 |
minecode/koreanstudylang |
minecode |
4 |
1 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
100 |
star1sh/korean-child-free-voice_sample |
star1sh |
4 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
n<1K |
none |
101 |
BitTranslate/chatgpt-prompts-Korean |
BitTranslate |
4 |
0 |
none |
none |
none |
polars |
csv |
none |
none |
cc0-1.0 |
n<1K |
none |
102 |
Bingsu/laion2B-multi-korean-subset |
Bingsu |
4 |
11 |
none |
ko |
monolingual |
polars |
parquet |
none |
none |
cc-by-4.0 |
10M<n<100M |
feature-extraction |
103 |
minecode/Korean_study_dataset |
minecode |
4 |
1 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
104 |
tellarin-ai/ntx_llm_inst_korean |
tellarin-ai |
4 |
0 |
none |
ko |
none |
polars |
json |
2303.18103 |
none |
cc-by-sa-4.0 |
n<1K |
token-classification |
105 |
re2panda/grade_school_math_korean |
re2panda |
4 |
1 |
none |
none |
none |
polars |
json |
none |
none |
none |
1K<n<10K |
none |
106 |
pratik33/korean_STT |
pratik33 |
4 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
n<1K |
none |
107 |
mangostin2010/Korean-Wise-Saying |
mangostin2010 |
4 |
1 |
none |
none |
none |
mlcroissant |
text |
none |
none |
unknown |
n<1K |
none |
108 |
coref-data/korean_ecmt_indiscrim |
coref-data |
4 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
109 |
ziozzang/Korean_QA_gen_datasets |
ziozzang |
4 |
3 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
110 |
sieu-n/korean-newstext-dump |
sieu-n |
4 |
4 |
none |
none |
none |
mlcroissant |
text |
none |
none |
none |
1M<n<10M |
none |
111 |
jlbaker361/korra_captioned-augmented |
jlbaker361 |
3 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
112 |
ohgnues/korean-qa-paraphrase |
ohgnues |
3 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
113 |
jlbaker361/korra-lite_captioned-augmented |
jlbaker361 |
3 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
114 |
Nexdata/Mixed_Speech_with_Korean_and_English_Data_by_Mobile_Phone |
Nexdata |
3 |
0 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
115 |
Nexdata/Korean_Speech_Data_by_Mobile_Phone_Guiding |
Nexdata |
3 |
0 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
116 |
Nikutka/L1_poleval_korpus_wzorcowy_test |
Nikutka |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
n<1K |
none |
117 |
FreedomIntelligence/sharegpt-korean |
FreedomIntelligence |
2 |
3 |
none |
none |
none |
polars |
json |
none |
none |
apache-2.0 |
1K<n<10K |
none |
118 |
abwicke/koplo |
abwicke |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
119 |
poperson1205/mrtydi-v1.1-korean-fixed |
poperson1205 |
2 |
2 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
120 |
toriving/kosimcse |
toriving |
2 |
2 |
none |
none |
none |
polars |
csv |
none |
none |
none |
100K<n<1M |
none |
121 |
2tle/korean-curse-filtering-dataset |
2tle |
2 |
1 |
none |
none |
none |
mlcroissant |
text |
none |
none |
mit |
1K<n<10K |
none |
122 |
jason9693/autotrain-data-kor_hate_eval |
jason9693 |
2 |
1 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
text-classification |
123 |
hongdijk/kor_nlu_hufs |
hongdijk |
2 |
0 |
none |
none |
none |
polars |
csv |
none |
none |
cc-by-sa-4.0 |
1K<n<10K |
none |
124 |
kimcando/KOR-RE-natures-and-environments |
kimcando |
2 |
1 |
none |
none |
none |
polars |
csv |
none |
none |
apache-2.0 |
1K<n<10K |
none |
125 |
4n3mone/mmmlu_kor |
4n3mone |
2 |
2 |
none |
ko |
none |
polars |
parquet |
2009.033 |
none |
mit |
10K<n<100K |
question-answering |
126 |
Nikutka/L1_scraped_korpus_pelny_train |
Nikutka |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1M<n<10M |
none |
127 |
StudentLLM/Korean_MT-Bench_questions |
StudentLLM |
2 |
1 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
128 |
StudentLLM/Korean_Vicuna_questions |
StudentLLM |
2 |
1 |
none |
none |
none |
polars |
json |
none |
none |
none |
n<1K |
none |
129 |
SungBeom/chatwine-korean |
SungBeom |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
n<1K |
none |
130 |
FreedomIntelligence/evol-instruct-korean |
FreedomIntelligence |
2 |
7 |
none |
none |
none |
polars |
json |
none |
none |
none |
10K<n<100K |
none |
131 |
Nexdata/Korean_Speech_Data_by_Mobile_Phone |
Nexdata |
2 |
0 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
132 |
ChuGyouk/WebInstructSub-mathstackexchange-Ko-sample |
ChuGyouk |
2 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
mit |
1K<n<10K |
none |
133 |
krishnakalyan3/kolors-20k |
krishnakalyan3 |
2 |
2 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
134 |
Nexdata/Korean_Speech_Data |
Nexdata |
2 |
1 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
135 |
woolyclouds/stt_korean_240123 |
woolyclouds |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
mit |
none |
none |
136 |
datasciathlete/corpus4everyone-korean-NER |
datasciathlete |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
137 |
psyche/synonyms_ko |
psyche |
2 |
1 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
138 |
datasciathlete/aihub-korean |
datasciathlete |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
139 |
softh/alt-kotlin-source-1.4kk |
softh |
2 |
1 |
none |
none |
none |
mlcroissant |
json |
none |
none |
none |
1M<n<10M |
none |
140 |
aintnotimetolose/intervieweedataset-ko |
aintnotimetolose |
2 |
1 |
none |
none |
none |
polars |
csv |
none |
none |
none |
10K<n<100K |
none |
141 |
Jinho11/korean_speech_recognition_dataset |
Jinho11 |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
142 |
coref-data/korean_ecmt_raw |
coref-data |
2 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
cc-by-nc-sa-4.0 |
1K<n<10K |
none |
143 |
brainer/korean-medicine-prescription |
brainer |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
none |
none |
none |
144 |
traintogpb/aihub-koja-integrated-prime-base-300k |
traintogpb |
2 |
1 |
none |
none |
none |
polars |
csv |
none |
none |
none |
100K<n<1M |
none |
145 |
traintogpb/aihub-kozh-integrated-prime-base-300k |
traintogpb |
2 |
2 |
none |
none |
none |
polars |
csv |
none |
none |
none |
100K<n<1M |
none |
146 |
ricecake/Genshin_Impact_RaidenShogun_Voice_korean |
ricecake |
2 |
1 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
other |
n<1K |
none |
147 |
FreedomIntelligence/MMLU_Korean |
FreedomIntelligence |
2 |
4 |
none |
ko |
none |
none |
none |
none |
none |
mit |
none |
none |
148 |
Nexdata/Korean_Speaking_English_Speech_Data_by_Mobile_Phone |
Nexdata |
2 |
1 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
149 |
autoevaluate/autoeval-eval-KETI-AIR__korquad-v1.0-acb0d1-1711659840 |
autoevaluate |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
150 |
lim4349/korquad |
lim4349 |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
151 |
lim4349/origin_added_korquad |
lim4349 |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
152 |
Korakoe/NijiJourney-Prompt-Pairs |
Korakoe |
2 |
13 |
none |
none |
none |
polars |
parquet |
none |
none |
creativeml-openrail-m |
1K<n<10K |
none |
153 |
Ash-Hun/korean_slangData |
Ash-Hun |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
mit |
none |
none |
154 |
Nikutka/L1_poleval_korpus_pelny_train |
Nikutka |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
155 |
autoevaluate/autoeval-eval-squad_kor_v1-squad_kor_v1-7a81b4-2244371597 |
autoevaluate |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
1K<n<10K |
none |
156 |
Nexdata/Handwriting_OCR_Data_of_Japanese_and_Korean |
Nexdata |
2 |
1 |
none |
none |
none |
mlcroissant |
imagefolder |
none |
none |
none |
n<1K |
none |
157 |
datasciathlete/corpus4everyone-klue-korean-NER |
datasciathlete |
2 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
none |
158 |
Bingsu/national_library_of_korea_book_info |
Bingsu |
2 |
1 |
none |
ko |
monolingual |
polars |
csv |
none |
none |
other |
1M<n<10M |
none |
159 |
jungsungmoon/Korean_dialog |
jungsungmoon |
2 |
4 |
none |
none |
none |
polars |
csv |
none |
none |
unknown |
1K<n<10K |
none |
160 |
brainer/KoreanApartmentDealData |
brainer |
2 |
0 |
none |
none |
none |
none |
none |
none |
none |
other |
none |
tabular-regression |
161 |
NX2411/AIhub-korean-speech-data |
NX2411 |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
apache-2.0 |
1K<n<10K |
none |
162 |
NX2411/AIhub-korean-speech-data-large-no-lm |
NX2411 |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
apache-2.0 |
10K<n<100K |
none |
163 |
Korsholm22/go-emotion-dk-autotranlated-10k |
Korsholm22 |
2 |
0 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
164 |
mansiksohn/opendict-korean-proverb |
mansiksohn |
2 |
2 |
language-modeling |
ko |
monolingual |
none |
none |
none |
original |
cc-by-2.0 |
n<1K |
text-generation |
165 |
Nexdata/Korean_Conversational_Speech_Data_by_Mobile_Phone |
Nexdata |
2 |
0 |
none |
ko |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
166 |
JisuofthePark/Uneek_Korean_EFL |
JisuofthePark |
2 |
0 |
none |
none |
none |
mlcroissant |
audiofolder |
none |
none |
none |
n<1K |
none |
167 |
korean-corpus/korquad_v1.0_namu_candidates_256 |
korean-corpus |
2 |
1 |
none |
none |
none |
polars |
parquet |
none |
none |
none |
10K<n<100K |
none |
168 |
team-monolith/korea-university-programming-dataset |
team-monolith |
1 |
2 |
none |
ko |
none |
none |
none |
none |
none |
none |
100M<n<1B |
time-series-forecasting |
169 |
PrompTart/PTT_en_ko |
PrompTart |
0 |
2 |
none |
ko |
none |
polars |
json |
2410.00683 |
none |
none |
1K<n<10K |
translation |
170 |
williamjeong2/msmarco-triplets-ko-v1 |
williamjeong2 |
0 |
5 |
none |
ko |
none |
polars |
parquet |
none |
none |
none |
100K<n<1M |
feature-extraction |
171 |
youjunhyeok/PersonaHub-ko |
youjunhyeok |
0 |
1 |
none |
ko |
none |
polars |
parquet |
2406.20094 |
none |
cc-by-nc-sa-4.0 |
100K<n<1M |
text2text-generation |