test #44

Open
wants to merge 6 commits into master

136 changes: 136 additions & 0 deletions finetune/[2024-02-15]_ner_test.ipynb
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\n",
"from transformers import ElectraForTokenClassification, ElectraTokenizer, AutoTokenizer, AutoModelForTokenClassification, pipeline\n",
"import torch\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"model_name = \"cujabes/koelectra-small-v3-discriminator\"\n",
"cache_dir = \"../../../../models/huggingface\"\n",
"# model = ElectraForTokenClassification.from_pretrained(model_name, cache_dir=cache_dir)\n",
"# tokenizer = ElectraTokenizer.from_pretrained(model_name, cache_dir=cache_dir)\n",
"# model.to(device)\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)\n",
"model = AutoModelForTokenClassification.from_pretrained(model_name, cache_dir=cache_dir)\n",
"model.to(device)\n",
"\n",
"ner = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"사람: O\n",
"##과: O\n",
"인간: O\n"
]
}
],
"source": [
"# # 입력 문장\n",
"# sentence = \"사람과 인간\"\n",
"\n",
"# # 토큰화 및 토큰 인덱스\n",
"# tokens = tokenizer.tokenize(sentence)\n",
"# token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
"\n",
"# # 개체명 태깅 예측\n",
"# with torch.no_grad():\n",
"# outputs = model(torch.tensor([token_ids]).to(device))\n",
"# predictions = outputs.logits.argmax(dim=2)[0].cpu().numpy()\n",
"\n",
"# # 개체명 태깅 결과 출력\n",
"# for token, pred_label_id in zip(tokens, predictions):\n",
"# pred_label = model.config.id2label[pred_label_id]\n",
"# print(f\"{token}: {pred_label}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'NUM-B', 'score': 0.7409795, 'index': 6, 'word': '재배의', 'start': 14, 'end': 17}, {'entity': 'NUM-B', 'score': 0.9995691, 'index': 7, 'word': '90', 'start': 18, 'end': 20}]\n"
]
},
{
"data": {
"text/plain": [
"'재배의 90'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"example = \"서울역으로 안내해줘.\"\n",
"# example = \"안녕하세요, 허깅페이스 트랜스포머 예제 코드입니다.\"\n",
"# example = \"경기도 양평 농막토지 추천! 양서면 증동리 164평 토목완료 토지매매 보전관리지역 별장지 전원주택지로도 추천드립니다. 경기도농막토지, 양평농막토지, 양평토지매매, 양서면토지, 증동리토지, 양서면농막토지, 양평별장지, 양평전원주택지\"\n",
"example = \"많이 따는 다 수확 고추 재배의 90%는 밭 만들기에서 결정됩니다.\"\n",
"result = ner(example)\n",
"print(result)\n",
"\n",
"result_text = str()\n",
"for i in reversed(range(len(result))):\n",
" word = result[i]['word']\n",
" if not word.startswith('##'):\n",
" result_text = \" \"+ word + result_text\n",
" else:\n",
" word = word.replace('##', '')\n",
" result_text = word + result_text\n",
"result_text.strip()\n",
"### before: ' 경기도 양평 양서 증동 164평 경기도 양평 양평 양서 증동리 양서 양평별 양평전원'\n",
"### after: '경기도 양서면 양평토지매매 양서면토지 증동리토지 양서면농막토지 양평전원주택지'\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "CSU",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
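
Review note: the last cell rebuilds words from the raw token-level pipeline output by stripping the '##' WordPiece markers by hand. Below is a minimal sketch of an alternative, assuming a transformers version in which the "ner" pipeline accepts aggregation_strategy (the checkpoint name is simply the one used in this notebook); it is an illustration for comparison, not part of this PR.

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

model_name = "cujabes/koelectra-small-v3-discriminator"  # same checkpoint as the notebook
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# aggregation_strategy="simple" merges '##' subword pieces into whole words,
# so no manual reversed-loop reconstruction is needed.
ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
    device=0 if torch.cuda.is_available() else -1,
)

example = "많이 따는 다 수확 고추 재배의 90%는 밭 만들기에서 결정됩니다."
for ent in ner(example):
    # aggregated entries expose 'entity_group' instead of 'entity'
    print(ent["word"], ent["entity_group"], round(float(ent["score"]), 3))

Each aggregated entry also carries 'start' and 'end' offsets, so the exact surface form can be recovered with example[ent["start"]:ent["end"]] when the joined 'word' string is not precise enough.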