Skip to content

Commit

Permalink
Beim Eintrag von Hyperonym- und Antonym-Relationen werden nun die sym…
Browse files Browse the repository at this point in the history
…metrischen Relationen mit eingetragen.
  • Loading branch information
hdaSprachtechnologie committed Sep 18, 2020
1 parent 53ad2f9 commit d96cb9c
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 73 deletions.
176 changes: 136 additions & 40 deletions Arbeiten_mit_OdeNet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"scrolled": true
},
Expand All @@ -36,24 +36,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Lied n \n",
"SENSE: odenet-1540-n ('i73641', [], 'Musikalisches Werk.', [('hypernym', 'odenet-15512-n')], ['Stück', 'Komposition', 'Musikstück', 'Lied', 'Track', 'Titel'])\n",
"immer a \n",
"SENSE: odenet-6115-a ('i18253', 'at all times; all the time and on every occasion', [], [], ['ständig', 'stetig', 'immerwährend', 'persistent', 'konstant', 'kontinuierlich', 'immer', 'perpetuierlich', 'alleweil', 'allweil', 'immer nur', 'stets und ständig', 'fortwährend', 'in Zeit und Ewigkeit', 'stets', 'immerdar', 'allzeit', 'perpetuell'])\n",
"\n",
"SENSE: odenet-15493-n ('i73707', [], 'Musikalische Komposition mit Wörtern.', [('mero_part', 'odenet-7410-n'), ('hypernym', 'odenet-1540-n')], ['Lied', 'Song', 'Kantate'])\n",
"SENSE: odenet-28716-a ('i18253', 'at all times; all the time and on every occasion', [], [], ['immer', 'jedes Mal', 'von Mal zu Mal'])\n",
"\n",
"HYPERNYMS: [('w7633_1540-n', 'odenet-15512-n', ['Artefakt', 'Werk', 'Opus', 'Arbeitsergebnis', 'Handlungsprodukt']), ('w7633_15493-n', 'odenet-1540-n', ['Stück', 'Komposition', 'Musikstück', 'Lied', 'Track', 'Titel'])]\n",
"HYPERNYMS: []\n",
"HYPONYMS: []\n",
"MERONYMS: [('w7633_15493-n', 'odenet-7410-n', ['Kehrreim', 'Refrain'])]\n",
"MERONYMS: []\n",
"HOLONYMS: []\n",
"ANTONYMS: []\n",
"LEMMA: Lied\n",
"LEMMA: immer\n",
"POS: a\n",
"SENSE ID: w26121_28716-a\n",
"LEMMA: immer\n",
"POS: n\n",
"SENSE ID: w7633_15493-n\n"
"SENSE ID: w109586_32550-n\n"
]
}
],
"source": [
"OdeNet.word_info(\"Lied\")"
"OdeNet.word_info(\"immer\")"
]
},
{
Expand All @@ -65,85 +68,178 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['avoid']\n",
"i25721\n",
"stay clear from; keep away from; keep out of the way of someone or something\n",
"['debar', 'avoid', 'deflect', 'ward off', 'avert', 'obviate', 'forefend', 'forfend', 'fend off', 'head off', 'stave off']\n",
"i33990\n",
"prevent the occurrence of; prevent from happening; to protect from or to keep away anything undesirable; to ward off\n",
"['avoid']\n",
"i34037\n",
"refrain from doing something\n",
"['avoid', 'keep off']\n",
"i27567\n",
"refrain from certain foods or beverages\n",
"['void', 'nullify', 'invalidate', 'avoid', 'annul', 'quash']\n",
"i34110\n",
"declare invalid\n"
]
}
],
"source": [
"OdeNet.check_ili_in_pwn(\"composition\")"
"OdeNet.check_ili_in_pwn(\"avoid\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 26,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-27012-v\" ili=\"i59258\" partOfSpeech=\"v\" dc:description=\"an extra component of a machine or other apparatus\"><Definition>Keine Schulden machen.</Definition><SynsetRelation target='odenet-13853-n' relType='hypernym'/></Synset>\n",
"\n"
]
}
],
"source": [
"# Definitionen einfügen'odenet-35395-a','Etwas, das nicht getan werden kann',r\"C:\\Users\\melaniesiegel\\Documents\\05_Projekte\\WordNet\\OdeNet\\deWNaccess\\odenet_oneline.xml\")\n",
"add_definition_to_ss('odenet-15493-n','Musikalische Komposition mit Wörtern.',\"odenet_oneline.xml\")"
"add_definition_to_ss('odenet-27012-v','Keine Schulden machen.',\"odenet_oneline.xml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {
"scrolled": true
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-27012-v\" ili=\"i59258\" partOfSpeech=\"v\" ><Definition>Keine Schulden machen.</Definition><SynsetRelation target='odenet-13853-n' relType='hypernym'/></Synset>\n",
"\n"
]
}
],
"source": [
"# Englische Definitionen löschen\n",
"delete_english_definition('odenet-15493-n',\"odenet_oneline.xml\")"
"delete_english_definition('odenet-27012-v',\"odenet_oneline.xml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-27012-v\" ili=\"\" partOfSpeech=\"v\" ><Definition>Keine Schulden machen.</Definition><SynsetRelation target='odenet-13853-n' relType='hypernym'/></Synset>\n",
"\n"
]
}
],
"source": [
"# Attribute (v.a. ili) im Synset verändern\n",
"\n",
"change_attribute_in_ss('odenet-1540-n','ili','i73641',\"odenet_oneline.xml\")\n"
"change_attribute_in_ss('odenet-27012-v','ili','',\"odenet_oneline.xml\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-28716-a\" ili=\"i18253\" partOfSpeech=\"a\" dc:description=\"at all times; all the time and on every occasion\"><SynsetRelation target='odenet-6080-a' relType='antonym'/></Synset>\n",
"\n",
"<Synset id=\"odenet-6080-a\" ili=\"i18262\" partOfSpeech=\"a\" dc:description=\"not ever; at no time in the past or future\"><SynsetRelation target='odenet-28716-a' relType='antonym'/></Synset>\n",
"\n"
]
}
],
"source": [
"# Relationen zum Synset hinzufügen\n",
"# Antonym-Relation zum Synset hinzufügen\n",
"\n",
"add_rel_to_ss(\"odenet-15493-n\",\"<SynsetRelation target='odenet-1540-n' relType='hypernym'/>\",\"odenet_oneline.xml\")"
"add_antonym_rel_to_ss(\"odenet-28716-a\",\"odenet-6080-a\",\"odenet_oneline.xml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-11177-n\" ili=\"i54970\" partOfSpeech=\"n\" dc:description=\"small crude shelter used as a dwelling\"><SynsetRelation target='odenet-1635-n' relType='hypernym'/></Synset>\n",
"\n",
"<Synset id=\"odenet-1635-n\" ili=\"i113537\" partOfSpeech=\"n\" dc:description=\"an irrecoverable state of devastation and destruction\"><SynsetRelation target='odenet-25325-n' relType='hypernym'/><SynsetRelation target='odenet-28661-n' relType='hyponym'/><SynsetRelation target='odenet-11145-n' relType='hyponym'/><SynsetRelation target='odenet-14061-n' relType='hyponym'/><SynsetRelation target='odenet-5908-n' relType='hyponym'/><SynsetRelation target='odenet-21802-n' relType='hyponym'/><SynsetRelation target='odenet-11177-n' relType='hyponym'/></Synset>\n",
"\n"
]
}
],
"source": [
"# Hyperonym-Relation zum Synset hinzufügen\n",
"\n",
"add_hypernym_rel_to_ss(\"odenet-11177-n\",\"odenet-1635-n\",\"odenet_oneline.xml\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Synset id=\"odenet-27012-v\" ili=\"\" partOfSpeech=\"v\" ><Definition>Keine Schulden machen.</Definition><SynsetRelation target='odenet-13853-n' relType='hypernym'/><Example>Die Regierung bemüht sich zu sparen.</Example></Synset>\n",
"\n"
]
}
],
"source": [
"# Beispiel einfügen\n",
"\n",
"add_example_to_ss('odenet-15493-n','Sie singt ein Lied.',\"odenet_oneline.xml\")"
"add_example_to_ss('odenet-27012-v','Die Regierung bemüht sich zu sparen.',\"odenet_oneline.xml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 30,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<LexicalEntry id=\"w2953\" dc:type=\"Basiswortschatz\" confidenceScore=\"1.0\"><Lemma writtenForm=\"sparen\" partOfSpeech=\"v\"/><Sense id=\"w2953_586-v\" synset=\"odenet-586-v\"/><Sense id=\"w2953_1955-v\" synset=\"odenet-1955-v\"/><Sense id=\"w2953_14046-v\" synset=\"odenet-14046-v\"/><Sense id=\"w2953_27012-v\" synset=\"odenet-27012-v\"/></LexicalEntry>\n",
"\n"
]
}
],
"source": [
"# Attribute zum LexEntry hinzufügen, z.B. confidenceScore\n",
"change_attribute_in_lexentry(\"Lied\",\"confidenceScore\",\"1.0\",\"odenet_oneline.xml\")"
"change_attribute_in_lexentry(\"sparen\",\"confidenceScore\",\"1.0\",\"odenet_oneline.xml\")"
]
},
{
Expand All @@ -169,7 +265,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down
42 changes: 18 additions & 24 deletions OdeNetAccess.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,34 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Werk n \n",
"SENSE: odenet-8695-n ('i61245', 'a product produced or accomplished through the effort or activity or agency of a person or thing', [], [('hyponym', 'odenet-16778-n'), ('hyponym', 'odenet-12034-n'), ('hyponym', 'odenet-15899-n'), ('hyponym', 'odenet-16052-n'), ('hyponym', 'odenet-168-n'), ('hyponym', 'odenet-19502-n'), ('hyponym', 'odenet-30673-n'), ('hyponym', 'odenet-31887-n'), ('hyponym', 'odenet-31971-n'), ('hyponym', 'odenet-3359-n'), ('hyponym', 'odenet-35074-n'), ('hyponym', 'odenet-8609-n'), ('hypernym', 'odenet-11520-n')], ['Betrieb', 'Fertigungsanlage', 'Werk', 'Fabrik'])\n",
"\n",
"SENSE: odenet-11774-n ('i70169', 'a brief treatise on a subject of interest; published in the form of a booklet', [], [('hypernym', 'odenet-31696-n'), ('hyponym', 'odenet-13113-n'), ('hyponym', 'odenet-7008-n')], ['Arbeit', 'Werk', 'Ausarbeitung', 'Traktat', 'Abhandlung', 'akademische Arbeit', 'wissenschaftliche Arbeit'])\n",
"\n",
"SENSE: odenet-12463-n ('i56156', [], 'Ergebnis menschlicher Arbeit, Dienstleistung', [('holo_part', 'odenet-8531-n'), ('holo_part', 'odenet-30078-n')], ['Artikel', 'Fabrikat', 'Erzeugnis', 'Produkt', 'Werk', 'Gebilde', 'Ware', 'Manufakt', 'Gut', 'Handelsgut', 'Arbeit', 'Machwerk'])\n",
"Gebäude n \n",
"SENSE: odenet-1635-n ('i113537', 'an irrecoverable state of devastation and destruction', [], [('hypernym', 'odenet-25325-n'), ('hyponym', 'odenet-28661-n'), ('hyponym', 'odenet-11145-n'), ('hyponym', 'odenet-14061-n'), ('hyponym', 'odenet-5908-n'), ('hyponym', 'odenet-21802-n')], ['Bau', 'Gemäuer', 'Bauwerk', 'Gebäude'])\n",
"\n",
"SENSE: odenet-15512-n ('i35572', [], [], [('hypernym', 'odenet-5502-n'), ('hyponym', 'odenet-26803-v')], ['Artefakt', 'Werk', 'Opus', 'Arbeitsergebnis', 'Handlungsprodukt'])\n",
"SENSE: odenet-33774-n ('i59707', 'a thing constructed; a complex entity constructed of many parts', [], [('hypernym', 'odenet-2254-n'), ('hyponym', 'odenet-11952-n'), ('hyponym', 'odenet-29385-n'), ('hyponym', 'odenet-3014-n')], ['Bauwerk', 'Gebäude', 'bauliche Anlage'])\n",
"\n",
"SENSE: odenet-28819-n ('i61245', 'a product produced or accomplished through the effort or activity or agency of a person or thing', [], [('hyponym', 'odenet-16778-n')], ['Werk', 'Schaffen', 'Gesamtwerk', 'Werke', 'Œuvre'])\n",
"\n",
"HYPERNYMS: [('w35277_8695-n', 'odenet-11520-n', ['Gerät', 'Anlage', 'Apparatur', 'Maschine', 'Apparat', 'Aggregat', 'Automat']), ('w35277_11774-n', 'odenet-31696-n', ['Monographie', 'Monografie']), ('w35277_15512-n', 'odenet-5502-n', ['Handlung', 'Operation', 'Aktion', 'Tat'])]\n",
"HYPONYMS: [('w35277_8695-n', 'odenet-16778-n', ['Rohling', 'Leer-DVD', 'Leer-CD']), ('w35277_8695-n', 'odenet-12034-n', ['Meisterwerk', 'Opus magnum', 'Opus', 'Kunstwerk', 'großes Werk']), ('w35277_8695-n', 'odenet-15899-n', ['Gerberei', 'Lederfabrik']), ('w35277_8695-n', 'odenet-16052-n', ['Ziegelfabrik', 'Ziegelei', 'Ziegelwerk']), ('w35277_8695-n', 'odenet-168-n', ['manuelle Arbeit', 'Handarbeit']), ('w35277_8695-n', 'odenet-19502-n', ['Nachfassen', 'Follow-up', 'Folgeaktivität']), ('w35277_8695-n', 'odenet-30673-n', ['Werkunterricht', 'Werkerziehung', 'Werken']), ('w35277_8695-n', 'odenet-31887-n', ['Traumfabrik', 'Hollywood']), ('w35277_8695-n', 'odenet-31971-n', ['Gebälk', 'Balkenwerk', 'Balkenverband']), ('w35277_8695-n', 'odenet-3359-n', ['Produktionsfabrik', 'Produktionsanlage']), ('w35277_8695-n', 'odenet-35074-n', ['Think-Tank', 'Ideenschmiede', 'Ideenfabrik', 'Denkfabrik', 'Thinktank']), ('w35277_8695-n', 'odenet-8609-n', ['Meisterstück', 'Meisterwerk']), ('w35277_11774-n', 'odenet-13113-n', ['Examen', 'Zula', 'Wissenschaftliche Hausarbeit', 'Staatsarbeit', 'Staatsexamensarbeit', 'Examensarbeit', 'Zulassungsarbeit', 'Qualifizierungsarbeit']), ('w35277_11774-n', 'odenet-7008-n', ['Seminararbeit', 'Hausarbeit', 'Semesterarbeit', 'Studienarbeit']), ('w35277_15512-n', 'odenet-26803-v', ['einebnen', 'dem Erdboden gleichmachen', 'völlig zerstören']), ('w35277_28819-n', 'odenet-16778-n', ['Rohling', 'Leer-DVD', 'Leer-CD'])]\n",
"HYPERNYMS: [('w8072_1635-n', 'odenet-25325-n', ['Verödung', 'Sklerotherapie']), ('w8072_33774-n', 'odenet-2254-n', ['Erscheinung', 'Phantom', 'Artefakt', 'Hirngespinst', 'Einbildung', 'Phantasmagorie', 'Sinnestäuschung', 'Schimäre', 'Trugbild', 'Chimäre'])]\n",
"HYPONYMS: [('w8072_1635-n', 'odenet-28661-n', ['Gestalt', 'Körperbau', 'Statur', 'Wuchs', 'Habitus', 'Körpergröße', '(körperliche) Konstitution']), ('w8072_1635-n', 'odenet-11145-n', ['Villa', 'Prachtbau', 'Chateau', 'Palast', 'Schloss', 'Palais']), ('w8072_1635-n', 'odenet-14061-n', ['runder Saal', 'Rotunde', 'Rundbau']), ('w8072_1635-n', 'odenet-5908-n', ['Luftschutzraum', 'Luftschutzkeller', 'Schutzraum', 'Schutzbau']), ('w8072_1635-n', 'odenet-21802-n', ['Zweckbau', 'Nutzbau']), ('w8072_33774-n', 'odenet-11952-n', ['Schule', 'Schulhaus', 'Schulgebäude']), ('w8072_33774-n', 'odenet-29385-n', ['Kreuzungsbauwerk', 'Überwerfungsbauwerk']), ('w8072_33774-n', 'odenet-3014-n', ['Tunnel', 'Tunnelbauwerk', 'Tunell'])]\n",
"MERONYMS: []\n",
"HOLONYMS: [('w35277_12463-n', 'odenet-8531-n', ['Auslass', 'Auslauf']), ('w35277_12463-n', 'odenet-30078-n', ['Ökologie', 'Bioökologie'])]\n",
"HOLONYMS: []\n",
"ANTONYMS: []\n",
"LEMMA: Werk\n",
"LEMMA: Gebäude\n",
"POS: n\n",
"SENSE ID: w35277_28819-n\n",
"LEMMA: Werk\n",
"SENSE ID: w8072_33774-n\n",
"LEMMA: Gebäude\n",
"POS: n\n",
"SENSE ID: w117549_35252-n\n"
"SENSE ID: w98500_28738-n\n"
]
}
],
"source": [
"OdeNet.word_info(\"Werk\")"
"OdeNet.word_info(\"Gebäude\")"
]
},
{
Expand Down Expand Up @@ -156,26 +150,26 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('',\n",
"('i12795',\n",
" [],\n",
" 'in gerader Richtung verlaufend, in gerader Strecke verlaufend',\n",
" [],\n",
" [('hyponym', 'odenet-25851-n')],\n",
" ['Lied', 'Chanson', 'Liedermacher-Song'])"
" ['direkt', 'geradlinig', 'frontal', 'ohne Umwege', 'geradeaus', 'ohne Umweg'])"
]
},
"execution_count": 12,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"check_synset(\"odenet-25855-n\")"
"check_synset(\"odenet-1544-a\")"
]
},
{
Expand Down
22 changes: 16 additions & 6 deletions odenet/odenet_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,34 @@ def format_odenet_oneline():



def add_rel_to_ss(synset, relation, target, wordnetfile):
    """Add a ``<SynsetRelation>`` element to one synset of a one-line-per-element wordnet file.

    The file is read completely, the line that starts the synset with id
    ``synset`` is extended by
    ``<SynsetRelation target='TARGET' relType='RELATION'/>`` and the whole
    file is written back. Each modified line is printed to stdout.

    Args:
        synset: Id of the synset that receives the relation,
            e.g. ``"odenet-11177-n"``.
        relation: Relation type written into ``relType``,
            e.g. ``"hypernym"`` or ``"antonym"``.
        target: Id of the synset the relation points to.
        wordnetfile: Path of the wordnet XML file (UTF-8), expected to hold
            each ``<Synset>`` element on a single line.

    Returns:
        None. The file is rewritten in place.
    """
    # A synset must not be related to itself.
    if synset == target:
        return

    with open(wordnetfile, "r", encoding="utf-8") as de_wn:
        lines = de_wn.readlines()

    ss_string = '<Synset id="' + synset + '"'
    rel_string = "<SynsetRelation target='" + target + "' relType='" + relation + "'/>"

    # Build the complete new content before the file is reopened for
    # writing, so an error while transforming cannot leave it truncated.
    new_lines = []
    for line in lines:
        # Skip synsets that already mention the target: keeps the call idempotent.
        if ss_string in line and target not in line:
            # count=1: insert the relation exactly once, even when the synset
            # carries several <Example> elements.
            if '<Example>' in line:
                # Relations are placed in front of the first example.
                line = line.replace('<Example>', rel_string + '<Example>', 1)
            elif '</Synset>' in line:
                line = line.replace('</Synset>', rel_string + '</Synset>', 1)
            else:
                # Self-closing <Synset .../>: open it up and close it explicitly.
                line = line.replace('/>', '>' + rel_string + '</Synset>', 1)
            print(line)
        new_lines.append(line)

    with open(wordnetfile, "w", encoding="utf-8") as out_odenet:
        out_odenet.writelines(new_lines)

# Symmetrische Relationen hinzufügen. D.h.: Im Fall von Hyperonym auch die Hyponym-Relation, im Fall von Antonym auch die Rück-Relation usw.

def add_antonym_rel_to_ss(synset, target, wordnetfile):
    """Record an antonym relation between two synsets in both directions.

    First ``synset`` receives an ``antonym`` relation pointing to ``target``,
    then ``target`` receives one pointing back to ``synset``. Both updates
    are delegated to ``add_rel_to_ss`` on ``wordnetfile``.
    """
    for source_id, target_id in ((synset, target), (target, synset)):
        add_rel_to_ss(source_id, "antonym", target_id, wordnetfile)

def add_hypernym_rel_to_ss(synset, target, wordnetfile):
    """Record a hypernym relation together with its inverse hyponym relation.

    ``synset`` receives a ``hypernym`` relation pointing to ``target``;
    afterwards ``target`` receives the matching ``hyponym`` relation pointing
    back to ``synset``. Both updates are delegated to ``add_rel_to_ss`` on
    ``wordnetfile``.
    """
    forward = (synset, "hypernym", target)
    inverse = (target, "hyponym", synset)
    for source_id, rel_type, target_id in (forward, inverse):
        add_rel_to_ss(source_id, rel_type, target_id, wordnetfile)

# Attribute in Synsets verändern, z.B. ili
# change_attribute_in_ss('odenet-412-a','ili','i10007',r"C:\Users\melaniesiegel\Documents\05_Projekte\WordNet\OdeNet\deWNaccess\odenet_oneline.xml")
Expand Down
Loading

0 comments on commit d96cb9c

Please sign in to comment.