Skip to content

Commit

Permalink
#180 generated piece 4 data and some update on wikiapi development fo…
Browse files Browse the repository at this point in the history
…r fast actor coding
  • Loading branch information
YanLiang1102 committed Jul 4, 2017
1 parent 3bb809b commit 84900b0
Show file tree
Hide file tree
Showing 1,434 changed files with 225,400 additions and 306 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 8,
"metadata": {
"collapsed": true
},
Expand All @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {
"collapsed": true
},
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {
"collapsed": true
},
Expand Down Expand Up @@ -102,10 +102,8 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def clean_line(line):\n",
Expand Down Expand Up @@ -158,36 +156,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"18390"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dict_dict)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -198,7 +167,7 @@
" 'roles': ['[BHRGOV 070101-100831]']}"
]
},
"execution_count": 14,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -229,7 +198,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 7,
"metadata": {
"collapsed": true
},
Expand All @@ -253,66 +222,228 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'actor_ar': 'أحمد الجلبي',\n",
" 'actor_en': 'AHMAD_CHALABI',\n",
" 'alt_names_ar': ['أحمد شلبي', 'أحمد جلبي'],\n",
" 'alt_names_en': [],\n",
" 'roles': ['[IRQELI 620101-030901]',\n",
" '[IRQGOV 030901-030930]',\n",
" '[IRQGOV 031101-040630]',\n",
" '[IRQGOV 050601-060531]',\n",
" '[IRQELI]']}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eng_to_ar(dict_dict[7777])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def grabAllTheEnglishNamesThatNoArName(dict_dict):\n",
" noFindList=[]\n",
" #count=0\n",
" for item in dict_dict:\n",
" if(item['actor_en']!=\"\"):\n",
" try:\n",
" eng_to_ar(item)\n",
" temp=eng_to_ar(item)\n",
" if(temp['actor_ar']==''):\n",
" noFindList.append(item['actor_en'])\n",
" except Exception as e:\n",
" noFindList.append(item['actor_en'])\n",
" else\n",
" #print(e)\n",
" return noFindList "
]
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"count=0\n",
"for item in dict_dict:\n",
" if(item[\"actor_en\"]!=\"\"):\n",
" count=count+1\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'SIBGHATULLAH_MOJADEDI'"
"18389"
]
},
"execution_count": 71,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notfind[3]"
"count"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 27min 40s, sys: 13.6 s, total: 27min 54s\n",
"Wall time: 4h 34min 55s\n"
]
}
],
"source": [
"%%time\n",
"anotherNoFind=grabAllTheEnglishNamesThatNoArName(dict_dict)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15949"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(anotherNoFind)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 27min 37s, sys: 12.3 s, total: 27min 49s\n",
"Wall time: 4h 30min 43s\n"
]
}
],
"source": [
"%%time\n",
"notfind=grabAllTheEnglishNamesThatNoArName(dict_dict)\n",
"#then dump the data to pickle\n",
"# try:\n",
"# with open(\"noFindWord\", 'wb') as f:\n",
"# pickle.dump(notfind, f, pickle.HIGHEST_PROTOCOL)\n",
"# except:\n",
"# print(\"failed to save the result to disk\")\n",
"# pass\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15949"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(notfind)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" with open(\"noFindWord\", 'wb') as f:\n",
" pickle.dump(notfind, f, pickle.HIGHEST_PROTOCOL)\n",
" with open(\"notFindFromPreviousMethod\",'wb') as f:\n",
" pickle.dump(notfind,f,pickle.HIGHEST_PROTOCOL)\n",
"except:\n",
" print(\"failed to save the result to disk\")\n",
" pass\n"
" print(\"failed to save\")\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 26,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'AZAM_DADFAR'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notfind[33]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2441"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notfind"
"len(dict_dict)-len(notfind)"
]
},
{
Expand Down
14 changes: 14 additions & 0 deletions otherHelperCode/english_to_arabic_dictionary/geckodriver.log
Original file line number Diff line number Diff line change
Expand Up @@ -653,3 +653,17 @@ JavaScript warning: https://a.slack-edge.com/bv1-1/rollup-brand.c4ec2770eb61bcaf
1498966171101 Marionette INFO Ceased listening
1498966174287 Marionette INFO Ceased listening
1498966181329 Marionette INFO Ceased listening
1499194286927 geckodriver INFO Listening on 127.0.0.1:36094
1499194288021 geckodriver::marionette INFO Starting browser /usr/lib/firefox/firefox.sh with args ["-marionette"]
1499194291685 Marionette INFO Listening on port 33840
1499194291759 Marionette WARN TLS certificate errors will be ignored for this session
1499194315617 geckodriver INFO Listening on 127.0.0.1:59905
1499194316713 geckodriver::marionette INFO Starting browser /usr/lib/firefox/firefox.sh with args ["-marionette"]
1499194320333 Marionette INFO Listening on port 44874
1499194320432 Marionette WARN TLS certificate errors will be ignored for this session
*************************
A coding exception was thrown and uncaught in a Task.

Full message: TypeError: NetworkError when attempting to fetch resource.
Full stack:
*************************
Loading

0 comments on commit 84900b0

Please sign in to comment.