You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I ran the command python -m wikiextractor.WikiExtractor enwiki-20220701-pages-articles-multistream.xml -o enwiki/ --json --html but got the following errors:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: ' '
Process ForkProcess-35:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: ' '
Process ForkProcess-13:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: 'ፐ'
Process ForkProcess-12:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: '𐤅'
Process ForkProcess-24:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: '&'
Process ForkProcess-21:
Traceback (most recent call last):
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/WikiExtractor.py", line 473, in extract_process
Extractor(*job[:-1]).extract(out, html_safe) # (id, urlbase, title, page)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 857, in extract
text = self.clean_text(text, html_safe=html_safe)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 847, in clean_text
text = compact(text, mark_headers=mark_headers)
File "/data/v-wangyuxin/miniconda3/envs/plotmachine/lib/python3.8/site-packages/wikiextractor/extract.py", line 256, in compact
page.append(listItem[n] % line)
KeyError: ' '
How can I solve this?
The text was updated successfully, but these errors were encountered:
I ran the command
python -m wikiextractor.WikiExtractor enwiki-20220701-pages-articles-multistream.xml -o enwiki/ --json --html
but got the following errors: How can I solve this?
The text was updated successfully, but these errors were encountered: