Skip to content

Commit

Permalink
#694: replace splitlines with split on Harvest and blacklists to avoid…
Browse files Browse the repository at this point in the history
… two URLs on one line
  • Loading branch information
Fasand committed Jul 18, 2024
1 parent afb797a commit f520bf4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions Seeder/blacklists/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def collect_urls_by_type(cls, blacklist_type):
blacklist_type=blacklist_type
).values_list('url_list', flat=True)
# blacklist_urls is now a list of the url_list contents
urls_parsed = map(str.splitlines, blacklist_urls)
urls_parsed = map(str.split, blacklist_urls)
return reduce(operator.add, urls_parsed, [])

@classmethod
Expand All @@ -57,6 +57,6 @@ def last_change(cls):
@classmethod
def dump(cls):
blacklist_urls = cls.objects.all().values_list('url_list', flat=True)
urls_parsed = map(str.splitlines, blacklist_urls)
urls_parsed = map(str.split, blacklist_urls)
# Remove duplicates
return list(set(reduce(operator.add, urls_parsed, [])))
10 changes: 5 additions & 5 deletions Seeder/harvests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def repr(self):

return u'FRQ: {0}, custom seeds: {1}, custom sources: {2}'.format(
self.get_target_frequency_display(),
len(self.custom_seeds.splitlines()),
len(self.custom_seeds.split()),
self.custom_sources.count()
)

Expand Down Expand Up @@ -118,7 +118,7 @@ def pair_custom_seeds(self):
## Still takes a lot of time due to 'icontains'
## Potentially can return wrong things because of the 'icontains'
query = Q()
for seed_url in self.custom_seeds.splitlines():
for seed_url in self.custom_seeds.split():
query |= Q(seed__url__icontains=seed_url)
sources = Source.objects.filter(
query, seed__state=source_constants.SEED_STATE_INCLUDE)
Expand Down Expand Up @@ -146,7 +146,7 @@ def get_custom_seeds(self):
if not self.custom_seeds:
return set()
# Unwanted tabs and newlines can appear when entering as text
return set(map(str.strip, self.custom_seeds.splitlines())) - set([""])
return set(self.custom_seeds.split()) - set([""])

def get_custom_sources_seeds(self):
seeds = Seed.objects.filter(
Expand All @@ -158,7 +158,7 @@ def get_seeds(self, blacklisted=None, frozen_only=False):
:return: set of urls
"""
if self.seeds_frozen and self.seeds_frozen != '':
return set(self.seeds_frozen.splitlines())
return set(self.seeds_frozen.split())
if frozen_only: # Prematurely return so seeds aren't computed
return set()

Expand Down Expand Up @@ -505,7 +505,7 @@ def get_topic_collection_seeds(self, slug):

def get_seeds(self, blacklisted=None, frozen_only=False):
if self.seeds_frozen and self.seeds_frozen != '':
return set(self.seeds_frozen.splitlines())
return set(self.seeds_frozen.split())
if frozen_only: # Prematurely return so seeds aren't computed
return set()

Expand Down
2 changes: 1 addition & 1 deletion Seeder/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def get_context_data(self, **kwargs):
'url': url,
'wayback_url': get_wayback_url(url)
}
for url in set(self.get_object().custom_seeds.splitlines())
for url in set(self.get_object().custom_seeds.split())
]

page = self.get_page_num()
Expand Down

0 comments on commit f520bf4

Please sign in to comment.