Skip to content

Commit

Permalink
sfp_ahmia: Update module code style (smicallef#1214)
Browse files Browse the repository at this point in the history
  • Loading branch information
bcoles authored Jul 4, 2021
1 parent b81b1e5 commit 1e1777a
Showing 1 changed file with 94 additions and 65 deletions.
159 changes: 94 additions & 65 deletions modules/sfp_ahmia.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name: sfp_ahmia
# Purpose: Searches the Tor search engine 'Ahmia' for content related
# to the domain in question.
# Name: sfp_ahmia
# Purpose: Searches the Tor search engine 'Ahmia' for content related to the
# target.
#
# Author: Steve Micallef <[email protected]>
#
Expand All @@ -12,6 +12,9 @@
# -------------------------------------------------------------------------------

import re
import urllib.error
import urllib.parse
import urllib.request

from spiderfoot import SpiderFootEvent, SpiderFootPlugin

Expand All @@ -20,7 +23,7 @@ class sfp_ahmia(SpiderFootPlugin):

meta = {
'name': "Ahmia",
'summary': "Search Tor 'Ahmia' search engine for mentions of the target domain.",
'summary': "Search Tor 'Ahmia' search engine for mentions of the target.",
'useCases': ["Footprint", "Investigate"],
'categories': ["Search Engines"],
'dataSource': {
Expand All @@ -47,7 +50,6 @@ class sfp_ahmia(SpiderFootPlugin):

# Default options
opts = {
# We don't bother with pagination as ahmia seems fairly limited in coverage
'fetchlinks': True,
'fullnames': True
}
Expand All @@ -58,7 +60,6 @@ class sfp_ahmia(SpiderFootPlugin):
'fullnames': "Search for human names?"
}

# Target
results = None

def setup(self, sfc, userOpts=dict()):
# What events is this module interested in for input
def watchedEvents(self):
    """Return the event types this module consumes.

    Returns:
        list: event types watched by this module
    """
    return ["DOMAIN_NAME", "HUMAN_NAME", "EMAILADDR"]

# What events this module produces
def producedEvents(self):
    """Return the event types this module emits.

    Returns:
        list: event types produced by this module
    """
    return [
        "DARKNET_MENTION_URL",
        "DARKNET_MENTION_CONTENT",
        "SEARCH_ENGINE_WEB_CONTENT"
    ]

def handleEvent(self, event):
    """Handle an inbound event: search Ahmia for the event data and report
    any darknet (.onion) mentions found in the results.

    Args:
        event (SpiderFootEvent): the event received (DOMAIN_NAME,
            HUMAN_NAME or EMAILADDR)
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    if not self.opts['fullnames'] and eventName == 'HUMAN_NAME':
        self.sf.debug(f"Skipping HUMAN_NAME: {eventData}")
        return

    # Avoid re-querying the same value within a scan.
    if eventData in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return

    self.results[eventData] = True

    params = urllib.parse.urlencode({
        'q': eventData
    })

    # Use the module-wide configurable fetch timeout, consistent with the
    # per-link fetch below (a hard-coded timeout would ignore user config).
    data = self.sf.fetchUrl(
        f"https://ahmia.fi/search/?{params}",
        useragent=self.opts['_useragent'],
        timeout=self.opts['_fetchtimeout']
    )

    if not data:
        self.sf.info(f"No results for {eventData} returned from Ahmia.fi.")
        return

    content = data.get('content')

    if not content:
        self.sf.info(f"No results for {eventData} returned from Ahmia.fi.")
        return

    # We don't bother with pagination as Ahmia seems fairly limited in coverage
    # and displays hundreds of results per page
    links = re.findall("redirect_url=(.[^\"]+)\"", content, re.IGNORECASE | re.DOTALL)

    if not links:
        self.sf.info(f"No results for {eventData} returned from Ahmia.fi.")
        return

    reported = False
    for link in links:
        if self.checkForStop():
            return

        if link in self.results:
            continue

        self.results[link] = True

        self.sf.debug(f"Found a darknet mention: {link}")

        # Only .onion results are darknet mentions.
        if not self.sf.urlFQDN(link).endswith(".onion"):
            continue

        if not self.opts['fetchlinks']:
            # Report the URL without verifying the page content.
            evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
            self.notifyListeners(evt)
            reported = True
            continue

        res = self.sf.fetchUrl(
            link,
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent'],
            verify=False
        )

        if res['content'] is None:
            self.sf.debug(f"Ignoring {link} as no data returned")
            continue

        # Only report pages which actually mention the target.
        if eventData not in res['content']:
            self.sf.debug(f"Ignoring {link} as no mention of {eventData}")
            continue

        evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
        self.notifyListeners(evt)
        reported = True

        try:
            # Extract ~120 chars of context either side of the mention.
            # Clamp to 0 so a mention near the start of the page doesn't
            # produce a negative slice index (which would wrap around and
            # yield the wrong snippet).
            startIndex = max(0, res['content'].index(eventData) - 120)
            endIndex = startIndex + len(eventData) + 240
        except Exception:
            self.sf.debug(f"String '{eventData}' not found in content.")
            continue

        wdata = res['content'][startIndex:endIndex]
        evt = SpiderFootEvent("DARKNET_MENTION_CONTENT", f"...{wdata}...", self.__name__, evt)
        self.notifyListeners(evt)

    if reported:
        # Submit the search results for analysis
        evt = SpiderFootEvent(
            "SEARCH_ENGINE_WEB_CONTENT",
            content,
            self.__name__,
            event
        )
        self.notifyListeners(evt)

# End of sfp_ahmia class

0 comments on commit 1e1777a

Please sign in to comment.