From d85b6d285185482a489fdaee83e4f8091966f5ad Mon Sep 17 00:00:00 2001
From: bcoles
Date: Mon, 5 Jul 2021 03:06:33 +1000
Subject: [PATCH] sfp_names: Fix HUMAN_NAME extraction from email addresses (#1215)

---
 modules/sfp_names.py                | 32 +++++-----
 test/unit/modules/test_sfp_names.py | 93 +++++++++++++++++++++++++++--
 2 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/modules/sfp_names.py b/modules/sfp_names.py
index 110d78348a..12bca702d5 100644
--- a/modules/sfp_names.py
+++ b/modules/sfp_names.py
@@ -82,27 +82,27 @@ def handleEvent(self, event):
                     self.sf.debug("Ignoring web content from CSS/JS.")
                     return None
 
+        # Find names in email addresses in "<firstname>.<lastname>@<domain>" format
         if eventName == "EMAILADDR" and self.opts['emailtoname']:
-            if "." in eventData.split("@")[0]:
-                if type(eventData) == str:
-                    name = " ".join(map(str.capitalize, eventData.split("@")[0].split(".")))
-                else:
-                    name = " ".join(map(str.capitalize, eventData.split("@")[0].split(".")))
-                    name = str(name)
+            potential_name = eventData.split("@")[0]
 
-                # Names don't have numbers
-                if re.match("[0-9]*", name):
-                    return None
+            if "." not in potential_name:
+                return None
 
-                # Notify other modules of what you've found
-                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
-                if event.moduleDataSource:
-                    evt.moduleDataSource = event.moduleDataSource
-                else:
-                    evt.moduleDataSource = "Unknown"
-                self.notifyListeners(evt)
+            name = " ".join(map(str.capitalize, potential_name.split(".")))
+
+            # Names usually do not contain numbers
+            if re.search("[0-9]", name):
                 return None
 
+            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
+            if event.moduleDataSource:
+                evt.moduleDataSource = event.moduleDataSource
+            else:
+                evt.moduleDataSource = "Unknown"
+            self.notifyListeners(evt)
+            return None
+
         # For RAW_RIR_DATA, there are only specific modules we
         # expect to see RELEVANT names within.
         if eventName == "RAW_RIR_DATA":
diff --git a/test/unit/modules/test_sfp_names.py b/test/unit/modules/test_sfp_names.py
index d5399e41e8..e4996b16ff 100644
--- a/test/unit/modules/test_sfp_names.py
+++ b/test/unit/modules/test_sfp_names.py
@@ -34,26 +34,107 @@ def test_producedEvents_should_return_list(self):
         module = sfp_names()
         self.assertIsInstance(module.producedEvents(), list)
 
-    def test_handleEvent(self):
-        """
-        Test handleEvent(self, event)
-        """
+    def test_handleEvent_event_data_email_address_containing_human_names_should_return_event(self):
+        sf = SpiderFoot(self.default_options)
+
+        module = sfp_names()
+        module.setup(sf, dict())
+
+        target_value = 'spiderfoot.net'
+        target_type = 'INTERNET_NAME'
+        target = SpiderFootTarget(target_value, target_type)
+        module.setTarget(target)
+
+        def new_notifyListeners(self, event):
+            expected = 'HUMAN_NAME'
+            if str(event.eventType) != expected:
+                raise Exception(f"{event.eventType} != {expected}")
+
+            expected = "Firstname Lastname"
+            if str(event.data) != expected:
+                raise Exception(f"{event.data} != {expected}")
+
+            raise Exception("OK")
+
+        module.notifyListeners = new_notifyListeners.__get__(module, sfp_names)
+
+        event_type = 'ROOT'
+        event_data = 'example data'
+        event_module = ''
+        source_event = ''
+        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
+
+        event_type = 'EMAILADDR'
+        event_data = 'firstname.lastname@spiderfoot.net'
+        event_module = 'example module'
+        source_event = evt
+        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
+
+        with self.assertRaises(Exception) as cm:
+            module.handleEvent(evt)
+
+        self.assertEqual("OK", str(cm.exception))
+
+    def test_handleEvent_event_data_email_address_containing_human_names_containing_numbers_should_not_return_event(self):
+        sf = SpiderFoot(self.default_options)
+
+        module = sfp_names()
+        module.setup(sf, dict())
+
+        target_value = 'spiderfoot.net'
+        target_type = 'INTERNET_NAME'
+        target = SpiderFootTarget(target_value, target_type)
+        module.setTarget(target)
+
+        def new_notifyListeners(self, event):
+            raise Exception(f"Raised event {event.eventType}: {event.data}")
+
+        module.notifyListeners = new_notifyListeners.__get__(module, sfp_names)
+
+        event_type = 'ROOT'
+        event_data = 'example data'
+        event_module = ''
+        source_event = ''
+        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
+
+        event_type = 'EMAILADDR'
+        event_data = 'firstname.lastname1@spiderfoot.net'
+        event_module = 'example module'
+        source_event = evt
+        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
+
+        result = module.handleEvent(evt)
+
+        self.assertIsNone(result)
+
+    def test_handleEvent_event_data_email_address_not_containing_names_should_not_return_event(self):
         sf = SpiderFoot(self.default_options)
 
         module = sfp_names()
         module.setup(sf, dict())
 
-        target_value = 'example target value'
-        target_type = 'IP_ADDRESS'
+        target_value = 'spiderfoot.net'
+        target_type = 'INTERNET_NAME'
         target = SpiderFootTarget(target_value, target_type)
         module.setTarget(target)
 
+        def new_notifyListeners(self, event):
+            raise Exception(f"Raised event {event.eventType}: {event.data}")
+
+        module.notifyListeners = new_notifyListeners.__get__(module, sfp_names)
+
         event_type = 'ROOT'
         event_data = 'example data'
         event_module = ''
         source_event = ''
         evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
 
+        event_type = 'EMAILADDR'
+        event_data = 'lastname@spiderfoot.net'
+        event_module = 'example module'
+        source_event = evt
+        evt = SpiderFootEvent(event_type, event_data, event_module, source_event)
+
         result = module.handleEvent(evt)
 
         self.assertIsNone(result)