Skip to content

Commit

Permalink
handle acl
Browse files (browse the repository at this point in the history)
  • Loading branch information
danielecalda committed Apr 11, 2023
1 parent 89d429a commit fcbeabc
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 12 deletions.
18 changes: 11 additions & 7 deletions email-parser/app/imap/imap_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@

class AsyncEmailExtraction(threading.Thread):

def __init__(self, mail_server, port, username, password, timestamp, datasource_id, folder, schedule_id, tenant_id):
def __init__(self, mail_server, port, username, password, timestamp, datasource_id, folder, schedule_id, tenant_id,
index_acl):
super(AsyncEmailExtraction, self).__init__()

self.mail_server = mail_server
Expand All @@ -49,6 +50,7 @@ def __init__(self, mail_server, port, username, password, timestamp, datasource_
self.folder = folder
self.schedule_id = schedule_id
self.tenant_id = tenant_id
self.index_acl = index_acl

self.status_logger = logging.getLogger('email-logger')

Expand Down Expand Up @@ -111,21 +113,23 @@ def extract(self):

payload = {
"datasourceId": self.datasource_id,
"contentId": msg_id,
"contentId": str(msg_id).replace("<", "").replace(">", ""),
"parsingDate": int(end_timestamp),
"rawContent": raw_msg,
"rawContent": "",
"datasourcePayload": datasource_payload,
"resources": {
"binaries": binaries,
"binaries": [],
"splitBinaries": True
},
"scheduleId": self.schedule_id,
"tenantId": self.tenant_id,
"acl": {
"email": acl_list
}
}

if self.index_acl:
payload["acl"] = acl_list
else:
payload["acl"] = []

try:
post_message(ingestion_url, payload, 10)
# self.status_logger.info(payload)
Expand Down
35 changes: 31 additions & 4 deletions email-parser/app/imap/util/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,47 @@ def parse_email(fetched_msg):

msg_date = datetime.fromtimestamp(local_date).strftime("%a, %d %b %Y %H:%M:%S")

acl_list = [email.utils.parseaddr(msg_from)[1]]

raw_msg = ""
if msg_subject is not None:
raw_msg = raw_msg + "Subject: " + msg_subject

msg_from_email = email.utils.parseaddr(msg_from)[1]
msg_from_user = email.utils.parseaddr(msg_from)[0]
if len(msg_from_user) == 0:
msg_from_user = None

acl_list = [msg_from_email]

raw_msg = raw_msg + " Date: " + msg_date + " From: " + msg_from

msg_to_email = msg_to_user = msg_cc_email = msg_cc_user = None

if msg_to is not None:

msg_to_email = [email.utils.parseaddr(to)[1] for to in str(msg_to).split(",")]
msg_to_user = [email.utils.parseaddr(to)[0] if len(to) > 0 else None for to in str(msg_to).split(",")]

raw_msg = raw_msg + " To: " + msg_to
acl_list.append(msg_to)

acl_list = acl_list + msg_to_email

if msg_cc is not None:

msg_cc_email = [email.utils.parseaddr(cc)[1] for cc in str(msg_cc).split(",")]
msg_cc_user = [email.utils.parseaddr(cc)[0] if len(cc) > 0 else None for cc in str(msg_cc).split(",")]

raw_msg = raw_msg + " CC: " + msg_cc

acl_list = acl_list + msg_cc_email

raw_msg = raw_msg + " " + body

struct_msg = {'date': (local_date * 1000), 'from': msg_from, 'subject': msg_subject, 'to': msg_to, 'cc': msg_cc,
struct_msg = {'date': (local_date * 1000),
'from_user': msg_from_user, 'from_email': msg_from_email,
'to_user': msg_to_user, 'to_email': msg_to_email,
'cc_user': msg_cc_user, 'cc_email': msg_cc_email,
'subject': msg_subject,
'to': msg_to, 'cc': msg_cc,
'body': body, "htmlBody": raw_body}

return raw_msg, struct_msg, msg_id, binaries, acl_list
4 changes: 3 additions & 1 deletion email-parser/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class ImapRequest(BaseModel):
scheduleId: str
folder: str
tenantId: str
indexAcl: bool


@app.post("/execute")
Expand All @@ -49,9 +50,10 @@ def get_data(request: ImapRequest):
folder = request["folder"]
schedule_id = request["scheduleId"]
tenant_id = request["tenantId"]
index_acl = request["indexAcl"]

email_extraction_task = AsyncEmailExtraction(mail_server, port, username, password, timestamp, datasource_id,
folder, schedule_id, tenant_id)
folder, schedule_id, tenant_id, index_acl)

thread = threading.Thread(target=email_extraction_task.extract())
thread.start()
Expand Down

0 comments on commit fcbeabc

Please sign in to comment.