-
Notifications
You must be signed in to change notification settings - Fork 0
/
Getfiles.py
80 lines (68 loc) · 2.43 KB
/
Getfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#this gets a list of all the filenames from the data directory
#needs to be able to iterate through directories within data directories
#needs to go through multiple zipped files -- use os.walk()
#needs to unzip files
#
#then calls the xml parser iteratively, changing the file name every time
from zipfile38 import ZipFile
import os
import pandas as pd
import xml.etree.ElementTree as ET
import csv
from bs4 import BeautifulSoup
import lxml
import xmlparser
import time
import SnowflakeUpload
import MAParser
# Placeholder for the root data directory; replace with a real path before running.
directory = r'Insert Path here'
def get_file_names(directory):
    """Return the paths of all ``.zip`` files found anywhere under *directory*.

    The tree is walked bottom-up (``topdown=False``), so archives in nested
    folders are listed before those in their parent folders.

    Args:
        directory: Root folder to search. May be absolute or relative to the
            current working directory.

    Returns:
        A list of absolute paths, one per file whose name ends with ``.zip``.
        Full paths are returned (rather than bare file names) so that archives
        located in sub-folders can actually be opened by the caller.

    Raises:
        OSError: If *directory* does not exist or cannot be entered
            (raised by ``os.chdir``).

    Side effects:
        Changes the process working directory to *directory*.
    """
    # Resolve to an absolute path BEFORE changing directory; otherwise a
    # relative ``directory`` would be interpreted a second time against the
    # new working directory and os.walk would look at the wrong tree.
    root_dir = os.path.abspath(directory)
    # Kept from the original implementation: later steps may rely on the
    # working directory being the data directory.
    os.chdir(root_dir)
    suffix = ".zip"
    names = []
    for folder, _dirs, files in os.walk(root_dir, topdown=False):
        for name in files:
            if name.endswith(suffix):
                # Join with the folder being walked so nested files stay reachable.
                names.append(os.path.join(folder, name))
    return names
def extractor(names):
    """Extract each zip archive into a sibling folder named after the archive.

    For every path in *names* the archive is opened, its contents are
    extracted into a folder whose path is the archive path without its
    extension (``data/foo.zip`` -> ``data/foo``), and the archive is closed.
    The archive file itself is left in place; it is not deleted.

    Args:
        names: Iterable of paths to zip archives. Relative paths are resolved
            against the current working directory.

    Returns:
        A list with the destination folder of each archive, in the same order
        as *names*. An empty input yields an empty list.

    Raises:
        OSError: If an archive cannot be opened or extracted.
        Exception: Whatever ``ZipFile`` raises for a corrupt archive.
    """
    unzip_folders = []
    for zip_path in names:
        # Strip the extension to get the destination folder name.
        target = os.path.splitext(zip_path)[0]
        # The context manager guarantees the archive is closed even when
        # extraction fails part-way through.
        with ZipFile(zip_path, "r") as archive:
            archive.extractall(path=target)
        unzip_folders.append(target)
    return unzip_folders
def call_parser(directory=r'Insert Path here'):
    """Call the XML parser once for every folder found under *directory*.

    The tree is walked bottom-up; for each sub-folder the folder path is
    printed and ``xmlparser.iteratebasicfpls`` is called with the folder path
    and two output names derived from it.

    Args:
        directory: Root folder to walk. Defaults to the placeholder path that
            was previously hard-coded, so existing ``call_parser()`` calls
            behave as before.

    Returns:
        None.
    """
    for root, dirs, _files in os.walk(directory, topdown=False):
        for name in dirs:
            # ``name`` is relative to ``root`` (the folder currently being
            # walked), not to the top-level directory; joining with ``root``
            # keeps paths of nested folders correct.
            filepath = os.path.join(root, name)
            print(filepath)
            # NOTE(review): the last four characters of the folder path are
            # dropped before the suffix is appended. This looks like it was
            # copied from the ".zip" stripping in extractor -- confirm that
            # this naming is intended before changing it.
            output1 = filepath[:-4] + 'geninfo'
            output2 = filepath[:-4] + 'genofficerinfo'
            print("Calling Parser")
            xmlparser.iteratebasicfpls(filepath, output1, output2)
    return None
if __name__ == '__main__':
    # Script entry point: stamp the data, then upload it to Snowflake.
    # The start time is captured here but not read anywhere in this block.
    start_time = time.time()
    # Earlier pipeline stages, currently disabled:
    #zipfiles = get_file_names(directory)
    #unzipfps = extractor(zipfiles)
    #call_parser()
    #MAParser.parsefiles()
    SnowflakeUpload.add_timestamp()
    # Open the connection and hand it straight to the uploader.
    SnowflakeUpload.uploadsingle(SnowflakeUpload.snowflakeconn())