Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use headers based on json-struct and dockerfile #6

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Container image for csv_to_elastic: bulk-imports CSV files into Elasticsearch.
FROM python:3

WORKDIR /usr/src/app

# csv_to_elastic.py parses dates with dateutil.parser; install it here, before
# copying sources, so the dependency layer is cached across code changes.
RUN pip install --no-cache-dir python-dateutil

# Copy the importer and its shell wrapper (csv_to_elastic.py / csv_to_elastic.sh).
COPY csv_to_elastic.* ./

# COPY preserves the checkout's file mode; make sure the wrapper is executable.
RUN chmod +x ./csv_to_elastic.sh

# All container arguments are forwarded to csv_to_elastic.py by the wrapper.
ENTRYPOINT ["./csv_to_elastic.sh"]
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ Required:
Specify row ID column. Used for updating data.
--delimiter DELIMITER
Delimiter to use in csv file (default is ';')
--file-without-header
Use the keys of json-struct
```

## Notes
- CSV must have headers, unless `--file-without-header` is passed (column names are then taken from the json-struct keys)
- insert elastic address (with port) as argument, it defaults to localhost:9200
- Bulk insert method is used, because inserting row by row is unbelievably slow
9 changes: 9 additions & 0 deletions cli/test-import.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Smoke test: import the bundled users.csv (which has no header row) into the
# local Elasticsearch index 'index'. --file-without-header makes the importer
# take the column names from the json-struct keys instead of the first CSV row.

JSON_STRUCT='{
"id" : "%id%",
"name" : "%name%",
"username" : "%username%"
}'

python3 csv_to_elastic.py \
    --csv-file users.csv \
    --elastic-index 'index' \
    --delimiter ',' \
    --json-struct "$JSON_STRUCT" \
    --file-without-header
23 changes: 20 additions & 3 deletions csv_to_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
from base64 import b64encode


def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_field, elastic_type, elastic_address, ssl, username, password, id_column):
def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_field, elastic_type, elastic_address, ssl, username, password, id_column, without_header):
endpoint = '/_bulk'
if max_rows is None:
max_rows_disp = "all"
Expand All @@ -87,14 +87,24 @@ def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_fi
count = 0
headers = []
headers_position = {}

headers_new = []
headers_new_position = {}

if True == without_header:
for iterator, col in enumerate(json.loads(json_struct)):
headers.append(col)
headers_position[col] = iterator

to_elastic_string = ""
with open(file_path, 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=delimiter, quotechar='"')
for row in reader:
if count == 0:
if count == 0 and len(headers) == 0:
for iterator, col in enumerate(row):
headers.append(col)
headers_position[col] = iterator

elif max_rows is not None and count >= max_rows:
print('Max rows imported - exit')
break
Expand All @@ -108,6 +118,7 @@ def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_fi
else:
_data = json_struct.replace("'", '"')
_data = _data.replace('\n','').replace('\r','')

for header in headers:
if header == datetime_field:
datetime_type = dateutil.parser.parse(row[pos])
Expand Down Expand Up @@ -232,10 +243,16 @@ def send_to_elastic(elastic_address, endpoint, ssl, username, password, to_elast
type=str,
default=";",
help='If you want to have a different delimiter than ;')
parser.add_argument('--file-without-header',
default=False,
help='If your csv file dont have a header',
action="store_true")

parsed_args = parser.parse_args()

main(file_path=parsed_args.csv_file, delimiter = parsed_args.delimiter, json_struct=parsed_args.json_struct,
elastic_index=parsed_args.elastic_index, elastic_type=parsed_args.elastic_type,
datetime_field=parsed_args.datetime_field, max_rows=parsed_args.max_rows,
elastic_address=parsed_args.elastic_address, ssl=parsed_args.ssl, username=parsed_args.username, password=parsed_args.password, id_column=parsed_args.id_column)
elastic_address=parsed_args.elastic_address, ssl=parsed_args.ssl, username=parsed_args.username, password=parsed_args.password, id_column=parsed_args.id_column,
without_header=parsed_args.file_without_header)

3 changes: 3 additions & 0 deletions csv_to_elastic.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Thin wrapper so the Docker ENTRYPOINT can forward every container argument
# to the importer unchanged.
#
# "$@" (quoted) preserves each argument as one word; the original unquoted $@
# would word-split arguments containing whitespace — e.g. the multi-line
# --json-struct value — into many broken arguments.
# exec replaces this shell with the Python process, so PID 1 is Python and
# signals (SIGTERM from `docker stop`) reach it directly.
exec python csv_to_elastic.py "$@"
22 changes: 22 additions & 0 deletions users.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
6e172695-c76c-4364-8dd9-44e6d2d3aed9,Heitor Rovaron,heitor.rovaron
4e8660b0-7350-4211-9b9b-9ba50792ccd9,Melony Terci,melony.terci
7e3d4092-6664-4162-9866-c4256507a35e,Tatiana Arrieiro Filgueira,tatianaarrieirofilgueira
d41d1b73-e28a-4464-a640-abfe1d913cfd,Nadja Elias,nadjaelias
657eb911-3fd1-4317-9430-f5f53199754c,Rafael Furtado,rafael.furtado
c5dcda1f-6d9c-4fe4-824b-ee9a0b138a9c,Luzanira Sardenberg,luzanira.sardenberg
6dec2c10-1522-4cc7-8e45-89d78a6274c2,Donizete Kohler,donizete.kohler
61423001-c484-4fbf-9ee7-9475fb93cf3c,Alinice Araujp,alinicearaujp
c720558d-652d-48d3-b952-40b16124b989,Alax Kaiser Raquel,alax.kaiser.raquel
bea7457f-3d0a-48f8-9ee9-c3ff79bf756b,Candisse Mattis,candisse.mattis
fd7ec052-7e2a-4425-a109-a4a37f105512,Ivanice Callado,ivanicecallado
18c369fe-2b6c-4638-9693-425b13b22948,Vixpark Valariano Federici,vixpark.valariano.federici
850778f4-0bc9-45d0-a732-d023a8f25196,Falfan Brant,falfanbrant
e7e46a01-9618-4f48-978c-9a712949389d,Gustav Angione,gustav.angione
64b7f8c1-6264-4edf-b47a-90f8692056e0,Emannuelly Reginaldo Fofonka,emannuelly.reginaldo.fofonka
cfe9ca52-4b8b-4a15-86f1-4b0adb0f9227,Flaviane Salgado Dudar,flaviane.salgado.dudar
93a30bf5-7c91-45a2-8624-80c79b35b422,Francisca Brunelli Mafetone,francisca.brunelli.mafetone
d2173551-7d7b-47d5-8425-133cdcb69724,Leonaia Farias,leonaiafarias
81bb7a02-c574-4114-85f9-368514aaba29,Rejane fernandes Marinonio,rejane.fernandes.marinonio
f687f04d-5f24-4327-bd74-b07c5660f364,Marcia Louredo,marcialouredo
48da4ef1-9cdf-4a18-aa93-92cdef0d6482,Joeliton RODRIGUES,joelitonrodrigues
a8461f62-d18f-4190-bf97-1684a13b5531,Everaldo Birkman L,everaldo.birkman.l