diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..22af8e0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,7 @@
+FROM python:3
+
+WORKDIR /usr/src/app
+
+COPY csv_to_elastic.* ./
+
+ENTRYPOINT ["./csv_to_elastic.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 989ee34..1a0a122 100644
--- a/README.md
+++ b/README.md
@@ -85,9 +85,11 @@ Required:
                         Specify row ID column. Used for updating data.
   --delimiter DELIMITER
                         Delimiter to use in csv file (default is ';')
+  --file-without-header
+                        Use the json-struct keys as headers (CSV has no header row)
 ```
 
 ## Notes
-  - CSV must have headers
+  - CSV must have headers, unless --file-without-header is given
   - insert elastic address (with port) as argument, it defaults to localhost:9200
   - Bulk insert method is used, because inserting row by row is unbelievably slow
diff --git a/cli/test-import.sh b/cli/test-import.sh
new file mode 100755
index 0000000..7074c18
--- /dev/null
+++ b/cli/test-import.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+python3 csv_to_elastic.py --csv-file users.csv --elastic-index 'index' \
+    --delimiter ',' \
+    --json-struct '{
+        "id" : "%id%",
+        "name" : "%name%",
+        "username" : "%username%"
+    }' --file-without-header
\ No newline at end of file
diff --git a/csv_to_elastic.py b/csv_to_elastic.py
index 75a3924..774851e 100644
--- a/csv_to_elastic.py
+++ b/csv_to_elastic.py
@@ -72,7 +72,7 @@ from base64 import b64encode
 
 
 
-def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_field, elastic_type, elastic_address, ssl, username, password, id_column):
+def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_field, elastic_type, elastic_address, ssl, username, password, id_column, without_header):
     endpoint = '/_bulk'
     if max_rows is None:
         max_rows_disp = "all"
@@ -87,14 +87,21 @@ def main(file_path, delimiter, max_rows, elastic_index, json_struct, datetime_fi
     count = 0
     headers = []
     headers_position = {}
+
+    # Without a header row, the json-struct keys double as the column headers
+    if without_header:
+        for iterator, col in enumerate(json.loads(json_struct.replace("'", '"'))):
+            headers.append(col)
+            headers_position[col] = iterator
+
     to_elastic_string = ""
     with open(file_path, 'r') as csvfile:
         reader = csv.reader(csvfile, delimiter=delimiter, quotechar='"')
         for row in reader:
-            if count == 0:
+            if count == 0 and not headers:
                 for iterator, col in enumerate(row):
                     headers.append(col)
                     headers_position[col] = iterator
             elif max_rows is not None and count >= max_rows:
                 print('Max rows imported - exit')
                 break
@@ -232,10 +239,15 @@ def send_to_elastic(elastic_address, endpoint, ssl, username, password, to_elast
                         type=str,
                         default=";",
                         help='If you want to have a different delimiter than ;')
+    parser.add_argument('--file-without-header',
+                        default=False,
+                        help="If your CSV file doesn't have a header row",
+                        action="store_true")
 
     parsed_args = parser.parse_args()
 
     main(file_path=parsed_args.csv_file, delimiter = parsed_args.delimiter, json_struct=parsed_args.json_struct,
          elastic_index=parsed_args.elastic_index, elastic_type=parsed_args.elastic_type, datetime_field=parsed_args.datetime_field, max_rows=parsed_args.max_rows,
-         elastic_address=parsed_args.elastic_address, ssl=parsed_args.ssl, username=parsed_args.username, password=parsed_args.password, id_column=parsed_args.id_column)
+         elastic_address=parsed_args.elastic_address, ssl=parsed_args.ssl, username=parsed_args.username, password=parsed_args.password, id_column=parsed_args.id_column,
+         without_header=parsed_args.file_without_header)
 
diff --git a/csv_to_elastic.sh b/csv_to_elastic.sh
new file mode 100755
index 0000000..395940d
--- /dev/null
+++ b/csv_to_elastic.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python csv_to_elastic.py "$@"
\ No newline at end of file
diff --git a/users.csv b/users.csv
new file mode 100644
index 0000000..d120fbc
--- /dev/null
+++ b/users.csv
@@ -0,0 +1,22 @@
+6e172695-c76c-4364-8dd9-44e6d2d3aed9,Heitor Rovaron,heitor.rovaron
+4e8660b0-7350-4211-9b9b-9ba50792ccd9,Melony Terci,melony.terci
+7e3d4092-6664-4162-9866-c4256507a35e,Tatiana Arrieiro Filgueira,tatianaarrieirofilgueira
+d41d1b73-e28a-4464-a640-abfe1d913cfd,Nadja Elias,nadjaelias
+657eb911-3fd1-4317-9430-f5f53199754c,Rafael Furtado,rafael.furtado
+c5dcda1f-6d9c-4fe4-824b-ee9a0b138a9c,Luzanira Sardenberg,luzanira.sardenberg
+6dec2c10-1522-4cc7-8e45-89d78a6274c2,Donizete Kohler,donizete.kohler
+61423001-c484-4fbf-9ee7-9475fb93cf3c,Alinice Araujp,alinicearaujp
+c720558d-652d-48d3-b952-40b16124b989,Alax Kaiser Raquel,alax.kaiser.raquel
+bea7457f-3d0a-48f8-9ee9-c3ff79bf756b,Candisse Mattis,candisse.mattis
+fd7ec052-7e2a-4425-a109-a4a37f105512,Ivanice Callado,ivanicecallado
+18c369fe-2b6c-4638-9693-425b13b22948,Vixpark Valariano Federici,vixpark.valariano.federici
+850778f4-0bc9-45d0-a732-d023a8f25196,Falfan Brant,falfanbrant
+e7e46a01-9618-4f48-978c-9a712949389d,Gustav Angione,gustav.angione
+64b7f8c1-6264-4edf-b47a-90f8692056e0,Emannuelly Reginaldo Fofonka,emannuelly.reginaldo.fofonka
+cfe9ca52-4b8b-4a15-86f1-4b0adb0f9227,Flaviane Salgado Dudar,flaviane.salgado.dudar
+93a30bf5-7c91-45a2-8624-80c79b35b422,Francisca Brunelli Mafetone,francisca.brunelli.mafetone
+d2173551-7d7b-47d5-8425-133cdcb69724,Leonaia Farias,leonaiafarias
+81bb7a02-c574-4114-85f9-368514aaba29,Rejane fernandes Marinonio,rejane.fernandes.marinonio
+f687f04d-5f24-4327-bd74-b07c5660f364,Marcia Louredo,marcialouredo
+48da4ef1-9cdf-4a18-aa93-92cdef0d6482,Joeliton RODRIGUES,joelitonrodrigues
+a8461f62-d18f-4190-bf97-1684a13b5531,Everaldo Birkman L,everaldo.birkman.l
\ No newline at end of file
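
A minimal, self-contained sketch of the behaviour the new `--file-without-header` flag adds, extracted from the csv_to_elastic.py hunk above: when the CSV has no header row, the json-struct keys stand in for the headers, in declaration order. The `json_struct` value here mirrors the one in cli/test-import.sh; the snippet is illustrative only, not part of the patch.

```python
import json

# Same shape as the --json-struct argument in cli/test-import.sh
json_struct = '{"id": "%id%", "name": "%name%", "username": "%username%"}'

headers = []
headers_position = {}
# dicts preserve insertion order (Python 3.7+), so the struct keys
# map positionally onto the CSV columns: id -> 0, name -> 1, username -> 2
for iterator, col in enumerate(json.loads(json_struct)):
    headers.append(col)
    headers_position[col] = iterator

print(headers)           # ['id', 'name', 'username']
print(headers_position)  # {'id': 0, 'name': 1, 'username': 2}
```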