From 6887770dc7c7ddae70dc94755ee9a49941c6fd20 Mon Sep 17 00:00:00 2001 From: sean-eagles <112004392+sean-eagles@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:52:19 -0400 Subject: [PATCH] Update GeoCore_to_GeoJSON_parse.py Eliminated the second loop, as it was not needed. Also de-linted the file. Still in Python 2.7 due to ESRI desktop limitations. --- GeoCore_to_GeoJSON_parse.py | 266 +++++++++++++----------------------- 1 file changed, 95 insertions(+), 171 deletions(-) diff --git a/GeoCore_to_GeoJSON_parse.py b/GeoCore_to_GeoJSON_parse.py index da8fb94..fc2533d 100644 --- a/GeoCore_to_GeoJSON_parse.py +++ b/GeoCore_to_GeoJSON_parse.py @@ -1,9 +1,8 @@ -import geojson -import json -import arcpy import urllib2 from contextlib import closing +import geojson import dbf +import arcpy def main(): pass @@ -18,153 +17,117 @@ def download_url(url, save_path): # Variables: # # to do: change hardcoded to variables - folder = "C:/TEMP" - filename = "catalogue_scrape.dbf" - catalogue_scrape = folder + "//" + filename - filename_geocore = "000183ed-8864-42f0-ae43-c4313a860720.geojson" - geojson_url = 'https://geocore.metadata.geo.ca/' + str(filename_geocore) - save_path = "C:/TEMP/{}".format(filename_geocore) - file_id = str(filename_geocore) + FOLDER = "C:/TEMP" + FILENAME = "catalogue_scrape.dbf" + CATALOGUE_SCRAPE = FOLDER + "//" + FILENAME + #filename_geocore = "34c0dbf2-9595-84f3-679c-7d2d7c90ecfe.geojson" #worked + #filename_geocore = "000183ed-8864-42f0-ae43-c4313a860720.geojson" #worked + #filename_geocore = "cebc283f-bae1-4eae-a91f-a26480cd4e4a.geojson" #worked + #filename_geocore = "2606b1b4-c895-4d23-b466-ad3d64b6381e.geojson" #worked + #filename_geocore = "08166334-889d-4c3a-9b25-b1b85ba48f2c.geojson" #worked + #filename_geocore = "05002515-f6cc-4516-b225-38d510eaaf9c.geojson" #worked + #filename_geocore = "090494a6-8aaf-4c26-b7b3-cf52400bf619.geojson" #worked + #filename_geocore = "054cf636-6637-2508-9aef-0f9139734f4a.geojson" #worked, 42 records long +
#filename_geocore = "02d6f853-b0fe-4aa4-bc73-dff0db45d8ae.geojson" + #filename_geocore = "d2af02fe-9e12-413d-8959-06be963bde52.geojson" + FILENAME_GEOCORE = "9e1507cd-f25c-4c64-995b-6563bf9d65bd.geojson" + GEOJSON_URL = 'https://geocore.metadata.geo.ca/' + str(FILENAME_GEOCORE) + SAVE_PATH = "C:/TEMP/{}".format(FILENAME_GEOCORE) + FILE_ID = str(FILENAME_GEOCORE) #cgp_encoding = "" # now download the geojson file and save it to the TEMP directory # - download_url(geojson_url, save_path) - - #with open(r"C:\Users\seagles\Desktop\Bounding Boxes\00ccde98-1bbd-45bb-acf4-3f8b0e8aef1d.geojson") as f: - #with open(r"c:\Users\seagles\Desktop\Bounding Boxes\0a2dfadd-57eb-4d64-a56d-ff53c431aaaa.geojson") as f: - with open(save_path) as f: - gj = geojson.load(f) - '''features = gj['features'][0] - print features''' - '''options = gj['features'][0]['properties']['options'][0] - print options''' - item_dict = gj - options_count = len(item_dict['features'][0]['properties']['options']) - print str(options_count) + " = options count" - url_count = options_count - print "URL count for all options = " + str(url_count) - protocol_count = options_count - print "PROTOCOL count for all options = " + str(protocol_count) - name_count = options_count - #name_count = len(item_dict['features'][0]['properties']['options'][0]['name']) - #name_count = name_count * options_count - print "NAME count for all options = " + str(name_count) - description_count = len(item_dict['features'][0]['properties']['options'][0]['description']) - description_count = description_count * options_count - print "DESCRIPTION count for all options = " + str(description_count) + download_url(GEOJSON_URL, SAVE_PATH) + + with open(SAVE_PATH) as f: + GEOJSON = geojson.load(f) + ITEM_DICTIONARY = GEOJSON + OPTIONS_COUNT = len(ITEM_DICTIONARY['features'][0]['properties']['options']) + print str(OPTIONS_COUNT) + " = options count" + URL_COUNT = OPTIONS_COUNT + print "URL count for all options = " + str(URL_COUNT) + 
PROTOCOL_COUNT = OPTIONS_COUNT + print "PROTOCOL count for all options = " + str(PROTOCOL_COUNT) + NAME_COUNT = OPTIONS_COUNT + print "NAME count for all options = " + str(NAME_COUNT) + DESCRIPTION_COUNT = len(ITEM_DICTIONARY['features'][0]['properties']['options'][0]['description']) + DESCRIPTION_COUNT = DESCRIPTION_COUNT * OPTIONS_COUNT + print "DESCRIPTION count for all options = " + str(DESCRIPTION_COUNT) print "Downloaded geoJSON GeoCore file" - print "URL = " + str(geojson_url) - print "save_path = " + str(save_path) - + print "URL = " + str(GEOJSON_URL) + print "save_path = " + str(SAVE_PATH) + #We now have every attribute counted from the original GeoJSON. + #We are going to create an attribute table to hold all of this data. + #We are going to use the name_count attribute as a number to define how many + #rows we are going to add to the attribute table. + #For this example the number of rows to add is - """ - We now have every attribute counted from the original GeoJSON. - We are going to create an attribute table to hold all of this data. - We are going to use the name_count attribute as a number to define how many - rows we are going to add to the attribute table. 
- For this example the number of rows to add is """ - print "Number of lines to be added " + str(name_count) - """the name_count attribute""" + print "Number of lines to be added " + unicode(NAME_COUNT) + #the name_count attribute""" - # Create attribute table - # - arcpy.CreateTable_management(folder, "catalogue_scrape.dbf") - print("Table Created") - ''' - # add fields that are needed to the attribute table + # Set DBF encoding from ASCII to UTF-8, also created new DBF table # - arcpy.AddField_management(catalogue_scrape, "FILENAME", "TEXT", field_length=100) - print "FILENAME field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "FILEID", "TEXT", field_length=100) - print "FILEID field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "URL", "TEXT", field_length=256) - print "URL field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "PROTOCOL", "TEXT", field_length=100) - print "PROTOCOL field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "NAME", "TEXT", field_length=256) - print "NAME field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "NAME-EN", "TEXT", field_length=256) - print "NAME-EN field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "NAME-FR", "TEXT", field_length=256) - print "NAME-FR field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "DESC", "TEXT", field_length=256) - print "DESC field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "DESC-EN", "TEXT", field_length=256) - print "DESC-EN field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "DESC-FR", "TEXT", field_length=256) - print "DESC-EN field added to catalogue_scrape.dbf" - arcpy.AddField_management(catalogue_scrape, "ROWID", "TEXT", field_length=10) - print "ROWIDS field added to catalogue_scrape.dbf" - ''' - - # Set DBF encoding 
from ASCII to UTF-8 - # - dbf.Table('C:\TEMP\catalogue_scrape.dbf', 'FILENAME C(100); FILEID C(100); URL C(254); PROTOCOL C(100); NAME C(254); NAME_EN C(254); NAME_FR C(254); DESC_ C(254); DESC_EN C(254); DESC_FR C(254); ROWID_ C(10)', codepage=0xf0) - - # delete the automatically added Field1 attribute field - #arcpy.DeleteField_management(catalogue_scrape, "Field1") + dbf.Table(r'C:\TEMP\catalogue_scrape.dbf', 'FILENAME C(100); FILEID C(100); URL C(254); PROTOCOL C(100); NAME C(254); NAME_EN C(254); NAME_FR C(254); DESC_ C(254); DESC_EN C(254); DESC_FR C(254); ROWID_ C(10)', codepage=0xf0) + print "Table created, catalogue_scrape.dbf" # Create insert cursor for table # - rows = arcpy.InsertCursor("c:\TEMP\catalogue_scrape.dbf") + ROWS = arcpy.InsertCursor(r"C:\TEMP\catalogue_scrape.dbf") # Create new rows based off of how many names # exist in the options attributes. # - for x in range(0, name_count): - row = rows.newRow() - rows.insertRow(row) - #row.setValue("distance", 100) - #row.setValue() - + for x in range(0, NAME_COUNT): + row = ROWS.newRow() + ROWS.insertRow(row) # Delete cursor and row objects to remove locks on the data # del row - del rows + del ROWS - rows = arcpy.UpdateCursor("c:\TEMP\catalogue_scrape.dbf") + ROWS = arcpy.UpdateCursor(r"C:\TEMP\catalogue_scrape.dbf") - for row in rows: - row.setValue("FILENAME", save_path) - row.setValue("FILEID", file_id) - rows.updateRow(row) + for row in ROWS: + row.setValue("FILENAME", SAVE_PATH) + row.setValue("FILEID", FILE_ID) + ROWS.updateRow(row) del row - del rows + del ROWS - feature = 0 - while feature < options_count: + FEATURE = 0 + while FEATURE < OPTIONS_COUNT: - rows = arcpy.UpdateCursor("c:\TEMP\catalogue_scrape.dbf") - row_number = 0 - print str(row_number) + " = row number in catalo" - print str(options_count) + " = options count" - print str(feature) + " = feature counter" + ROWS = arcpy.UpdateCursor(r"C:\TEMP\catalogue_scrape.dbf") + ROW_NUMBER = 0 + print str(ROW_NUMBER) + " = row number in 
catalo" + print str(OPTIONS_COUNT) + " = options count" + print str(FEATURE) + " = feature counter" print "inside loop1" - for row in rows: + for row in ROWS: print "inside loop2" #url = gj['features'][0]['properties']['options'][int(feature)]['url'] #print str(feature) + " = feature" - print str(row_number) + " = row number" - if feature == row_number and feature < options_count: + print str(ROW_NUMBER) + " = row number" + if FEATURE == ROW_NUMBER and FEATURE < OPTIONS_COUNT: print "inside if statement" - print feature - url = gj['features'][0]['properties']['options'][int(feature)]['url'] - protocol = gj['features'][0]['properties']['options'][int(feature)]['protocol'] - name_en = gj['features'][0]['properties']['options'][int(feature)]['name']['en'] - name_fr = gj['features'][0]['properties']['options'][int(feature)]['name']['fr'] - name = unicode(gj['features'][0]['properties']['options'][int(feature)]['name']['en']) + unicode(gj['features'][0]['properties']['options'][int(feature)]['name']['fr']) + print FEATURE + url = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['url'] + protocol = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['protocol'] + name_en = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['name']['en'] + name_fr = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['name']['fr'] + name = unicode(GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['name']['en']) + unicode(GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['name']['fr']) print name + " = name" - description_en = gj['features'][0]['properties']['options'][int(feature)]['description']['en'] - description_fr = gj['features'][0]['properties']['options'][int(feature)]['description']['fr'] + description_en = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['description']['en'] + description_fr = GEOJSON['features'][0]['properties']['options'][int(FEATURE)]['description']['fr'] description = 
unicode(description_en) + unicode(description_fr) print description + " = description" row.setValue("NAME", name) @@ -175,76 +138,37 @@ def download_url(url, save_path): row.setValue("DESC_FR", description_fr) row.setValue("PROTOCOL", protocol) row.setValue("URL", url) - row.setValue("ROWID_", feature) - rows.updateRow(row) + row.setValue("ROWID_", FEATURE) + ROWS.updateRow(row) print "url = " + str(url) - feature = feature + 1 - row_number = row_number + 1 - - '''elif feature == row_number and feature >= name_count: - url_adjust =''' - + FEATURE = FEATURE + 1 + ROW_NUMBER = ROW_NUMBER + 1 - print str(row_number) + " = row number" - print str(feature) + " = feature number" + print str(ROW_NUMBER) + " = row number" + print str(FEATURE) + " = feature number" #print "Option count = " + str(options_count) #print "Feature = " + str(feature) - #options = gj['features'][0]['properties']['options'][int(feature)] + #options = GEOJSON['features'][0]['properties']['options'][int(feature)] - print "url count = " + str(url_count) + print "url count = " + str(URL_COUNT) #print options del row - del rows - - feature2 = 0 - feature = options_count - print "Feature 2 = " + str(feature2) - print "Feature = " + str(feature) - print "Options count = " + str(options_count) - - ''' - while feature >= options_count and feature < name_count: - - row_number = 0 - - rows = arcpy.UpdateCursor("c:\TEMP\catalogue_scrape.dbf") - print "row number Loop 1 = " + str(row_number) - - for row in rows: - print "row number Loop 2 = " + str(row_number) - - if feature == row_number and feature2 < options_count: - print "Feature2 in if statement = " + str(feature2) - url = gj['features'][0]['properties']['options'][int(feature2)]['url'] - row.setValue("URL", url) - rows.updateRow(row) - print "url = " + str(url) - print str(row_number) + " = row number" - print str(feature) + " = feature number" - print str(options_count) + " = options count" - print str(feature2) + " = feature2 number" - feature2 = 
feature2 + 1 - feature = feature + 1 - row_number = row_number + 1 - - del row - del rows - ''' + del ROWS - print str(feature) + " = features for second round calculations" - print str(row_number) + " = row number to start with for new calculations" - print str(options_count) + " = options counter for calculating starter row value" - print str(name_count) + " = maximum number for row calculations" + print str(FEATURE) + " = features for second round calculations" + print str(ROW_NUMBER) + " = row number to start with for new calculations" + print str(OPTIONS_COUNT) + " = options counter for calculating starter row value" + print str(NAME_COUNT) + " = maximum number for row calculations" print "When this is complete attributes should be added to another table using the append function" print "For the options attributes to be used to populate the attribute table, the statistics are below." - print "There are " + str(options_count) + " options" - print "There are " + str(url_count) + " urls, one for each feature under options" - print "There are " + str(protocol_count) + " protocols, one for each feature under options" - print "There are " + str(name_count) + " names, one english and one french for each feature under options" - print "There are " + str(description_count) + " descriptions, one english and one french for each feature under options" - print "This means that there are " + str(name_count) + " lines to be added to the attribute table, and four lines to fill based on attributes on the line before them" + print "There are " + str(OPTIONS_COUNT) + " options" + print "There are " + str(URL_COUNT) + " urls, one for each feature under options" + print "There are " + str(PROTOCOL_COUNT) + " protocols, one for each feature under options" + print "There are " + str(NAME_COUNT) + " names, one english and one french for each feature under options" + print "There are " + str(DESCRIPTION_COUNT) + " descriptions, one english and one french for each feature under 
options" + print "This means that there are " + str(NAME_COUNT) + " lines to be added to the attribute table"