forked from ncss-tech/NASIS-Pedons
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Extract_Pedons_from_NASIS_byTextFile.py
1200 lines (917 loc) · 55.2 KB
/
Extract_Pedons_from_NASIS_byTextFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#-------------------------------------------------------------------------------
# Name: Extract Pedons from NASIS
#
# Author: Adolfo.Diaz
# e-mail: [email protected]
# phone: 608.662.4422 ext. 216
#
# Author: Jason.Nemecek
# e-mail: [email protected]
# phone: 608.662.4422 ext. 190
#
# Created: 7/04/2016
# Last Modified: 3/07/2017
#-------------------------------------------------------------------------------
## ===================================================================================
class ExitError(Exception):
    """Tool-specific exception type; adds no behavior to Exception.

    NOTE(review): presumably raised to stop the tool early -- no use of it
    is visible in this part of the file; confirm against the callers.
    """
## ===================================================================================
def AddMsgAndPrint(msg, severity=0):
    """Print a message and forward it to the ArcGIS geoprocessor.

    msg      -- text to report.
    severity -- 0 adds an informational message, 1 a warning and 2 an
                error; any other value is only printed.

    Reporting is best-effort: an ordinary failure while printing or while
    calling arcpy (for instance when arcpy is not available) is ignored so
    that a messaging problem never aborts the tool.
    """
    try:
        # print(msg) with a single argument behaves identically on
        # Python 2 and Python 3.
        print(msg)

        # Add a geoprocessing message (in case this is run as a tool)
        if severity == 0:
            arcpy.AddMessage(msg)
        elif severity == 1:
            arcpy.AddWarning(msg)
        elif severity == 2:
            arcpy.AddError(msg)

    except Exception:
        # Deliberately swallowed (see docstring). Unlike a bare except this
        # no longer hides SystemExit or KeyboardInterrupt.
        pass
## ===================================================================================
def errorMsg():
    """Report the exception currently being handled as an error message.

    The message is built from the second entry and the last entry of
    traceback.format_exception() for sys.exc_info(), and is sent through
    AddMsgAndPrint with severity 2. If building the message fails (for
    example when no exception is active and the formatted list has a
    single entry) a generic error message is reported instead.
    """
    try:
        exc_type, exc_value, exc_traceback = sys.exc_info()

        # Format once and reuse the result for both pieces of the message
        formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
        theMsg = "\t" + formatted[1] + "\n\t" + formatted[-1]
        AddMsgAndPrint(theMsg, 2)

    except Exception:
        AddMsgAndPrint("Unhandled error in errorMsg method", 2)
### ===================================================================================
def _getPackageWorkspace():
    """Return arcpy's 'packageWorkspace' environment value, or None when the
    environment is not listed by arcpy or holds an empty value."""
    if 'packageWorkspace' in arcpy.ListEnvironments():
        return arcpy.env['packageWorkspace'] or None
    return None


def _useFallbackScratchWorkspace(sysDrive):
    """Point the scratch workspace at <sysDrive>/TEMP or, when no system drive
    is known, at the packageWorkspace. Return True on success, else False."""
    if sysDrive:
        tempFolder = sysDrive + os.sep + "TEMP"

        if not os.path.exists(tempFolder):
            # Octal 0o777; the decimal literal 777 used previously is a
            # different (unintended) permission mask.
            os.makedirs(tempFolder, 0o777)

        arcpy.env.scratchWorkspace = tempFolder

    else:
        packageWS = _getPackageWorkspace()

        if not packageWS:
            AddMsgAndPrint("\tCould not set any scratch workspace",2)
            return False

        arcpy.env.scratchWorkspace = packageWS

    AddMsgAndPrint("\tTemporarily setting scratch workspace to: " + arcpy.env.scratchGDB,1)
    return True


def _sharesSystemPath(scratchWK, envVariables, varsToSearch):
    """Return True when scratchWK has a directory name in the same position as
    the value of any environment variable named in varsToSearch."""
    # The scratch path does not change inside the loop; split it once.
    scratchParts = [part for part in scratchWK.lower().split(os.sep) if part]

    for var in envVariables:
        if var not in varsToSearch:
            continue

        # e.g. ['c:', 'users', 'adolfo.diaz', 'appdata', 'local']
        varParts = [part for part in envVariables[var].lower().split(os.sep) if part]
        wsParts = scratchParts

        # First element is the drive letter; drop it from both lists when
        # they are the same. Empty lists are left alone instead of raising.
        if varParts and wsParts and varParts[0] == wsParts[0]:
            varParts = varParts[1:]
            wsParts = wsParts[1:]

        # Compare the two lists position by position; order is significant
        if any(i == j for i, j in zip(varParts, wsParts)):
            return True

    return False


def setScratchWorkspace():
    r""" This function will set the scratchWorkspace for the interim of the execution
        of this tool.  The scratchWorkspace is used to set the scratchGDB which is
        where all of the temporary files will be written to.  The path of the
        user-defined scratchWorkspace is compared to the paths held in a fixed list
        of system environment variables.  If there is any overlap in directories the
        scratchWorkspace is set to <SYSTEMDRIVE>\TEMP (i.e. C:\TEMP).  When no system
        drive is known the packageWorkspace environment is used instead.  The same
        fallback is applied when no scratchWorkspace is set or when the
        user-defined one yields no scratchGDB.

        Returns the scratchGDB path that results from the chosen scratchWorkspace,
        or False when no scratch workspace could be set."""

    try:
        AddMsgAndPrint("\nSetting Scratch Workspace")
        scratchWK = arcpy.env.scratchWorkspace

        # root system drive; None when the variable is not defined
        sysDrive = os.environ.get('SYSTEMDRIVE')

        # names of the environment variables whose paths must not overlap
        # with the scratch workspace
        varsToSearch = ('ESRI_OS_DATADIR_LOCAL_DONOTUSE','ESRI_OS_DIR_DONOTUSE','ESRI_OS_DATADIR_MYDOCUMENTS_DONOTUSE',
                        'ESRI_OS_DATADIR_ROAMING_DONOTUSE','TEMP','LOCALAPPDATA','PROGRAMW6432','COMMONPROGRAMFILES','APPDATA',
                        'USERPROFILE','PUBLIC','SYSTEMROOT','PROGRAMFILES','COMMONPROGRAMFILES(X86)','ALLUSERSPROFILE')

        # This is a printout of the original author's system environmental
        # variables - Windows 7 (kept as comments: inside a normal string
        # literal "\U" is a syntax error on Python 3)
        # -----------------------------------------------------------------------------------------
        # ESRI_OS_DATADIR_LOCAL_DONOTUSE        C:\Users\adolfo.diaz\AppData\Local\
        # ESRI_OS_DIR_DONOTUSE                  C:\Users\ADOLFO~1.DIA\AppData\Local\Temp\6\arc3765\
        # ESRI_OS_DATADIR_MYDOCUMENTS_DONOTUSE  C:\Users\adolfo.diaz\Documents\
        # ESRI_OS_DATADIR_COMMON_DONOTUSE       C:\ProgramData\
        # ESRI_OS_DATADIR_ROAMING_DONOTUSE      C:\Users\adolfo.diaz\AppData\Roaming\
        # TEMP                                  C:\Users\ADOLFO~1.DIA\AppData\Local\Temp\6\arc3765\
        # LOCALAPPDATA                          C:\Users\adolfo.diaz\AppData\Local
        # PROGRAMW6432                          C:\Program Files
        # COMMONPROGRAMFILES                    C:\Program Files (x86)\Common Files
        # APPDATA                               C:\Users\adolfo.diaz\AppData\Roaming
        # USERPROFILE                           C:\Users\adolfo.diaz
        # PUBLIC                                C:\Users\Public
        # SYSTEMROOT                            C:\Windows
        # PROGRAMFILES                          C:\Program Files (x86)
        # COMMONPROGRAMFILES(X86)               C:\Program Files (x86)\Common Files
        # ALLUSERSPROFILE                       C:\ProgramData
        # -----------------------------------------------------------------------------------------

        # No workspace set (Very odd that it would go in here unless running directly from python)
        if scratchWK is None:
            AddMsgAndPrint("\tNo user-defined scratch workspace ",0)

            if not _useFallbackScratchWorkspace(sysDrive):
                return False

        # The current scratch workspace shares 1 or more directory paths with the
        # system env variables.  Create a temp folder at root
        elif _sharesSystemPath(scratchWK, os.environ, varsToSearch):
            AddMsgAndPrint("\tCurrent Workspace: " + scratchWK,0)

            if not _useFallbackScratchWorkspace(sysDrive):
                return False

        # user-set workspace does not violate system paths; if it yields no
        # scratchGDB it is reported as read only and the fallback is used
        else:
            arcpy.env.scratchWorkspace = scratchWK

            if arcpy.env.scratchGDB is None:
                AddMsgAndPrint("\tCurrent scratch workspace: " + scratchWK + " is READ only!",0)

                if not _useFallbackScratchWorkspace(sysDrive):
                    return False

            else:
                AddMsgAndPrint("\tUser-defined scratch workspace is set to: "  + arcpy.env.scratchGDB,0)

        arcpy.Compact_management(arcpy.env.scratchGDB)
        return arcpy.env.scratchGDB

    except Exception:

        # All Failed; set workspace to packageWorkspace environment
        try:
            packageWS = _getPackageWorkspace()

            if packageWS:
                arcpy.env.scratchWorkspace = packageWS
                arcpy.Compact_management(arcpy.env.scratchGDB)
                return arcpy.env.scratchGDB

            AddMsgAndPrint("\tCould not set scratchWorkspace.  Not even to default!",2)
            return False

        except Exception:
            errorMsg()
            return False
## ================================================================================================================
def tic():
    """ Returns the current time as reported by time.time() """
    startTime = time.time()
    return startTime
## ================================================================================================================
def toc(_start_time):
    """ Returns the time elapsed since _start_time (a time.time() value, as
        returned by tic) as a readable string.

        The elapsed time (current time minus _start_time) is rounded to whole
        seconds and reported as hours, minutes and seconds; leading units that
        are zero are left out, i.e. '1 minute(s): 5 second(s)'.

        If the elapsed time cannot be computed the error is reported through
        errorMsg and None is returned."""

    try:
        # whole seconds as an int so that divmod stays in integer arithmetic
        elapsed = int(round(time.time() - _start_time))

        t_min, t_sec = divmod(elapsed, 60)
        t_hour, t_min = divmod(t_min, 60)

        if t_hour:
            return '{} hour(s): {} minute(s): {} second(s)'.format(t_hour, t_min, t_sec)

        if t_min:
            return '{} minute(s): {} second(s)'.format(t_min, t_sec)

        return '{} second(s)'.format(t_sec)

    except Exception:
        errorMsg()
## ================================================================================================================
def splitThousands(someNumber):
    """ Inserts thousands separators into a number where they are needed.

        someNumber -- normally an integer; a value with a decimal point is
                      accepted as well, in which case only the digits before
                      the decimal point are grouped.

        Returns the number as a string, with or without separators, i.e.
        1234567 -> '1,234,567'.  If the conversion fails the error is reported
        through errorMsg and someNumber is returned unchanged."""

    try:
        intPart, dot, fraction = str(someNumber).partition('.')

        # Work on the reversed digits so that groups of 3 are counted from the
        # right; a comma is added after each group that is followed by a digit.
        grouped = re.sub(r'(\d{3})(?=\d)', r'\1,', intPart[::-1])[::-1]

        return grouped + dot + fraction

    except Exception:
        errorMsg()
        return someNumber
## ================================================================================================================
def createPedonFGDB():
    """This Function will create a new pedon File Geodatabase by copying the
       template FGDB 'NasisPedonsTemplate.gdb' to <outputFolder>/<GDBname>.gdb
       (outputFolder and GDBname are module-level settings).

       The template and the XML workspace document
       'Extract_Pedons_from_NASIS_XMLWorkspace.xml' are both looked up in the
       folder of the running script (sys.argv[0]).  An existing FGDB with the
       user-defined name is deleted first.

       Returns an empty string if the XML workspace document or the template
       FGDB is missing OR an existing FGDB with the user-defined name cannot be
       deleted OR any other error is encountered.
       Returns the path to the new Pedon File Geodatabase if everything
       executes correctly."""

    try:
        AddMsgAndPrint("\nCreating New Pedon File Geodatabase",0)
        arcpy.SetProgressorLabel("Creating New Pedon File Geodatabase")

        # os.path.join keeps the paths relative when the script folder is
        # empty; plain concatenation with os.sep would point at the drive root.
        scriptFolder = os.path.dirname(sys.argv[0])
        pedonXML = os.path.join(scriptFolder, "Extract_Pedons_from_NASIS_XMLWorkspace.xml")
        localPedonGDB = os.path.join(scriptFolder, "NasisPedonsTemplate.gdb")

        # The XML workspace document is only checked for, not imported: the
        # schema comes from copying the template FGDB below.
        if not arcpy.Exists(pedonXML):
            AddMsgAndPrint("\t" + os.path.basename(pedonXML) + " Workspace document was not found!",2)
            return ""

        # Stop if the pedon FGDB template is not found
        if not arcpy.Exists(localPedonGDB):
            AddMsgAndPrint("\t" + os.path.basename(localPedonGDB) + " FGDB template was not found!",2)
            return ""

        newPedonFGDB = os.path.join(outputFolder,GDBname + ".gdb")

        if arcpy.Exists(newPedonFGDB):
            try:
                arcpy.Delete_management(newPedonFGDB)
                AddMsgAndPrint("\t" + GDBname + ".gdb already exists. Deleting and re-creating FGDB\n",1)
            except Exception:
                AddMsgAndPrint("\t" + GDBname + ".gdb already exists. Failed to delete\n",2)
                return ""

        # copy template over to new location
        AddMsgAndPrint("\tCreating " + GDBname + ".gdb with NCSS Pedon Schema 7.3")
        arcpy.Copy_management(localPedonGDB,newPedonFGDB)

        arcpy.UncompressFileGeodatabaseData_management(newPedonFGDB)

        arcpy.RefreshCatalog(outputFolder)

        AddMsgAndPrint("\tSuccessfully created: " + GDBname + ".gdb")

        return newPedonFGDB

    except arcpy.ExecuteError:
        AddMsgAndPrint(arcpy.GetMessages(2),2)
        return ""

    except Exception:
        AddMsgAndPrint("Unhandled exception (createFGDB)", 2)
        errorMsg()
        return ""
## ===============================================================================================================
def getTableAliases(pedonFGDBloc):
    """Collect table aliases from the 'MetadataTable' of the pedon FGDB.

    pedonFGDBloc -- path to the pedon File Geodatabase; it is also set as the
                    arcpy workspace.

    For every table listed in the geodatabase (plus 'pedon', skipping any
    name containing 'Metadata') the 'TablePhysicalName' and 'TableLabel'
    columns of MetadataTable are read and stored in the module-level
    tblAliases dictionary as {physical name: alias}.  A physical name that is
    already present in tblAliases is left untouched.

    Returns True on success; False when MetadataTable does not exist or an
    error occurs."""

    try:
        arcpy.SetProgressorLabel("Gathering Table and Field aliases")

        # Metadata table containing information for other pedon tables
        theMDTable = os.path.join(pedonFGDBloc, "MetadataTable")
        arcpy.env.workspace = pedonFGDBloc

        if not arcpy.Exists(theMDTable):
            return False

        tableList = arcpy.ListTables("*")
        tableList.append("pedon")

        # Physical names whose alias is wanted; Metadata tables are skipped
        wantedTables = set(table for table in tableList if table.find('Metadata') == -1)

        # One pass over the metadata table instead of one query per table
        nameOfFields = ["TablePhysicalName","TableLabel"]
        with arcpy.da.SearchCursor(theMDTable, nameOfFields) as cursor:
            for physicalName, aliasName in cursor:

                # The first record found for a physical name wins
                if physicalName in wantedTables and physicalName not in tblAliases:
                    tblAliases[physicalName] = aliasName

        return True

    except arcpy.ExecuteError:
        AddMsgAndPrint(arcpy.GetMessages(2),2)
        return False

    except Exception:
        AddMsgAndPrint("Unhandled exception (GetTableAliases)", 2)
        errorMsg()
        return False
## ===============================================================================================================
def createEmptyDictOfTables():
    """Return a dictionary with one empty list per table of the pedon FGDB.

    The keys are the names of the tables found in the module-level pedonFGDB
    workspace plus the 'pedon' point feature class; names containing
    'Metadata' are left out, i.e.
    {'area': [],'areatype': [],'basalareatreescounted': [], ...}
    The lists are filled later with the records read from the NASIS report.

    Any error is reported and the script is terminated with sys.exit()."""

    try:
        arcpy.env.workspace = pedonFGDB

        tables = arcpy.ListTables()
        # pedon is a feature class and gets excluded by the ListTables function
        tables.append(arcpy.ListFeatureClasses('pedon','Point')[0])

        return dict((str(table), []) for table in tables if table.find('Metadata') == -1)

    except Exception:
        AddMsgAndPrint("Unhandled exception (createEmptyDictOfTables) \n", 2)
        errorMsg()
        sys.exit()
## ===============================================================================================================
def parsePedonsIntoLists():
    """ This function will parse the module-level pedonList into manageable chunks
        that will be sent to the 2nd URL report.  There is an inherent URL character
        limit of 2,083.  The report URL is 123 characters long which leaves 1,960
        characters available.  A limit of 1,860 characters was chosen arbitrarily to
        avoid problems.  Most pedonIDs are about 6 characters, which means an average
        max request of about 265 pedons at a time.

        Pedon IDs are stripped of whitespace and blank IDs are ignored.  A chunk is
        closed by the first ID added after its comma-separated text has grown past
        the limit.

        Returns a tuple: (list of comma-separated pedon ID strings, number of strings).
        The script is terminated with sys.exit() when there are no pedon IDs or an
        unexpected error occurs."""

    maxChars = 1860   # max length of the ID string before a chunk is closed

    try:
        arcpy.SetProgressorLabel("Determining the number of requests to send the server")

        # Blank entries would otherwise end up as ',,' or a trailing ','
        pedonIDs = [str(pedonID).strip() for pedonID in pedonList]
        pedonIDs = [pedonID for pedonID in pedonIDs if pedonID]

        listOfPedonStrings = list()   # comma-separated ID strings, one per request
        currentIDs = []               # IDs of the chunk being assembled
        currentLength = 0             # length of those IDs, each followed by a comma

        for pedonID in pedonIDs:

            # Max URL length reached - this ID completes the chunk
            chunkIsFull = currentLength > maxChars

            currentIDs.append(pedonID)
            currentLength += len(pedonID) + 1

            if chunkIsFull:
                listOfPedonStrings.append(",".join(currentIDs))
                currentIDs = []
                currentLength = 0

        # End of pedon list has been reached; keep what is left over
        if currentIDs:
            listOfPedonStrings.append(",".join(currentIDs))

        numOfPedonStrings = len(listOfPedonStrings)  # Number of unique requests that will be sent

    except Exception:
        AddMsgAndPrint("Unhandled exception (parsePedonsIntoLists)", 2)
        errorMsg()
        sys.exit()

    # Checked outside the try so that the exit is not reported as an
    # unhandled exception of this function.
    if not numOfPedonStrings:
        AddMsgAndPrint("\n\t No pedon IDs were available to request from NASIS",2)
        sys.exit()

    return listOfPedonStrings,numOfPedonStrings
## ================================================================================================================
def getPedonHorizon(pedonList):
    """Request pedon data from NASIS and store the records in pedonGDBtables.

    pedonList -- comma-separated string of pedon IDs; it is appended to the
                 URL of the WEB_AnalysisPC_MAIN_URL_EXPORT report.

    The report is read line by line.  Lines between an '@begin <table>' and
    an '@end' marker are pipe-delimited records of <table>; the first line
    after '@begin' is the header and is skipped.  A record with fewer values
    than the table has fields is treated as a record that was broken across
    lines and is joined with the following line(s) until it is complete.
    Complete records are appended, as raw strings, to the list of their table
    in the module-level pedonGDBtables dictionary.

    Also relies on the module-level pedonFGDB and numOfPedonStrings.

    Returns True when at least one valid record was stored.  Returns False
    when the request fails 3 times, the report contains an 'ERROR' line, no
    valid record was found or an unexpected error occurs."""

    # Strictly for formatting; set before the try so that the exception
    # handlers below can always use it
    tab = "\t"

    try:
        if numOfPedonStrings > 1:
            tab = "\t\t"

        """ ---------------------- Create a dictionary of number of fields per table -----------------"""
        # table:number of fields, used to double check that the values from
        # the web report are correct.  This was added b/c there were text
        # fields that were getting disconnected in the report and being read
        # as 2 lines.
        arcpy.env.workspace = pedonFGDB

        tableFldDict = dict()    # i.e. petext:11
        validTables = arcpy.ListTables("*")
        validTables.append('pedon')

        for table in validTables:

            # Skip any Metadata files
            if table.find('Metadata') > -1:
                continue

            tableFields = arcpy.Describe(os.path.join(pedonFGDB,table)).fields
            numOfValidFlds = sum(1 for field in tableFields
                                 if field.type.lower() not in ("oid","geometry"))

            # Add 2 more fields to the pedon table for X,Y
            if table == 'pedon':
                numOfValidFlds += 2

            tableFldDict[table] = numOfValidFlds

        """----------------------------------- Open a network object --------------------------------"""
        # As soon as the url is opened it needs to be read otherwise there
        # will be a socket error raised (observed by the original authors).
        URL = r'https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_AnalysisPC_MAIN_URL_EXPORT&pedonid_list=' + pedonList

        theReport = None
        retryLabels = ("2nd", "3rd")

        for attempt in range(3):
            try:
                response = urlopen(URL)
                try:
                    theReport = response.readlines()
                finally:
                    response.close()
                break

            except Exception:
                # 3rd failure: give up on this request
                if attempt == 2:
                    errorMsg()
                    return False

                AddMsgAndPrint(tab + retryLabels[attempt] + " attempt at requesting data")

        invalidTable = 0         # represents tables that don't correspond with the GDB
        invalidRecord = 0        # represents records that were not added
        validRecord = 0

        bHeader = False          # flag indicating that the next line is a table header
        currentTable = ""        # The table found in the report
        numOfFields = None       # The number of fields a specific table should contain
        partialValue = ""        # variable containing part of a value that is not complete
        originalValue = ""       # variable containing the original incomplete value
        bPartialValue = False    # flag indicating if value is incomplete; append next record

        """ ------------------- Begin Adding data from URL into a dictionary of lists ---------------"""
        arcpy.SetProgressor("step", "Reading NASIS Report: 'WEB_AnalysisPC_MAIN_URL_EXPORT'", 0, len(theReport),1)

        for theValue in theReport:

            theValue = theValue.strip() # remove whitespace characters

            isBegin = theValue.find('@begin') > -1
            isEnd = not isBegin and theValue.find('@end') > -1

            # A table marker ends any record that is still incomplete; it
            # must not be joined with records of the next table.
            if (isBegin or isEnd) and bPartialValue:
                invalidRecord += 1
                bPartialValue = False
                partialValue,originalValue = "",""

            # represents the start of valid table
            if isBegin:
                theTable = theValue[theValue.find('@') + 7:]  ## Isolate the table
                numOfFields = tableFldDict.get(theTable)

                # Only tables known to the FGDB are collected
                if theTable in pedonGDBtables and numOfFields is not None:
                    currentTable = theTable
                    bHeader = True  ## Next line will be the header

                else:
                    AddMsgAndPrint("\t" + theTable + " Does not exist in the FGDB schema! Figure this out Jason Nemecek!",2)
                    invalidTable += 1
                    currentTable = ""
                    bHeader = False

            # end of the previous table has been reached; reset currentTable
            elif isEnd:
                currentTable = ""
                bHeader = False

            # represents header line; skip this line
            elif bHeader:
                bHeader = False

            # this is a valid record that should be collected.  Records are
            # kept as raw strings; quotes are removed before inserting into
            # the table.
            elif currentTable:

                # this should represent the next piece of an incomplete value
                if bPartialValue:
                    partialValue += theValue  # append this record to the previous record
                    numOfValues = len(partialValue.split('|'))

                    # This value completed the previous value
                    if numOfValues == numOfFields:
                        pedonGDBtables[currentTable].append(partialValue)
                        validRecord += 1
                        bPartialValue = False
                        partialValue,originalValue = "",""

                    # appending this value exceeded the number of possible fields
                    elif numOfValues > numOfFields:
                        AddMsgAndPrint("\t\tIncorrectly formatted Record Found in " + currentTable + " table:",2)
                        AddMsgAndPrint("\t\t\tRecord should have " + str(numOfFields) + " values but has " + str(numOfValues),2)
                        AddMsgAndPrint("\t\t\tOriginal Record: " + originalValue,2)
                        AddMsgAndPrint("\t\t\tAppended Record: " + partialValue,2)
                        invalidRecord += 1
                        bPartialValue = False
                        partialValue,originalValue = "",""

                    # otherwise the value still falls short of the number of
                    # fields; keep appending with the next line

                else:
                    numOfValues = len(theValue.split('|'))

                    if numOfValues == numOfFields:
                        pedonGDBtables[currentTable].append(theValue)
                        validRecord += 1

                    # number of values exceed the number of fields; Big Error
                    elif numOfValues > numOfFields:
                        AddMsgAndPrint("\n\t\tIncorrectly formatted Record Found in " + currentTable + " table:",2)
                        AddMsgAndPrint("\t\t\tRecord should have " + str(numOfFields) + " values but has " + str(numOfValues),2)
                        AddMsgAndPrint("\t\t\tRecord: " + theValue,2)
                        invalidRecord += 1

                    # number of values falls short of the number of correct fields
                    else:
                        partialValue,originalValue = theValue,theValue
                        bPartialValue = True

            elif theValue.find("ERROR") > -1:
                AddMsgAndPrint("\n\t\t" + theValue[theValue.find("ERROR"):],2)
                return False

            else:
                invalidRecord += 1

            arcpy.SetProgressorPosition()

        #Resets the progressor back to its initial state
        arcpy.ResetProgressor()

        if not validRecord:
            AddMsgAndPrint("\t\tThere were no valid records captured from NASIS request",2)
            return False

        # Report any invalid tables found in report
        if invalidTable and invalidRecord:
            AddMsgAndPrint("\t\tThere were " + splitThousands(invalidTable) + " invalid table(s) included in the report with " + splitThousands(invalidRecord) + " invalid record(s)",1)

        # Report any invalid records found in report; html lines reserved for
        # headers and footers are not reported (threshold of 28 kept as is)
        if invalidRecord > 28:
            AddMsgAndPrint("\t\tThere were " + splitThousands(invalidRecord) + " invalid record(s) not captured",1)

        return True

    except URLError as e:
        if hasattr(e, 'reason'):
            AddMsgAndPrint(tab + "URL Error: " + str(e.reason), 2)

        elif hasattr(e, 'code'):
            AddMsgAndPrint(tab + e.msg + " (errorcode " + str(e.code) + ")", 2)

        return False

    except socket.timeout:
        AddMsgAndPrint(tab + "Server Timeout Error", 2)
        return False

    except socket.error:
        AddMsgAndPrint(tab + "NASIS Reports Website connection failure", 2)
        return False

    except httplib.BadStatusLine:
        AddMsgAndPrint(tab + "NASIS Reports Website connection failure", 2)
        return False

    except Exception:
        errorMsg()
        return False
## ================================================================================================================
def importPedonData(tblAliases,verbose=False):
    """ Write the records currently held in the pedonGDBtables dictionary into the
        matching tables of the pedon FGDB.

        Depending on the number of pedons in the user's AOI, this function may be
        invoked multiple times.  The pedonGDBtables dictionary could possibly
        allocate all of the computer's memory, so the caller is expected to flush it
        through this function periodically (the original notes mention roughly every
        40,000 pedons).  This function only reads pedonGDBtables; it does not empty it.

        Parameters:
            tblAliases -- dictionary of {table physical name: table alias name}.  It is
                          only read when the module-level bAliasName flag is set.
            verbose    -- when True, a line is reported for every table with the
                          number of records added.

        Module-level names used: bAliasName, pedonGDBtables, pedonFGDB.

        Returns True when every table has been processed and False when an
        arcpy.ExecuteError or any other unexpected exception aborts the import.
        A record that fails to insert is reported and ends the inserts for that
        table only; the remaining tables are still processed."""

    try:
        if verbose: AddMsgAndPrint("\nImporting Pedon Data into FGDB")
        arcpy.SetProgressorLabel("Importing Pedon Data into FGDB")

        # Tables are imported in alphabetical order.  The list of table names has to
        # be assigned BEFORE its longest name is measured; measuring first raised an
        # UnboundLocalError whenever alias names were not being used.
        if bAliasName:
            tblKeys = sorted(tblAliases.keys())
            maxCharTable = max([len(table) for table in tblKeys]) + 1
            maxCharAlias = max([len(alias) for alias in tblAliases.values()])

            firstTab = (maxCharTable - len("Table Physical Name")) * " "
            headerName = "\n\tTable Physical Name" + firstTab + "Table Alias Name"
            if verbose:
                AddMsgAndPrint(headerName,0)
                AddMsgAndPrint("\t" + len(headerName) * "=",0)
        else:
            tblKeys = sorted(pedonGDBtables.keys())
            maxCharTable = max([len(table) for table in tblKeys]) + 1

        """ ---------------------------------------------------"""
        arcpy.SetProgressor("step","Importing Pedon Data into FGDB table: ",0,len(tblKeys),1)

        for table in tblKeys:

            arcpy.SetProgressorLabel("Importing Pedon Data into FGDB: " + table)
            arcpy.SetProgressorPosition()

            # Skip any Metadata files
            if table.find('Metadata') > -1: continue

            # Strictly for standardizing reporting: pad the table name (and the alias
            # name, when used) so that the record counts line up in one column
            firstTab = (maxCharTable - len(table)) * " "

            if bAliasName:
                aliasName = tblAliases[table]
                secondTab = (maxCharAlias - len(aliasName)) * " "
                rowLabel = "\t" + table + firstTab + aliasName + secondTab
            else:
                rowLabel = "\t" + table + firstTab

            records = pedonGDBtables[table]

            # Table had no records; still print it out
            if not records:
                if verbose: AddMsgAndPrint(rowLabel + " Records Added: 0",1)
                continue

            GDBtable = pedonFGDB + os.sep + table    # FGDB Physical table path

            """ -------------------------------- Collect field information -----------------------"""
            ''' For the current table, get the field length if the field is a string.  I do this b/c
                the actual value may exceed the field length and error out as has happened in SSURGO.  If
                the value does exceed the field length then the value will be truncated to the max length
                of the field '''

            nameOfFields = []   # fields handed to the insert cursor
            fldLengths = []     # max length per report column; 0 means "do not truncate"

            for field in arcpy.Describe(GDBtable).fields:
                fieldType = field.type.lower()

                # Skip Object ID field and Shape field (only for site)
                if fieldType in ("oid","geometry"):
                    continue

                nameOfFields.append(field.name)
                fldLengths.append(field.length if fieldType == "string" else 0)

            # Pedon feature class will have X,Y geometry added; Add XY token to list.
            # The report carries the coordinates as two trailing columns, so two
            # length slots are reserved for that single token.
            isPedonTable = table == 'pedon'
            if isPedonTable:
                nameOfFields.append('SHAPE@XY')
                fldLengths.extend([0, 0])   # X coord, Y coord

            """ -------------------------------- Insert Rows ------------------------------------------
                Iterate through every value from a specific table in the pedonGDBtables dictionary
                and add it to the appropriate FGDB table.  Truncate the value if it exceeds the
                max number of characters.  Set the value to 'None' if it is an empty string."""

            numOfRowsAdded = 0

            # Initiate the insert cursor object using all of the fields
            cursor = arcpy.da.InsertCursor(GDBtable,nameOfFields)

            try:
                # '"S1962WI025001","43","15","9","North","89","7","56","West",,"Dane County, Wisconsin. 100 yards south of road."'
                for rec in records:

                    values = rec.replace('"','').split('|')
                    newRow = []    # list containing the values that will populate a new row

                    for fldNo, value in enumerate(values):
                        value = value.strip()
                        fldLen = fldLengths[fldNo]

                        if value == '' or value == 'NULL':   ## Empty String
                            value = None
                        elif fldLen > 0:                     ## record is a string, truncate it
                            value = value[0:fldLen]

                        newRow.append(value)

                    # Add XY coordinates to the pedon point feature class.
                    if isPedonTable:
                        try:
                            xValue = float(newRow[-1])  # Long
                            yValue = float(newRow[-2])  # Lat
                        except (TypeError, ValueError, IndexError):
                            # missing or non-numeric coordinates fall back to a fixed location
                            xValue = 0.00
                            yValue = 90.0

                        # remove the X,Y coords from the newRow list b/c X,Y
                        # fields don't exist in the pedon Table
                        newRow = newRow[:-2]
                        newRow.append((xValue,yValue))

                    try:
                        cursor.insertRow(newRow)
                        numOfRowsAdded += 1

                    except arcpy.ExecuteError:
                        AddMsgAndPrint("\n\tError in :" + table + " table: Field No: " + str(len(values)) + " : " + str(rec),2)
                        AddMsgAndPrint("\n\t" + arcpy.GetMessages(2),2)
                        break

                    except:
                        AddMsgAndPrint("\n\tError in: " + table + " table")
                        print("\n\t" + str(rec))
                        print("\n\tRecord Number: " + str(numOfRowsAdded))
                        AddMsgAndPrint("\tNumber of Fields in GDB: " + str(len(nameOfFields)))
                        AddMsgAndPrint("\tNumber of fields in report: " + str(len(rec.split('|'))))
                        errorMsg()
                        break

            finally:
                # Always drop the cursor reference, even if an unexpected exception
                # is on its way to the outer handlers
                del cursor

            # Report the # of records added to the table
            if verbose: AddMsgAndPrint(rowLabel + " Records Added: " + splitThousands(numOfRowsAdded),1)

        #Resets the progressor back to its initial state
        arcpy.ResetProgressor()
        return True

    except arcpy.ExecuteError:
        AddMsgAndPrint(arcpy.GetMessages(2),2)
        return False

    except:
        errorMsg()
        return False
## ================================================================================================================
def getObjectSize(obj, handlers={}, verbose=False):
    """ Returns the approximate memory footprint of an object as a formatted string
        ("bytes", "KB", "MB" or "GB").

        Only dictionaries are expanded by default: the size of every key and every
        value is added to the size of the dictionary itself.  Any other object is
        measured with getsizeof() alone and its contents are not visited.

        To have other containers expanded, pass handlers that return an iterable
        of their contents:
            handlers = {SomeContainerClass: iter,
                        OtherContainerClass: OtherContainerClass.get_elements}

        With verbose set, the size, type and repr of every visited object is printed.

        If anything goes wrong errorMsg() is called and nothing (None) is returned.
    """

    try:
        # Dictionaries are the only thing passed to this function, so they are the
        # only built-in container that gets expanded; keys and values are
        # flattened into a single stream of objects.
        ## To size ANY object, extend this with: tuple, list, deque, set, frozenset -> iter
        expanders = {dict: lambda d: chain.from_iterable(d.items())}
        expanders.update(handlers)     # user handlers take precedence

        visitedIDs = set()             # memory IDs of objects already counted
        fallbackSize = getsizeof(0)    # used for objects that cannot report their own size

        def measure(item):
            """ Size of item plus, for expandable containers, everything inside it """

            itemID = id(item)

            # never count the same object twice
            if itemID in visitedIDs:
                return 0

            visitedIDs.add(itemID)
            total = getsizeof(item, fallbackSize)

            if verbose:
                print(total, type(item), repr(item))

            # The first registered type that the item is an instance of decides
            # how its contents are walked; each content is measured recursively
            for containerType, expand in expanders.items():
                if isinstance(item, containerType):
                    for child in expand(item):
                        total += measure(child)
                    break

            return total

        totalBytes = measure(obj)

        # Report in the largest unit that keeps the number at or above 1
        if totalBytes < 1024:
            return splitThousands(totalBytes) + " bytes"

        if totalBytes < 1048576:
            return splitThousands(round((totalBytes / 1024.0),1)) + " KB"

        if totalBytes < 1073741824:
            return splitThousands(round((totalBytes / (1024*1024.0)),1)) + " MB"

        return splitThousands(round(totalBytes / (1024*1024*1024.0),1)) + " GB"

    except:
        errorMsg()
#===================================================================================================================================
""" ----------------------------------------My Notes -------------------------------------------------"""
""" --------------- Column Headers
Column order
1. Row_Number2,
2. upedonid,
3. peiid,
4. pedlabsampnum,
5. longstddecimaldegrees ,
6. latstddecimaldegrees
----------------------"""
""" 1st Report """
# Used to get a number of pedons that are within a bounding box
# https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_ANALYSIS_PC_PEDON_NUMBER_SUM&lat1=43&lat2=45&long1=-90&long2=-88
""" 2nd Report """
# Used to get a list of peiids which will be passed over to the 3rd report
# https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_EXPORT_PEDON_BOX_COUNT&lat1=43&lat2=45&long1=-90&long2=-88
""" 3rd Report """
# This report will contain pedon information to be parsed into a FGDB.
# Raw URL
# https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=TEST_sub_pedon_pc_6.1_phorizon&pedonid_list= OLD one
# https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_AnalysisPC_MAIN_URL_EXPORT&pedonid_list= NEW one
# Sample complete URL with pedonIDs:
# https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_AnalysisPC_MAIN_URL_EXPORT&pedonid_list=36186,59976,60464,60465,101219,102867,106105,106106
#===================================================================================================================================