From 72dee87b95bce872e82d1b7dfd7ae2d6f0bcadb8 Mon Sep 17 00:00:00 2001
From: Derrick Chambers
Date: Mon, 14 Dec 2015 21:34:55 -0800
Subject: [PATCH] 1.0.5, bug fixes

---
 .gitignore         |   1 +
 ChangeLog.txt      |  11 +-
 ChangeLog.txt~     | 148 --------------------
 detex/__init__.py  |  10 +-
 detex/construct.py | 334 ++++++++++++++++++++++-----------------
 detex/getdata.py   | 322 +++++++++++++++++++++++++++----------------
 detex/results.py   |  47 ++++---
 detex/subspace.py  |  65 ++++-----
 detex/util.py      |  23 +++-
 setup.py           |  40 +++---
 tearDown.py        | 133 ++++++++++++++++++
 11 files changed, 618 insertions(+), 516 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 ChangeLog.txt~

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b25c15b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*~
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 02a65a7..b380b9e 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -139,7 +139,7 @@ Changed the name of writeTemplateKeyFromEQSearchSum to eqSearch2TemplateKey, and
 
 Changed the name of several arguments slightly to be more consistent, should be mostly backward compatible
 
-added the NumStations to results tables
+Added the NumStations to results tables
 
 Version 1.0.4
 -------------
@@ -147,4 +147,13 @@ Fixed bug in detex.results._deleteDups where detections from different stations
 
 Changed the intro tutorial slightly
 
+Version 1.0.5
+-------------
+Removed consisLen argument from createCluster; fillZeros=True will produce approximately the same behavior as consisLen = False
+
+Changed input of __init__.deb from varlist to *varlist
+
+Removed subSamp parameter from createCluster. Subsample lag times are now calculated by default and stored as decimal samples in a separate DataFrame.
+
+Fixed issues 15, 16, 17, 18, 19.
 
diff --git a/ChangeLog.txt~ b/ChangeLog.txt~
deleted file mode 100644
index da65d3b..0000000
--- a/ChangeLog.txt~
+++ /dev/null
@@ -1,148 +0,0 @@
-Before Version Numbers
-______________________
-
-2/12/15- added plotBasisVectors method to subspace object in order to plot used basis vectors from the SVD
-
-2/12/15 - fixed a bug causing getAllData(reverse=True) to put hour 23 in the wrong jday folder
-
-2/12/15 - fixed a bug causing auto-correlations to be less than 1 (multiplexing got off by one sample on one channel because starttimes for all channels were not equal). Applied fix to subspace functions as well
-
-2/19/15 - fixed a bug causing 1 higher degree of rep. to be selected in the SVD function than necessary
-
-2/20/15 - Fixed a bug causing any incomplete data chunks to report wrong detection time
-
-2/20/15 - When calling detex.xcorr.getFAS the filter parameter was not passed to the apply filter function causing an error at line 91, fixed
-
-2/21/15 - Added a MSTMPmax and MSTAMPmin columns to the subspace database. These are calculated based on the min and max offset times of all of the events that went into creating the subspace. Essentially allows an estimation of the origin time of the event and to associated detections from different stations together
-
-2/21/15 - Added the Mag column to the subspace database. It is a magnitude estimate that works by first finding the subspace representation of the contours data by projecting the part of the data that triggered the detector into the current subspace. The templates that went into the subspace creation are then projected into the subspace and the Gibbons and Ringdal iterative scaling method is applied to estimate scaling factors and magnitudes for each of the template projections.
The median value is then selected as the magnitude estimate. Seems to estimate about 10% too low currently. - -2/25/15 - detex.subspace.SubSpaceStream method pickSubSpaceTimes now has a traceLimit parameter that will limit the number of events in the current subspace that obspyck will try and display - -3/3/15 - changed the multiplex command to properly handle 1 channel data in subspace module - -3/3/15 - added a try except clause around the function that reads the continous data so if one data chunk is bad it is skipped rather than killing the susbspace detection - - -Version 0.0.1 -_____________ - -Added the ssResults method to the detex.result module that asssociates detections from multiple stations together - -Improved the magnitude estimate method to use only the energy of the detected waveform and best correlated event to calculate a scaling factor. -This was tested against local catalogs for both a mining region in Colorado and an Earthquake swarm in Yellow Stone - -In addition to the "master station" option for clustering, where waveform groups across the network are forced based on the clustering at one station, -each station is allowed to cluster independently if no master station argument is passed. As a result, the subspaces dataframes of the subspace objects are now -organized by a dictionary with the corresponding station as the key. - -Added min. number of events for a subspace to be created. Default value is 3 - -Added a version module for checking current version of detex by typing detex.version.version - -In detex.subspace, moved the import basemap line into the method that actually calls it so if someone cannot get basemap installed detex can still be run - - -Version 0.0.2 -_____________ -The updateReqCC function of detex.subspace.SSclusterStream did not call the write function, now it does - -Changed the magnitude estimation scheme to a weighted average of the std ratios of continous data to each of the training waveforms based on the square of the correlation coeficient - -Suppressed the SQLite output so it will no longer spam the consol when performing detections - -Changed default acceptable probability of false detection of the detex.subspace.createSubSpace function from 10**-9 to 10**-10 - -Added the eventDir input argument to the detex method of the subspace object. If a str of an event directory (the same structure orginized by the detex.getdata method) the subspace detections will be performed on all the events in the directory rather than on the continous data. Additionally, a dataframe will be created, whose name is governed by the eventCorFile parameter, showing each event and the maximum detection statstics. Useful for classifying events into pre-determined subspaces. - -Added the UTCSaves parameters which if not None must be a list of UTC objects. If the data being processed during the subspace detection emcompasses a time in the UTCSaves list several parameters will be written to the "UTCsaves.pkl" dataframe including continous data, vector of detection statistics etc. This was created for determining causes of subspace failure to detect specific events. - -Fixed a bug in getdata that caused the code to throw an error if the instrument response could not be removed. Now such data chunks are simply not saved after a warning is printed to the screen. - -Fragmented event files caused clustering to error out. 
Now any event file with 80% than the median data points for other event files will simply be skipped - -Version 0.0.4 -_____________ - -Added a grid search function to estimate appropriate thresholds when scipy.stats.beta.isf fails (see this bug report https://github.com/scipy/scipy/issues/4677) - -Added a normalize option to the SVD method of the detex.subspace.SubSpaceStream class. Essentially just normalizes all the input training events used for making a subspace. This gives all inputs equal wweight, which might be bad if you input a lot of noisy events, but is recommended in the Harris paper - -Added the Stations input parameter to detex.results.ssResults to allow a list of stations to be passed to restrict the results to detections that occur on a station in the list - -Added the Pf input parameter to detex.results.ssResults which loads a pickled subspace and uses the false alarm statistic (FAS) beta parameters to only use detections for each station/subspace that correspond to some value on the fitted beta above the Pf. - -Version 0.0.5 -_____________ - -Allows station/network names to be numbers with no letters - -added consistentLength input parameter to createCluster function of detex.subspace. Default is true. If false allows hopelessly inhomogenious lengthed data to still be correlated together, but is much slower and will not allow subspace construction (IE if False cannot be used for subspace construction) - -Version 0.0.6 -_____________ - -Made SubSpaceStream object indexable -Fixed various other bugs - -Version 0.0.7 -______________ - -Added function to generate N distinct colors for dendrograms and plotting events if default is exceeded - -Fixed a problem causing OS X and pyqt to not play well together - -Added feature to allow event lists to be independent from station to station rather than just using the union of the two - -Accounted for various other bugs that can happen in subspace detection with less than high quality data - -Version 0.0.8 -_____________ -Small bug fixes - -Added option to subspace detection to allow continuous data to be filled with 0s, facilitates the use of gappy data - -Version 0.1.0 -_____________ - -Deleted xcorr module, added support for singles (or singletons) from the subspace module. This represents a shift from the previous subspace and cross correlation paradigm to an only subspace paradigm where one-dimensional subspaces can be run (equivalent to cross correlation). Also added some features and cleaned up the code a bit. - -Changed event phases to use csv format by default rather than pkl - - -Version 0.1.1 -_____________ -Changed default phase pick file format to csv -changed detex to set any phase picks made before waveform data is available to earliest point when data are available and emit a warning -Fixed a bug in detex.subspace.loadclusters - -Version 0.1.2 -_____________ -Changed behavior of the trim input to the create clusters function to now reference time before origin in element 0 and time after origin in element 1. - - -Version 1.0.3b -______________ -Huge changes! - -Detex now conforms (mostly) to pep8 - -Split the gigantic subspace module into 3 modules: construct [for creating cluster and subspace classes], fas [for determining false alarm statistics], detect [for running subspace detections], and subspace [now only containing ClusterStream, Cluster, and SubSpaceStream classes] - -Added the DataFetcher class to the getdata module with is responsible for serving data to all other detex functions and classes. 
Allows for the use of a local directory structure or an obspy client for getting data. - -When local directories are used to store data they are automatically indexed with a SQLite database. This is done by parsing every non-directory file in the directory and trying to pass it to obspy.read. If it is readable it is included in the index, along with station, start times, end times, gaps, etc. - -Various bug fixes - -Changed the name of writeTemplateKeyFromEQSearchSum to eqSearch2TemplateKey, and the name of makeTemplatemkeyey to catalog2TemplateKey and moved it from getdata to util. - -Changed the name of several arguments slightly to be more consistent, should be mostly backward compatible - -added the NumStations to results tables - -Version 1.0.4 -------------- -Fixed bug in detex.results._deleteDups where detections from different stations can get grouped together - - diff --git a/detex/__init__.py b/detex/__init__.py index 2ac6b3a..952b3e6 100644 --- a/detex/__init__.py +++ b/detex/__init__.py @@ -30,7 +30,7 @@ # import all modules in detex directory #modules = glob.glob(os.path.dirname(__file__)+"/*.py") #__all__ = [os.path.basename(f)[:-3] for f in modules if os.path.isfile(f)] -#warnings.filterwarnings('error') #uncomment this to make all warnings errors +warnings.filterwarnings('error') #uncomment this to make all warnings errors # Imports for lazy people (ie make detex.createCluster callable) from construct import createCluster, createSubSpace @@ -41,7 +41,7 @@ maxSize = 10 * 1024*1024 # max size log file can be in bytes (10 mb defualt) verbose = True # set to false to avoid printing to screen makeLog = False # set to false to not make log file -__version__='1.0.4' # current detex version +__version__ = '1.0.5' # current detex version ## Configure logger to be used across all of Detex def setLogger(filename='detex_log.log'): @@ -69,7 +69,8 @@ def setLogger(filename='detex_log.log'): fh.setLevel(logging.DEBUG) fmat = '%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s' formatter = logging.Formatter(fmat) - fh.setFormatter(formatter) + fh.setFormatter(formatter) + global logger logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.addHandler(fh) @@ -137,12 +138,11 @@ def closeLogger(): handler.close() logger.removeHandler(handler) -def deb(varlist): +def deb(*varlist): global de de = varlist sys.exit(1) - if makeLog: logger, lpath = setLogger() diff --git a/detex/construct.py b/detex/construct.py index a4d5ad2..e154b2e 100644 --- a/detex/construct.py +++ b/detex/construct.py @@ -10,15 +10,10 @@ import obspy import detex import scipy -import warnings -import sys from scipy.cluster.hierarchy import linkage -pd.options.mode.chained_assignment = None #mute setting copy warning - - - +pd.options.mode.chained_assignment = None # mute setting copy warning ################ CLUSTERING FUNCTIONS AND CLASSES ################ @@ -32,73 +27,64 @@ def createCluster(CCreq=0.5, fileName='clust.pkl', decimate=None, dtype='double', - consisLen=True, eventsOnAllStations=False, subSamp=True, enforceOrigin=False, fillZeros=False): """ - Function to initialize an instance of the cluster class which - contains the linkage matrix, event names, and a few visualization - methods + Function to create an instance of the ClusterStream class Parameters ------- CCreq : float, between 0 and 1 - The minimum correlation coefficient for a grouping waveforms. 
- 0 means all waveforms grouped together, 1 will not form any - groups (in order to run each waveform as a correlation detector) - fetch_arg : str or detex.getdata.DataFetcher object - fetch_arg of detex.getdata.quickFetch, see docs for details + The minimum correlation coefficient for grouping waveforms. + 0.0 results in all waveforms grouping together and 1.0 will not + form any groups. + fetch_arg : str or detex.getdata.DataFetcher instance + Fetch_arg of detex.getdata.quickFetch, see docs for details. filt : list A list of the required input parameters for the obspy bandpass - filter [freqmin,freqmax,corners,zerophase] + filter [freqmin, freqmax, corners, zerophase]. stationKey : str or pd.DataFrame - Path to the station key used by the events or loaded station key - in DataFrame + Path to the station key or DataFrame of station key. templateKey : str or pd.DataFrame - Path to the template key or loaded template key in DataFrame + Path to the template key or loaded template key in DataFrame. trim : list - A list with seconds to trim events with respect to the origin time - reported in the station key. The defualt value of [10, 120] means - 10 seconds before origin time is kept and 120 seconds after origin - time. The larger the calues of trim the longer the computation time - and higher potential that events will get missaligned. If trim - values are too low, however, the event could be missed entirely. - saveClust : boolean + A list with seconds to trim from events with respect to the origin + time reported in the template key. The default value of [10, 120] + means each event will be trimmed to only contain 10 seconds before + its origin time and 120 seconds after. The larger the values of this + argument the longer the computation time and chance of misalignment, + but values that are too small may trim out desired phases of the + waveform. + saveClust : bool If true save the cluster object in the current working - directory as clustname + directory. The name is controlled by the fileName parameter. fileName : str - path (or name) to save the clustering instance, only used - if saveClust is True + Path (or name) to save the clustering instance, only used + if saveClust is True. decimate : int or None - A decimation factor to apply to all data (parameter is simply - passed to the obspy trace/stream method decimate). - Can greatly increase speed and is desirable if the data are - oversampled + A decimation factor to apply to all data in order to decrease run + time. Can be very useful if the the data are oversampled. For + example, if the data are sampled at 200 Hz but a 1 to 10 Hz + bandpass filter is applied it may be appropriate to apply a + decimation factor of 5 to bring the sampling rate down to 40 hz. dytpe : str - The data type to use for recasting both event waveforms and - continuous data arrays. If none the default of float 64 is - kept. Options include: + An option to recast data type of the seismic data. Options are: double- numpy float 64 - single- numpy float 32, much faster and amenable with - cuda GPU processing, sacrifices precision - consisLen : bool - If true the data in the events files are more or less the - same length. Switch to false if the data are not, but can - greatly increase run times. + single- numpy float 32, much faster and amenable to cuda GPU + processing, sacrifices precision. 
eventsOnAllStations : bool If True only use the events that occur on all stations, if - false let each station have an independent event list - subSamp : boolean - If True subsample lag times with cosine extrapolation + false let each station have an independent event list. enforceOrigin : bool - If True make sure each traces starts at the reported origin time - for a give event (trim or merge with zeros if not). Required - for lag times to be meaningful for hypoDD input. + If True make sure each trace starts at the reported origin time in + the template key. If not trim or merge with zeros. Required for + lag times to be meaningful for hypoDD input. fillZeros : bool - If True fill zeroes from trim[0] to trim[1]. Suggested for older - data or if only triggered data are avaliable. + If True fill zeros from trim[0] to trim[1]. Suggested for older + data or if only triggered data are available. + Returns --------- An instance of the detex SSClustering class @@ -106,7 +92,7 @@ def createCluster(CCreq=0.5, # Read in stationkey and template keys and check a few key parameters stakey = detex.util.readKey(stationKey, key_type='station') temkey = detex.util.readKey(templateKey, key_type='template') - _checkClusterInputs(filt, dtype, trim) + _checkClusterInputs(filt, dtype, trim, decimate) # get a data fetcher fetcher = detex.getdata.quickFetch(fetch_arg, fillZeros=fillZeros) @@ -120,9 +106,6 @@ def createCluster(CCreq=0.5, msg = ('No events survived pre-processing, check DataFetcher and event\ quality') detex.log(__name__, msg, level='error') - # make sure lengths are all equal else remove problem events - if consisLen: - TRDF = _testStreamLengths(TRDF) # Prune event that do not occur on all stations if required if eventsOnAllStations: @@ -145,25 +128,26 @@ def createCluster(CCreq=0.5, if not eventsOnAllStations: eventList = row.Events if len(row.Events) < 2: # if only one event on this station skip it - msg = 'Less than 2 valid events on station ' + row.station + msg = 'Less than 2 valid events on station ' + row.Station detex.log(__name__, msg, level='warning', pri=True) continue - DFcc, DFlag = _makeDFcclags(eventList, row, consisLen=consisLen, - subSamp=subSamp) + DFcc, DFlag, DFsubsamp = _makeDFcclags(eventList, row) TRDF.Lags[ind] = DFlag TRDF.CCs[ind] = DFcc + TRDF.Subsamp[ind] = DFsubsamp cx = np.array([]) cxdf = 1.0000001 - DFcc # get dissimilarities # flatten ccs and remove nans cx = _flatNoNan(cxdf) - link = linkage(cx) # get cluster linkage TRDF.loc[ind, 'Link'] = link - trdf = TRDF[['Station', 'Link', 'CCs', 'Lags', 'Events', 'Stats']] - + # define columns to keep + colstk = ['Station', 'Link', 'CCs', 'Lags','Subsamp', 'Events', 'Stats'] + trdf = TRDF[colstk] + eventListAll = list(set.union(*[set(x) for x in TRDF.Events])) #try: clust = detex.subspace.ClusterStream(trdf, temkey, stakey, fetcher, - eventList, CCreq, filt, decimate, + eventListAll, CCreq, filt, decimate, trim, fileName, eventsOnAllStations, enforceOrigin) @@ -189,27 +173,31 @@ def createSubSpace(Pf=10**-12, clust='clust.pkl', minEvents=2, dtype='double', framework in Harris 2006 Theory (eq 20, not yet supported) Or by fitting a PDF to an empirical estimation of the null space (similar to Wiechecki-Vergara 2001). Thresholds are not set until - calling the SVD function of the subspace stream class + calling the SVD function of the subspace stream class. clust: str or instance of detex.subspace.ClusterStream The path to a pickled instance of ClusterStream or an instance of ClusterStream. Used in defining the subspaces. 
minEvents : int The Min number of events that must be in a cluster in order for a - subspace to be created from that cluster + subspace to be created from that cluster. dtype : str ('single' or 'double') The data type of the numpy arrays used in the detections. Options are: single- a np.float32, slightly faster (~30%) less precise double- a np.float64 (default) conDatFetcher : None, str, or instance of detex.getdata.DataFetcher Parameter to indicate how continuous data will be fetched in the newly - created instance of SubSpace. - If None is passed detex will try to deduce the appropriate type of - DataFetcher from the event datafetcher attached to cluster instance - If a str the str will be then be passed to detex.getdata.quickFetch - which returns a fetcher based on the str - If an instance of detex.getdata.DataFetcher is passed then it will be - used as the continuous data fetcher - + created instance of SubSpace. Descriptions are the three accepted types + are: + 1. (None) If None is passed detex will try to deduce the appropriate + type of DataFetcher from the event datafetcher attached to cluster + instance. + 2. (str) conDatFetcher is a string it will be passed to + detex.getdata.quickFetch function which expects a path to the + directory where the data are stored or a valid DataFetcher method. + See the docs of the quickFetch function in detex.getdata for more info. + 3. (instance of detex.getdata.DataFetcher) If an instance of + detex.getdata.DataFetcher is passed then it will be used as the + continuous data fetcher. Returns ----------- @@ -273,7 +261,7 @@ def createSubSpace(Pf=10**-12, clust='clust.pkl', minEvents=2, dtype='double', link = linkage(cx) #get cluster linkage staSS.loc[sind, 'Link'] = link # get lag times and align waveforms - CCtoLag = _makeLagSeries(cx, lags) # a map from cc to lag times + CCtoLag = _makeCC2LagMap(cx, lags) # a map from cc to lag times delays = _getDelays(link, CCtoLag, cx, lags, cxdf) delayNP = -1 * np.min(delays) delayDF = pd.DataFrame(delays + delayNP, columns=['SampleDelays']) @@ -293,6 +281,7 @@ def createSubSpace(Pf=10**-12, clust='clust.pkl', minEvents=2, dtype='double', substream = detex.subspace.SubSpace(singDic, ssDict, cl, dtype, Pf, cfetcher) return substream + def _getInfoFromClust(cl, srow): """ get the DFcc dataframe and lags dataframe from values already stored in @@ -311,8 +300,6 @@ def _getInfoFromClust(cl, srow): DFcc.columns = range(1, len(DFlag) + 1) return DFcc, DFlag - - def _makeEventListKey(evelist1, evelist2): """ Make index key to make evelist1 to evelist2 @@ -326,7 +313,6 @@ def _fastWhere(eve, objs): """ an = next(nu for nu, obj in enumerate(objs) if eve == obj) return an - def _getOffsetList(sind, srow, staSS): """ @@ -356,9 +342,7 @@ def _updateStartTimes(srow, delayDF, temkey): statsdict[key]['offset'] = stime_new - otime #predict offset time return statsdict - -def _makeDFcclags(eventList, row, consisLen=True, - subSamp=False): +def _makeDFcclags(eventList, row): """ Function to make correlation matrix and lag time matrix """ @@ -366,61 +350,54 @@ def _makeDFcclags(eventList, row, consisLen=True, indicies = np.arange(0,len(eventList)-1) DFcc = pd.DataFrame(columns=cols, index=indicies) DFlag = pd.DataFrame(columns=cols,index=indicies) + DFsubsamp = pd.DataFrame(columns=cols,index=indicies) # Loop over indicies and fill in cc and lags for b in DFcc.index.values: for c in range(b+1,len(DFcc)+1): - rev = 1 #if order is switched make multiply lags by -1 + rev = 1 #if order is switched multiply lags by -1 mptd1 = 
row.loc['MPtd'][eventList[b]] mptd2 = row.loc['MPtd'][eventList[c]] - if consisLen: # if all templates same length use faster method - mpfd1 = row.loc['MPfd'][eventList[b]] - mpfd2 = row.loc['MPfd'][eventList[c]] - Nc1 = row.loc['Channels'][eventList[b]] - Nc2 = row.loc['Channels'][eventList[c]] - maxcc, sampleLag = _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2, - subSamp=subSamp) + mpfd1 = row.loc['MPfd'][eventList[b]] + mpfd2 = row.loc['MPfd'][eventList[c]] + Nc1 = row.loc['Channels'][eventList[b]] + Nc2 = row.loc['Channels'][eventList[c]] + maxcc, sampleLag, subsamp = _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, + Nc2) # if all the templates are not the same length use slower method - else: - maxlen = np.max([len(mptd1),len(mptd2)]) - if not len(mptd1) < maxlen: # reverse mptd1 is shorter - mptd1, mptd2 =mptd2 , mptd1 - rev = -1 - lmp = len(mptd1)/2 # padding length - mptd2 = np.pad(mptd2, (lmp,lmp), 'constant', - constant_values=(0,0)) - cc = fast_normcorr(mptd1, mptd2) - maxcc = cc.max() - if subSamp: - sampleLag = _subSamp(cc) - len(mptd1)/2 - else: - sampleLag = cc.argmax() - len(mptd1)/2 DFcc.loc[b, c] = maxcc DFlag.loc[b, c] = sampleLag - return DFcc, DFlag * rev + DFsubsamp.loc[b, c] = subsamp + return DFcc, DFlag * rev, DFsubsamp -def _subSamp(Ceval) : +def _subSamp(Ceval, ind): """ Method to estimate subsample time delays using cosine-fit interpolation Cespedes, I., Huang, Y., Ophir, J. & Spratt, S. Methods for estimation of sub-sample time delays of digitized echo signals. Ultrason. Imaging 17, 142–171 (1995) + + Returns + ------- + The amount the sample should be shifted (float between -.5 and .5) """ - ind = Ceval.argmax() # If max occurs at start or end of CC no extrapolation if ind == 0 or ind == len(Ceval) - 1: - tau=float(ind) + tau = 0.0 else: - alpha = np.arccos((Ceval[ind-1] + Ceval[ind+1])/(2 * Ceval[ind])) - tau=(-(np.arctan((Ceval[ind-1] - Ceval[ind+1]) / - (2 * Ceval[ind] * np.sin(alpha))) / alpha) + ind) - if abs(tau - ind) > 1: - msg = ('subsample failing, more than 1 sample shift predicted') + cb4 = Ceval[ind - 1] + caf = Ceval[ind + 1] + cn = Ceval[ind] + alpha = np.arccos((cb4 + caf)/(2 * cn)) + alsi = np.sin(alpha) + tau = -(np.arctan((cb4 -caf) / (2 * cn * alsi)) / alpha) + if abs(tau) > .5: + msg = ('subsample failing, more than .5 sample shift predicted') detex.log(__name__, msg ,level='Warning', pri=True) return ind return tau -def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2, subSamp=False): +def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2): """ Function find max correlation coeficient and corresponding lag time between 2 traces. 
fft should have previously been performed @@ -432,30 +409,29 @@ def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2, subSamp=False): if len(mptd1) != len(mptd2) or len(mpfd2) !=len(mpfd1): msg = 'Lengths not equal on multiplexed data, cannot correlate' detex.log(__name__, msg, level='error') - n=len(mptd1) + n = len(mptd1) mptd2Temp = mptd2.copy() mptd2Temp = np.lib.pad(mptd2Temp, (n-1,n-1), 'constant', constant_values=(0,0)) a = pd.rolling_mean(mptd2Temp, n)[n-1:] b = pd.rolling_std(mptd2Temp, n)[n-1:] - b *= np.sqrt((n-1.0) / n) + b *= np.sqrt((n - 1.0) / n) c = np.real(scipy.fftpack.ifft(np.multiply(np.conj(mpfd1), mpfd2))) - c1 = np.concatenate([c[-n+1:],c[:n]]) - result = ((c1 - mptd1.sum() * a) / (n*b*np.std(mptd1)))[Nc-1::Nc] + c1 = np.concatenate([c[-n + 1:], c[:n]]) + result = ((c1 - mptd1.sum() * a) / (n * b * np.std(mptd1)))[Nc - 1::Nc] try: maxcc = np.nanmax(result) + mincc = np.nanmin(result) maxind = np.nanargmax(result) - if maxcc > 1.1: #if a inf is found in array - result[result == np.inf] = 0 + if maxcc > 1. or mincc < -1.: #if a inf is found in array + # this can happen if some of the waveforms have been zeroed out + result[(result > 1) | (result < -1)] = 0 maxcc = np.nanmax(result) maxind = np.nanargmax(result) - except: - return 0.0,0.0 - - #return maxcc,maxind-n +1 - if subSamp: - maxind = _subSamp(result) - return maxcc,(maxind+1)*Nc - n + except ValueError: # if fails skip + return 0.0, 0.0, 0.0 + subsamp = _subSamp(result, maxind) + return maxcc, (maxind + 1) * Nc - n, subsamp def fast_normcorr(t, s): """ @@ -560,6 +536,7 @@ def _makeSSDF(row, minEvents): evelist = row.Clust[ind] evelist.sort() DF['Events'][ind] = evelist + DF['numEvents'][ind] = len(evelist) DF['MPtd'][ind] = _trimDict(row, 'MPtd', evelist) DF['MPfd'][ind] = _trimDict(row, 'MPfd', evelist) DF['Stats'][ind] = _trimDict(row, 'Stats', evelist) @@ -586,7 +563,7 @@ def _loadEvents(fetcher, filt, trim, stakey, temkey, event templates, multiplexed data, obspy traces etc. """ columns = ['Events', 'MPtd', 'MPfd', 'Channels', 'Stats', 'Link', 'Clust', - 'Lags', 'CCs', 'numEvents'] + 'Lags','Subsamp', 'CCs', 'numEvents'] TRDF = pd.DataFrame(columns=columns) stanets = stakey.NETWORK + '.' + stakey.STATION TRDF['Station'] = stanets @@ -611,43 +588,54 @@ def _loadEvents(fetcher, filt, trim, stakey, temkey, TRDF.loc[ind, 'numEvents'] = len(TRDF.loc[ind, 'Events']) # get multiplexed time domain and freq. 
domain arrays - for key in TRDF.loc[ind, 'Events']: # loop each event - Nc = TRDF.loc[ind, 'Stats'][key]['Nc'] - mp = multiplex(sts[key], Nc) # multiplexed time domain - st = sts[key] # current stream - TRDF.loc[ind, 'MPtd'][key] = mp - stu = st[0].stats.starttime.timestamp #updated starttime - TRDF.loc[ind, 'Stats']['starttime'] = stu - reqlen = 2 * len(TRDF.loc[ind,'MPtd'][key]) #required length - reqlenbits = 2**reqlen.bit_length() # required length fd - mpfd = scipy.fftpack.fft(mp, n=reqlenbits) - TRDF.loc[ind, 'MPfd'][key] = mpfd + TRDF = _getTimeDomainWFs(TRDF, row, ind, sts, eves) + TRDF = _testStreamLengths(TRDF, row, ind) + TRDF = _getFreqDomain(TRDF, row, ind) + TRDF = TRDF[TRDF.Keep] TRDF.sort_values(by='Station', inplace=True) TRDF.reset_index(inplace=True, drop=True) return TRDF -def _testStreamLengths(TRDF): - for ind, row in TRDF.iterrows(): #get lengths - lens = np.array([len(x) for x in row.MPtd.values()]) +def _getTimeDomainWFs(TRDF, row, ind, sts, eves): + for key in eves: # loop each event + Nc = TRDF.loc[ind, 'Stats'][key]['Nc'] + mp = multiplex(sts[key], Nc) # multiplexed time domain + st = sts[key] # current stream + TRDF.loc[ind, 'MPtd'][key] = mp + stu = st[0].stats.starttime.timestamp #updated starttime + TRDF.loc[ind, 'Stats']['starttime'] = stu + return TRDF + +def _getFreqDomain(TRDF, row, ind): + for key in row.Events: # loop each event + mp = TRDF.loc[ind, 'MPtd'][key] + reqlen = 2 * len(mp) #required length + reqlenbits = 2 ** reqlen.bit_length() # required length fd + mpfd = scipy.fftpack.fft(mp, n=reqlenbits) + TRDF.loc[ind, 'MPfd'][key] = mpfd + return TRDF + +def _testStreamLengths(TRDF, row, ind): + lens = np.array([len(x) for x in row.MPtd.values()]) # trim to smallest length if within 90% of median, else kill key le = np.min(lens[lens > np.median(lens)*.9]) - for ind, row in TRDF.iterrows(): - keysToKill = [x for x in row.Events if len(row.MPtd[x]) < le] - # trim events slightly too small if any - for key in row.Events: - trimed = row.MPtd[key][:le] - TRDF.loc[ind, 'MPtd'][key] = trimed - #rest keys on TRDF - tmar = np.array(TRDF.Events[ind]) - tk = [not x in keysToKill for x in TRDF.Events[ind]] - TRDF.Events[ind] = tmar[np.array(tk)] - for key in keysToKill: - msg = ('%s on %s is out of length tolerance, removing' % - (key, row.Station)) - detex.log(__name__, msg, level='warn', pri=True) - TRDF.MPtd[ind].pop(key,None) - TRDF.MPfd[ind].pop(key,None) + + keysToKill = [x for x in row.Events if len(row.MPtd[x]) < le] + # trim events slightly too small if any + for key in row.Events: + trimed = row.MPtd[key][:le] + TRDF.loc[ind, 'MPtd'][key] = trimed + #rest keys on TRDF + tmar = np.array(TRDF.Events[ind]) + tk = [not x in keysToKill for x in TRDF.Events[ind]] + TRDF.Events[ind] = tmar[np.array(tk)] + for key in keysToKill: + msg = ('%s on %s is out of length tolerance, removing' % + (key, row.Station)) + detex.log(__name__, msg, level='warn', pri=True) + TRDF.MPtd[ind].pop(key,None) + return TRDF def _flatNoNan(df): @@ -700,14 +688,15 @@ def _traceEventDendro(dflink, x, lags, CCtoLag, clustDict, clus): cl22 = clustDict[int(a[1].i2)] else: cl22 = clustDict[int(a[1].i1)] - currentLag = CCtoLag[a[1].cc] - for b in cl22: #record and update lags for second cluster - lagSeries[b] += currentLag # + # reference cc to lag samps map, round and cast to sample int + currentLag = int(np.round(CCtoLag[a[1].cc])) + + for b in cl22: #record and update lags for second cluster + lagSeries[b] += currentLag lags = _updateLags(b, lags, len(dflink), currentLag) - CCtoLag = 
_makeLagSeries(x,lags) + CCtoLag = _makeCC2LagMap(x, lags) return lagSeries - def _updateLags(evenum, lags, N, currentLag): """ function to add current lag shifts to effected @@ -722,16 +711,16 @@ def _updateLags(evenum, lags, N, currentLag): return lags def _getDow(N, evenum): - dow = [0]*evenum + dow = [0] * evenum if len(dow) > 0: for a in range(len(dow)): - dow[a] = _triangular(N-1)-1+evenum-_triangular(N-1-a) + dow[a] = _triangular(N-1)-1 + evenum-_triangular(N-1-a) return dow def _getAcr(N, evenum): acr = [0]*(N-evenum) if len(acr) > 0: - acr[0] = _triangular(N)-_triangular(N-(evenum)) + acr[0] = _triangular(N) - _triangular(N-(evenum)) for a in range(1, len(acr)): acr[a] = acr[a-1]+1 return acr @@ -741,7 +730,6 @@ def _triangular(n): calculate sum of triangle with base N, see http://en.wikipedia.org/wiki/Triangular_number """ - return sum(range(n+1)) def _getClustDict(linkup,N): @@ -787,7 +775,7 @@ def _ensureUnique(cx, cxdf): cxdf.values[a, sindex:] = cx[tri1 - tri2 , tri1 - tri3] return se.values,cxdf -def _makeLagSeries(x, lags): +def _makeCC2LagMap(x, lags): LS = pd.Series(lags, index=x) return LS @@ -819,7 +807,7 @@ def _loadStream(fetcher, filt, trim, decimate, station, dtype, Nc = detex.util.get_number_channels(st) #get number of channels if Nc != len(st): msg = ('%s on %s is fractured or channels are missing, consider ' - 'setting fillZeros to True in ClusterStream to try and ' + 'setting fillZeros to True in ClusterStream to try to ' 'make it usable, skipping') % (evename, station) detex.log(__name__, msg, pri=True) continue @@ -972,18 +960,20 @@ def _mergeChannelsFill(st): st.merge(fill_value=0.0) return st -def _checkClusterInputs(filt, dtype, trim): +def _checkClusterInputs(filt, dtype, trim, decimate): """ Check a few key input parameters to make sure everything is kosher """ if filt is not None and len(filt) != 4: # check filt msg = 'filt must either be None (no filter) or a len 4 list or tuple' detex.log(__name__, msg, level='error') + if dtype != 'double' and dtype != 'single': # check dtype msg = ('dype must be either "double" or "single" not %s, setting to \ double' % dtype) dtype = 'double' detex.log(__name__, msg, level='warn', pri=True) + if trim is not None: # check trim if len(trim) != 2: msg = 'Trim must be a list or tuple of length 2' @@ -991,4 +981,14 @@ def _checkClusterInputs(filt, dtype, trim): else: if -trim[0] > trim[1]: msg = 'Invalid trim parameters' - detex.log(__name__, msg, level='error') \ No newline at end of file + detex.log(__name__, msg, level='error') + + if decimate is not None: + if not isinstance(decimate, int): + msg = 'decimate must be an int' + detex.log(__name__, msg, level='error', e=TypeError) + + + + + \ No newline at end of file diff --git a/detex/getdata.py b/detex/getdata.py index 68a690f..789cb44 100644 --- a/detex/getdata.py +++ b/detex/getdata.py @@ -9,8 +9,10 @@ import json import random +#client imports import obspy.fdsn import obspy.neic +import obspy.earthworm conDirDefault = 'ContinuousWaveForms' eveDirDefault = 'EventWaveForms' @@ -37,7 +39,8 @@ def quickFetch(fetch_arg, **kwargs): data directory make sure it does not share names with a valid DataFetcher method - kwargs are passed to the DataFetcher, see DataFetcher docs for details + kwargs are passed to the DataFetcher class, see DataFetcher + docs for details Returns ------- @@ -77,6 +80,7 @@ def makeDataDirectories(template_key='TemplateKey.csv', timeAfterOrigin=4 * 60, conDir=conDirDefault, secBuf=120, + conDatDuration=3600, multiPro=False, getContinuous=True, 
getTemplates=True, @@ -117,6 +121,8 @@ def makeDataDirectories(template_key='TemplateKey.csv', The number of seconds to download after each hour of continuous data. This might be non-zero in order to capture some detections that would normally be overlooked if data did not overlap somewhat. + conDatDuration : real number (int, float, etc.) + The duration of the continuous data to download in seconds. multiPro : bool If True fork several processes to get data at once, potentially much faster but a bit inconsiderate on the server hosting the data @@ -170,7 +176,8 @@ def makeDataDirectories(template_key='TemplateKey.csv', if getContinuous: msg = 'Getting continuous data' detex.log(__name__, msg, level='info', pri=True) - _getConData(fetcher, stakey, conDir, secBuf, opType, formatOut) + _getConData(fetcher, stakey, conDir, secBuf, opType, formatOut, + duration=conDatDuration) ## Log finish msg = "finished makeDataDirectories call" @@ -193,12 +200,14 @@ def _getTemData(temkey, stakey, temDir, formatOut, fetcher, tb4, taft): if not os.path.exists(os.path.join(temDir,'.index.db')): indexDirectory(temDir) -def _getConData(fetcher, stakey, conDir, secBuf, opType, formatOut): +def _getConData(fetcher, stakey, conDir, secBuf, opType, formatOut, + duration=3600): streamGenerator = fetcher.getConData(stakey, secBuf, returnName=True, conDir=conDir, - skipIfExists=True) + skipIfExists=True, + duration=duration) for st, path, fname in streamGenerator: if st is not None: #if data were returned if not os.path.exists(path): @@ -211,7 +220,7 @@ def _getConData(fetcher, stakey, conDir, secBuf, opType, formatOut): class DataFetcher(object): """ \n - Class to handle data aquisition + Class to handle data acquisition Parameters ---------- @@ -221,15 +230,15 @@ class DataFetcher(object): "dir" : A data directory as created by makeDataDirectories "client" : an obspy client can be passed to get data useful if using an in-network database - "iris" : an iris client is initiated + "iris" : an iris client is initiated, also uses IRIS for inventory "uuss" : A client attached to the university of utah - seismograph stations is initated using CWB for waveforms + seismograph stations is initiated using CWB for waveforms and IRIS is used for station inventories client : An obspy client object Client object used to get data, from obspy.fdsn, obspy.neic etc. removeResponse : bool - If true remove response before returning stream - inventoryClient : An obspy client object + If True remove response before returning stream. + inventoryArg : None, obspy client object, or obspy Inventory object A seperate client for station inventories, only used if removeResponse == True, also supports keyword "iris" for iris client directoryName : str @@ -244,7 +253,7 @@ class DataFetcher(object): conDatDuration : int or float Duration for continuous data in seconds conBuff : int or float - The amount of data, in seconds, to donwnload at the end of the + The amount of data, in seconds, to download at the end of the conDatDuration. Ideally should be equal to template length, important in order to avoid missing potential events at the end of a stream timeBeforeOrigin : int or float @@ -255,29 +264,36 @@ class DataFetcher(object): If True apply some data checks before returning streams, can be useful for older data sets. 
fillZeros : bool - If True fill data that arent avaliable with 0s (provided some data are - avaliable) + If True fill data that are not available with 0s (provided some data are + available) """ supMethods = ['dir', 'client', 'uuss', 'iris'] - def __init__(self, method, client=None, removeResponse=False, - inventoryClient=None, directoryName=None, opType='VEL', + def __init__(self, method, client=None, removeResponse=True, + inventoryArg=None, directoryName=None, opType='VEL', prefilt=[.05, .1, 15, 20], conDatDuration=3600, conBuff=120, timeBeforeOrigin=1*60, timeAfterOrigin=4*60, checkData=True, fillZeros=False): self.__dict__.update(locals()) # Instantiate all inputs + self.inventory = _getInventory(inventoryArg) self._checkInputs() + if self.removeResponse and self.inventory is None: + if self.method == 'dir': + msg = ('Cannot remove response without a valid inventoryArg, ' + 'setting removeResponse to False') + detex.log(__name__, msg, level='warning', pri=True) + self.removeResponse = False def _checkInputs(self): if not isinstance (self.method, str): - msg = 'method must be a string. options are:\n %s' % supMethods + msg = 'method must be a string. options:\n %s' % self.supMethods detex.log(__name__, msg, level='error') self.method = self.method.lower() # parameter to lowercase if not self.method in DataFetcher.supMethods: msg = ('method %s not supported. Options are:\n %s' % - (self.method, supMethods)) + (self.method, self.supMethods)) detex.log(__name__, msg, level='error') if self.method == 'dir': @@ -291,25 +307,21 @@ def _checkInputs(self): else: self.directory = dirPath[0] self._getStream = _loadDirectoryData - if self.removeResponse: - msg = ('method %s does not support remove response, the ' - 'response should have been removed on the' - 'detex.getdata.makeDataDirectory call') % self.method elif self.method == "client": if self.client is None: msg = 'Method %s requires a valid obspy client' % self.method detex.log(__name__, msg, level='error') - self._getStream = _loadFromClient + self._getStream = _assignClientFunction(self.client) elif self.method == "iris": self.client = obspy.fdsn.Client("IRIS") - self._getStream = _loadFromClient + self._getStream = _assignClientFunction(self.client) elif self.method == 'uuss': # uuss setting - self.client = obspy.neic.Client(u'128.110.129.227') - self._getStream = _loadFromClient - self.inventoryClient = obspy.fdsn.Client('iris') # use iris + self.client = obspy.neic.Client('128.110.129.227') + self._getStream = _assignClientFunction(self.client) + self.inventory = obspy.fdsn.Client('iris') # use iris for resps def getTemData(self, temkey, stakey, tb4=None, taft=None, returnName=True, temDir=None, skipIfExists=False, skipDict=None, @@ -353,6 +365,9 @@ def getTemData(self, temkey, stakey, tb4=None, taft=None, returnName=True, taft = self.timeAfterOrigin if skipDict is not None and len(skipDict.keys()) < 1: skipDict = None + stakey = detex.util.readKey(stakey, key_type='station') + temkey = detex.util.readKey(temkey, key_type='template') + indexiter = itertools.product(stakey.index, temkey.index) #iter through each station/event pair and fetch data for stain, temin in indexiter: @@ -482,7 +497,7 @@ def getStream(self, start, end, net, sta, chan='???', loc='??'): Network code, usually 2 letters sta : str Station code - chan : str or list of str (supports wildcard) + chan : str or list of str (should support wildcard) Channels to fetch loc : str Location code for station @@ -495,6 +510,15 @@ def getStream(self, start, end, net, sta, 
chan='???', loc='??'): # make sure start and end are UTCDateTimes start = obspy.UTCDateTime(start) end = obspy.UTCDateTime(end) + + # check that chan input is ok + if not isinstance(chan, (list, tuple)): + if not isinstance(chan, str): + msg = 'chan must be a string or list of strings' + detex.log(__name__, msg, level='error') + chan = [chan] + + # fetch stream st = self._getStream(self, start, end, net, sta, chan, loc) # perform checks if required @@ -505,16 +529,17 @@ def getStream(self, start, end, net, sta, chan='???', loc='??'): if st is None or len(st) < 1: return None - + # attach response + if self.removeResponse and self.inventory is not None: + if not _hasResponse(st): + st = _attachResponse(self, st, start, end, net, sta, loc, chan) + # remove response if self.removeResponse: - netsta = net + '.' + sta - name = obspy.UTCDateTime(start).formatIRISWebService() - try: - _removeInstrumentResposne(st, self.prefilt, self.opType) - except: - msg = 'Remove response failed for %s on %s' % (name, netsta) - detex.log(__name__, msg, level='warning') + st = _removeInstrumentResponse(self, st) + + if st is None: # return None if response removal failed + return None # trims and zero fills st.trim(starttime=start, endtime=end) @@ -525,7 +550,6 @@ def getStream(self, start, end, net, sta, chan='???', loc='??'): if self.fillZeros: st.trim(starttime=start, endtime=end, pad=True, fill_value=0.0) st.merge(1, fill_value=0.0) - #nc = len(set([x.stats.channel for x in st])) return st ########## Functions for loading data based on selected methods ########### @@ -547,18 +571,35 @@ def _loadDirectoryData(fet, start, end, net, sta, chan, loc): t2p = obspy.UTCDateTime(t2) msg = 'data from %s to %s on %s not found in %s' % (t1p, t2p, sta, fet.directoryName) - detex.log(__name__, msg, level='warning', pri=True) + detex.log(__name__, msg, level='warning', pri=False) + return None + # define conditions in which condata should not be loaded + # con1 and con2 - No overlap (other than 10%) + tra = t2 - t1 # time range + con1 = ((dfind.Starttime <= t1) & (dfind.Endtime -tra * .1 < t1) & + (dfind.Starttime < t2) & (dfind.Endtime < t2)) + con2 = ((dfind.Starttime > t1) & (dfind.Endtime > t1) & + (dfind.Starttime + tra*.1 > t2) & (dfind.Endtime >= t2)) + df = dfind[~(con1 | con2)] +# cst = dfind.Starttime <= t1 +# cet = dfind.Endtime >= t2 +# cstam = cst.argmin() +# cetam = cet.argmax() +# +# ind1 = cstam - 1 if cstam > 0 else 0 +# ind2 = cetam + 1 +# df = dfind[ind1:ind2] + if len(df) < 1: + t1p = obspy.UTCDateTime(t1) + t2p = obspy.UTCDateTime(t2) + msg = 'data from %s to %s on %s not found in %s' % (t1p, t2p, sta, + fet.directoryName) + detex.log(__name__, msg, level='warning', pri=False) return None - cst = dfind.Starttime <= t1 - cet = dfind.Endtime >= t2 - cstam = cst.argmin() - cetam = cet.argmax() - - ind1 = cstam - 1 if cstam > 0 else 0 - ind2 = cetam + 1 - df = dfind[ind1:ind2] st = obspy.core.Stream() + if len(df.Path) < 1: + detex.deb([df, dfind]) for path, fname in zip(df.Path, df.FileName): fil = os.path.join(path, fname) try: @@ -574,65 +615,130 @@ def _loadDirectoryData(fet, start, end, net, sta, chan, loc): stout = obspy.core.Stream() for cha in chan: stout += st.select(channel=cha) + loc = '*' if loc in ['???','??'] else loc # convert ? to * - st = st.select(location=loc) + stout = stout.select(location=loc) + return stout + +def _assignClientFunction(client): + """ + function to take an obspy client FDSN, NEIC, EW, etc. return the + correct loadFromClient function for getting data. 
+ """ + if isinstance(client, obspy.fdsn.Client): + return _loadFromFDSN + elif isinstance(client, obspy.neic.Client): + return _loadFromNEIC + elif isinstance(client, obspy.earthworm.Client): + return _loadFromEarthworm + else: + msg = 'Client type not supported' + detex.log(__name__, msg, level='error') + +## load from client functions, this is needed because the APIs are not the same + +def _loadFromNEIC(fet, start, end, net, sta, chan, loc): + """ + Use obspy.neic.Client to fetch waveforms + """ + client = fet.client + # str reps of utc objects for error messages + startstr = start.formatIRISWebService() + endstr = end.formatIRISWebService() + st = obspy.Stream() + for cha in chan: + try: #try neic client + st += client.getWaveform(net, sta, loc, cha, start, end) + except: + msg = ('Could not fetch data on %s from %s to %s' % + (net + '.' + sta, startstr, endstr)) + detex.log(__name__, msg, level='warning', pri=False) return st -def _loadFromClient(fet, start, end, net, sta, chan, loc): +def _loadFromEarthworm(fet, start, end, net, sta, chan, loc): + client = fet.client + startstr = start.formatIRISWebService() + endstr = end.formatIRISWebService() + st = obspy.Stream() + for cha in chan: + try: #try neic client + st += client.getWaveform(net, sta, loc, cha, start, end) + except: + msg = ('Could not fetch data on %s from %s to %s' % + (net + '.' + sta, startstr, endstr)) + detex.log(__name__, msg, level='warning', pri=False) + return st + +def _loadFromFDSN(fet, start, end, net, sta, chan, loc): """ - Use obspy client to fetch waveforms + Use obspy.fdsn.Client to fetch waveforms """ client = fet.client - invClient = fet.inventoryClient # str reps of utc objects for error messages startstr = start.formatIRISWebService() endstr = end.formatIRISWebService() - try: # try to fetch data - try: - st = client.get_waveforms(net, sta, loc, ','.join(chan), start, - end, attach_response=fet.removeResponse) - except AttributeError: # if not fdsn client - try: #try neic client - st = client.getWaveform(net, sta, loc, chan, start, end) - except: - msg = ('Client type not understood') - detex.log(__name__, msg, level='error') - if fet.removeResponse: - inv = invClient.get_stations(starttime=start, - endtime=end, - network=net, - station=sta, - loc=loc, - channel=chan, - level="response") - st.attach_response(inv) + # convert channels to correct format (list seperated by ,) + if not isinstance(chan, str): + chan = ','.join(chan) + else: + if '-' in chan: + chan = ','.join(chan.split('-')) + # try to get waveforms, else return None + try: + st = client.get_waveforms(net, sta, loc, chan, start, end, + attach_response=fet.removeResponse) except: msg = ('Could not fetch data on %s from %s to %s' % - (net+'.'+sta, startstr, endstr)) - detex.log(__name__, msg, level='warning', pri=True) + (net + '.' 
+ sta, startstr, endstr)) + detex.log(__name__, msg, level='warning', pri=False) st = None return st - + + + + + ########## MISC functions ############# +def _attachResponse(fet, st, start, end, net, sta, loc, chan): + """ + Function to attach response from inventory or client + """ + if not fet.removeResponse or fet.inventory is None: + return st + if isinstance(fet.inventory, obspy.station.inventory.Inventory): + st.attach(fet.inventory) + else: + inv = fet.inventory.get_stations(starttime=start, + endtime=end, + network=net, + station=sta, + loc=loc, + channel=chan, + level="response") + st.attach(inv) + return st - -def _progress_bar(total, fileMoveCount): - global countSoFar - countSoFar += 1 - width = 25 - percent = float((float(countSoFar)/float(total))*100.0) - completed=int(percent) - totalLeft=100 - completedAmount = int(completed/(float(totalLeft)/float(width))) - spaceAmount = int((float(totalLeft)-float(completed))/ - (float(totalLeft)/float(width))) - - for i in xrange(width): - sys.stdout.write("\r[" + "=" * completedAmount + " " * spaceAmount + - "]" + str(round(float(percent), 2)) + "%" + " " + str(countSoFar) + - "/" + str(total) + " Bad Files:" + str(fileMoveCount) + " ") - sys.stdout.flush() +def _getInventory(invArg): + """ + Take a string, Obspy client, or inventory object and return inventory + object used to attach responses to stream objects for response removal + """ + if isinstance(invArg, obspy.station.inventory.Inventory): + return invArg + elif isinstance (invArg, obspy.fdsn.Client): + return invArg + elif isinstance(invArg, str): + if invArg.lower() == 'iris': + invArg = obspy.fdsn.Client('IRIS') + elif not os.path.exists(invArg): + msg = ('if inventoryArg is str then it must be a client name, ie ' + 'IRIS, or a path to a station xml') + detex.log(__name__, msg, level='error') + else: + return detex.read_inventory(invArg) + elif invArg is None: + return None def _dataCheck(st, start, end): @@ -655,7 +761,8 @@ def _dataCheck(st, start, end): tr.stats.sampling_rate = np.round(tr.stats.sampling_rate) msg = ('Found non-int sampling_rates, rounded to nearest \ int on %s around %s' % (netsta, time)) - detex.log(__name__, msg, level='info') + detex.log(__name__, msg, level='warning') + if any([not np.any(x.data) for x in st]): msg = ('At least one channel is all 0s on %s around %s, skipping' % (netsta, time)) @@ -669,16 +776,20 @@ def _hasResponse(st): """ return all([hasattr(tr.stats, 'response') for tr in st]) -def _removeInstrumentResposne(st, prefilt, opType): +def _removeInstrumentResponse(fet, st): + if not fet.removeResponse: # pass stream back if no response removal + return st st.detrend('linear') # detrend st = _fftprep(st) try: - st.remove_response(output=opType, pre_filt=prefilt) + st.remove_response(output=fet.opType, pre_filt=fet.prefilt) except: - msg = 'RemoveResponse Failed for %s,%s, not saving' % ( - st[0].stats.network, st[0].stats.station) - detex.log(__name__, msg, level='warning') - st = False + utc1 = str(st[0].stats.starttime).split('.')[0] + utc2 = str(st[0].stats.endtime).split('.')[0] + msg = 'RemoveResponse Failed for %s,%s, from %s to %s, skipping' % ( + st[0].stats.network, st[0].stats.station, utc1, utc2) + detex.log(__name__, msg, level='warning', pri=True) + st = None return st @@ -831,15 +942,10 @@ def _checkQuality(stPath): duration = endtime - starttime nc = len(list(set([x.stats.channel for x in st]))) netsta = st[0].stats.network + '.' 
+ st[0].stats.station - if len(gaps) > 0: - hasGaps = True - else: - hasGaps = False outDict = {'Gaps': gapsum, 'Starttime' : starttime, 'Endtime' : endtime, 'Duration' : duration, 'Nc' : nc, 'Nt':lengthStream, 'Station' : netsta} return outDict - def _loadIndexDb(dirPath, station, t1, t2): indexFile = glob.glob(os.path.join(dirPath, '.index.db')) @@ -872,20 +978,4 @@ def _associatePathList(pathList, dfin): return os.path.join(*pat) getAllData = makeDataDirectories - - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/detex/results.py b/detex/results.py index bdca5f8..f2dd494 100644 --- a/detex/results.py +++ b/detex/results.py @@ -164,29 +164,34 @@ def _makePfKey(ss_info, sg_info, Pf): """ if not Pf: # if no Pf value passed simply return none return None, None + + ss_df = pd.DataFrame(columns=['Sta', 'Name', 'DS', 'betadist']) + sg_df = pd.DataFrame(columns=['Sta', 'Name', 'DS', 'betadist']) + if isinstance(ss_info, pd.DataFrame): + for num, row in ss_info.iterrows(): + TH = scipy.stats.beta.isf(Pf, row.beta1, row.beta2, 0, 1) + # if isf gives unrealistic pf, initiated forward grid serach + if TH > .94: + TH, Pftemp = _approximateThreshold( + row.beta1, row.beta2, Pf, 1000, 3) + ss_df.loc[len(ss_df)] = [row.Sta, row.Name, + TH, [row.beta1, row.beta2, 0, 1]] + ss_df.reset_index(drop=True, inplace=True) else: - ss_df = pd.DataFrame(columns=['Sta', 'Name', 'DS', 'betadist']) - sg_df = pd.DataFrame(columns=['Sta', 'Name', 'DS', 'betadist']) - if isinstance(ss_info, pd.DataFrame): - for num, row in ss_info.iterrows(): - TH = scipy.stats.beta.isf(Pf, row.beta1, row.beta2, 0, 1) - # if isf gives unrealistic pf, initiated forward grid serach - if TH > .94: - TH, Pftemp = _approximateThreshold( - row.beta1, row.beta2, Pf, 1000, 3) - ss_df.loc[len(ss_df)] = [row.Sta, row.Name, - TH, [row.beta1, row.beta2, 0, 1]] - if isinstance(sg_info, pd.DataFrame): - for num, row in sg_info.iterrows(): - TH = scipy.stats.beta.isf(Pf, row.beta1, row.beta2, 0, 1) - if TH > .94: - TH, Pftemp = _approximateThreshold( - row.beta1, row.beta2, Pf, 1000, 3) - sg_df.loc[len(sg_df)] = [row.Sta, row.Name, - TH, [row.beta1, row.beta2, 0, 1]] + ss_df = None + + if isinstance(sg_info, pd.DataFrame): + for num, row in sg_info.iterrows(): + TH = scipy.stats.beta.isf(Pf, row.beta1, row.beta2, 0, 1) + if TH > .94: + TH, Pftemp = _approximateThreshold( + row.beta1, row.beta2, Pf, 1000, 3) + sg_df.loc[len(sg_df)] = [row.Sta, row.Name, + TH, [row.beta1, row.beta2, 0, 1]] sg_df.reset_index(drop=True, inplace=True) - ss_df.reset_index(drop=True, inplace=True) - return ss_df, sg_df + else: + sg_df = None + return ss_df, sg_df def _approximateThreshold(beta_a, beta_b, target, numintervals, numloops): diff --git a/detex/subspace.py b/detex/subspace.py index a380bdd..b87f973 100644 --- a/detex/subspace.py +++ b/detex/subspace.py @@ -40,10 +40,10 @@ class ClusterStream(object): detex.construct.createCluster """ - def __init__(self, trdf, temkey, stakey, fetcher, eventList, CCreq, filt, + def __init__(self, trdf, temkey, stakey, fetcher, eventList, ccReq, filt, decimate, trim, fileName, eventsOnAllStations, enforceOrigin): self.__dict__.update(locals()) # Instantiate all input variables - self.CCreq = None # set to None because it can vary between stations + self.ccReq = None # set to None because it can vary between stations self.clusters = [0] * len(trdf) self.stalist = trdf.Station.values.tolist() # station lists self.stalist2 = [x.split('.')[1] for x in self.stalist] @@ -55,7 +55,7 @@ def __init__(self, trdf, 
temkey, stakey, fetcher, eventList, CCreq, filt, else: evlist = eventList self.clusters[num] = Cluster(self, row.Station, temkey, evlist, - row.Link, CCreq, filt, decimate, trim, + row.Link, ccReq, filt, decimate, trim, row.CCs) def writeSimpleHypoDDInput(self, fileName='dt.cc', coef=1, minCC=.35): @@ -132,9 +132,11 @@ def writeSimpleHypoDDInput(self, fileName='dt.cc', coef=1, minCC=.35): if cc < minCC: continue lagsamps = trdf.Lags[ind2][ind1] + subsamps = trdf.Subsamp[ind2][ind1] if np.isnan(lagsamps): # if lag from other end of mat lagsamps = -trdf.Lags[ind1][ind2] - lags = lagsamps / (sr * Nc) + subsamps = trdf.Subsamp[ind1][ind2] + lags = lagsamps / (sr * Nc) + subsamps obsline = self._makeObsLine(sta, lags, cc**coef) if isinstance(obsline, str): count += 1 @@ -193,12 +195,12 @@ def printAtr(self): # print out basic attributes used to make cluster for cl in self.clusters: cl.printAtr() - def dendro(self): + def dendro(self, **kwargs): """ Create dendrograms for each station """ for cl in self.clusters: - cl.dendro() + cl.dendro(**kwargs) def simMatrix(self, groupClusts=False, savename=False, returnMat=False, **kwargs): @@ -222,7 +224,7 @@ def simMatrix(self, groupClusts=False, savename=False, returnMat=False, dout = cl.simMatrix(groupClusts, savename, returnMat, **kwargs) out.append(dout) - def plotEvents(self, projection='merc', plotNonClusts=True): + def plotEvents(self, projection='merc', plotNonClusts=True, **kwargs): """ Plot the event locations for each station @@ -234,7 +236,7 @@ def plotEvents(self, projection='merc', plotNonClusts=True): If true also plot the singletons (events that dont cluster) """ for cl in self.clusters: - cl.plotEvents(projection=projection, plotNonClusts=plotNonClusts) + cl.plotEvents(projection, plotNonClusts, **kwargs) def write(self): # uses pickle to write class to disk """ @@ -267,7 +269,7 @@ def __repr__(self): class Cluster(object): - def __init__(self, clustStream, station, temkey, eventList, link, CCreq, + def __init__(self, clustStream, station, temkey, eventList, link, ccReq, filt, decimate, trim, DFcc): # instantiate a few needed varaibles (not all to save space) @@ -276,25 +278,25 @@ def __init__(self, clustStream, station, temkey, eventList, link, CCreq, self.station = station self.temkey = temkey self.key = eventList - self.updateReqCC(CCreq) + self.updateReqCC(ccReq) self.nonClustColor = '0.6' # use a grey of 0.6 for singletons - def updateReqCC(self, newCCreq): + def updateReqCC(self, newccReq): """ Function to update the required correlation coeficient for this station Parameters ------------- - newCCreq : float (between 0 and 1) + newccReq : float (between 0 and 1) Required correlation coef """ - if newCCreq < 0. or newCCreq > 1.: - msg = 'Parameter CCreq must be between 0 and 1' + if newccReq < 0. 
or newccReq > 1.: + msg = 'Parameter ccReq must be between 0 and 1' detex.log(__name__, msg, level='error') - self.CCreq = newCCreq + self.ccReq = newccReq self.dflink, serclus = self._makeDFLINK(truncate=False) # get events that actually cluster (filter out singletons) - dfcl = self.dflink[self.dflink.disSim <= 1 -self.CCreq] + dfcl = self.dflink[self.dflink.disSim <= 1 -self.ccReq] # sort putting highest links in cluster on top dfcl.sort_values(by='disSim', inplace=True, ascending=False) dfcl.reset_index(inplace=True, drop=True) @@ -318,8 +320,8 @@ def updateReqCC(self, newCCreq): self.singles = list(keyset.difference(clustset)) self.clustcount = np.sum([len(x) for x in self.clusts]) self.clustColors = self._getColors(len(self.clusts)) - msg = ('CCreq for station %s updated to CCreq=%1.3f' % - (self.station, newCCreq)) + msg = ('ccReq for station %s updated to ccReq=%1.3f' % + (self.station, newccReq)) detex.log(__name__,msg ,level='info', pri=True) def _getColors(self, numClusts): @@ -364,10 +366,10 @@ def _makeDFLINK(self, truncate=True): # make the link dataframe # append cluster numbers to link array link = np.append(self.link, np.arange(N+1,N+N+1).reshape(N, 1), 1) if truncate: # truncate after required coeficient - linkup = link[link[:, 2] <= 1 - self.CCreq] + linkup = link[link[:, 2] <= 1 - self.ccReq] else: linkup = link - T = fcluster(link[:, 0:4], 1 - self.CCreq, criterion='distance') + T = fcluster(link[:, 0:4], 1 - self.ccReq, criterion='distance') serclus = pd.Series(T) clusdict = pd.Series([np.array([x]) for x in np.arange( @@ -380,7 +382,7 @@ def _makeDFLINK(self, truncate=True): # make the link dataframe if len(dflink) > 0: dflink['II'] = list else: - msg = 'No events cluster with corr coef = %1.3f' % self.CCreq + msg = 'No events cluster with corr coef = %1.3f' % self.ccReq detex.log(__name__, msg, level='info', pri=True) for a in dflink.iterrows(): # enumerate cluster contents ar1 = list(np.array(clusdict[int(a[1].i1)])) @@ -416,7 +418,7 @@ def dendro(self, hideEventLabels=True, show=True, saveName=False, for a in range(len(self.clusts)): plt.plot([], [], '-', color=self.clustColors[a]) plt.plot([], [], '-', color=self.nonClustColor) - dendrogram(self.link, color_threshold=1 - self.CCreq, count_sort=True, + dendrogram(self.link, color_threshold=1 - self.ccReq, count_sort=True, link_color_func=lambda x: color_list[x], **kwargs) ax = plt.gca() if legend: @@ -448,7 +450,6 @@ def plotEvents(self, projection='merc', plotNonClusts=True): detex.log(__name__, msg, level='error', e=ImportError) # TODO make dot size scale with magnitudes - self.dendro() plt.figure() # plt.subplot(1,3,1) @@ -555,10 +556,9 @@ def simMatrix(self, groupClusts=False, savename=False, returnMat=False, if groupClusts: # if grouping clusters together clusts = copy.deepcopy(self.clusts) # get cluster list clusts.append(self.singles) # add singles list at end - eveOrder = list(itertools.chain.from_iterable( - clusts)) # Flatten cluster list + eveOrder = list(itertools.chain.from_iterable(clusts)) indmask = { - num: self.key.index(eve) for num, + num: list(self.key).index(eve) for num, eve in enumerate(eveOrder)} # create a mask forthe order else: # blank index mask if not @@ -605,7 +605,7 @@ def printAtr(self): # print out basic attributes used to make cluster print ('%d Events cluster out of %d' % (self.clustcount, len(self.singles) + self.clustcount)) print('Total number of clusters = %d' % len(self.clusts)) - print ('Required Cross Correlation Coeficient = %.3f' % self.CCreq) + print ('Required Cross 
Correlation Coeficient = %.3f' % self.ccReq) def __getitem__(self, index): # allow indexing return self.clusts[index] @@ -745,8 +745,8 @@ def SVD(self, selectCriteria=2, selectValue=0.9, conDatNum=100, self.subspaces[station].FracEnergy[ind] = fracEnergy self.subspaces[station].UsedSVDKeys[ind] = usedBasis self.subspaces[station].SVDdefined[ind] = True - self.subspaces[station].NumBasis[ind] = len( - self.subspaces[station].UsedSVDKeys[ind]) + numBas = len(self.subspaces[station].UsedSVDKeys[ind]) + self.subspaces[station].NumBasis[ind] = numBas if len(self.ssStations) > 0: self._setThresholds(selectCriteria, selectValue, conDatNum, threshold, basisLength, kwargs) @@ -807,24 +807,25 @@ def _getFracEnergy(self, ind, row, svdDict, U): calculates the % energy capture for each stubspace for each possible dimension of rep. (up to # of events that go into the subspace) """ + #detex.deb([self, ind, row, svdDict, U]) fracDict = {} keys = row.Events svales = svdDict.keys() svales.sort(reverse=True) stkeys = row.SampleTrims.keys() # dict defining sample trims for key in keys: - aliTD = row.AlignedTD[key] # aligned waveform for event "key" + aliTD = row.AlignedTD[key] # aligned waveform for event key if 'Starttime' in stkeys and 'Endtime' in stkeys: start = row.SampleTrims['Starttime'] # start of trim in samps end = row.SampleTrims['Endtime'] # end of trim in samps - aliwf = aliTD[start:end] + aliwf = aliTD[start : end] else: aliwf = aliTD Ut = np.transpose(U) # transpose of basis vects # normalized dot product (mat. mult.) normUtAliwf = scipy.dot(Ut, aliwf) / scipy.linalg.norm(aliwf) # add 0% energy capture for dim of 0 - repvect = np.insert(normUtAliwf, 0 , 0) + repvect = np.insert(np.square(normUtAliwf), 0 , 0) # cumul. energy captured for increasing dim. reps cumrepvect = [np.sum(repvect[:x+1]) for x in range(len(repvect))] fracDict[key] = cumrepvect # add cumul. 
to keys diff --git a/detex/util.py b/detex/util.py index b048dae..ce9045b 100644 --- a/detex/util.py +++ b/detex/util.py @@ -64,7 +64,7 @@ def writeKMLFromTemplateKey(df='TemplateKey.csv', outname='templates.kml'): def writeKMLFromStationKey(df='StationKey.csv', outname='stations.kml'): """ - Write a KML file from a tempalteKey + Write a KML file from a station key Parameters ------------- @@ -824,7 +824,12 @@ def loadSQLite(corDB, tableName, sql=None, readExcpetion=False, silent=True, if sql is None: sql = 'SELECT %s FROM %s' % ('*', tableName) with connect(corDB, detect_types=PARSE_DECLTYPES) as con: - df = psql.read_sql(sql, con) + try: + df = psql.read_sql(sql, con) + except pd.io.sql.DatabaseError: + msg = "Table %s not found in %s" % (tableName, corDB) + detex.log(__name__, msg, level='warning', pri=True) + return None if convertNumeric: for item, ser in df.iteritems(): try: @@ -887,7 +892,19 @@ def readLog(logpath='detex_log.log'): df = pd.read_table(logpath, names=['Time','Mod','Level','Msg']) return df - +def templateKey2Catalog(temkey): + """ + Function to create an obspy.Catalog instance from a template key + """ + temkey = readKey(temkey, 'template') + cat = obspy.Catalog() + for ind, row in temkey.iterrows(): + orig = obspy.core.event.Origin(time=obspy.UTCDateTime(row.TIME), longitude=row.LON, latitude=row.LAT, depth=row.DEPTH) + mag = obspy.core.event.Magnitude(mag = row.MAG) + eve = obspy.core.event.Event(origins=[orig], magnitudes=[mag]) + cat.append(eve) + return cat + ###################### Data processing functions ########################### def get_number_channels(st): diff --git a/setup.py b/setup.py index 7f98cc4..7ec50a9 100755 --- a/setup.py +++ b/setup.py @@ -6,38 +6,30 @@ """ from setuptools import setup, find_packages -from codecs import open -from os import path +#from codecs import open +#from os import path -here = path.abspath(path.dirname(__file__)) - -# Get the long description from the relevant file -#with open(path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f: -# long_description = f.read() setup( name='detex', - # Versions should comply with PEP440. For a discussion on single-sourcing - # the version across setup.py and the project code, see - # https://packaging.python.org/en/latest/single_source_version.html - version='1.0.4', + version = '1.0.5', - description='A package for performing subspace and correlation detections on seismic data', + description = 'A package for performing subspace and correlation detections on seismic data', + # The project's main homepage. - url='https://github.com/DerrickChambers/detex', + url = 'https://github.com/d-chambers/detex', # Author details - author='Derrick Chambers', - author_email='derrick.chambers@comcast.net', + author = 'Derrick Chambers', + author_email = 'djachambeador@gmail.com', - # Choose your license - license='MIT', + # Liscense + license = 'MIT', - # See https://pypi.python.org/pypi?%3Aaction=list_classifiers - classifiers=[ + classifiers = [ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Intended Audience :: Geo-scientists', 'Topic :: Earthquake detections', 'License :: OSI Approved :: MIT License', @@ -45,7 +37,9 @@ ], # What does your project relate to? 
- keywords='seismology signal detection', - packages=find_packages(exclude=['contrib', 'docs', 'tests*']), - install_requires=['peppercorn','obspy','basemap','numpy','pandas >= 0.17.0','scipy','matplotlib','joblib','multiprocessing','glob2'], + keywords = 'seismology signal detection', + packages = find_packages(exclude=['contrib', 'docs', 'tests*']), + install_requires = ['peppercorn', 'obspy', 'basemap', 'numpy', + 'pandas >= 0.17.0', 'scipy', 'matplotlib', + 'multiprocessing', 'glob2'], ) diff --git a/tearDown.py b/tearDown.py index 271b035..c23421d 100644 --- a/tearDown.py +++ b/tearDown.py @@ -8,11 +8,114 @@ """ import os import shutil +import sys +import glob + # define files to delete files_to_kill = ['detex_log.log', 'clust.pkl', 'SubSpace.db', 'subspace.pkl'] # define directories to delete dirs_to_kill = ['ContinuousWaveForms', 'EventWaveForms', 'detex.egg-info', 'dist', 'build', '.ipynb_checkpoints', 'DetectedEvents'] + +######### Get paths +def _get_detex_paths(): + # go up one level to avoid importing detex in repo + cwd = os.getcwd() + detex_dir = os.path.join(cwd, 'detex') + up_one = cwd.split(os.path.sep)[1] + up_one = os.path.dirname(cwd) + up_two = os.path.dirname(up_one) + os.chdir(up_two) + print os.getcwd() + try: # make sure new instance of detex is imported + reload(detex) + except NameError: + import detex + detex_path = os.path.dirname(detex.__file__) + print detex_dir, detex_path + return cwd, detex_dir, detex_path + +############### Bump version functions +def bump_version(cwd, detex_dir, detex_path): + """ + bump up the version number in __init__ and setup + """ + version, init_line, setup_line = find_version(detex_dir, cwd) + verint = [int(x) for x in version.split('.')] + msg = 'Bump version? y, [n]' + resp1 = get_user_input(msg) + if resp1 not in ['y', 'n', '']: + raise Exception ("input not understood, use 'y', 'n' or enter'") + if resp1 == 'y': + msg = 'Bump which? 
major, minor, [micro]' + resp2 = get_user_input(msg) + if resp2 == '' or 'micro': + verint[2] += 1 + elif resp2 == 'minor': + verint[1] += 1 + elif resp2 == 'major': + verint[0] += 1 + else: + raise Exception("Input must be: 'major', 'minor', or 'micro'") + version2 = '.'.join([str(x) for x in verint]) + _replace_str(detex_dir, cwd, init_line, setup_line, version, version2) + +def find_version(detex_dir, cwd): + # Get version indicated in setup.py + setup = os.path.join(cwd, 'setup.py') + setup_version_str = _find_line('version =', setup) + setup_version = setup_version_str.split("'")[1] + + # get version in init + init = os.path.join(detex_dir, '__init__.py') + init_version_str = _find_line('__version__', init) + init_version = init_version_str.split("'")[1] + if not init_version == setup_version: + raise Exception ("versions not equal in __init__.py and setup.py") + return init_version, init_version_str, setup_version_str + +def _find_line(string, file_name): + """ + find the first occurence of a string in a file + """ + + with open(file_name, 'r') as fi: + for line in fi: + if string in line: + return line + +def _replace_str(detex_dir, cwd, init_line, setup_line, version, version2): + setup = os.path.join(cwd, 'setup.py') + _replace_line(setup_line, setup_line.replace(version, version2), setup) + + init = os.path.join(detex_dir, '__init__.py') + _replace_line(init_line, init_line.replace(version, version2), init) + +def _replace_line(old_line, new_line, file_name): + with open(file_name, 'r') as fi1: + with open('temp.txt', 'w') as fi2: + for lin in fi1: + if old_line in lin: + fi2.write(new_line) + else: + fi2.write(lin) + shutil.copy('temp.txt', file_name) + os.remove('temp.txt') + +########### Pull Detex from site packages +def pull_detex(cwd, detex_dir, detex_path): + """ + Function to pull detex from the import path (generally in site packages) + """ + + msg = 'pull detex from %s and delete old copy? [y] or n' % detex_dir + response = get_user_input(msg) + if response == 'y' or response == '': + files_to_copy = glob.glob(os.path.join(detex_path, '*.py')) + for fi in files_to_copy: + shutil.copy(fi, detex_dir) + +########### Tear down def tear_down(): for root, dirs, files in os.walk("."): path = os.path.normpath(root) @@ -23,5 +126,35 @@ def tear_down(): if di in dirs_to_kill: shutil.rmtree(os.path.join(path, di)) +######### user inputs +def get_user_input(msg): + py3 = sys.version_info[0] > 2 + if py3: + response = input(msg + '\n') + else: + response = raw_input(msg + '\n') + return response + +################ Go! if __name__ == "__main__": + cwd, detex_dir, detex_path = _get_detex_paths() + bump_version(cwd, detex_dir, detex_path) + pull_detex(cwd, detex_dir, detex_path) tear_down() + + + + + + + + + + + + + + + + +
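The version-bump helper added to tearDown.py above selects which part of the version string to increment from user input. Below is a minimal standalone sketch of that intended logic (illustrative only, not part of the patch); it uses the membership test `which in ('', 'micro')` because a bare `resp2 == '' or 'micro'` is always truthy in Python, so the 'minor' and 'major' branches would otherwise never be reached.

# Sketch of the bump logic from bump_version() in tearDown.py; the function
# name and standalone form are illustrative assumptions, not patch content.
def bump(version, which='micro'):
    parts = [int(x) for x in version.split('.')]
    if which in ('', 'micro'):   # default: bump the micro number
        parts[2] += 1
    elif which == 'minor':
        parts[1] += 1
    elif which == 'major':
        parts[0] += 1
    else:
        raise Exception("Input must be: 'major', 'minor', or 'micro'")
    return '.'.join(str(x) for x in parts)

# e.g. bump('1.0.4') returns '1.0.5', matching this release.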
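Similarly, the reworked _makePfKey in detex/results.py derives each detection-statistic threshold by inverting a beta survival function at the acceptable false-detection probability Pf. A small illustrative sketch follows; the shape parameters and Pf value are assumed examples (the real values come from the fitted beta1/beta2 columns of the subspace/singleton info tables).

# Illustrative threshold calculation; beta1, beta2 and Pf are assumed values.
import scipy.stats

beta1, beta2 = 40.0, 75.0   # assumed beta shape parameters from the null fit
Pf = 1e-10                  # assumed acceptable probability of false detection
TH = scipy.stats.beta.isf(Pf, beta1, beta2, 0, 1)
# TH is the required detection statistic; when isf gives an unrealistic value
# (TH > 0.94) the patched code falls back to _approximateThreshold's forward
# grid search instead.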