diff --git a/docs/source/Reference/sr3_options.7.rst b/docs/source/Reference/sr3_options.7.rst index 6bc83020b..c985df2fb 100644 --- a/docs/source/Reference/sr3_options.7.rst +++ b/docs/source/Reference/sr3_options.7.rst @@ -227,8 +227,8 @@ OPTION TYPES sr3 options come in several types: -count - integer count type. +count, size + integer count type. same as size described below. duration a floating point number indicating a quantity of seconds (0.001 is 1 milisecond) @@ -249,9 +249,10 @@ set size integer size. Suffixes k, m, and g for kilo, mega, and giga (base 2) multipliers. + alone base 10: 1k=1000, with a 'b' suffix, base 2: 1kb=1024 str - an string value + a string value OPTIONS diff --git a/docs/source/fr/Reference/sr3_options.7.rst b/docs/source/fr/Reference/sr3_options.7.rst index 0e48572b7..608b9dcb3 100644 --- a/docs/source/fr/Reference/sr3_options.7.rst +++ b/docs/source/fr/Reference/sr3_options.7.rst @@ -226,7 +226,7 @@ TYPES D'OPTIONS Les options de sr3 ont plusieurs types : count - type de nombre entier. + type de nombre entier. Même format que *size* détaillé plus bas. duration un nombre à virgule flottante qui indique une quantité en secondes (0.001 est 1 milliseconde) @@ -246,7 +246,8 @@ set un assortissement de chaîne de caractères, chaque occurrence successive s'unionise au total. size - taille entière. Suffixes k, m et g pour les multiplicateurs kilo, méga et giga (base 2). + taille entière. Suffixes k, m et g pour les multiplicateurs kilo, méga et giga (base 10). + si on rajoute ´b' ... c´est base 2 : 1k=1000, 1kb=1024 str une chaîne de caractères. diff --git a/sarracenia/config.py b/sarracenia/config.py index e28cce731..6d2b2659e 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -331,6 +331,13 @@ def isTrue(S): S = S[-1] return S.lower() in ['true', 'yes', 'on', '1'] +def parse_count(cstr): + if cstr[0] == '-': + offset=1 + else: + offset=0 + count=humanfriendly.parse_size(cstr[offset:], binary=cstr[-1].lower() in ['i','b'] ) + return -count if offset else count def get_package_lib_dir(): return os.path.dirname(inspect.getfile(Config)) @@ -982,7 +989,7 @@ def _build_mask(self, option, arguments): try: regex = re.compile(arguments[0]) except: - logger.critical( f"{self.files}{self.lineno} invalid regular expression: {arguments[0]}, ignored." ) + logger.critical( f"{','.join(self.files)}{self.lineno} invalid regular expression: {arguments[0]}, ignored." ) return None if len(arguments) > 1: @@ -1082,26 +1089,26 @@ def add_option(self, option, kind='list', default_value=None, all_values=None ): if kind not in [ 'list', 'set' ] and type(v) == list: v=v[-1] - logger.warning( f"{self.files}{self.lineno} multiple declarations of {kind} {option}={getattr(self,option)} choosing last one: {v}" ) + logger.warning( f"{','.join(self.files)}{self.lineno} multiple declarations of {kind} {option}={getattr(self,option)} choosing last one: {v}" ) if kind == 'count': count_options.append(option) if type(v) is not int: - setattr(self, option, humanfriendly.parse_size(v)) + setattr(self, option, parse_count(v)) elif kind == 'duration': duration_options.append(option) if type(v) is not float: setattr(self, option, durationToSeconds(v,default_value)) - elif kind == 'flag': + elif kind == 'flag' or kind == bool: flag_options.append(option) if type(v) is not bool: setattr(self, option, isTrue(v)) - elif kind == 'float': + elif kind == 'float' or kind == float : float_options.append(option) if type(v) is not float: setattr(self, option, float(v)) - elif kind == 'list': + elif kind == 'list' or kind == list: list_options.append( option ) if type(v) is not list: #subtlety... None means: has not been set, @@ -1121,22 +1128,22 @@ def add_option(self, option, kind='list', default_value=None, all_values=None ): if all_values: set_choices[option] = all_values - elif kind == 'size': + elif kind == 'size' or kind == int: size_options.append(option) if type(v) is not int: - setattr(self, option, humanfriendly.parse_size(v)) + setattr(self, option, parse_count(v)) - elif kind == 'str': + elif kind == 'str' or kind == str: str_options.append(option) if v is None: setattr(self, option, None) elif type(v) is not str: setattr(self, option, str(v)) else: - logger.error( f'{self.files}{self.lineno} invalid kind: %s for option: %s, ignored' % ( kind, option ) ) + logger.error( f"{','.join(self.files)}{self.lineno} invalid kind: {kind} for option: {option} ignored" ) return - logger.debug( f'{self.files}{self.lineno} {option} declared as type:{type(getattr(self,option))} value:{v}' ) + logger.debug( f"{','.join(self.files)}{self.lineno} {option} declared as type:{type(getattr(self,option))} value:{v}" ) def dump(self): """ print out what the configuration looks like. @@ -1286,7 +1293,7 @@ def _parse_binding(self, subtopic_string): if type(subtopic_string) is str: if not hasattr(self, 'broker') or self.broker is None or self.broker.url is None: - logger.error( f'{self.files}:{self.lineno} broker needed before subtopic' ) + logger.error( f"{','.join(self.files)}:{self.lineno} broker needed before subtopic" ) return if self.broker.url.scheme == 'amq' : @@ -1405,6 +1412,9 @@ def _parse_sum(self, value): self.identity_method = 'invalid' #logger.error('returning 4: invalid' ) + + + def parse_file(self, cfg, component=None): """ add settings from a given config file to self """ @@ -1436,237 +1446,241 @@ def parse_file(self, cfg, component=None): self.files.append(cfgfilepath) for l in open(cfgfilepath, "r").readlines(): - l = l.strip() lineno+=1 if self.lineno > 0: - saved_lineno = self.lineno - self.lineno = lineno - line = l.split() + saved_lineno = self.lineno + self.parse_line( component, cfg, cfname, lineno, l.strip() ) - #print('FIXME parsing %s:%d %s' % (cfg, lineno, line )) + self.files.pop() + self.lineno = saved_lineno - if (len(line) < 1) or (line[0].startswith('#')): - continue - k = line[0] - if k in Config.synonyms: - k = Config.synonyms[k] - elif k == 'destination': - if component == 'poll': - k = 'pollUrl' - else: - k = 'sendTo' - elif k == 'broker' and component == 'poll' : - k = 'post_broker' + def parse_line(self, component, cfg, cfname, lineno, l ): + self.lineno = lineno + line = l.split() - if (k in convert_to_v3): - self.log_flowcb_needed |= '_log' in k + #print('FIXME parsing %s:%d %s' % (cfg, lineno, line )) + + if (len(line) < 1) or (line[0].startswith('#')): + return + + k = line[0] + if k in Config.synonyms: + k = Config.synonyms[k] + elif k == 'destination': + if component == 'poll': + k = 'pollUrl' + else: + k = 'sendTo' + elif k == 'broker' and component == 'poll' : + k = 'post_broker' + + if (k in convert_to_v3): + self.log_flowcb_needed |= '_log' in k - if (len(line) > 1): - v = line[1].replace('.py', '', 1) - if (v in convert_to_v3[k]): - line = convert_to_v3[k][v] - k = line[0] - if 'continue' in line: - logger.debug( f'{cfname}:{lineno} obsolete v2: \"{l}\" ignored' ) - else: - logger.debug( f'{cfname}:{lineno} obsolete v2:\"{l}\" converted to sr3:\"{" ".join(line)}\"' ) - else: - line = convert_to_v3[k] - k=line[0] - v=line[1] + if (len(line) > 1): + v = line[1].replace('.py', '', 1) + if (v in convert_to_v3[k]): + line = convert_to_v3[k][v] + k = line[0] + if 'continue' in line: + logger.debug( f'{cfname}:{lineno} obsolete v2: \"{l}\" ignored' ) + else: + logger.debug( f'{cfname}:{lineno} obsolete v2:\"{l}\" converted to sr3:\"{" ".join(line)}\"' ) + else: + line = convert_to_v3[k] + k=line[0] + v=line[1] - if k == 'continue': - continue + if k == 'continue': + return - #FIXME: note for Clea, line conversion to v3 complete here. + #FIXME: note for Clea, line conversion to v3 complete here. - line = list(map(lambda x: self._varsub(x), line)) + line = list(map(lambda x: self._varsub(x), line)) - if len(line) == 1: - v = True - else: - v = line[1] + if len(line) == 1: + v = True + else: + v = line[1] - # FIXME... I think synonym check should happen here, but no time to check right now. + # FIXME... I think synonym check should happen here, but no time to check right now. - if k in flag_options: - if len(line) == 1: - setattr(self, k, True) - else: - setattr(self, k, isTrue(v)) - if k in ['logReject'] and self.logReject: - self.logEvents = self.logEvents | set(['reject']) - continue + if k in flag_options: + if len(line) == 1: + setattr(self, k, True) + else: + setattr(self, k, isTrue(v)) + if k in ['logReject'] and self.logReject: + self.logEvents = self.logEvents | set(['reject']) + return - if len(line) < 2: - logger.error( f'{self.files}:{lineno} {k} missing argument(s) ' ) - continue - if k in ['accept', 'reject' ]: - self.masks.append(self._build_mask(k, line[1:])) - elif k in [ 'callback', 'cb' ]: - #vv = v.split('.') - #v = 'sarracenia.flowcb.' + v + '.' + vv[-1].capitalize() - if v not in self.plugins_late: - self.plugins_late.append(v) - elif k in [ 'callback_prepend', 'cbp' ]: - #vv = v.split('.') - #v = 'sarracenia.flowcb.' + v + '.' + vv[-1].capitalize() - if v not in self.plugins_early: - self.plugins_early.insert(0,v) - elif k in ['declare']: - self._parse_declare(line[1:]) - elif k in ['feeder', 'manager']: - self.feeder = urllib.parse.urlparse(line[1]) - self.declared_users[self.feeder.username] = 'feeder' - elif k in ['header', 'h']: - (kk, vv) = line[1].split('=') - self.fixed_headers[kk] = vv - elif k in ['include', 'config']: - try: - self.parse_file(v) - except Exception as ex: - logger.error( f'{self.files}:{self.lineno} file {v} failed to parse: {ex}' ) - logger.debug('Exception details: ', exc_info=True) - elif k in ['subtopic']: - self._parse_binding(v) - elif k in ['topicPrefix']: - if '/' in v : - self.topicPrefix = v.split('/') - else: - self.topicPrefix = v.split('.') - elif k in ['post_topicPrefix']: - #if (not self.post_broker.url) or self.post_broker.url.scheme[0:3] == 'amq': - if '/' in v : - self.post_topicPrefix = v.split('/') - else: - self.post_topicPrefix = v.split('.') - elif k in ['import']: - self.imports.append(v) - elif k in ['flow_callback', 'flowcb', 'fcb', 'flowCallback' ]: - if v not in self.plugins_late: - self.plugins_late.append(v) - elif k in ['flow_callback_prepend', 'flowcb_prepend', 'fcbp', 'flowCallbackPrepend' ]: - if v not in self.plugins_early: - self.plugins_early.insert(0, v) - elif k in ['set', 'setting', 's']: - self._parse_setting(line[1], line[2:]) - elif k in ['identity', 'integrity']: - self._parse_sum(v) - elif k in Config.port_required: - logger.error( f' {cfname}:{lineno} {k} {v} not supported in v3, consult porting guide. Option ignored.' ) - logger.error( f' porting guide: https://github.com/MetPX/sarracenia/blob/v03_wip/docs/How2Guides/v2ToSr3.rst ' ) - continue - elif k in Config.v2entry_points: - #if k in self.plugins: - # self.plugins.remove(v) - self._parse_v2plugin(k, v) - elif k in ['no-import']: - self._parse_v3unplugin(v) - elif k in ['inflight', 'lock']: - if v[:-1].isnumeric(): - vv = durationToSeconds(v) - setattr(self, k, vv) - self.fileAgeMin = vv + if len(line) < 2: + logger.error( f"{','.join(self.files)}:{lineno} {k} missing argument(s)" ) + return + if k in ['accept', 'reject' ]: + self.masks.append(self._build_mask(k, line[1:])) + elif k in [ 'callback', 'cb' ]: + #vv = v.split('.') + #v = 'sarracenia.flowcb.' + v + '.' + vv[-1].capitalize() + if v not in self.plugins_late: + self.plugins_late.append(v) + elif k in [ 'callback_prepend', 'cbp' ]: + #vv = v.split('.') + #v = 'sarracenia.flowcb.' + v + '.' + vv[-1].capitalize() + if v not in self.plugins_early: + self.plugins_early.insert(0,v) + elif k in ['declare']: + self._parse_declare(line[1:]) + elif k in ['feeder', 'manager']: + self.feeder = urllib.parse.urlparse(line[1]) + self.declared_users[self.feeder.username] = 'feeder' + elif k in ['header', 'h']: + (kk, vv) = line[1].split('=') + self.fixed_headers[kk] = vv + elif k in ['include', 'config']: + try: + self.parse_file(v) + except Exception as ex: + logger.error( f"{','.join(self.files)}:{self.lineno} file {v} failed to parse: {ex}" ) + logger.debug('Exception details: ', exc_info=True) + elif k in ['subtopic']: + self._parse_binding(v) + elif k in ['topicPrefix']: + if '/' in v : + self.topicPrefix = v.split('/') + else: + self.topicPrefix = v.split('.') + elif k in ['post_topicPrefix']: + #if (not self.post_broker.url) or self.post_broker.url.scheme[0:3] == 'amq': + if '/' in v : + self.post_topicPrefix = v.split('/') + else: + self.post_topicPrefix = v.split('.') + elif k in ['import']: + self.imports.append(v) + elif k in ['flow_callback', 'flowcb', 'fcb', 'flowCallback' ]: + if v not in self.plugins_late: + self.plugins_late.append(v) + elif k in ['flow_callback_prepend', 'flowcb_prepend', 'fcbp', 'flowCallbackPrepend' ]: + if v not in self.plugins_early: + self.plugins_early.insert(0, v) + elif k in ['set', 'setting', 's']: + self._parse_setting(line[1], line[2:]) + elif k in ['identity', 'integrity']: + self._parse_sum(v) + elif k in Config.port_required: + logger.error( f' {cfname}:{lineno} {k} {v} not supported in v3, consult porting guide. Option ignored.' ) + logger.error( f' porting guide: https://github.com/MetPX/sarracenia/blob/v03_wip/docs/How2Guides/v2ToSr3.rst ' ) + return + elif k in Config.v2entry_points: + #if k in self.plugins: + # self.plugins.remove(v) + self._parse_v2plugin(k, v) + elif k in ['no-import']: + self._parse_v3unplugin(v) + elif k in ['inflight', 'lock']: + if v[:-1].isnumeric(): + vv = durationToSeconds(v) + setattr(self, k, vv) + self.fileAgeMin = vv + else: + if line[1].lower() in ['none', 'off', 'false']: + setattr(self, k, None) else: - if line[1].lower() in ['none', 'off', 'false']: - setattr(self, k, None) - else: - setattr(self, k, v) - elif k in ['strip']: - """ - 2020/08/26 - PAS - strip in config file gets translated into two separate attributes: strip and pstrip. - strip is the numeric variety (0-n) and if the supplied option in a regex pattern, - then instead pstrip is set, and strip is set to 0. - - I don't know why it is done this way... just documenting/conforming to existing state. - """ - if v.isdigit(): - self.strip = int(v) - self.pstrip = None + setattr(self, k, v) + elif k in ['strip']: + """ + 2020/08/26 - PAS + strip in config file gets translated into two separate attributes: strip and pstrip. + strip is the numeric variety (0-n) and if the supplied option in a regex pattern, + then instead pstrip is set, and strip is set to 0. + + I don't know why it is done this way... just documenting/conforming to existing state. + """ + if v.isdigit(): + self.strip = int(v) + self.pstrip = None + else: + if v[0] == '/': + self.pstrip = v[1:] else: - if v[0] == '/': - self.pstrip = v[1:] - else: - self.pstrip = v - self.strip = 0 - elif k in duration_options: - if len(line) == 1: - logger.error( - '%s:%d %s is a duration option requiring a decimal number of seconds value' - % ( cfname, lineno, line[0]) ) - continue - setattr(self, k, durationToSeconds(v)) - elif k in float_options: - try: + self.pstrip = v + self.strip = 0 + elif k in duration_options: + if len(line) == 1: + logger.error( + '%s:%d %s is a duration option requiring a decimal number of seconds value' + % ( cfname, lineno, line[0]) ) + return + setattr(self, k, durationToSeconds(v)) + elif k in float_options: + try: + setattr(self, k, float(v)) + except (ValueError, TypeError) as e: + logger.error(f"{','.join(self.files)}:{self.lineno} Ignored '{i}': {e}") + elif k in perm_options: + if v.isdigit(): + setattr(self, k, octal_number(int(v, base=8))) + else: + logger.error( f'{",".join(self.files)}:{lineno} {k} setting to {v} ignored: only numberic modes supported' ) + elif k in size_options: + setattr(self, k, parse_count(v)) + elif k in count_options: + setattr(self, k, parse_count(v)) + elif k in list_options: + if not hasattr(self, k) or not getattr(self,k): + setattr(self, k, [' '.join(line[1:])]) + else: + l = getattr(self, k) + l.append(' '.join(line[1:])) + elif k in set_options: + if v.lower() == 'none': + setattr(self, k, set([])) + return + if v.lower() in [ 'all' , '+all' ]: + if k in set_choices: + setattr(self,k,set_choices[k]) + return + v=v.replace('|',',') + vs = self._parse_set_string(v,getattr(self,k)) + setattr(self, k, vs ) + + if k in set_choices : + for i in getattr(self,k): + if i not in set_choices[k]: + logger.error( f'{",".join(self.files)}:{lineno} invalid entry {i} in {k}. Must be one of: {set_choices[k]}' ) + + elif k in str_options: + if ( k == 'directory' ) and not self.download: + logger.info( f"{','.join(self.files)}:{lineno} if download is false, directory has no effect" ) + + v = ' '.join(line[1:]) + if v == 'None': + v=None + setattr(self, k, v) + else: + #FIXME: with _options lists for all types and addition of declare, this is probably now dead code. + if k not in self.undeclared: + logger.debug( f'{",".join(self.files)}:{self.lineno} possibly undeclared option: {line}' ) + v = ' '.join(line[1:]) + if hasattr(self, k): + if type(getattr(self, k)) is float: setattr(self, k, float(v)) - except (ValueError, TypeError) as e: - logger.error(f'{self.files}:{self.lineno} Ignored "{i}": {e}') - elif k in perm_options: - if v.isdigit(): - setattr(self, k, octal_number(int(v, base=8))) - else: - logger.error( f'{self.files}:{lineno} {k} setting to {v} ignored: only numberic modes supported' ) - elif k in size_options: - setattr(self, k, humanfriendly.parse_size(v)) - elif k in count_options: - setattr(self, k, humanfriendly.parse_size(v)) - elif k in list_options: - if not hasattr(self, k) or not getattr(self,k): - setattr(self, k, [' '.join(line[1:])]) - else: - l = getattr(self, k) - l.append(' '.join(line[1:])) - elif k in set_options: - if v.lower() == 'none': - setattr(self, k, set([])) - continue - if v.lower() in [ 'all' , '+all' ]: - if k in set_choices: - setattr(self,k,set_choices[k]) - continue - v=v.replace('|',',') - vs = self._parse_set_string(v,getattr(self,k)) - setattr(self, k, vs ) - - if k in set_choices : - for i in getattr(self,k): - if i not in set_choices[k]: - logger.error( f'{self.files}:{lineno} invalid entry {i} in {k}. Must be one of: {set_choices[k]}' ) - - elif k in str_options: - if ( k == 'directory' ) and not self.download: - logger.info( f"{self.files}:{lineno} if download is false, directory has no effect" ) - - v = ' '.join(line[1:]) - if v == 'None': - v=None - setattr(self, k, v) + elif type(getattr(self, k)) is int: + # the only integers that have units are durations. + # integers without units will come out unchanged. + setattr(self, k, durationToSeconds(v)) + elif type(getattr(self, k)) is str: + setattr(self, k, [getattr(self, k), v]) + elif type(getattr(self, k)) is list: + newv=getattr(self,k) + newv.append(v) + setattr(self, k, newv) else: - #FIXME: with _options lists for all types and addition of declare, this is probably now dead code. - if k not in self.undeclared: - logger.debug( f'{self.files}:{self.lineno} possibly undeclared option: {line}' ) - v = ' '.join(line[1:]) - if hasattr(self, k): - if type(getattr(self, k)) is float: - setattr(self, k, float(v)) - elif type(getattr(self, k)) is int: - # the only integers that have units are durations. - # integers without units will come out unchanged. - setattr(self, k, durationToSeconds(v)) - elif type(getattr(self, k)) is str: - setattr(self, k, [getattr(self, k), v]) - elif type(getattr(self, k)) is list: - newv=getattr(self,k) - newv.append(v) - setattr(self, k, newv) - else: - # FIXME: - setattr(self, k, v) - self.undeclared.append( (cfname, lineno, k) ) - self.files.pop() - self.lineno = saved_lineno + # FIXME: + setattr(self, k, v) + self.undeclared.append( (cfname, lineno, k) ) def _resolveQueueName(self,component,cfg): @@ -1815,14 +1829,14 @@ def finalize(self, component=None, config=None): setattr(self, d, durationToSeconds(getattr(self, d))) if hasattr(self, 'kbytes_ps'): - bytes_ps = humanfriendly.parse_size(self.kbytes_ps) + bytes_ps = parse_count(self.kbytes_ps) if not self.kbytes_ps[-1].isalpha(): bytes_ps *= 1024 setattr(self, 'byteRateMax', bytes_ps) for d in count_options: if hasattr(self, d) and (type(getattr(self, d)) is str): - setattr(self, d, humanfriendly.parse_size(getattr(self, d))) + setattr(self, d, parse_count(getattr(self, d))) for d in size_options: if hasattr(self, d) and (type(getattr(self, d)) is str): @@ -1855,7 +1869,6 @@ def finalize(self, component=None, config=None): self.plugins_early.append( 'nodupe.name' ) delattr( self, 'nodupe_basis' ) - # FIXME: note that v2 *user_cache_dir* is, v3 called: cfg_run_dir if config[-5:] == '.conf': cfg = config[:-5] else: @@ -1872,6 +1885,7 @@ def finalize(self, component=None, config=None): if self.retry_ttl == 0: self.retry_ttl = None + # FIXME: note that v2 *user_cache_dir* is, v3 called: cfg_run_dir if not hasattr(self, 'cfg_run_dir'): if self.statehost: hostdir = self.hostdir @@ -2016,10 +2030,10 @@ def check_undeclared_options(self): setattr(self,u,str(getattr(self,u))) elif u in count_options: if type( getattr(self,u) ) not in [ int, float ]: - setattr(self,u,humanfriendly.parse_size(getattr(self,u))) + setattr(self,u,parse_count(getattr(self,u))) elif u in size_options: if type( getattr(self,u) ) not in [ int, float ]: - setattr(self,u,humanfriendly.parse_size(getattr(self,u))) + setattr(self,u,parse_count(getattr(self,u))) elif u in duration_options: if type( getattr(self,u) ) not in [ int, float ]: setattr(self,u,durationToSeconds(getattr(self,u))) diff --git a/sarracenia/credentials.py b/sarracenia/credentials.py index f0ea7d119..7f81b7f1c 100755 --- a/sarracenia/credentials.py +++ b/sarracenia/credentials.py @@ -176,11 +176,14 @@ def add(self, urlstr, details=None): """ # need to create url object + key=urlstr if details == None: details = Credential() details.url = urllib.parse.urlparse(urlstr) + if hasattr(details.url,'password'): + key = key.replace( f":{details.url.password}", "" ) - self.credentials[urlstr] = details + self.credentials[key] = details def get(self, urlstr): """Retrieve a Credential from the DB by urlstr. If the Credential is valid, but not already cached, it will be @@ -220,7 +223,6 @@ def get(self, urlstr): return False, self.credentials[urlstr] # resolved from defined credentials - ok, details = self._resolve(urlstr, url) if ok: return True, details diff --git a/sarracenia/sr.py b/sarracenia/sr.py index 839bdc2bc..f761fa459 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -2019,7 +2019,8 @@ def sanity(self): if pcount != 0: self._find_missing_instances() - self._clean_missing_proc_state() + if not self.options.dry_run: + self._clean_missing_proc_state() self._read_states() self._resolve() filtered_missing = [] diff --git a/tests/sarracenia/config_test.py b/tests/sarracenia/config_test.py index d3ab586bb..b368dfbb8 100644 --- a/tests/sarracenia/config_test.py +++ b/tests/sarracenia/config_test.py @@ -10,6 +10,7 @@ import sarracenia import sarracenia.config +import sarracenia.credentials logger = logging.getLogger('sarracenia.config') logger.setLevel('DEBUG') @@ -86,6 +87,265 @@ def test_variableExpansion(): assert( try_pattern( options, None, '${PBD}/${%Y%m%d}' , r'/apps/sarra/public_data/[0-9]{8}' )) +def test_read_line_declare(): - # to get stuff to print out, make it fail. - #assert False + options = sarracenia.config.default_config() + options.baseDir = '/data/whereIcameFrom' + options.documentRoot = options.baseDir + options.post_baseDir = '/data/whereIamGoingTo' + options.varTimeOffset= 0 + num_subscribers = len(options.declared_users) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "declare subscriber sub1" ) + new_num_subscribers = len(options.declared_users) + logger.info( f"before declare: {num_subscribers}, after: {new_num_subscribers}" ) + assert( num_subscribers +1 == new_num_subscribers ) + assert( options.declared_users['sub1'] == 'subscriber' ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "declare env VAR99=hoho" ) + assert( options.env['VAR99'] == 'hoho' ) + +def test_read_line_flags(): + + options = sarracenia.config.default_config() + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download off" ) + assert( options.download == False ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download on" ) + assert( options.download == True ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download False" ) + assert( options.download == False ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download True" ) + assert( options.download == True ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download no" ) + assert( options.download == False ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download yes" ) + assert( options.download == True ) + + assert( options.acceptSizeWrong == False ) + assert( options.acceptUnmatched == True ) + assert( options.sourceFromExchange == False ) + assert( options.sourceFromMessage == False ) + assert( options.sundew_compat_regex_first_match_is_zero == False ) + assert( options.topicCopy == False ) + +def test_read_line_counts(): + + options = sarracenia.config.default_config() + + # crasher input: + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch -1" ) + assert( options.batch == -1 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1" ) + assert( options.batch == 1 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1kb" ) + assert( options.batch == 1024 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1k" ) + assert( options.batch == 1000 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1m" ) + assert( options.batch == 1000000 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1mb" ) + assert( options.batch == 1024*1024 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 134684" ) + assert( options.batch == 134684 ) + + # count truncates floats... is this a good thing? + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch 1.9" ) + assert( options.batch == 1 ) + + +def test_read_line_sets(): + + options = sarracenia.config.default_config() + logger.info( f" {options.fileEvents=} " ) + + assert( options.fileEvents == set( ['create', 'delete', 'link', 'mkdir', 'modify', 'rmdir' ] ) ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "fileEvents -rmdir" ) + logger.info( f" {options.fileEvents=} " ) + assert( options.fileEvents == set( ['create', 'delete', 'link', 'mkdir', 'modify' ] ) ) + + #FIXME: must print an error message... how to test for that? + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "fileEvents moodify" ) + logger.info( f" {options.fileEvents=} " ) + assert( options.fileEvents == set( [ 'moodify' ] ) ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "fileEvents modify" ) + logger.info( f" {options.fileEvents=} " ) + assert( options.fileEvents == set( [ 'modify' ] ) ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "fileEvents +link" ) + logger.info( f" {options.fileEvents=} " ) + assert( options.fileEvents == set( [ 'link', 'modify' ] ) ) + + +def test_read_line_perms(): + + options = sarracenia.config.default_config() + logger.info( f" {options.permDefault=:o} " ) + + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "permDefault 0755" ) + logger.info( f" {options.permDefault=:o} " ) + assert( options.permDefault == 0o755 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "permDefault 644" ) + logger.info( f" {options.permDefault=:o} " ) + assert( options.permDefault == 0o644 ) + + +def test_read_line_duration(): + + options = sarracenia.config.default_config() + logger.info( f" {options.sleep=} " ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "sleep 30" ) + logger.info( f" {options.sleep=} " ) + assert( options.sleep == 30 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "sleep 30m" ) + logger.info( f" {options.sleep=} " ) + assert( options.sleep == 30*60 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "sleep 3h" ) + logger.info( f" {options.sleep=} " ) + assert( options.sleep == 3*60*60 ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "sleep 2d" ) + logger.info( f" {options.sleep=} " ) + assert( options.sleep == 2*24*60*60 ) + + +def test_read_line_add_option(): + + options = sarracenia.config.default_config() + + options.add_option( 'list_one', kind='list', default_value=['1','2'], all_values=['1','2','3','4'] ) + logger.info( f" {options.list_one=} " ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "list_one 3" ) + logger.info( f" {options.list_one=} " ) + assert( options.list_one == ['1','2','3'] ) + + options.add_option( 'str_one', kind='str', default_value='one' ) + logger.info( f" {options.str_one=} " ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "str_one too" ) + logger.info( f" {options.str_one=} " ) + assert( options.str_one == 'too' ) + + options.add_option( 'set_one', kind='set', default_value=['1','2'], all_values=['1','2','3','4'] ) + logger.info( f" {options.set_one=} " ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "set_one +3" ) + logger.info( f" {options.set_one=} " ) + assert( options.set_one == set( ['1','2','3'] ) ) + + options.add_option( 'count_one', kind='count', default_value=30 ) + logger.info( f" {options.count_one=} " ) + assert( options.count_one == 30 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "count_one 60" ) + logger.info( f" {options.count_one=} " ) + assert( options.count_one == 60 ) + + options.add_option( 'duration_one', kind='duration', default_value=30 ) + logger.info( f" {options.duration_one=} " ) + assert( options.duration_one == 30 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "duration_one 60m" ) + logger.info( f" {options.duration_one=} " ) + assert( options.duration_one == 3600 ) + + options.add_option( 'octal_one', kind='octal', default_value='644' ) + logger.info( f" {options.octal_one=:o} " ) + assert( options.octal_one == 0o644 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "octal_one 755" ) + logger.info( f" {options.octal_one=:o} " ) + assert( options.octal_one == 0o755 ) + + options.add_option( 'size_one', kind='size', default_value='644' ) + logger.info( f" {options.size_one=} " ) + assert( options.size_one == 644 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "size_one 4kb" ) + logger.info( f" {options.size_one=} " ) + assert( options.size_one == 4096 ) + + options.add_option( 'float_one', kind='float', default_value='3.4' ) + logger.info( f" {options.float_one=} " ) + assert( options.float_one == 3.4 ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "float_one 7.2" ) + logger.info( f" {options.float_one=} " ) + assert( options.float_one == 7.2 ) + + options.add_option( 'flag_one', kind='flag', default_value=False ) + logger.info( f" {options.flag_one=} " ) + assert( options.flag_one == False ) + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "flag_one on" ) + logger.info( f" {options.flag_one=} " ) + assert( options.flag_one == True ) + +def test_source_from_exchange(): + + options = sarracenia.config.default_config() + + # crasher input: + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "declare source tsource" ) + assert( 'tsource' in options.declared_users ) + assert( options.declared_users['tsource'] == 'source' ) + + options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "exchange xs_tsource_favourite" ) + + assert( options.exchange == 'xs_tsource_favourite' ) + + source = options.get_source_from_exchange(options.exchange) + assert( source == 'tsource' ) + + +def test_broker_finalize(): + + options = sarracenia.config.default_config() + options.component = 'subscribe' + options.config = 'ex1' + options.action = 'start' + + before_add=len(options.credentials.credentials) + + options.credentials.add( 'amqp://bunnypeer:passthepoi@localhost' ) + + after_add=len(options.credentials.credentials) + + assert( before_add + 1 == after_add ) + + options.parse_line( options.component, options.config, "subscribe/ex1", 1, "broker amqp://bunnypeer@localhost" ) + options.parse_line( options.component, options.config, "subscribe/ex1", 1, "post_broker amqp://bunnypeer@localhost" ) + + assert( options.broker.url.username == 'bunnypeer' ) + assert( options.broker.url.password == 'passthepoi' ) + assert( options.broker.url.username == 'bunnypeer' ) + assert( options.broker.url.password == 'passthepoi' ) + + assert( options.exchange == None ) + assert( not hasattr(options,'post_exchange') ) + assert( not hasattr(options,'retry_ttl') ) + + options.parse_line( options.component, options.config, "subscribe/ex1", 1, "directory ~/ex1" ) + options.parse_line( options.component, options.config, "subscribe/ex1", 1, "no 1" ) + + assert( len(options.bindings) == 0 ) + assert( options.directory == '~/ex1' ) + assert( not hasattr( options, 'queue_filename' ) ) + assert( options.queueName == None ) + + options.finalize() + + assert( hasattr(options,'retry_ttl') ) + assert( hasattr( options, 'queue_filename' ) ) + assert( hasattr( options, 'queueName' ) ) + assert( type(options.queueName) == str ) + assert( options.queueName.startswith('q_bunnypeer_subscribe.ex1') ) + assert( options.directory == os.path.expanduser( '~/ex1' ) ) + assert( len(options.bindings) == 1 ) + assert( options.exchange == 'xs_bunnypeer' ) + assert( options.post_exchange == [ 'xs_bunnypeer' ] ) + assert( hasattr(options,'nodupe_ttl') ) + assert( hasattr(options,'metricsFilename') ) + assert( hasattr(options,'pid_filename') ) + assert( hasattr(options,'retry_path') ) + assert( hasattr(options,'novipFilename') ) + assert( hasattr(options,'bindings') ) diff --git a/tests/sarracenia/flowcb/gather/am_test.py b/tests/sarracenia/flowcb/gather/am_test.py new file mode 100755 index 000000000..2e4b33cb3 --- /dev/null +++ b/tests/sarracenia/flowcb/gather/am_test.py @@ -0,0 +1,365 @@ +import pytest +import os, types, copy + +#useful for debugging tests +import pprint +def pretty(*things, **named_things): + for t in things: + pprint.PrettyPrinter(indent=2, width=200).pprint(t) + for k,v in named_things.items(): + print(str(k) + ":") + pprint.PrettyPrinter(indent=2, width=200).pprint(v) + +from sarracenia.flowcb.gather.am import Am +import sarracenia.config + +from sarracenia import Message as SR3Message +from sarracenia.flowcb.rename.raw2bulletin import Raw2bulletin + +class Options: + def __init__(self): + # self.o = sarracenia.config.default_config() + self.logLevel = "DEBUG" + self.logFormat = "" + self.queueName = "TEST_QUEUE_NAME" + self.component = "flow" + self.config = "foobar_am.conf" + self.sendTo = "am://127.0.0.1:5005" + self.pid_filename = "/tmp/sarracenia/am_test/pid_filename" + self.directory = "/tmp/test/directory" + self.housekeeping = float(39) + self.fileAgeMin = 0 + self.fileAgeMax = 0 + self.post_baseUrl = "http://localhost/" + self.post_format = "v02" + + def add_option(self, option, type, default = None): + if not hasattr(self, option): + setattr(self, option, default) + pass + +def make_message(): + m = SR3Message() + m["pubTime"] = "20180118151049.356378078" + m["topic"] = "v02.post.sent_by_tsource2send" + m["mtime"] = "20180118151048" + m["identity"] = { + "method": "md5", + "value": "c35f14e247931c3185d5dc69c5cd543e" + } + m["atime"] = "201801181.51049.356378078" + m["content"] = {"encoding":"" , "value": ""} + m["from_cluster"] = "localhost" + m["mode"] = "644" + m["source"] = "tsource" + m["sum"] = "d,c35f14e247931c3185d5dc69c5cd543e" + m["to_clusters"] = "localhost" + m["baseUrl"] = "https://NotARealURL" + m["post_baseUrl"] = "https://NotARealURL" + m["relPath"] = "ThisIsAPath/To/A/File.txt" + m["_deleteOnPost"] = set() + return m + +# NOTE: Need to test filtering as well? +# WorkList = types.SimpleNamespace() +# WorkList.ok = [] +# WorkList.incoming = [] +# WorkList.rejected = [] +# WorkList.failed = [] +# WorkList.directories_ok = [] + +# def test___init__(): +# BaseOptions = Options() +# am_instance = Am(BaseOptions) +# renamer = Raw2bulletin(BaseOptions) + +def _get_bulletin_info(message): + charset = message['content']['encoding'] + bulletin = message['content']['value'] + lines = bulletin.splitlines() + if message['content']['encoding'] != 'base64': + firstchars = bulletin[0:2].decode(charset) + if list(lines[1].split()): + station = lines[1].split()[0].decode(charset) + else: + station = lines[1].decode(charset) + else: + firstchars = "XX" + station = "XXX" + missing_ahl = 'CN00 CWAO' + return bulletin, firstchars, lines, missing_ahl, station, charset + +# For unit testing, we mostly want to check how the bulletins get corrected. +# We have lots of use cases where bulletin get corrected so it's important to test all of these cases + + +# @pytest.mark.depends(on=['test___init__']) + +# Test 1: Check a regular binary bulletin. +def test_am_binary_bulletin(): + from base64 import b64encode + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + + message_test1 = make_message() + message_test1['content']['encoding'] = 'base64' + message_test1['content']['value'] = b'ISAA41 CYWA 030000\nBUFR\x00\x00\xa8\x02\x00\x00\x12\x00\x006\x00\x00\x00\x00\r\r\x18\x05\x03\x00\x00\x00\x00\x00L\x00\x00\x01\x00\x01\xcc\x06\x02\x05\x02\x07\x01\x04\x01\x04\x02\x04\x03\x04\x04\x04\x05\x02\xc4\x01\xc3\x14\xd5\x14\r\x14\xce\x14\xc5\x14\x0b\x14\x01\n\x04\n3\x0c\x01\x0c\x02\x0c\x03\x0c\xc7\x08\x15\x04\x19\x0b\x0b\x0b\x0c\x04\x19\x08\x15\n4\n?\n=\r\x03\x85\x11\x00\x00\x00>\x00YWA (\x1cj6\x08I\xfa\x140\x00\xe0a@F1\x92g/\x9f6\xd0l~\xc1,hO\xfdh\x01_\xff\xfc\xf9D\xff\xc3DENSITY ALT 479FT7777\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test1) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test1['new_file'] = bulletinHeader + '__12345' + message_test1['new_dir'] = BaseOptions.directory + message_test1['content']['value'] = b64encode(message_test1['content']['value']).decode('ascii') + + # Check renamer. + message_test1 = renamer.rename(message_test1, False) + assert message_test1['new_file'] == 'ISAA41_CYWA_030000___00001' + + +# Test 2: Check a regular CACN bulletin +def test_cacn_regular(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test2 = make_message() + message_test2['content']['encoding'] = 'iso-8859-1' + message_test2['content']['value'] = b'CA\nWVO\n100,2024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test2) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test2['new_file'] = bulletinHeader + '__12345' + message_test2['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of a CACN + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'CACN00 CWAO 021600\nWVO\n100,2024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' + + # Check renamer. + message_test2['content']['value'] = new_bulletin.decode('iso-8859-1') + message_test2 = renamer.rename(message_test2, False) + assert message_test2['new_file'] == 'CACN00_CWAO_021600__WVO_00001' + +# Test 3: Check an erronous CACN bulletin (missing timestamp in bulletin contents) +def test_cacn_erronous(): + import re + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test3 = make_message() + message_test3['content']['encoding'] = 'iso-8859-1' + message_test3['content']['value'] = b'CA\nWPK\n0.379033,325.078,1.13338\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test3) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test3['new_file'] = bulletinHeader + '__12345' + message_test3['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of a CACN + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'CACN00 CWAO\nWPK\n0.379033,325.078,1.13338\n' + + # Check renamer. + message_test3['content']['value'] = new_bulletin.decode('iso-8859-1') + message_test3 = renamer.rename(message_test3, False) + assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , message_test3['new_file']) + +# Test 4: Bulletin with double line separator after header (my-header\n\n) +def test_bulletin_double_linesep(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test4 = make_message() + message_test4['content']['encoding'] = 'iso-8859-1' + message_test4['content']['value'] = b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test4) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test4['new_file'] = bulletinHeader + '__12345' + message_test4['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + # Checking for b'' because this is what returns when correctContents has no problems to report correcting. + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'' + + # Check renamer. + message_test4['content']['value'] = message_test4['content']['value'].decode('iso-8859-1') + message_test4 = renamer.rename(message_test4, False) + assert message_test4['new_file'] == 'SXCN35_CWVR_021100___00001' + +# Test 5: Bulletin with invalid year in timestamp (Fix: https://github.com/MetPX/sarracenia/pull/973) +def test_bulletin_invalid_timestamp(caplog): + import re, datetime + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test5 = make_message() + message_test5['content']['encoding'] = 'iso-8859-1' + message_test5['content']['value'] = b'CA\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test5) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test5['new_file'] = bulletinHeader + '__12345' + message_test5['new_dir'] = BaseOptions.directory + + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'CACN00 CWAO\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' + + message_test5['content']['value'] = message_test5['content']['value'].decode('iso-8859-1') + message_test5 = renamer.rename(message_test5, False) + # We want to make sure the proper errors are raised from the logs + assert 'Unable to fetch header contents. Skipping message' in caplog.text and 'Unable to verify year from julian time.' in caplog.text + + +# Test 6: Bulletin with trailing spaces at the end of the header (Fix: https://github.com/MetPX/sarracenia/pull/956) +def test_bulletin_header_trailing_space(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test6 = make_message() + message_test6['content']['encoding'] = 'iso-8859-1' + message_test6['content']['value'] = b'SXCN35 CWVR 021100 \n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test6) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test6['new_file'] = bulletinHeader + '__12345' + message_test6['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + + +# Test 7: Bulletin with a wrong station name (Fix: https://github.com/MetPX/sarracenia/pull/963/files) +def test_bulletin_wrong_station(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test7 = make_message() + message_test7['content']['encoding'] = 'iso-8859-1' + message_test7['content']['value'] = b'UECN99 CYCX 071200\nTTDD21 /// 5712/ 71701 NIL=\n\n\n\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test7) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test7['new_file'] = bulletinHeader + '__12345' + message_test7['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + # Checking for b'' because this is what returns when correctContents has no problems to report correcting + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'' + + # Check renamer. + message_test7['content']['value'] = message_test7['content']['value'].decode('iso-8859-1') + message_test7 = renamer.rename(message_test7, False) + assert message_test7['new_file'] == 'UECN99_CYCX_071200___00001_PROBLEM' + +# Test 8: SM Bulletin - Add station mapping + SM/SI bulletin accomodities +def test_SM_bulletin(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test8 = make_message() + message_test8['content']['encoding'] = 'iso-8859-1' + message_test8['content']['value'] = b'SM 030000\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test8) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test8['new_file'] = bulletinHeader + '__12345' + message_test8['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + am_instance.o.mapStations2AHL = ['SMCN06 CWAO COLL 71816 71818 71821 71825 71827 71828 71831 71832 71834 71841 71842 71845 71850 71854'] + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'SMCN06 CWAO 030000\nAAXX 03004\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' + + message_test8['content']['value'] = new_bulletin.decode('iso-8859-1') + message_test8 = renamer.rename(message_test8, False) + assert message_test8['new_file'] == 'SMCN06_CWAO_030000__71816_00001' + +# Test 9: Bulletin with 5 fields in header (invalid) +def test_bulletin_header_five_fileds(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test9 = make_message() + message_test9['content']['encoding'] = 'iso-8859-1' + message_test9['content']['value'] = b'SXCN35 CWVR 021100 AAA OOPS\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test9) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test9['new_file'] = bulletinHeader + '__12345' + message_test9['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'SXCN35 CWVR 021100 AAA\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + +# Test 10: Bulletin with 6 fields in header (invalid) +def test_bulletin_header_six_fileds(): + + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test10 = make_message() + message_test10['content']['encoding'] = 'iso-8859-1' + message_test10['content']['value'] = b'SXCN35 CWVR 021100 AAA OTHER OHNO\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test10) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test10['new_file'] = bulletinHeader + '__12345' + message_test10['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'SXCN35 CWVR 021100 AAA OTHER\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' + + +# Test 11: Bulletin with a timestamp (DDHHmm) bigger then 6 chars +def test_bulletin_timestamp_6chars_plus(): + BaseOptions = Options() + renamer = Raw2bulletin(BaseOptions) + am_instance = Am(BaseOptions) + + message_test11 = make_message() + message_test11['content']['encoding'] = 'iso-8859-1' + message_test11['content']['value'] = b'SXCN35 CWVR 021100Z\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff' + + bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test11) + + bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') + message_test11['new_file'] = bulletinHeader + '__12345' + message_test11['new_dir'] = BaseOptions.directory + + # Check correcting the bulletin contents of the bulletin + new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + assert new_bulletin == b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' \ No newline at end of file