diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml index dbb254124..4d6d957b2 100644 --- a/.github/workflows/flow.yml +++ b/.github/workflows/flow.yml @@ -40,7 +40,7 @@ jobs: timeout-minutes: 40 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/flow_amqp_consumer.yml b/.github/workflows/flow_amqp_consumer.yml index 1f91724ad..c476fae57 100644 --- a/.github/workflows/flow_amqp_consumer.yml +++ b/.github/workflows/flow_amqp_consumer.yml @@ -37,7 +37,7 @@ jobs: timeout-minutes: 40 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies # 2023-11-13 RS Added SSH config changes to see if it makes the tests more reliable diff --git a/.github/workflows/flow_basic.yml b/.github/workflows/flow_basic.yml index f6a99dae5..62e7ccc1a 100644 --- a/.github/workflows/flow_basic.yml +++ b/.github/workflows/flow_basic.yml @@ -34,7 +34,7 @@ jobs: timeout-minutes: 20 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/flow_mqtt.yml b/.github/workflows/flow_mqtt.yml index e1532404b..cae6f69b7 100644 --- a/.github/workflows/flow_mqtt.yml +++ b/.github/workflows/flow_mqtt.yml @@ -37,7 +37,7 @@ jobs: timeout-minutes: 45 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/flow_redis.yml b/.github/workflows/flow_redis.yml index cc2e9c567..1cde2dadb 100644 --- a/.github/workflows/flow_redis.yml +++ b/.github/workflows/flow_redis.yml @@ -6,6 +6,7 @@ on: push: branches: - v03_disabled + - '*redis*' paths-ignore: - '.github/**' @@ -37,7 +38,7 @@ jobs: timeout-minutes: 40 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Enable tmate debugging of manually-triggered workflows if the input option was provided # https://github.com/marketplace/actions/debugging-with-tmate diff --git a/.github/workflows/ghcr-base.yml b/.github/workflows/ghcr-base.yml index 9ec62eb63..b841a6e68 100644 --- a/.github/workflows/ghcr-base.yml +++ b/.github/workflows/ghcr-base.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout branch - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 diff --git a/.github/workflows/ghcr-dev.yml b/.github/workflows/ghcr-dev.yml index 5484b0775..3809bee52 100644 --- a/.github/workflows/ghcr-dev.yml +++ b/.github/workflows/ghcr-dev.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout branch - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 diff --git a/.github/workflows/ghcr.yml b/.github/workflows/ghcr.yml index 631de5f61..6d08c582a 100644 --- a/.github/workflows/ghcr.yml +++ b/.github/workflows/ghcr.yml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout branch - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 185acf7cd..fb55e0864 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v3 diff --git a/debian/changelog b/debian/changelog index 987babe52..1029f9c80 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,32 @@ 
+metpx-sr3 (3.00.56) unstable; urgency=medium
+
+  * fix related to #1275 so "attempts 0" works for outages.
+  * fix #1278 regression: ack failure loops forever.
+  * fix #1271 AM charset issues, made some plugins more flexible.
+  * fix #1266 complain when explicitly asked to start disabled config.
+  * fix #1261 http performance regression.
+  * fix #1270 invalid mirroring of relative symlinks when realpath is
+    symlinked. (requires next metpx-sr3c version 3.24.10rc2 ?3?)
+  * fix #1267 queueName before subtopic from this point forward.
+  * fix #1262 mtime referenced before assignment (download check fail.)
+  * fix #1271 renamer for AM socket generalized.
+  * fix #1255 remove malformed useless error message.
+  * incrementing github action version because of plaintive supplications.
+  * demoted false-alarm error message about missing subscriptions.json to debug.
+  * NEW option *recursive* ( #1237 ) (actually restored, was removed.)
+  * fix #1156 - NEW option *logDuplicates*.
+  * NEW EXPERIMENTAL option: *logJson*.
+  * fix #1223 Switch from global QoS to per-consumer.
+  * AM sender problems identified in satnet testing.
+  * working on #35 augmenting .qname with subscriptions.json state file.
+  * working on #1183, sr3 manages instances > 100 correctly.
+  * PR #1228 Unquote secret_access_key in s3CloudSender plugin.
+  * PR #1230 ... reduced spurious "unknown options" messages from config.
+  * fix #1199 - directory / not interpreted properly.
+  * fix #1246 delete source when no options given.
+
+ -- SSC-5CD2310S60 Thu, 31 Oct 2024 12:25:14 -0400
+
metpx-sr3 (3.00.55post1) unstable; urgency=medium

 * fix #1242 sftp creates directories when it should not.
diff --git a/docs/source/Contribution/Release.rst b/docs/source/Contribution/Release.rst
index aab8d4b61..e746a8450 100644
--- a/docs/source/Contribution/Release.rst
+++ b/docs/source/Contribution/Release.rst
@@ -19,7 +19,7 @@ To publish a pre-release one needs to:
  * git pull
  * git checkout development_py36
  * git pull
-  * git merge development
+  * git merge --strategy-option=theirs development

- validate that the correct version of C stack will be running when running flow tests.
  on each server::
@@ -84,7 +84,7 @@ To publish a pre-release one needs to:
  * git pull
  * git checkout pre_release_py36
  * git pull
-  * git merge development_py36
+  * git merge --strategy-option=theirs development_py36
  * git push
  * git push origin o3.xx.yyrcz

@@ -92,7 +92,7 @@ To publish a pre-release one needs to:
  * git tag -a v3.xx.yy.rcZ -m "pre-release v3.xx.yy.rcz"
  * git checkout pre_release
  * git pull
-  * git merge development
+  * git merge --strategy-option=theirs development
  * git push
  * git push origin v3.xx.yyrcz

@@ -172,7 +172,7 @@ the stable release does not require any explicit testing.
    git pull
    git checkout stable
    git pull
-    git merge pre-release
+    git merge --strategy-option=theirs pre-release
    git push
    # there will be conflicts here for debian/changelog and sarracenia/_version.py

@@ -191,7 +191,7 @@ the stable release does not require any explicit testing.
    git pull
    git checkout stable_py36
    git pull
-    git merge pre_release_py36
+    git merge --strategy-option=theirs pre_release_py36
    git push
    # same editing required as above.
    git tag -a o3.xx.yy -m "o3.xx.yy"
@@ -256,6 +256,26 @@ ubuntu 18 is not compatible with the current pypi.org.
 * attach redhat 9 rpm
 * attach windows exe ... see: `Building a Windows Installer`_

+Post-Release
+------------
+
+Sometimes there is *just one bug* that should really be addressed in a stable release.
+Such a release is usually built with:
+
+* choose the branch (either pre-release or stable or similar for v2)
+* one (or more) *git cherry-pick* emergency fixes from some other branch.
+* one individual commit to sarracenia/_version.py to add the post1 suffix to the version,
+* one commit to debian/changelog to update it with information about post1 and the changes therein.
+
+These changes to debian/changelog and sarracenia/_version.py will cause conflicts with
+future development. Afterwards, one needs to cherry-pick the debian/changelog back into
+development to have a record of it. This will likely cause a conflict, but it's easy to
+resolve.
+
+That is why many of the merges above use *--strategy-option=theirs*: these postX releases
+cause conflicts.
+
+
Details
-------
@@ -376,7 +396,7 @@ A tag should be created to identify the end of the cycle::

Once the tag is in the development branch, promote it to stable::

    git checkout pre-release
-    git merge development
+    git merge --strategy-option=theirs development
    git push

Once stable is updated on github, the docker images will be automatically upgraded, but
@@ -527,6 +547,12 @@ Repositories:

* metpx-sr3-old -- on demand build sr3 packages from *stable_py36* branch.
* metpx-sarracenia-release -- on deman build v2 packages from *v2_stable* branch.

+* Post-Release (version.post1, post2, etc.)
+  usually the result of a cherry-pick for an emergency fix, and a commit or two about
+  debian/changelog and sarracenia/_version that will cause conflicts with future development.
+  * metpx-sr3 is the repository to target (using *stable* (or *v2_stable*) branch)
+
+
for more discussion see `Which Version is stable `_
diff --git a/docs/source/Explanation/CommandLineGuide.rst b/docs/source/Explanation/CommandLineGuide.rst
index 979eee30a..1115a9a1b 100644
--- a/docs/source/Explanation/CommandLineGuide.rst
+++ b/docs/source/Explanation/CommandLineGuide.rst
@@ -542,6 +542,7 @@ The second row of output gives detailed headings within each category:
The configurations are listed on the left. For each configuraion, the state
will be:

+* cpuS: process is expensive in CPU usage (runStateThreshold_cpuSlow)
* disa: disabled, configured not to run.
* hung: processes appear hung, not writing anything to logs.
* idle: all processes running, but no data or message transfers for too long (runStateThreshold_idle)
@@ -552,6 +553,8 @@ will be:
* run: all processes are running (and transferring, and not behind, and not slow... normal state.)
* slow: transfering less than minimum bytes/second ( runStateThreshold_slow )
* stop: no processes are running.
+* stby: Standby mode: all processes running, but messages are being stored in the local download_retry queue.
+* wVip: process doesn't have the vip (only applies when the vip option is specified in the config)

The next columns to the right give more information, detailing how many processes
are Running, out of the number expected. For example, 3/3 means 3 processes or instances found
of the 3 expected to be found.
@@ -2055,6 +2058,11 @@ are errors. The back-off can accumulate to the point where retries could be sepa
or two. Once the server begins responding normally again, the programs will return to
normal processing speed.

+If a failure is expected to last for a while, one can stop the flow and configure
+*attempts 0* to fill the retry queue without making vain attempts to download or send.
+At the end of the outage, return *attempts* to normal, and the retry queue will
+gradually be drained when there is room in the current data flow.
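+
+For example, a minimal sketch of a sender configuration during a planned outage
+(the broker URL and destination below are purely illustrative, not defaults)::
+
+    broker amqps://feeder@broker.example.com
+    sendTo sftp://peer@remote.example.com
+    # during the outage: queue locally instead of attempting deliveries
+    attempts 0
+
+Once the destination recovers, remove the *attempts 0* line (the default is 3
+attempts) and the accumulated retry queue will drain as capacity allows.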
+
EXAMPLES
========
diff --git a/docs/source/Explanation/DeploymentConsiderations.rst b/docs/source/Explanation/DeploymentConsiderations.rst
index 4b2503e15..924328c00 100644
--- a/docs/source/Explanation/DeploymentConsiderations.rst
+++ b/docs/source/Explanation/DeploymentConsiderations.rst
@@ -147,6 +147,47 @@ Security Considerations

This section is meant to provide insight to those who need to perform a security review
of the application prior to implementation.
+
+Architecture
+~~~~~~~~~~~~
+
+Sarracenia can be a component in many solutions, and can be deployed as a cloud component.
+However, in its rawest, simplest form, Sarracenia is not used like cloud services, where
+one service is accessible from anywhere. It is more of a component or toolkit that is
+expected to work with traditional network security zoning. Rather than having one service
+for all, requiring traffic/firewall exceptions and external security scanning to
+intercept traffic, one deploys a pump at each network zone demarcation.
+
+Data is delivered to the pump at the demarcation point, and then another pump
+forwards data to the next zone. As part of demarcation processing, one can download a
+file, run it through processing, such as malware scanning, and then announce
+its availability to the following pump only if it is OK.
+
+Each pump has independent authentication, and pump administrators
+and users can define what traffic is made available to users on the other side of
+the demarcation point. Pumps are chained together by copying from one to the next
+to the next, where each one can have different access, purpose, and ownership.
+
+No formal federation or whole network identity is needed to pass data around
+the network. Instead, each pump establishes authentication for the neighbouring pump.
+If countries operated data pumps, one could imagine a situation like the following:
+The Russians and Americans want to transfer data but do not want to be exposed to each
+other's servers directly. The Russians could share with Kazakhstan, the Kazakhs exchange
+with Korea, and Korea exchanges with Canada. The Americans only need to have
+a good relationship with the Canadians or Koreans. Each link in the chain
+exposes itself directly only to peers with which it has an explicit and
+agreed relationship. Each link in the chain can perform its own
+scanning and processing before accepting the data.
+
+.. image:: Concepts/sr3_flow_example.svg
+   :scale: 100%
+   :align: center
+
+In this example, you can see ddsr pumps deployed in internal
+operations zones; they push to or pull from pumps in other zones, such as another
+operations zone, or a public access zone. Pumps are expected to
+mediate traffic travelling between network zones.

Client
~~~~~~
diff --git a/docs/source/How2Guides/UPGRADING.rst b/docs/source/How2Guides/UPGRADING.rst
index 8716dde14..b19e47aac 100644
--- a/docs/source/How2Guides/UPGRADING.rst
+++ b/docs/source/How2Guides/UPGRADING.rst
@@ -39,6 +39,25 @@ Installation Instructions

git
---
+3.0.56
+------
+
+*CHANGE*: code refactor: the *sarracenia.credentials...* classes are now
+*sarracenia.config.credentials...*; any code using credentials needs to be
+updated.
+
+*CHANGE*: queue settings are stored in the subscriptions.json state file,
+instead of a .qname file, along with more information. The transition may
+be complex. This version will read and write both files,
+so as to preserve the ability to downgrade. A later version will drop
+support for .qname files.
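+
+For example, for a subscribe configuration named *myflow* (a hypothetical name),
+both state files would normally sit side by side under the state directory
+(paths are illustrative, assuming a Linux default of ~/.cache/sr3 and no
+*statehost*)::
+
+    ~/.cache/sr3/subscribe/myflow/subscriptions.json    (new format)
+    ~/.cache/sr3/subscribe/myflow/*.qname               (old format, still written)
+
+No manual migration is needed; this version creates and maintains both files itself.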
+
+*CHANGE*: in configuration files: *subtopic* must come after
+the relevant queue naming options (queueName, queueShare).
+In prior releases, queue naming was a global setting.
+In a future version, one will be able to subscribe to multiple
+queues with a single subscriber.
+
3.0.54
------
diff --git a/docs/source/Reference/sr3_options.7.rst b/docs/source/Reference/sr3_options.7.rst
index 247b8ea2d..e4f66f0f9 100644
--- a/docs/source/Reference/sr3_options.7.rst
+++ b/docs/source/Reference/sr3_options.7.rst
@@ -438,6 +438,30 @@ of **attempts** (or send, in a sender) will cause the notification message to be
for later retry. When there are no notification messages ready to consume from the AMQP queue,
the retry queue will be queried.

+If:
+
+* It is known that transfers will fail for a long time, because of some sort of outage or maintenance
+  on your destination.
+
+* There is a large volume of files you expect to queue up for transfer, so the queues
+  on the data pump will grow to a point where the pump admins will be uncomfortable.
+  Note that all advice about message broker performance and availability tuning
+  asks users to minimize queueing on the brokers.
+
+* The local state directory ( ~/.cache ) is writable during the outage period.
+
+Then:
+
+One can set *attempts* to 0. This will cause messages queued for transfer to be written
+to local download_retry queues (written in the local state directories) and offload
+the broker.
+
+When *attempts* is 0, the *sr3 status* command will report that the flow is in the
+*standby* state. The retry queue count will rise, and only messages (no data) will be transferred.
+When the maintenance activity or failure has been addressed, restore *attempts* to its
+normal value, and the retry queue will gradually drain.
+
+
baseDir (default: /)
----------------------------
@@ -1089,6 +1113,19 @@ to control what is written to log files. The format is documented here:

* https://docs.python.org/3/library/logging.html#logrecord-attributes

+logJson (default: False) EXPERIMENTAL
+--------------------------------------------
+
+When *logJson on* is set, a second log file with a .json extension is written beside the
+normal .log file. Each line of the .json log is a JSON structure containing
+a message written by the flow's logger. It does not contain the unformatted output of
+sub-shells and plugins, which may produce arbitrary output.
+
+The .log file will contain the output of sub-programs launched by the flow,
+and the .json will only contain properly formatted log messages from the application itself
+and properly written callbacks (that use normal python logging mechanisms.)
+
+
logLevel ( default: info )
--------------------------
@@ -1536,6 +1573,9 @@ Instances started on any node with access to the same shared file will use the
same queue. Some may want use the *queueName* option as a more explicit
method of sharing work across multiple nodes.

+The *subtopic* option should appear after the queueName setting in configuration files
+for the topic bindings to apply to the given queue.
+
queueShare ( default: ${USER}_${HOSTNAME}_${RAND8} )
----------------------------------------------------------
@@ -1554,6 +1594,9 @@ will result in a random 8 digit number being appended to the queue name.
All the instances within the configuration with access to the same state
directory will use the queue name thus defined.

+The *subtopic* option should appear after the queueShare setting in configuration files
+for the topic bindings to apply to the given queue.
+
randomize
----------------
@@ -1596,6 +1639,14 @@ given.
This option also enforces traversing of symbolic links.
This option is being used to investigate some use cases, and may disappear in future.

+recursive (default: on)
+------------------------------
+
+When scanning a path (for poll, post, cpost, or watch), should the contents of any
+directory encountered be included as well? To scan only the specified directory
+and no sub-directories, specify *recursive off*.
+
+
rename
-------------
@@ -1771,7 +1822,7 @@ appropriately.

scheduled_interval,scheduled_hour,scheduled_minute,scheduled_time
---------------------------------------------------
+-----------------------------------------------------------------

When working with scheduled flows, such as polls, one can configure a duration
(no units defaults to seconds, suffixes: m-minute, h-hour) at which to run a
@@ -1798,7 +1849,7 @@ this will poll the data at 15:30, 16:30 and 18:59 every day.
This option allows your time field then previous options.

sendTo
----------------
+------------

Specification of a remote resource to deliver to in a sender.
@@ -1952,8 +2003,8 @@ message flows.

subtopic (default: #)
------------------------------------

-Within an exchange's postings, the subtopic setting narrows the product selection.
-To give a correct value to the subtopic,
+Within an exchange's postings, the subtopic setting narrows the product selection
+for objects to place in the currently selected queue. To give a correct value to the subtopic,
one has the choice of filtering using **subtopic** with only AMQP's limited wildcarding and
length limited to 255 encoded bytes, or the more powerful regular expression
based **accept/reject** mechanisms described below. The difference being that the
@@ -1962,18 +2013,18 @@ to the client at all. The **accept/reject** patterns apply to messages sent by
broker to the subscriber. In other words, **accept/reject** are client side filters,
whereas **subtopic** is server side filtering.

-It is best practice to use server side filtering to reduce the number of notification messages sent
-to the client to a small superset of what is relevant, and perform only a fine-tuning with the
-client side mechanisms, saving bandwidth and processing for all.
+Use server side filtering to reduce the number of notification messages sent
+to the client to a small superset of what is relevant, and refine further with the
+client side accept/reject, saving bandwidth and processing for all.

-topicPrefix is primarily of interest during protocol version transitions,
-where one wishes to specify a non-default protocol version of messages to
-subscribe to.
+Often, the user specifies one exchange, and several subtopic options.
+**Subtopic** is what is normally used to indicate messages of interest
+for a given queue. If needed, queueName and/or queueShare must appear
+earlier in the configuration file for the subtopic to apply to the selected queue.

-Usually, the user specifies one exchange, and several subtopic options.
-**Subtopic** is what is normally used to indicate messages of interest.
To use the subtopic to filter the products, match the subtopic string with
-the relative path of the product.
+the relative path of the product (non-Sarracenia pumps may have different
+topic hierarchy conventions).

For example, consuming from DD, to give a correct value to subtopic, one can
browse the our website **http://dd.weather.gc.ca** and write down all directories
@@ -2063,9 +2114,8 @@ Sarracenia has a convention for how topics for products should be organized.
The a topicPrefix, followed by subtopics derived from the *relPath* field of the message.
Some networks may choose to use different topic conventions, external to sarracenia.

-timeout (default: 0)
--------------------------------
+timeout (default: 300)
+---------------------------------

The **timeout** option, sets the number of seconds to wait before aborting a
connection or download transfer (applied per buffer during transfer).
@@ -2079,10 +2129,10 @@ Examples: Canada/Pacific, Pacific/Nauru, Canada/Eastern, Europe/Paris

Has no effect other than in when polling an FTP server.

-tlsRigour (default: medium)
+tlsRigour (default: normal)
---------------------------

-tlsRigour can be set to: *lax, medium, or strict*, and gives a hint to the
+tlsRigour can be set to: *lax, normal, or strict*, and gives a hint to the
application of how to configure TLS connections. TLS, or Transport Level
Security (used to be called Secure Socket Layer (SSL)) is the wrapping of
normal TCP sockets in standard encryption. There are many aspects of TLS
@@ -2113,6 +2163,14 @@ prepended to the sub-topic to form a complete topic hierarchy.
This option applies to subscription bindings.
Denotes the version of messages received in the sub-topics. (v03 refers to
``_)

+topicPrefix is primarily of interest during protocol version transitions,
+where one wishes to specify a non-default protocol version of messages to
+subscribe to.
+
+For example, Sr3 expects v03 messages by default, but there are
+plenty of sources that still offer the old version, requiring a topicPrefix
+of *v02.post* to subscribe to the old message format.
+
users (default: false)
-----------------------------
diff --git a/docs/source/fr/CommentFaire/MiseANiveau.rst b/docs/source/fr/CommentFaire/MiseANiveau.rst
index b3844a7a7..83e8e231e 100644
--- a/docs/source/fr/CommentFaire/MiseANiveau.rst
+++ b/docs/source/fr/CommentFaire/MiseANiveau.rst
@@ -38,6 +38,24 @@ Instructions d’installation

git
---
+3.0.56
+------
+
+*CHANGEMENT* : refactorisation du code : les classes *sarracenia.credentials...* sont désormais
+*sarracenia.config.credentials...* ; tout code utilisant des informations d'identification doit être
+mis à jour.
+
+*CHANGEMENT* : paramètres de file d'attente stockés dans le fichier d'état subscriptions.json,
+au lieu d'un fichier .qname, avec plus d'informations. La transition
+est peut-être complexe. Cette version lira et écrira les deux fichiers,
+afin de préserver la possibilité de rétrogradation. Une version ultérieure abandonnera
+la prise en charge des fichiers .qname.
+
+*CHANGEMENT* : dans les fichiers de configuration : *subtopic* doit venir après
+les options de nommage de file d'attente pertinentes (queueName, queueShare).
+Dans les versions précédentes, la dénomination de la file d'attente était un paramètre global.
+Dans une future version, on pourra s'abonner à plusieurs files d'attente avec un seul abonné.
+
3.0.54
------
diff --git a/docs/source/fr/Explication/ConsiderationsDeployments.rst b/docs/source/fr/Explication/ConsiderationsDeployments.rst
index 4da5ad37b..56a6fd80f 100644
--- a/docs/source/fr/Explication/ConsiderationsDeployments.rst
+++ b/docs/source/fr/Explication/ConsiderationsDeployments.rst
@@ -167,6 +167,46 @@ Considérations de sécurité

Cette section a pour but de donner un aperçu à ceux qui ont besoin d'effectuer un examen de sécurité.
de l'application avant la mise en œuvre.
+Architecture
+~~~~~~~~~~~~
+
+Sarracenia peut être un composant de nombreuses solutions et peut être déployé en tant que composant cloud.
+Cependant, dans sa forme la plus brute et la plus simple, Sarracenia n'est pas utilisé comme les services cloud, où
+un service est accessible de n'importe où. Il s'agit plutôt d'un composant ou d'une boîte à outils qui est
+censé fonctionner avec le zonage de sécurité réseau traditionnel. Plutôt que d'avoir un service
+pour tous et d'exiger des exceptions de trafic/pare-feu et une analyse de sécurité externe pour
+intercepter le trafic, on déploie une pompe à chaque démarcation de zone réseau.
+
+Les données sont livrées à la pompe au point de démarcation, puis une autre pompe
+transmet les données à la zone suivante. Dans le cadre du traitement de démarcation, on
+peut télécharger un fichier, l'exécuter via un traitement, comme une analyse des logiciels
+malveillants, puis annoncer sa disponibilité à la pompe suivante uniquement s'il est correct.
+
+Chaque pompe dispose d'une authentification indépendante, et les administrateurs de pompe
+et les utilisateurs peuvent définir le trafic mis à disposition des utilisateurs de l'autre côté
+du point de démarcation. Les pompes sont enchaînées en copiant de l'une à la suivante,
+puis à la suivante, où chacune peut avoir un accès, un but et une propriété différents.
+
+Aucune fédération formelle ou identité de réseau complet n'est nécessaire pour transmettre des données
+sur le réseau. Au lieu de cela, chaque pompe établit une authentification pour la pompe voisine.
+Si les pays exploitaient des pompes de données, on pourrait imaginer une situation comme celle-ci :
+Les Russes et les Américains veulent transférer des données mais ne veulent pas être exposés directement aux serveurs
+des autres. Les Russes pourraient partager avec le Kazakhstan, les Kazakhs échanger
+avec la Corée et la Corée échanger avec le Canada. Les Américains n'ont besoin que d'avoir
+une bonne relation avec les Canadiens ou les Coréens. Chaque maillon de la chaîne
+s'expose directement uniquement aux pairs avec lesquels il a une relation explicite et
+convenue. Chaque maillon de la chaîne peut effectuer sa propre analyse et son propre traitement avant d'accepter les données.
+
+.. image:: Concepts/sr3_exemple_de_flux.svg
+   :scale: 100%
+   :align: center
+
+Dans cet exemple, vous pouvez voir que les pompes ddsr sont déployées sur des zones d'opérations
+internes et qu'elles poussent ou tirent depuis des pompes situées dans d'autres zones, telles qu'une autre
+zone d'opérations ou une zone d'accès public. Les pompes sont censées
+servir de médiateur au trafic circulant entre les zones du réseau.
+
+
Client
~~~~~~
diff --git a/docs/source/fr/Explication/GuideLigneDeCommande.rst b/docs/source/fr/Explication/GuideLigneDeCommande.rst
index 92820b5da..1f8eb36c0 100644
--- a/docs/source/fr/Explication/GuideLigneDeCommande.rst
+++ b/docs/source/fr/Explication/GuideLigneDeCommande.rst
@@ -552,8 +552,8 @@ sera :
* rtry : tous les processus en cours d'exécution, mais un grand nombre de transferts échouent, causant d'autres tentatives (runStateThreshold_retry )
* run : tous les processus sont en cours d'exécution (et en transfert, et pas en retard, et pas lents... état normal.)
* slow : transfert de moins que le minimum d'octets/seconde ( runStateThreshold_slow )
+* stby : Mode veille (Standby) : tous les processus sont en cours d'exécution, mais les messages sont stockés dans la file d'attente download_retry locale.
* stop : aucun processus n'est en cours d'exécution.
-
Les colonnes à droite donnent plus d’informations, détaillant le nombre de processus en cours d’exécution
à partir du nombre attendu. Par exemple, 3/3 signifie 3 processus ou instances sont trouvés à partir des 3 attendus.
Expected liste combien de processus devraient être exécutés à partir d'une configuration même si ils sont arrêtés.
@@ -694,8 +694,8 @@ utilisera le nom de file d'attente ainsi défini.


-AMQP QUEUE BINDINGS
-------------------
+Liaisons de file d'attente AMQP
+-------------------------------

Une fois qu'on a une fil d'attente, elle doit être liée à un échange (exchange.)
Les utilisateurs ont presque toujours besoin de définir ces options. Une
@@ -784,8 +784,8 @@ On peut désactiver la liaison de fil d’attente comme cela::


-Client-side Filtering
---------------------
+Filtrage côté client
+--------------------

Nous avons sélectionné nos messages via **exchange**, **subtopic** et **subtopic**.
Le courtier met les messages correspondants dans notre fil d'attente (*queue*).
Le composant télécharge ces messages.

Les clients Sarracenia implémentent un filtrage plus flexible côté client
en utilisant les expressions régulières.

-Brief Introduction to Regular Expressions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Brève introduction aux expressions régulières
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Les expressions régulières sont un moyen très puissant d'exprimer les correspondances de motifs.
Ils offrent une flexibilité extrême, mais dans ces exemples, nous utiliserons seulement un
@@ -2043,6 +2043,12 @@ des erreurs. Le back-off peut s’accumuler au point où les nouvelles tentative
ou deux. Une fois que le serveur recommence à répondre normalement, les programmes reviendront à
la vitesse normale de traitement.

+Si une panne dure un certain temps, on peut arrêter le flux, configurer *attempts 0* pour remplir la
+file d'attente de nouvelles tentatives sans faire de vaines tentatives de téléchargement ou d'envoi. À la fin de la panne,
+remettez *attempts* à la normale et la file d'attente de nouvelles tentatives sera progressivement vidée lorsqu'il y aura
+de la place dans le flux de données actuel.
+
+
EXEMPLES
========
diff --git a/docs/source/fr/Reference/sr3_options.7.rst b/docs/source/fr/Reference/sr3_options.7.rst
index 04766f035..2232db7ff 100644
--- a/docs/source/fr/Reference/sr3_options.7.rst
+++ b/docs/source/fr/Reference/sr3_options.7.rst
@@ -444,6 +444,28 @@ des **attempts** (ou d’envoi, pour un sender) va entrainer l’ajout du messag
pour une nouvelle tentative plus tard. Lorsque aucun message d'annonce n’est prêt à être consommé
dans la fil d’attente AMQP, les requêtes se feront avec la fil d’attente de "retry".

+Si :
+
+* on sait que les transferts échoueront pendant une longue période, en raison d'une panne ou d'une maintenance
+  à la destination.
+
+* on s'attend à ce qu'un grand volume de fichiers soit mis en file d'attente pour transfert. Les files d'attente
+  sur la pompe de données augmenteront donc jusqu'à un point où les administrateurs de la pompe ne seront plus à l'aise.
+  Notez que tous les conseils sur le réglage des performances et de la disponibilité de courtier de messages
+  demandent aux utilisateurs de minimiser la population des files d'attente sur les courtiers.
+
+* le répertoire d'état local ( ~/.cache ) est accessible en écriture pendant la période de la panne.
+
+Alors :
+
+On peut définir *attempts* sur 0.
+Cela entraînera l'écriture des messages mis en file d'attente pour le transfert
+dans les files d'attente *download_retry* locales (écrites dans les répertoires d'état locaux), ce qui déchargera
+le courtier.
+
+Lorsque *attempts* est égal à 0, la commande *sr3 status* signalera que le flux est dans l'état
+*standby*. Le compte de la file d'attente de nouvelles tentatives augmentera et seuls les messages (pas de données) seront transférés.
+Une fois l'activité de maintenance ou la panne résolue, remettez *attempts* à sa valeur
+normale et la file d'attente de nouvelles tentatives se videra progressivement.
+
baseDir (défaut: /)
----------------------------
@@ -1083,6 +1105,17 @@ de python. Le format est documenté ici:

* https://docs.python.org/fr/3/library/logging.html#logrecord-attributes

+logJson (par défaut : faux) EXPÉRIMENTAL
+------------------------------------------------
+
+Lorsque *logJson on* est défini, un deuxième fichier journal avec l'extension .json est créé à côté du
+fichier .log normal. Chaque ligne du journal .json est une structure JSON contenant
+un message écrit par le journal de flux. Il ne contient pas la sortie non formatée
+des sous-shells et des plugins, qui peuvent produire une sortie arbitraire.
+
+Le fichier .log contiendra la sortie des sous-programmes lancés par le flux,
+et le .json ne contiendra que les messages de journal correctement formatés provenant de l'application elle-même
+et des rappels correctement écrits (qui utilisent des mécanismes de journalisation python normaux.)
+
logLevel ( défaut: info )
-------------------------
@@ -1528,6 +1561,8 @@ Les instances démarrées sur n’importe quel nœud ayant accès au même fichi
même fil d’attente. Certains voudront peut-être utiliser l’option *queueName* comme méthode plus explicite
de partager le travail sur plusieurs nœuds. Il est pourtant recommandé d´utiliser queueShare a cette fin.

+L'option *subtopic* devrait apparaître après le paramètre queueName dans les fichiers
+pour que les liaisons de sujet s'appliquent à la file d'attente spécifiée.

queueShare (default: ${USER}_${HOSTNAME}_${RAND8} )
@@ -1547,6 +1582,8 @@ Ce entraînera l'ajout d'un nombre aléatoire à 8 chiffres au nom de la file d'
Toutes les instances de la configuration ayant accès au même répertoire d'état
utilisera le nom de file d'attente ainsi défini.

+L'option *subtopic* devrait apparaître après le paramètre queueShare dans les fichiers
+pour que les liaisons de sujet s'appliquent à la file d'attente spécifiée.

randomize
----------------
@@ -1587,8 +1624,12 @@ donné.

Cette option impose également la traversée de liens symboliques. Cette option
est utilisée pour étudier certains cas d'utilisation et pourrait disparaître à l'avenir.

+recursive (par défaut : activé)
+--------------------------------------
-
+Lors de l'analyse d'un chemin (pour un *poll*, une *post*, un *cpost* ou un *watch*), si vous
+rencontrez un répertoire, incluez-vous également son contenu ? Pour analyser uniquement le répertoire spécifié
+et aucun sous-répertoire, spécifiez *recursive off*.

rename
---------------
@@ -1770,8 +1811,8 @@ sanity_log_dead (défaut: 1.5*housekeeping)

L’option **sanity_log_dead** définit la durée à prendre en compte avant de redémarrer un composant.
-scheduled_interval,scheduled_hour,scheduled_minute
--------------------------------------------------
+scheduled_interval,scheduled_hour,scheduled_minute,scheduled_time
+-----------------------------------------------------------------

Lorsque vous travaillez avec des flux cédulés, tels que des sondages, vous pouvez configurer une
durée (unité: seconde par défaut, suffixes : m-minute, h-heure) à laquelle exécuter un
@@ -1950,7 +1991,8 @@ origine. À utiliser uniquement avec des flux de données fiables et organisés

subtopic (défaut: #)
-----------------------------------

-Dans les publications d’un échange, le paramètre de subtopic restreint la sélection du produit.
+Dans les publications d’un échange, le paramètre de subtopic sert à préciser les messages
+à placer dans la file d'attente actuellement sélectionnée.
Pour donner la bonne valeur au subtopic,
on a le choix de filtrer en utilisant **subtopic** seulement avec le wildcarding limité d’AMQP et une longueur
limitée à 255 octets encodés, ou de manière plus puissante, les expressions régulière basés sur les mécanismes
**accept/reject** décrits ci-dessous. La différence est que le
@@ -1963,12 +2005,17 @@ Il est recommandé d’utiliser le filtrage côté serveur pour réduire le nomb
au client et envoyer seulement ce qui est pertinent, et seulement régler les mécanismes côté client,
économisant du bandwidth et du traitement pour tous.

-topicPrefix est principalement utilisé lors des transitions de version de protocole,
-où l’on souhaite spécifier une version de protocole non-commune des messages d'annonce auquel s’abonner.
-
-Normalement, l’utilisateur spécifie un échange et plusieurs options de subtopic. **subtopic** est ce qui est normalement utilisé pour indiquer les messages d'annonce d'intérêt. Pour utiliser **subtopic** pour filtrer les produits,
+
+Souvent, l'utilisateur spécifie un échange et plusieurs options de sous-thèmes.
+Le **subtopic** est ce qui est normalement utilisé pour indiquer les messages d'intérêt
+pour une file d'attente donnée. Si nécessaire, **queueName** et/ou **queueShare**
+doivent apparaître plus tôt dans le fichier de configuration pour que le sous-thème
+s'applique à la file d'attente sélectionnée.
+
il faut que la chaîne de caractère subtopic corresponde au chemin relatif du produit.
+(les pompes non Sarracenia peuvent avoir d'autres conventions de hiérarchie des sujets.)
+
Par exemple, en consommant à partir de DD, pour donner la bonne valeur au subtopic, il est
possible de parcourir le site Web **http://dd.weather.gc.ca** et noter tous les répertoires
@@ -2048,8 +2095,8 @@ source et la destination sont comparés.

Lorsqu’il est défini dans un composant de publication, les en-têtes *atime* et *mtime*
des messages d'annonce sont éliminés.

-timeout (défaut: 0)
--------------------------------
+timeout (défaut: 300)
+----------------------------------

L’option **timeout** définit le nombre de secondes à attendre avant d’interrompre un
transfert de connexion ou de téléchargement (appliqué pendant le transfert).
@@ -2063,10 +2110,10 @@ exemples: Canada/Pacific, Pacific/Nauru, Europe/Paris

Seulement actif dans le contexte de sondage de serveur FTP.

-tlsRigour (défaut: medium)
+tlsRigour (défaut: normal)
--------------------------

-*tlsRigour* peut être réglé a : *lax, medium ou strict*, et donne un indice à l'application par rapport à la
+*tlsRigour* peut être réglé à : *lax, normal ou strict*, et donne un indice à l'application par rapport à la
configuration des connexions TLS.
TLS, ou Transport Layer Security (autrefois appelée Secure Socket Layer (SSL)) est
l’encapsulation de sockets TCP normales en cryptage standard. Il existe de nombreux aspects de
négociations TLS, vérification du nom d’hôte, vérification des certificats, validation, choix de
@@ -2094,6 +2141,12 @@ rajouté au subtopic pour former une hiérarchie complète de thèmes (topics).

Cette option s’applique aux liaisons d’abonnement. Indique la version des messages
d'annonce reçus dans les subtopics. (V03 fait référence à ``_)

+topicPrefix sert principalement lors des transitions de format de messages.
+Le topicPrefix identifie dans quelle version de format les messages sous le thème
+sont créés. Sr3 s'attend à des messages v03 par défaut, mais il y a plein
+de sources qui offrent l'ancienne version (nécessitant un topicPrefix de *v02.post*)
+pour s'abonner à l'ancienne version des messages.
+
topicCopy (défaut: False)
-------------------------
diff --git a/sarracenia/__init__.py b/sarracenia/__init__.py
index a739e2ecd..f0c8b64da 100755
--- a/sarracenia/__init__.py
+++ b/sarracenia/__init__.py
@@ -417,13 +417,17 @@ def durationToSeconds(str_value, default=None) -> float:
    206: "Partial Content: received and inserted.",
    304: "Not modified (Checksum validated, unchanged, so no download resulted.)",
    307: "Insertion deferred (writing to temporary part file for the moment.)",
-    410: "Gone: server data different from notification message",
+    404: "Not Found: no pattern match",
+    406: "Not Acceptable: file older than fileAgeMax",
+    410: "Gone: file too old",
    417: "Expectation Failed: invalid notification message (corrupt headers)",
    422: "Unprocessable Content: could not determine path to transfer to",
+    425: "Too Early: file younger than fileAgeMin",
    499: "Failure: Not Copied. SFTP/FTP/HTTP download problem",
    #FIXME : should not have 503 error code 3 times in a row
    # 503: "Service unavailable. delete (File removal not currently supported.)",
    503: "Unable to process: Service unavailable",
+    504: "Gateway Timeout: message too old"
    # 503: "Unsupported transport protocol specified in posting."
}
@@ -563,7 +567,8 @@ def deriveSource(msg,o):
        pass
    elif source:
        msg['source'] = source
-        msg['_deleteOnPost'] |= set(['source'])
+    elif 'source' in msg:
+        del msg['source']

def deriveTopics(msg,o,topic,separator='.'):
    """
@@ -1014,10 +1019,11 @@ def getContent(msg,options=None):

    # inlined/embedded case.
    if 'content' in msg:
+        logger.info("Getting msg from inline'd content")
        if msg['content']['encoding'] == 'base64':
            return b64decode(msg['content']['value'])
        else:
-            return msg['content']['value'].encode('utf-8')
+            return msg['content']['value'].encode('utf-8') if not hasattr(options,'inputCharset') else msg['content']['value'].encode(options.inputCharset)

    path=''
    if msg['baseUrl'].startswith('file:'):
diff --git a/sarracenia/_version.py b/sarracenia/_version.py
index 42c895b90..4c319678d 100755
--- a/sarracenia/_version.py
+++ b/sarracenia/_version.py
@@ -1 +1 @@
-__version__ = "3.00.55post1"
+__version__ = "3.00.56"
diff --git a/sarracenia/bulletin.py b/sarracenia/bulletin.py
index 6e2b3b0a0..fc461f5a1 100644
--- a/sarracenia/bulletin.py
+++ b/sarracenia/bulletin.py
@@ -23,7 +23,9 @@ class Bulletin:
      from sarracenia.bulletin import Bulletin
    """

-    def __init__(self):
+    def __init__(self,options):
+        super().__init__()
+        self.o = options
        self.seq = 0
        self.binary = 0
@@ -125,7 +127,7 @@ def getData(self, msg, path):
        try:
            self.binary = 0

-            if msg['content']:
+            if 'content' in msg:
                data = msg['content']['value']
                # Change from b64. We want to get the header from the raw binary data. Not retrievable in b64 format
@@ -339,4 +341,4 @@ def getTime(self, data):
            ddHHMM = time.strftime('%d%H%M', timeStruct)
            return ddHHMM
        except Exception as e:
-            return None
\ No newline at end of file
+            return None
diff --git a/sarracenia/config.py b/sarracenia/config/__init__.py
similarity index 92%
rename from sarracenia/config.py
rename to sarracenia/config/__init__.py
index addf4e77b..d14bdee20 100755
--- a/sarracenia/config.py
+++ b/sarracenia/config/__init__.py
@@ -48,7 +48,8 @@ def __call__(self, parser, namespace, values, option_string=None):
import sarracenia
from sarracenia import durationToSeconds, site_config_dir, user_config_dir, user_cache_dir
from sarracenia.featuredetection import features
-import sarracenia.credentials
+import sarracenia.config.credentials
+from sarracenia.config.subscription import Subscription,Subscriptions

import sarracenia.flow
import sarracenia.flowcb
@@ -95,7 +96,9 @@ def __repr__(self) -> str:
    'inline': False,
    'inlineOnly': False,
    'identity_method': 'sha512',
+    'logDuplicates': False,
    'logFormat': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s %(message)s',
+    'logJson': False,
    'logMetrics': False,
    'logStdout': False,
    'metrics_writeInterval': 5,
@@ -139,14 +142,16 @@ def __repr__(self) -> str:

# all the boolean settings.
-flag_options = [ 'acceptSizeWrong', 'acceptUnmatched', 'amqp_consumer', 'baseUrl_relPath', 'debug', \
-    'delete', 'discard', 'download', 'dry_run', 'durable', 'exchangeDeclare', 'exchangeSplit', 'logReject', 'realpathFilter', \
-    'follow_symlinks', 'force_polling', 'inline', 'inlineOnly', 'inplace', 'logMetrics', 'logStdout', 'logReject', 'restore', \
-    'messageDebugDump', 'mirror', 'timeCopy', 'notify_only', 'overwrite', 'post_on_start', \
-    'permCopy', 'persistent', 'queueBind', 'queueDeclare', 'randomize', 'recursive', 'realpathPost', \
+
+flag_options = [ 'acceptSizeWrong', 'acceptUnmatched', 'amqp_consumer', 'baseUrl_relPath', 'debug',
+    'delete', 'discard', 'download', 'dry_run', 'durable', 'exchangeDeclare', 'exchangeSplit',
+    'follow_symlinks', 'force_polling', 'inline', 'inlineOnly', 'inplace', 'logDuplicates', 'logJson',
+    'logMetrics', 'logReject', 'logStdout', 'restore', 'messageDebugDump',
+    'mirror', 'notify_only', 'overwrite', 'post_on_start', 'permCopy', 'persistent',
+    'queueBind', 'queueDeclare', 'randomize', 'recursive', 'realpathFilter', 'realpathPost',
    'reconnect', 'report', 'reset', 'retry_refilter', 'retryEmptyBeforeExit', 'save',
-    'sundew_compat_regex_first_match_is_zero', 'sourceFromExchange', 'sourceFromMessage', 'topicCopy',
-    'statehost', 'users', 'v2compatRenameDoublePost', 'wololo'
+    'sundew_compat_regex_first_match_is_zero', 'sourceFromExchange', 'sourceFromMessage',
+    'statehost', 'timeCopy', 'topicCopy', 'users', 'v2compatRenameDoublePost', 'wololo'
]

float_options = [ 'messageRateMax', 'messageRateMin' ]
@@ -163,7 +168,7 @@ def __repr__(self) -> str:
set_options = [ 'logEvents', 'fileEvents' ]

set_choices = {
-    'logEvents' : set(sarracenia.flowcb.entry_points + [ 'reject' ]),
+    'logEvents' : set(sarracenia.flowcb.entry_points + [ 'reject', 'nodupe' ]),
    'fileEvents' : set( [ 'create', 'delete', 'link', 'mkdir', 'modify', 'rmdir' ] )
}

# FIXME: doesn't work... wonder why?
@@ -174,8 +179,9 @@ def __repr__(self) -> str: size_options = ['accelThreshold', 'blockSize', 'bufSize', 'byteRateMax', 'fileSizeMax', 'inlineByteMax'] str_options = [ + 'accelCpCommand', 'accelWgetCommand', 'accelScpCommand', 'action', 'admin', 'baseDir', 'broker', 'cluster', 'directory', 'exchange', - 'exchange_suffix', 'feeder', 'filename', 'flatten', 'flowMain', 'header', + 'exchangeSuffix', 'feeder', 'filename', 'flatten', 'flowMain', 'header', 'hostname', 'identity', 'inlineEncoding', 'logFormat', 'logLevel', 'pollUrl', 'post_baseUrl', 'post_baseDir', 'post_broker', 'post_exchange', 'post_exchangeSuffix', 'post_format', 'post_topic', 'queueName', 'queueShare', 'sendTo', 'rename', @@ -222,8 +228,8 @@ def __repr__(self) -> str: 'accel_scp': ['continue'], 'accel_cp': ['continue'], 'msg_total_save': ['continue'], + 'file_total_save' : [ 'continue' ], 'post_total_save': ['continue'], - 'post_total_interval': ['continue'] }, 'destfn_script': { 'manual_conversion_required' : [ 'continue' ] }, 'do_get': { 'manual_conversion_required' : [ 'continue' ] }, @@ -235,6 +241,11 @@ def __repr__(self) -> str: 'file_email' : [ 'callback', 'send.email' ], }, 'do_task': { 'manual_conversion_required' : [ 'continue' ] }, + 'file_total_interval' : [ 'continue' ], + 'post_total_interval': [ 'continue' ], + 'ls_file_index': [ 'continue' ], + 'post_log_format': ['continue'], + 'msg_total_interval' : [ 'continue' ], 'no_download': [ 'download', 'False' ], 'notify_only': [ 'download', 'False' ], 'do_data': { 'manual_conversion_required' : [ 'continue' ] }, @@ -392,7 +403,7 @@ def parse_float(cstr): return 0.0 def get_package_lib_dir(): - return os.path.dirname(inspect.getfile(Config)) + return os.path.dirname(inspect.getfile(sarracenia)) def get_site_config_dir(): @@ -636,7 +647,7 @@ class Config: it can be instantiated with one of: - * one_config(component, config, action, isPost=False) -- read the options for + * one_config(component, config, action, isPost=False, hostdir=None) -- read the options for a given component an configuration, (all in one call.) On the other hand, a configu can be built up from the following constructors: @@ -648,7 +659,7 @@ class Config: cfg = no_file_config() - cfg.broker = sarracenia.credentials.Credential('amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') + cfg.broker = sarracenia.config.credentials.Credential('amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') cfg.topicPrefix = [ 'v02', 'post'] cfg.component = 'subscribe' cfg.config = 'flow_demo' @@ -693,9 +704,13 @@ class Config: # Correct name on the right, old name on the left. 
synonyms = { 'a': 'action', + 'accel_cp_command': 'accelCpCommand', + 'accel_scp_command': 'accelScpCommand', + 'accel_wget_command': 'accelWgetCommand', 'accel_cp_threshold': 'accelThreshold', 'accel_scp_threshold': 'accelThreshold', 'accel_wget_threshold': 'accelThreshold', + 'accel_threshold': 'accelThreshold', 'accept_unmatch': 'acceptUnmatched', 'accept_unmatched': 'acceptUnmatched', 'at': 'attempts', @@ -727,6 +742,7 @@ class Config: 'destination_timezone': 'timezone', 'document_root': 'documentRoot', 'download-and-discard': 'discard', + 'download_cp_command': 'accelCpCommand', 'e' : 'fileEvents', 'events' : 'fileEvents', 'ex': 'exchange', @@ -812,19 +828,21 @@ def __init__(self, parent=None) -> 'Config': instantiate an empty Configuration """ self.bindings = [] + self.subscriptions = Subscriptions() self.__admin = None self.__broker = None self.__post_broker = None + self.__queue_file_read = False if Config.credentials is None: - Config.credentials = sarracenia.credentials.CredentialDB() + Config.credentials = sarracenia.config.credentials.CredentialDB() Config.credentials.read(get_user_config_dir() + os.sep + "credentials.conf") self.directory = None self.env = copy.deepcopy(os.environ) - egdir = os.path.dirname(inspect.getfile(sarracenia.config.Config)) + os.sep + 'examples' + egdir = os.path.dirname(inspect.getfile(sarracenia)) + os.sep + 'examples' self.config_search_path = [ "." , get_user_config_dir(), egdir, egdir + os.sep + 'flow' ] @@ -923,7 +941,7 @@ def __deepcopy__(self, memo) -> 'Configuration': setattr(result, k, copy.deepcopy(v, memo)) return result - def _validate_urlstr(self, urlstr) -> tuple : + def hohoo_validate_urlstr(self, urlstr) -> tuple : """ returns a tuple ( bool, expanded_url ) the bool is whether the expansion worked, and the expanded_url is one with @@ -935,7 +953,7 @@ def _validate_urlstr(self, urlstr) -> tuple : if cred_details is None: logging.critical("bad credential %s" % urlstr) # Callers expect that a Credential object will be returned - cred_details = sarracenia.credentials.Credential() + cred_details = sarracenia.config.credentials.Credential() cred_details.url = urllib.parse.urlparse(urlstr) return False, cred_details return True, cred_details @@ -964,7 +982,7 @@ def admin(self): @admin.setter def admin(self, v): if type(v) is str: - ok, cred_details = self._validate_urlstr(v) + ok, cred_details = self.credentials.validate_urlstr(v) if ok: self.__admin = cred_details else: @@ -977,7 +995,7 @@ def broker(self): @broker.setter def broker(self, v): if type(v) is str: - ok, cred_details = self._validate_urlstr(v) + ok, cred_details = self.credentials.validate_urlstr(v) if ok: self.__broker = cred_details else: @@ -990,7 +1008,7 @@ def post_broker(self): @post_broker.setter def post_broker(self, v): if type(v) is str: - ok, cred_details = self._validate_urlstr(v) + ok, cred_details = self.credentials.validate_urlstr(v) if ok: self.__post_broker = cred_details else: @@ -1229,6 +1247,12 @@ def dump(self): while i < len(c['masks']): d['masks'].append( self.mask_ppstr(c['masks'][i]) ) i+=1 + elif k in ['broker', 'post_broker' ]: + d[k]=str(c[k]) + elif k in ['subscriptions' ]: + d['subscriptions'] = c['subscriptions'] + for s in d['subscriptions']: + s['broker'] = str(s['broker']) else: d[k] = copy.deepcopy(c[k]) @@ -1236,7 +1260,7 @@ def dump(self): del d[omit] for k in d: - if type(d[k]) is sarracenia.credentials.Credential : + if type(d[k]) is sarracenia.config.credentials.Credential : d[k] = str(d[k]) pprint.pprint( d, width=term.columns, 
compact=True ) @@ -1354,14 +1378,14 @@ def _parse_binding(self, subtopic_string): also should sqwawk about error if no exchange or topicPrefix defined. also None to reset to empty, not done. """ - if hasattr(self, 'broker') and self.broker is not None and self.broker.url is not None: - self._resolve_exchange() + if not hasattr(self, 'broker') or self.broker is None or self.broker.url is None: + logger.error( f"{','.join(self.files)}:{self.lineno} broker needed before subtopic" ) + return - if type(subtopic_string) is str: - if not hasattr(self, 'broker') or self.broker is None or self.broker.url is None: - logger.error( f"{','.join(self.files)}:{self.lineno} broker needed before subtopic" ) - return + self._resolve_exchange() + self.queueName = self._resolveQueueName(self.component,self.config) + if type(subtopic_string) is str: if self.broker.url.scheme == 'amq' : subtopic = subtopic_string.split('.') else: @@ -1369,6 +1393,7 @@ def _parse_binding(self, subtopic_string): if hasattr(self, 'exchange') and hasattr(self, 'topicPrefix'): self.bindings.append((self.exchange, self.topicPrefix, subtopic)) + self.subscriptions.add(Subscription(self, self.queueName, subtopic)) def _parse_v2plugin(self, entryPoint, value): """ @@ -1489,7 +1514,7 @@ def parse_file(self, cfg, component=None): else: cfname = cfg - logger.debug( f'looking for {cfg} (in {os.getcwd()}') + #logger.debug( f'looking for {cfg} (in {os.getcwd()}') cfg=os.path.expanduser(cfg) @@ -1505,12 +1530,14 @@ def parse_file(self, cfg, component=None): if not cfgfilepath: logger.error( f'failed to find {cfg}' ) return - logger.debug( f'found {cfgfilepath}') + #logger.debug( f'found {cfgfilepath}') lineno=0 saved_lineno=0 self.files.append(cfgfilepath) + self.subtopic_seen=False + for l in open(cfgfilepath, "r").readlines(): lineno+=1 if self.lineno > 0: @@ -1544,6 +1571,7 @@ def parse_line(self, component, cfg, cfname, lineno, l ): if (k in convert_to_v3): self.log_flowcb_needed |= '_log' in k + if (len(line) > 1): v = line[1].replace('.py', '', 1) if (v in convert_to_v3[k]): @@ -1554,7 +1582,15 @@ def parse_line(self, component, cfg, cfname, lineno, l ): else: logger.debug( f'{cfname}:{lineno} obsolete v2:\"{l}\" converted to sr3:\"{" ".join(line)}\"' ) else: + if convert_to_v3[k] == 'continue': + if k in self.undeclared: + self.undeclared.remove(k) + line = convert_to_v3[k] + if 'continue' in line: + if k in self.unknown: + self.unknown.remove(k) + return k=line[0] v=line[1] @@ -1577,6 +1613,9 @@ def parse_line(self, component, cfg, cfname, lineno, l ): setattr(self, k, isTrue(v)) if k in ['logReject'] and self.logReject: self.logEvents = self.logEvents | set(['reject']) + + if k in ['logDuplicates'] and self.logDuplicates: + self.logEvents = self.logEvents | set(['nodupe']) return if len(line) < 2: @@ -1609,6 +1648,7 @@ def parse_line(self, component, cfg, cfname, lineno, l ): logger.error( f"{','.join(self.files)}:{self.lineno} file {v} failed to parse: {ex}" ) logger.debug('Exception details: ', exc_info=True) elif k in ['subtopic']: + self.subtopic_seen=True self._parse_binding(v) elif k in ['topicPrefix']: if '/' in v : @@ -1716,6 +1756,12 @@ def parse_line(self, component, cfg, cfname, lineno, l ): logger.error( f'{",".join(self.files)}:{lineno} invalid entry {i} in {k}. Must be one of: {set_choices[k]}' ) elif k in str_options: + # queueName warning... is for something that is not an error... + # probably need to remove this warning later... 
because people could use default queue with subtopic and + # specify a second queue with different bindings... so this warning could be complaining about something + # that is correct. but in every current case, the warning will be helpful. + if ( k == 'queueName' ) and self.subtopic_seen: + logger.warning( f"{','.join(self.files)}:{lineno} queueName usually should be before subtopic in configs: subtopic to default queue" ) if ( k == 'directory' ) and not self.download: logger.info( f"{','.join(self.files)}:{lineno} if download is false, directory has no effect" ) @@ -1746,6 +1792,40 @@ def parse_line(self, component, cfg, cfname, lineno, l ): setattr(self, k, v) self.undeclared.append( (cfname, lineno, k) ) + def _getSubscriptionsFileName(self,component,cfg): + + sfile = sarracenia.user_cache_dir( + Config.appdir_stuff['appname'], + Config.appdir_stuff['appauthor']) + + if self.statehost: + sfile += os.sep + self.hostdir + + sfile += os.sep + component + os.sep + cfg + sfile += os.sep + "subscriptions.json" + return sfile + + def _writeQueueFile(self): + + # first make sure directory exists. + if not os.path.isdir(os.path.dirname(self.queue_filename)): + pathlib.Path(os.path.dirname(self.queue_filename)).mkdir(parents=True, exist_ok=True) + + if not os.path.isfile(self.queue_filename) and (self.queueName is not None): + tmpQfile=self.queue_filename+'.tmp' + if not os.path.isfile(tmpQfile): + f = open(tmpQfile, 'w') + f.write(self.queueName) + f.close() + os.rename( tmpQfile, self.queue_filename ) + else: + logger.info( f'Queue name {self.queueName} being persisted to {self.queue_filename} by some other process, so ignoring it.' ) + return + + logger.debug( f'queue name {self.queueName} persisted to {self.queue_filename}' ) + + + def _resolveQueueName(self,component,cfg): queuefile = sarracenia.user_cache_dir( @@ -1765,6 +1845,16 @@ def _resolveQueueName(self,component,cfg): self.queue_filename = queuefile + if not hasattr(self, 'old_subscriptions'): + self.subscriptionsPath=self._getSubscriptionsFileName(self.component,self.config) + self.old_subscriptions=self.subscriptions.read(self, self.subscriptionsPath) + + if hasattr(self, 'old_subscriptions') and self.old_subscriptions: + for s in self.old_subscriptions: + if self.broker == s['broker']: + #logger.info( f" {s['queue']['name']=} ") + return s['queue']['name'] + #while (not hasattr(self, 'queueName')) or (self.queueName is None): """ @@ -1781,68 +1871,40 @@ def _resolveQueueName(self,component,cfg): """ + queueName=self.queueName if hasattr(self,'no') and self.no > 1: - # worker instances need give lead instance time to write the queuefile - time.sleep(randint(4,14)) - queue_file_read=False config_read_try=0 - while not queue_file_read: - if os.path.isfile(queuefile): - f = open(queuefile, 'r') - self.queueName = f.read() - f.close() - else: - self.queueName = '' - - config_read_try += 1 - logger.debug( f'instance read try {config_read_try} queueName {self.queueName} from queue state file {queuefile}' ) - if len(self.queueName) < 1: - nap=randint(1,4) - logger.debug( f'queue name corrupt take a short {nap} second nap, then try again' ) - time.sleep(nap) - if config_read_try > 5: - logger.critical( f'failed to read queue name from {queuefile}') - sys.exit(2) - else: - queue_file_read=True + if os.path.isfile(queuefile): + f = open(queuefile, 'r') + queueName = f.read() + f.close() + else: + queueName = '' + logger.debug( f'instance read queueName {queueName} from queue state file {queuefile}' ) + if len(queueName) < 1: + 
logger.critical( f'failed to read queue name from {queuefile}')
+                sys.exit(2)

        else: # only lead instance (0-foreground, 1-start, or none in the case of 'declare')
              # should write the state file.
-
-            # lead instance shou
-            if os.path.isfile(queuefile):
+            # lead instance should
+            if not self.__queue_file_read and os.path.isfile(queuefile):
                f = open(queuefile, 'r')
-                self.queueName = f.read()
+                queueName = f.read()
                f.close()
+                self.__queue_file_read=True

            #if the queuefile is corrupt, then will need to guess anyways.
-            if ( self.queueName is None ) or ( self.queueName == '' ):
+            if ( queueName is None ) or ( queueName == '' ):
                queueShare = self._varsub(self.queueShare)
-                self.queueName = f"q_{self.broker.url.username}." + '.'.join([component,cfg,queueShare])
+                queueName = f"q_{self.broker.url.username}." + '.'.join([component,cfg,queueShare])
-                logger.debug( f'default guessed queueName {self.queueName} ' )
+                logger.debug( f'default guessed queueName {queueName} ' )
-
-        if self.action not in [ 'start', 'foreground', 'declare' ]:
-            return
+        return queueName

-        # first make sure directory exists.
-        if not os.path.isdir(os.path.dirname(queuefile)):
-            pathlib.Path(os.path.dirname(queuefile)).mkdir(parents=True, exist_ok=True)
-
-        if not os.path.isfile(queuefile) and (self.queueName is not None):
-            tmpQfile=queuefile+'.tmp'
-            if not os.path.isfile(tmpQfile):
-                f = open(tmpQfile, 'w')
-                f.write(self.queueName)
-                f.close()
-                os.rename( tmpQfile, queuefile )
-            else:
-                logger.info( f'Queue name {self.queueName} being persisted to {queuefile} by some other process, so ignoring it.' )
-                return
-        logger.debug( f'queue name {self.queueName} persisted to {queuefile}' )

@@ -1988,7 +2050,7 @@ def finalize(self, component=None, config=None):

        if self.broker and self.broker.url and self.broker.url.username:
            self._resolve_exchange()
-            self._resolveQueueName(component,cfg)
+            self.queueName = self._resolveQueueName(component,cfg)

        valid_inlineEncodings = [ 'guess', 'text', 'binary' ]
        if hasattr(self, 'inlineEncoding') and self.inlineEncoding not in valid_inlineEncodings:
@@ -2004,9 +2066,23 @@ def finalize(self, component=None, config=None):
        self.retry_path = self.pid_filename.replace('.pid', '.retry')
        self.novipFilename = self.pid_filename.replace('.pid', '.noVip')

+        self.subscriptionsPath=self._getSubscriptionsFileName(self.component,self.config)
+
+        if self.broker and self.broker.url and self.broker.url.username:
+
+            if (self.bindings == [] and hasattr(self, 'exchange')):
+                self.bindings = [(self.exchange, self.topicPrefix, [ '#' ])]
+                self.subscriptions.append(Subscription(self, self.queueName, '#'))
+
+            # read old subscriptions, compare to current.
+            #old_subscriptions=self.subscriptions.read(self, self.subscriptionsPath)
+
+            if self.action in [ 'start', 'foreground', 'declare' ] and \
+                (not hasattr(self,'no') or self.no < 2) and \
+                self.broker and self.broker.url :
-        if (self.bindings == [] and hasattr(self, 'exchange')):
-            self.bindings = [(self.exchange, self.topicPrefix, [ '#' ])]
+                self.subscriptions.write(self.subscriptionsPath)
+                self._writeQueueFile()

        if hasattr(self, 'documentRoot') and (self.documentRoot is not None):
            path = os.path.expanduser(os.path.abspath(self.documentRoot))
@@ -2138,6 +2214,9 @@ def sundew_dirPattern(self, pattern, urlstr, basename, destDir):
        does substitutions for patterns in directories.
""" + if destDir=='/': + return destDir + BN = basename.split(":") EN = BN[0].split("_") @@ -2421,6 +2500,7 @@ def __call__(self, parser, namespace, values, option_string): namespace.bindings = [] namespace._resolve_exchange() + qn = namespace._resolveQueueName(namespace.component,namespace.config) if not hasattr(namespace, 'broker'): raise Exception('broker needed before subtopic') @@ -2442,6 +2522,7 @@ def __call__(self, parser, namespace, values, option_string): namespace.bindings.append( (namespace.exchange, topicPrefix, values)) + namespace.subscriptions.add(Subscription(namespace, qn, values)) def parse_args(self, isPost=False): """ @@ -2735,7 +2816,7 @@ def no_file_config(): return cfg -def one_config(component, config, action, isPost=False): +def one_config(component, config, action, isPost=False, hostDir=None): """ single call return a fully parsed single configuration for a single component to run. @@ -2761,6 +2842,9 @@ def one_config(component, config, action, isPost=False): cfg = copy.deepcopy(default_cfg) + if hostDir: + cfg.hostdir = hostDir + cfg.applyComponentDefaults( component ) store_pwd = os.getcwd() @@ -2773,6 +2857,10 @@ def one_config(component, config, action, isPost=False): else: fname = os.path.expanduser(config) + #FIXME parse old subscriptions here. + cfg.subscriptionsPath=cfg._getSubscriptionsFileName(cfg.component,cfg.config) + cfg.old_subscriptions=cfg.subscriptions.read(cfg, cfg.subscriptionsPath) + if os.path.exists(fname): cfg.parse_file(fname,component) else: diff --git a/sarracenia/credentials.py b/sarracenia/config/credentials.py similarity index 85% rename from sarracenia/credentials.py rename to sarracenia/config/credentials.py index e51f402a7..98cb2151b 100755 --- a/sarracenia/credentials.py +++ b/sarracenia/config/credentials.py @@ -72,7 +72,7 @@ class Credential: # build a credential from a url string: - from sarracenia.credentials import Credential + from sarracenia.config.credentials import Credential broker = Credential('amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') @@ -124,18 +124,25 @@ def __str__(self): if self.url.path: s += self.url.path - s += " %s" % self.ssh_keyfile - s += " %s" % self.passive - s += " %s" % self.binary - s += " %s" % self.tls - s += " %s" % self.prot_p - s += " %s" % self.bearer_token - s += " %s" % self.login_method - s += " %s" % self.s3_endpoint - #want to show they provided a session token, but not leak it (like passwords above) - s += " %s" % 'Yes' if self.s3_session_token != None else 'No' - s += " %s" % 'Yes' if self.azure_credentials != None else 'No' - s += " %s" % self.implicit_ftps + alist = [ 'ssh_keyfile', 'passive', 'binary', 'tls', 'prot_p', 'bearer_token', 'login_method', 's3_endpoint', 'implicit_ftps'] + if hasattr(self,'url') and self.url: + scheme = self.url.scheme + if scheme.startswith('ftp'): + alist = [ 'passive', 'binary', 'tls', 'prot_p', 'login_method', 'implicit_ftps' ] + elif scheme.startswith('sftp'): + alist = [ 'ssh_keyfile' ] + elif scheme.startswith('amqp') or scheme.startswith('mqtt'): + alist = [ 'login_method' ] + elif scheme.startswith('https'): + alist = [ 'prot_p', 'bearer_token', 'login_method', 's3_endpoint', 'implicit_ftps'] + if self.s3_session_token: + s += " %s" % 's3_session_token=Yes' + if self.azure_credentials: + s += " %s" % 'azure_credentials=Yes' + + for a in alist: + if getattr(self, a): + s+=f"+{a}={getattr(self,a)}" return s @@ -144,11 +151,11 @@ class CredentialDB: """Parses, stores and manages Credential objects. 
Attributes:
-        credentials (dict): contains all sarracenia.credentials.Credential objects managed by the CredentialDB.
+        credentials (dict): contains all sarracenia.config.credentials.Credential objects managed by the CredentialDB.

    Usage:

        # build a credential via lookup in the normal files:

-        import CredentialDB from sarracenia.credentials
+        from sarracenia.config.credentials import CredentialDB

        credentials = CredentialDB.read( "/the/path/to/the/credentials.conf" )
@@ -174,7 +181,7 @@ def add(self, urlstr, details=None):

        Args:
            urlstr (str): string-formatted URL to be parsed and added to DB.
-            details (sarracenia.credentials.Credential): a Credential object can be passed in, otherwise one is
+            details (sarracenia.config.credentials.Credential): a Credential object can be passed in, otherwise one is
                created by parsing urlstr.
        """
@@ -200,7 +207,7 @@ def get(self, urlstr):
                cache_result (bool): ``True`` if the credential was retrieved from the CredentialDB cache,
                    ``False`` if it was not in the cache. Note that ``False`` does not imply the Credential or
                    urlstr is invalid.
-                credential (sarracenia.credentials.Credential): the Credential
+                credential (sarracenia.config.credentials.Credential): the Credential
                    object matching the urlstr, ``None`` if urlstr is invalid.
        """
        #logger.debug("CredentialDB get %s" % urlstr)
@@ -266,7 +273,7 @@ def isValid(self, url, details=None):

        Args:
            url (urllib.parse.ParseResult): ParseResult object for a URL.
-            details (sarracenia.credentials.Credential): sarra Credential object containing additional details about
+            details (sarracenia.config.credentials.Credential): sarra Credential object containing additional details about
                the URL.

        Returns:
@@ -306,7 +313,7 @@ def isValid(self, url, details=None):

        # we only have a user ... permitted only for sftp
        if url.scheme != 'sftp':
-            logger.error( f'credential not found' )
+            logger.error( f"credential {url} not found" )
            return False

        # sftp and an ssh_keyfile was provided... check that it exists
@@ -429,7 +436,7 @@ def read(self, path):

    def _resolve(self, urlstr, url=None):
        """Resolve credentials for AMQP vhost from ones passed as a string, and optionally a urllib.parse.ParseResult
-        object, into a sarracenia.credentials.Credential object.
+        object, into a sarracenia.config.credentials.Credential object.

        Args:
            urlstr (str): credentials in a URL string.
@@ -438,7 +445,7 @@ def _resolve(self, urlstr, url=None):
        Returns:
            tuple: containing result (bool): ``False`` if the creds were not in the CredentialDB. ``True`` if they were.
-                details (sarracenia.credentials.Credential): the updated Credential object, or ``None``.
+                details (sarracenia.config.credentials.Credential): the updated Credential object, or ``None``.
        """

@@ -480,3 +487,22 @@ def _resolve(self, urlstr, url=None):
                return True, details

        return False, None
+
+
+    def validate_urlstr(self, urlstr) -> tuple :
+        """
+        returns a tuple ( bool, Credential ):
+        the bool is whether the lookup worked, and the Credential is the one with
+        the necessary authentication details added from the credentials database.
+
+        """
+        # check url and add credentials if needed from credential file
+        ok, cred_details = self.get(urlstr)
+        if cred_details is None:
+            logger.critical("bad credential %s" % urlstr)
+            # Callers expect that a Credential object will be returned
+            cred_details = Credential()
+            cred_details.url = urllib.parse.urlparse(urlstr)
+            return False, cred_details
+        return True, cred_details
+
diff --git a/sarracenia/config/subscription.py b/sarracenia/config/subscription.py
new file mode 100755
index 000000000..bd882181d
--- /dev/null
+++ b/sarracenia/config/subscription.py
@@ -0,0 +1,93 @@
+
+import json
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class Subscription(dict):
+
+    def __init__(self, options, queueName, subtopic):
+
+        self['broker'] = options.broker
+        self['bindings'] = [ { 'exchange': options.exchange, 'prefix': options.topicPrefix, 'sub': subtopic } ]
+
+        self['queue']={ 'name': queueName, 'cleanup_needed': None }
+        for a in [ 'auto_delete', 'durable', 'expire', 'message_ttl', 'prefetch', 'qos', 'queueBind', 'queueDeclare' ]:
+            aa = a.replace('queue','').lower()
+            if hasattr(options, a) and getattr(options,a):
+                self['queue'][aa] = getattr(options,a)
+
+class Subscriptions(list):
+    # list of subscription
+
+    def read(self,options,fn):
+        try:
+            with open(fn,'r') as f:
+                subs=json.load(f)
+
+            for s in subs:
+                if type(s['broker']) is str:
+                    ok, broker = options.credentials.validate_urlstr(s['broker'])
+                    if ok:
+                        s['broker'] = broker
+            return subs
+        except Exception as Ex:
+            logger.debug( f"failed {fn}: {Ex}" )
+            logger.debug('Exception details: ', exc_info=True)
+            return None
+
+    def write(self,fn):
+
+        jl=[]
+        for s in self:
+            jd=dict(s)  # shallow copy, so stringifying the broker does not clobber the in-memory Credential
+            jd['broker']=str(s['broker'])
+            jl.append(jd)
+
+        try:
+            with open(fn,'w') as f:
+                f.write(json.dumps(jl))
+        except Exception as Ex:
+            logger.error( f"failed: {fn}: {Ex}" )
+            logger.debug('Exception details: ', exc_info=True)
+
+    def add(self, new_subscription):
+
+        found=False
+        for s in self:
+            if ( s['broker'] == new_subscription['broker'] ) and \
+               ( s['queue']['name'] == new_subscription['queue']['name'] ):
+                newb = new_subscription['bindings'][0]
+                for b in s['bindings']:
+                    if newb == b:
+                        found=True
+                if not found:
+                    s['bindings'].append( newb )
+                    found=True
+
+        if not found:
+            self.append(new_subscription)
+
+
+    def deltAnalyze(self, other):
+        """
+        given one list of subscriptions, and another list of subscriptions,
+
+        return the list of subscriptions that are in other, but not in self.
+        or perhaps:
+
+        * for each subscription add s['bindings_to_remove'] ...
+        * for each subscription add s['queue']['cleanup_needed'] = "reason"
+
+        the reason could be:
+        * current expiry mismatch
+        * durable mismatch
+        * auto-delete mismatch
+        * exclusive mismatch
+        """
+        if self == other:
+            return None
+
+        different_subscriptions=[]
+        return different_subscriptions
diff --git a/sarracenia/examples/flow/amserver.conf b/sarracenia/examples/flow/amserver.conf
index b141b4e79..338e4c62b 100644
--- a/sarracenia/examples/flow/amserver.conf
+++ b/sarracenia/examples/flow/amserver.conf
@@ -17,7 +17,9 @@ directory /tmp/am_receiver
accept .*
sum sha512
AllowIPs 127.0.0.1
-AllowIPs 199.212.17.131/24
+AllowIPs 199.212.17.131
+AllowIPs 199.212.17.132
+AllowIPs 199.212.17.133

-sendTo am://0.0.0.0:5003
+sendTo am://0.0.0.0:5005
debug on
diff --git a/sarracenia/examples/flow_api_consumer.py b/sarracenia/examples/flow_api_consumer.py
index 107443b23..4718ac34b 100644
--- a/sarracenia/examples/flow_api_consumer.py
+++ b/sarracenia/examples/flow_api_consumer.py
@@ -2,11 +2,11 @@
import sarracenia.config
from sarracenia.flow.subscribe import Subscribe
import sarracenia.flowcb
-import sarracenia.credentials
+import sarracenia.config.credentials

cfg = sarracenia.config.no_file_config()

-cfg.broker = sarracenia.credentials.Credential(
+cfg.broker = sarracenia.config.credentials.Credential(
    'amqps://anonymous:anonymous@hpfx.collab.science.gc.ca')
cfg.topicPrefix = ['v02', 'post']
cfg.component = 'subscribe'
diff --git a/sarracenia/examples/moth_api_consumer.py b/sarracenia/examples/moth_api_consumer.py
index cf0d6f506..07ad55e93 100644
--- a/sarracenia/examples/moth_api_consumer.py
+++ b/sarracenia/examples/moth_api_consumer.py
@@ -5,7 +5,7 @@
"""
import sarracenia.moth
import sarracenia.moth.amqp
-import sarracenia.credentials
+import sarracenia.config.credentials
import time
import socket

@@ -13,7 +13,7 @@
options = sarracenia.moth.default_options
options.update(sarracenia.moth.amqp.default_options)

-options['broker'] = sarracenia.credentials.Credential(
+options['broker'] = sarracenia.config.credentials.Credential(
    'amqps://anonymous:anonymous@hpfx.collab.science.gc.ca')

# binding tuple: consists of prefix, exchange, rest.
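
An aside on the two new classes in sarracenia/config/subscription.py above (this sketch is not part of the patch): a rough illustration of how the Subscription/Subscriptions state behaves. Everything here is made up for the demo: the options stand-in, queue name, and output path are hypothetical, and the broker is a plain string where the real code stores a Credential object.

    import json
    from sarracenia.config.subscription import Subscription, Subscriptions

    class DemoOptions:                     # stand-in for a parsed sarracenia.config.Config
        broker = 'amqps://anonymous@hpfx.collab.science.gc.ca'
        exchange = 'xpublic'
        topicPrefix = ['v02', 'post']
        expire = 300                       # picked up into the queue settings by Subscription.__init__

    subs = Subscriptions()
    q = 'q_anonymous.subscribe.demo'       # hypothetical queue name
    subs.add(Subscription(DemoOptions(), q, ['*', 'WXO-DD', 'bulletins', '#']))
    # same broker and queue name: add() extends the bindings of the existing
    # entry instead of appending a duplicate subscription.
    subs.add(Subscription(DemoOptions(), q, ['observations', 'swob-ml', '#']))

    # real path: <cache_dir>/<component>/<config>/subscriptions.json
    subs.write('/tmp/subscriptions.json')
    print(json.dumps(subs, indent=2))      # one subscription, two bindings
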
diff --git a/sarracenia/examples/moth_api_producer.py b/sarracenia/examples/moth_api_producer.py index c52d29efd..390da2d00 100644 --- a/sarracenia/examples/moth_api_producer.py +++ b/sarracenia/examples/moth_api_producer.py @@ -4,7 +4,7 @@ """ import sarracenia.moth import sarracenia -import sarracenia.credentials +import sarracenia.config.credentials from sarracenia.config import default_config import os @@ -23,7 +23,7 @@ cfg = default_config() #cfg.logLevel = 'debug' -cfg.broker = sarracenia.credentials.Credential( broker ) +cfg.broker = sarracenia.config.credentials.Credential( broker ) cfg.exchange = 'xsarra' cfg.post_baseUrl = 'http://host' cfg.post_baseDir = '/tmp' diff --git a/sarracenia/examples/moth_http_retrieval.py b/sarracenia/examples/moth_http_retrieval.py index ad0964f1d..fefcde02b 100644 --- a/sarracenia/examples/moth_http_retrieval.py +++ b/sarracenia/examples/moth_http_retrieval.py @@ -4,7 +4,7 @@ """ import sarracenia.moth import sarracenia.moth.amqp -import sarracenia.credentials +import sarracenia.config.credentials import time import socket @@ -14,7 +14,7 @@ options = sarracenia.moth.default_options options.update(sarracenia.moth.amqp.default_options) -options['broker'] = sarracenia.credentials.Credential( +options['broker'] = sarracenia.config.credentials.Credential( 'amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') options['topicPrefix'] = ['v02', 'post'] options['bindings'] = [('xpublic', ['v02', 'post'], diff --git a/sarracenia/examples/subscribe/dd_2mqtt.conf b/sarracenia/examples/subscribe/dd_2mqtt.conf index 7c9f4d4c3..baf3b28ff 100644 --- a/sarracenia/examples/subscribe/dd_2mqtt.conf +++ b/sarracenia/examples/subscribe/dd_2mqtt.conf @@ -15,4 +15,8 @@ post_exchange xpublic directory /tmp/dd_2mqt +# new topic... in 2025 +subtopic *.WXO-DD.bulletins.# + +# old topics likely replaced by above in 2025 subtopic bulletins.# diff --git a/sarracenia/examples/subscribe/dd_amis.conf b/sarracenia/examples/subscribe/dd_amis.conf index fb0cb466d..deb03ef0c 100644 --- a/sarracenia/examples/subscribe/dd_amis.conf +++ b/sarracenia/examples/subscribe/dd_amis.conf @@ -11,6 +11,10 @@ instances 5 # expire, in operational use, should be longer than longest expected interruption expire 10m +# new topic... in 2025 +subtopic *.WXO-DD.bulletins.alphanumeric.# + +# old topics likely replaced by above in 2025 subtopic bulletins.alphanumeric.# directory /tmp/dd_amis diff --git a/sarracenia/examples/subscribe/dd_aqhi.conf b/sarracenia/examples/subscribe/dd_aqhi.conf index 7aa8fede1..1e54471a3 100644 --- a/sarracenia/examples/subscribe/dd_aqhi.conf +++ b/sarracenia/examples/subscribe/dd_aqhi.conf @@ -18,6 +18,10 @@ instances 2 # valeur basse bonne pour essais initial, valeur haute (1d == 1 jour) pour les opérations. expire 10m +# new topic... in 2025 +subtopic *.WXO-DD.air_quality.aqhi.# + +# old topics likely replaced by above in 2025 subtopic air_quality.aqhi.# directory /tmp/dd_aqhi diff --git a/sarracenia/examples/subscribe/dd_cacn_bulletins.conf b/sarracenia/examples/subscribe/dd_cacn_bulletins.conf index 2da93a120..5a6422950 100644 --- a/sarracenia/examples/subscribe/dd_cacn_bulletins.conf +++ b/sarracenia/examples/subscribe/dd_cacn_bulletins.conf @@ -19,6 +19,10 @@ instances 2 expire 10m +# new topic... 
in 2025
+subtopic *.WXO-DD.bulletins.alphanumeric.*.CA.*.#
+
+# old topics likely replaced by above in 2025
subtopic bulletins.alphanumeric.*.CA.*.#
directory /tmp/cacn_bulletins
accept .*CACN45.*
diff --git a/sarracenia/examples/subscribe/dd_citypage.conf b/sarracenia/examples/subscribe/dd_citypage.conf
index 412b86bad..a29edb21e 100644
--- a/sarracenia/examples/subscribe/dd_citypage.conf
+++ b/sarracenia/examples/subscribe/dd_citypage.conf
@@ -18,6 +18,10 @@ instances 2
# durée de vie du file d´attente sur le serveur. Pour usage opérationnel, augmentez a 1d (1 jour.)
expire 10m

+# new topic... in 2025
+subtopic *.WXO-DD.citypage_weather.#
+
+# old topics likely replaced by above in 2025
subtopic citypage_weather.#
#subtopic citypage_weather.xml.YT.#
diff --git a/sarracenia/examples/subscribe/dd_cmml.conf b/sarracenia/examples/subscribe/dd_cmml.conf
index aed3018b9..17be6b039 100644
--- a/sarracenia/examples/subscribe/dd_cmml.conf
+++ b/sarracenia/examples/subscribe/dd_cmml.conf
@@ -21,6 +21,10 @@ instances 2
# le file est enlevé.
expire 10m

+# new topic... in 2025
+subtopic *.WXO-DD.meteocode.*.cmml.#
+
+# old topics likely replaced by above in 2025
subtopic meteocode.*.cmml.#

directory /tmp/dd_cmml
diff --git a/sarracenia/examples/subscribe/dd_gdps.conf b/sarracenia/examples/subscribe/dd_gdps.conf
index 0afbc46c1..c11801104 100644
--- a/sarracenia/examples/subscribe/dd_gdps.conf
+++ b/sarracenia/examples/subscribe/dd_gdps.conf
@@ -24,5 +24,9 @@ instances 5
expire 10m
#expire, in operational use, use 1d (1 day) as it needs to be longer than the longest interruption in downloads we want to tolerate without dropping downloads.

+# new topic... in 2025
+subtopic *.WXO-DD.model_gem_global.25km.grib2.#
+
+# old topics likely replaced by above in 2025
subtopic model_gem_global.25km.grib2.#
directory /tmp/dd_gdps
diff --git a/sarracenia/examples/subscribe/dd_radar.conf b/sarracenia/examples/subscribe/dd_radar.conf
index 0dc7d3af8..5c89e99c2 100644
--- a/sarracenia/examples/subscribe/dd_radar.conf
+++ b/sarracenia/examples/subscribe/dd_radar.conf
@@ -24,4 +24,8 @@ expire 10m

directory /tmp/dd_radar

+# new topic... in 2025
+subtopic *.WXO-DD.radar.CAPPI.GIF.XAM.#
+
+# old topics likely replaced by above in 2025
subtopic radar.CAPPI.GIF.XAM.#
diff --git a/sarracenia/examples/subscribe/dd_rdps.conf b/sarracenia/examples/subscribe/dd_rdps.conf
index 9bb845135..829f36982 100644
--- a/sarracenia/examples/subscribe/dd_rdps.conf
+++ b/sarracenia/examples/subscribe/dd_rdps.conf
@@ -25,5 +25,9 @@ expire 10m

# suggest 1d (1 day.)

+# new topic... in 2025
+subtopic *.WXO-DD.model_gem_regional.10km.grib2.#
+
+# old topics likely replaced by above in 2025
subtopic model_gem_regional.10km.grib2.#
-director /tmp/dd_rdps
+directory /tmp/dd_rdps
diff --git a/sarracenia/examples/subscribe/dd_swob.conf b/sarracenia/examples/subscribe/dd_swob.conf
index 8789f236c..d5778f04f 100644
--- a/sarracenia/examples/subscribe/dd_swob.conf
+++ b/sarracenia/examples/subscribe/dd_swob.conf
@@ -21,6 +21,11 @@ expire 10m
#expire, in operations, needs to be longer than the longest expected interruption

# All stations
+
+# new topic...
in 2025
+subtopic *.WXO-DD.observations.swob-ml.#
+
+# old topics likely replaced by above in 2025
subtopic observations.swob-ml.#

directory /tmp/dd_swob
diff --git a/sarracenia/featuredetection.py b/sarracenia/featuredetection.py
index 9d48c0304..a78bb336f 100755
--- a/sarracenia/featuredetection.py
+++ b/sarracenia/featuredetection.py
@@ -69,6 +69,9 @@
    'humanize' : { 'modules_needed': ['humanize', 'humanfriendly' ], 'present': False,
                'lament': 'humans will have to read larger, uglier numbers',
                'rejoice': 'humans numbers that are easier to read.' },
+    'jsonlogs' : { 'modules_needed': ['pythonjsonlogger' ], 'present': False,
+                'lament': 'only have raw text logs',
+                'rejoice': 'can write json logs, in addition to text ones.' },
    'mqtt' : { 'modules_needed': ['paho.mqtt.client'], 'present': False,
                'lament': 'cannot connect to mqtt brokers (need >= 2.1.0)' ,
                'rejoice': 'can connect to mqtt brokers' },
diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py
index 80b858225..3cd0db90a 100644
--- a/sarracenia/flow/__init__.py
+++ b/sarracenia/flow/__init__.py
@@ -235,7 +235,6 @@ def metricsFlowReset(self) -> None:
        self.metrics=self.new_metrics

        # removing old metrics files
-        #logger.debug( f"looking for old metrics for {self.o.metricsFilename}" )
        old_metrics=sorted(glob.glob(self.o.metricsFilename+'.*'))[0:-self.o.logRotateCount]
        for o in old_metrics:
            logger.info( f"removing old metrics file: {o} " )
@@ -876,7 +875,7 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,

        if path_strip_count > 0:
-            logger.warning( f"path_strip_count:{path_strip_count} ")
+            logger.debug( f"path_strip_count:{path_strip_count} ")
            strip=path_strip_count
            if strip < len(token):
                token = token[strip:]
@@ -891,12 +890,9 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
                if f in msg['fileOp']:
                    fopv = msg['fileOp'][f].split('/')
                    # an absolute path file posted is relative to '/' (in relPath) but the values in
-                    # the link and rename fields may be absolute, requiring and adjustmeent when stripping
+                    # the link and rename fields may be absolute, requiring an adjustment when stripping
                    if fopv[0] == '':
                        strip += 1
-                    elif len(fopv) == 1:
-                        toclimb=len(token)-1
-                        msg['fileOp'][f] = '../'*(toclimb) + fopv[0]
                    if len(fopv) > strip:
                        rest=fopv[strip:]
                        toclimb=len(token)-rest.count('..')-1
@@ -948,9 +944,6 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
                if (f in msg['fileOp']) :
                    if msg['fileOp'][f].startswith(self.o.baseDir):
                        msg['fileOp'][f] = msg['fileOp'][f].replace(self.o.baseDir, d, 1)
-                    elif os.sep not in msg['fileOp'][f]:
-                        toclimb=len(token)-1
-                        msg['fileOp'][f] = '../'*(toclimb) + msg['fileOp'][f]

        elif 'fileOp' in msg and new_dir:
            u = sarracenia.baseUrlParse(msg['baseUrl'])
@@ -959,9 +952,6 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
                    if (len(u.path) > 1):
                        if msg['fileOp'][f].startswith(u.path):
                            msg['fileOp'][f] = msg['fileOp'][f].replace(u.path, new_dir, 1)
-                    elif '/' not in msg['fileOp'][f]:
-                        toclimb=len(token)-1
-                        msg['fileOp'][f] = '../'*(toclimb) + msg['fileOp'][f]

        if self.o.mirror and len(token) > 1:
            new_dir = new_dir + '/' + '/'.join(token[:-1])
@@ -1071,7 +1061,7 @@ def filter(self) -> None:
                                (m['fileOp']['rename']))
                    else:
                        self.reject(
-                            m, 304, "mask=%s strip=%s url=%s" %
+                            m, 404, "mask=%s strip=%s url=%s" %
                            (str(mask), strip, urlToMatch))
                    break
@@ -1106,7 +1096,7 @@ def filter(self) -> None:
                                             self.o.flatten)
                filtered_worklist.append(m)
            else:
-                self.reject(m, 304, "unmatched pattern %s" % url)
+                self.reject(m, 404, "unmatched pattern %s" % url)
self.worklist.incoming = filtered_worklist
@@ -1292,7 +1282,6 @@ def post(self,now) -> None:
                mfn.write( f'"{timestamp}" : {metrics},\n')

        # removing old metrics files
-        #logger.debug( f"looking for old metrics for {self.o.metricsFilename}" )
        old_metrics=sorted(glob.glob(self.o.metricsFilename+'.*'))[0:-self.o.logRotateCount]
        for o in old_metrics:
            logger.info( f"removing old metrics file: {o} " )
@@ -1499,16 +1488,15 @@ def file_should_be_downloaded(self, msg) -> bool:
                        old_mtime = sarracenia.timestr2flt(x.get('mtime'))
                    except:
                        pass
-
+
                    if new_mtime <= old_mtime:
-                        self.reject(msg, 304,
+                        self.reject(msg, 406,
                                    "mtime not newer %s " % (msg['new_path']))
                        return False
                    else:
                        logger.debug(
-                            "{} new version is {} newer (new: {} vs old: {} )".format(
-                                msg['new_path'], new_mtime - old_mtime, new_mtime,
-                                old_mtime))
+                            f"{msg['new_path']} new version is {new_mtime - old_mtime} " \
+                            f"newer (new: {new_mtime} vs old: {old_mtime} )" )

            elif method in ['random', 'cod']:
                logger.debug("content_match %s sum random/zero/cod never matches" %
@@ -1887,6 +1875,7 @@ def do_download(self) -> None:
            parsed_url = sarracenia.baseUrlParse(msg['baseUrl'])
            self.scheme = parsed_url.scheme

+            ok = False
            i = 1
            while i <= self.o.attempts:
@@ -2341,6 +2330,7 @@ def send(self, msg, options):
        local_file = os.path.basename(local_path).replace('\\', '/')
        new_dir = msg['new_dir'].replace('\\', '/')
        new_file = msg['new_file'].replace('\\', '/')
+        new_inflight_path = None

        try:
@@ -2589,7 +2579,7 @@ def send(self, msg, options):
                else:
                    len_written = self.proto[self.scheme].put( msg, local_file, new_file)
            except Exception as ex:
-                logger.error( f"could not send {local_dir}{os.sep}{local_file} to inflight=None {sendTo} {msg['new_dir']}/{new_file}: {ex}" )
+                logger.error( f"could not send {local_dir}{os.sep}{local_file} to inflight=None {sendTo} {msg['new_dir']} ... {new_file}: {ex}" )
                return False

        elif (('blocks' in msg)
@@ -2867,6 +2857,7 @@ def do_send(self):

            # N attempts to send

+            ok = False
            i = 1
            while i <= self.o.attempts:
                if i != 1:
diff --git a/sarracenia/flow/winnow.py b/sarracenia/flow/winnow.py
index 70489dc12..4bbf71fc0 100644
--- a/sarracenia/flow/winnow.py
+++ b/sarracenia/flow/winnow.py
@@ -6,6 +6,7 @@
default_options = {
    'acceptUnmatched': True,
    'nodupe_ttl': 300,
+    'logDuplicates': True
}
diff --git a/sarracenia/flowcb/authenticate/__init__.py b/sarracenia/flowcb/authenticate/__init__.py
index 22add21a6..efba4ace3 100644
--- a/sarracenia/flowcb/authenticate/__init__.py
+++ b/sarracenia/flowcb/authenticate/__init__.py
@@ -55,7 +55,7 @@ def after_accept(self, worklist):
                logger.info(f"Token for {msg['baseUrl']} not in credentials database. Adding it!")
                # Add the new bearer token to the internal credentials db. If the credential is already in the db, it will
                # be replaced which is desirable.
- cred = sarracenia.credentials.Credential(urlstr=msg['baseUrl']) + cred = sarracenia.config.credentials.Credential(urlstr=msg['baseUrl']) cred.bearer_token = token self.o.credentials.add(msg['baseUrl'], details=cred) diff --git a/sarracenia/flowcb/gather/am.py b/sarracenia/flowcb/gather/am.py index b92a98095..e615fc38f 100644 --- a/sarracenia/flowcb/gather/am.py +++ b/sarracenia/flowcb/gather/am.py @@ -68,7 +68,6 @@ import sarracenia from sarracenia.bulletin import Bulletin -from sarracenia.flowcb.rename.raw2bulletin import Raw2bulletin import sarracenia.config from sarracenia.flowcb import FlowCB @@ -79,8 +78,7 @@ class Am(FlowCB): def __init__(self, options): super().__init__(options,logger) - self.bulletinHandler = Bulletin() - self.renamer = Raw2bulletin(self.o) + self.bulletinHandler = Bulletin(self.o) self.url = urllib.parse.urlparse(self.o.sendTo) @@ -261,7 +259,7 @@ def addBuffer(self): # We don't want to wait on a hanging connection. We use the timeout error to exit out of the reception if there is nothing. # This in turn makes the whole flow the same as any other sarracenia flow. - except TimeoutError: + except (TimeoutError,socket.timeout): return except Exception as e: @@ -522,6 +520,9 @@ def gather(self, messageCountMax): "value":decoded_bulletin } + # For renamer (to be deleted after rename plugin is called) + msg['isProblem'] = isProblem + # Receiver is looking for raw message. msg['size'] = len(bulletin) @@ -536,11 +537,11 @@ def gather(self, messageCountMax): ident.update(bulletin) msg['identity'] = {'method':self.o.identity_method, 'value':ident.value} - # Call renamer - msg = self.renamer.rename(msg,isProblem) - if msg == None: - continue - logger.debug(f"New sarracenia message: {msg}") + # # Call renamer + # msg = self.renamer.rename(msg,isProblem) + # if msg == None: + # continue + # logger.debug(f"New sarracenia message: {msg}") newmsg.append(msg) diff --git a/sarracenia/flowcb/gather/file.py b/sarracenia/flowcb/gather/file.py index f328e7e86..28dd4aaeb 100755 --- a/sarracenia/flowcb/gather/file.py +++ b/sarracenia/flowcb/gather/file.py @@ -69,7 +69,7 @@ class File(FlowCB): also should likely switch from listdir to scandir """ def on_add(self, event, src, dst): - logger.debug("on_add %s %s %s" % ( event, src, dst ) ) + logger.debug("%s %s %s" % ( event, src, dst ) ) self.new_events['%s %s' % (src, dst)] = (event, src, dst) def on_created(self, event): @@ -465,12 +465,15 @@ def process_event(self, event, src, dst): age = time.time() - lstat.st_mtime if age < self.o.fileAgeMin: - logger.debug( "%d vs (inflight setting) %d seconds. Too New!" % (age,self.o.fileAgeMin) ) + logger.debug("%d vs (fileAgeMin setting) %d seconds. Too New! %s" % (age,self.o.fileAgeMin,src) ) return (False, []) if self.o.fileAgeMax > 0 and age > self.o.fileAgeMax: - logger.debug("%d vs (fileAgeMax setting) %d seconds. Too Old!" % (age,self.o.fileAgeMax) ) + logger.debug("%d vs (fileAgeMax setting) %d seconds. Too Old! %s" % (age,self.o.fileAgeMax,src) ) return (True, []) + else: + logger.debug(f"lstat or st_mtime problem? 
lstat={lstat}") + logger.debug(f"st_mtime={lstat.st_mtime}") # post it @@ -607,7 +610,7 @@ def walk_priming(self, p): try: ow = self.observer.schedule(self.watch_handler, d, - recursive=True) + recursive=self.o.recursive) self.obs_watched.append(ow) self.inl[dir_dev_id] = (ow, d) logger.info( diff --git a/sarracenia/flowcb/log.py b/sarracenia/flowcb/log.py index 7b881cc47..9c6e58b14 100755 --- a/sarracenia/flowcb/log.py +++ b/sarracenia/flowcb/log.py @@ -159,6 +159,13 @@ def after_accept(self, worklist): (msg['relPath'], msg['report']['code'], msg['report']['message'])) else: logger.info("rejected: %s " % self._messageAcceptStr(msg)) + + elif 'nodupe' in self.o.logEvents: + for msg in worklist.rejected: + if 'report' in msg and msg['report']['code'] in [ 304 ]: + logger.info( + "%s rejected: %d %s " % + (msg.getIDStr(), msg['report']['code'], msg['report']['message'])) for msg in worklist.incoming: @@ -188,6 +195,13 @@ def after_work(self, worklist): else: logger.info("rejected: %s " % self._messageStr(msg)) + elif 'nodupe' in self.o.logEvents: + for msg in worklist.rejected: + if 'report' in msg and msg['report']['code'] in [ 304 ]: + logger.info( + "%s rejected: %d %s " % + (msg.getIDStr(), msg['report']['code'], msg['report']['message'])) + for msg in worklist.ok: if 'size' in msg: self.fileBytes += msg['size'] diff --git a/sarracenia/flowcb/nodupe/disk.py b/sarracenia/flowcb/nodupe/disk.py index 76b8c72a2..e223c67c7 100755 --- a/sarracenia/flowcb/nodupe/disk.py +++ b/sarracenia/flowcb/nodupe/disk.py @@ -163,13 +163,13 @@ def after_accept(self, worklist): if mtime < min_mtime: m['_deleteOnPost'] |= set(['reject']) m['reject'] = f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" - m.setReport(304, f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" ) + m.setReport(406, f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" ) worklist.rejected.append(m) continue elif mtime > max_mtime: m['_deleteOnPost'] |= set(['reject']) m['reject'] = f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" - m.setReport(304, f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" ) + m.setReport(425, f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" ) worklist.rejected.append(m) continue @@ -177,8 +177,8 @@ def after_accept(self, worklist): new_incoming.append(m) else: m['_deleteOnPost'] |= set(['reject']) - m['reject'] = "not modifified 1 (nodupe check)" - m.setReport(304, 'Not modified 1 (cache check)') + m['reject'] = "not modified 1 (nodupe check)" + m.setReport(304, 'Not modified 1 (nodupe check)') worklist.rejected.append(m) if self.fp: diff --git a/sarracenia/flowcb/nodupe/redis.py b/sarracenia/flowcb/nodupe/redis.py index c9ce9701b..feb4d4447 100644 --- a/sarracenia/flowcb/nodupe/redis.py +++ b/sarracenia/flowcb/nodupe/redis.py @@ -163,13 +163,13 @@ def after_accept(self, worklist): if mtime < min_mtime: m['_deleteOnPost'] |= set(['reject']) m['reject'] = f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" - m.setReport(304, f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" ) + m.setReport(406, f"{m['mtime']} too old (nodupe check), oldest allowed {timeflt2str(min_mtime)}" ) worklist.rejected.append(m) continue elif mtime > max_mtime: m['_deleteOnPost'] |= set(['reject']) m['reject'] = f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" - 
m.setReport(304, f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" )
+                    m.setReport(425, f"{m['mtime']} too new (nodupe check), newest allowed {timeflt2str(max_mtime)}" )
                    worklist.rejected.append(m)
                    continue
@@ -177,8 +177,8 @@ def after_accept(self, worklist):
                new_incoming.append(m)
            else:
                m['_deleteOnPost'] |= set(['reject'])
-                m['reject'] = "not modifified 1 (nodupe check)"
-                m.setReport(304, 'Not modified 1 (cache check)')
+                m['reject'] = "not modified 1 (nodupe check)"
+                m.setReport(304, 'Not modified 1 (nodupe check)')
                worklist.rejected.append(m)

        logger.debug("items registered in duplicate suppression cache: %d" % (len(self._redis.keys(self._rkey_base + ":*"))) )
diff --git a/sarracenia/flowcb/poll/__init__.py b/sarracenia/flowcb/poll/__init__.py
index e9485915e..b11491330 100755
--- a/sarracenia/flowcb/poll/__init__.py
+++ b/sarracenia/flowcb/poll/__init__.py
@@ -443,20 +443,22 @@ def poll_directory(self, pdir):

        # post poll list
-        msgs.extend(self.poll_list_post(pdir, dir_dict, dir_dict.keys()))
+        if self.o.recursive:
+            msgs.extend(self.poll_list_post(pdir, dir_dict, dir_dict.keys()))

        msgs.extend(self.poll_list_post(pdir, desclst, filelst))

        # poll in children directory
-        sdir = sorted(dir_dict.keys())
-        for d in sdir:
-            if d == '.' or d == '..': continue
+        if self.o.recursive:
+            sdir = sorted(dir_dict.keys())
+            for d in sdir:
+                if d == '.' or d == '..': continue

-            #d_lspath = lspath + '_' + d
-            d_pdir = pdir + os.sep + d
+                #d_lspath = lspath + '_' + d
+                d_pdir = pdir + os.sep + d

-            msgs.extend(self.poll_directory(d_pdir))
+                msgs.extend(self.poll_directory(d_pdir))

        return msgs
diff --git a/sarracenia/flowcb/rename/__init__.py b/sarracenia/flowcb/rename/__init__.py
new file mode 100755
index 000000000..fc220e5f8
--- /dev/null
+++ b/sarracenia/flowcb/rename/__init__.py
@@ -0,0 +1,13 @@
+
+"""
+
+sarracenia.flowcb.rename modules are ones whose main focus is the after_accept entry point.
+
+These plugins should be used when the file needs to be renamed before it is
+worked on (downloaded).
+
+Problematic or wrong files should be moved to worklist.rejected to be properly discarded.
+
+"""
+
+pass
diff --git a/sarracenia/flowcb/rename/raw2bulletin.py b/sarracenia/flowcb/rename/raw2bulletin.py
index ec2db0c05..6d8c8a115 100644
--- a/sarracenia/flowcb/rename/raw2bulletin.py
+++ b/sarracenia/flowcb/rename/raw2bulletin.py
@@ -76,95 +76,112 @@ def __init__(self,options) :
        super().__init__(options,logger)
        self.seq = 0
        self.binary = 0
-        self.bulletinHandler = Bulletin()
+        self.bulletinHandler = Bulletin(self.o)
        # Need to redeclare these options to have their default values be initialized.
self.o.add_option('inputCharset', 'str', 'utf-8') self.o.add_option('binaryInitialCharacters', 'list', [b'BUFR' , b'GRIB', b'\211PNG']) # If file was converted, get rid of extensions it had - def rename(self,msg,isProblem): - - path = msg['new_dir'] + '/' + msg['new_file'] - - data = self.bulletinHandler.getData(msg, path) - - # AM bulletins that need their filename rewritten with data should only have two chars before the first underscore - # This is in concordance with Sundew logic -> https://github.com/MetPX/Sundew/blob/main/lib/bulletinAm.py#L70-L71 - # These messages are still good, so we will add them to the good_msgs list - # if len(filenameFirstChars) != 2 and self.binary: - # good_msgs.append(msg) - # continue - - if data == None: - return None - - lines = data.split('\n') - #first_line = lines[0].strip('\r') - #first_line = first_line.strip(' ') - #first_line = first_line.strip('\t') - first_line = lines[0].split(' ') - - # Build header from bulletin - header = self.bulletinHandler.buildHeader(first_line) - if header == None: - logger.error("Unable to fetch header contents. Skipping message") - return None - - # Get the station timestamp from bulletin - if len(header.split('_')) == 2: - ddhhmm = self.bulletinHandler.getTime(data) - if ddhhmm == None: - logger.error("Unable to get julian time.") - else: - ddhhmm = '' - - # Get the BBB from bulletin - BBB = self.bulletinHandler.getBBB(first_line) - - # Get the station ID from bulletin - stn_id = self.bulletinHandler.getStation(data) - - # Generate a sequence (random ints) - seq = self.bulletinHandler.getSequence() - - - - # Rename file with data fetched - try: - # We can't disseminate bulletins downstream if they're missing the timestamp, but we want to keep the bulletins to troubleshoot source problems - # We'll append "_PROBLEM" to the filename to be able to identify erronous bulletins - if ddhhmm == None or isProblem: - timehandler = datetime.datetime.now() - - # Add current time as new timestamp to filename - new_file = header + "_" + timehandler.strftime('%d%H%M') + "_" + BBB + "_" + stn_id + "_" + seq + "_PROBLEM" - - # Write the file manually as the messages don't get posted downstream. - # The message won't also get downloaded further downstream - msg['new_file'] = new_file - new_path = msg['new_dir'] + '/' + msg['new_file'] + def after_accept(self,worklist): + + new_worklist = [] + + for msg in worklist.incoming: + + # If called by a sarra, should always have post_baseDir, so should be OK in specifying it + path = self.o.post_baseDir + '/' + msg['relPath'] - # with open(new_path, 'w') as f: f.write(data) + data = msg.getContent(self.o) - logger.error(f"New filename (for problem file): {new_file}") - elif stn_id == None: - new_file = header + "_" + BBB + "_" + '' + "_" + seq + "_PROBLEM" - logger.error(f"New filename (for problem file): {new_file}") - elif ddhhmm == '': - new_file = header + "_" + BBB + "_" + stn_id + "_" + seq + # Determine if bulletin is binary or not + # From sundew source code + if data.splitlines()[1][:4] in self.o.binaryInitialCharacters: + # Decode data, only text. The raw binary data contains the header in which we're interested. Only get that header. + data = data.splitlines()[0].decode('ascii') else: - new_file = header + "_" + ddhhmm + "_" + BBB + "_" + stn_id + "_" + seq + # Data is not binary + data = data.decode(self.o.inputCharset) + + + if not data: + logger.error("No data was found. 
Skipping message") + worklist.rejected.append(msg) + continue + + lines = data.split('\n') + #first_line = lines[0].strip('\r') + #first_line = first_line.strip(' ') + #first_line = first_line.strip('\t') + first_line = lines[0].split(' ') + + # Build header from bulletin + header = self.bulletinHandler.buildHeader(first_line) + if header == None: + logger.error("Unable to fetch header contents. Skipping message") + worklist.rejected.append(msg) + continue + + # Get the station timestamp from bulletin + if len(header.split('_')) == 2: + ddhhmm = self.bulletinHandler.getTime(data) + if ddhhmm == None: + logger.error("Unable to get julian time.") + else: + ddhhmm = '' + + # Get the BBB from bulletin + BBB = self.bulletinHandler.getBBB(first_line) + + # Get the station ID from bulletin + stn_id = self.bulletinHandler.getStation(data) + + # Generate a sequence (random ints) + seq = self.bulletinHandler.getSequence() + + # Assign a default value for messages not coming from AM + if 'isProblem' not in msg: + msg['isProblem'] = False - msg['new_file'] = new_file - # We need the rest of the fields to be also updated - del(msg['relPath']) - msg.updatePaths(self.o, msg['new_dir'], msg['new_file']) - logger.info(f"New filename (with path): {msg['relPath']}") + # Rename file with data fetched + try: + # We can't disseminate bulletins downstream if they're missing the timestamp, but we want to keep the bulletins to troubleshoot source problems + # We'll append "_PROBLEM" to the filename to be able to identify erronous bulletins + if ddhhmm == None or msg['isProblem']: + timehandler = datetime.datetime.now() + + # Add current time as new timestamp to filename + new_file = header + "_" + timehandler.strftime('%d%H%M') + "_" + BBB + "_" + stn_id + "_" + seq + "_PROBLEM" + + # Write the file manually as the messages don't get posted downstream. + # The message won't also get downloaded further downstream + msg['new_file'] = new_file + new_path = msg['new_dir'] + '/' + msg['new_file'] + + # with open(new_path, 'w') as f: f.write(data) + + logger.error(f"New filename (for problem file): {new_file}") + elif stn_id == None: + new_file = header + "_" + BBB + "_" + '' + "_" + seq + "_PROBLEM" + logger.error(f"New filename (for problem file): {new_file}") + elif ddhhmm == '': + new_file = header + "_" + BBB + "_" + stn_id + "_" + seq + else: + new_file = header + "_" + ddhhmm + "_" + BBB + "_" + stn_id + "_" + seq + + msg['new_file'] = new_file + + # No longer needed + if 'isProblem' in msg: + del(msg['isProblem']) - return msg + # msg.updatePaths(self.o, msg['new_dir'], msg['new_file']) - except Exception as e: - logger.error(f"Error in renaming. Error message: {e}") + logger.info(f"New filename: {msg['new_file']}") + new_worklist.append(msg) + + except Exception as e: + logger.error(f"Error in renaming. 
Error message: {e}") + continue - return None \ No newline at end of file + worklist.incoming = new_worklist diff --git a/sarracenia/flowcb/report.py b/sarracenia/flowcb/report.py index cc9402305..e35a5c998 100755 --- a/sarracenia/flowcb/report.py +++ b/sarracenia/flowcb/report.py @@ -41,7 +41,7 @@ def __init__(self, options): if hasattr(self.o, 'report_broker') and self.o.report_broker: if type(self.o.report_broker) == str: - ok, cred_details = self.o._validate_urlstr(self.o.report_broker) + ok, cred_details = self.o.credentials.validate_urlstr(self.o.report_broker) if ok: self.o.report_broker = cred_details diff --git a/sarracenia/flowcb/send/am.py b/sarracenia/flowcb/send/am.py index 0e6b63980..70b10f1ad 100755 --- a/sarracenia/flowcb/send/am.py +++ b/sarracenia/flowcb/send/am.py @@ -70,8 +70,8 @@ def wrapbulletin(self, sarra_msg): s = struct.Struct(self.patternAM) size = struct.calcsize('80s') - msg_path = sarra_msg['new_relPath'] - msg_file = open(os.sep + msg_path, 'rb') + msg_path = sarra_msg['new_dir'] + '/' + sarra_msg['new_file'] + msg_file = open(msg_path, 'rb') data = msg_file.read() msg_file.close() diff --git a/sarracenia/flowcb/send/s3CloudSender.py b/sarracenia/flowcb/send/s3CloudSender.py index 0d7177a74..e09cec0b8 100644 --- a/sarracenia/flowcb/send/s3CloudSender.py +++ b/sarracenia/flowcb/send/s3CloudSender.py @@ -100,10 +100,11 @@ import logging import os -from sarracenia.credentials import Credential +from sarracenia.config.credentials import Credential from sarracenia.flowcb import FlowCB import boto3 from botocore.exceptions import ClientError +from urllib.parse import unquote logger = logging.getLogger(__name__) @@ -167,6 +168,9 @@ def __init__(self, options): self.s3_url = "https://"+netloc[1] if not (type(netloc[1]) == str and netloc[1] == "None") else None self.access_key_id = usr_pwd[0] if not (type(usr_pwd[0]) == str and usr_pwd[0] == "None") else None self.secret_access_key = usr_pwd[1] if not (type(usr_pwd[1]) == str and usr_pwd[1] == "None") else None + if self.secret_access_key: + # sometimes the key will have a slash in it, in that case, the slash should be changed to %2F in credentials.conf + self.secret_access_key = unquote(self.secret_access_key) logger.info(f"Successfully loaded credentials for sendTo URL {self.o.sendTo}") diff --git a/sarracenia/instance.py b/sarracenia/instance.py index 37c0e0fb4..471e0bdaf 100755 --- a/sarracenia/instance.py +++ b/sarracenia/instance.py @@ -11,6 +11,8 @@ import os import pathlib from sarracenia.moth import Moth +from sarracenia.featuredetection import features + import signal import sys import threading @@ -25,9 +27,12 @@ logger = logging.getLogger(__name__) +if features['jsonlogs']['present']: + from pythonjsonlogger import jsonlogger + + +class RedirectedTimedRotatingFileHandler(logging.handlers.TimedRotatingFileHandler): -class RedirectedTimedRotatingFileHandler( - logging.handlers.TimedRotatingFileHandler): def doRollover(self): super().doRollover() @@ -114,23 +119,43 @@ def start(self): logger.setLevel(logLevel) + if (not os.sep in cfg_preparse.configurations[0]): + component = 'flow' + config = cfg_preparse.configurations[0] + else: + component, config = cfg_preparse.configurations[0].split(os.sep) + + cfg_preparse = sarracenia.config.one_config(component, config, cfg_preparse.action) + + if cfg_preparse.statehost: + hostdir = cfg_preparse.hostdir + else: + hostdir = None + + pidfilename = sarracenia.config.get_pid_filename( hostdir, component, config, cfg_preparse.no) + if not hasattr(cfg_preparse, 'no') and 
not (cfg_preparse.action == 'foreground'):
            logger.critical('need an instance number to run.')
            return
+        elif cfg_preparse.no > 1:
+            # worker instances need to give the lead instance time to write subscriptions/queueNames/bindings
+            # FIXME: might be better to loop here until lead instance .pid file exists?
+            leadpidfilename = sarracenia.config.get_pid_filename( hostdir, component, config, 1)
+            time.sleep(cfg_preparse.no)
+            while not os.path.isdir(os.path.dirname(leadpidfilename)):
+                logger.debug("waiting for lead instance to create state directory")
+                time.sleep(cfg_preparse.no)
+            while not os.path.isfile(leadpidfilename):
+                logger.debug(f"waiting for lead instance to create pid file: {leadpidfilename}")
+                time.sleep(cfg_preparse.no)
+
        if (len(cfg_preparse.configurations) > 1 ) and \
            ( cfg_preparse.configurations[0].split(os.sep)[0] != 'post' ):
            logger.critical("can only run one configuration in an instance")
            return

-        if (not os.sep in cfg_preparse.configurations[0]):
-            component = 'flow'
-            config = cfg_preparse.configurations[0]
-        else:
-            component, config = cfg_preparse.configurations[0].split(os.sep)
-
-        cfg_preparse = sarracenia.config.one_config(component, config, cfg_preparse.action)

        if cfg_preparse.logRotateInterval < (24*60*60):
            logRotateInterval=int(cfg_preparse.logRotateInterval)
@@ -142,10 +167,6 @@ def start(self):

        # init logs here. need to know instance number and configuration and component before here.
        if cfg_preparse.action in ['start','run'] :
-            if cfg_preparse.statehost:
-                hostdir = cfg_preparse.hostdir
-            else:
-                hostdir = None

            metricsfilename = sarracenia.config.get_metrics_filename( hostdir, component, config, cfg_preparse.no)
@@ -165,7 +186,6 @@ def start(self):

            if not cfg_preparse.logStdout:
                logfilename = sarracenia.config.get_log_filename( hostdir, component, config, cfg_preparse.no)
-
                dir_not_there = not os.path.exists(os.path.dirname(logfilename))
                while dir_not_there:
                    try:
@@ -196,6 +216,15 @@ def start(self):

                logger.addHandler(handler)

+                if sarracenia.features['jsonlogs']['present'] and cfg_preparse.logJson:
+                    jsonHandler = RedirectedTimedRotatingFileHandler(
+                        logfilename.replace('.log','.json'),
+                        when=lr_when,
+                        interval=logRotateInterval,
+                        backupCount=cfg_preparse.logRotateCount)
+                    jsonHandler.setFormatter(jsonlogger.JsonFormatter(log_format))
+                    logger.addHandler(jsonHandler)
+
                if hasattr(cfg_preparse, 'permLog'):
                    os.chmod(logfilename, cfg_preparse.permLog)
@@ -215,15 +244,13 @@ def start(self):
        else:
            hostdir = None

-        pidfilename = sarracenia.config.get_pid_filename( hostdir, component, config, cfg_preparse.no)
-
        if not os.path.isdir(os.path.dirname(pidfilename)):
            pathlib.Path(os.path.dirname(pidfilename)).mkdir(parents=True, exist_ok=True)

        with open(pidfilename, 'w') as pfn:
            pfn.write('%d' % os.getpid())

-        cfg = sarracenia.config.one_config(component, config, cfg_preparse.action)
+        cfg = sarracenia.config.one_config(component, config, cfg_preparse.action, isPost=False, hostDir=hostdir)

        cfg.novipFilename = pidfilename.replace(".pid", ".noVip")
diff --git a/sarracenia/moth/__init__.py b/sarracenia/moth/__init__.py
index e48b24d5f..9406bb283 100755
--- a/sarracenia/moth/__init__.py
+++ b/sarracenia/moth/__init__.py
@@ -88,16 +88,16 @@ class Moth():
    * queueName (for amqp, used as client-id for mqtt)

    this library knows nothing about Sarracenia, the only code used from sarracenia is to interpret
-      duration properties, from the root sarracenia/__init__.py, the broker argument from sarracenia.credentials
+      duration properties, from the root sarracenia/__init__.py, the broker
argument from sarracenia.config.credentials usage:: import sarracenia.moth - import sarracenia.credentials + import sarracenia.config.credentials props = sarracenia.moth.default_options - props['broker'] = sarracenia.credentials.Credential('amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') + props['broker'] = sarracenia.config.credentials.Credential('amqps://anonymous:anonymous@hpfx.collab.science.gc.ca') props['expire'] = 300 props['batch'] = 1 is_subscriber=True diff --git a/sarracenia/moth/amqp.py b/sarracenia/moth/amqp.py index f2bf6398a..9a51db4a8 100755 --- a/sarracenia/moth/amqp.py +++ b/sarracenia/moth/amqp.py @@ -340,6 +340,10 @@ def getSetup(self) -> None: if not self.__connect(self.o['broker']): logger.critical('could not connect') break + + if self.o['prefetch'] != 0: + # using global False because RabbitMQ Quorum Queues don't support Global QoS, issue #1233 + self.channel.basic_qos(0, self.o['prefetch'], False) # only first/lead instance needs to declare a queue and bindings. if 'no' in self.o and self.o['no'] >= 2: @@ -348,9 +352,6 @@ def getSetup(self) -> None: #logger.info('getSetup connected to {}'.format(self.o['broker'].url.hostname) ) - if self.o['prefetch'] != 0: - self.channel.basic_qos(0, self.o['prefetch'], True) - #FIXME: test self.first_setup and props['reset']... delete queue... broker_str = self.o['broker'].url.geturl().replace( ':' + self.o['broker'].url.password + '@', '@') @@ -602,13 +603,11 @@ def ack(self, m: sarracenia.Message) -> None: except Exception as err: logger.warning("failed for tag: %s: %s" % (m['ack_id'], err)) logger.debug('Exception details: ', exc_info=True) - if type(err) == BrokenPipeError or type(err) == ConnectionResetError: - # Cleanly close partially broken connection - self.close() - # No point in trying to ack again if the connection is broken - del m['ack_id'] - m['_deleteOnPost'].remove('ack_id') - return False + # No point in trying to ack again if the connection is broken + del m['ack_id'] + m['_deleteOnPost'].remove('ack_id') + self.close() + return False if ebo < 60: ebo *= 2 diff --git a/sarracenia/moth/amqpconsumer.py b/sarracenia/moth/amqpconsumer.py index 2701b968e..44eb1e827 100755 --- a/sarracenia/moth/amqpconsumer.py +++ b/sarracenia/moth/amqpconsumer.py @@ -69,11 +69,6 @@ def __get_on_message(self, msg): # This will block until the msg can be put in the queue self._raw_msg_q.put(msg) - def getCleanUp(self) -> None: - # TODO cancel consumer with basic_cancel(consumer_tag)? - super().getCleanUp() - self._active_consumer_tag = None - def getSetup(self) -> None: super().getSetup() # (re)create queue. Anything in the queue is invalid after re-creating a connection. @@ -147,3 +142,16 @@ def getNewMessage(self) -> sarracenia.Message: self.close() time.sleep(1) return None + + def close(self) -> None: + # TODO cancel consumer with basic_cancel(consumer_tag)? 
+ if self._active_consumer_tag: + try: + self.channel.basic_cancel(self._active_consumer_tag) + logger.info(f"cancelled consumer with tag {self._active_consumer_tag}") + except Exception as e: + logger.warning(f"failed to cancel consumer with tag {self._active_consumer_tag} {e}") + logger.debug("Exception details:", exc_info=True) + self._active_consumer_tag = None + super().close() + diff --git a/sarracenia/sr.py b/sarracenia/sr.py index f3bacf728..8a6b07d3c 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -490,7 +490,9 @@ def _read_state_dir(self): continue if pathname[-4:] == '.pid': - i = int(pathname[-6:-4]) + i = self._instance_num_from_pidfile(pathname, c, cfg) + if i < 0: + continue if t.isdigit(): #print( "pid assignment: {c}/{cfg} instance: {i}, pid: {t}" ) self.states[c][cfg]['instance_pids'][i] = int(t) @@ -585,7 +587,9 @@ def _find_missing_instances_dir(self, dir): for filename in os.listdir(): # look at pid files, find ones where process is missing. if filename[-4:] == '.pid': - i = int(filename[-6:-4]) + i = self._instance_num_from_pidfile(filename, c, cfg) + if i < 0: + continue if i != 0: p = pathlib.Path(filename) if sys.version_info[0] > 3 or sys.version_info[ @@ -840,7 +844,7 @@ def _resolve_brokers(self): xl = self.__resolved_exchanges(c, cfg, o) q = self.__guess_queueName(c, cfg, o) - self.configs[c][cfg]['options'].resolved_qname = q + self.configs[c][cfg]['options'].queueName_resolved = q for exch in xl: if exch in self.brokers[host]['exchanges']: @@ -1109,11 +1113,16 @@ def _resolve(self): elif self.states[c][cfg]['metrics']['byteRate'] < self.configs[c][cfg]['options'].runStateThreshold_slow: flow_status = 'slow' elif self.states[c][cfg]['metrics']['retry'] > self.configs[c][cfg]['options'].runStateThreshold_retry: - flow_status = 'retry' + if self.configs[c][cfg]['options'].attempts == 0: + flow_status='standby' + else: + flow_status = 'retry' elif self.states[c][cfg]['metrics']['lagMean'] > self.configs[c][cfg]['options'].runStateThreshold_lag: flow_status = 'lagging' elif self.states[c][cfg]['metrics']['rejectPercent'] > self.configs[c][cfg]['options'].runStateThreshold_reject: flow_status = 'reject' + elif self.configs[c][cfg]['options'].attempts == 0: + flow_status='standby' elif hasattr(self.configs[c][cfg]['options'],'post_broker') and self.configs[c][cfg]['options'].post_broker \ and (now-self.states[c][cfg]['metrics']['txLast']) > self.configs[c][cfg]['options'].runStateThreshold_idle: flow_status = 'idle' @@ -1126,6 +1135,7 @@ def _resolve(self): self.states[c][cfg]['metrics']['msgRateCpu'] < self.configs[c][cfg]['options'].runStateThreshold_cpuSlow: flow_status = 'cpuSlow' else: + flow_status = 'running' self.states[c][cfg]['resource_usage'] = copy.deepcopy(resource_usage) @@ -1268,8 +1278,7 @@ def __init__(self, opt, config_fnmatches=None): self.invoking_directory = os.getcwd() self.bin_dir = os.path.dirname(os.path.realpath(__file__)) - self.package_lib_dir = os.path.dirname( - inspect.getfile(sarracenia.config.Config)) + self.package_lib_dir = os.path.dirname(inspect.getfile(sarracenia)) self.appauthor = 'MetPX' self.options = opt self.appname = os.getenv('SR_DEV_APPNAME') @@ -1303,7 +1312,7 @@ def __init__(self, opt, config_fnmatches=None): 'sender', 'shovel', 'subscribe', 'watch', 'winnow' ] # active means >= 1 process exists on the node. 
-        self.status_active = ['cpuSlow', 'hung', 'idle', 'lagging', 'partial', 'reject', 'retry', 'running', 'slow', 'waitVip' ]
+        self.status_active = ['cpuSlow', 'hung', 'idle', 'lagging', 'partial', 'reject', 'retry', 'running', 'slow', 'standby', 'waitVip' ]
        self.status_values = self.status_active + [ 'disabled', 'include', 'missing', 'stopped', 'unknown' ]

        self.bin_dir = os.path.dirname(os.path.realpath(__file__))
@@ -1417,8 +1426,7 @@ def add(self):
                component = sp[-2]
                cfg = sp[-1]

-            iedir = os.path.dirname(inspect.getfile(
-                sarracenia.config.Config)) + os.sep + 'examples'
+            iedir = os.path.dirname(inspect.getfile(sarracenia)) + os.sep + 'examples'

            destdir = self.user_config_dir + os.sep + component
@@ -1555,9 +1563,9 @@ def declare(self):
            logging.info('looking at %s/%s ' % (c, cfg))
            o = self.configs[c][cfg]['options']
            od = o.dictify()
-            if hasattr(o, 'resolved_qname'):
+            if hasattr(o, 'queueName_resolved'):
                od['broker'] = o.broker
-                od['queueName'] = o.resolved_qname
+                od['queueName'] = o.queueName_resolved
                od['dry_run'] = self.options.dry_run
                qdc = sarracenia.moth.Moth.subFactory(od)
                qdc.getSetup()
@@ -1802,8 +1810,8 @@ def cleanup(self) -> bool:

                o = self.configs[c][cfg]['options']

-                if hasattr(o, 'resolved_qname'):
-                    #print('deleting: %s is: %s @ %s' % (f, o.resolved_qname, o.broker.url.hostname ))
+                if hasattr(o, 'queueName_resolved'):
+                    #print('deleting: %s is: %s @ %s' % (f, o.queueName_resolved, o.broker.url.hostname ))
                    qdc = sarracenia.moth.Moth.subFactory(
                        {
                            'broker': o.broker,
@@ -1812,13 +1820,13 @@ def cleanup(self) -> bool:
                            'queueDeclare': False,
                            'queueBind': False,
                            'broker': o.broker,
-                            'queueName': o.resolved_qname,
+                            'queueName': o.queueName_resolved,
                            'message_strategy': { 'stubborn':True }
                        })
                    qdc.getSetup()
                    qdc.getCleanUp()
                    qdc.close()
-                    queues_to_delete.append((o.broker, o.resolved_qname))
+                    queues_to_delete.append((o.broker, o.queueName_resolved))

        for h in self.brokers:
            if self.please_stop:
@@ -2197,9 +2205,10 @@ def sanity(self):
        else:
            print('no stray processes found')

-        for l in sarracenia.features.keys():
-            if not sarracenia.features[l]['present']:
-                print( f"notice: python module {l} is missing: {sarracenia.features[l]['lament']}" )
+        #It is enough to have this in *features*; not needed in sanity.
+        #for l in sarracenia.features.keys():
+        #    if not sarracenia.features[l]['present']:
+        #        print( f"notice: python module {l} is missing: {sarracenia.features[l]['lament']}" )

        # run on_sanity plugins.
        for f in self.filtered_configurations:
@@ -2230,6 +2239,21 @@ def start(self):
        if len(self.leftovers) > 0 and not self._action_all_configs:
            logging.error( f"{self.leftovers} configuration not found" )
            return
+
+        has_disabled_config = False
+
+        # if any configs are disabled, don't start any
+        if not self._action_all_configs:
+            for f in self.filtered_configurations:
+                (c, cfg) = f.split(os.sep)
+
+                if self.configs[c][cfg]['status'] == 'disabled':
+                    has_disabled_config = True
+                    logger.error(f"Config {c}/{cfg} is disabled. It must be enabled before starting.")
+
+            if has_disabled_config:
+                logger.error("No configs have been started due to disabled configurations.")
+                return

        pcount = 0
        for f in self.filtered_configurations:
@@ -2613,6 +2637,8 @@ def status(self):
                    cfg_status = "rtry"
                if cfg_status == "runn" :
                    cfg_status = "run"
+                if cfg_status == "stan" :
+                    cfg_status = "stby"
            elif cfg_status == 'wait':
                cfg_status = 'wVip'
@@ -2772,6 +2798,24 @@ def convert1(self,cfg):
            v3_cfg.write('#v2 sftp handling is always absolute, sr3 is relative.
might need this, remove when all sr3:\n') v3_cfg.write('#flowcb accept.sftp_absolute\n') + queueName=None + + #1st prep pass (for cases when re-ordering needed.) + with open(v2_config_path, 'r') as v2_cfg: + for line in v2_cfg.readlines(): + if len(line.strip()) < 1: + continue + if line[0].startswith('#'): + continue + line = line.strip().split() + k = line[0] + if k in synonyms: + k = synonyms[k] + if k in [ 'queueName' ]: + queueName=line[1] + + #2nd re-write pass. + subtopicFound=False with open(v2_config_path, 'r') as v2_cfg: for line in v2_cfg.readlines(): if len(line.strip()) < 1: @@ -2815,7 +2859,13 @@ def convert1(self,cfg): else: logger.error( f"unknown checksum spec: {line}") continue - + elif k == 'queueName': + if subtopicFound or not queueName: + continue + elif k == 'subtopic': + if queueName: + v3_cfg.write(f'queueName {queueName}\n') + queueName=None if (k == 'accept') : if line[1] == '.*': accept_all_seen=True @@ -2829,12 +2879,16 @@ def convert1(self,cfg): elif (k == 'sleep' ) and (component == 'poll'): k = 'scheduled_interval' if k in convert_to_v3: + if convert_to_v3[k] == [ 'continue' ]: + logger.info( f"obsolete v2 keyword: {k}" ) + continue + if len(line) > 1: v = line[1].replace('.py', '', 1) if v in convert_to_v3[k]: line = convert_to_v3[k][v] if 'continue' in line: - logger.info("obsolete v2: " + v) + logger.info("obsolete v2: " + k) continue else: logger.warning( f"unknown {k} {v}, manual conversion required.") @@ -3018,6 +3072,19 @@ def _post_can_be_daemon(self, component, config): return (component in ['post', 'cpost'] and self.configs[component][config]['options'].sleep > 0.1 and hasattr(self.configs[component][config]['options'], 'path')) + def _instance_num_from_pidfile(self, pathname, component, cfg): + if os.sep in pathname: + pathname = pathname.split(os.sep)[-1] + if '_' in pathname: + i = int(pathname[0:-4].split('_')[-1]) + # sr3c components just use iXX.pid + elif component[0] == 'c': + i = int(pathname[0:-4].replace('i', '')) + else: + logger.error(f"Failed to determine instance # for {component}/{cfg} {pathname}") + i = -1 + return i + def main(): """ Main thread for sr dealing with parsing and action switch diff --git a/sarracenia/sr_flow.py b/sarracenia/sr_flow.py index a7949c4cc..a182a5e0f 100755 --- a/sarracenia/sr_flow.py +++ b/sarracenia/sr_flow.py @@ -19,7 +19,7 @@ '.py', '') + os.sep + sys.argv[-1] args = [ sys.executable, - os.path.dirname(inspect.getfile(Config)) + os.sep + 'sr.py' + os.path.dirname(inspect.getfile(sarracenia)) + os.sep + 'sr.py' ] args.extend(sys.argv[1:]) diff --git a/sarracenia/transfer/https.py b/sarracenia/transfer/https.py index 51df4a480..001637a83 100755 --- a/sarracenia/transfer/https.py +++ b/sarracenia/transfer/https.py @@ -106,15 +106,16 @@ def cd(self, path): self.path = path # for compatibility... 
always new connection with http
-    def check_is_connected(self):
-        logger.debug("sr_http check_is_connected")
-
-        if not self.connected : return False
-
-        if self.sendTo != self.o.sendTo:
+    def check_is_connected(self):
+        if (not self.connected
+            or not self.opener
+            or not self.head_opener
+            or self.sendTo != self.o.sendTo):
+            logger.debug("sr_http check_is_connected -> no")
             self.close()
             return False
-
+
+        logger.debug("sr_http check_is_connected -> yes")
         return True
 
     # close
@@ -131,10 +132,36 @@ def connect(self):
         self.connected = False
         self.sendTo = self.o.sendTo
         self.timeout = self.o.timeout
+        self.opener = None
+        self.head_opener = None
+        self.password_mgr = None
+
+        # Set up an opener. This used to be done in every call to __open__, which used a lot of CPU (issue #1261).
+        # FIXME? When done in connect, we create a new opener every time the destination changes,
+        # which might still be too frequent, depending on the config. I'm not convinced that we ever
+        # need to create a new opener. Maybe just put it in __init__ ?
+        try:
+            self.password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+            auth_handler = urllib.request.HTTPBasicAuthHandler(self.password_mgr)
+
+            ssl_handler = urllib.request.HTTPSHandler(0, self.tlsctx)
 
-        if not self.credentials(): return False
+            head_redirect_handler = HTTPRedirectHandlerSameMethod()
 
-        return True
+            # create "opener" (OpenerDirector instance)
+            self.opener = urllib.request.build_opener(auth_handler, ssl_handler)
+            self.head_opener = urllib.request.build_opener(auth_handler, ssl_handler, head_redirect_handler)
+
+        except:
+            logger.error(f'unable to connect {self.o.sendTo}')
+            logger.debug('Exception details: ', exc_info=True)
+            return False
+
+        if not self.credentials():
+            return False
+
+        self.connected = True
+        return self.connected
 
     # credentials...
     def credentials(self):
@@ -149,12 +176,15 @@ def credentials(self):
 
             self.bearer_token = details.bearer_token if hasattr(
                 details, 'bearer_token') else None
 
+            # username and password credentials
+            if self.user != None:
+                # continue with authentication
+                self.password_mgr.add_password(None, self.sendTo, self.user, unquote(self.password))
+
             return True
         except:
-            logger.error(
-                "sr_http/credentials: unable to get credentials for %s" %
-                self.sendTo)
+            logger.error("sr_http/credentials: unable to get credentials for %s" % self.sendTo)
             logger.debug('Exception details: ', exc_info=True)
 
         return False
@@ -223,7 +253,10 @@ def init(self):
         self.http = None
         self.details = None
         self.seek = True
+        self.opener = None
+        self.head_opener = None
+        self.password_mgr = None
 
         self.urlstr = ''
         self.path = ''
@@ -232,9 +265,7 @@ def init(self):
         self.data = ''
         self.entries = {}
 
-
-# ls
-
+    # ls
     def ls(self):
         logger.debug("sr_http ls")
@@ -291,7 +322,7 @@ def __url_redir_str(self):
         return redir_msg
 
     # open
-    def __open__(self, path, remote_offset=0, length=0, method:str=None, add_headers:dict=None, handlers:list=[]) -> bool:
+    def __open__(self, path, remote_offset=0, length=0, method:str=None, add_headers:dict=None) -> bool:
        """
        Open a URL. When the open is successful, self.http is set to a urllib.response instance that can be read from like a file.
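
A note on the connect() hunk above: the point of the #1261 fix is to build the urllib openers once per connection instead of inside every call to __open__. A minimal standalone sketch of the pattern follows; the host, user, and password are made up, and this is an illustration, not sarracenia's code.

    import ssl
    import urllib.error
    import urllib.request

    # Build the handler chain once; constructing it on every request is what
    # issue #1261 found burning CPU.
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, 'https://example.com/', 'someuser', 'somepass')
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    ssl_handler = urllib.request.HTTPSHandler(context=ssl.create_default_context())
    opener = urllib.request.build_opener(auth_handler, ssl_handler)

    # Reuse the same opener for every request to the same destination.
    for path in ('a.txt', 'b.txt'):
        try:
            with opener.open('https://example.com/' + path, timeout=10) as resp:
                resp.read()
        except urllib.error.URLError as e:
            print(f"{path}: {e}")
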
@@ -300,10 +331,11 @@ def __open__(self, path, remote_offset=0, length=0, method:str=None, add_headers logger.debug( f"{path} " + (method if method else '')) self.http = None - self.connected = False self.req = None self.urlstr = path - self.opener = None + + if not self.check_is_connected(): + self.connect() # have noticed that some site does not allow // in path if path.startswith('http://') and '//' in path[7:]: @@ -335,38 +367,23 @@ def __open__(self, path, remote_offset=0, length=0, method:str=None, add_headers # username and password credentials if self.user != None: - password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() # takeaway credentials info from urlstr cred = self.user + '@' self.urlstr = self.urlstr.replace(cred, '') if self.password != None: cred = self.user + ':' + self.password + '@' self.urlstr = self.urlstr.replace(cred, '') - - # continue with authentication - password_mgr.add_password(None, self.urlstr, self.user, unquote(self.password)) - auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr) - handlers.append(auth_handler) - - ssl_handler = urllib.request.HTTPSHandler(0, self.tlsctx) - handlers.append(ssl_handler) - - # create "opener" (OpenerDirector instance) - self.opener = urllib.request.build_opener(*handlers) - - # Install the opener. Now all calls to get the request use our opener. - # NOTE not necessary since we call opener.open directly. Install only needed if we want to use urlopen. - # urllib.request.install_opener(opener) # Build the request that will get opened. If None is passed to method it defaults to GET. self.req = urllib.request.Request(self.urlstr, headers=headers, method=method) # open... we are connected + opener = self.head_opener if method == 'HEAD' else self.opener if self.timeout == None: # when timeout is not passed, urllib defaults to socket._GLOBAL_DEFAULT_TIMEOUT - self.http = self.opener.open(self.req) + self.http = opener.open(self.req) else: - self.http = self.opener.open(self.req, timeout=self.timeout) + self.http = opener.open(self.req, timeout=self.timeout) # knowing if we got redirected is useful for debugging try: @@ -413,9 +430,7 @@ def stat(self,path,msg) -> sarracenia.filemetadata.FmdStat: url += '/' url += path - # Default HTTPRedirectHandler may change a HEAD request to a GET when following the redirect. 
- handlers = [ HTTPRedirectHandlerSameMethod() ] - ok = self.__open__(url, method='HEAD', add_headers={'Accept-Encoding': 'identity'}, handlers=handlers) + ok = self.__open__(url, method='HEAD', add_headers={'Accept-Encoding': 'identity'}) if not ok: logger.debug(f"failed") return None diff --git a/sarracenia/transfer/sftp.py b/sarracenia/transfer/sftp.py index 2b84b29ed..9a9506749 100755 --- a/sarracenia/transfer/sftp.py +++ b/sarracenia/transfer/sftp.py @@ -80,6 +80,7 @@ def registered_as(): # cd def cd(self, path): + alarm_set(self.o.timeout) try: logger.debug("first cd to %s" % self.originalDir) @@ -476,9 +477,7 @@ def put(self, local_offset=0, remote_offset=0, length=0): - logger.debug( - "sr_sftp put %s %s %d %d %d" % - (local_file, remote_file, local_offset, remote_offset, length)) + logger.debug( f" local_file={local_file} remote_file={remote_file} local_offset={local_offset} remote_offset={remote_offset} length={length}" ) # simple file diff --git a/tests/sarracenia/config_test.py b/tests/sarracenia/config_test.py index 6d91497d3..96d664ff3 100644 --- a/tests/sarracenia/config_test.py +++ b/tests/sarracenia/config_test.py @@ -1,3 +1,4 @@ +import copy import pytest from tests.conftest import * #from unittest.mock import Mock @@ -10,7 +11,7 @@ import sarracenia import sarracenia.config -import sarracenia.credentials +import sarracenia.config.credentials logger = logging.getLogger('sarracenia.config') logger.setLevel('DEBUG') @@ -27,7 +28,7 @@ def try_pattern( options, message, pattern, goodre ): def test_variableExpansion(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) options.baseDir = '/data/whereIcameFrom' options.documentRoot = options.baseDir options.post_baseDir = '/data/whereIamGoingTo' @@ -89,7 +90,7 @@ def test_variableExpansion(): def test_read_line_declare(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) options.baseDir = '/data/whereIcameFrom' options.documentRoot = options.baseDir options.post_baseDir = '/data/whereIamGoingTo' @@ -106,7 +107,7 @@ def test_read_line_declare(): def test_read_line_flags(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "download off" ) assert( options.download == False ) @@ -130,7 +131,7 @@ def test_read_line_flags(): def test_read_line_counts(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) # crasher input: options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "batch -1" ) @@ -156,7 +157,7 @@ def test_read_line_counts(): def test_read_line_floats(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "messageRateMax 1.5mb" ) assert( options.messageRateMax == 1572864 ) @@ -173,7 +174,7 @@ def test_read_line_floats(): def test_read_line_sets(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) logger.info( f" {options.fileEvents=} " ) assert( options.fileEvents == set( ['create', 'delete', 'link', 'mkdir', 'modify', 'rmdir' ] ) ) @@ -198,7 +199,7 @@ def test_read_line_sets(): def test_read_line_perms(): - options = sarracenia.config.default_config() + options = copy.deepcopy(sarracenia.config.default_config()) logger.info( f" {options.permDefault=:o} " ) 
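
A note on the copy.deepcopy(sarracenia.config.default_config()) changes in these tests: taking a deep copy keeps one test's mutations of the options object from leaking into the next. A hypothetical illustration of the aliasing hazard; the Options class and module-level cache below are stand-ins, not sarracenia's actual implementation.

    import copy

    class Options:
        """Stand-in for a config object with a mutable field."""
        def __init__(self):
            self.fileEvents = {'create', 'delete', 'modify'}

    _cached = Options()

    def default_config():
        # imagine the default config were cached and shared between callers
        return _cached

    a = default_config()
    a.fileEvents.add('mkdir')                        # one test mutates the shared object...
    assert 'mkdir' in default_config().fileEvents    # ...and every later caller sees it

    b = copy.deepcopy(default_config())              # a deep copy isolates each test
    b.fileEvents.discard('mkdir')
    assert 'mkdir' in default_config().fileEvents    # the shared original is untouched
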
@@ -213,7 +214,7 @@ def test_read_line_duration():
-    options = sarracenia.config.default_config()
+    options = copy.deepcopy(sarracenia.config.default_config())
 
     logger.info( f" {options.sleep=} " )
     options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "sleep 30" )
     logger.info( f" {options.sleep=} " )
@@ -234,7 +235,7 @@ def test_read_line_add_option():
-    options = sarracenia.config.default_config()
+    options = copy.deepcopy(sarracenia.config.default_config())
 
     options.add_option( 'list_one', kind='list', default_value=['1','2'], all_values=['1','2','3','4'] )
     logger.info( f" {options.list_one=} " )
@@ -298,7 +299,7 @@ def test_source_from_exchange():
-    options = sarracenia.config.default_config()
+    options = copy.deepcopy(sarracenia.config.default_config())
 
     # crasher input:
     options.parse_line( "subscribe", "ex1", "subscribe/ex1", 1, "declare source tsource" )
@@ -312,10 +313,46 @@
     source = options.get_source_from_exchange(options.exchange)
     assert( source == 'tsource' )
 
+def test_subscription():
+
+    o = copy.deepcopy(sarracenia.config.default_config())
+
+    o.component = 'subscribe'
+    o.config = 'ex1'
+    o.action = 'start'
+    o.no = 1
+    before_add=len(o.credentials.credentials)
+    o.credentials.add( 'amqp://lapinferoce:etpoilu@localhost' )
+    o.credentials.add( 'amqp://capelli:tropcuit@localhost' )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 1, "broker amqp://lapinferoce@localhost" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 2, "exchange hoho1" )
+
+    assert( o.exchange == "hoho1" )
+
+    o.parse_line( o.component, o.config, "subscribe/ex1", 3, "subtopic hoho.#" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 3, "subtopic lala.hoho.#" )
+
+    assert( hasattr(o,'subscriptions') )
+    assert( len(o.subscriptions)==1 )
+    #assert( len(o.subscriptions[0]['bindings']) == 2 )
+
+    o.parse_line( o.component, o.config, "subscribe/ex1", 2, "exchange xpublic" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 3, "subtopic #" )
+
+    assert( len(o.subscriptions)==1 )
+
+    o.parse_line( o.component, o.config, "subscribe/ex1", 1, "broker amqp://capelli@localhost" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 2, "queue myfavouriteQ" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 2, "topicPrefix v02.post" )
+    o.parse_line( o.component, o.config, "subscribe/ex1", 3, "subtopic #" )
+
+    assert( len(o.subscriptions)==2 )
+
+    logger.info( f" {o.subscriptions=} " )
 
 def test_broker_finalize():
-    options = sarracenia.config.default_config()
+    options = copy.deepcopy(sarracenia.config.default_config())
 
     options.component = 'subscribe'
     options.config = 'ex1'
     options.action = 'start'
@@ -325,6 +362,7 @@
     options.credentials.add( 'amqp://bunnypeer:passthepoi@localhost' )
     after_add=len(options.credentials.credentials)
 
+    logger.info( f" {before_add=} {after_add=} " )
     assert( before_add + 1 == after_add )
 
diff --git a/tests/sarracenia/credentials_test.py b/tests/sarracenia/credentials_test.py
index 1caa9ffe0..5524c6770 100644
--- a/tests/sarracenia/credentials_test.py
+++ b/tests/sarracenia/credentials_test.py
@@ -3,4 +3,4 @@
 #from unittest.mock import Mock
 
 import sarracenia.config
-import sarracenia.credentials
\ No newline at end of file
+import sarracenia.config.credentials
diff --git a/tests/sarracenia/flowcb/gather/am__gather_test.py b/tests/sarracenia/flowcb/gather/am__gather_test.py
index ece83160b..7f7981016
100755 --- a/tests/sarracenia/flowcb/gather/am__gather_test.py +++ b/tests/sarracenia/flowcb/gather/am__gather_test.py @@ -25,6 +25,7 @@ def __init__(self): self.fileAgeMax = 0 self.post_baseUrl = "http://localhost/" self.post_format = "v02" + self.post_baseDir = "/this/path/is/fake" def add_option(self, option, type, default = None): if not hasattr(self, option): @@ -49,17 +50,19 @@ def make_message(): m["to_clusters"] = "localhost" m["baseUrl"] = "https://NotARealURL" m["post_baseUrl"] = "https://NotARealURL" + m["post_baseDir"] = "/this/path/is/fake" m["relPath"] = "ThisIsAPath/To/A/File.txt" m["_deleteOnPost"] = set() return m -# NOTE: Need to test filtering as well? -# WorkList = types.SimpleNamespace() -# WorkList.ok = [] -# WorkList.incoming = [] -# WorkList.rejected = [] -# WorkList.failed = [] -# WorkList.directories_ok = [] +def make_worklist(): + WorkList = types.SimpleNamespace() + WorkList.ok = [] + WorkList.incoming = [] + WorkList.rejected = [] + WorkList.failed = [] + WorkList.directories_ok = [] + return WorkList # def test___init__(): # BaseOptions = Options() @@ -105,10 +108,14 @@ def test_am_binary_bulletin(): message_test1['new_file'] = bulletinHeader + '__12345' message_test1['new_dir'] = BaseOptions.directory message_test1['content']['value'] = b64encode(message_test1['content']['value']).decode('ascii') + message_test1["isProblem"] = False + + worklist = make_worklist() + worklist.incoming = [message_test1] # Check renamer. - message_test1 = renamer.rename(message_test1, False) - assert message_test1['new_file'] == 'ISAA41_CYWA_030000___00001' + renamer.after_accept(worklist) + assert worklist.incoming[0]['new_file'] == 'ISAA41_CYWA_030000___00001' # Test 2: Check a regular CACN bulletin @@ -132,10 +139,16 @@ def test_cacn_regular(): new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) assert new_bulletin == b'CACN00 CWAO 021600\nWVO\n100,2024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' + # Check renamer. message_test2['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test2 = renamer.rename(message_test2, False) - assert message_test2['new_file'] == 'CACN00_CWAO_021600__WVO_00001' + message_test2["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test2] + + renamer.after_accept(worklist) + assert worklist.incoming[0]['new_file'] == 'CACN00_CWAO_021600__WVO_00001' # Test 3: Check an erronous CACN bulletin (missing timestamp in bulletin contents) def test_cacn_erronous(): @@ -161,8 +174,14 @@ def test_cacn_erronous(): # Check renamer. 
message_test3['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test3 = renamer.rename(message_test3, False) - assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , message_test3['new_file']) + message_test3["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test3] + + + renamer.after_accept(worklist) + assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , worklist.incoming[0]['new_file']) # Test 4: Bulletin with double line separator after header (my-header\n\n) def test_bulletin_double_linesep(): @@ -188,7 +207,12 @@ def test_bulletin_double_linesep(): # Check renamer. message_test4['content']['value'] = message_test4['content']['value'].decode('iso-8859-1') - message_test4 = renamer.rename(message_test4, False) + message_test4["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test4] + + renamer.after_accept(worklist) assert message_test4['new_file'] == 'SXCN35_CWVR_021100___00001' # Test 5: Bulletin with invalid year in timestamp (Fix: https://github.com/MetPX/sarracenia/pull/973) @@ -213,7 +237,12 @@ def test_bulletin_invalid_timestamp(caplog): assert new_bulletin == b'CACN00 CWAO\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' message_test5['content']['value'] = message_test5['content']['value'].decode('iso-8859-1') - message_test5 = renamer.rename(message_test5, False) + message_test5["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test5] + + renamer.after_accept(worklist) # We want to make sure the proper errors are raised from the logs assert 'Unable to fetch header contents. Skipping message' in caplog.text and 'Unable to verify year from julian time.' in caplog.text @@ -265,7 +294,12 @@ def test_bulletin_wrong_station(): # Check renamer. 
message_test7['content']['value'] = message_test7['content']['value'].decode('iso-8859-1') - message_test7 = renamer.rename(message_test7, False) + message_test7["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test7] + + renamer.after_accept(worklist) assert message_test7['new_file'] == 'UECN99_CYCX_071200___00001_PROBLEM' # Test 8: SM Bulletin - Add station mapping + SM/SI bulletin accomodities @@ -291,7 +325,12 @@ def test_SM_bulletin(): assert new_bulletin == b'SMCN06 CWAO 030000\nAAXX 03004\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' message_test8['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test8 = renamer.rename(message_test8, False) + message_test8["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test8] + + renamer.after_accept(worklist) assert message_test8['new_file'] == 'SMCN06_CWAO_030000__71816_00001' # Test 9: Bulletin with 5 fields in header (invalid) @@ -378,7 +417,12 @@ def test_random_bulletin_with_BBB(): assert new_bulletin == b'' message_test12['content']['value'] = bulletin.decode('iso-8859-1') - message_test12 = renamer.rename(message_test12, False) + message_test12["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test12] + + renamer.after_accept(worklist) assert message_test12['new_file'] == 'FXCN06_CYTR_230939_AAA__00001' # Test 13: SM Bulletin with BBB - Add station mapping + SM/SI bulletin accomodities + conserve BBB header @@ -404,5 +448,10 @@ def test_SM_bulletin_with_BBB(): assert new_bulletin == b'SMCN06 CWAO 030000 AAA\nAAXX 03004\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' message_test13['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test13 = renamer.rename(message_test13, False) - assert message_test13['new_file'] == 'SMCN06_CWAO_030000_AAA_71816_00001' \ No newline at end of file + message_test13["isProblem"] = isProblem + + worklist = make_worklist() + worklist.incoming = [message_test13] + + renamer.after_accept(worklist) + assert message_test13['new_file'] == 'SMCN06_CWAO_030000_AAA_71816_00001' diff --git a/tests/sarracenia/flowcb/gather/am_test.py b/tests/sarracenia/flowcb/gather/am_test.py deleted file mode 100755 index 2e4b33cb3..000000000 --- a/tests/sarracenia/flowcb/gather/am_test.py +++ /dev/null @@ -1,365 +0,0 @@ -import pytest -import os, types, copy - -#useful for debugging tests -import pprint -def pretty(*things, **named_things): - for t in things: - pprint.PrettyPrinter(indent=2, width=200).pprint(t) - for k,v in named_things.items(): - print(str(k) + ":") - pprint.PrettyPrinter(indent=2, width=200).pprint(v) - -from sarracenia.flowcb.gather.am import Am -import sarracenia.config - -from sarracenia import Message as SR3Message -from sarracenia.flowcb.rename.raw2bulletin import Raw2bulletin - -class Options: - def __init__(self): - # self.o = sarracenia.config.default_config() - self.logLevel = "DEBUG" - self.logFormat = "" - self.queueName = "TEST_QUEUE_NAME" - self.component = "flow" - self.config = "foobar_am.conf" - self.sendTo = "am://127.0.0.1:5005" - self.pid_filename = "/tmp/sarracenia/am_test/pid_filename" - self.directory = "/tmp/test/directory" - self.housekeeping = float(39) - self.fileAgeMin = 0 - self.fileAgeMax = 0 - self.post_baseUrl = "http://localhost/" - self.post_format = "v02" - - def add_option(self, option, type, default = None): - 
if not hasattr(self, option): - setattr(self, option, default) - pass - -def make_message(): - m = SR3Message() - m["pubTime"] = "20180118151049.356378078" - m["topic"] = "v02.post.sent_by_tsource2send" - m["mtime"] = "20180118151048" - m["identity"] = { - "method": "md5", - "value": "c35f14e247931c3185d5dc69c5cd543e" - } - m["atime"] = "201801181.51049.356378078" - m["content"] = {"encoding":"" , "value": ""} - m["from_cluster"] = "localhost" - m["mode"] = "644" - m["source"] = "tsource" - m["sum"] = "d,c35f14e247931c3185d5dc69c5cd543e" - m["to_clusters"] = "localhost" - m["baseUrl"] = "https://NotARealURL" - m["post_baseUrl"] = "https://NotARealURL" - m["relPath"] = "ThisIsAPath/To/A/File.txt" - m["_deleteOnPost"] = set() - return m - -# NOTE: Need to test filtering as well? -# WorkList = types.SimpleNamespace() -# WorkList.ok = [] -# WorkList.incoming = [] -# WorkList.rejected = [] -# WorkList.failed = [] -# WorkList.directories_ok = [] - -# def test___init__(): -# BaseOptions = Options() -# am_instance = Am(BaseOptions) -# renamer = Raw2bulletin(BaseOptions) - -def _get_bulletin_info(message): - charset = message['content']['encoding'] - bulletin = message['content']['value'] - lines = bulletin.splitlines() - if message['content']['encoding'] != 'base64': - firstchars = bulletin[0:2].decode(charset) - if list(lines[1].split()): - station = lines[1].split()[0].decode(charset) - else: - station = lines[1].decode(charset) - else: - firstchars = "XX" - station = "XXX" - missing_ahl = 'CN00 CWAO' - return bulletin, firstchars, lines, missing_ahl, station, charset - -# For unit testing, we mostly want to check how the bulletins get corrected. -# We have lots of use cases where bulletin get corrected so it's important to test all of these cases - - -# @pytest.mark.depends(on=['test___init__']) - -# Test 1: Check a regular binary bulletin. -def test_am_binary_bulletin(): - from base64 import b64encode - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - - message_test1 = make_message() - message_test1['content']['encoding'] = 'base64' - message_test1['content']['value'] = b'ISAA41 CYWA 030000\nBUFR\x00\x00\xa8\x02\x00\x00\x12\x00\x006\x00\x00\x00\x00\r\r\x18\x05\x03\x00\x00\x00\x00\x00L\x00\x00\x01\x00\x01\xcc\x06\x02\x05\x02\x07\x01\x04\x01\x04\x02\x04\x03\x04\x04\x04\x05\x02\xc4\x01\xc3\x14\xd5\x14\r\x14\xce\x14\xc5\x14\x0b\x14\x01\n\x04\n3\x0c\x01\x0c\x02\x0c\x03\x0c\xc7\x08\x15\x04\x19\x0b\x0b\x0b\x0c\x04\x19\x08\x15\n4\n?\n=\r\x03\x85\x11\x00\x00\x00>\x00YWA (\x1cj6\x08I\xfa\x140\x00\xe0a@F1\x92g/\x9f6\xd0l~\xc1,hO\xfdh\x01_\xff\xfc\xf9D\xff\xc3DENSITY ALT 479FT7777\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test1) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test1['new_file'] = bulletinHeader + '__12345' - message_test1['new_dir'] = BaseOptions.directory - message_test1['content']['value'] = b64encode(message_test1['content']['value']).decode('ascii') - - # Check renamer. 
- message_test1 = renamer.rename(message_test1, False) - assert message_test1['new_file'] == 'ISAA41_CYWA_030000___00001' - - -# Test 2: Check a regular CACN bulletin -def test_cacn_regular(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test2 = make_message() - message_test2['content']['encoding'] = 'iso-8859-1' - message_test2['content']['value'] = b'CA\nWVO\n100,2024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test2) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test2['new_file'] = bulletinHeader + '__12345' - message_test2['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of a CACN - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'CACN00 CWAO 021600\nWVO\n100,2024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' - - # Check renamer. - message_test2['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test2 = renamer.rename(message_test2, False) - assert message_test2['new_file'] == 'CACN00_CWAO_021600__WVO_00001' - -# Test 3: Check an erronous CACN bulletin (missing timestamp in bulletin contents) -def test_cacn_erronous(): - import re - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test3 = make_message() - message_test3['content']['encoding'] = 'iso-8859-1' - message_test3['content']['value'] = b'CA\nWPK\n0.379033,325.078,1.13338\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test3) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test3['new_file'] = bulletinHeader + '__12345' - message_test3['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of a CACN - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'CACN00 CWAO\nWPK\n0.379033,325.078,1.13338\n' - - # Check renamer. 
- message_test3['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test3 = renamer.rename(message_test3, False) - assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , message_test3['new_file']) - -# Test 4: Bulletin with double line separator after header (my-header\n\n) -def test_bulletin_double_linesep(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test4 = make_message() - message_test4['content']['encoding'] = 'iso-8859-1' - message_test4['content']['value'] = b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test4) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test4['new_file'] = bulletinHeader + '__12345' - message_test4['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - # Checking for b'' because this is what returns when correctContents has no problems to report correcting. - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'' - - # Check renamer. - message_test4['content']['value'] = message_test4['content']['value'].decode('iso-8859-1') - message_test4 = renamer.rename(message_test4, False) - assert message_test4['new_file'] == 'SXCN35_CWVR_021100___00001' - -# Test 5: Bulletin with invalid year in timestamp (Fix: https://github.com/MetPX/sarracenia/pull/973) -def test_bulletin_invalid_timestamp(caplog): - import re, datetime - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test5 = make_message() - message_test5['content']['encoding'] = 'iso-8859-1' - message_test5['content']['value'] = b'CA\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test5) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test5['new_file'] = bulletinHeader + '__12345' - message_test5['new_dir'] = BaseOptions.directory - - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'CACN00 CWAO\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n' - - message_test5['content']['value'] = message_test5['content']['value'].decode('iso-8859-1') - message_test5 = renamer.rename(message_test5, False) - # We want to make sure the proper errors are raised from the logs - assert 'Unable to fetch header contents. Skipping message' in caplog.text and 'Unable to verify year from julian time.' 
in caplog.text - - -# Test 6: Bulletin with trailing spaces at the end of the header (Fix: https://github.com/MetPX/sarracenia/pull/956) -def test_bulletin_header_trailing_space(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test6 = make_message() - message_test6['content']['encoding'] = 'iso-8859-1' - message_test6['content']['value'] = b'SXCN35 CWVR 021100 \n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test6) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test6['new_file'] = bulletinHeader + '__12345' - message_test6['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - - -# Test 7: Bulletin with a wrong station name (Fix: https://github.com/MetPX/sarracenia/pull/963/files) -def test_bulletin_wrong_station(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test7 = make_message() - message_test7['content']['encoding'] = 'iso-8859-1' - message_test7['content']['value'] = b'UECN99 CYCX 071200\nTTDD21 /// 5712/ 71701 NIL=\n\n\n\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test7) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test7['new_file'] = bulletinHeader + '__12345' - message_test7['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - # Checking for b'' because this is what returns when correctContents has no problems to report correcting - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'' - - # Check renamer. 
- message_test7['content']['value'] = message_test7['content']['value'].decode('iso-8859-1') - message_test7 = renamer.rename(message_test7, False) - assert message_test7['new_file'] == 'UECN99_CYCX_071200___00001_PROBLEM' - -# Test 8: SM Bulletin - Add station mapping + SM/SI bulletin accomodities -def test_SM_bulletin(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test8 = make_message() - message_test8['content']['encoding'] = 'iso-8859-1' - message_test8['content']['value'] = b'SM 030000\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test8) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test8['new_file'] = bulletinHeader + '__12345' - message_test8['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - am_instance.o.mapStations2AHL = ['SMCN06 CWAO COLL 71816 71818 71821 71825 71827 71828 71831 71832 71834 71841 71842 71845 71850 71854'] - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'SMCN06 CWAO 030000\nAAXX 03004\n71816 11324 80313 10004 20003 30255 40318 52018 60031 77177 887//\n333 10017 20004 42001 70118 90983 93101=\n' - - message_test8['content']['value'] = new_bulletin.decode('iso-8859-1') - message_test8 = renamer.rename(message_test8, False) - assert message_test8['new_file'] == 'SMCN06_CWAO_030000__71816_00001' - -# Test 9: Bulletin with 5 fields in header (invalid) -def test_bulletin_header_five_fileds(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test9 = make_message() - message_test9['content']['encoding'] = 'iso-8859-1' - message_test9['content']['value'] = b'SXCN35 CWVR 021100 AAA OOPS\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test9) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test9['new_file'] = bulletinHeader + '__12345' - message_test9['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'SXCN35 CWVR 021100 AAA\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - -# Test 10: Bulletin with 6 fields in header (invalid) -def test_bulletin_header_six_fileds(): - - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test10 = make_message() - message_test10['content']['encoding'] = 'iso-8859-1' - message_test10['content']['value'] = b'SXCN35 CWVR 021100 AAA OTHER OHNO\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test10) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test10['new_file'] = bulletinHeader + '__12345' - message_test10['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert 
new_bulletin == b'SXCN35 CWVR 021100 AAA OTHER\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' - - -# Test 11: Bulletin with a timestamp (DDHHmm) bigger then 6 chars -def test_bulletin_timestamp_6chars_plus(): - BaseOptions = Options() - renamer = Raw2bulletin(BaseOptions) - am_instance = Am(BaseOptions) - - message_test11 = make_message() - message_test11['content']['encoding'] = 'iso-8859-1' - message_test11['content']['value'] = b'SXCN35 CWVR 021100Z\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff' - - bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test11) - - bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_') - message_test11['new_file'] = bulletinHeader + '__12345' - message_test11['new_dir'] = BaseOptions.directory - - # Check correcting the bulletin contents of the bulletin - new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) - assert new_bulletin == b'SXCN35 CWVR 021100\n\nFacility: GVRD\nData valid at: 2024/05/02 11:00Z\n\nsome other stuff\n' \ No newline at end of file diff --git a/tests/sarracenia/interruptible_sleep_test.py b/tests/sarracenia/interruptible_sleep_test.py index e87825700..be3f0b605 100644 --- a/tests/sarracenia/interruptible_sleep_test.py +++ b/tests/sarracenia/interruptible_sleep_test.py @@ -3,9 +3,7 @@ #from unittest.mock import Mock import datetime -import os import signal -import subprocess from sarracenia.interruptible_sleep import interruptible_sleep class SleepThing(): @@ -14,54 +12,52 @@ def __init__(self): self.other_name = False signal.signal(signal.SIGTERM, self.signal_handler) signal.signal(signal.SIGINT, self.signal_handler) + signal.signal(signal.SIGALRM, self.signal_handler) def signal_handler(self, signum, stack): self._stop_requested = True self.other_name = True def test_interruptible_sleep(): - st = SleepThing() stime = 10 # Test that sleep sleeps for the right amount of time when not interrupted + st = SleepThing() before_time = datetime.datetime.now() result = interruptible_sleep(stime, st) after_time = datetime.datetime.now() assert (result == False) - assert ( (after_time - before_time).seconds == stime) + assert ( int((after_time - before_time).seconds) == stime) # Test that the sleep behaves correctly when interrupted - # send a SIGINT to this process after 5 seconds: - cmdline = f"""bash -c '/usr/bin/sleep 5; kill -SIGTERM {os.getpid()};' &""" - subprocess.run(cmdline, shell=True) + st = SleepThing() + # send a SIGALRM to this process after 5 seconds: + signal.alarm(5) before_time = datetime.datetime.now() result = interruptible_sleep(stime, st) after_time = datetime.datetime.now() assert result - assert ( (after_time - before_time).seconds == 5) + assert ( int((after_time - before_time).seconds) == 5) # Test using a different nap_time st = SleepThing() - # send a SIGINT to this process after 5 seconds: - cmdline = f"""bash -c '/usr/bin/sleep 5; kill -SIGTERM {os.getpid()};' &""" - subprocess.run(cmdline, shell=True) + # send a SIGALRM to this process after 5 seconds: + signal.alarm(5) before_time = datetime.datetime.now() result = interruptible_sleep(stime, st, nap_time=1) after_time = datetime.datetime.now() assert result - assert ( (after_time - before_time).seconds == 5) - + assert ( int((after_time - before_time).seconds) == 5) # Test using a different attribute name st = SleepThing() - # send a SIGINT to this process after 5 seconds: - cmdline = f"""bash -c 
'/usr/bin/sleep 5; kill -SIGTERM {os.getpid()};' &""" - subprocess.run(cmdline, shell=True) + # send a SIGALRM to this process after 5 seconds: + signal.alarm(5) before_time = datetime.datetime.now() result = interruptible_sleep(stime, st, stop_flag_name = 'other_name') after_time = datetime.datetime.now() assert result - assert ( (after_time - before_time).seconds == 5) + assert ( int((after_time - before_time).seconds) == 5) diff --git a/travis/add_redis.sh b/travis/add_redis.sh index 2213868f6..bb033391d 100755 --- a/travis/add_redis.sh +++ b/travis/add_redis.sh @@ -73,9 +73,25 @@ else fi echo -e "..done\n" +# later OS versions have later versions of pip that require convincing to actually install. +. /etc/os-release +if [ "${VERSION_ID}" \> "22.04" ]; then + pip_install="pip3 install --break-system-packages --user " +else + pip_install="pip3 install " +fi + echo "Install Redis Python modules... " # Install Python modules -pip3 install redis python-redis-lock +sudo apt -y install python3-redis || true +for PKG in redis python-redis-lock; do + PKG_INSTALLED="`pip3 list | grep ${PKG}`" + if [ "$?" == "0" ] ; then + echo "$PKG is already installed" + else + ${pip_install} ${PKG} + fi +done echo -e "..done\n" echo "Add redis options to sr3 default config... " diff --git a/travis/flow_autoconfig.sh b/travis/flow_autoconfig.sh index dd2d0ad91..ad0e1c514 100755 --- a/travis/flow_autoconfig.sh +++ b/travis/flow_autoconfig.sh @@ -18,16 +18,19 @@ sudo apt-key adv --keyserver "hkps.pool.sks-keyservers.net" --recv-keys "0x6B73A sudo add-apt-repository -y ppa:ssc-hpc-chp-spc/metpx sudo apt update sudo apt -y upgrade -sudo apt -y install python3-setuptools python3-magic python-setuptools python3-paramiko python3-requests +sudo apt -y install python3-setuptools python3-magic python-setuptools python3-paramiko python3-requests python3-pyftpdlib sudo apt -y install metpx-libsr3c metpx-libsr3c-dev metpx-sr3c sudo apt -y install metpx-libsr3c metpx-libsr3c-dev metpx-sr3c sudo apt -y install erlang-nox erlang-diameter erlang-eldap findutils git librabbitmq4 net-tools openssh-client openssh-server python3-pip rabbitmq-server xattr wget +sudo apt -y install wget ncftp + ${pip_install} -U pip # The dependencies that are installed using apt are only available to system default Python versions (e.g. Python 3.8 on Ubuntu 20.04) # If we are testing on a non-default Python version, we need to ensure these dependencies are still installed, so we use pip. # See issue #407, #445. +echo "Checking for missing Python packages and installing with pip" for PKG in amqp appdirs dateparser flufl.lock humanize jsonpickle netifaces paho-mqtt psutil rangehttpserver watchdog xattr paramiko pyftpdlib net-tools; do PKG_INSTALLED="`pip3 list | grep ${PKG}`" if [ "$?" == "0" ] ; then @@ -37,6 +40,9 @@ for PKG in amqp appdirs dateparser flufl.lock humanize jsonpickle netifaces paho fi done +# Need paho > 2.1.0 https://github.com/MetPX/sarracenia/pull/1119 +${pip_install} --upgrade paho-mqtt + # in case it was installed as a dependency. 
sudo apt -y remove metpx-sr3 diff --git a/travis/flow_autoconfig_redhat.sh b/travis/flow_autoconfig_redhat.sh index b63625374..a168a2595 100755 --- a/travis/flow_autoconfig_redhat.sh +++ b/travis/flow_autoconfig_redhat.sh @@ -20,7 +20,7 @@ sudo dnf install -y python3-setuptools python3-magic #sudo apt -y install metpx-libsr3c metpx-libsr3c-dev metpx-sr3c sudo dnf install -y erlang findutils git librabbitmq net-tools python3-pip rabbitmq-server -sudo dnf install -y wget +sudo dnf install -y wget ncftp sudo dnf install -y python3-devel sudo dnf install -y rpmdevtools rpmlint lsb_release
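
A closing note on the interruptible_sleep test changes earlier in this patch: signal.alarm() asks the kernel to deliver SIGALRM to the current process after the given delay, which replaces the old background "sleep 5; kill" subprocess. A minimal sketch of the pattern, Unix-only and independent of sarracenia:

    import signal
    import time

    interrupted = False

    def handler(signum, frame):
        # plays the same role as SleepThing.signal_handler in the test
        global interrupted
        interrupted = True

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(5)          # kernel delivers SIGALRM in roughly 5 seconds

    start = time.monotonic()
    while not interrupted and time.monotonic() - start < 10:
        time.sleep(0.1)      # nap loop, like interruptible_sleep's nap_time

    print(f"woke after {time.monotonic() - start:.1f}s, interrupted={interrupted}")
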